From 2fa802fb9d5f475148e0bbd5e372b0004db131e7 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 22 Jun 2026 04:03:27 +0000 Subject: [PATCH 1/3] Optimize semantic search by removing LIMIT from function body [ENG-1769] - Remove match_count parameter from match_content_embeddings function - Remove LIMIT clause from function body to improve query planner performance - Add space_id parameter for better filtering - Move LIMIT to caller side using Supabase .limit() method - Update TypeScript types to reflect new function signature Co-authored-by: Michael Gartner --- .../src/utils/discourseNodeSearchProviders.ts | 11 ++++--- apps/roam/src/utils/hyde.ts | 11 ++++--- packages/database/src/dbTypes.ts | 2 +- ...0252_optimize_match_content_embeddings.sql | 33 +++++++++++++++++++ .../database/supabase/schemas/embedding.sql | 12 +++---- 5 files changed, 52 insertions(+), 17 deletions(-) create mode 100644 packages/database/supabase/migrations/20260622040252_optimize_match_content_embeddings.sql diff --git a/apps/roam/src/utils/discourseNodeSearchProviders.ts b/apps/roam/src/utils/discourseNodeSearchProviders.ts index 8411a614c..a34d1be34 100644 --- a/apps/roam/src/utils/discourseNodeSearchProviders.ts +++ b/apps/roam/src/utils/discourseNodeSearchProviders.ts @@ -512,11 +512,12 @@ const runSupabaseSemanticSearch = async ({ } const queryEmbedding = await createEmbedding(trimmedQuery); - const { data, error } = await supabase.rpc("match_content_embeddings", { - query_embedding: JSON.stringify(queryEmbedding), - match_threshold: SUPABASE_MATCH_THRESHOLD, - match_count: SEARCH_TEST_RESULT_LIMIT, - }); + const { data, error } = await supabase + .rpc("match_content_embeddings", { + query_embedding: JSON.stringify(queryEmbedding), + match_threshold: SUPABASE_MATCH_THRESHOLD, + }) + .limit(SEARCH_TEST_RESULT_LIMIT); if (error) { throw new Error(error.message); diff --git a/apps/roam/src/utils/hyde.ts b/apps/roam/src/utils/hyde.ts index 03acdb10b..8a3a7cc5e 100644 --- 
a/apps/roam/src/utils/hyde.ts +++ b/apps/roam/src/utils/hyde.ts @@ -538,11 +538,12 @@ export const findSimilarNodesVectorOnly = async ({ const queryEmbedding = await createEmbedding(text); - const { data, error } = await supabase.rpc("match_content_embeddings", { - query_embedding: JSON.stringify(queryEmbedding), - match_threshold: threshold, - match_count: limit, - }); + const { data, error } = await supabase + .rpc("match_content_embeddings", { + query_embedding: JSON.stringify(queryEmbedding), + match_threshold: threshold, + }) + .limit(limit); if (error) { console.error("Vector search failed:", error); diff --git a/packages/database/src/dbTypes.ts b/packages/database/src/dbTypes.ts index 48342b32c..b84a1b939 100644 --- a/packages/database/src/dbTypes.ts +++ b/packages/database/src/dbTypes.ts @@ -1722,7 +1722,7 @@ export type Database = { match_content_embeddings: { Args: { current_document_id?: number - match_count: number + current_space_id?: number match_threshold: number query_embedding: string } diff --git a/packages/database/supabase/migrations/20260622040252_optimize_match_content_embeddings.sql b/packages/database/supabase/migrations/20260622040252_optimize_match_content_embeddings.sql new file mode 100644 index 000000000..bd6869595 --- /dev/null +++ b/packages/database/supabase/migrations/20260622040252_optimize_match_content_embeddings.sql @@ -0,0 +1,33 @@ +-- Optimize match_content_embeddings by removing LIMIT from function body +-- This improves query planner performance as the LIMIT parameter was killing the planner +-- Also adds space_id parameter for better filtering + +set search_path to public, extensions ; + +CREATE OR REPLACE FUNCTION public.match_content_embeddings ( +query_embedding extensions.vector, +match_threshold double precision, +current_document_id integer DEFAULT NULL::integer, +current_space_id bigint DEFAULT NULL::bigint) +RETURNS TABLE ( +content_id bigint, +roam_uid Text, +text_content Text, +similarity double precision) +SET 
search_path = 'extensions' +LANGUAGE sql STABLE +AS $$ +SELECT + c.id AS content_id, + c.source_local_id AS roam_uid, + c.text AS text_content, + 1 - (c.vector <=> query_embedding) AS similarity +FROM public.my_contents_with_embedding_openai_text_embedding_3_small_1536 AS c +WHERE 1 - (c.vector <=> query_embedding) > match_threshold + AND (current_document_id IS NULL OR c.document_id = current_document_id) + AND (current_space_id IS NULL OR c.space_id = current_space_id) +ORDER BY + c.vector <=> query_embedding ASC; +$$ ; + +RESET ALL ; diff --git a/packages/database/supabase/schemas/embedding.sql b/packages/database/supabase/schemas/embedding.sql index 273fd6eab..57dc6ad5b 100644 --- a/packages/database/supabase/schemas/embedding.sql +++ b/packages/database/supabase/schemas/embedding.sql @@ -57,8 +57,8 @@ set search_path to public, extensions ; CREATE OR REPLACE FUNCTION public.match_content_embeddings ( query_embedding extensions.vector, match_threshold double precision, -match_count integer, -current_document_id integer DEFAULT NULL::integer) +current_document_id integer DEFAULT NULL::integer, +current_space_id bigint DEFAULT NULL::bigint) RETURNS TABLE ( content_id bigint, roam_uid Text, @@ -75,16 +75,16 @@ SELECT FROM public.my_contents_with_embedding_openai_text_embedding_3_small_1536 AS c WHERE 1 - (c.vector <=> query_embedding) > match_threshold AND (current_document_id IS NULL OR c.document_id = current_document_id) + AND (current_space_id IS NULL OR c.space_id = current_space_id) ORDER BY - c.vector <=> query_embedding ASC -LIMIT match_count; + c.vector <=> query_embedding ASC; $$ ; ALTER FUNCTION public.match_content_embeddings ( query_embedding extensions.vector, match_threshold double precision, -match_count integer, -current_document_id integer) OWNER TO "postgres" ; +current_document_id integer, +current_space_id bigint) OWNER TO "postgres" ; CREATE OR REPLACE FUNCTION public.match_embeddings_for_subset_nodes ( "p_query_embedding" extensions.vector, 
From 68cd04e7be6efa02c3996c1d0cd36e03eb0cd5d8 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 22 Jun 2026 04:16:24 +0000 Subject: [PATCH 2/3] Remove space_id parameter - moved to separate ticket Focus this PR only on removing the in-function LIMIT that was killing the planner. The space_id filtering will be addressed in a separate ticket. Co-authored-by: Michael Gartner --- packages/database/src/dbTypes.ts | 1 - .../20260622040252_optimize_match_content_embeddings.sql | 5 +---- packages/database/supabase/schemas/embedding.sql | 7 ++----- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/packages/database/src/dbTypes.ts b/packages/database/src/dbTypes.ts index b84a1b939..c37e3a030 100644 --- a/packages/database/src/dbTypes.ts +++ b/packages/database/src/dbTypes.ts @@ -1722,7 +1722,6 @@ export type Database = { match_content_embeddings: { Args: { current_document_id?: number - current_space_id?: number match_threshold: number query_embedding: string } diff --git a/packages/database/supabase/migrations/20260622040252_optimize_match_content_embeddings.sql b/packages/database/supabase/migrations/20260622040252_optimize_match_content_embeddings.sql index bd6869595..9154999dd 100644 --- a/packages/database/supabase/migrations/20260622040252_optimize_match_content_embeddings.sql +++ b/packages/database/supabase/migrations/20260622040252_optimize_match_content_embeddings.sql @@ -1,14 +1,12 @@ -- Optimize match_content_embeddings by removing LIMIT from function body -- This improves query planner performance as the LIMIT parameter was killing the planner --- Also adds space_id parameter for better filtering set search_path to public, extensions ; CREATE OR REPLACE FUNCTION public.match_content_embeddings ( query_embedding extensions.vector, match_threshold double precision, -current_document_id integer DEFAULT NULL::integer, -current_space_id bigint DEFAULT NULL::bigint) +current_document_id integer DEFAULT NULL::integer) RETURNS TABLE ( content_id bigint, roam_uid 
Text, @@ -25,7 +23,6 @@ SELECT FROM public.my_contents_with_embedding_openai_text_embedding_3_small_1536 AS c WHERE 1 - (c.vector <=> query_embedding) > match_threshold AND (current_document_id IS NULL OR c.document_id = current_document_id) - AND (current_space_id IS NULL OR c.space_id = current_space_id) ORDER BY c.vector <=> query_embedding ASC; $$ ; diff --git a/packages/database/supabase/schemas/embedding.sql b/packages/database/supabase/schemas/embedding.sql index 57dc6ad5b..0e989e229 100644 --- a/packages/database/supabase/schemas/embedding.sql +++ b/packages/database/supabase/schemas/embedding.sql @@ -57,8 +57,7 @@ set search_path to public, extensions ; CREATE OR REPLACE FUNCTION public.match_content_embeddings ( query_embedding extensions.vector, match_threshold double precision, -current_document_id integer DEFAULT NULL::integer, -current_space_id bigint DEFAULT NULL::bigint) +current_document_id integer DEFAULT NULL::integer) RETURNS TABLE ( content_id bigint, roam_uid Text, @@ -75,7 +74,6 @@ SELECT FROM public.my_contents_with_embedding_openai_text_embedding_3_small_1536 AS c WHERE 1 - (c.vector <=> query_embedding) > match_threshold AND (current_document_id IS NULL OR c.document_id = current_document_id) - AND (current_space_id IS NULL OR c.space_id = current_space_id) ORDER BY c.vector <=> query_embedding ASC; $$ ; @@ -83,8 +81,7 @@ $$ ; ALTER FUNCTION public.match_content_embeddings ( query_embedding extensions.vector, match_threshold double precision, -current_document_id integer, -current_space_id bigint) OWNER TO "postgres" ; +current_document_id integer) OWNER TO "postgres" ; CREATE OR REPLACE FUNCTION public.match_embeddings_for_subset_nodes ( "p_query_embedding" extensions.vector, From a0b28aaa61e7b0beef76a2d5e5e9471c89f183ee Mon Sep 17 00:00:00 2001 From: Michael Gartner Date: Mon, 22 Jun 2026 09:25:09 -0600 Subject: [PATCH 3/3] drop if exists --- .../20260622040252_optimize_match_content_embeddings.sql | 2 ++ 1 file changed, 2 insertions(+) 
diff --git a/packages/database/supabase/migrations/20260622040252_optimize_match_content_embeddings.sql b/packages/database/supabase/migrations/20260622040252_optimize_match_content_embeddings.sql index 9154999dd..4aea9334a 100644 --- a/packages/database/supabase/migrations/20260622040252_optimize_match_content_embeddings.sql +++ b/packages/database/supabase/migrations/20260622040252_optimize_match_content_embeddings.sql @@ -3,6 +3,8 @@ set search_path to public, extensions ; +DROP FUNCTION IF EXISTS public.match_content_embeddings(extensions.vector, double precision, integer, integer) ; + CREATE OR REPLACE FUNCTION public.match_content_embeddings ( query_embedding extensions.vector, match_threshold double precision,