From 5e1ea7feaf83a521e0467a7c073f7c1aa66b1543 Mon Sep 17 00:00:00 2001 From: Himanshu Soni Date: Wed, 8 Apr 2026 09:45:04 +0530 Subject: [PATCH 1/2] feat(search): surface Zoekt non-exhaustive results in UI and logs (#504) --- .../search/components/searchResultsPage.tsx | 26 +++++- .../src/app/(app)/search/useStreamedSearch.ts | 3 + packages/web/src/features/search/index.ts | 3 +- .../search/searchLimitExplanation.test.ts | 83 +++++++++++++++++++ .../features/search/searchLimitExplanation.ts | 63 ++++++++++++++ .../web/src/features/search/zoektSearcher.ts | 24 +++++- 6 files changed, 198 insertions(+), 4 deletions(-) create mode 100644 packages/web/src/features/search/searchLimitExplanation.test.ts create mode 100644 packages/web/src/features/search/searchLimitExplanation.ts diff --git a/packages/web/src/app/(app)/search/components/searchResultsPage.tsx b/packages/web/src/app/(app)/search/components/searchResultsPage.tsx index fb9f68bc1..86e3376f4 100644 --- a/packages/web/src/app/(app)/search/components/searchResultsPage.tsx +++ b/packages/web/src/app/(app)/search/components/searchResultsPage.tsx @@ -11,7 +11,8 @@ import { } from "@/components/ui/resizable"; import { Separator } from "@/components/ui/separator"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; -import { RepositoryInfo, SearchResultFile, SearchStats } from "@/features/search"; +import { getSearchLimitExplanation, RepositoryInfo, SearchResultFile, SearchStats } from "@/features/search"; +import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert"; import useCaptureEvent from "@/hooks/useCaptureEvent"; import { useNonEmptyQueryParam } from "@/hooks/useNonEmptyQueryParam"; import { useSearchHistory } from "@/hooks/useSearchHistory"; @@ -205,6 +206,7 @@ export const SearchResultsPage = ({ searchStats={stats} isMoreResultsButtonVisible={!isExhaustive} isBranchFilteringEnabled={isBranchFilteringEnabled} + maxMatchDisplayCount={maxMatchCount} /> )} @@ 
-221,6 +223,7 @@ interface PanelGroupProps { searchDurationMs: number; numMatches: number; searchStats?: SearchStats; + maxMatchDisplayCount: number; } const PanelGroup = ({ @@ -233,6 +236,7 @@ const PanelGroup = ({ searchDurationMs: _searchDurationMs, numMatches, searchStats, + maxMatchDisplayCount, }: PanelGroupProps) => { const [previewedFile, setPreviewedFile] = useState(undefined); const filteredFileMatches = useFilteredMatches(fileMatches); @@ -258,6 +262,13 @@ const PanelGroup = ({ return Math.round(_searchDurationMs); }, [_searchDurationMs]); + const limitExplanation = useMemo(() => { + if (isStreaming || !isMoreResultsButtonVisible) { + return null; + } + return getSearchLimitExplanation(searchStats, maxMatchDisplayCount); + }, [isStreaming, isMoreResultsButtonVisible, searchStats, maxMatchDisplayCount]); + return ( )} + {limitExplanation && ( +
+ + + {limitExplanation.summary} + {limitExplanation.detail && ( + + {limitExplanation.detail} + + )} + +
+ )}
{filteredFileMatches.length > 0 ? ( (); @@ -101,6 +102,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex timeToSearchCompletionMs: cachedEntry.timeToSearchCompletionMs, timeToFirstSearchResultMs: cachedEntry.timeToFirstSearchResultMs, numMatches: cachedEntry.numMatches, + stats: cachedEntry.stats, }); return; } @@ -242,6 +244,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex timeToFirstSearchResultMs: prev.timeToFirstSearchResultMs, timeToSearchCompletionMs, timestamp: Date.now(), + stats: prev.stats, }); return { ...prev, diff --git a/packages/web/src/features/search/index.ts b/packages/web/src/features/search/index.ts index 665e070eb..0c18ec5d5 100644 --- a/packages/web/src/features/search/index.ts +++ b/packages/web/src/features/search/index.ts @@ -12,4 +12,5 @@ export type { StreamedSearchResponse, SearchResultChunk, SearchResponse, -} from './types'; \ No newline at end of file +} from './types'; +export { getSearchLimitExplanation } from './searchLimitExplanation'; \ No newline at end of file diff --git a/packages/web/src/features/search/searchLimitExplanation.test.ts b/packages/web/src/features/search/searchLimitExplanation.test.ts new file mode 100644 index 000000000..9bc3ec0e4 --- /dev/null +++ b/packages/web/src/features/search/searchLimitExplanation.test.ts @@ -0,0 +1,83 @@ +import { expect, test } from 'vitest'; +import type { SearchStats } from './types'; +import { getSearchLimitExplanation } from './searchLimitExplanation'; + +function stats(overrides: Partial): SearchStats { + return { + actualMatchCount: 10, + totalMatchCount: 10, + duration: 0, + fileCount: 1, + filesSkipped: 0, + contentBytesLoaded: 0, + indexBytesLoaded: 0, + crashes: 0, + shardFilesConsidered: 0, + filesConsidered: 0, + filesLoaded: 0, + shardsScanned: 1, + shardsSkipped: 0, + shardsSkippedFilter: 0, + ngramMatches: 0, + ngramLookups: 0, + wait: 0, + matchTreeConstruction: 0, + matchTreeSearch: 0, + 
regexpsConsidered: 0, + flushReason: 'FLUSH_REASON_UNKNOWN_UNSPECIFIED', + ...overrides, + }; +} + +test('missing stats yields generic incomplete message', () => { + const out = getSearchLimitExplanation(undefined, 100); + expect(out.summary).toContain('incomplete'); +}); + +test('shardsSkipped takes precedence (time limit / partial scan)', () => { + const out = getSearchLimitExplanation( + stats({ + shardsSkipped: 2, + totalMatchCount: 500, + filesSkipped: 99, + }), + 100, + ); + expect(out.summary).toContain('did not scan the entire index'); +}); + +test('totalMatchCount above display cap explains match budget', () => { + const out = getSearchLimitExplanation( + stats({ + actualMatchCount: 100, + totalMatchCount: 250, + }), + 100, + ); + expect(out.summary).toContain('More matches exist'); + expect(out.detail).toContain('250'); +}); + +test('filesSkipped without shard skip explains early stop', () => { + const out = getSearchLimitExplanation( + stats({ + totalMatchCount: 50, + actualMatchCount: 50, + filesSkipped: 10, + }), + 100, + ); + expect(out.summary).toContain('candidate files'); +}); + +test('flushReason timer when no higher-priority signal', () => { + const out = getSearchLimitExplanation( + stats({ + flushReason: 'FLUSH_REASON_TIMER_EXPIRED', + totalMatchCount: 10, + actualMatchCount: 10, + }), + 100, + ); + expect(out.summary).toContain('streaming timer'); +}); diff --git a/packages/web/src/features/search/searchLimitExplanation.ts b/packages/web/src/features/search/searchLimitExplanation.ts new file mode 100644 index 000000000..b09f639fb --- /dev/null +++ b/packages/web/src/features/search/searchLimitExplanation.ts @@ -0,0 +1,63 @@ +import type { SearchStats } from './types'; + +/** Values from zoekt `FlushReason` (grpc string enum names). 
/** Zoekt `FlushReason` values (gRPC string enum names) that get a dedicated message. */
const FLUSH_REASON_TIMER_EXPIRED = 'FLUSH_REASON_TIMER_EXPIRED';
const FLUSH_REASON_MAX_SIZE = 'FLUSH_REASON_MAX_SIZE';

/** Advice shown when no specific cause for the truncation can be identified. */
const GENERIC_LIMIT_DETAIL = 'Increase the match limit, narrow your query, or scope to a repository.';

/** Advice shared by both flush-reason messages. */
const FLUSH_LIMIT_DETAIL = 'Try narrowing your query or increasing limits.';

/**
 * The subset of search statistics this module reads.
 *
 * Declared structurally so the function can be called (and tested) with any
 * object carrying these four fields; the project's full `SearchStats` object
 * is expected to satisfy this shape.
 */
export interface SearchLimitStats {
    shardsSkipped: number;
    filesSkipped: number;
    totalMatchCount: number;
    flushReason: string;
}

/** User-facing explanation: a short headline plus optional supporting text. */
export interface SearchLimitExplanation {
    summary: string;
    detail?: string;
}

/**
 * User-facing copy when Zoekt returned a non-exhaustive search (more matches may exist
 * than were returned or scanned).
 *
 * Signals are checked from most to least specific, and the first one that applies wins:
 *   1. skipped index shards,
 *   2. reported match count above the display cap,
 *   3. skipped candidate files,
 *   4. a timer / max-size flush reason,
 *   5. a generic fallback.
 *
 * The flush reason is deliberately consulted only after the match-count and
 * skipped-file checks, so that it is used "when no higher-priority signal" is
 * present and cannot mask a concrete, quantifiable cause.
 * NOTE(review): Zoekt's flush reason appears to describe why a batch of results
 * was emitted rather than why matches were dropped - confirm against the Zoekt docs.
 *
 * @param stats - Statistics for the finished search, or `undefined` when none were received.
 * @param maxMatchDisplayCount - Maximum number of matches the current request returns.
 * @returns The summary/detail pair to show in the "results may be incomplete" alert.
 *
 * @see https://github.com/sourcebot-dev/sourcebot/issues/504
 */
export function getSearchLimitExplanation(
    stats: SearchLimitStats | undefined,
    maxMatchDisplayCount: number,
): SearchLimitExplanation {
    // No stats at all: we only know the caller considers the search non-exhaustive.
    if (!stats) {
        return {
            summary: 'Results may be incomplete.',
            detail: GENERIC_LIMIT_DETAIL,
        };
    }

    if (stats.shardsSkipped > 0) {
        return {
            summary: 'Search did not scan the entire index.',
            detail: 'One or more index shards were skipped (often because the search hit a time limit). Additional matches may exist.',
        };
    }

    if (stats.totalMatchCount > maxMatchDisplayCount) {
        return {
            summary: 'More matches exist than are shown.',
            detail: `The index reported ${stats.totalMatchCount} matches, but this request only returns up to ${maxMatchDisplayCount}. Use “load more” or raise the match limit.`,
        };
    }

    if (stats.filesSkipped > 0) {
        return {
            summary: 'Some candidate files were not fully searched.',
            detail: 'The engine stopped after finding enough matches (per-shard or total limits). Additional matches may exist.',
        };
    }

    if (stats.flushReason === FLUSH_REASON_TIMER_EXPIRED) {
        return {
            summary: 'Results were flushed early due to a streaming timer.',
            detail: FLUSH_LIMIT_DETAIL,
        };
    }

    if (stats.flushReason === FLUSH_REASON_MAX_SIZE) {
        return {
            summary: 'Intermediate result set reached its size limit.',
            detail: FLUSH_LIMIT_DETAIL,
        };
    }

    // Defensive fallback: non-exhaustive searches should usually hit a branch above
    // (e.g. totalMatchCount vs display cap, skipped shards/files, or flush reason).
    return {
        summary: 'More matches may exist than are shown.',
        detail: GENERIC_LIMIT_DETAIL,
    };
}
finalResponse: StreamedSearchResponse = { type: 'final', accumulatedStats, - isSearchExhaustive: accumulatedStats.totalMatchCount <= accumulatedStats.actualMatchCount, + isSearchExhaustive, } controller.enqueue(encodeSSEREsponseChunk(finalResponse)); From 44a787a8633a3654b76166dd5d9e05c3474cc03b Mon Sep 17 00:00:00 2001 From: Himanshu Soni Date: Wed, 8 Apr 2026 10:01:43 +0530 Subject: [PATCH 2/2] fix(web): avoid bundling search API in client; address review --- CHANGELOG.md | 1 + .../(app)/search/components/searchResultsPage.tsx | 3 ++- .../web/src/app/(app)/search/useStreamedSearch.ts | 3 ++- .../features/search/searchLimitExplanation.test.ts | 12 ++++++++++++ .../src/features/search/searchLimitExplanation.ts | 2 ++ 5 files changed, 19 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 222887e9d..9983a5798 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- Search results show an informational alert when a query may be incomplete (for example due to match limits, skipped index shards, or streaming flush), and non-exhaustive Zoekt searches are logged for operators. [#1098](https://github.com/sourcebot-dev/sourcebot/pull/1098) - Linear issue links in chat responses now render as a rich card-style UI showing the Linear logo, issue identifier, and title instead of plain hyperlinks. 
[#1060](https://github.com/sourcebot-dev/sourcebot/pull/1060) ### Changed diff --git a/packages/web/src/app/(app)/search/components/searchResultsPage.tsx b/packages/web/src/app/(app)/search/components/searchResultsPage.tsx index 86e3376f4..1612f6cf6 100644 --- a/packages/web/src/app/(app)/search/components/searchResultsPage.tsx +++ b/packages/web/src/app/(app)/search/components/searchResultsPage.tsx @@ -11,7 +11,8 @@ import { } from "@/components/ui/resizable"; import { Separator } from "@/components/ui/separator"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; -import { getSearchLimitExplanation, RepositoryInfo, SearchResultFile, SearchStats } from "@/features/search"; +import { getSearchLimitExplanation } from "@/features/search/searchLimitExplanation"; +import type { RepositoryInfo, SearchResultFile, SearchStats } from "@/features/search/types"; import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert"; import useCaptureEvent from "@/hooks/useCaptureEvent"; import { useNonEmptyQueryParam } from "@/hooks/useNonEmptyQueryParam"; diff --git a/packages/web/src/app/(app)/search/useStreamedSearch.ts b/packages/web/src/app/(app)/search/useStreamedSearch.ts index e5e73f833..4a12e8f8b 100644 --- a/packages/web/src/app/(app)/search/useStreamedSearch.ts +++ b/packages/web/src/app/(app)/search/useStreamedSearch.ts @@ -1,6 +1,6 @@ 'use client'; -import { RepositoryInfo, SearchRequest, SearchResultFile, SearchStats, StreamedSearchResponse } from '@/features/search'; +import type { RepositoryInfo, SearchRequest, SearchResultFile, SearchStats, StreamedSearchResponse } from '@/features/search/types'; import { ServiceErrorException } from '@/lib/serviceError'; import { isServiceError } from '@/lib/utils'; import * as Sentry from '@sentry/nextjs'; @@ -116,6 +116,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex timeToSearchCompletionMs: 0, timeToFirstSearchResultMs: 0, numMatches: 0, + stats: 
undefined, }); try { diff --git a/packages/web/src/features/search/searchLimitExplanation.test.ts b/packages/web/src/features/search/searchLimitExplanation.test.ts index 9bc3ec0e4..10cf48334 100644 --- a/packages/web/src/features/search/searchLimitExplanation.test.ts +++ b/packages/web/src/features/search/searchLimitExplanation.test.ts @@ -81,3 +81,15 @@ test('flushReason timer when no higher-priority signal', () => { ); expect(out.summary).toContain('streaming timer'); }); + +test('flushReason max size when no higher-priority signal', () => { + const out = getSearchLimitExplanation( + stats({ + flushReason: 'FLUSH_REASON_MAX_SIZE', + totalMatchCount: 10, + actualMatchCount: 10, + }), + 100, + ); + expect(out.summary).toContain('size limit'); +}); diff --git a/packages/web/src/features/search/searchLimitExplanation.ts b/packages/web/src/features/search/searchLimitExplanation.ts index b09f639fb..f8acf97cc 100644 --- a/packages/web/src/features/search/searchLimitExplanation.ts +++ b/packages/web/src/features/search/searchLimitExplanation.ts @@ -56,6 +56,8 @@ export function getSearchLimitExplanation( }; } + // Defensive fallback: non-exhaustive searches should usually hit a branch above + // (e.g. totalMatchCount vs display cap, skipped shards/files, or flush reason). return { summary: 'More matches may exist than are shown.', detail: 'Increase the match limit, narrow your query, or scope to a repository.',