diff --git a/CHANGELOG.md b/CHANGELOG.md index 222887e9d..9983a5798 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- Search results show an informational alert when a query's results may be incomplete (for example due to match limits, skipped index shards, or streaming flush), and non-exhaustive Zoekt searches are logged for operators. [#1098](https://github.com/sourcebot-dev/sourcebot/pull/1098) - Linear issue links in chat responses now render as a rich card-style UI showing the Linear logo, issue identifier, and title instead of plain hyperlinks. [#1060](https://github.com/sourcebot-dev/sourcebot/pull/1060) ### Changed diff --git a/packages/web/src/app/(app)/search/components/searchResultsPage.tsx b/packages/web/src/app/(app)/search/components/searchResultsPage.tsx index fb9f68bc1..1612f6cf6 100644 --- a/packages/web/src/app/(app)/search/components/searchResultsPage.tsx +++ b/packages/web/src/app/(app)/search/components/searchResultsPage.tsx @@ -11,7 +11,9 @@ import { } from "@/components/ui/resizable"; import { Separator } from "@/components/ui/separator"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; -import { RepositoryInfo, SearchResultFile, SearchStats } from "@/features/search"; +import { getSearchLimitExplanation } from "@/features/search/searchLimitExplanation"; +import type { RepositoryInfo, SearchResultFile, SearchStats } from "@/features/search/types"; +import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert"; import useCaptureEvent from "@/hooks/useCaptureEvent"; import { useNonEmptyQueryParam } from "@/hooks/useNonEmptyQueryParam"; import { useSearchHistory } from "@/hooks/useSearchHistory"; @@ -205,6 +207,7 @@ export const SearchResultsPage = ({ searchStats={stats} isMoreResultsButtonVisible={!isExhaustive} isBranchFilteringEnabled={isBranchFilteringEnabled} +
maxMatchDisplayCount={maxMatchCount} /> )} @@ -221,6 +224,7 @@ interface PanelGroupProps { searchDurationMs: number; numMatches: number; searchStats?: SearchStats; + maxMatchDisplayCount: number; } const PanelGroup = ({ @@ -233,6 +237,7 @@ const PanelGroup = ({ searchDurationMs: _searchDurationMs, numMatches, searchStats, + maxMatchDisplayCount, }: PanelGroupProps) => { const [previewedFile, setPreviewedFile] = useState(undefined); const filteredFileMatches = useFilteredMatches(fileMatches); @@ -258,6 +263,13 @@ const PanelGroup = ({ return Math.round(_searchDurationMs); }, [_searchDurationMs]); + const limitExplanation = useMemo(() => { + if (isStreaming || !isMoreResultsButtonVisible) { + return null; + } + return getSearchLimitExplanation(searchStats, maxMatchDisplayCount); + }, [isStreaming, isMoreResultsButtonVisible, searchStats, maxMatchDisplayCount]); + return ( )} + {limitExplanation && ( +
+ + + {limitExplanation.summary} + {limitExplanation.detail && ( + + {limitExplanation.detail} + + )} + +
+ )}
{filteredFileMatches.length > 0 ? ( (); @@ -101,6 +102,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex timeToSearchCompletionMs: cachedEntry.timeToSearchCompletionMs, timeToFirstSearchResultMs: cachedEntry.timeToFirstSearchResultMs, numMatches: cachedEntry.numMatches, + stats: cachedEntry.stats, }); return; } @@ -114,6 +116,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex timeToSearchCompletionMs: 0, timeToFirstSearchResultMs: 0, numMatches: 0, + stats: undefined, }); try { @@ -242,6 +245,7 @@ export const useStreamedSearch = ({ query, matches, contextLines, whole, isRegex timeToFirstSearchResultMs: prev.timeToFirstSearchResultMs, timeToSearchCompletionMs, timestamp: Date.now(), + stats: prev.stats, }); return { ...prev, diff --git a/packages/web/src/features/search/index.ts b/packages/web/src/features/search/index.ts index 665e070eb..0c18ec5d5 100644 --- a/packages/web/src/features/search/index.ts +++ b/packages/web/src/features/search/index.ts @@ -12,4 +12,5 @@ export type { StreamedSearchResponse, SearchResultChunk, SearchResponse, -} from './types'; \ No newline at end of file +} from './types'; +export { getSearchLimitExplanation } from './searchLimitExplanation'; \ No newline at end of file diff --git a/packages/web/src/features/search/searchLimitExplanation.test.ts b/packages/web/src/features/search/searchLimitExplanation.test.ts new file mode 100644 index 000000000..10cf48334 --- /dev/null +++ b/packages/web/src/features/search/searchLimitExplanation.test.ts @@ -0,0 +1,95 @@ +import { expect, test } from 'vitest'; +import type { SearchStats } from './types'; +import { getSearchLimitExplanation } from './searchLimitExplanation'; + +function stats(overrides: Partial): SearchStats { + return { + actualMatchCount: 10, + totalMatchCount: 10, + duration: 0, + fileCount: 1, + filesSkipped: 0, + contentBytesLoaded: 0, + indexBytesLoaded: 0, + crashes: 0, + shardFilesConsidered: 0, + 
filesConsidered: 0, + filesLoaded: 0, + shardsScanned: 1, + shardsSkipped: 0, + shardsSkippedFilter: 0, + ngramMatches: 0, + ngramLookups: 0, + wait: 0, + matchTreeConstruction: 0, + matchTreeSearch: 0, + regexpsConsidered: 0, + flushReason: 'FLUSH_REASON_UNKNOWN_UNSPECIFIED', + ...overrides, + }; +} + +test('missing stats yields generic incomplete message', () => { + const out = getSearchLimitExplanation(undefined, 100); + expect(out.summary).toContain('incomplete'); +}); + +test('shardsSkipped takes precedence (time limit / partial scan)', () => { + const out = getSearchLimitExplanation( + stats({ + shardsSkipped: 2, + totalMatchCount: 500, + filesSkipped: 99, + }), + 100, + ); + expect(out.summary).toContain('did not scan the entire index'); +}); + +test('totalMatchCount above display cap explains match budget', () => { + const out = getSearchLimitExplanation( + stats({ + actualMatchCount: 100, + totalMatchCount: 250, + }), + 100, + ); + expect(out.summary).toContain('More matches exist'); + expect(out.detail).toContain('250'); +}); + +test('filesSkipped without shard skip explains early stop', () => { + const out = getSearchLimitExplanation( + stats({ + totalMatchCount: 50, + actualMatchCount: 50, + filesSkipped: 10, + }), + 100, + ); + expect(out.summary).toContain('candidate files'); +}); + +test('flushReason timer when no higher-priority signal', () => { + const out = getSearchLimitExplanation( + stats({ + flushReason: 'FLUSH_REASON_TIMER_EXPIRED', + totalMatchCount: 10, + actualMatchCount: 10, + }), + 100, + ); + expect(out.summary).toContain('streaming timer'); +}); + +test('flushReason max size when no higher-priority signal', () => { + const out = getSearchLimitExplanation( + stats({ + flushReason: 'FLUSH_REASON_MAX_SIZE', + totalMatchCount: 10, + actualMatchCount: 10, + }), + 100, + ); + expect(out.summary).toContain('size limit'); +}); diff --git a/packages/web/src/features/search/searchLimitExplanation.ts 
b/packages/web/src/features/search/searchLimitExplanation.ts new file mode 100644 index 000000000..f8acf97cc --- /dev/null +++ b/packages/web/src/features/search/searchLimitExplanation.ts @@ -0,0 +1,74 @@
+import type { SearchStats } from './types';
+
+/** Values from zoekt `FlushReason` (grpc string enum names). */
+const FLUSH_REASON_TIMER_EXPIRED = 'FLUSH_REASON_TIMER_EXPIRED';
+const FLUSH_REASON_MAX_SIZE = 'FLUSH_REASON_MAX_SIZE';
+
+/**
+ * User-facing copy when Zoekt returned a non-exhaustive search (more matches may exist
+ * than were returned or scanned).
+ *
+ * Signals are checked from most to least specific and the first that applies wins:
+ * skipped shards, total matches above the display cap, skipped files, and only then
+ * the stream flush reason, so a flush reason never masks a match-limit or
+ * skipped-file explanation.
+ *
+ * @param stats Search statistics; when undefined a generic message is returned.
+ * @param maxMatchDisplayCount Match cap of the request, compared against `stats.totalMatchCount`.
+ * @returns A `summary` line plus an optional `detail` line.
+ * @see https://github.com/sourcebot-dev/sourcebot/issues/504
+ */
+export function getSearchLimitExplanation(
+    stats: SearchStats | undefined,
+    maxMatchDisplayCount: number,
+): { summary: string; detail?: string } {
+    if (!stats) {
+        return {
+            summary: 'Results may be incomplete.',
+            detail: 'Increase the match limit, narrow your query, or scope to a repository.',
+        };
+    }
+
+    if (stats.shardsSkipped > 0) {
+        return {
+            summary: 'Search did not scan the entire index.',
+            detail: 'One or more index shards were skipped (often because the search hit a time limit). Additional matches may exist.',
+        };
+    }
+
+    if (stats.totalMatchCount > maxMatchDisplayCount) {
+        return {
+            summary: 'More matches exist than are shown.',
+            detail: `The index reported ${stats.totalMatchCount} matches, but this request only returns up to ${maxMatchDisplayCount}. Use “load more” or raise the match limit.`,
+        };
+    }
+
+    if (stats.filesSkipped > 0) {
+        return {
+            summary: 'Some candidate files were not fully searched.',
+            detail: 'The engine stopped after finding enough matches (per-shard or total limits). Additional matches may exist.',
+        };
+    }
+
+    // Flush reasons are the least specific signal here, so they are consulted last.
+    if (stats.flushReason === FLUSH_REASON_TIMER_EXPIRED) {
+        return {
+            summary: 'Results were flushed early due to a streaming timer.',
+            detail: 'Try narrowing your query or increasing limits.',
+        };
+    }
+
+    if (stats.flushReason === FLUSH_REASON_MAX_SIZE) {
+        return {
+            summary: 'Intermediate result set reached its size limit.',
+            detail: 'Try narrowing your query or increasing limits.',
+        };
+    }
+
+    // Defensive fallback: non-exhaustive searches should usually hit a branch above
+    // (e.g. totalMatchCount vs display cap, skipped shards/files, or flush reason).
+    return {
+        summary: 'More matches may exist than are shown.',
+        detail: 'Increase the match limit, narrow your query, or scope to a repository.',
+    };
+}
diff --git a/packages/web/src/features/search/zoektSearcher.ts b/packages/web/src/features/search/zoektSearcher.ts index f3fa6278f..78fe01a2e 100644 --- a/packages/web/src/features/search/zoektSearcher.ts +++ b/packages/web/src/features/search/zoektSearcher.ts @@ -133,11 +133,21 @@ export const zoektSearch = async (searchRequest: ZoektGrpcSearchRequest, prisma: const reposMapCache = await createReposMapForChunk(response, new Map(), prisma); const { stats, files, repositoryInfo } = await transformZoektSearchResponse(response, reposMapCache); + const isSearchExhaustive = stats.totalMatchCount <= stats.actualMatchCount; + if (!isSearchExhaustive) { + logger.info('Zoekt search finished with non-exhaustive results', { + totalMatchCount: stats.totalMatchCount, + actualMatchCount: stats.actualMatchCount, + flushReason: stats.flushReason, + shardsSkipped: stats.shardsSkipped, + filesSkipped: stats.filesSkipped, + }); + } resolve({ stats, files, repositoryInfo, - isSearchExhaustive: stats.totalMatchCount <= stats.actualMatchCount, + isSearchExhaustive, } satisfies SearchResponse); } catch (err) { reject(err); @@ -180,10 +190,20 @@ export const zoektStreamSearch = async (searchRequest: ZoektGrpcSearchRequest, p async start(controller) { const tryCloseController = () => { if (!isStreamActive && pendingChunks === 0) { + const isSearchExhaustive =
accumulatedStats.totalMatchCount <= accumulatedStats.actualMatchCount; + if (!isSearchExhaustive) { + logger.info('Zoekt search finished with non-exhaustive results', { + totalMatchCount: accumulatedStats.totalMatchCount, + actualMatchCount: accumulatedStats.actualMatchCount, + flushReason: accumulatedStats.flushReason, + shardsSkipped: accumulatedStats.shardsSkipped, + filesSkipped: accumulatedStats.filesSkipped, + }); + } const finalResponse: StreamedSearchResponse = { type: 'final', accumulatedStats, - isSearchExhaustive: accumulatedStats.totalMatchCount <= accumulatedStats.actualMatchCount, + isSearchExhaustive, } controller.enqueue(encodeSSEREsponseChunk(finalResponse));