diff --git a/packages/api-client/src/posthog-client.test.ts b/packages/api-client/src/posthog-client.test.ts index 1eacedc65a..9774de59b9 100644 --- a/packages/api-client/src/posthog-client.test.ts +++ b/packages/api-client/src/posthog-client.test.ts @@ -921,4 +921,115 @@ describe("PostHogAPIClient", () => { ); }); }); + + describe("batched scout emissions", () => { + const EMISSIONS_PATH = + "/api/projects/123/signals/scout/runs/emissions/batch/"; + const REPORTS_PATH = + "/api/projects/123/signals/scout/runs/emissions/reports/batch/"; + + function buildClient(fetch: ReturnType) { + const client = new PostHogAPIClient( + "http://localhost:8000", + async () => "token", + async () => "token", + 123, + ); + ( + client as unknown as { + api: { baseUrl: string; fetcher: { fetch: typeof fetch } }; + } + ).api = { baseUrl: "http://localhost:8000", fetcher: { fetch } }; + return client; + } + + // Both batch methods share the same scoutBatchByRunIds helper, so their + // empty short-circuit, request shape, and error path are exercised together. + const methods = [ + ["batchScoutRunEmissions", EMISSIONS_PATH], + ["batchScoutEmissionReports", REPORTS_PATH], + ] as const; + + it.each(methods)( + "%s short-circuits empty run ids without hitting the network", + async (method) => { + const fetch = vi.fn(); + const client = buildClient(fetch); + await expect(client[method](123, [])).resolves.toEqual([]); + expect(fetch).not.toHaveBeenCalled(); + }, + ); + + it.each(methods)( + "%s POSTs the run ids in one request and flattens the response", + async (method, path) => { + const rows = [ + { id: "e1", run_id: "r1" }, + { id: "e2", run_id: "r2" }, + ]; + const fetch = vi + .fn() + .mockResolvedValue({ ok: true, json: async () => rows }); + + await expect( + buildClient(fetch)[method](123, ["r1", "r2"]), + ).resolves.toEqual(rows); + expect(fetch).toHaveBeenCalledTimes(1); + expect(fetch.mock.calls[0][0]).toMatchObject({ method: "post", path }); + expect(JSON.parse(fetch.mock.calls[0][0].overrides.body)).toEqual({ + run_ids: ["r1", "r2"], + }); + }, + ); + + it.each(methods)( + "%s throws when the server responds non-OK", + async (method) => { + const fetch = vi + .fn() + .mockResolvedValue({ ok: false, statusText: "Bad Request" }); + await expect(buildClient(fetch)[method](123, ["r1"])).rejects.toThrow( + "Bad Request", + ); + }, + ); + + it("unwraps a paginated reports payload", async () => { + const links = [{ finding_id: "f1", source_id: "s1", report: null }]; + const fetch = vi.fn().mockResolvedValue({ + ok: true, + json: async () => ({ results: links }), + }); + + await expect( + buildClient(fetch).batchScoutEmissionReports(123, ["r1"]), + ).resolves.toEqual(links); + expect(fetch.mock.calls[0][0]).toMatchObject({ + method: "post", + path: REPORTS_PATH, + }); + }); + + it("splits >200 run ids into parallel chunks and concatenates them", async () => { + const runIds = Array.from({ length: 450 }, (_, i) => `r${i}`); + const fetch = vi.fn(async (req) => { + const { run_ids } = JSON.parse(req.overrides.body) as { + run_ids: string[]; + }; + return { + ok: true, + json: async () => run_ids.map((run_id) => ({ id: run_id, run_id })), + }; + }); + + const result = await buildClient(fetch).batchScoutRunEmissions( + 123, + runIds, + ); + // 450 ids → chunks of 200, 200, 50. + expect(fetch).toHaveBeenCalledTimes(3); + expect(result).toHaveLength(450); + expect(result.map((row) => row.run_id)).toEqual(runIds); + }); + }); }); diff --git a/packages/api-client/src/posthog-client.ts b/packages/api-client/src/posthog-client.ts index 1f80551d62..0e1e11255a 100644 --- a/packages/api-client/src/posthog-client.ts +++ b/packages/api-client/src/posthog-client.ts @@ -1855,6 +1855,29 @@ export class PostHogAPIClient { return (await response.json()) as T; } + private async scoutPost( + projectId: number, + subPath: string, + body: unknown, + ): Promise { + const urlPath = `/api/projects/${projectId}/signals/scout/${subPath}`; + const url = new URL(`${this.api.baseUrl}${urlPath}`); + const response = await this.api.fetcher.fetch({ + method: "post", + url, + path: urlPath, + overrides: { + body: JSON.stringify(body), + }, + }); + if (!response.ok) { + throw new Error( + `Scout request failed (${subPath}): ${response.statusText}`, + ); + } + return (await response.json()) as T; + } + async listScoutConfigs(projectId: number): Promise { const data = await this.scoutGet< { results: ScoutConfig[] } | ScoutConfig[] @@ -1919,29 +1942,67 @@ export class PostHogAPIClient { return await this.scoutGet(projectId, `runs/${runId}/`); } - async listScoutRunEmissions( + /** + * POST a run-id list to a scout batch endpoint and flatten the response. The + * API caps each call at SCOUT_BATCH_RUN_ID_LIMIT ids, so larger lists are + * split into parallel chunks and concatenated — the caller never has to know + * the cap exists. Run ids belonging to another team contribute no rows rather + * than erroring, so a single stale id can't blank the list. + */ + private async scoutBatchByRunIds( projectId: number, - runId: string, + subPath: string, + runIds: string[], + ): Promise { + if (runIds.length === 0) return []; + const SCOUT_BATCH_RUN_ID_LIMIT = 200; + const chunks: string[][] = []; + for (let i = 0; i < runIds.length; i += SCOUT_BATCH_RUN_ID_LIMIT) { + chunks.push(runIds.slice(i, i + SCOUT_BATCH_RUN_ID_LIMIT)); + } + const pages = await Promise.all( + chunks.map((chunk) => + this.scoutPost<{ results: T[] } | T[]>(projectId, subPath, { + run_ids: chunk, + }), + ), + ); + return pages.flatMap((data) => + Array.isArray(data) ? data : (data.results ?? []), + ); + } + + /** + * Every supplied run's emitted findings in one request, flattened newest-first + * (each row keeps its `run_id` so the caller can regroup). Replaces the old + * per-run fan-out — one Postgres query instead of one request per run. + */ + async batchScoutRunEmissions( + projectId: number, + runIds: string[], ): Promise { - const data = await this.scoutGet< - { results: ScoutEmission[] } | ScoutEmission[] - >(projectId, `runs/${runId}/emissions/`); - return Array.isArray(data) ? data : (data.results ?? []); + return this.scoutBatchByRunIds( + projectId, + "runs/emissions/batch/", + runIds, + ); } /** - * Best-effort reverse lookup: for each finding a run emitted, the inbox report - * (if any) its underlying signal grouped into. Pairs with the report's evidence - * list, which links the other direction. + * Best-effort reverse lookup: for each finding the supplied runs emitted, the + * inbox report (if any) its underlying signal grouped into. Resolves every + * run's findings in a single ClickHouse round-trip instead of one per run. + * Pairs with the report's evidence list, which links the other direction. */ - async listScoutEmissionReports( + async batchScoutEmissionReports( projectId: number, - runId: string, + runIds: string[], ): Promise { - const data = await this.scoutGet< - { results: ScoutEmissionReportLink[] } | ScoutEmissionReportLink[] - >(projectId, `runs/${runId}/emissions/reports/`); - return Array.isArray(data) ? data : (data.results ?? []); + return this.scoutBatchByRunIds( + projectId, + "runs/emissions/reports/batch/", + runIds, + ); } async searchScoutScratchpad( diff --git a/packages/ui/src/features/scouts/components/ScoutSignalsSection.tsx b/packages/ui/src/features/scouts/components/ScoutSignalsSection.tsx index ed8310f33b..ba046638fa 100644 --- a/packages/ui/src/features/scouts/components/ScoutSignalsSection.tsx +++ b/packages/ui/src/features/scouts/components/ScoutSignalsSection.tsx @@ -1,5 +1,6 @@ import type { LinkedSignalReport, + ScoutEmission, ScoutRun, } from "@posthog/api-client/posthog-client"; import { ANALYTICS_EVENTS } from "@posthog/shared"; @@ -17,14 +18,15 @@ import { ScoutTaskRunLink } from "./ScoutTaskRunLink"; /** * Cadence bounds a scout to ~48 runs per window (30-minute minimum interval), * but a backend-configured cadence below the UI presets could push past that; - * capping the initially mounted runs caps the emissions-query fan-out too. + * capping the initially shown runs keeps the batched emissions request small. */ const INITIAL_EMITTED_RUNS = 10; /** - * The signals this scout emitted in the runs window, newest first. Emissions - * are only fetchable per run, so each emitted run gets its own child query – - * runs beyond the cap stay unmounted (and unfetched) until "Show more". + * The signals this scout emitted in the runs window, newest first. The visible + * runs' emissions and report links are fetched in two batched requests (one each) + * rather than one request per run; "Show more" widens the window and refetches, + * keeping the already-rendered cards in place while the larger batch loads. */ export function ScoutSignalsSection({ runs, @@ -41,11 +43,46 @@ export function ScoutSignalsSection({ highlightFindingId?: string; }) { const [showAll, setShowAll] = useState(false); - const emittedRuns = runs.filter((run) => (run.emitted_count ?? 0) > 0); - const visibleRuns = showAll - ? emittedRuns - : emittedRuns.slice(0, INITIAL_EMITTED_RUNS); + const emittedRuns = useMemo( + () => runs.filter((run) => (run.emitted_count ?? 0) > 0), + [runs], + ); + const visibleRuns = useMemo( + () => (showAll ? emittedRuns : emittedRuns.slice(0, INITIAL_EMITTED_RUNS)), + [emittedRuns, showAll], + ); const hiddenCount = emittedRuns.length - visibleRuns.length; + const visibleRunIds = useMemo( + () => visibleRuns.map((run) => run.run_id), + [visibleRuns], + ); + + const { + data: emissions, + isLoading: emissionsLoading, + isError: emissionsError, + } = useScoutRunEmissions(visibleRunIds); + // Best-effort reverse lookup of which inbox report each finding grouped into. + // A failure here is non-fatal: the cards still render, just without the chip. + const { data: emissionReports } = useScoutEmissionReports(visibleRunIds); + + const emissionsByRunId = useMemo(() => { + const map = new Map(); + for (const emission of emissions ?? []) { + const list = map.get(emission.run_id); + if (list) list.push(emission); + else map.set(emission.run_id, [emission]); + } + return map; + }, [emissions]); + + const reportBySourceId = useMemo(() => { + const map = new Map(); + for (const link of emissionReports ?? []) { + if (link.report) map.set(link.source_id, link.report); + } + return map; + }, [emissionReports]); return ( @@ -67,6 +104,10 @@ export function ScoutSignalsSection({ ))} @@ -95,29 +136,22 @@ export function ScoutSignalsSection({ function RunEmissions({ run, + emissions, + reportBySourceId, + loading, + error, highlightFindingId, }: { run: ScoutRun; + emissions: ScoutEmission[] | undefined; + reportBySourceId: Map; + loading: boolean; + error: boolean; highlightFindingId?: string; }) { - const { - data: emissions, - isLoading, - isError, - } = useScoutRunEmissions(run.run_id); - // Best-effort reverse lookup of which inbox report each finding grouped into. - // A failure here is non-fatal: the cards still render, just without the chip. - const { data: emissionReports } = useScoutEmissionReports(run.run_id); - const reportBySourceId = useMemo(() => { - const map = new Map(); - for (const link of emissionReports ?? []) { - if (link.report) map.set(link.source_id, link.report); - } - return map; - }, [emissionReports]); const taskRunUrl = run.task_url ? getPostHogUrl(run.task_url) : null; - if (isLoading) { + if (loading) { return ( ); @@ -125,7 +159,7 @@ function RunEmissions({ // The run-level emitted_count promised signals; an errored or empty // emissions response must say so rather than render nothing. - if (isError || !emissions || emissions.length === 0) { + if (error || !emissions || emissions.length === 0) { return ( - {isError + {error ? "Couldn't load this run's signals." : "No signal details available for this run."} diff --git a/packages/ui/src/features/scouts/hooks/scoutQueryKeys.ts b/packages/ui/src/features/scouts/hooks/scoutQueryKeys.ts index 6c4949bce2..70c5f812e0 100644 --- a/packages/ui/src/features/scouts/hooks/scoutQueryKeys.ts +++ b/packages/ui/src/features/scouts/hooks/scoutQueryKeys.ts @@ -6,8 +6,8 @@ export const scoutQueryKeys = { runs: (projectId: number | null) => ["scouts", "runs", projectId] as const, scratchpad: (projectId: number | null) => ["scouts", "scratchpad", projectId] as const, - emissions: (projectId: number | null, runId: string) => - ["scouts", "emissions", projectId, runId] as const, - emissionReports: (projectId: number | null, runId: string) => - ["scouts", "emissionReports", projectId, runId] as const, + emissions: (projectId: number | null, runIds: string[]) => + ["scouts", "emissions", projectId, runIds] as const, + emissionReports: (projectId: number | null, runIds: string[]) => + ["scouts", "emissionReports", projectId, runIds] as const, }; diff --git a/packages/ui/src/features/scouts/hooks/useScoutEmissionReports.ts b/packages/ui/src/features/scouts/hooks/useScoutEmissionReports.ts index cbea32aaf8..6555d13e98 100644 --- a/packages/ui/src/features/scouts/hooks/useScoutEmissionReports.ts +++ b/packages/ui/src/features/scouts/hooks/useScoutEmissionReports.ts @@ -1,24 +1,31 @@ import type { ScoutEmissionReportLink } from "@posthog/api-client/posthog-client"; import { useAuthenticatedQuery } from "@posthog/ui/hooks/useAuthenticatedQuery"; +import { keepPreviousData } from "@tanstack/react-query"; +import { useMemo } from "react"; import { useAuthStateValue } from "../../auth/store"; import { scoutQueryKeys } from "./scoutQueryKeys"; /** - * Best-effort reverse lookup of which inbox report each of a run's findings - * grouped into. Loaded per run alongside {@link useScoutRunEmissions}; the - * caller keys the result by `source_id` to adorn each emission card. + * Best-effort reverse lookup of which inbox report each finding grouped into, + * for every run in the window. Loaded alongside {@link useScoutRunEmissions} in a + * single batched request (one ClickHouse round-trip); the caller keys the result + * by `source_id` to adorn each emission card. A failure here is non-fatal — the + * cards still render, just without the report chip. */ -export function useScoutEmissionReports(runId: string) { +export function useScoutEmissionReports(runIds: string[]) { const projectId = useAuthStateValue((state) => state.currentProjectId); + // Sort so the cache key is stable regardless of run ordering. + const sortedRunIds = useMemo(() => [...runIds].sort(), [runIds]); return useAuthenticatedQuery( - scoutQueryKeys.emissionReports(projectId, runId), + scoutQueryKeys.emissionReports(projectId, sortedRunIds), (client) => projectId - ? client.listScoutEmissionReports(projectId, runId) + ? client.batchScoutEmissionReports(projectId, sortedRunIds) : Promise.resolve([]), { - enabled: !!projectId && !!runId, + enabled: !!projectId && sortedRunIds.length > 0, staleTime: 60_000, + placeholderData: keepPreviousData, }, ); } diff --git a/packages/ui/src/features/scouts/hooks/useScoutRunEmissions.ts b/packages/ui/src/features/scouts/hooks/useScoutRunEmissions.ts index 0a44eeee05..b6ab75d8c1 100644 --- a/packages/ui/src/features/scouts/hooks/useScoutRunEmissions.ts +++ b/packages/ui/src/features/scouts/hooks/useScoutRunEmissions.ts @@ -1,19 +1,31 @@ import type { ScoutEmission } from "@posthog/api-client/posthog-client"; import { useAuthenticatedQuery } from "@posthog/ui/hooks/useAuthenticatedQuery"; +import { keepPreviousData } from "@tanstack/react-query"; +import { useMemo } from "react"; import { useAuthStateValue } from "../../auth/store"; import { scoutQueryKeys } from "./scoutQueryKeys"; -export function useScoutRunEmissions(runId: string) { +/** + * Every supplied run's emitted findings in one batched request, flattened + * newest-first (each row keeps its `run_id` so the caller can regroup). Replaces + * the old per-run fan-out — one request for the whole window instead of one per + * run. Previous results are kept while a widened window refetches so growing the + * window doesn't blank the already-rendered cards. + */ +export function useScoutRunEmissions(runIds: string[]) { const projectId = useAuthStateValue((state) => state.currentProjectId); + // Sort so the cache key is stable regardless of run ordering. + const sortedRunIds = useMemo(() => [...runIds].sort(), [runIds]); return useAuthenticatedQuery( - scoutQueryKeys.emissions(projectId, runId), + scoutQueryKeys.emissions(projectId, sortedRunIds), (client) => projectId - ? client.listScoutRunEmissions(projectId, runId) + ? client.batchScoutRunEmissions(projectId, sortedRunIds) : Promise.resolve([]), { - enabled: !!projectId && !!runId, + enabled: !!projectId && sortedRunIds.length > 0, staleTime: 60_000, + placeholderData: keepPreviousData, }, ); }