diff --git a/toolkit-docs-generator/scripts/check-stale-summaries.ts b/toolkit-docs-generator/scripts/check-stale-summaries.ts new file mode 100644 index 000000000..da093402d --- /dev/null +++ b/toolkit-docs-generator/scripts/check-stale-summaries.ts @@ -0,0 +1,82 @@ +#!/usr/bin/env tsx + +/** + * Check stale summaries + * + * Scans toolkit-docs-generator/data/toolkits/*.json and reports any toolkit + * whose summary was carried forward from a previous run (summaryStale: true) + * because regeneration was skipped or failed. Exits 1 if any are found so + * the script can gate CI or a local sanity check. + * + * Usage: + * pnpm dlx tsx toolkit-docs-generator/scripts/check-stale-summaries.ts + */ + +import { readdirSync, readFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; + +const here = dirname(fileURLToPath(import.meta.url)); +const TOOLKITS_DIR = join(here, "..", "data", "toolkits"); + +type ToolkitShape = { + id?: unknown; + summary?: unknown; + summaryStale?: unknown; + summaryStaleReason?: unknown; +}; + +const listToolkitFiles = (): string[] => { + try { + return readdirSync(TOOLKITS_DIR).filter( + (name) => name.endsWith(".json") && name !== "index.json" + ); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + console.error( + `✗ Cannot read toolkit directory ${TOOLKITS_DIR}: ${message}` + ); + process.exit(1); + } +}; + +const main = (): number => { + const stale: Array<{ file: string; id: string; reason: string }> = []; + for (const file of listToolkitFiles()) { + let toolkit: ToolkitShape; + try { + const raw = readFileSync(join(TOOLKITS_DIR, file), "utf8"); + toolkit = JSON.parse(raw) as ToolkitShape; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + console.error(`✗ Cannot parse ${file}: ${message}`); + return 1; + } + if (toolkit.summaryStale === true) { + stale.push({ + file, + id: typeof toolkit.id === "string" ? toolkit.id : file, + reason: + typeof toolkit.summaryStaleReason === "string" + ? toolkit.summaryStaleReason + : "unknown", + }); + } + } + + if (stale.length === 0) { + console.log(`✓ No stale summaries in ${TOOLKITS_DIR}`); + return 0; + } + + console.error(`✗ ${stale.length} toolkit(s) have a stale summary:\n`); + for (const finding of stale) { + console.error(` - ${finding.id} (${finding.file}): ${finding.reason}`); + } + console.error( + "\nRe-run the generator with a working LLM, or refresh the summary by hand and clear the `summaryStale` / `summaryStaleReason` fields." + ); + return 1; +}; + +process.exit(main()); diff --git a/toolkit-docs-generator/src/diff/previous-output.ts b/toolkit-docs-generator/src/diff/previous-output.ts index b17fd7d91..043a7bb90 100644 --- a/toolkit-docs-generator/src/diff/previous-output.ts +++ b/toolkit-docs-generator/src/diff/previous-output.ts @@ -276,6 +276,12 @@ const buildFallbackToolkit = ( const authResult = MergedToolkitAuthSchema.safeParse(record.auth); const summary = typeof record.summary === "string" ? record.summary : undefined; + const summaryStale = + typeof record.summaryStale === "boolean" ? record.summaryStale : undefined; + const summaryStaleReason = + typeof record.summaryStaleReason === "string" + ? record.summaryStaleReason + : undefined; const generatedAt = typeof record.generatedAt === "string" ? record.generatedAt : undefined; @@ -299,6 +305,8 @@ const buildFallbackToolkit = ( version, description, ...(summary ? { summary } : {}), + ...(summaryStale !== undefined ? { summaryStale } : {}), + ...(summaryStaleReason !== undefined ? { summaryStaleReason } : {}), metadata: metadataResult.success ? metadataResult.data : DEFAULT_PREVIOUS_TOOLKIT_METADATA, diff --git a/toolkit-docs-generator/src/merger/data-merger.ts b/toolkit-docs-generator/src/merger/data-merger.ts index e373d51a7..d3cf02ed5 100644 --- a/toolkit-docs-generator/src/merger/data-merger.ts +++ b/toolkit-docs-generator/src/merger/data-merger.ts @@ -467,6 +467,31 @@ const getToolDocumentationChunks = ( return fromPrevious; }; +/** + * Mark the toolkit's summary as stale — the summary is being carried forward + * from a previous run even though the toolkit signature changed (regen was + * skipped or failed). The CI check in tests/stale-summaries.test.ts surfaces + * these toolkits so a human rerun or fix can follow. + */ +export const STALE_SUMMARY_WARNING_PREFIX = "Summary is stale for"; + +const markSummaryStale = ( + toolkit: MergedToolkit, + reason: string, + warnings: string[] +): void => { + toolkit.summaryStale = true; + toolkit.summaryStaleReason = reason; + warnings.push( + `${STALE_SUMMARY_WARNING_PREFIX} ${toolkit.id}: ${reason}. Previous summary carried forward.` + ); +}; + +const clearStaleSummaryFlags = (toolkit: MergedToolkit): void => { + toolkit.summaryStale = undefined; + toolkit.summaryStaleReason = undefined; +}; + const isOverviewChunk = (chunk: DocumentationChunk): boolean => chunk.location === "header" && chunk.position === "before" && @@ -950,23 +975,49 @@ export class DataMerger { if (hasToolkitOverviewChunk(result.toolkit)) { // Keep overview as the canonical toolkit-level narrative. result.toolkit.summary = undefined; + clearStaleSummaryFlags(result.toolkit); + return; + } + + // Defensive guard: if something upstream already set a summary on + // `result.toolkit`, respect it. buildMergedToolkit does not set one + // today, but we don't want the reuse / fallback paths below to + // silently replace a pre-populated value. + if (result.toolkit.summary) { + clearStaleSummaryFlags(result.toolkit); return; } if (previousToolkit?.summary) { const currentSignature = buildToolkitSummarySignature(result.toolkit); const previousSignature = buildToolkitSummarySignature(previousToolkit); - if (currentSignature === previousSignature) { + // Signature-match reuse is only safe when the PREVIOUS summary itself + // was fresh. If the previous run already flagged the summary stale, + // a matching signature does not prove freshness — the stale summary + // was carried forward from an even earlier toolset and will stay + // wrong until an LLM actually regenerates it. + if ( + currentSignature === previousSignature && + !previousToolkit.summaryStale + ) { result.toolkit.summary = previousToolkit.summary; + clearStaleSummaryFlags(result.toolkit); return; } } if (!this.toolkitSummaryGenerator) { - return; - } - - if (result.toolkit.summary) { + // Preserve previous summary when no LLM is available to regenerate. + // A slightly stale summary is better than silently wiping hand-authored + // or previously generated content on every run that disables the LLM. + if (previousToolkit?.summary) { + result.toolkit.summary = previousToolkit.summary; + markSummaryStale( + result.toolkit, + "llm_generator_unavailable", + result.warnings + ); + } return; } @@ -975,11 +1026,24 @@ export class DataMerger { result.toolkit ); result.toolkit.summary = summary; + clearStaleSummaryFlags(result.toolkit); } catch (error) { const message = error instanceof Error ? error.message : String(error); result.warnings.push( `Summary generation failed for ${result.toolkit.id}: ${message}` ); + // Preserve previous summary on LLM failure. Losing the summary on a + // transient API error would require waiting for another run — and if + // the failure is persistent we keep showing the last known-good text + // instead of a blank overview. + if (previousToolkit?.summary) { + result.toolkit.summary = previousToolkit.summary; + markSummaryStale( + result.toolkit, + "llm_generation_failed", + result.warnings + ); + } } } diff --git a/toolkit-docs-generator/src/types/index.ts b/toolkit-docs-generator/src/types/index.ts index cdf5a7c53..366dde7aa 100644 --- a/toolkit-docs-generator/src/types/index.ts +++ b/toolkit-docs-generator/src/types/index.ts @@ -399,6 +399,20 @@ export const MergedToolkitSchema = z.object({ description: z.string().nullable(), /** LLM-generated summary (optional) */ summary: z.string().optional(), + /** + * True when the current `summary` is known to be out of date with the + * toolkit's current tools (the signature changed but regeneration was + * skipped or failed, so the previous summary was carried forward as a + * fallback). Cleared whenever a fresh summary is successfully generated + * or when the summary is verified against an unchanged signature. + */ + summaryStale: z.boolean().optional(), + /** + * Machine-readable reason the summary is stale (e.g. + * "llm_generator_unavailable", "llm_generation_failed"). Always set + * together with `summaryStale: true`. Cleared together with it. + */ + summaryStaleReason: z.string().optional(), /** Metadata from Design System */ metadata: MergedToolkitMetadataSchema, /** Authentication requirements */ diff --git a/toolkit-docs-generator/tests/merger/data-merger.test.ts b/toolkit-docs-generator/tests/merger/data-merger.test.ts index bb4991b3f..71155f0a6 100644 --- a/toolkit-docs-generator/tests/merger/data-merger.test.ts +++ b/toolkit-docs-generator/tests/merger/data-merger.test.ts @@ -1185,6 +1185,221 @@ describe("DataMerger", () => { expect(result.toolkit.summary).toBeUndefined(); }); + it("preserves previous summary when no LLM generator is available and the signature changed", async () => { + const toolkitDataSource = createCombinedToolkitDataSource({ + toolSource: new InMemoryToolDataSource([githubTool1, githubTool2]), + metadataSource: new InMemoryMetadataSource([githubMetadata]), + }); + const previousResult = await mergeToolkit( + "Github", + [githubTool1], + githubMetadata, + null, + createStubGenerator() + ); + previousResult.toolkit.summary = "Hand-authored summary"; + + const merger = new DataMerger({ + toolkitDataSource, + customSectionsSource: new EmptyCustomSectionsSource(), + toolExampleGenerator: createStubGenerator(), + previousToolkits: new Map([["github", previousResult.toolkit]]), + }); + + const result = await merger.mergeToolkit("Github"); + + expect(result.toolkit.tools).toHaveLength(2); + expect(result.toolkit.summary).toBe("Hand-authored summary"); + expect(result.toolkit.summaryStale).toBe(true); + expect(result.toolkit.summaryStaleReason).toBe( + "llm_generator_unavailable" + ); + expect( + result.warnings.some((warning) => + warning.includes("Summary is stale for Github") + ) + ).toBe(true); + }); + + it("preserves previous summary when the LLM generator throws", async () => { + const toolkitDataSource = createCombinedToolkitDataSource({ + toolSource: new InMemoryToolDataSource([githubTool1, githubTool2]), + metadataSource: new InMemoryMetadataSource([githubMetadata]), + }); + const previousResult = await mergeToolkit( + "Github", + [githubTool1], + githubMetadata, + null, + createStubGenerator() + ); + previousResult.toolkit.summary = "Hand-authored summary"; + + const failingSummary: ToolkitSummaryGenerator = { + generate: async () => { + throw new Error("rate limited"); + }, + }; + + const merger = new DataMerger({ + toolkitDataSource, + customSectionsSource: new EmptyCustomSectionsSource(), + toolExampleGenerator: createStubGenerator(), + toolkitSummaryGenerator: failingSummary, + previousToolkits: new Map([["github", previousResult.toolkit]]), + }); + + const result = await merger.mergeToolkit("Github"); + + expect(result.toolkit.summary).toBe("Hand-authored summary"); + expect( + result.warnings.some((warning) => + warning.includes("Summary generation failed for Github") + ) + ).toBe(true); + expect(result.toolkit.summaryStale).toBe(true); + expect(result.toolkit.summaryStaleReason).toBe("llm_generation_failed"); + }); + + it("clears the stale flag when the generator succeeds on the next run", async () => { + // A toolkit whose summary was stale on a prior run should come back + // clean once the generator actually produces a new summary. This + // proves the CI gate will stop flagging the toolkit once fixed. + const toolkitDataSource = createCombinedToolkitDataSource({ + toolSource: new InMemoryToolDataSource([githubTool1, githubTool2]), + metadataSource: new InMemoryMetadataSource([githubMetadata]), + }); + const previousResult = await mergeToolkit( + "Github", + [githubTool1], + githubMetadata, + null, + createStubGenerator() + ); + previousResult.toolkit.summary = "Older summary"; + previousResult.toolkit.summaryStale = true; + previousResult.toolkit.summaryStaleReason = "llm_generation_failed"; + + const merger = new DataMerger({ + toolkitDataSource, + customSectionsSource: new EmptyCustomSectionsSource(), + toolExampleGenerator: createStubGenerator(), + toolkitSummaryGenerator: createStubSummaryGenerator("Fresh summary"), + previousToolkits: new Map([["github", previousResult.toolkit]]), + }); + + const result = await merger.mergeToolkit("Github"); + + expect(result.toolkit.summary).toBe("Fresh summary (Github)"); + expect(result.toolkit.summaryStale).toBeUndefined(); + expect(result.toolkit.summaryStaleReason).toBeUndefined(); + }); + + it("does not flag stale when the signature matches a fresh previous summary", async () => { + // Baseline: if previous.summaryStale is falsy, signature match is a + // valid proof of freshness and the reuse path should keep the + // summary and stay clean. + const toolkitDataSource = createCombinedToolkitDataSource({ + toolSource: new InMemoryToolDataSource([githubTool1]), + metadataSource: new InMemoryMetadataSource([githubMetadata]), + }); + const previousResult = await mergeToolkit( + "Github", + [githubTool1], + githubMetadata, + null, + createStubGenerator() + ); + previousResult.toolkit.summary = "Cached summary"; + + const countingSummary = createCountingSummaryGenerator(); + const merger = new DataMerger({ + toolkitDataSource, + customSectionsSource: new EmptyCustomSectionsSource(), + toolExampleGenerator: createStubGenerator(), + toolkitSummaryGenerator: countingSummary.generator, + previousToolkits: new Map([["github", previousResult.toolkit]]), + }); + + const result = await merger.mergeToolkit("Github"); + + expect(result.toolkit.summary).toBe("Cached summary"); + expect(result.toolkit.summaryStale).toBeUndefined(); + expect(result.toolkit.summaryStaleReason).toBeUndefined(); + expect(countingSummary.getCalls()).toBe(0); + }); + + it("regenerates when signature matches but the previous summary was already stale", async () => { + // If the previous summary was already flagged stale, a matching + // signature does NOT prove freshness — the stale summary was + // carried forward from an earlier toolset. The reuse fast path + // must skip, and the LLM must actually regenerate. + const toolkitDataSource = createCombinedToolkitDataSource({ + toolSource: new InMemoryToolDataSource([githubTool1]), + metadataSource: new InMemoryMetadataSource([githubMetadata]), + }); + const previousResult = await mergeToolkit( + "Github", + [githubTool1], + githubMetadata, + null, + createStubGenerator() + ); + previousResult.toolkit.summary = "Stale carried-forward summary"; + previousResult.toolkit.summaryStale = true; + previousResult.toolkit.summaryStaleReason = "llm_generation_failed"; + + const merger = new DataMerger({ + toolkitDataSource, + customSectionsSource: new EmptyCustomSectionsSource(), + toolExampleGenerator: createStubGenerator(), + toolkitSummaryGenerator: createStubSummaryGenerator("Fresh"), + previousToolkits: new Map([["github", previousResult.toolkit]]), + }); + + const result = await merger.mergeToolkit("Github"); + + expect(result.toolkit.summary).toBe("Fresh (Github)"); + expect(result.toolkit.summaryStale).toBeUndefined(); + expect(result.toolkit.summaryStaleReason).toBeUndefined(); + }); + + it("keeps the stale flag when previous was stale, signature matches, and no generator is available", async () => { + // Same as above, but the regen attempt is not possible. The carried- + // forward summary remains, and the stale flag must persist so CI + // keeps flagging the toolkit until a working LLM run produces a + // fresh one. + const toolkitDataSource = createCombinedToolkitDataSource({ + toolSource: new InMemoryToolDataSource([githubTool1]), + metadataSource: new InMemoryMetadataSource([githubMetadata]), + }); + const previousResult = await mergeToolkit( + "Github", + [githubTool1], + githubMetadata, + null, + createStubGenerator() + ); + previousResult.toolkit.summary = "Stale carried-forward summary"; + previousResult.toolkit.summaryStale = true; + previousResult.toolkit.summaryStaleReason = "llm_generation_failed"; + + const merger = new DataMerger({ + toolkitDataSource, + customSectionsSource: new EmptyCustomSectionsSource(), + toolExampleGenerator: createStubGenerator(), + previousToolkits: new Map([["github", previousResult.toolkit]]), + }); + + const result = await merger.mergeToolkit("Github"); + + expect(result.toolkit.summary).toBe("Stale carried-forward summary"); + expect(result.toolkit.summaryStale).toBe(true); + expect(result.toolkit.summaryStaleReason).toBe( + "llm_generator_unavailable" + ); + }); + it("reuses previous examples when the tool is unchanged", async () => { const toolkitDataSource = createCombinedToolkitDataSource({ toolSource: new InMemoryToolDataSource([githubTool1]), diff --git a/toolkit-docs-generator/tests/stale-summaries.test.ts b/toolkit-docs-generator/tests/stale-summaries.test.ts new file mode 100644 index 000000000..9e88bd63a --- /dev/null +++ b/toolkit-docs-generator/tests/stale-summaries.test.ts @@ -0,0 +1,81 @@ +import { readdirSync, readFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; +import { describe, expect, it } from "vitest"; + +/** + * CI gate for stale summaries. + * + * When the generator carries a previous summary forward because + * regeneration was skipped (no LLM configured) or failed (LLM error), + * it marks the toolkit JSON with `summaryStale: true`. That is deliberate + * — a slightly stale overview is better than a blank one — but it must + * be fixed before the change merges. + * + * This test reads the committed `data/toolkits/*.json` files and fails + * if any toolkit is flagged stale. The auto-generated `[AUTO] Adding MCP + * Servers docs update` PR will still open on schedule, but its CI run + * will fail here, which is the intended signal for a reviewer to + * either rerun generation with a working LLM or manually refresh the + * affected summary. + */ +// Resolve from the test file's own location so the test works whether +// vitest is invoked from the repo root or from inside +// toolkit-docs-generator/. Falls back to process.cwd() only if +// import.meta.url is unavailable (it is in the vitest esm runtime). +const here = dirname(fileURLToPath(import.meta.url)); +const TOOLKITS_DIR = join(here, "..", "data", "toolkits"); + +type ToolkitSummaryShape = { + id?: unknown; + summary?: unknown; + summaryStale?: unknown; + summaryStaleReason?: unknown; +}; + +const readToolkit = (fileName: string): ToolkitSummaryShape => { + const raw = readFileSync(join(TOOLKITS_DIR, fileName), "utf8"); + return JSON.parse(raw) as ToolkitSummaryShape; +}; + +const listToolkitFiles = (): string[] => + readdirSync(TOOLKITS_DIR).filter( + (name) => name.endsWith(".json") && name !== "index.json" + ); + +describe("Stale summary CI gate", () => { + it("no committed toolkit JSON has summaryStale: true", () => { + const staleFindings: Array<{ + file: string; + id: string; + reason: string; + }> = []; + + for (const file of listToolkitFiles()) { + const toolkit = readToolkit(file); + if (toolkit.summaryStale === true) { + staleFindings.push({ + file, + id: typeof toolkit.id === "string" ? toolkit.id : file, + reason: + typeof toolkit.summaryStaleReason === "string" + ? toolkit.summaryStaleReason + : "unknown", + }); + } + } + + if (staleFindings.length > 0) { + const report = staleFindings + .map( + (finding) => ` - ${finding.id} (${finding.file}): ${finding.reason}` + ) + .join("\n"); + throw new Error( + `${staleFindings.length} toolkit(s) have a stale summary that must be regenerated before merge:\n${report}\n\n` + + "Re-run `toolkit-docs-generator generate` with a working --llm-provider/--llm-model, or refresh the summary by hand and clear the `summaryStale` / `summaryStaleReason` fields in the JSON." + ); + } + expect(staleFindings).toEqual([]); + }); +});