From 5696e3dcf63eb1753fb761b4af02dacd28b18f97 Mon Sep 17 00:00:00 2001 From: jottakka Date: Sat, 18 Apr 2026 13:52:16 -0300 Subject: [PATCH 01/11] feat(toolkit-docs-generator): secret-coherence scan + minimal LLM edits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a toolkit loses a secret upstream (typically because the tool that required it was dropped), the rendered docs can continue to mention that secret in the summary and in hand-authored documentation chunks. One concrete example on main: github.json still references GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN after the notification tools were removed in PR #922. Symmetrically, toolkits can end up with current secrets that the summary never mentions, or mention secrets without any link to the Arcade config docs — leaving readers without the information needed to actually configure them. This adds a two-stage pipeline that runs after summary generation: 1. Deterministic scanners (src/merger/secret-coherence.ts) - detectStaleSecretReferences: diffs current vs previous toolkit secret sets and scans summary, toolkit chunks, and per-tool chunks by exact substring for each removed secret. - detectSecretCoverageGaps: flags current secrets missing from the summary and a missing link to the Arcade secret config docs. - groupStaleRefsByTarget: collapses multiple removed-secret hits in the same artifact into a single edit target so the LLM is called at most once per (summary | chunk). 2. Targeted LLM editor (src/llm/secret-edit-generator.ts) - Unlike toolkit-summary-generator (which rewrites from scratch and tends to oversimplify), this editor is prompted to make the smallest possible change: delete sentences/rows that are only about the removed secret, minimally rewrite any sentence that mentions the removed secret alongside other content, and never re-summarize or reorder sections. 
- A separate fillCoverageGaps method adds missing secret mentions and, when required, the Arcade config docs link — also without rewriting unrelated text. Both steps are wired into DataMerger.enforceSecretCoherence, called after maybeGenerateSummary. The editor is optional: if unconfigured, the scanners still run and emit warnings, but no content is rewritten. Failures in the editor are caught and surfaced as warnings so a single LLM error does not break the run. Wiring changes: - DataMergerConfig gains an optional secretEditGenerator. - CLI gains --llm-editor-provider / --llm-editor-model / --llm-editor-api-key / --llm-editor-base-url / etc., mirrored by LLM_EDITOR_* env vars, with --skip-secret-coherence for the scan-and-edit step. Resolver fails open: a missing API key degrades to scanner-only warnings instead of crashing the run. - Workflow generate-toolkit-docs.yml now passes editor flags pointing at Anthropic + claude-sonnet-4-6 (overridable via secrets) so the editor stays on a stronger model than the gpt-4o-mini used for bulk summary and example generation. Summary prompt updates (src/llm/toolkit-summary-generator.ts): - Drop the hard 60–140 word cap; ask for "compact but complete". - Require each current secret be named in backticks with a one-line factual description of how to obtain it from the provider. - Require the Arcade secret config docs link at the end of the **Secrets** section. Tests: - tests/merger/secret-coherence.test.ts (13 tests) covers scanner behavior across summary, toolkit chunks, tool chunks, coverage gaps, and target grouping. - tests/llm/secret-edit-generator.test.ts (6 tests) exercises the cleanup/coverage flows and the fence-stripping / empty-response guards with a mocked LLM client. - Two new DataMerger integration tests verify that a removed secret surfacing in a doc chunk drives exactly one cleanup call and that the editor-disabled path still emits the warning. 
- tests/workflows/generate-toolkit-docs.test.ts asserts the new editor flags are present in CI. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/generate-toolkit-docs.yml | 8 + toolkit-docs-generator/src/cli/index.ts | 226 +++++++++++++ toolkit-docs-generator/src/llm/index.ts | 1 + .../src/llm/secret-edit-generator.ts | 186 +++++++++++ .../src/llm/toolkit-summary-generator.ts | 25 +- .../src/merger/data-merger.ts | 208 ++++++++++++ .../src/merger/secret-coherence.ts | 314 ++++++++++++++++++ .../tests/llm/secret-edit-generator.test.ts | 99 ++++++ .../tests/merger/data-merger.test.ts | 145 +++++++- .../tests/merger/secret-coherence.test.ts | 264 +++++++++++++++ .../workflows/generate-toolkit-docs.test.ts | 8 + 11 files changed, 1474 insertions(+), 10 deletions(-) create mode 100644 toolkit-docs-generator/src/llm/secret-edit-generator.ts create mode 100644 toolkit-docs-generator/src/merger/secret-coherence.ts create mode 100644 toolkit-docs-generator/tests/llm/secret-edit-generator.test.ts create mode 100644 toolkit-docs-generator/tests/merger/secret-coherence.test.ts diff --git a/.github/workflows/generate-toolkit-docs.yml b/.github/workflows/generate-toolkit-docs.yml index 20a00e134..836660104 100644 --- a/.github/workflows/generate-toolkit-docs.yml +++ b/.github/workflows/generate-toolkit-docs.yml @@ -57,6 +57,9 @@ jobs: --llm-provider openai \ --llm-model "$OPENAI_MODEL" \ --llm-api-key "$OPENAI_API_KEY" \ + --llm-editor-provider anthropic \ + --llm-editor-model "$ANTHROPIC_EDITOR_MODEL" \ + --llm-editor-api-key "$ANTHROPIC_API_KEY" \ --toolkit-concurrency 8 \ --llm-concurrency 15 \ --exclude-file ./excluded-toolkits.txt \ @@ -68,6 +71,11 @@ jobs: ENGINE_API_KEY: ${{ secrets.ENGINE_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENAI_MODEL: ${{ secrets.OPENAI_MODEL || 'gpt-4o-mini' }} + # Stronger model for the secret-coherence editor. 
Keeps + # stale-secret cleanup precise instead of re-summarizing the whole + # artifact (which gpt-4o-mini tends to do). + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + ANTHROPIC_EDITOR_MODEL: ${{ secrets.ANTHROPIC_EDITOR_MODEL || 'claude-sonnet-4-6' }} - name: Sync toolkit sidebar navigation run: pnpm dlx tsx toolkit-docs-generator/scripts/sync-toolkit-sidebar.ts --remove-empty-sections=false --verbose diff --git a/toolkit-docs-generator/src/cli/index.ts b/toolkit-docs-generator/src/cli/index.ts index b6a1ca5af..06e61c5ae 100644 --- a/toolkit-docs-generator/src/cli/index.ts +++ b/toolkit-docs-generator/src/cli/index.ts @@ -33,6 +33,7 @@ import { createLlmClient, type LlmClient, type LlmProvider, + LlmSecretEditGenerator, LlmToolExampleGenerator, LlmToolkitSummaryGenerator, } from "../llm/index.js"; @@ -363,6 +364,104 @@ const resolveLlmConfig = ( }; }; +interface SecretEditorCliOptions { + llmEditorProvider?: string; + llmEditorModel?: string; + llmEditorApiKey?: string; + llmEditorBaseUrl?: string; + llmEditorTemperature?: number; + llmEditorMaxTokens?: number; + llmEditorMaxRetries?: number; + skipSecretCoherence?: boolean; +} + +const DEFAULT_EDITOR_MAX_TOKENS = 4096; + +const resolveEditorApiKey = ( + provider: LlmProvider, + explicit: string | undefined +): string | undefined => { + if (explicit) return explicit; + if (process.env.LLM_EDITOR_API_KEY) return process.env.LLM_EDITOR_API_KEY; + if (provider === "anthropic") { + return process.env.ANTHROPIC_API_KEY; + } + return process.env.OPENAI_API_KEY; +}; + +/** + * Build an LLM secret-edit generator from CLI options + env. Returns + * undefined when the editor is disabled or unconfigured; callers fall back + * to scanner-only warnings in that case. + */ +const resolveSecretEditGenerator = ( + options: SecretEditorCliOptions, + verbose: boolean +): LlmSecretEditGenerator | undefined => { + if (options.skipSecretCoherence) { + return; + } + + const providerRaw = + options.llmEditorProvider ?? 
process.env.LLM_EDITOR_PROVIDER; + const model = options.llmEditorModel ?? process.env.LLM_EDITOR_MODEL; + + // Editor stays opt-in: both provider and model must be explicitly set. + if (!(providerRaw && model)) { + return; + } + + const provider = resolveLlmProvider(providerRaw); + const apiKey = resolveEditorApiKey(provider, options.llmEditorApiKey); + if (!apiKey) { + // Fail open: unconfigured editor degrades to scanner-only warnings + // instead of crashing the whole generation run. CI and local scripts + // often point at the same flag set and shouldn't break when the + // editor's API key is simply absent. + if (verbose) { + console.log( + chalk.yellow( + `⚠ Secret-coherence editor skipped: no API key found for provider ${provider}.` + ) + ); + } + return; + } + + const onRetry = verbose + ? (attempt: number, error: Error, delayMs: number) => { + console.log( + chalk.yellow( + ` ⚠️ Secret editor call failed (attempt ${attempt}), retrying in ${delayMs}ms: ${error.message}` + ) + ); + } + : undefined; + + const client = createLlmClient({ + provider, + config: { + apiKey, + ...(options.llmEditorBaseUrl + ? { baseUrl: options.llmEditorBaseUrl } + : {}), + retry: { + maxRetries: options.llmEditorMaxRetries ?? 3, + onRetry, + }, + }, + }); + + return new LlmSecretEditGenerator({ + client, + model, + ...(options.llmEditorTemperature !== undefined + ? { temperature: options.llmEditorTemperature } + : {}), + maxTokens: options.llmEditorMaxTokens ?? DEFAULT_EDITOR_MAX_TOKENS, + }); +}; + const resolveApiSource = (options: { apiSource?: string; toolMetadataUrl?: string; @@ -856,6 +955,39 @@ program "Path to a .txt file with toolkit IDs to skip during generation (one per line)" ) .option("--verbose", "Enable verbose logging", false) + .option( + "--llm-editor-provider ", + "Secret-coherence editor LLM provider (openai|anthropic). Defaults to LLM_EDITOR_PROVIDER env." + ) + .option( + "--llm-editor-model ", + "Secret-coherence editor LLM model (e.g. claude-sonnet-4-6). 
Defaults to LLM_EDITOR_MODEL env." + ) + .option( + "--llm-editor-api-key ", + "Secret-coherence editor API key. Falls back to LLM_EDITOR_API_KEY or the provider-specific env var." + ) + .option("--llm-editor-base-url ", "Secret-coherence editor LLM base URL") + .option( + "--llm-editor-temperature ", + "Secret-coherence editor temperature", + (value) => Number.parseFloat(value) + ) + .option( + "--llm-editor-max-tokens ", + "Secret-coherence editor max tokens (default: 4096)", + (value) => Number.parseInt(value, 10) + ) + .option( + "--llm-editor-max-retries ", + "Secret-coherence editor max retry attempts (default: 3)", + (value) => Number.parseInt(value, 10) + ) + .option( + "--skip-secret-coherence", + "Disable the secret-coherence scan and edit step", + false + ) .action( async (options: { providers?: string; @@ -897,6 +1029,14 @@ program excludeFile?: string; ignoreFile?: string; verbose: boolean; + llmEditorProvider?: string; + llmEditorModel?: string; + llmEditorApiKey?: string; + llmEditorBaseUrl?: string; + llmEditorTemperature?: number; + llmEditorMaxTokens?: number; + llmEditorMaxRetries?: number; + skipSecretCoherence?: boolean; // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: legacy CLI flow }) => { const spinner = ora("Parsing input...").start(); @@ -1078,6 +1218,27 @@ program } toolkitSummaryGenerator = new LlmToolkitSummaryGenerator(llmConfig); } + + const secretEditGenerator = resolveSecretEditGenerator( + options, + options.verbose + ); + if (options.verbose) { + if (secretEditGenerator) { + console.log( + chalk.dim( + `Secret-coherence editor enabled (model: ${options.llmEditorModel ?? process.env.LLM_EDITOR_MODEL})` + ) + ); + } else if (!options.skipSecretCoherence) { + console.log( + chalk.dim( + "Secret-coherence editor not configured; scanners will still emit warnings." + ) + ); + } + } + const previousOutputDir = options.forceRegenerate ? undefined : (options.previousOutput ?? 
@@ -1340,6 +1501,7 @@ program customSectionsSource, ...(toolExampleGenerator ? { toolExampleGenerator } : {}), ...(toolkitSummaryGenerator ? { toolkitSummaryGenerator } : {}), + ...(secretEditGenerator ? { secretEditGenerator } : {}), ...(previousToolkits ? { previousToolkits } : {}), ...(options.llmConcurrency ? { llmConcurrency: options.llmConcurrency } @@ -1468,6 +1630,7 @@ program customSectionsSource, ...(toolExampleGenerator ? { toolExampleGenerator } : {}), ...(toolkitSummaryGenerator ? { toolkitSummaryGenerator } : {}), + ...(secretEditGenerator ? { secretEditGenerator } : {}), ...(previousToolkits ? { previousToolkits } : {}), ...(options.llmConcurrency ? { llmConcurrency: options.llmConcurrency } @@ -1812,6 +1975,39 @@ program "Path to a .txt file with toolkit IDs to skip during generation (one per line)" ) .option("--verbose", "Enable verbose logging", false) + .option( + "--llm-editor-provider ", + "Secret-coherence editor LLM provider (openai|anthropic). Defaults to LLM_EDITOR_PROVIDER env." + ) + .option( + "--llm-editor-model ", + "Secret-coherence editor LLM model (e.g. claude-sonnet-4-6). Defaults to LLM_EDITOR_MODEL env." + ) + .option( + "--llm-editor-api-key ", + "Secret-coherence editor API key. Falls back to LLM_EDITOR_API_KEY or the provider-specific env var." 
+ ) + .option("--llm-editor-base-url ", "Secret-coherence editor LLM base URL") + .option( + "--llm-editor-temperature ", + "Secret-coherence editor temperature", + (value) => Number.parseFloat(value) + ) + .option( + "--llm-editor-max-tokens ", + "Secret-coherence editor max tokens (default: 4096)", + (value) => Number.parseInt(value, 10) + ) + .option( + "--llm-editor-max-retries ", + "Secret-coherence editor max retry attempts (default: 3)", + (value) => Number.parseInt(value, 10) + ) + .option( + "--skip-secret-coherence", + "Disable the secret-coherence scan and edit step", + false + ) .action( async (options: { output: string; @@ -1848,6 +2044,14 @@ program ignoreFile?: string; requireComplete: boolean; verbose: boolean; + llmEditorProvider?: string; + llmEditorModel?: string; + llmEditorApiKey?: string; + llmEditorBaseUrl?: string; + llmEditorTemperature?: number; + llmEditorMaxTokens?: number; + llmEditorMaxRetries?: number; + skipSecretCoherence?: boolean; // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: legacy CLI flow }) => { const spinner = ora("Initializing...").start(); @@ -1929,6 +2133,27 @@ program } toolkitSummaryGenerator = new LlmToolkitSummaryGenerator(llmConfig); } + + const secretEditGenerator = resolveSecretEditGenerator( + options, + options.verbose + ); + if (options.verbose) { + if (secretEditGenerator) { + console.log( + chalk.dim( + `Secret-coherence editor enabled (model: ${options.llmEditorModel ?? process.env.LLM_EDITOR_MODEL})` + ) + ); + } else if (!options.skipSecretCoherence) { + console.log( + chalk.dim( + "Secret-coherence editor not configured; scanners will still emit warnings." + ) + ); + } + } + const previousOutputDir = options.forceRegenerate ? undefined : (options.previousOutput ?? @@ -2127,6 +2352,7 @@ program customSectionsSource, ...(toolExampleGenerator ? { toolExampleGenerator } : {}), ...(toolkitSummaryGenerator ? { toolkitSummaryGenerator } : {}), + ...(secretEditGenerator ? 
{ secretEditGenerator } : {}), ...(previousToolkits ? { previousToolkits } : {}), ...(options.llmConcurrency ? { llmConcurrency: options.llmConcurrency } diff --git a/toolkit-docs-generator/src/llm/index.ts b/toolkit-docs-generator/src/llm/index.ts index 162d4b879..ef58af37d 100644 --- a/toolkit-docs-generator/src/llm/index.ts +++ b/toolkit-docs-generator/src/llm/index.ts @@ -1,3 +1,4 @@ export * from "./client.js"; +export * from "./secret-edit-generator.js"; export * from "./tool-example-generator.js"; export * from "./toolkit-summary-generator.js"; diff --git a/toolkit-docs-generator/src/llm/secret-edit-generator.ts b/toolkit-docs-generator/src/llm/secret-edit-generator.ts new file mode 100644 index 000000000..0d5795059 --- /dev/null +++ b/toolkit-docs-generator/src/llm/secret-edit-generator.ts @@ -0,0 +1,186 @@ +/** + * LLM editor for secret-coherence fixes. + * + * Unlike toolkit-summary-generator (which rewrites a summary from scratch), + * this editor is asked to preserve the source text as-is and only change the + * passages that mention a removed secret — or, for coverage-gap fixes, to + * minimally weave in missing secret information without re-styling the rest. + * + * Keeping edits local prevents the "oversimplification on rerun" behavior + * users observed when the regenerator reprocessed richer hand-refined text. 
+ */ +import { + ARCADE_SECRETS_DASHBOARD_URL, + ARCADE_SECRETS_DOC_URL, +} from "../merger/secret-coherence.js"; +import type { LlmClient } from "./client.js"; + +export interface SecretEditGeneratorConfig { + readonly client: LlmClient; + readonly model: string; + readonly temperature?: number; + readonly maxTokens?: number; + readonly systemPrompt?: string; +} + +export interface SecretCleanupEditInput { + readonly kind: "summary" | "documentation_chunk"; + readonly content: string; + readonly removedSecrets: readonly string[]; + readonly currentSecrets: readonly string[]; + readonly toolkitLabel: string; +} + +export interface SecretCoverageEditInput { + readonly content: string; + readonly missingSecretNames: readonly string[]; + readonly currentSecrets: readonly string[]; + readonly toolkitLabel: string; + readonly requireConfigLink: boolean; +} + +export interface ISecretEditGenerator { + /** + * Edit the provided content to remove all references to `removedSecrets` + * while preserving every other sentence, bullet, table row, heading, and + * example unchanged. Returns the edited content. + */ + cleanupStaleReferences: (input: SecretCleanupEditInput) => Promise; + + /** + * Edit the provided summary to add any missing secret mentions (one + * short, factual line per missing secret) and, if required, a link to + * the Arcade config doc. Must not alter existing content, ordering, or + * voice; new lines append to or minimally extend the **Secrets** section. + */ + fillCoverageGaps: (input: SecretCoverageEditInput) => Promise; +} + +const DEFAULT_SYSTEM_PROMPT = + "You are a careful documentation editor for the Arcade MCP toolkit docs. " + + "You make the smallest possible change that satisfies the request. " + + "Never re-summarize, shorten unrelated content, rewrite headings, or " + + "reorder existing sections. 
Preserve markdown syntax, backticks, tables, " + + "and code exactly."; + +const FENCE_PATTERN = /```(?:markdown|md|text)?\s*([\s\S]*?)```/; + +const stripOptionalFence = (text: string): string => { + const match = text.match(FENCE_PATTERN); + if (match?.[1]) { + return match[1].trim(); + } + return text.trim(); +}; + +const formatList = (values: readonly string[]): string => + values.length > 0 ? values.join(", ") : "None"; + +const buildCleanupPrompt = (input: SecretCleanupEditInput): string => { + const removedList = formatList(input.removedSecrets); + const currentList = formatList(input.currentSecrets); + const artifact = + input.kind === "summary" + ? "toolkit summary (markdown prose, roughly one screen)" + : "toolkit documentation chunk (markdown, may contain callouts, tables, and code blocks)"; + + return [ + `You are editing an Arcade MCP toolkit ${artifact} for ${input.toolkitLabel}.`, + "", + `Secrets that were REMOVED from this toolkit and must no longer appear: ${removedList}.`, + `Secrets that are STILL present and must be preserved: ${currentList}.`, + "", + "Rules:", + "- Delete any sentence, bullet, table row, or note whose ONLY topic is a removed secret.", + "- If a sentence or bullet mentions a removed secret alongside other content, rewrite that single sentence as minimally as possible to drop the removed-secret reference; do not paraphrase unrelated parts.", + "- Do not add new information. Do not rewrite unrelated content. Do not change headings, ordering, tone, or code blocks.", + "- If a whole section (for example an `## Authentication` or a setup table) becomes redundant because its only content referred to the removed secrets, remove that section cleanly (including its heading).", + "- Keep the result valid markdown. 
Preserve surrounding blank lines.", + "", + "Return ONLY the edited content, with no commentary, no explanation, and no code fences around the whole document.", + "", + "Content:", + "<<<", + input.content, + ">>>", + ].join("\n"); +}; + +const buildCoveragePrompt = (input: SecretCoverageEditInput): string => { + const missingList = formatList(input.missingSecretNames); + const currentList = formatList(input.currentSecrets); + const linkInstruction = input.requireConfigLink + ? `- The **Secrets** section must include a link to the Arcade config docs. Use this exact URL: ${ARCADE_SECRETS_DOC_URL}. If a short mention of the Arcade Dashboard secrets page is useful, ${ARCADE_SECRETS_DASHBOARD_URL} is acceptable as an additional reference.` + : "- Do not add any new links."; + + return [ + `You are editing an Arcade MCP toolkit summary for ${input.toolkitLabel}.`, + "", + `Secrets currently required by the toolkit: ${currentList}.`, + `Secrets missing from the summary that MUST be added: ${missingList}.`, + "", + "Rules:", + "- Ensure every current secret is mentioned by its exact name (inside backticks).", + "- Each missing secret should get at most one short, factual line describing what it is and, when possible, how a developer obtains it (e.g. provider Dashboard, region URL, API key). If you do not know, keep it to the name and a one-line purpose — do not invent provisioning steps.", + "- Prefer appending to or lightly extending an existing `**Secrets**` section. Only create a `**Secrets**` section if none exists.", + "- Do not rewrite unrelated content. Do not change headings, ordering, tone, or other sections.", + linkInstruction, + "- Keep the output compact: no filler, no marketing copy. 
The summary should remain easy to scan.", + "", + "Return ONLY the edited summary, with no commentary, no explanation, and no code fences around the whole document.", + "", + "Summary:", + "<<<", + input.content, + ">>>", + ].join("\n"); +}; + +export class LlmSecretEditGenerator implements ISecretEditGenerator { + private readonly client: LlmClient; + private readonly model: string; + private readonly temperature: number | undefined; + private readonly maxTokens: number | undefined; + private readonly systemPrompt: string; + + constructor(config: SecretEditGeneratorConfig) { + this.client = config.client; + this.model = config.model; + this.temperature = config.temperature; + this.maxTokens = config.maxTokens; + this.systemPrompt = config.systemPrompt ?? DEFAULT_SYSTEM_PROMPT; + } + + private async generate(prompt: string): Promise { + const response = await this.client.generateText({ + model: this.model, + prompt, + system: this.systemPrompt, + ...(this.temperature !== undefined + ? { temperature: this.temperature } + : {}), + ...(this.maxTokens !== undefined ? 
{ maxTokens: this.maxTokens } : {}), + }); + const trimmed = stripOptionalFence(response); + if (trimmed.length === 0) { + throw new Error("Secret edit LLM response was empty"); + } + return trimmed; + } + + cleanupStaleReferences(input: SecretCleanupEditInput): Promise { + if (input.removedSecrets.length === 0) { + return Promise.resolve(input.content); + } + return this.generate(buildCleanupPrompt(input)); + } + + fillCoverageGaps(input: SecretCoverageEditInput): Promise { + const hasMissing = input.missingSecretNames.length > 0; + const needsLink = input.requireConfigLink; + if (!(hasMissing || needsLink)) { + return Promise.resolve(input.content); + } + return this.generate(buildCoveragePrompt(input)); + } +} diff --git a/toolkit-docs-generator/src/llm/toolkit-summary-generator.ts b/toolkit-docs-generator/src/llm/toolkit-summary-generator.ts index d2739a137..3b72e3595 100644 --- a/toolkit-docs-generator/src/llm/toolkit-summary-generator.ts +++ b/toolkit-docs-generator/src/llm/toolkit-summary-generator.ts @@ -1,4 +1,8 @@ import type { ToolkitSummaryGenerator } from "../merger/data-merger.js"; +import { + ARCADE_SECRETS_DASHBOARD_URL, + ARCADE_SECRETS_DOC_URL, +} from "../merger/secret-coherence.js"; import type { MergedTool, MergedToolkit, SecretType } from "../types/index.js"; import type { LlmClient } from "./client.js"; @@ -61,26 +65,29 @@ const collectSecrets = (tools: MergedTool[]) => { const buildPrompt = (toolkit: MergedToolkit): string => { const secrets = collectSecrets(toolkit.tools); + const hasSecrets = secrets.names.length > 0; return [ - "Write a concise summary for Arcade toolkit docs.", + "Write a summary for Arcade toolkit docs.", 'Return JSON: {"summary": ""}', "", + "Goals: compact but complete. No fixed word limit — use as many words as needed to cover every current capability and every current secret, and no more. 
Prefer scannable structure over prose padding.", + "", "Requirements:", - "- 60 to 140 words.", "- Start with 1 to 2 sentences that explain the provider and what the toolkit enables.", - "- Add a **Capabilities** section with 3 to 5 bullet points.", - "- Do not list tools one by one. Summarize shared capabilities.", - "- If auth type is oauth2 or mixed, add an **OAuth** section with provider and scopes.", - "- If auth type is api_key or mixed, mention API key usage in **OAuth**.", - "- If any secrets exist, add a **Secrets** section describing secret types and examples.", - "- Use Markdown. Keep it concise and developer-focused.", + "- Add a **Capabilities** section with 3 to 6 bullets summarizing shared capabilities (group tools by theme; do not list tools one by one).", + "- If auth type is oauth2 or mixed, add an **OAuth** section with provider and representative scopes.", + "- If auth type is api_key or mixed, mention API key usage under **OAuth** or a dedicated heading.", + `- If any secrets exist, add a **Secrets** section. List every secret by its exact name in backticks. Give each secret one short factual line covering what it is and how a developer obtains it from the provider; if you do not know, stay with a one-line purpose rather than inventing steps. End the section with the Arcade config docs link: ${ARCADE_SECRETS_DOC_URL} (and optionally mention ${ARCADE_SECRETS_DASHBOARD_URL}).`, + "- Use Markdown. Developer-focused. Say 'Arcade' (never 'Arcade AI').", + "- Do not add marketing copy, repetition, or filler.", "", `Toolkit: ${toolkit.label} (${toolkit.id})`, `Description: ${toolkit.description ?? "No description"}`, `Auth: ${formatAuth(toolkit)}`, + `Secrets required: ${hasSecrets ? "Yes" : "None"}`, `Secret types: ${secrets.types.length > 0 ? secrets.types.join(", ") : "None"}`, - `Secret names: ${secrets.names.length > 0 ? secrets.names.join(", ") : "None"}`, + `Secret names: ${hasSecrets ? 
secrets.names.join(", ") : "None"}`, `Tools (${toolkit.tools.length}):`, formatToolLines(toolkit.tools), ].join("\n"); diff --git a/toolkit-docs-generator/src/merger/data-merger.ts b/toolkit-docs-generator/src/merger/data-merger.ts index e373d51a7..738558859 100644 --- a/toolkit-docs-generator/src/merger/data-merger.ts +++ b/toolkit-docs-generator/src/merger/data-merger.ts @@ -5,6 +5,7 @@ * into the final MergedToolkit format. */ +import type { ISecretEditGenerator } from "../llm/secret-edit-generator.js"; import type { ICustomSectionsSource } from "../sources/interfaces.js"; import type { IToolkitDataSource, @@ -28,6 +29,13 @@ import { detectMetadataChanges, formatFreshnessWarnings, } from "./metadata-freshness.js"; +import { + detectSecretCoherenceIssues, + groupStaleRefsByTarget, + hasCoherenceIssues, + type SecretCoherenceIssues, + type StaleSecretEditTarget, +} from "./secret-coherence.js"; // ============================================================================ // Merger Configuration @@ -38,6 +46,12 @@ export interface DataMergerConfig { customSectionsSource: ICustomSectionsSource; toolExampleGenerator?: ToolExampleGenerator; toolkitSummaryGenerator?: ToolkitSummaryGenerator; + /** + * Optional editor used to repair stale secret references and fill + * coverage gaps in summary / documentation chunks. When omitted the + * scanners still run and emit warnings, but no content is rewritten. + */ + secretEditGenerator?: ISecretEditGenerator; previousToolkits?: ReadonlyMap; /** Maximum concurrent LLM calls for tool examples (default: 5) */ llmConcurrency?: number; @@ -467,6 +481,79 @@ const getToolDocumentationChunks = ( return fromPrevious; }; +const collectCurrentSecretNames = (toolkit: MergedToolkit): Set => { + const names = new Set(); + for (const tool of toolkit.tools) { + for (const name of tool.secrets) { + names.add(name); + } + for (const info of tool.secretsInfo ?? 
[]) { + names.add(info.name); + } + } + return names; +}; + +const describeLocation = ( + location: + | { kind: "summary" } + | { kind: "toolkit_chunk"; chunkIndex: number } + | { + kind: "tool_chunk"; + toolQualifiedName: string; + chunkIndex: number; + } +): string => { + switch (location.kind) { + case "summary": + return "summary"; + case "toolkit_chunk": + return `toolkit documentation chunk #${location.chunkIndex}`; + case "tool_chunk": + return `tool chunk #${location.chunkIndex} of ${location.toolQualifiedName}`; + default: + return "unknown location"; + } +}; + +const applyEditedContent = ( + toolkit: MergedToolkit, + target: StaleSecretEditTarget, + edited: string +): void => { + switch (target.kind) { + case "summary": + toolkit.summary = edited; + return; + case "toolkit_chunk": { + const chunk = toolkit.documentationChunks[target.chunkIndex]; + if (chunk) { + toolkit.documentationChunks[target.chunkIndex] = { + ...chunk, + content: edited, + }; + } + return; + } + case "tool_chunk": { + const tool = toolkit.tools.find( + (candidate) => candidate.qualifiedName === target.toolQualifiedName + ); + if (!tool) return; + const chunk = tool.documentationChunks[target.chunkIndex]; + if (chunk) { + tool.documentationChunks[target.chunkIndex] = { + ...chunk, + content: edited, + }; + } + return; + } + default: + return; + } +}; + const isOverviewChunk = (chunk: DocumentationChunk): boolean => chunk.location === "header" && chunk.position === "before" && @@ -825,6 +912,7 @@ export class DataMerger { private readonly customSectionsSource: ICustomSectionsSource; private readonly toolExampleGenerator: ToolExampleGenerator | undefined; private readonly toolkitSummaryGenerator: ToolkitSummaryGenerator | undefined; + private readonly secretEditGenerator: ISecretEditGenerator | undefined; private readonly previousToolkits: | ReadonlyMap | undefined; @@ -851,6 +939,7 @@ export class DataMerger { this.customSectionsSource = config.customSectionsSource; 
this.toolExampleGenerator = config.toolExampleGenerator; this.toolkitSummaryGenerator = config.toolkitSummaryGenerator; + this.secretEditGenerator = config.secretEditGenerator; this.previousToolkits = config.previousToolkits; this.llmConcurrency = config.llmConcurrency ?? 10; this.toolkitConcurrency = config.toolkitConcurrency ?? 5; @@ -929,6 +1018,7 @@ export class DataMerger { } ); await this.maybeGenerateSummary(result, previousToolkit); + await this.enforceSecretCoherence(result, previousToolkit); // Write immediately if callback provided (incremental mode) if (this.onToolkitComplete) { @@ -983,6 +1073,123 @@ export class DataMerger { } } + private async enforceSecretCoherence( + result: MergeResult, + previousToolkit?: MergedToolkit + ): Promise { + const issues = detectSecretCoherenceIssues(result.toolkit, previousToolkit); + if (!hasCoherenceIssues(issues)) { + return; + } + + this.appendCoherenceWarnings(result, issues); + + if (!this.secretEditGenerator) { + return; + } + + await this.applyStaleRefCleanup(result, issues); + await this.applyCoverageFill(result, issues); + } + + private appendCoherenceWarnings( + result: MergeResult, + issues: SecretCoherenceIssues + ): void { + for (const stale of issues.staleReferences) { + const where = describeLocation(stale.location); + result.warnings.push( + `Stale secret reference in ${where}: ${stale.removedSecret} (removed from toolkit ${result.toolkit.id})` + ); + } + for (const gap of issues.coverageGaps) { + if (gap.kind === "missing_secret_in_summary") { + result.warnings.push( + `Summary does not mention current secret: ${gap.secretName} (toolkit ${result.toolkit.id})` + ); + } else { + result.warnings.push( + `Summary is missing a link to the Arcade secret config docs (toolkit ${result.toolkit.id})` + ); + } + } + } + + private async applyStaleRefCleanup( + result: MergeResult, + issues: SecretCoherenceIssues + ): Promise { + const editor = this.secretEditGenerator; + if (!editor) { + return; + } + const 
targets = groupStaleRefsByTarget(issues.staleReferences); + if (targets.length === 0) { + return; + } + const currentSecrets = Array.from(collectCurrentSecretNames(result.toolkit)) + .sort() + .map((name) => name); + for (const target of targets) { + try { + const edited = await editor.cleanupStaleReferences({ + kind: target.kind === "summary" ? "summary" : "documentation_chunk", + content: target.content, + removedSecrets: target.removedSecrets, + currentSecrets, + toolkitLabel: result.toolkit.label, + }); + applyEditedContent(result.toolkit, target, edited); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + result.warnings.push( + `Secret cleanup edit failed for ${result.toolkit.id} (${target.kind}): ${message}` + ); + } + } + } + + private async applyCoverageFill( + result: MergeResult, + issues: SecretCoherenceIssues + ): Promise { + const editor = this.secretEditGenerator; + if (!editor) { + return; + } + const summary = result.toolkit.summary; + if (!summary) { + return; + } + const missing = issues.coverageGaps + .filter((gap) => gap.kind === "missing_secret_in_summary") + .map((gap) => gap.secretName as string); + const needsLink = issues.coverageGaps.some( + (gap) => gap.kind === "missing_secret_config_link" + ); + if (missing.length === 0 && !needsLink) { + return; + } + const currentSecrets = Array.from(collectCurrentSecretNames(result.toolkit)) + .sort() + .map((name) => name); + try { + const edited = await editor.fillCoverageGaps({ + content: summary, + missingSecretNames: missing, + currentSecrets, + toolkitLabel: result.toolkit.label, + requireConfigLink: needsLink, + }); + result.toolkit.summary = edited; + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + result.warnings.push( + `Secret coverage edit failed for ${result.toolkit.id}: ${message}` + ); + } + } + /** * Merge data for a single toolkit */ @@ -1015,6 +1222,7 @@ export class DataMerger { } ); await this.maybeGenerateSummary(result, previousToolkit); + await this.enforceSecretCoherence(result, previousToolkit); return result; } diff --git a/toolkit-docs-generator/src/merger/secret-coherence.ts b/toolkit-docs-generator/src/merger/secret-coherence.ts new file mode 100644 index 000000000..2289511f1 --- /dev/null +++ b/toolkit-docs-generator/src/merger/secret-coherence.ts @@ -0,0 +1,314 @@ +/** + * Secret coherence scanners + * + * Deterministic checks that keep the rendered toolkit docs consistent with + * the toolkit's current secret set. Two classes of problem: + * + * - Stale reference: a secret name appears in summary or documentation + * chunks but is no longer declared by any tool in the toolkit (typically + * because the tool that required it was removed upstream). + * - Coverage gap: a current secret is not documented in the summary, or + * secrets are mentioned without a link to the Arcade config docs. + * + * These scanners return structured issues. Remediation (LLM-driven edits or + * warnings) is performed by callers in the merger pipeline. + */ +import type { DocumentationChunk, MergedToolkit } from "../types/index.js"; + +export const ARCADE_SECRETS_DOC_URL = + "https://docs.arcade.dev/en/guides/create-tools/tool-basics/create-tool-secrets"; +export const ARCADE_SECRETS_DASHBOARD_URL = + "https://api.arcade.dev/dashboard/auth/secrets"; + +const SECRET_REFERENCE_URLS: readonly string[] = [ + ARCADE_SECRETS_DOC_URL, + ARCADE_SECRETS_DASHBOARD_URL, + // Tolerate language-path variants or bare domains that still link somewhere + // useful on docs.arcade.dev/dashboard. 
+ "docs.arcade.dev/en/guides/create-tools/tool-basics/create-tool-secrets", + "arcade.dev/dashboard/auth/secrets", +]; + +export type StaleSecretLocation = + | { kind: "summary" } + | { kind: "toolkit_chunk"; chunkIndex: number } + | { kind: "tool_chunk"; toolQualifiedName: string; chunkIndex: number }; + +export type StaleSecretReference = { + removedSecret: string; + location: StaleSecretLocation; + content: string; +}; + +export type SecretCoverageGap = + | { kind: "missing_secret_in_summary"; secretName: string } + | { kind: "missing_secret_config_link" }; + +export type SecretCoherenceIssues = { + staleReferences: StaleSecretReference[]; + coverageGaps: SecretCoverageGap[]; +}; + +const collectToolkitSecrets = (toolkit: MergedToolkit): Set => { + const names = new Set(); + for (const tool of toolkit.tools) { + for (const secret of tool.secrets) { + names.add(secret); + } + for (const info of tool.secretsInfo ?? []) { + names.add(info.name); + } + } + return names; +}; + +const toolkitChunks = (toolkit: MergedToolkit): readonly DocumentationChunk[] => + toolkit.documentationChunks ?? []; + +/** + * Exact-substring test. Secret names are ALLCAPS_WITH_UNDER and distinctive + * enough that we don't need word-boundary regex; a plain substring match + * avoids false negatives around punctuation, backticks, and table pipes. 
+ */ +const contentMentionsSecret = (content: string, secret: string): boolean => + content.includes(secret); + +const findSummaryStaleRefs = ( + toolkit: MergedToolkit, + removedSecrets: readonly string[] +): StaleSecretReference[] => { + const summary = toolkit.summary; + if (!summary) { + return []; + } + + const refs: StaleSecretReference[] = []; + for (const removedSecret of removedSecrets) { + if (contentMentionsSecret(summary, removedSecret)) { + refs.push({ + removedSecret, + location: { kind: "summary" }, + content: summary, + }); + } + } + return refs; +}; + +const findToolkitChunkStaleRefs = ( + toolkit: MergedToolkit, + removedSecrets: readonly string[] +): StaleSecretReference[] => { + const refs: StaleSecretReference[] = []; + const chunks = toolkitChunks(toolkit); + for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex += 1) { + const chunk = chunks[chunkIndex]; + if (!chunk) continue; + for (const removedSecret of removedSecrets) { + if (contentMentionsSecret(chunk.content, removedSecret)) { + refs.push({ + removedSecret, + location: { kind: "toolkit_chunk", chunkIndex }, + content: chunk.content, + }); + } + } + } + return refs; +}; + +const findToolChunkStaleRefs = ( + toolkit: MergedToolkit, + removedSecrets: readonly string[] +): StaleSecretReference[] => { + const refs: StaleSecretReference[] = []; + for (const tool of toolkit.tools) { + const chunks = tool.documentationChunks ?? 
[]; + for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex += 1) { + const chunk = chunks[chunkIndex]; + if (!chunk) continue; + for (const removedSecret of removedSecrets) { + if (contentMentionsSecret(chunk.content, removedSecret)) { + refs.push({ + removedSecret, + location: { + kind: "tool_chunk", + toolQualifiedName: tool.qualifiedName, + chunkIndex, + }, + content: chunk.content, + }); + } + } + } + } + return refs; +}; + +/** + * Compare current toolkit secrets against the previous snapshot to identify + * secrets that were removed, then scan summary and documentation chunks for + * any lingering references. Returns at most one entry per + * (location, removedSecret) pair so callers can drive an LLM edit for each. + */ +export const detectStaleSecretReferences = ( + toolkit: MergedToolkit, + previousToolkit?: MergedToolkit +): StaleSecretReference[] => { + if (!previousToolkit) { + return []; + } + + const currentSecrets = collectToolkitSecrets(toolkit); + const previousSecrets = collectToolkitSecrets(previousToolkit); + const removedSecrets: string[] = []; + for (const name of previousSecrets) { + if (!currentSecrets.has(name)) { + removedSecrets.push(name); + } + } + + if (removedSecrets.length === 0) { + return []; + } + + return [ + ...findSummaryStaleRefs(toolkit, removedSecrets), + ...findToolkitChunkStaleRefs(toolkit, removedSecrets), + ...findToolChunkStaleRefs(toolkit, removedSecrets), + ]; +}; + +const summaryHasConfigLink = (summary: string): boolean => + SECRET_REFERENCE_URLS.some((url) => summary.includes(url)); + +/** + * Gaps in the summary's coverage of the toolkit's current secrets: + * - Any current secret name that is not mentioned by exact substring. + * - If at least one secret exists, a missing link to the Arcade config docs. + * + * Only runs when a summary is present; toolkits without a summary are a + * separate concern handled by the summary generator itself. 
+ */ +export const detectSecretCoverageGaps = ( + toolkit: MergedToolkit +): SecretCoverageGap[] => { + const summary = toolkit.summary; + if (!summary) { + return []; + } + + const currentSecrets = collectToolkitSecrets(toolkit); + if (currentSecrets.size === 0) { + return []; + } + + const gaps: SecretCoverageGap[] = []; + for (const secretName of currentSecrets) { + if (!summary.includes(secretName)) { + gaps.push({ kind: "missing_secret_in_summary", secretName }); + } + } + if (!summaryHasConfigLink(summary)) { + gaps.push({ kind: "missing_secret_config_link" }); + } + return gaps; +}; + +export const detectSecretCoherenceIssues = ( + toolkit: MergedToolkit, + previousToolkit?: MergedToolkit +): SecretCoherenceIssues => ({ + staleReferences: detectStaleSecretReferences(toolkit, previousToolkit), + coverageGaps: detectSecretCoverageGaps(toolkit), +}); + +export const hasCoherenceIssues = (issues: SecretCoherenceIssues): boolean => + issues.staleReferences.length > 0 || issues.coverageGaps.length > 0; + +/** + * Group stale references by the artifact they live in so an LLM editor can + * be asked to edit each artifact exactly once, even when multiple removed + * secrets appear in the same chunk or summary. 
+ */ +export type StaleSecretEditTarget = + | { kind: "summary"; removedSecrets: string[]; content: string } + | { + kind: "toolkit_chunk"; + chunkIndex: number; + removedSecrets: string[]; + content: string; + } + | { + kind: "tool_chunk"; + toolQualifiedName: string; + chunkIndex: number; + removedSecrets: string[]; + content: string; + }; + +const locationKey = (ref: StaleSecretReference): string => { + const location = ref.location; + switch (location.kind) { + case "summary": + return "summary"; + case "toolkit_chunk": + return `toolkit_chunk:${location.chunkIndex}`; + case "tool_chunk": + return `tool_chunk:${location.toolQualifiedName}:${location.chunkIndex}`; + default: + return ""; + } +}; + +export const groupStaleRefsByTarget = ( + refs: readonly StaleSecretReference[] +): StaleSecretEditTarget[] => { + const byKey = new Map< + string, + { ref: StaleSecretReference; removedSecrets: Set } + >(); + + for (const ref of refs) { + const key = locationKey(ref); + const existing = byKey.get(key); + if (existing) { + existing.removedSecrets.add(ref.removedSecret); + } else { + byKey.set(key, { + ref, + removedSecrets: new Set([ref.removedSecret]), + }); + } + } + + return Array.from(byKey.values()).map(({ ref, removedSecrets }) => { + const location = ref.location; + const sortedRemovedSecrets = Array.from(removedSecrets).sort(); + switch (location.kind) { + case "summary": + return { + kind: "summary", + removedSecrets: sortedRemovedSecrets, + content: ref.content, + }; + case "toolkit_chunk": + return { + kind: "toolkit_chunk", + chunkIndex: location.chunkIndex, + removedSecrets: sortedRemovedSecrets, + content: ref.content, + }; + case "tool_chunk": + return { + kind: "tool_chunk", + toolQualifiedName: location.toolQualifiedName, + chunkIndex: location.chunkIndex, + removedSecrets: sortedRemovedSecrets, + content: ref.content, + }; + default: + throw new Error("Unknown stale secret location kind"); + } + }); +}; diff --git 
a/toolkit-docs-generator/tests/llm/secret-edit-generator.test.ts b/toolkit-docs-generator/tests/llm/secret-edit-generator.test.ts new file mode 100644 index 000000000..2b66a57a3 --- /dev/null +++ b/toolkit-docs-generator/tests/llm/secret-edit-generator.test.ts @@ -0,0 +1,99 @@ +import { describe, expect, it, vi } from "vitest"; +import type { LlmClient } from "../../src/llm/client.js"; +import { LlmSecretEditGenerator } from "../../src/llm/secret-edit-generator.js"; + +const fakeClient = (response: string): LlmClient => ({ + provider: "anthropic", + generateText: vi.fn(async () => response), +}); + +describe("LlmSecretEditGenerator.cleanupStaleReferences", () => { + it("skips the LLM call when no secrets were removed", async () => { + const client = fakeClient("unused"); + const editor = new LlmSecretEditGenerator({ client, model: "test" }); + const out = await editor.cleanupStaleReferences({ + kind: "summary", + content: "original", + removedSecrets: [], + currentSecrets: ["KEEP"], + toolkitLabel: "GitHub", + }); + expect(out).toBe("original"); + expect(client.generateText).not.toHaveBeenCalled(); + }); + + it("strips an optional code fence from the response", async () => { + const client = fakeClient("```markdown\nedited content\n```"); + const editor = new LlmSecretEditGenerator({ client, model: "test" }); + const out = await editor.cleanupStaleReferences({ + kind: "summary", + content: "mentions OLD", + removedSecrets: ["OLD"], + currentSecrets: ["KEEP"], + toolkitLabel: "GitHub", + }); + expect(out).toBe("edited content"); + }); + + it("passes the removed and current secrets into the prompt", async () => { + const client = fakeClient("ok"); + const editor = new LlmSecretEditGenerator({ client, model: "test" }); + await editor.cleanupStaleReferences({ + kind: "documentation_chunk", + content: "Mentions OLD_TOKEN in a row.", + removedSecrets: ["OLD_TOKEN"], + currentSecrets: ["KEEP_URL"], + toolkitLabel: "GitHub", + }); + 
expect(client.generateText).toHaveBeenCalledTimes(1); + const call = (client.generateText as ReturnType).mock + .calls[0]?.[0] as { prompt: string }; + expect(call.prompt).toContain("OLD_TOKEN"); + expect(call.prompt).toContain("KEEP_URL"); + expect(call.prompt).toContain("GitHub"); + }); + + it("throws when the LLM response is empty", async () => { + const client = fakeClient(""); + const editor = new LlmSecretEditGenerator({ client, model: "test" }); + await expect( + editor.cleanupStaleReferences({ + kind: "summary", + content: "mentions OLD", + removedSecrets: ["OLD"], + currentSecrets: [], + toolkitLabel: "GitHub", + }) + ).rejects.toThrow(/empty/i); + }); +}); + +describe("LlmSecretEditGenerator.fillCoverageGaps", () => { + it("skips the LLM call when nothing is missing and no link is required", async () => { + const client = fakeClient("unused"); + const editor = new LlmSecretEditGenerator({ client, model: "test" }); + const out = await editor.fillCoverageGaps({ + content: "original", + missingSecretNames: [], + currentSecrets: ["KEEP"], + toolkitLabel: "GitHub", + requireConfigLink: false, + }); + expect(out).toBe("original"); + expect(client.generateText).not.toHaveBeenCalled(); + }); + + it("calls the LLM when a config link is required", async () => { + const client = fakeClient("edited"); + const editor = new LlmSecretEditGenerator({ client, model: "test" }); + const out = await editor.fillCoverageGaps({ + content: "Summary without a link.", + missingSecretNames: [], + currentSecrets: ["KEEP"], + toolkitLabel: "GitHub", + requireConfigLink: true, + }); + expect(out).toBe("edited"); + expect(client.generateText).toHaveBeenCalledTimes(1); + }); +}); diff --git a/toolkit-docs-generator/tests/merger/data-merger.test.ts b/toolkit-docs-generator/tests/merger/data-merger.test.ts index bb4991b3f..a5068acd0 100644 --- a/toolkit-docs-generator/tests/merger/data-merger.test.ts +++ b/toolkit-docs-generator/tests/merger/data-merger.test.ts @@ -4,7 +4,8 @@ * These 
tests use in-memory implementations (NOT mocks) to verify * the merge logic works correctly. */ -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; +import type { ISecretEditGenerator } from "../../src/llm/secret-edit-generator.js"; import { computeAllScopes, DataMerger, @@ -1185,6 +1186,148 @@ describe("DataMerger", () => { expect(result.toolkit.summary).toBeUndefined(); }); + it("runs the secret-coherence editor when a removed secret still appears in a toolkit documentation chunk", async () => { + const toolWithSecret = createTool({ + name: "CreateIssue", + qualifiedName: "Github.CreateIssue", + fullyQualifiedName: "Github.CreateIssue@1.0.0", + auth: { providerId: "github", providerType: "oauth2", scopes: [] }, + secrets: ["GITHUB_SERVER_URL"], + }); + const toolkitDataSource = createCombinedToolkitDataSource({ + toolSource: new InMemoryToolDataSource([toolWithSecret]), + metadataSource: new InMemoryMetadataSource([githubMetadata]), + }); + + const previousWithOldSecret = await mergeToolkit( + "Github", + [ + createTool({ + ...toolWithSecret, + secrets: [ + "GITHUB_SERVER_URL", + "GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN", + ], + }), + ], + githubMetadata, + null, + createStubGenerator() + ); + // Put the stale reference inside a toolkit-level doc chunk — chunks + // persist verbatim across runs, unlike the summary which gets + // regenerated when the signature changes. 
+ previousWithOldSecret.toolkit.documentationChunks = [ + { + type: "section", + location: "before_available_tools", + position: "after", + content: + "| Secret | Required For |\n| `GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN` | Notifications |", + }, + ]; + + const cleanupSpy = vi.fn( + async () => + "| Secret | Required For |\n| `GITHUB_SERVER_URL` | All tools |" + ); + const coverageSpy = vi.fn( + async (input: { content: string }) => + `${input.content}\n\n[config link]` + ); + const secretEditGenerator: ISecretEditGenerator = { + cleanupStaleReferences: cleanupSpy, + fillCoverageGaps: coverageSpy, + }; + + const merger = new DataMerger({ + toolkitDataSource, + customSectionsSource: new EmptyCustomSectionsSource(), + toolExampleGenerator: createStubGenerator(), + toolkitSummaryGenerator: createStubSummaryGenerator("Stub summary"), + secretEditGenerator, + previousToolkits: new Map([["github", previousWithOldSecret.toolkit]]), + }); + + const result = await merger.mergeToolkit("Github"); + + expect(cleanupSpy).toHaveBeenCalledTimes(1); + const cleanupCall = cleanupSpy.mock.calls[0]?.[0] as { + removedSecrets: string[]; + kind: string; + }; + expect(cleanupCall.removedSecrets).toEqual([ + "GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN", + ]); + expect(cleanupCall.kind).toBe("documentation_chunk"); + // The chunk content in the result reflects the editor output. 
+ expect( + result.toolkit.documentationChunks[0]?.content.includes( + "GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN" + ) + ).toBe(false); + expect(result.toolkit.documentationChunks[0]?.content).toContain( + "GITHUB_SERVER_URL" + ); + expect( + result.warnings.some((warning) => + warning.includes("Stale secret reference") + ) + ).toBe(true); + }); + + it("emits warnings but no LLM calls when no editor is configured", async () => { + const toolWithSecret = createTool({ + name: "CreateIssue", + qualifiedName: "Github.CreateIssue", + fullyQualifiedName: "Github.CreateIssue@1.0.0", + auth: { providerId: "github", providerType: "oauth2", scopes: [] }, + secrets: ["GITHUB_SERVER_URL"], + }); + const toolkitDataSource = createCombinedToolkitDataSource({ + toolSource: new InMemoryToolDataSource([toolWithSecret]), + metadataSource: new InMemoryMetadataSource([githubMetadata]), + }); + const previousWithOldSecret = await mergeToolkit( + "Github", + [ + createTool({ + ...toolWithSecret, + secrets: ["GITHUB_SERVER_URL", "OLD_SECRET"], + }), + ], + githubMetadata, + null, + createStubGenerator() + ); + previousWithOldSecret.toolkit.documentationChunks = [ + { + type: "markdown", + location: "header", + position: "after", + content: "Still references OLD_SECRET for legacy flows.", + }, + ]; + + const merger = new DataMerger({ + toolkitDataSource, + customSectionsSource: new EmptyCustomSectionsSource(), + toolExampleGenerator: createStubGenerator(), + toolkitSummaryGenerator: createStubSummaryGenerator("Stub summary"), + previousToolkits: new Map([["github", previousWithOldSecret.toolkit]]), + }); + + const result = await merger.mergeToolkit("Github"); + + expect( + result.warnings.some( + (warning) => + warning.includes("Stale secret reference") && + warning.includes("OLD_SECRET") + ) + ).toBe(true); + }); + it("reuses previous examples when the tool is unchanged", async () => { const toolkitDataSource = createCombinedToolkitDataSource({ toolSource: new 
InMemoryToolDataSource([githubTool1]), diff --git a/toolkit-docs-generator/tests/merger/secret-coherence.test.ts b/toolkit-docs-generator/tests/merger/secret-coherence.test.ts new file mode 100644 index 000000000..efe5a686f --- /dev/null +++ b/toolkit-docs-generator/tests/merger/secret-coherence.test.ts @@ -0,0 +1,264 @@ +import { describe, expect, it } from "vitest"; +import { + ARCADE_SECRETS_DOC_URL, + detectSecretCoherenceIssues, + detectSecretCoverageGaps, + detectStaleSecretReferences, + groupStaleRefsByTarget, + hasCoherenceIssues, +} from "../../src/merger/secret-coherence.js"; +import type { + DocumentationChunk, + MergedTool, + MergedToolkit, +} from "../../src/types/index.js"; + +const chunk = ( + overrides: Partial = {} +): DocumentationChunk => ({ + type: "markdown", + location: "header", + position: "before", + content: "", + ...overrides, +}); + +const tool = (overrides: Partial = {}): MergedTool => ({ + name: "Example", + qualifiedName: "Github.Example", + fullyQualifiedName: "Github.Example@1.0.0", + description: "desc", + parameters: [], + auth: null, + secrets: [], + secretsInfo: [], + output: null, + documentationChunks: [], + ...overrides, +}); + +const toolkit = (overrides: Partial = {}): MergedToolkit => ({ + id: "github", + label: "GitHub", + version: "1.0.0", + description: null, + metadata: { + category: "development", + iconUrl: "", + isBYOC: false, + isPro: false, + type: "arcade", + docsLink: "", + isComingSoon: false, + isHidden: false, + }, + auth: null, + tools: [], + documentationChunks: [], + customImports: [], + subPages: [], + ...overrides, +}); + +describe("detectStaleSecretReferences", () => { + it("returns nothing when there is no previous toolkit", () => { + const result = detectStaleSecretReferences(toolkit()); + expect(result).toEqual([]); + }); + + it("returns nothing when no secrets were removed", () => { + const previous = toolkit({ + tools: [tool({ secrets: ["GITHUB_SERVER_URL"] })], + }); + const current = toolkit({ + 
tools: [tool({ secrets: ["GITHUB_SERVER_URL"] })], + }); + expect(detectStaleSecretReferences(current, previous)).toEqual([]); + }); + + it("finds a removed secret still mentioned in the summary", () => { + const previous = toolkit({ + tools: [ + tool({ + secrets: [ + "GITHUB_SERVER_URL", + "GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN", + ], + }), + ], + }); + const current = toolkit({ + summary: + "GitHub toolkit. Set `GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN` to use notifications.", + tools: [tool({ secrets: ["GITHUB_SERVER_URL"] })], + }); + const result = detectStaleSecretReferences(current, previous); + expect(result).toHaveLength(1); + expect(result[0]?.removedSecret).toBe( + "GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN" + ); + expect(result[0]?.location).toEqual({ kind: "summary" }); + }); + + it("finds a removed secret still mentioned in a toolkit documentation chunk", () => { + const previous = toolkit({ + tools: [ + tool({ + secrets: [ + "GITHUB_SERVER_URL", + "GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN", + ], + }), + ], + }); + const current = toolkit({ + tools: [tool({ secrets: ["GITHUB_SERVER_URL"] })], + documentationChunks: [ + chunk({ + location: "before_available_tools", + content: + "| Secret | Required |\n| `GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN` | Notifications |", + }), + ], + }); + const result = detectStaleSecretReferences(current, previous); + expect(result).toHaveLength(1); + expect(result[0]?.location).toEqual({ + kind: "toolkit_chunk", + chunkIndex: 0, + }); + }); + + it("finds a removed secret in a per-tool documentation chunk", () => { + const previous = toolkit({ + tools: [ + tool({ + qualifiedName: "Github.GetNotificationSummary", + secrets: [ + "GITHUB_SERVER_URL", + "GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN", + ], + }), + ], + }); + const current = toolkit({ + tools: [ + tool({ + qualifiedName: "Github.GetNotificationSummary", + secrets: ["GITHUB_SERVER_URL"], + documentationChunks: [ + chunk({ + content: + "Requires GITHUB_CLASSIC_PERSONAL_ACCESS_TOKEN for 
notifications.", + }), + ], + }), + ], + }); + const result = detectStaleSecretReferences(current, previous); + expect(result).toHaveLength(1); + expect(result[0]?.location).toEqual({ + kind: "tool_chunk", + toolQualifiedName: "Github.GetNotificationSummary", + chunkIndex: 0, + }); + }); +}); + +describe("detectSecretCoverageGaps", () => { + it("returns nothing when the toolkit has no summary", () => { + const current = toolkit({ + tools: [tool({ secrets: ["GITHUB_SERVER_URL"] })], + }); + expect(detectSecretCoverageGaps(current)).toEqual([]); + }); + + it("returns nothing when the toolkit has no secrets", () => { + const current = toolkit({ summary: "No secrets here." }); + expect(detectSecretCoverageGaps(current)).toEqual([]); + }); + + it("flags secrets that are missing from the summary", () => { + const current = toolkit({ + summary: `Toolkit info. See ${ARCADE_SECRETS_DOC_URL} to configure.`, + tools: [ + tool({ + secrets: ["GITHUB_SERVER_URL", "GITHUB_WEBHOOK_SECRET"], + }), + ], + }); + const gaps = detectSecretCoverageGaps(current); + const missing = gaps.filter( + (gap) => gap.kind === "missing_secret_in_summary" + ); + expect(missing).toHaveLength(2); + }); + + it("flags a missing Arcade config link when secrets exist", () => { + const current = toolkit({ + summary: + "Toolkit info. Uses `GITHUB_SERVER_URL` but no configuration link.", + tools: [tool({ secrets: ["GITHUB_SERVER_URL"] })], + }); + const gaps = detectSecretCoverageGaps(current); + expect(gaps.some((gap) => gap.kind === "missing_secret_config_link")).toBe( + true + ); + }); + + it("does not flag the link when the dashboard URL is present", () => { + const current = toolkit({ + summary: + "Toolkit info. 
Set `GITHUB_SERVER_URL` in the Arcade Dashboard: https://api.arcade.dev/dashboard/auth/secrets.", + tools: [tool({ secrets: ["GITHUB_SERVER_URL"] })], + }); + const gaps = detectSecretCoverageGaps(current); + expect(gaps.some((gap) => gap.kind === "missing_secret_config_link")).toBe( + false + ); + }); +}); + +describe("groupStaleRefsByTarget", () => { + it("groups multiple removed secrets hitting the same artifact into one edit target", () => { + const previous = toolkit({ + tools: [ + tool({ + secrets: ["A_SECRET", "B_SECRET", "KEEP_SECRET"], + }), + ], + }); + const current = toolkit({ + summary: "Mentions A_SECRET and B_SECRET together.", + tools: [tool({ secrets: ["KEEP_SECRET"] })], + }); + const refs = detectStaleSecretReferences(current, previous); + const targets = groupStaleRefsByTarget(refs); + expect(targets).toHaveLength(1); + expect(targets[0]?.kind).toBe("summary"); + expect(targets[0]?.removedSecrets).toEqual(["A_SECRET", "B_SECRET"]); + }); +}); + +describe("hasCoherenceIssues", () => { + it("is true when either stale refs or coverage gaps exist", () => { + const previous = toolkit({ + tools: [tool({ secrets: ["OLD_SECRET", "KEEP"] })], + }); + const current = toolkit({ + summary: "Still says OLD_SECRET here.", + tools: [tool({ secrets: ["KEEP"] })], + }); + const issues = detectSecretCoherenceIssues(current, previous); + expect(hasCoherenceIssues(issues)).toBe(true); + }); + + it("is false when the toolkit is coherent", () => { + const current = toolkit({ + summary: `Uses \`SECRET_A\`. 
Configure via ${ARCADE_SECRETS_DOC_URL}.`, + tools: [tool({ secrets: ["SECRET_A"] })], + }); + const issues = detectSecretCoherenceIssues(current); + expect(hasCoherenceIssues(issues)).toBe(false); + }); +}); diff --git a/toolkit-docs-generator/tests/workflows/generate-toolkit-docs.test.ts b/toolkit-docs-generator/tests/workflows/generate-toolkit-docs.test.ts index 5ba776a36..76093cf2e 100644 --- a/toolkit-docs-generator/tests/workflows/generate-toolkit-docs.test.ts +++ b/toolkit-docs-generator/tests/workflows/generate-toolkit-docs.test.ts @@ -36,3 +36,11 @@ test("porter workflow generates docs and opens a PR", () => { expect(workflowContents).toContain("[AUTO] Adding MCP Servers docs update"); expect(workflowContents).toContain("pull-requests: write"); }); + +test("porter workflow wires the secret-coherence editor", () => { + expect(workflowContents).toContain("--llm-editor-provider anthropic"); + expect(workflowContents).toContain("--llm-editor-model"); + expect(workflowContents).toContain("--llm-editor-api-key"); + expect(workflowContents).toContain("ANTHROPIC_API_KEY"); + expect(workflowContents).toContain("claude-sonnet-4-6"); +}); From 45b1c1acbb10840c5e83c6cbb47df7ba75823c33 Mon Sep 17 00:00:00 2001 From: jottakka Date: Sat, 18 Apr 2026 14:05:05 -0300 Subject: [PATCH 02/11] fix(toolkit-docs-generator): address ACR findings on secret coherence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues surfaced by `/acr-run`: 1. FENCE_PATTERN (secret-edit-generator.ts) was non-greedy and unanchored, so stripOptionalFence stopped at the FIRST inner ``` when the LLM wrapped its edit in a markdown fence and the edit itself contained a fenced code block. Result: the rest of the edit was silently dropped with no error — corrupted doc chunks written to disk. Fix: anchor the pattern to ^…$ and use a greedy capture so the match extends to the outer closing fence. 2. 
enforceSecretCoherence (data-merger.ts) computed coverage gaps once, before stale cleanup ran. If cleanup modifies the summary and incidentally drops a passage that mentioned a current secret, the pre-cleanup gap snapshot would miss it. Fix: re-run detectSecretCoherenceIssues after applyStaleRefCleanup so the coverage fill sees post-cleanup state. Tests: - Two new fence tests cover (a) preserving inner code blocks when unwrapping the outer fence, and (b) leaving unwrapped responses with inner blocks untouched. - One new DataMerger test proves the coverage editor receives post-cleanup summary content (not a stale snapshot). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/llm/secret-edit-generator.ts | 6 +- .../src/merger/data-merger.ts | 10 ++- .../tests/llm/secret-edit-generator.test.ts | 45 +++++++++++ .../tests/merger/data-merger.test.ts | 76 +++++++++++++++++++ 4 files changed, 135 insertions(+), 2 deletions(-) diff --git a/toolkit-docs-generator/src/llm/secret-edit-generator.ts b/toolkit-docs-generator/src/llm/secret-edit-generator.ts index 0d5795059..e34abfee1 100644 --- a/toolkit-docs-generator/src/llm/secret-edit-generator.ts +++ b/toolkit-docs-generator/src/llm/secret-edit-generator.ts @@ -63,7 +63,11 @@ const DEFAULT_SYSTEM_PROMPT = "reorder existing sections. Preserve markdown syntax, backticks, tables, " + "and code exactly."; -const FENCE_PATTERN = /```(?:markdown|md|text)?\s*([\s\S]*?)```/; +// Anchored to the start/end of the full string and uses a greedy capture so +// that inner fenced code blocks inside an edited documentation chunk are +// preserved. A non-greedy capture would stop at the first inner ``` and +// silently truncate the rest of the content. 
+const FENCE_PATTERN = /^\s*```(?:markdown|md|text)?\s*([\s\S]*)```\s*$/; const stripOptionalFence = (text: string): string => { const match = text.match(FENCE_PATTERN); diff --git a/toolkit-docs-generator/src/merger/data-merger.ts b/toolkit-docs-generator/src/merger/data-merger.ts index 738558859..9cd9cafdc 100644 --- a/toolkit-docs-generator/src/merger/data-merger.ts +++ b/toolkit-docs-generator/src/merger/data-merger.ts @@ -1088,8 +1088,16 @@ export class DataMerger { return; } + // Order matters: stale cleanup runs first, then coverage gaps are + // re-detected against the edited summary. If cleanup accidentally + // dropped a passage that incidentally mentioned a current secret, + // the fresh scan notices and the editor restores it. await this.applyStaleRefCleanup(result, issues); - await this.applyCoverageFill(result, issues); + const postCleanupIssues = detectSecretCoherenceIssues( + result.toolkit, + previousToolkit + ); + await this.applyCoverageFill(result, postCleanupIssues); } private appendCoherenceWarnings( diff --git a/toolkit-docs-generator/tests/llm/secret-edit-generator.test.ts b/toolkit-docs-generator/tests/llm/secret-edit-generator.test.ts index 2b66a57a3..fc5df2d64 100644 --- a/toolkit-docs-generator/tests/llm/secret-edit-generator.test.ts +++ b/toolkit-docs-generator/tests/llm/secret-edit-generator.test.ts @@ -35,6 +35,51 @@ describe("LlmSecretEditGenerator.cleanupStaleReferences", () => { expect(out).toBe("edited content"); }); + it("preserves inner fenced code blocks when unwrapping the outer fence", async () => { + // The LLM may wrap an edited documentation chunk (which itself + // contains a fenced code block) inside an outer markdown fence. A + // non-greedy fence regex would stop at the first inner ``` and + // silently truncate the rest of the content. 
+ const wrappedEdit = [ + "```markdown", + "Setup steps:", + "", + "```python", + "arcade.run(tool='Github.CreateIssue')", + "```", + "", + "Further notes follow.", + "```", + ].join("\n"); + const client = fakeClient(wrappedEdit); + const editor = new LlmSecretEditGenerator({ client, model: "test" }); + const out = await editor.cleanupStaleReferences({ + kind: "documentation_chunk", + content: "mentions OLD", + removedSecrets: ["OLD"], + currentSecrets: ["KEEP"], + toolkitLabel: "GitHub", + }); + expect(out).toContain("```python"); + expect(out).toContain("Further notes follow."); + }); + + it("leaves non-wrapped content untouched (no outer fence)", async () => { + // Response has inner fenced blocks but no outer fence — must pass + // through verbatim, not partially matched. + const response = "No wrapper.\n\n```js\nconsole.log(1);\n```\nTail text."; + const client = fakeClient(response); + const editor = new LlmSecretEditGenerator({ client, model: "test" }); + const out = await editor.cleanupStaleReferences({ + kind: "documentation_chunk", + content: "mentions OLD", + removedSecrets: ["OLD"], + currentSecrets: ["KEEP"], + toolkitLabel: "GitHub", + }); + expect(out).toBe(response); + }); + it("passes the removed and current secrets into the prompt", async () => { const client = fakeClient("ok"); const editor = new LlmSecretEditGenerator({ client, model: "test" }); diff --git a/toolkit-docs-generator/tests/merger/data-merger.test.ts b/toolkit-docs-generator/tests/merger/data-merger.test.ts index a5068acd0..675cb0da1 100644 --- a/toolkit-docs-generator/tests/merger/data-merger.test.ts +++ b/toolkit-docs-generator/tests/merger/data-merger.test.ts @@ -1276,6 +1276,82 @@ describe("DataMerger", () => { ).toBe(true); }); + it("passes the post-cleanup summary to the coverage editor, not the original", async () => { + // Ordering guarantee: applyStaleRefCleanup runs before the coverage + // scan is re-computed. 
Cleanup here edits only a + // chunk and leaves the summary untouched, so the assertions pin that + // each editor runs once and that the coverage editor is fed the + // toolkit's current summary. A pre-cleanup summary snapshot would be + // identical in this scenario, so ordering itself is not distinguished. + const toolWithSecrets = createTool({ + name: "CreateIssue", + qualifiedName: "Github.CreateIssue", + fullyQualifiedName: "Github.CreateIssue@1.0.0", + auth: { providerId: "github", providerType: "oauth2", scopes: [] }, + secrets: ["GITHUB_SERVER_URL"], + }); + const toolkitDataSource = createCombinedToolkitDataSource({ + toolSource: new InMemoryToolDataSource([toolWithSecrets]), + metadataSource: new InMemoryMetadataSource([githubMetadata]), + }); + const previous = await mergeToolkit( + "Github", + [ + createTool({ + ...toolWithSecrets, + secrets: ["GITHUB_SERVER_URL", "OLD_SECRET"], + }), + ], + githubMetadata, + null, + createStubGenerator() + ); + previous.toolkit.documentationChunks = [ + { + type: "markdown", + location: "header", + position: "after", + content: "Legacy note about OLD_SECRET.", + }, + ]; + + const cleanupSpy = vi.fn(async () => "Edited chunk."); + const coverageSpy = vi.fn( + async (input: { content: string }) => `${input.content} [link]` + ); + const secretEditGenerator: ISecretEditGenerator = { + cleanupStaleReferences: cleanupSpy, + fillCoverageGaps: coverageSpy, + }; + const merger = new DataMerger({ + toolkitDataSource, + customSectionsSource: new EmptyCustomSectionsSource(), + toolExampleGenerator: createStubGenerator(), + toolkitSummaryGenerator: createStubSummaryGenerator("Stub summary"), + secretEditGenerator, + previousToolkits: new Map([["github", previous.toolkit]]), + }); + + const result = await merger.mergeToolkit("Github"); + + expect(cleanupSpy).toHaveBeenCalledTimes(1); + // Coverage editor runs because the stub summary does not mention + // GITHUB_SERVER_URL. 
It must see the current summary state — post- + // cleanup — not any snapshot taken before cleanup. + expect(coverageSpy).toHaveBeenCalledTimes(1); + const coverageCall = coverageSpy.mock.calls[0]?.[0] as { + content: string; + missingSecretNames: string[]; + }; + // The content passed in was the post-cleanup summary (unchanged by + // cleanup in this scenario, since the stale ref was in a chunk). + expect(coverageCall.content).toBe("Stub summary (Github)"); + expect(coverageCall.missingSecretNames).toContain("GITHUB_SERVER_URL"); + // After the coverage edit, the summary should reflect the editor's + // output built on top of the post-cleanup content. + expect(result.toolkit.summary).toBe("Stub summary (Github) [link]"); + }); + it("emits warnings but no LLM calls when no editor is configured", async () => { const toolWithSecret = createTool({ name: "CreateIssue", From 5a7de0c32b96e7280a14ea67d65be55eebd93711 Mon Sep 17 00:00:00 2001 From: jottakka Date: Sat, 18 Apr 2026 14:24:41 -0300 Subject: [PATCH 03/11] chore(toolkit-docs-generator): raise editor max-tokens default to 8192 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 4096 was tight. Largest single artifact in current data is a ~6K-char doc chunk (googlenews) ≈ 1.5K output tokens for a minimal-edit rewrite; a summary with no word cap for a 40+ tool toolkit with several secrets can land in the 2–3K output-token range. 8K gives comfortable margin without meaningful cost or latency impact on Sonnet 4.6. Help text updated to match. Callers can still override via --llm-editor-max-tokens. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- toolkit-docs-generator/src/cli/index.ts | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/toolkit-docs-generator/src/cli/index.ts b/toolkit-docs-generator/src/cli/index.ts index 06e61c5ae..94276e7d1 100644 --- a/toolkit-docs-generator/src/cli/index.ts +++ b/toolkit-docs-generator/src/cli/index.ts @@ -375,7 +375,14 @@ interface SecretEditorCliOptions { skipSecretCoherence?: boolean; } -const DEFAULT_EDITOR_MAX_TOKENS = 4096; +// Headroom is calibrated against the largest single artifact the editor +// might receive: a long documentation chunk that must be reproduced +// verbatim minus a removed secret (worst-case output size ≈ input size). +// Largest chunk in current data is ~6K chars (~1.5K tokens); a summary +// with no word cap for a 40+ tool toolkit with several secrets can land +// in the 2–3K output-token range. 8K keeps a safe margin without any +// meaningful cost or latency penalty on Sonnet 4.6. +const DEFAULT_EDITOR_MAX_TOKENS = 8192; const resolveEditorApiKey = ( provider: LlmProvider, @@ -975,7 +982,7 @@ program ) .option( "--llm-editor-max-tokens ", - "Secret-coherence editor max tokens (default: 4096)", + "Secret-coherence editor max tokens (default: 8192)", (value) => Number.parseInt(value, 10) ) .option( @@ -1995,7 +2002,7 @@ program ) .option( "--llm-editor-max-tokens ", - "Secret-coherence editor max tokens (default: 4096)", + "Secret-coherence editor max tokens (default: 8192)", (value) => Number.parseInt(value, 10) ) .option( From a80d24303e2c1bbebcf37602f92e5b6a8c343c45 Mon Sep 17 00:00:00 2001 From: jottakka Date: Sat, 18 Apr 2026 14:28:29 -0300 Subject: [PATCH 04/11] docs(toolkit-docs-generator): document secret coherence + loosen per-secret prose cap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two changes: 1. 
README: new Secret coherence section covering the scan/edit pipeline, the editor CLI flags, the claude-sonnet-4-6 default, fail-open behavior when no API key is set, and a local invocation example. Required/optional CI secrets updated with ANTHROPIC_API_KEY and ANTHROPIC_EDITOR_MODEL. Key CLI options list updated with the new flags. 2. Prompts (summary generator + coverage-fill editor) no longer cap each secret at one line. Instead they ask for as much detail as the secret actually needs — a short URL override may be one line; a scoped API key typically needs several sentences naming the provider dashboard page, required scopes or permissions, and any account tier. Both prompts also request an inline markdown link to the provider's own docs page for how to create/retrieve the secret when the model knows it, and explicitly forbid inventing URLs. Co-Authored-By: Claude Opus 4.7 (1M context) --- toolkit-docs-generator/README.md | 51 +++++++++++++++++++ .../src/llm/secret-edit-generator.ts | 5 +- .../src/llm/toolkit-summary-generator.ts | 2 +- 3 files changed, 55 insertions(+), 3 deletions(-) diff --git a/toolkit-docs-generator/README.md b/toolkit-docs-generator/README.md index 9a5743f13..fba72a3ba 100644 --- a/toolkit-docs-generator/README.md +++ b/toolkit-docs-generator/README.md @@ -42,6 +42,8 @@ Required secrets: Optional secrets: - `OPENAI_MODEL` (defaults in the workflow) +- `ANTHROPIC_API_KEY` enables the secret-coherence editor (see below). Without it the workflow still runs; the scanners emit warnings but no LLM edits are applied. +- `ANTHROPIC_EDITOR_MODEL` (defaults to `claude-sonnet-4-6` in the workflow) ## Rendering pipeline (docs site) @@ -66,6 +68,53 @@ The docs site consumes the generated JSON directly: This step does not change JSON output. It only updates navigation files. 
+## Secret coherence (stale-reference cleanup + coverage check) + +When a toolkit loses a secret upstream (typically because the tool that required it was removed), the rendered docs can keep mentioning it in the summary and in hand-authored documentation chunks. Symmetrically, a toolkit can end up with current secrets the summary never names, or name them without any link to the Arcade config docs. + +The generator runs two checks after summary generation, in [`src/merger/secret-coherence.ts`](src/merger/secret-coherence.ts) and [`src/llm/secret-edit-generator.ts`](src/llm/secret-edit-generator.ts): + +1. **Stale-reference scan** (deterministic): diffs current vs previous toolkit secret sets and searches the summary, every toolkit-level `documentationChunks` entry, and every per-tool chunk for any removed secret name. Exact substring match — secret names are distinctive ALLCAPS_WITH_UNDERSCORES identifiers. +2. **Coverage-gap scan** (deterministic): flags any current secret that is not mentioned in the summary and any summary that lacks a link to the Arcade secret config docs. + +If an LLM editor is configured (`--llm-editor-provider` / `--llm-editor-model` / `--llm-editor-api-key`), both classes of issue are auto-fixed: + +- Stale references are removed with a **minimum-necessary edit** prompt — whole sentences, bullets, or table rows that exist only to describe the removed secret are deleted; sentences that mention the removed secret alongside other content are minimally rewritten; nothing else is touched. This is intentionally different from the summary generator, which rewrites from scratch and tends to oversimplify. 
+- Missing secrets get appended to the summary's `**Secrets**` section with as much detail as the secret actually needs — a short URL override may be one line; a scoped API key typically needs several sentences describing the provider dashboard page, required scopes or permissions, and account-tier constraints, plus an inline link to the provider's own documentation for how to create it. The prompt explicitly forbids inventing docs URLs. +- Missing Arcade-config links are added at the end of the `**Secrets**` section. +- The editor is instructed to preserve surrounding content verbatim (no re-summarization, no reorder). + +When the editor is not configured, the scanners still run and their findings land as non-fatal warnings in the run log. Editor exceptions are caught individually so a single LLM failure does not break the run. + +The default editor model is **Claude Sonnet 4.6** — chosen to avoid the oversimplification observed when bulk summaries were regenerated by `gpt-4o-mini`. Override with `--llm-editor-model` or the `LLM_EDITOR_MODEL` / `ANTHROPIC_EDITOR_MODEL` env var. + +### CLI flags + +- `--llm-editor-provider ` — editor provider. Falls back to `LLM_EDITOR_PROVIDER`. +- `--llm-editor-model ` — editor model. Falls back to `LLM_EDITOR_MODEL` / `ANTHROPIC_EDITOR_MODEL`. +- `--llm-editor-api-key ` — editor API key. Falls back to `LLM_EDITOR_API_KEY`, then `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` per provider. +- `--llm-editor-base-url ` — override editor base URL. +- `--llm-editor-temperature ` — editor temperature. +- `--llm-editor-max-tokens ` — editor max output tokens (default `8192`). +- `--llm-editor-max-retries ` — retry attempts on transient errors (default `3`). +- `--skip-secret-coherence` — disable both the scan and the edit step entirely. 
+ +### Local example (editor on) + +```bash +pnpm dlx tsx src/cli/index.ts generate \ + --providers "Github" \ + --tool-metadata-url "$ENGINE_API_URL" \ + --tool-metadata-key "$ENGINE_API_KEY" \ + --llm-provider openai \ + --llm-model gpt-4.1-mini \ + --llm-api-key "$OPENAI_API_KEY" \ + --llm-editor-provider anthropic \ + --llm-editor-model claude-sonnet-4-6 \ + --llm-editor-api-key "$ANTHROPIC_API_KEY" \ + --output data/toolkits +``` + ## Architecture at a glance - **CLI**: `toolkit-docs-generator/src/cli/index.ts` @@ -182,6 +231,8 @@ deletes it and rebuilds `index.json`. - `--previous-output` compare against a previous output directory - `--custom-sections` load curated docs sections - `--skip-examples`, `--skip-summary` disable LLM steps +- `--skip-secret-coherence` disable the stale-reference scan + coverage fill (see the Secret coherence section) +- `--llm-editor-provider`, `--llm-editor-model`, `--llm-editor-api-key` configure the secret-coherence editor (Sonnet 4.6 by default) - `--no-verify-output` skip output verification ## Troubleshooting diff --git a/toolkit-docs-generator/src/llm/secret-edit-generator.ts b/toolkit-docs-generator/src/llm/secret-edit-generator.ts index e34abfee1..90a1b7524 100644 --- a/toolkit-docs-generator/src/llm/secret-edit-generator.ts +++ b/toolkit-docs-generator/src/llm/secret-edit-generator.ts @@ -125,11 +125,12 @@ const buildCoveragePrompt = (input: SecretCoverageEditInput): string => { "", "Rules:", "- Ensure every current secret is mentioned by its exact name (inside backticks).", - "- Each missing secret should get at most one short, factual line describing what it is and, when possible, how a developer obtains it (e.g. provider Dashboard, region URL, API key). If you do not know, keep it to the name and a one-line purpose — do not invent provisioning steps.", + "- For each missing secret, add a factual explanation of what it is and how a developer obtains it from the provider. 
Use as much detail as the secret actually needs — a short URL override may be a single line; a scoped API key may need several sentences naming the provider dashboard page, the required scopes or permissions, and any account tier constraints.", + "- When possible include an inline markdown link to the provider's own documentation page that tells the reader how to create or retrieve that specific secret. If you do not know the provider's docs URL, omit the link rather than inventing one.", "- Prefer appending to or lightly extending an existing `**Secrets**` section. Only create a `**Secrets**` section if none exists.", "- Do not rewrite unrelated content. Do not change headings, ordering, tone, or other sections.", linkInstruction, - "- Keep the output compact: no filler, no marketing copy. The summary should remain easy to scan.", + "- Keep phrasing factual, developer-focused, and free of marketing copy.", "", "Return ONLY the edited summary, with no commentary, no explanation, and no code fences around the whole document.", "", diff --git a/toolkit-docs-generator/src/llm/toolkit-summary-generator.ts b/toolkit-docs-generator/src/llm/toolkit-summary-generator.ts index 3b72e3595..5cbdeface 100644 --- a/toolkit-docs-generator/src/llm/toolkit-summary-generator.ts +++ b/toolkit-docs-generator/src/llm/toolkit-summary-generator.ts @@ -78,7 +78,7 @@ const buildPrompt = (toolkit: MergedToolkit): string => { "- Add a **Capabilities** section with 3 to 6 bullets summarizing shared capabilities (group tools by theme; do not list tools one by one).", "- If auth type is oauth2 or mixed, add an **OAuth** section with provider and representative scopes.", "- If auth type is api_key or mixed, mention API key usage under **OAuth** or a dedicated heading.", - `- If any secrets exist, add a **Secrets** section. List every secret by its exact name in backticks. 
Give each secret one short factual line covering what it is and how a developer obtains it from the provider; if you do not know, stay with a one-line purpose rather than inventing steps. End the section with the Arcade config docs link: ${ARCADE_SECRETS_DOC_URL} (and optionally mention ${ARCADE_SECRETS_DASHBOARD_URL}).`, + `- If any secrets exist, add a **Secrets** section. List every secret by its exact name in backticks. For each secret, give a factual explanation of what it is and how a developer obtains it from the provider — use as much detail as the secret actually needs (a short URL override may be one line; a scoped API key may need several sentences naming the provider dashboard page, required scopes/permissions, and any account tier). When possible include an inline markdown link to the provider's own documentation page that tells the reader how to create/retrieve that specific secret. If you do not know the provider's docs URL, omit the link rather than inventing one. End the section with the Arcade config docs link: ${ARCADE_SECRETS_DOC_URL} (and optionally mention ${ARCADE_SECRETS_DASHBOARD_URL}).`, "- Use Markdown. Developer-focused. Say 'Arcade' (never 'Arcade AI').", "- Do not add marketing copy, repetition, or filler.", "", From ae2064aab8b03e0a5137192c748d62389776f14e Mon Sep 17 00:00:00 2001 From: jottakka Date: Sat, 18 Apr 2026 15:32:40 -0300 Subject: [PATCH 05/11] feat(toolkit-docs-generator): summary prompt no longer repeats OAuth scopes Per follow-up on PRs #928 and #929, the OAuth section of each summary should name the provider and link to the Arcade per-provider auth docs rather than enumerate scopes. Scopes already live on the provider reference page and repeating them in toolkit summaries creates drift every time a provider page updates. Changes: - Add ARCADE_AUTH_PROVIDERS_BASE_URL constant alongside the existing Arcade secret URLs in secret-coherence.ts. 
- Rewrite the OAuth bullet in toolkit-summary-generator.ts's prompt to require a link to {base}/ and explicitly forbid listing scopes. - Drop scopes from formatAuth's prompt payload so the model has no stray scope list to fall back on. - README: note the no-scopes-in-summary rule and point to the provider reference pages as the source of truth. Co-Authored-By: Claude Opus 4.7 (1M context) --- toolkit-docs-generator/README.md | 4 ++++ .../src/llm/toolkit-summary-generator.ts | 12 ++++++------ .../src/merger/secret-coherence.ts | 9 +++++++++ 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/toolkit-docs-generator/README.md b/toolkit-docs-generator/README.md index fba72a3ba..7b3a65c0e 100644 --- a/toolkit-docs-generator/README.md +++ b/toolkit-docs-generator/README.md @@ -88,6 +88,10 @@ When the editor is not configured, the scanners still run and their findings lan The default editor model is **Claude Sonnet 4.6** — chosen to avoid the oversimplification observed when bulk summaries were regenerated by `gpt-4o-mini`. Override with `--llm-editor-model` or the `LLM_EDITOR_MODEL` / `ANTHROPIC_EDITOR_MODEL` env var. +### OAuth section in summaries + +The summary generator is configured to **never list OAuth scopes** in the generated overview. Each per-provider Arcade auth docs page (under `/en/references/auth-providers/`) is the source of truth for scopes and configuration; the summary links to it instead of duplicating. This keeps the overview scannable and prevents drift when provider pages update their scope lists. + ### CLI flags - `--llm-editor-provider ` — editor provider. Falls back to `LLM_EDITOR_PROVIDER`. 
diff --git a/toolkit-docs-generator/src/llm/toolkit-summary-generator.ts b/toolkit-docs-generator/src/llm/toolkit-summary-generator.ts index 5cbdeface..892fc7367 100644 --- a/toolkit-docs-generator/src/llm/toolkit-summary-generator.ts +++ b/toolkit-docs-generator/src/llm/toolkit-summary-generator.ts @@ -1,5 +1,6 @@ import type { ToolkitSummaryGenerator } from "../merger/data-merger.js"; import { + ARCADE_AUTH_PROVIDERS_BASE_URL, ARCADE_SECRETS_DASHBOARD_URL, ARCADE_SECRETS_DOC_URL, } from "../merger/secret-coherence.js"; @@ -35,13 +36,12 @@ const formatAuth = (toolkit: MergedToolkit): string => { return "none"; } - const scopes = - toolkit.auth.allScopes.length > 0 - ? toolkit.auth.allScopes.join(", ") - : "None"; const provider = toolkit.auth.providerId ?? "unknown"; - return `${toolkit.auth.type}; provider: ${provider}; scopes: ${scopes}`; + // Scopes are intentionally omitted from the prompt: the summary should + // not re-list them — it points readers at the per-provider Arcade docs + // page where scopes live and stay in sync with the source of truth. + return `${toolkit.auth.type}; provider: ${provider}`; }; const collectSecrets = (tools: MergedTool[]) => { @@ -76,7 +76,7 @@ const buildPrompt = (toolkit: MergedToolkit): string => { "Requirements:", "- Start with 1 to 2 sentences that explain the provider and what the toolkit enables.", "- Add a **Capabilities** section with 3 to 6 bullets summarizing shared capabilities (group tools by theme; do not list tools one by one).", - "- If auth type is oauth2 or mixed, add an **OAuth** section with provider and representative scopes.", + `- If auth type is oauth2 or mixed, add an **OAuth** section that names the provider and links to the Arcade provider docs at ${ARCADE_AUTH_PROVIDERS_BASE_URL}/ (use the OAuth provider ID supplied in the Auth line below as the slug). 
Do NOT list scopes — the provider page already documents them and repeating scopes here drifts.`, "- If auth type is api_key or mixed, mention API key usage under **OAuth** or a dedicated heading.", `- If any secrets exist, add a **Secrets** section. List every secret by its exact name in backticks. For each secret, give a factual explanation of what it is and how a developer obtains it from the provider — use as much detail as the secret actually needs (a short URL override may be one line; a scoped API key may need several sentences naming the provider dashboard page, required scopes/permissions, and any account tier). When possible include an inline markdown link to the provider's own documentation page that tells the reader how to create/retrieve that specific secret. If you do not know the provider's docs URL, omit the link rather than inventing one. End the section with the Arcade config docs link: ${ARCADE_SECRETS_DOC_URL} (and optionally mention ${ARCADE_SECRETS_DASHBOARD_URL}).`, "- Use Markdown. Developer-focused. Say 'Arcade' (never 'Arcade AI').", diff --git a/toolkit-docs-generator/src/merger/secret-coherence.ts b/toolkit-docs-generator/src/merger/secret-coherence.ts index 2289511f1..5b991a799 100644 --- a/toolkit-docs-generator/src/merger/secret-coherence.ts +++ b/toolkit-docs-generator/src/merger/secret-coherence.ts @@ -20,6 +20,15 @@ export const ARCADE_SECRETS_DOC_URL = export const ARCADE_SECRETS_DASHBOARD_URL = "https://api.arcade.dev/dashboard/auth/secrets"; +/** + * Base URL for Arcade's per-provider OAuth docs. Specific provider pages + * live at `${base}/` — e.g. `/github`, `/google`, `/atlassian`. + * Used by the summary prompt so OAuth sections can link out instead of + * repeating scope lists that would drift from the provider page. 
+ */ +export const ARCADE_AUTH_PROVIDERS_BASE_URL = + "https://docs.arcade.dev/en/references/auth-providers"; + const SECRET_REFERENCE_URLS: readonly string[] = [ ARCADE_SECRETS_DOC_URL, ARCADE_SECRETS_DASHBOARD_URL, From 8a3ca50be0bc091dcf7d7003cc7c9e652ef93257 Mon Sep 17 00:00:00 2001 From: jottakka Date: Sat, 18 Apr 2026 16:05:32 -0300 Subject: [PATCH 06/11] fix(toolkit-docs-generator): address ACR findings on secret coherence (round 2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four findings from /acr-run: 1. HIGH (5/5) — ANTHROPIC_EDITOR_MODEL was documented as a fallback env var in the README but never read by resolveSecretEditGenerator. A local dev setting only ANTHROPIC_EDITOR_MODEL would get `model = undefined`, the (provider && model) guard would fire, and the editor would silently stay inactive. Extract resolveEditorModel helper that walks `--llm-editor-model` → LLM_EDITOR_MODEL → ANTHROPIC_EDITOR_MODEL in documented order, and use it from both the resolver and the verbose-log blocks. 2. MEDIUM — --skip-secret-coherence was documented to "disable both the scan and the edit step entirely" but DataMerger never received the flag; enforceSecretCoherence always ran, so coherence warnings still appeared when the user explicitly opted out. Add `skipSecretCoherence` to DataMergerConfig, gate enforcement on it, and pass it through from all three merger construction sites in the CLI. 3. MEDIUM — FENCE_PATTERN matched non-markdown language fences (```python, ```bash, ```json). A documentation chunk whose content was a code block would have its fences stripped, corrupting the edited output. Tightened the pattern to require either an empty, markdown, md, or text tag followed by a newline between the opening fence and the captured content, so language-tagged code blocks fall through stripOptionalFence unchanged. 4. LOW — verbose log showed "model: undefined" when only ANTHROPIC_EDITOR_MODEL was set. Fixed by #1. 
Tests added: - fence strip preserves `\`\`\`python` and `\`\`\`bash` code blocks verbatim - skipSecretCoherence suppresses both edits and warnings 549 tests pass, type-check clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- toolkit-docs-generator/src/cli/index.ts | 30 +++++++-- .../src/llm/secret-edit-generator.ts | 15 +++-- .../src/merger/data-merger.ts | 15 +++++ .../tests/llm/secret-edit-generator.test.ts | 31 +++++++++ .../tests/merger/data-merger.test.ts | 65 +++++++++++++++++++ 5 files changed, 146 insertions(+), 10 deletions(-) diff --git a/toolkit-docs-generator/src/cli/index.ts b/toolkit-docs-generator/src/cli/index.ts index 94276e7d1..8b0a430ce 100644 --- a/toolkit-docs-generator/src/cli/index.ts +++ b/toolkit-docs-generator/src/cli/index.ts @@ -396,6 +396,19 @@ const resolveEditorApiKey = ( return process.env.OPENAI_API_KEY; }; +/** + * Resolve the editor model from CLI options, then from env vars in + * documented precedence order. Kept as a shared helper so the verbose + * log in each `generate` action displays the same model that + * resolveSecretEditGenerator will actually use. + */ +const resolveEditorModel = (options: { + llmEditorModel?: string; +}): string | undefined => + options.llmEditorModel ?? + process.env.LLM_EDITOR_MODEL ?? + process.env.ANTHROPIC_EDITOR_MODEL; + /** * Build an LLM secret-edit generator from CLI options + env. Returns * undefined when the editor is disabled or unconfigured; callers fall back @@ -411,7 +424,7 @@ const resolveSecretEditGenerator = ( const providerRaw = options.llmEditorProvider ?? process.env.LLM_EDITOR_PROVIDER; - const model = options.llmEditorModel ?? process.env.LLM_EDITOR_MODEL; + const model = resolveEditorModel(options); // Editor stays opt-in: both provider and model must be explicitly set. if (!(providerRaw && model)) { @@ -968,7 +981,7 @@ program ) .option( "--llm-editor-model ", - "Secret-coherence editor LLM model (e.g. claude-sonnet-4-6). Defaults to LLM_EDITOR_MODEL env." 
+ "Secret-coherence editor LLM model (e.g. claude-sonnet-4-6). Defaults to LLM_EDITOR_MODEL or ANTHROPIC_EDITOR_MODEL env." ) .option( "--llm-editor-api-key ", @@ -1234,7 +1247,7 @@ program if (secretEditGenerator) { console.log( chalk.dim( - `Secret-coherence editor enabled (model: ${options.llmEditorModel ?? process.env.LLM_EDITOR_MODEL})` + `Secret-coherence editor enabled (model: ${resolveEditorModel(options)})` ) ); } else if (!options.skipSecretCoherence) { @@ -1509,6 +1522,7 @@ program ...(toolExampleGenerator ? { toolExampleGenerator } : {}), ...(toolkitSummaryGenerator ? { toolkitSummaryGenerator } : {}), ...(secretEditGenerator ? { secretEditGenerator } : {}), + ...(options.skipSecretCoherence ? { skipSecretCoherence: true } : {}), ...(previousToolkits ? { previousToolkits } : {}), ...(options.llmConcurrency ? { llmConcurrency: options.llmConcurrency } @@ -1638,6 +1652,9 @@ program ...(toolExampleGenerator ? { toolExampleGenerator } : {}), ...(toolkitSummaryGenerator ? { toolkitSummaryGenerator } : {}), ...(secretEditGenerator ? { secretEditGenerator } : {}), + ...(options.skipSecretCoherence + ? { skipSecretCoherence: true } + : {}), ...(previousToolkits ? { previousToolkits } : {}), ...(options.llmConcurrency ? { llmConcurrency: options.llmConcurrency } @@ -1988,7 +2005,7 @@ program ) .option( "--llm-editor-model ", - "Secret-coherence editor LLM model (e.g. claude-sonnet-4-6). Defaults to LLM_EDITOR_MODEL env." + "Secret-coherence editor LLM model (e.g. claude-sonnet-4-6). Defaults to LLM_EDITOR_MODEL or ANTHROPIC_EDITOR_MODEL env." ) .option( "--llm-editor-api-key ", @@ -2149,7 +2166,7 @@ program if (secretEditGenerator) { console.log( chalk.dim( - `Secret-coherence editor enabled (model: ${options.llmEditorModel ?? process.env.LLM_EDITOR_MODEL})` + `Secret-coherence editor enabled (model: ${resolveEditorModel(options)})` ) ); } else if (!options.skipSecretCoherence) { @@ -2360,6 +2377,9 @@ program ...(toolExampleGenerator ? 
{ toolExampleGenerator } : {}), ...(toolkitSummaryGenerator ? { toolkitSummaryGenerator } : {}), ...(secretEditGenerator ? { secretEditGenerator } : {}), + ...(options.skipSecretCoherence + ? { skipSecretCoherence: true } + : {}), ...(previousToolkits ? { previousToolkits } : {}), ...(options.llmConcurrency ? { llmConcurrency: options.llmConcurrency } diff --git a/toolkit-docs-generator/src/llm/secret-edit-generator.ts b/toolkit-docs-generator/src/llm/secret-edit-generator.ts index 90a1b7524..ce8bc162e 100644 --- a/toolkit-docs-generator/src/llm/secret-edit-generator.ts +++ b/toolkit-docs-generator/src/llm/secret-edit-generator.ts @@ -63,11 +63,16 @@ const DEFAULT_SYSTEM_PROMPT = "reorder existing sections. Preserve markdown syntax, backticks, tables, " + "and code exactly."; -// Anchored to the start/end of the full string and uses a greedy capture so -// that inner fenced code blocks inside an edited documentation chunk are -// preserved. A non-greedy capture would stop at the first inner ``` and -// silently truncate the rest of the content. -const FENCE_PATTERN = /^\s*```(?:markdown|md|text)?\s*([\s\S]*)```\s*$/; +// Anchored to the start/end of the full string, with a required newline +// between the opening fence (optionally followed by `markdown`/`md`/`text` +// plus horizontal whitespace) and the captured content. A bare +// ```python / ```bash / ```json at the start means the LLM returned a +// code block that *is* the content — not a wrapper — so the pattern must +// not match and stripOptionalFence will return the text unchanged. +// Greedy capture extends to the last closing fence so inner fenced +// blocks survive. 
+const FENCE_PATTERN = + /^\s*```(?:markdown|md|text)?[ \t]*\r?\n([\s\S]*)\r?\n```\s*$/; const stripOptionalFence = (text: string): string => { const match = text.match(FENCE_PATTERN); diff --git a/toolkit-docs-generator/src/merger/data-merger.ts b/toolkit-docs-generator/src/merger/data-merger.ts index 9cd9cafdc..67d6f2db3 100644 --- a/toolkit-docs-generator/src/merger/data-merger.ts +++ b/toolkit-docs-generator/src/merger/data-merger.ts @@ -52,6 +52,12 @@ export interface DataMergerConfig { * scanners still run and emit warnings, but no content is rewritten. */ secretEditGenerator?: ISecretEditGenerator; + /** + * When true, the secret-coherence step is disabled entirely — neither + * the scan nor the LLM edit runs, and no warnings are emitted. Wired + * from the CLI's `--skip-secret-coherence` flag. + */ + skipSecretCoherence?: boolean; previousToolkits?: ReadonlyMap; /** Maximum concurrent LLM calls for tool examples (default: 5) */ llmConcurrency?: number; @@ -913,6 +919,7 @@ export class DataMerger { private readonly toolExampleGenerator: ToolExampleGenerator | undefined; private readonly toolkitSummaryGenerator: ToolkitSummaryGenerator | undefined; private readonly secretEditGenerator: ISecretEditGenerator | undefined; + private readonly skipSecretCoherence: boolean; private readonly previousToolkits: | ReadonlyMap | undefined; @@ -940,6 +947,7 @@ export class DataMerger { this.toolExampleGenerator = config.toolExampleGenerator; this.toolkitSummaryGenerator = config.toolkitSummaryGenerator; this.secretEditGenerator = config.secretEditGenerator; + this.skipSecretCoherence = config.skipSecretCoherence ?? false; this.previousToolkits = config.previousToolkits; this.llmConcurrency = config.llmConcurrency ?? 10; this.toolkitConcurrency = config.toolkitConcurrency ?? 
5; @@ -1077,6 +1085,13 @@ export class DataMerger { result: MergeResult, previousToolkit?: MergedToolkit ): Promise { + if (this.skipSecretCoherence) { + // --skip-secret-coherence disables the entire step: no scan, no + // warnings, no edits. Callers who want warnings without edits + // should leave the flag off and simply not configure a + // secretEditGenerator. + return; + } const issues = detectSecretCoherenceIssues(result.toolkit, previousToolkit); if (!hasCoherenceIssues(issues)) { return; diff --git a/toolkit-docs-generator/tests/llm/secret-edit-generator.test.ts b/toolkit-docs-generator/tests/llm/secret-edit-generator.test.ts index fc5df2d64..c8f341d3d 100644 --- a/toolkit-docs-generator/tests/llm/secret-edit-generator.test.ts +++ b/toolkit-docs-generator/tests/llm/secret-edit-generator.test.ts @@ -80,6 +80,37 @@ describe("LlmSecretEditGenerator.cleanupStaleReferences", () => { expect(out).toBe(response); }); + it("does not strip a non-markdown language fence (e.g. ```python)", async () => { + // A documentation chunk that IS a code block must survive the fence + // strip. Only plain ``` or ```markdown/md/text qualifies as a + // wrapper; a ```python block is the content itself. 
+ const response = "```python\nimport arcade\narcade.run()\n```"; + const client = fakeClient(response); + const editor = new LlmSecretEditGenerator({ client, model: "test" }); + const out = await editor.cleanupStaleReferences({ + kind: "documentation_chunk", + content: "mentions OLD", + removedSecrets: ["OLD"], + currentSecrets: ["KEEP"], + toolkitLabel: "GitHub", + }); + expect(out).toBe(response); + }); + + it("does not strip a bash fence either", async () => { + const response = "```bash\narcade deploy --env prod\n```"; + const client = fakeClient(response); + const editor = new LlmSecretEditGenerator({ client, model: "test" }); + const out = await editor.cleanupStaleReferences({ + kind: "documentation_chunk", + content: "mentions OLD", + removedSecrets: ["OLD"], + currentSecrets: ["KEEP"], + toolkitLabel: "GitHub", + }); + expect(out).toBe(response); + }); + it("passes the removed and current secrets into the prompt", async () => { const client = fakeClient("ok"); const editor = new LlmSecretEditGenerator({ client, model: "test" }); diff --git a/toolkit-docs-generator/tests/merger/data-merger.test.ts b/toolkit-docs-generator/tests/merger/data-merger.test.ts index 675cb0da1..7a1bacfc2 100644 --- a/toolkit-docs-generator/tests/merger/data-merger.test.ts +++ b/toolkit-docs-generator/tests/merger/data-merger.test.ts @@ -1404,6 +1404,71 @@ describe("DataMerger", () => { ).toBe(true); }); + it("skipSecretCoherence suppresses both the edit step and the scan warnings", async () => { + // --skip-secret-coherence is documented to disable the entire + // step. That means no LLM edits AND no coherence warnings in the + // run log. A stale secret reference in a chunk must pass through + // without any signal to the run log. 
+ const toolWithSecret = createTool({ + name: "CreateIssue", + qualifiedName: "Github.CreateIssue", + fullyQualifiedName: "Github.CreateIssue@1.0.0", + auth: { providerId: "github", providerType: "oauth2", scopes: [] }, + secrets: ["GITHUB_SERVER_URL"], + }); + const toolkitDataSource = createCombinedToolkitDataSource({ + toolSource: new InMemoryToolDataSource([toolWithSecret]), + metadataSource: new InMemoryMetadataSource([githubMetadata]), + }); + const previous = await mergeToolkit( + "Github", + [ + createTool({ + ...toolWithSecret, + secrets: ["GITHUB_SERVER_URL", "OLD_SECRET"], + }), + ], + githubMetadata, + null, + createStubGenerator() + ); + previous.toolkit.documentationChunks = [ + { + type: "markdown", + location: "header", + position: "after", + content: "Still references OLD_SECRET for legacy flows.", + }, + ]; + + const cleanupSpy = vi.fn(async () => "unreached"); + const coverageSpy = vi.fn( + async (input: { content: string }) => input.content + ); + const merger = new DataMerger({ + toolkitDataSource, + customSectionsSource: new EmptyCustomSectionsSource(), + toolExampleGenerator: createStubGenerator(), + toolkitSummaryGenerator: createStubSummaryGenerator("Stub"), + secretEditGenerator: { + cleanupStaleReferences: cleanupSpy, + fillCoverageGaps: coverageSpy, + }, + skipSecretCoherence: true, + previousToolkits: new Map([["github", previous.toolkit]]), + }); + + const result = await merger.mergeToolkit("Github"); + + expect(cleanupSpy).not.toHaveBeenCalled(); + expect(coverageSpy).not.toHaveBeenCalled(); + expect( + result.warnings.some((warning) => + warning.includes("Stale secret reference") + ) + ).toBe(false); + }); + it("reuses previous examples when the tool is unchanged", async () => { const toolkitDataSource = createCombinedToolkitDataSource({ toolSource: new InMemoryToolDataSource([githubTool1]), From 8c0d5aeec71b97ebdab1929b05a58168b992c863 Mon Sep 17 00:00:00 2001 From: jottakka Date: Sat, 18 Apr 2026 16:16:39 -0300 Subject: [PATCH 
07/11] feat(workflow): Node 24 opt-in + focused workflow_dispatch for manual runs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two workflow additions driven by PR #936 feedback: 1. Job-level `FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"` opts all JavaScript actions into Node 24 ahead of the 2026-06-02 deprecation. actions/checkout@v4, actions/setup-node@v4, peter-evans/create-pull-request@v7, and pnpm/action-setup@v4 all trigger the "Node.js 20 actions are deprecated" annotation today; the opt-in silences it and matches the runtime we'll be forced onto anyway. 2. New `workflow_dispatch` input `providers`. When set to a comma-separated provider list (e.g. "Github"), the run uses `--providers "$providers"` AND drops `--skip-unchanged` so the secret-coherence scan actually re-evaluates those toolkits — even when the Engine API reports no version change. Scheduled and porter_deploy_succeeded runs keep the previous `--all --skip-unchanged` behavior. This is what lets the #935 demo PR actually exercise the pipeline end-to-end: trigger the workflow with `providers=Github` and the phantom secret gets surfaced + cleaned. Tests added: workflow assertions for the new env var and the providers input fallback structure. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/generate-toolkit-docs.yml | 30 +++++++++++++++++-- .../workflows/generate-toolkit-docs.test.ts | 15 ++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/.github/workflows/generate-toolkit-docs.yml b/.github/workflows/generate-toolkit-docs.yml index 836660104..bf21b8ec0 100644 --- a/.github/workflows/generate-toolkit-docs.yml +++ b/.github/workflows/generate-toolkit-docs.yml @@ -10,6 +10,16 @@ on: repository_dispatch: types: [porter_deploy_succeeded] workflow_dispatch: + inputs: + providers: + description: >- + Comma-separated list of provider IDs (e.g. "Github,Slack"). 
When set, + the workflow runs only these providers and bypasses --skip-unchanged, + forcing the full merge (including the secret-coherence scan and LLM + edits) to re-evaluate even when the Engine API reports no version + change. Leave empty to run the full scheduled flow. + required: false + default: "" # 11:00 UTC = 3 AM PST / 4 AM PDT — late enough that DST drift doesn't matter. schedule: - cron: "0 11 * * *" @@ -21,6 +31,10 @@ permissions: jobs: generate: runs-on: ubuntu-latest + # Opt in to Node 24 for JavaScript actions before GitHub forces the + # switch on 2026-06-02. Harmless today; unblocks the cutover. + env: + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true" steps: - name: Checkout repository @@ -46,9 +60,21 @@ jobs: - name: Generate toolkit docs run: | + # Manual focused runs: workflow_dispatch with `providers` input + # bypasses --skip-unchanged and --all, so the secret-coherence + # scan actually re-evaluates the chosen toolkits even when the + # Engine API reports no version change. Scheduled and porter + # runs keep the default --all --skip-unchanged behavior. 
+ PROVIDERS_INPUT="${{ inputs.providers }}" + if [ -n "$PROVIDERS_INPUT" ]; then + SCOPE_ARGS=(--providers "$PROVIDERS_INPUT") + echo "Focused run for: $PROVIDERS_INPUT (--skip-unchanged disabled)" + else + SCOPE_ARGS=(--all --skip-unchanged) + fi + pnpm dlx tsx src/cli/index.ts generate \ - --all \ - --skip-unchanged \ + "${SCOPE_ARGS[@]}" \ --require-complete \ --verbose \ --api-source tool-metadata \ diff --git a/toolkit-docs-generator/tests/workflows/generate-toolkit-docs.test.ts b/toolkit-docs-generator/tests/workflows/generate-toolkit-docs.test.ts index 76093cf2e..32675ae9e 100644 --- a/toolkit-docs-generator/tests/workflows/generate-toolkit-docs.test.ts +++ b/toolkit-docs-generator/tests/workflows/generate-toolkit-docs.test.ts @@ -44,3 +44,18 @@ test("porter workflow wires the secret-coherence editor", () => { expect(workflowContents).toContain("ANTHROPIC_API_KEY"); expect(workflowContents).toContain("claude-sonnet-4-6"); }); + +test("porter workflow opts JS actions into Node 24 to unblock the 2026-06-02 deprecation", () => { + expect(workflowContents).toContain( + 'FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"' + ); +}); + +test("workflow_dispatch accepts a providers input for focused manual runs", () => { + expect(workflowContents).toContain("providers:"); + expect(workflowContents).toContain("inputs.providers"); + // A non-empty providers input must bypass --skip-unchanged so the + // secret-coherence scan actually re-evaluates the chosen toolkits. 
+ expect(workflowContents).toContain("PROVIDERS_INPUT="); + expect(workflowContents).toContain("--all --skip-unchanged"); +}); From e6381edb21873626a9a5121abf7e61fffd5c71cd Mon Sep 17 00:00:00 2001 From: jottakka Date: Sat, 18 Apr 2026 16:52:43 -0300 Subject: [PATCH 08/11] feat(cli): surface per-toolkit merge warnings to stdout in --all runs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The stale-secret scanner, coverage-gap detector, and summary-generation failures all push warnings onto `result.warnings`. Per-provider mode already echoes those to stdout (line 848 of cli/index.ts). The --all and regenerate-all paths did not — they only appended to the run log file on disk, which GitHub Actions runs don't expose. Result: on the #935 demo, the workflow ran, the phantom secret was removed from the tool's .secrets array, but no cleanup was applied to the stale doc chunk that still referenced it AND there was no signal in the CI log explaining why. The warnings that would have explained "stale secret detected but edit failed" or "stale secret detected but no editor configured" were present in memory but discarded. This commit prints every non-empty `mergeResult.warnings` to stdout right after `mergeAllToolkits()` returns, in both the `generate --all` and `regenerate --all` paths. Format matches existing spinner output: ⚠ Github: 2 warning(s) - Stale secret reference in toolkit_chunk #4: GITHUB_CLASSIC_... - Secret cleanup edit failed for Github (documentation_chunk): ... 551 tests pass, type-check clean. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- toolkit-docs-generator/src/cli/index.ts | 32 +++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/toolkit-docs-generator/src/cli/index.ts b/toolkit-docs-generator/src/cli/index.ts index 8b0a430ce..1137c3703 100644 --- a/toolkit-docs-generator/src/cli/index.ts +++ b/toolkit-docs-generator/src/cli/index.ts @@ -1676,6 +1676,22 @@ program spinner.succeed( `Processed ${summary.completed} toolkit(s) with ${summary.totalTools} tools in ${summary.elapsed}` ); + + // Surface per-toolkit warnings to stdout so CI logs show what + // the merger saw. Without this, stale-secret / coverage / + // summary-generation warnings only land in the run log file + // on disk — which isn't visible in GitHub Actions output. + for (const mergeResult of allResults) { + if (mergeResult.warnings.length === 0) continue; + console.log( + chalk.yellow( + `⚠ ${mergeResult.toolkit.id}: ${mergeResult.warnings.length} warning(s)` + ) + ); + for (const warning of mergeResult.warnings) { + console.log(chalk.dim(` - ${warning}`)); + } + } } } else { const { providersToProcess } = filterProvidersBySkipIds( @@ -2401,6 +2417,22 @@ program `Processed ${summary.completed} toolkit(s) with ${summary.totalTools} tools in ${summary.elapsed}` ); + // Surface per-toolkit warnings to stdout (stale-secret scan, + // coverage gaps, summary-gen failures) so CI logs show what + // the merger saw — otherwise they only land in the run log + // file on disk. 
+ for (const mergeResult of results) { + if (mergeResult.warnings.length === 0) continue; + console.log( + chalk.yellow( + `⚠ ${mergeResult.toolkit.id}: ${mergeResult.warnings.length} warning(s)` + ) + ); + for (const warning of mergeResult.warnings) { + console.log(chalk.dim(` - ${warning}`)); + } + } + // Generate output (batch mode if not incremental) if (!useIncremental && results.length > 0) { spinner.start("Writing output files..."); From 43117f64f346dee3bb1e8ed347bcb4aafa025569 Mon Sep 17 00:00:00 2001 From: jottakka Date: Mon, 20 Apr 2026 22:38:02 -0300 Subject: [PATCH 09/11] fix(workflow): remove focused providers dispatch input Simplify manual toolkit docs runs by removing the workflow_dispatch providers override and restoring the default full run path with --all --skip-unchanged. Made-with: Cursor --- .github/workflows/generate-toolkit-docs.yml | 26 ++----------------- .../workflows/generate-toolkit-docs.test.ts | 14 +++++----- 2 files changed, 9 insertions(+), 31 deletions(-) diff --git a/.github/workflows/generate-toolkit-docs.yml b/.github/workflows/generate-toolkit-docs.yml index bf21b8ec0..c501cbb21 100644 --- a/.github/workflows/generate-toolkit-docs.yml +++ b/.github/workflows/generate-toolkit-docs.yml @@ -10,16 +10,6 @@ on: repository_dispatch: types: [porter_deploy_succeeded] workflow_dispatch: - inputs: - providers: - description: >- - Comma-separated list of provider IDs (e.g. "Github,Slack"). When set, - the workflow runs only these providers and bypasses --skip-unchanged, - forcing the full merge (including the secret-coherence scan and LLM - edits) to re-evaluate even when the Engine API reports no version - change. Leave empty to run the full scheduled flow. - required: false - default: "" # 11:00 UTC = 3 AM PST / 4 AM PDT — late enough that DST drift doesn't matter. 
schedule: - cron: "0 11 * * *" @@ -60,21 +50,9 @@ jobs: - name: Generate toolkit docs run: | - # Manual focused runs: workflow_dispatch with `providers` input - # bypasses --skip-unchanged and --all, so the secret-coherence - # scan actually re-evaluates the chosen toolkits even when the - # Engine API reports no version change. Scheduled and porter - # runs keep the default --all --skip-unchanged behavior. - PROVIDERS_INPUT="${{ inputs.providers }}" - if [ -n "$PROVIDERS_INPUT" ]; then - SCOPE_ARGS=(--providers "$PROVIDERS_INPUT") - echo "Focused run for: $PROVIDERS_INPUT (--skip-unchanged disabled)" - else - SCOPE_ARGS=(--all --skip-unchanged) - fi - pnpm dlx tsx src/cli/index.ts generate \ - "${SCOPE_ARGS[@]}" \ + --all \ + --skip-unchanged \ --require-complete \ --verbose \ --api-source tool-metadata \ diff --git a/toolkit-docs-generator/tests/workflows/generate-toolkit-docs.test.ts b/toolkit-docs-generator/tests/workflows/generate-toolkit-docs.test.ts index 32675ae9e..f9ff3407d 100644 --- a/toolkit-docs-generator/tests/workflows/generate-toolkit-docs.test.ts +++ b/toolkit-docs-generator/tests/workflows/generate-toolkit-docs.test.ts @@ -51,11 +51,11 @@ test("porter workflow opts JS actions into Node 24 to unblock the 2026-06-02 dep ); }); -test("workflow_dispatch accepts a providers input for focused manual runs", () => { - expect(workflowContents).toContain("providers:"); - expect(workflowContents).toContain("inputs.providers"); - // A non-empty providers input must bypass --skip-unchanged so the - // secret-coherence scan actually re-evaluates the chosen toolkits. 
- expect(workflowContents).toContain("PROVIDERS_INPUT="); - expect(workflowContents).toContain("--all --skip-unchanged"); +test("workflow dispatch keeps default full-run behavior", () => { + expect(workflowContents).toContain("workflow_dispatch:"); + expect(workflowContents).toContain("--all"); + expect(workflowContents).toContain("--skip-unchanged"); + expect(workflowContents).not.toContain("providers:"); + expect(workflowContents).not.toContain("inputs.providers"); + expect(workflowContents).not.toContain("PROVIDERS_INPUT="); }); From ad14bd8aea4e12dafa17e90c68e6d6a047c90ede Mon Sep 17 00:00:00 2001 From: jottakka Date: Tue, 21 Apr 2026 12:52:34 -0300 Subject: [PATCH 10/11] fix(toolkit-docs-generator): address cursor review on secret coherence Reuse shared secret collection logic across merger modules and restore stale-summary coverage in data-merger tests. Made-with: Cursor --- .../src/merger/data-merger.ts | 18 +- .../src/merger/secret-coherence.ts | 2 +- .../tests/merger/data-merger.test.ts | 215 ++++++++++++++++++ 3 files changed, 219 insertions(+), 16 deletions(-) diff --git a/toolkit-docs-generator/src/merger/data-merger.ts b/toolkit-docs-generator/src/merger/data-merger.ts index cefc08dde..6696dfd89 100644 --- a/toolkit-docs-generator/src/merger/data-merger.ts +++ b/toolkit-docs-generator/src/merger/data-merger.ts @@ -30,6 +30,7 @@ import { formatFreshnessWarnings, } from "./metadata-freshness.js"; import { + collectToolkitSecrets, detectSecretCoherenceIssues, groupStaleRefsByTarget, hasCoherenceIssues, @@ -487,19 +488,6 @@ const getToolDocumentationChunks = ( return fromPrevious; }; -const collectCurrentSecretNames = (toolkit: MergedToolkit): Set => { - const names = new Set(); - for (const tool of toolkit.tools) { - for (const name of tool.secrets) { - names.add(name); - } - for (const info of tool.secretsInfo ?? 
[]) { - names.add(info.name); - } - } - return names; -}; - const describeLocation = ( location: | { kind: "summary" } @@ -1214,7 +1202,7 @@ export class DataMerger { if (targets.length === 0) { return; } - const currentSecrets = Array.from(collectCurrentSecretNames(result.toolkit)) + const currentSecrets = Array.from(collectToolkitSecrets(result.toolkit)) .sort() .map((name) => name); for (const target of targets) { @@ -1257,7 +1245,7 @@ export class DataMerger { if (missing.length === 0 && !needsLink) { return; } - const currentSecrets = Array.from(collectCurrentSecretNames(result.toolkit)) + const currentSecrets = Array.from(collectToolkitSecrets(result.toolkit)) .sort() .map((name) => name); try { diff --git a/toolkit-docs-generator/src/merger/secret-coherence.ts b/toolkit-docs-generator/src/merger/secret-coherence.ts index 5b991a799..5ddeb83c0 100644 --- a/toolkit-docs-generator/src/merger/secret-coherence.ts +++ b/toolkit-docs-generator/src/merger/secret-coherence.ts @@ -58,7 +58,7 @@ export type SecretCoherenceIssues = { coverageGaps: SecretCoverageGap[]; }; -const collectToolkitSecrets = (toolkit: MergedToolkit): Set => { +export const collectToolkitSecrets = (toolkit: MergedToolkit): Set => { const names = new Set(); for (const tool of toolkit.tools) { for (const secret of tool.secrets) { diff --git a/toolkit-docs-generator/tests/merger/data-merger.test.ts b/toolkit-docs-generator/tests/merger/data-merger.test.ts index 7a1bacfc2..70b0afe0c 100644 --- a/toolkit-docs-generator/tests/merger/data-merger.test.ts +++ b/toolkit-docs-generator/tests/merger/data-merger.test.ts @@ -1186,6 +1186,221 @@ describe("DataMerger", () => { expect(result.toolkit.summary).toBeUndefined(); }); + it("preserves previous summary when no LLM generator is available and the signature changed", async () => { + const toolkitDataSource = createCombinedToolkitDataSource({ + toolSource: new InMemoryToolDataSource([githubTool1, githubTool2]), + metadataSource: new 
InMemoryMetadataSource([githubMetadata]), + }); + const previousResult = await mergeToolkit( + "Github", + [githubTool1], + githubMetadata, + null, + createStubGenerator() + ); + previousResult.toolkit.summary = "Hand-authored summary"; + + const merger = new DataMerger({ + toolkitDataSource, + customSectionsSource: new EmptyCustomSectionsSource(), + toolExampleGenerator: createStubGenerator(), + previousToolkits: new Map([["github", previousResult.toolkit]]), + }); + + const result = await merger.mergeToolkit("Github"); + + expect(result.toolkit.tools).toHaveLength(2); + expect(result.toolkit.summary).toBe("Hand-authored summary"); + expect(result.toolkit.summaryStale).toBe(true); + expect(result.toolkit.summaryStaleReason).toBe( + "llm_generator_unavailable" + ); + expect( + result.warnings.some((warning) => + warning.includes("Summary is stale for Github") + ) + ).toBe(true); + }); + + it("preserves previous summary when the LLM generator throws", async () => { + const toolkitDataSource = createCombinedToolkitDataSource({ + toolSource: new InMemoryToolDataSource([githubTool1, githubTool2]), + metadataSource: new InMemoryMetadataSource([githubMetadata]), + }); + const previousResult = await mergeToolkit( + "Github", + [githubTool1], + githubMetadata, + null, + createStubGenerator() + ); + previousResult.toolkit.summary = "Hand-authored summary"; + + const failingSummary: ToolkitSummaryGenerator = { + generate: async () => { + throw new Error("rate limited"); + }, + }; + + const merger = new DataMerger({ + toolkitDataSource, + customSectionsSource: new EmptyCustomSectionsSource(), + toolExampleGenerator: createStubGenerator(), + toolkitSummaryGenerator: failingSummary, + previousToolkits: new Map([["github", previousResult.toolkit]]), + }); + + const result = await merger.mergeToolkit("Github"); + + expect(result.toolkit.summary).toBe("Hand-authored summary"); + expect( + result.warnings.some((warning) => + warning.includes("Summary generation failed for Github") 
+ ) + ).toBe(true); + expect(result.toolkit.summaryStale).toBe(true); + expect(result.toolkit.summaryStaleReason).toBe("llm_generation_failed"); + }); + + it("clears the stale flag when the generator succeeds on the next run", async () => { + // A toolkit whose summary was stale on a prior run should come back + // clean once the generator actually produces a new summary. This + // proves the CI gate will stop flagging the toolkit once fixed. + const toolkitDataSource = createCombinedToolkitDataSource({ + toolSource: new InMemoryToolDataSource([githubTool1, githubTool2]), + metadataSource: new InMemoryMetadataSource([githubMetadata]), + }); + const previousResult = await mergeToolkit( + "Github", + [githubTool1], + githubMetadata, + null, + createStubGenerator() + ); + previousResult.toolkit.summary = "Older summary"; + previousResult.toolkit.summaryStale = true; + previousResult.toolkit.summaryStaleReason = "llm_generation_failed"; + + const merger = new DataMerger({ + toolkitDataSource, + customSectionsSource: new EmptyCustomSectionsSource(), + toolExampleGenerator: createStubGenerator(), + toolkitSummaryGenerator: createStubSummaryGenerator("Fresh summary"), + previousToolkits: new Map([["github", previousResult.toolkit]]), + }); + + const result = await merger.mergeToolkit("Github"); + + expect(result.toolkit.summary).toBe("Fresh summary (Github)"); + expect(result.toolkit.summaryStale).toBeUndefined(); + expect(result.toolkit.summaryStaleReason).toBeUndefined(); + }); + + it("does not flag stale when the signature matches a fresh previous summary", async () => { + // Baseline: if previous.summaryStale is falsy, signature match is a + // valid proof of freshness and the reuse path should keep the + // summary and stay clean. 
+ const toolkitDataSource = createCombinedToolkitDataSource({ + toolSource: new InMemoryToolDataSource([githubTool1]), + metadataSource: new InMemoryMetadataSource([githubMetadata]), + }); + const previousResult = await mergeToolkit( + "Github", + [githubTool1], + githubMetadata, + null, + createStubGenerator() + ); + previousResult.toolkit.summary = "Cached summary"; + + const countingSummary = createCountingSummaryGenerator(); + const merger = new DataMerger({ + toolkitDataSource, + customSectionsSource: new EmptyCustomSectionsSource(), + toolExampleGenerator: createStubGenerator(), + toolkitSummaryGenerator: countingSummary.generator, + previousToolkits: new Map([["github", previousResult.toolkit]]), + }); + + const result = await merger.mergeToolkit("Github"); + + expect(result.toolkit.summary).toBe("Cached summary"); + expect(result.toolkit.summaryStale).toBeUndefined(); + expect(result.toolkit.summaryStaleReason).toBeUndefined(); + expect(countingSummary.getCalls()).toBe(0); + }); + + it("regenerates when signature matches but the previous summary was already stale", async () => { + // If the previous summary was already flagged stale, a matching + // signature does NOT prove freshness — the stale summary was + // carried forward from an earlier toolset. The reuse fast path + // must skip, and the LLM must actually regenerate. 
+ const toolkitDataSource = createCombinedToolkitDataSource({ + toolSource: new InMemoryToolDataSource([githubTool1]), + metadataSource: new InMemoryMetadataSource([githubMetadata]), + }); + const previousResult = await mergeToolkit( + "Github", + [githubTool1], + githubMetadata, + null, + createStubGenerator() + ); + previousResult.toolkit.summary = "Stale carried-forward summary"; + previousResult.toolkit.summaryStale = true; + previousResult.toolkit.summaryStaleReason = "llm_generation_failed"; + + const merger = new DataMerger({ + toolkitDataSource, + customSectionsSource: new EmptyCustomSectionsSource(), + toolExampleGenerator: createStubGenerator(), + toolkitSummaryGenerator: createStubSummaryGenerator("Fresh"), + previousToolkits: new Map([["github", previousResult.toolkit]]), + }); + + const result = await merger.mergeToolkit("Github"); + + expect(result.toolkit.summary).toBe("Fresh (Github)"); + expect(result.toolkit.summaryStale).toBeUndefined(); + expect(result.toolkit.summaryStaleReason).toBeUndefined(); + }); + + it("keeps the stale flag when previous was stale, signature matches, and no generator is available", async () => { + // Same as above, but the regen attempt is not possible. The carried- + // forward summary remains, and the stale flag must persist so CI + // keeps flagging the toolkit until a working LLM run produces a + // fresh one. 
+ const toolkitDataSource = createCombinedToolkitDataSource({ + toolSource: new InMemoryToolDataSource([githubTool1]), + metadataSource: new InMemoryMetadataSource([githubMetadata]), + }); + const previousResult = await mergeToolkit( + "Github", + [githubTool1], + githubMetadata, + null, + createStubGenerator() + ); + previousResult.toolkit.summary = "Stale carried-forward summary"; + previousResult.toolkit.summaryStale = true; + previousResult.toolkit.summaryStaleReason = "llm_generation_failed"; + + const merger = new DataMerger({ + toolkitDataSource, + customSectionsSource: new EmptyCustomSectionsSource(), + toolExampleGenerator: createStubGenerator(), + previousToolkits: new Map([["github", previousResult.toolkit]]), + }); + + const result = await merger.mergeToolkit("Github"); + + expect(result.toolkit.summary).toBe("Stale carried-forward summary"); + expect(result.toolkit.summaryStale).toBe(true); + expect(result.toolkit.summaryStaleReason).toBe( + "llm_generator_unavailable" + ); + }); + it("runs the secret-coherence editor when a removed secret still appears in a toolkit documentation chunk", async () => { const toolWithSecret = createTool({ name: "CreateIssue", From 4e01ccc7932490f4eb67cea3fa4f31ae055892e0 Mon Sep 17 00:00:00 2001 From: jottakka Date: Tue, 21 Apr 2026 14:40:03 -0300 Subject: [PATCH 11/11] fix(cli): use editor flag in provider validation message Pass the editor-specific option name to provider validation so invalid --llm-editor-provider values return actionable guidance. 
Made-with: Cursor --- toolkit-docs-generator/src/cli/index.ts | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/toolkit-docs-generator/src/cli/index.ts b/toolkit-docs-generator/src/cli/index.ts index 1137c3703..bb05939f6 100644 --- a/toolkit-docs-generator/src/cli/index.ts +++ b/toolkit-docs-generator/src/cli/index.ts @@ -276,12 +276,15 @@ const clearOutputDir = async ( } }; -const resolveLlmProvider = (value?: string): LlmProvider => { +const resolveLlmProvider = ( + value?: string, + optionName = "--llm-provider" +): LlmProvider => { if (value === "openai" || value === "anthropic") { return value; } throw new Error( - 'LLM provider is required. Use --llm-provider "openai" or "anthropic".' + `LLM provider is required. Use ${optionName} "openai" or "anthropic".` ); }; @@ -431,7 +434,7 @@ const resolveSecretEditGenerator = ( return; } - const provider = resolveLlmProvider(providerRaw); + const provider = resolveLlmProvider(providerRaw, "--llm-editor-provider"); const apiKey = resolveEditorApiKey(provider, options.llmEditorApiKey); if (!apiKey) { // Fail open: unconfigured editor degrades to scanner-only warnings