diff --git a/packages/frameworks/src/detector.test.ts b/packages/frameworks/src/detector.test.ts index bbbebcce..efb9ca96 100644 --- a/packages/frameworks/src/detector.test.ts +++ b/packages/frameworks/src/detector.test.ts @@ -29,11 +29,13 @@ function mkInput( files: readonly string[], manifests: ReadonlyArray, detectedLanguages: readonly string[], + configText?: ReadonlyArray, ): FrameworkDetectorInput { return { relPaths: new Set(files), manifestText: new Map(manifests), detectedLanguages, + ...(configText !== undefined ? { configText: new Map(configText) } : {}), }; } @@ -799,3 +801,70 @@ describe("framework detection — version resolves from either dependency bucket assert.equal(vite?.version, "5.4.0", "version read from devDependencies bucket"); }); }); + +// --------------------------------------------------------------------------- +// Stage 3 — config-AST evidence (wired via configText) +// --------------------------------------------------------------------------- + +describe("stage 3 — config-AST evidence", () => { + it("merges next.config router evidence into the nextjs detection", () => { + const input = mkInput( + ["package.json", "next.config.js", "app/page.tsx"], + [["package.json", JSON.stringify({ dependencies: { next: "14.2.0" } })]], + ["typescript"], + [["next.config.js", "module.exports = { experimental: { appDir: true } };\n"]], + ); + const out = detectFrameworksStructured(input); + const next = findByName(out, "nextjs"); + assert.ok(next, "nextjs detected"); + const stage3 = next?.evidence.filter((e) => e.stage === 3) ?? 
[]; + assert.ok(stage3.length > 0, "expected stage-3 config-AST evidence on the nextjs detection"); + assert.ok( + stage3.some((e) => e.source === "next.config.js"), + "stage-3 evidence should cite next.config.js", + ); + }); + + it("does NOT create a detection from config text alone (corroborates only)", () => { + // spring.factories is a stage-3 config signal but NOT a catalog file/layout + // marker (spring-boot keys on pom.xml), so config text with no pom.xml and + // no Java layout must not conjure a spring-boot detection. + const input = mkInput( + ["META-INF/spring.factories"], + [], + ["java"], + [ + [ + "META-INF/spring.factories", + "org.springframework.boot.autoconfigure.EnableAutoConfiguration=com.example.MyAutoConfig\n", + ], + ], + ); + const out = detectFrameworksStructured(input); + assert.equal( + findByName(out, "spring-boot"), + undefined, + "config text alone must not detect spring-boot", + ); + }); + + it("is a no-op when configText is omitted (legacy callers unchanged)", () => { + const withCfg = mkInput( + ["package.json", "next.config.js", "app/page.tsx"], + [["package.json", JSON.stringify({ dependencies: { next: "14.2.0" } })]], + ["typescript"], + [["next.config.js", "module.exports = {};\n"]], + ); + const withoutCfg = mkInput( + ["package.json", "next.config.js", "app/page.tsx"], + [["package.json", JSON.stringify({ dependencies: { next: "14.2.0" } })]], + ["typescript"], + ); + const a = findByName(detectFrameworksStructured(withoutCfg), "nextjs"); + const b = findByName(detectFrameworksStructured(withCfg), "nextjs"); + assert.ok(a && b, "nextjs detected both ways"); + // Without configText: no stage-3 evidence. With it: stage-3 present. + assert.equal((a?.evidence.filter((e) => e.stage === 3) ?? []).length, 0); + assert.ok((b?.evidence.filter((e) => e.stage === 3) ?? 
[]).length > 0); + }); +}); diff --git a/packages/frameworks/src/detector.ts b/packages/frameworks/src/detector.ts index 46ce21fd..2210bd59 100644 --- a/packages/frameworks/src/detector.ts +++ b/packages/frameworks/src/detector.ts @@ -31,6 +31,7 @@ import { type FrameworkRule, type ManifestKey, } from "./catalog.js"; +import { type ConfigAstFinding, inspectConfigAst } from "./stages/config-ast.js"; import { VARIANT_RESOLVERS, type VariantResolveInput, @@ -56,6 +57,15 @@ export interface FrameworkDetectorInput { * substitutes the lockfile's pinned version. Absent for legacy callers. */ readonly lockfileVersions?: ReadonlyMap; + /** + * Stage 3 — raw text of framework config files (`next.config.*`, + * `astro.config.*`, `vite.config.*`, `META-INF/spring.factories`), keyed by + * relPath. When present, `inspectConfigAst` runs and its findings are merged + * as stage-3 evidence into the matching framework's detection (corroborating + * a manifest/layout hit; it never creates a detection on its own). Absent for + * legacy callers — stage 3 simply contributes no evidence. + */ + readonly configText?: ReadonlyMap; } /** Mapping language → ecosystem. Covers the tree-sitter languages OpenCodeHub indexes. */ @@ -85,11 +95,16 @@ export function detectFrameworksStructured( manifestJson, manifestText: input.manifestText, }; + // Stage 3 — config-AST findings, grouped by the framework name they + // implicate. Computed once; merged into a detection's evidence when that + // framework already hit on a manifest/layout signal (stage 3 corroborates, + // never creates). 
+ const configFindingsByFramework = groupConfigFindings(input.configText, input.relPaths); const out: FrameworkDetection[] = []; for (const rule of FRAMEWORK_CATALOG) { if (rule.ecosystem !== "any" && !activeEcosystems.has(rule.ecosystem)) continue; - const hit = evaluateRule(rule, input, manifestJson); + const hit = evaluateRule(rule, input, manifestJson, configFindingsByFramework.get(rule.name)); if (hit === null) continue; const detection = buildDetection( rule, @@ -104,6 +119,26 @@ export function detectFrameworksStructured( return out; } +/** + * Run stage 3 (config-AST) once and group its findings by the framework name + * they implicate, so `evaluateRule` can look up a rule's corroborating + * findings by `rule.name`. Returns an empty map when no config text was + * supplied (legacy callers) — stage 3 then contributes nothing. + */ +function groupConfigFindings( + configText: ReadonlyMap | undefined, + relPaths: ReadonlySet, +): ReadonlyMap { + const grouped = new Map(); + if (configText === undefined || configText.size === 0) return grouped; + for (const finding of inspectConfigAst(configText, relPaths)) { + const list = grouped.get(finding.framework) ?? []; + list.push(finding); + grouped.set(finding.framework, list); + } + return grouped; +} + // --------------------------------------------------------------------------- // Evaluation helpers // --------------------------------------------------------------------------- @@ -128,6 +163,7 @@ function evaluateRule( rule: FrameworkRule, input: FrameworkDetectorInput, manifestJson: ReadonlyMap, + configFindings: readonly ConfigAstFinding[] | undefined, ): RuleHit | null { const evidenceSeen = new Map(); let hasManifestHit = false; @@ -173,6 +209,16 @@ function evaluateRule( } } + // Stage 3 — config-AST corroboration. Only merged when a manifest/layout + // signal already fired: config text alone never creates a detection (a repo + // can carry a vendored config without using the framework). 
Stage-3 evidence + // sharpens an existing hit (e.g. Next.js App vs Pages router). + if ((hasManifestHit || hasFileHit) && configFindings !== undefined) { + for (const f of configFindings) { + push({ stage: 3, source: f.source, detail: f.detail }); + } + } + if (!hasManifestHit && !hasFileHit) return null; const sorted = [...evidenceSeen.values()].sort((a, b) => { if (a.stage !== b.stage) return a.stage - b.stage; diff --git a/packages/frameworks/src/frameworks.ts b/packages/frameworks/src/frameworks.ts index 934c465f..696cffea 100644 --- a/packages/frameworks/src/frameworks.ts +++ b/packages/frameworks/src/frameworks.ts @@ -17,6 +17,7 @@ import { promises as fs } from "node:fs"; import path from "node:path"; import { detectFrameworksStructured } from "./detector.js"; +import { CONFIG_AST_FILES } from "./stages/config-ast.js"; import { indexResolutions, KNOWN_LOCKFILES, parseLockfile } from "./stages/lockfile.js"; /** @@ -109,6 +110,29 @@ async function preReadLockfiles( return indexResolutions(all); } +/** + * Stage 3 — pre-read every framework config file present at the repo root + * (`next.config.*`, `astro.config.*`, `vite.config.*`, + * `META-INF/spring.factories`). Returns a relPath → text map; unreadable / + * missing files are simply absent (FRM-UN-002 log-and-continue). + */ +async function preReadConfigFiles( + repoRoot: string, + relPaths: ReadonlySet, +): Promise> { + const out = new Map(); + for (const name of CONFIG_AST_FILES) { + if (!relPaths.has(name)) continue; + try { + const text = await fs.readFile(path.join(repoRoot, name), "utf8"); + out.set(name, text); + } catch { + // Malformed / unreadable — skip. 
+ } + } + return out; +} + const ALL_ECOSYSTEM_LANGUAGES: readonly string[] = [ "javascript", "typescript", @@ -128,14 +152,16 @@ const ALL_ECOSYSTEM_LANGUAGES: readonly string[] = [ */ export async function detectFrameworks(input: FrameworkDetectionInput): Promise { const relPaths = new Set(input.files.map((f) => f.relPath)); - const [manifestText, lockfileVersions] = await Promise.all([ + const [manifestText, lockfileVersions, configText] = await Promise.all([ preReadManifests(input.repoRoot, relPaths), preReadLockfiles(input.repoRoot, relPaths), + preReadConfigFiles(input.repoRoot, relPaths), ]); const detections = detectFrameworksStructured({ relPaths, manifestText, lockfileVersions, + configText, detectedLanguages: input.detectedLanguages ?? ALL_ECOSYSTEM_LANGUAGES, }); return detections.map((d) => d.name); @@ -151,14 +177,16 @@ export async function detectFrameworksDetailed( input: FrameworkDetectionInput, ): Promise> { const relPaths = new Set(input.files.map((f) => f.relPath)); - const [manifestText, lockfileVersions] = await Promise.all([ + const [manifestText, lockfileVersions, configText] = await Promise.all([ preReadManifests(input.repoRoot, relPaths), preReadLockfiles(input.repoRoot, relPaths), + preReadConfigFiles(input.repoRoot, relPaths), ]); return detectFrameworksStructured({ relPaths, manifestText, lockfileVersions, + configText, detectedLanguages: input.detectedLanguages ?? ALL_ECOSYSTEM_LANGUAGES, }); } diff --git a/packages/frameworks/src/index.ts b/packages/frameworks/src/index.ts index 44aa829f..f0d021ab 100644 --- a/packages/frameworks/src/index.ts +++ b/packages/frameworks/src/index.ts @@ -1,24 +1,27 @@ /** * `@opencodehub/frameworks` — framework detection over a curated catalog. * - * The dispatcher (`detector.ts`) merges three stages into each + * The dispatcher (`detector.ts`) merges four stages into each * `FrameworkDetection` (`{name, version?, confidence, evidence[]}`): * 1. 
Manifest presence + declared deps (`package.json`, `pyproject.toml`, * `pom.xml`, …) * 2. Lockfile exact versions, overriding manifest semver ranges * (`package-lock.json`, `pnpm-lock.yaml`, `Gemfile.lock`, * `poetry.lock`, `uv.lock`, `Cargo.lock`) + * 3. Config AST (`config-ast.ts`) — `next.config.*`, `astro.config.*`, + * `vite.config.*`, `spring.factories`. The wrapper pre-reads these and + * passes `configText`; the dispatcher merges the findings as stage-3 + * evidence into a framework that already hit on a manifest/layout signal + * (it corroborates, never creates a detection on its own). * 4. Folder / file-marker convention (`app/`, `pages/`, `vite.config.ts`, * `src/main/java/`, …) * - * Two further stages ship as standalone, independently tested modules but - * are not yet wired into the ingestion profile phase (their findings do not - * reach `FrameworkDetection.evidence` until a caller passes the extra - * inputs through): - * 3. Config AST (`config-ast.ts`) — `next.config.*`, `astro.config.*`, - * `vite.config.*`, `spring.factories`; needs the config-file text. - * 5. Import / SCIP (`imports.ts`) — consumes the graph's `IMPORTS` edges; - * needs the `KnowledgeGraph`. + * One stage ships as a standalone, independently tested module but is not yet + * wired into the ingestion profile phase: + * 5. Import / SCIP (`imports.ts`) — consumes the graph's `IMPORTS` edges, + * produced after the profile phase (deps: [scan]) runs. Wiring it needs a + * phase-ordering change (run framework detection after `crossFile`); a + * caller that already holds the resolved graph can pass it through today. * * Every stage is pure-local file-system + string/regex inspection; no * network, no LLM, no subprocess.