Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions packages/frameworks/src/detector.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@ function mkInput(
files: readonly string[],
manifests: ReadonlyArray<readonly [string, string]>,
detectedLanguages: readonly string[],
configText?: ReadonlyArray<readonly [string, string]>,
): FrameworkDetectorInput {
return {
relPaths: new Set(files),
manifestText: new Map(manifests),
detectedLanguages,
...(configText !== undefined ? { configText: new Map(configText) } : {}),
};
}

Expand Down Expand Up @@ -799,3 +801,70 @@ describe("framework detection — version resolves from either dependency bucket
assert.equal(vite?.version, "5.4.0", "version read from devDependencies bucket");
});
});

// ---------------------------------------------------------------------------
// Stage 3 — config-AST evidence (wired via configText)
// ---------------------------------------------------------------------------

describe("stage 3 — config-AST evidence", () => {
  // Shared Next.js fixture: file listing, a manifest declaring `next`, and the
  // detected language. mkInput builds a fresh Set/Map from these arrays on
  // every call, so reusing them across cases is safe.
  const nextFiles: readonly string[] = ["package.json", "next.config.js", "app/page.tsx"];
  const nextManifests: ReadonlyArray<readonly [string, string]> = [
    ["package.json", JSON.stringify({ dependencies: { next: "14.2.0" } })],
  ];
  const nextLanguages: readonly string[] = ["typescript"];

  it("merges next.config router evidence into the nextjs detection", () => {
    const detections = detectFrameworksStructured(
      mkInput(nextFiles, nextManifests, nextLanguages, [
        ["next.config.js", "module.exports = { experimental: { appDir: true } };\n"],
      ]),
    );
    const nextjs = findByName(detections, "nextjs");
    assert.ok(nextjs, "nextjs detected");

    // Only the stage-3 entries matter here; stage 1/2/4 evidence is ignored.
    const configEvidence = nextjs?.evidence.filter((e) => e.stage === 3) ?? [];
    assert.ok(
      configEvidence.length > 0,
      "expected stage-3 config-AST evidence on the nextjs detection",
    );
    const citesNextConfig = configEvidence.some((e) => e.source === "next.config.js");
    assert.ok(citesNextConfig, "stage-3 evidence should cite next.config.js");
  });

  it("does NOT create a detection from config text alone (corroborates only)", () => {
    // spring.factories is a stage-3 config signal but NOT a catalog file/layout
    // marker (spring-boot keys on pom.xml). With no pom.xml and no Java layout,
    // the config text on its own must not produce a spring-boot detection.
    const factoriesPath = "META-INF/spring.factories";
    const factoriesBody =
      "org.springframework.boot.autoconfigure.EnableAutoConfiguration=com.example.MyAutoConfig\n";
    const detections = detectFrameworksStructured(
      mkInput([factoriesPath], [], ["java"], [[factoriesPath, factoriesBody]]),
    );
    const springBoot = findByName(detections, "spring-boot");
    assert.equal(springBoot, undefined, "config text alone must not detect spring-boot");
  });

  it("is a no-op when configText is omitted (legacy callers unchanged)", () => {
    const inputWithConfig = mkInput(nextFiles, nextManifests, nextLanguages, [
      ["next.config.js", "module.exports = {};\n"],
    ]);
    const inputWithoutConfig = mkInput(nextFiles, nextManifests, nextLanguages);

    const legacy = findByName(detectFrameworksStructured(inputWithoutConfig), "nextjs");
    const corroborated = findByName(detectFrameworksStructured(inputWithConfig), "nextjs");
    assert.ok(legacy && corroborated, "nextjs detected both ways");

    // Without configText: no stage-3 evidence. With it: stage-3 present.
    const legacyStage3 = legacy?.evidence.filter((e) => e.stage === 3) ?? [];
    const corroboratedStage3 = corroborated?.evidence.filter((e) => e.stage === 3) ?? [];
    assert.equal(legacyStage3.length, 0);
    assert.ok(corroboratedStage3.length > 0);
  });
});
48 changes: 47 additions & 1 deletion packages/frameworks/src/detector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import {
type FrameworkRule,
type ManifestKey,
} from "./catalog.js";
import { type ConfigAstFinding, inspectConfigAst } from "./stages/config-ast.js";
import {
VARIANT_RESOLVERS,
type VariantResolveInput,
Expand All @@ -56,6 +57,15 @@ export interface FrameworkDetectorInput {
* substitutes the lockfile's pinned version. Absent for legacy callers.
*/
readonly lockfileVersions?: ReadonlyMap<string, string>;
/**
* Stage 3 — raw text of framework config files (`next.config.*`,
* `astro.config.*`, `vite.config.*`, `META-INF/spring.factories`), keyed by
* relPath. When present, `inspectConfigAst` runs and its findings are merged
* as stage-3 evidence into the matching framework's detection (corroborating
* a manifest/layout hit; it never creates a detection on its own). Absent for
* legacy callers — stage 3 simply contributes no evidence.
*/
readonly configText?: ReadonlyMap<string, string>;
}

/** Mapping language → ecosystem. Covers the tree-sitter languages OpenCodeHub indexes. */
Expand Down Expand Up @@ -85,11 +95,16 @@ export function detectFrameworksStructured(
manifestJson,
manifestText: input.manifestText,
};
// Stage 3 — config-AST findings, grouped by the framework name they
// implicate. Computed once; merged into a detection's evidence when that
// framework already hit on a manifest/layout signal (stage 3 corroborates,
// never creates).
const configFindingsByFramework = groupConfigFindings(input.configText, input.relPaths);

const out: FrameworkDetection[] = [];
for (const rule of FRAMEWORK_CATALOG) {
if (rule.ecosystem !== "any" && !activeEcosystems.has(rule.ecosystem)) continue;
const hit = evaluateRule(rule, input, manifestJson);
const hit = evaluateRule(rule, input, manifestJson, configFindingsByFramework.get(rule.name));
if (hit === null) continue;
const detection = buildDetection(
rule,
Expand All @@ -104,6 +119,26 @@ export function detectFrameworksStructured(
return out;
}

/**
 * Execute stage 3 (config-AST) a single time and bucket its findings under
 * the framework name each one implicates, letting `evaluateRule` fetch a
 * rule's corroborating findings with a plain `rule.name` lookup.
 *
 * @param configText - Config-file text keyed by relPath; `undefined` for
 *   legacy callers that do not supply it.
 * @param relPaths - Relative paths of the repo, forwarded to
 *   `inspectConfigAst` unchanged.
 * @returns Findings grouped by framework name. The map is empty when
 *   `configText` is missing or has no entries — stage 3 then adds nothing.
 */
function groupConfigFindings(
  configText: ReadonlyMap<string, string> | undefined,
  relPaths: ReadonlySet<string>,
): ReadonlyMap<string, readonly ConfigAstFinding[]> {
  const byFramework = new Map<string, ConfigAstFinding[]>();
  if (configText !== undefined && configText.size > 0) {
    for (const finding of inspectConfigAst(configText, relPaths)) {
      const bucket = byFramework.get(finding.framework);
      if (bucket === undefined) {
        // First finding for this framework opens its bucket.
        byFramework.set(finding.framework, [finding]);
      } else {
        bucket.push(finding);
      }
    }
  }
  return byFramework;
}

// ---------------------------------------------------------------------------
// Evaluation helpers
// ---------------------------------------------------------------------------
Expand All @@ -128,6 +163,7 @@ function evaluateRule(
rule: FrameworkRule,
input: FrameworkDetectorInput,
manifestJson: ReadonlyMap<string, unknown>,
configFindings: readonly ConfigAstFinding[] | undefined,
): RuleHit | null {
const evidenceSeen = new Map<string, Evidence>();
let hasManifestHit = false;
Expand Down Expand Up @@ -173,6 +209,16 @@ function evaluateRule(
}
}

// Stage 3 — config-AST corroboration. Only merged when a manifest/layout
// signal already fired: config text alone never creates a detection (a repo
// can carry a vendored config without using the framework). Stage-3 evidence
// sharpens an existing hit (e.g. Next.js App vs Pages router).
if ((hasManifestHit || hasFileHit) && configFindings !== undefined) {
for (const f of configFindings) {
push({ stage: 3, source: f.source, detail: f.detail });
}
}

if (!hasManifestHit && !hasFileHit) return null;
const sorted = [...evidenceSeen.values()].sort((a, b) => {
if (a.stage !== b.stage) return a.stage - b.stage;
Expand Down
32 changes: 30 additions & 2 deletions packages/frameworks/src/frameworks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import { promises as fs } from "node:fs";
import path from "node:path";
import { detectFrameworksStructured } from "./detector.js";
import { CONFIG_AST_FILES } from "./stages/config-ast.js";
import { indexResolutions, KNOWN_LOCKFILES, parseLockfile } from "./stages/lockfile.js";

/**
Expand Down Expand Up @@ -109,6 +110,29 @@ async function preReadLockfiles(
return indexResolutions(all);
}

/**
 * Stage 3 — pre-read every known framework config file that is present in
 * the repo (`next.config.*`, `astro.config.*`, `vite.config.*`,
 * `META-INF/spring.factories`).
 *
 * Candidate names come from `CONFIG_AST_FILES`; only those contained in
 * `relPaths` are read, each resolved against `repoRoot`. The reads are
 * independent, so they are issued together instead of one after another.
 *
 * @param repoRoot - Directory the root-relative candidate paths are joined to.
 * @param relPaths - Relative paths known to exist in the repo.
 * @returns A relPath → UTF-8 text map, inserted in `CONFIG_AST_FILES` order.
 *   A file that cannot be read is simply absent (FRM-UN-002: continue rather
 *   than fail); the read error is swallowed and never reaches the caller.
 */
async function preReadConfigFiles(
  repoRoot: string,
  relPaths: ReadonlySet<string>,
): Promise<ReadonlyMap<string, string>> {
  const present = [...CONFIG_AST_FILES].filter((name) => relPaths.has(name));
  const texts = await Promise.all(
    present.map(async (name): Promise<string | undefined> => {
      try {
        return await fs.readFile(path.join(repoRoot, name), "utf8");
      } catch {
        // Unreadable (removed since the scan, permissions, …) — skip this file.
        return undefined;
      }
    }),
  );
  // Re-associate results by index so insertion order matches the candidate
  // order regardless of which read finished first.
  const out = new Map<string, string>();
  present.forEach((name, index) => {
    const text = texts[index];
    if (text !== undefined) out.set(name, text);
  });
  return out;
}

const ALL_ECOSYSTEM_LANGUAGES: readonly string[] = [
"javascript",
"typescript",
Expand All @@ -128,14 +152,16 @@ const ALL_ECOSYSTEM_LANGUAGES: readonly string[] = [
*/
export async function detectFrameworks(input: FrameworkDetectionInput): Promise<readonly string[]> {
const relPaths = new Set(input.files.map((f) => f.relPath));
const [manifestText, lockfileVersions] = await Promise.all([
const [manifestText, lockfileVersions, configText] = await Promise.all([
preReadManifests(input.repoRoot, relPaths),
preReadLockfiles(input.repoRoot, relPaths),
preReadConfigFiles(input.repoRoot, relPaths),
]);
const detections = detectFrameworksStructured({
relPaths,
manifestText,
lockfileVersions,
configText,
detectedLanguages: input.detectedLanguages ?? ALL_ECOSYSTEM_LANGUAGES,
});
return detections.map((d) => d.name);
Expand All @@ -151,14 +177,16 @@ export async function detectFrameworksDetailed(
input: FrameworkDetectionInput,
): Promise<ReturnType<typeof detectFrameworksStructured>> {
const relPaths = new Set(input.files.map((f) => f.relPath));
const [manifestText, lockfileVersions] = await Promise.all([
const [manifestText, lockfileVersions, configText] = await Promise.all([
preReadManifests(input.repoRoot, relPaths),
preReadLockfiles(input.repoRoot, relPaths),
preReadConfigFiles(input.repoRoot, relPaths),
]);
return detectFrameworksStructured({
relPaths,
manifestText,
lockfileVersions,
configText,
detectedLanguages: input.detectedLanguages ?? ALL_ECOSYSTEM_LANGUAGES,
});
}
21 changes: 12 additions & 9 deletions packages/frameworks/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,24 +1,27 @@
/**
* `@opencodehub/frameworks` — framework detection over a curated catalog.
*
* The dispatcher (`detector.ts`) merges three stages into each
* The dispatcher (`detector.ts`) merges four stages into each
* `FrameworkDetection` (`{name, version?, confidence, evidence[]}`):
* 1. Manifest presence + declared deps (`package.json`, `pyproject.toml`,
* `pom.xml`, …)
* 2. Lockfile exact versions, overriding manifest semver ranges
* (`package-lock.json`, `pnpm-lock.yaml`, `Gemfile.lock`,
* `poetry.lock`, `uv.lock`, `Cargo.lock`)
* 3. Config AST (`config-ast.ts`) — `next.config.*`, `astro.config.*`,
* `vite.config.*`, `spring.factories`. The wrapper pre-reads these and
* passes `configText`; the dispatcher merges the findings as stage-3
* evidence into a framework that already hit on a manifest/layout signal
* (it corroborates, never creates a detection on its own).
* 4. Folder / file-marker convention (`app/`, `pages/`, `vite.config.ts`,
* `src/main/java/`, …)
*
* Two further stages ship as standalone, independently tested modules but
* are not yet wired into the ingestion profile phase (their findings do not
* reach `FrameworkDetection.evidence` until a caller passes the extra
* inputs through):
* 3. Config AST (`config-ast.ts`) — `next.config.*`, `astro.config.*`,
* `vite.config.*`, `spring.factories`; needs the config-file text.
* 5. Import / SCIP (`imports.ts`) — consumes the graph's `IMPORTS` edges;
* needs the `KnowledgeGraph`.
* One stage ships as a standalone, independently tested module but is not yet
* wired into the ingestion profile phase:
* 5. Import / SCIP (`imports.ts`) — consumes the graph's `IMPORTS` edges,
* which are not available yet when the profile phase (deps: [scan]) runs.
* Wiring it needs a phase-ordering change (run framework detection after
* `crossFile`); a caller that already holds the resolved graph can pass it
* through today.
*
* Every stage is pure-local file-system + string/regex inspection; no
* network, no LLM, no subprocess.
Expand Down
Loading