Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions packages/frameworks/src/detector.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,39 @@ function mkInput(
manifests: ReadonlyArray<readonly [string, string]>,
detectedLanguages: readonly string[],
configText?: ReadonlyArray<readonly [string, string]>,
importGraph?: FrameworkDetectorInput["importGraph"],
): FrameworkDetectorInput {
return {
relPaths: new Set(files),
manifestText: new Map(manifests),
detectedLanguages,
...(configText !== undefined ? { configText: new Map(configText) } : {}),
...(importGraph !== undefined ? { importGraph } : {}),
};
}

/**
* Build a minimal ImportStageGraph: each spec is an IMPORTS edge to an
* external-stub CodeElement node carrying `external import: <source>:<symbol>`
* content. `confidence` 1 = scip-resolved (deterministic), <1 = heuristic.
*/
function makeImportGraph(
specs: ReadonlyArray<{ source: string; symbol?: string; confidence: number }>,
): FrameworkDetectorInput["importGraph"] {
const nodes = new Map<string, { id: string; kind: string; content: string }>();
const edges: Array<{ from: string; to: string; type: string; confidence: number }> = [];
specs.forEach((s, i) => {
const to = `CodeElement:ext:${s.source}:${i}`;
nodes.set(to, {
id: to,
kind: "CodeElement",
content: `external import: ${s.source}:${s.symbol ?? "*"}`,
});
edges.push({ from: `File:src/app.py:${i}`, to, type: "IMPORTS", confidence: s.confidence });
});
return {
edges: () => edges[Symbol.iterator](),
getNode: (id: string) => nodes.get(id),
};
}

Expand Down Expand Up @@ -868,3 +895,61 @@ describe("stage 3 — config-AST evidence", () => {
assert.ok((b?.evidence.filter((e) => e.stage === 3) ?? []).length > 0);
});
});

// ---------------------------------------------------------------------------
// Stage 5 — import-graph evidence (wired via importGraph)
// ---------------------------------------------------------------------------

describe("stage 5 — import-graph evidence", () => {
it("a deterministic import CREATES a detection (scip-resolved is authoritative)", () => {
// No manifest, no layout marker — a scip-resolved `import fastapi` alone.
const input = mkInput(
["src/app.py"],
[],
["python"],
undefined,
makeImportGraph([{ source: "fastapi", symbol: "FastAPI", confidence: 1 }]),
);
const out = detectFrameworksStructured(input);
const fastapi = findByName(out, "fastapi");
assert.ok(fastapi, "deterministic import should create the fastapi detection");
assert.equal(fastapi.confidence, "deterministic");
const stage5 = fastapi.evidence.filter((e) => e.stage === 5);
assert.ok(stage5.length > 0, "stage-5 evidence should be attached");
});

it("a heuristic import does NOT create a detection on its own (corroborates only)", () => {
const input = mkInput(
["src/app.py"],
[],
["python"],
undefined,
makeImportGraph([{ source: "fastapi", symbol: "FastAPI", confidence: 0.5 }]),
);
const out = detectFrameworksStructured(input);
assert.equal(findByName(out, "fastapi"), undefined, "heuristic import alone must not detect");
});

it("a heuristic import corroborates a manifest hit with stage-5 evidence", () => {
const input = mkInput(
["pyproject.toml", "src/app.py"],
[["pyproject.toml", '[project]\ndependencies = ["fastapi>=0.110"]\n']],
["python"],
undefined,
makeImportGraph([{ source: "fastapi", symbol: "FastAPI", confidence: 0.5 }]),
);
const out = detectFrameworksStructured(input);
const fastapi = findByName(out, "fastapi");
assert.ok(fastapi, "manifest hit detects fastapi");
assert.ok(
fastapi.evidence.some((e) => e.stage === 5),
"heuristic import should still attach stage-5 evidence to the existing hit",
);
});

it("is a no-op when importGraph is omitted (legacy callers unchanged)", () => {
const input = mkInput(["src/app.py"], [], ["python"]);
const out = detectFrameworksStructured(input);
assert.equal(findByName(out, "fastapi"), undefined);
});
});
72 changes: 63 additions & 9 deletions packages/frameworks/src/detector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import {
type ManifestKey,
} from "./catalog.js";
import { type ConfigAstFinding, inspectConfigAst } from "./stages/config-ast.js";
import { detectFromImports, type ImportFinding, type ImportStageGraph } from "./stages/imports.js";
import {
VARIANT_RESOLVERS,
type VariantResolveInput,
Expand Down Expand Up @@ -66,6 +67,15 @@ export interface FrameworkDetectorInput {
* legacy callers — stage 3 simply contributes no evidence.
*/
readonly configText?: ReadonlyMap<string, string>;
/**
* Stage 5 — the import graph (parse-phase `KnowledgeGraph`). When present,
* `detectFromImports` reads its IMPORTS edges to external stubs; the findings
* merge as stage-5 evidence. A `deterministic` import (scip-resolved,
* confidence 1.0) is authoritative enough to CREATE a detection on its own
* (an `import fastapi` is as strong as a manifest dep); a `heuristic` import
* only corroborates a framework that already hit. Absent for legacy callers.
*/
readonly importGraph?: ImportStageGraph;
}

/** Mapping language → ecosystem. Covers the tree-sitter languages OpenCodeHub indexes. */
Expand Down Expand Up @@ -100,11 +110,20 @@ export function detectFrameworksStructured(
// framework already hit on a manifest/layout signal (stage 3 corroborates,
// never creates).
const configFindingsByFramework = groupConfigFindings(input.configText, input.relPaths);
// Stage 5 — import-graph findings, grouped by framework. A `deterministic`
// import can create a detection; a `heuristic` one only corroborates.
const importFindingsByFramework = groupImportFindings(input.importGraph);

const out: FrameworkDetection[] = [];
for (const rule of FRAMEWORK_CATALOG) {
if (rule.ecosystem !== "any" && !activeEcosystems.has(rule.ecosystem)) continue;
const hit = evaluateRule(rule, input, manifestJson, configFindingsByFramework.get(rule.name));
const hit = evaluateRule(
rule,
input,
manifestJson,
configFindingsByFramework.get(rule.name),
importFindingsByFramework.get(rule.name),
);
if (hit === null) continue;
const detection = buildDetection(
rule,
Expand Down Expand Up @@ -139,6 +158,24 @@ function groupConfigFindings(
return grouped;
}

/**
* Run stage 5 (import graph) once and group its findings by framework name, so
* `evaluateRule` can look up a rule's import findings by `rule.name`. Returns
* an empty map when no import graph was supplied (legacy callers).
*/
function groupImportFindings(
importGraph: ImportStageGraph | undefined,
): ReadonlyMap<string, readonly ImportFinding[]> {
const grouped = new Map<string, ImportFinding[]>();
if (importGraph === undefined) return grouped;
for (const finding of detectFromImports(importGraph)) {
const list = grouped.get(finding.framework) ?? [];
list.push(finding);
grouped.set(finding.framework, list);
}
return grouped;
}

// ---------------------------------------------------------------------------
// Evaluation helpers
// ---------------------------------------------------------------------------
Expand All @@ -153,6 +190,8 @@ interface RuleHit {
readonly hasManifestHit: boolean;
/** Whether a layout/heuristic (stage 4, tier H) signal fired. */
readonly hasFileHit: boolean;
/** Whether a scip-resolved (stage 5, deterministic) import fired. */
readonly hasDeterministicImport: boolean;
}

function evidenceKey(e: Evidence): string {
Expand All @@ -164,10 +203,12 @@ function evaluateRule(
input: FrameworkDetectorInput,
manifestJson: ReadonlyMap<string, unknown>,
configFindings: readonly ConfigAstFinding[] | undefined,
importFindings: readonly ImportFinding[] | undefined,
): RuleHit | null {
const evidenceSeen = new Map<string, Evidence>();
let hasManifestHit = false;
let hasFileHit = false;
let hasDeterministicImport = false;

const push = (e: Evidence): void => {
const key = evidenceKey(e);
Expand Down Expand Up @@ -209,23 +250,35 @@ function evaluateRule(
}
}

// Stage 3 — config-AST corroboration. Only merged when a manifest/layout
// signal already fired: config text alone never creates a detection (a repo
// can carry a vendored config without using the framework). Stage-3 evidence
// sharpens an existing hit (e.g. Next.js App vs Pages router).
if ((hasManifestHit || hasFileHit) && configFindings !== undefined) {
// Stage 5 — import-graph signal. A `deterministic` import (scip-resolved) is
// authoritative enough to create a detection on its own; a `heuristic` one is
// recorded but only corroborates a hit from another stage. Evaluated before
// the stage-3 gate so a deterministic import satisfies the "something hit"
// condition.
if (importFindings !== undefined) {
for (const f of importFindings) {
push({ stage: 5, source: f.source, detail: `import: ${f.source} (${f.confidence})` });
if (f.confidence === "deterministic") hasDeterministicImport = true;
}
}

// Stage 3 — config-AST corroboration. Only merged when a manifest/layout/
// import signal already fired: config text alone never creates a detection
// (a repo can carry a vendored config without using the framework). Stage-3
// evidence sharpens an existing hit (e.g. Next.js App vs Pages router).
if ((hasManifestHit || hasFileHit || hasDeterministicImport) && configFindings !== undefined) {
for (const f of configFindings) {
push({ stage: 3, source: f.source, detail: f.detail });
}
}

if (!hasManifestHit && !hasFileHit) return null;
if (!hasManifestHit && !hasFileHit && !hasDeterministicImport) return null;
const sorted = [...evidenceSeen.values()].sort((a, b) => {
if (a.stage !== b.stage) return a.stage - b.stage;
if (a.source !== b.source) return a.source < b.source ? -1 : 1;
return a.detail < b.detail ? -1 : a.detail > b.detail ? 1 : 0;
});
return { evidence: sorted, hasManifestHit, hasFileHit };
return { evidence: sorted, hasManifestHit, hasFileHit, hasDeterministicImport };
}

function matchManifestKey(
Expand Down Expand Up @@ -268,7 +321,8 @@ function buildDetection(

function inferConfidence(rule: FrameworkRule, hit: RuleHit): FrameworkDetection["confidence"] {
if (rule.tier === "C") return "composite";
if (hit.hasManifestHit) return "deterministic";
// A manifest dep or a scip-resolved import are both authoritative.
if (hit.hasManifestHit || hit.hasDeterministicImport) return "deterministic";
// tier D/H with only file-level hits → heuristic.
return "heuristic";
}
Expand Down
10 changes: 10 additions & 0 deletions packages/frameworks/src/frameworks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import { promises as fs } from "node:fs";
import path from "node:path";
import { detectFrameworksStructured } from "./detector.js";
import { CONFIG_AST_FILES } from "./stages/config-ast.js";
import type { ImportStageGraph } from "./stages/imports.js";
import { indexResolutions, KNOWN_LOCKFILES, parseLockfile } from "./stages/lockfile.js";

/**
Expand All @@ -42,6 +43,13 @@ export interface FrameworkDetectionInput {
* every ecosystem" when omitted (keeps the legacy contract).
*/
readonly detectedLanguages?: readonly string[];
/**
* Optional stage-5 import graph (the `KnowledgeGraph` after the parse phase).
* When supplied, `detectFromImports` reads its IMPORTS edges to external
* stubs and feeds the findings into detection. Absent for callers that run
* before parse or don't hold a graph — stage 5 then contributes nothing.
*/
readonly importGraph?: ImportStageGraph;
}

/**
Expand Down Expand Up @@ -163,6 +171,7 @@ export async function detectFrameworks(input: FrameworkDetectionInput): Promise<
lockfileVersions,
configText,
detectedLanguages: input.detectedLanguages ?? ALL_ECOSYSTEM_LANGUAGES,
...(input.importGraph !== undefined ? { importGraph: input.importGraph } : {}),
});
return detections.map((d) => d.name);
}
Expand All @@ -188,5 +197,6 @@ export async function detectFrameworksDetailed(
lockfileVersions,
configText,
detectedLanguages: input.detectedLanguages ?? ALL_ECOSYSTEM_LANGUAGES,
...(input.importGraph !== undefined ? { importGraph: input.importGraph } : {}),
});
}
16 changes: 7 additions & 9 deletions packages/frameworks/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* `@opencodehub/frameworks` — framework detection over a curated catalog.
*
* The dispatcher (`detector.ts`) merges four stages into each
* The dispatcher (`detector.ts`) merges five stages into each
* `FrameworkDetection` (`{name, version?, confidence, evidence[]}`):
* 1. Manifest presence + declared deps (`package.json`, `pyproject.toml`,
* `pom.xml`, …)
Expand All @@ -15,15 +15,13 @@
* (it corroborates, never creates a detection on its own).
* 4. Folder / file-marker convention (`app/`, `pages/`, `vite.config.ts`,
* `src/main/java/`, …)
* 5. Import / SCIP (`imports.ts`) — reads the graph's `IMPORTS` edges to
* external-import stubs via the `importGraph` input. The profile phase
* depends on `parse` so those edges exist by detection time. A
* `deterministic` (scip-resolved) import can create a detection on its
* own; a `heuristic` import only corroborates an existing hit.
*
* One stage ships as a standalone, independently tested module but is not yet
* wired into the ingestion profile phase:
* 5. Import / SCIP (`imports.ts`) — consumes the graph's `IMPORTS` edges,
* which the profile phase (deps: [scan]) runs before. Wiring it needs a
* phase-ordering change (run framework detection after `crossFile`); a
* caller that already holds the resolved graph can pass it through today.
*
* Every stage is pure-local file-system + string/regex inspection; no
* Every stage is pure-local file-system / graph + string/regex inspection; no
* network, no LLM, no subprocess.
*/

Expand Down
24 changes: 9 additions & 15 deletions packages/ingestion/src/pipeline/orchestrator.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,35 +32,29 @@ describe("runIngestion (end-to-end)", () => {
assert.ok(result.graphHash.length === 64, "graphHash must be sha256 hex");
assert.ok(result.stats.nodeCount >= 2, "should have File + definition nodes");
assert.ok(result.stats.edgeCount >= 1);
// Topological order with alphabetic tiebreak — parse's descendants
// (orm/routes/tools) sort lexicographically, and crossFile/mro/
// communities/processes/annotate follow.
// Topological order with alphabetic tiebreak. `profile` now depends on
// `parse` (so framework-detection stage 5 sees the parse-emitted IMPORTS
// edges), which lands it — plus its dependents `dependencies`/`repo-node`/
// `coverage`/`sbom` — after `parse`/`complexity`/`orm`.
assert.deepEqual(
result.stats.phases.map((p) => p.name),
[
"scan",
"incremental-scope",
"profile",
"dependencies",
// `repo-node` depends on `profile` only, so the topological
// alphabetic tiebreak lands it after `dependencies` and before `sbom`.
"repo-node",
"sbom",
"structure",
// `coverage` becomes runnable once `structure` completes; the
// alphabetic tiebreak lands it before `markdown`.
"coverage",
"markdown",
"parse",
// `business-logic` depends on parse + scan; once parse completes it is
// ready alongside complexity/orm/routes and the alphabetic tiebreak
// ("business-logic" < "complexity") lands it first.
"business-logic",
"complexity",
"orm",
"profile",
"coverage",
"dependencies",
"repo-node",
"routes",
"fetches",
"openapi",
"sbom",
"temporal",
"cochange",
"tools",
Expand Down
10 changes: 9 additions & 1 deletion packages/ingestion/src/pipeline/phases/profile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import { detectIaCTypes } from "../profile-detectors/iac.js";
import { detectLanguages } from "../profile-detectors/languages.js";
import { detectSrcDirs } from "../profile-detectors/src-dirs.js";
import type { PipelineContext, PipelinePhase } from "../types.js";
import { PARSE_PHASE_NAME } from "./parse.js";
import { SCAN_PHASE_NAME, type ScanOutput } from "./scan.js";

export const PROFILE_PHASE_NAME = "profile" as const;
Expand All @@ -43,7 +44,10 @@ export interface ProfileOutput {

export const profilePhase: PipelinePhase<ProfileOutput> = {
name: PROFILE_PHASE_NAME,
deps: [SCAN_PHASE_NAME],
// `parse` is a dep (not just `scan`) so the import-graph IMPORTS edges that
// framework detection stage 5 reads from `ctx.graph` are guaranteed present.
// No cycle: parse deps [scan, structure], neither of which deps profile.
deps: [SCAN_PHASE_NAME, PARSE_PHASE_NAME],
async run(ctx, deps) {
const scan = deps.get(SCAN_PHASE_NAME) as ScanOutput | undefined;
if (scan === undefined) {
Expand All @@ -68,6 +72,10 @@ async function runProfile(ctx: PipelineContext, scan: ScanOutput): Promise<Profi
files,
manifests,
detectedLanguages: languages,
// Stage 5 — the parse phase has populated IMPORTS edges (incl. external
// stubs) on the graph by now; `KnowledgeGraph` structurally satisfies the
// import-stage's `edges()`/`getNode()` reader.
importGraph: ctx.graph,
}),
]);
const srcDirs = detectSrcDirs(files);
Expand Down
Loading