Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import {
findEnclosingSymbolId,
indexNodesByFile,
type NodeRow,
} from "./find-enclosing-symbol.js";
} from "./enclosing-symbol.js";

function row(
id: string,
Expand Down
122 changes: 122 additions & 0 deletions packages/analysis/src/enclosing-symbol.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/**
* `findEnclosingSymbolId` — deterministic tightest-span lookup mapping a
* `(filePath, line)` pair back to the OpenCodeHub graph node that owns the
* line (a Function / Method / Class / …).
*
* Canonical home for an algorithm that was previously cloned in two places —
* `@opencodehub/cli`'s `ingest-sarif` (SARIF finding → enclosing symbol) and
* `@opencodehub/ingestion`'s `scip-index` (SCIP call site → enclosing symbol).
* Both now import from here. `@opencodehub/analysis` is the shared home
* because both `cli` and `ingestion` already depend on it (no new edge, no
* cycle).
*
* The two former clones differed only in their kind allow-set and their node
* source, so this module exposes the pure core parameterized by a kind-set
* plus the two named sets; each caller projects its own nodes into `NodeRow[]`
* and calls the shared index/lookup.
*
* 1-indexing note: SARIF 2.1.0 `region.startLine` and OpenCodeHub node
* `startLine`/`endLine` are both 1-based, so call sites pass lines through
* unadjusted.
*/

import type { NodeId, NodeKind } from "@opencodehub/core-types";

/**
* A graph-node projection carrying only the fields the lookup needs.
*
* Callers project their own node representation into this shape and hand the
* rows to `indexNodesByFile`.
*/
export interface NodeRow {
/** Graph-node id; this is the value `findEnclosingSymbolId` returns. */
readonly id: NodeId;
/** File the node belongs to; used as the bucket key by `indexNodesByFile`. */
readonly filePath: string;
/** First line of the node's span, inclusive (matched as `startLine <= line`). */
readonly startLine: number;
/** Last line of the node's span, inclusive (matched as `line <= endLine`). */
readonly endLine: number;
/** Node kind; `indexNodesByFile` drops rows whose kind is not in its allow-set. */
readonly kind: NodeKind;
}

/**
* Per-file, start-line-ascending index used by {@link findEnclosingSymbolId}.
*
* Keys are `NodeRow.filePath` values. Each bucket must stay sorted by
* `startLine` ascending: the lookup stops scanning at the first row whose
* `startLine` is past the requested line.
*/
export type NodesByFile = ReadonlyMap<string, readonly NodeRow[]>;

/**
* SARIF-linkage allow-set — a strict superset of {@link SCIP_SYMBOL_KINDS}
* that additionally admits `Constructor`, because SARIF tooling routinely
* emits findings inside constructor bodies.
*
* This is the default `kinds` argument of `indexNodesByFile`. That function
* only tests membership (`kinds.has(...)`), so the order of the entries below
* does not affect its result.
*/
export const ENCLOSING_SYMBOL_KINDS: ReadonlySet<NodeKind> = new Set<NodeKind>([
"Function",
"Method",
// The one entry that is not also in SCIP_SYMBOL_KINDS.
"Constructor",
"Class",
"Interface",
"Struct",
"Enum",
"Trait",
]);

/**
* SCIP-derivation allow-set — the kinds the scip-index phase resolves call
* sites and definitions against. No `Constructor` (SCIP definition occurrences
* never land on a bare constructor in the indexers OpenCodeHub ships).
*
* Every entry here also appears in {@link ENCLOSING_SYMBOL_KINDS}; keep it
* that way when adding a kind, since that set is documented as a strict
* superset of this one.
*/
export const SCIP_SYMBOL_KINDS: ReadonlySet<NodeKind> = new Set<NodeKind>([
"Class",
"Method",
"Function",
"Interface",
"Struct",
"Enum",
"Trait",
]);

/**
 * Group `rows` by `filePath` into the index consumed by
 * {@link findEnclosingSymbolId}.
 *
 * A row is kept only when its `kind` is a member of `kinds` and both its
 * `startLine` and `endLine` are finite numbers. Each per-file bucket is then
 * ordered by `startLine` ascending, with `endLine` ascending as the
 * tie-break; that ordering is what allows the lookup to stop scanning as soon
 * as it sees a row starting after the target line.
 *
 * @param rows - Node projections to index; the input array is not modified.
 * @param kinds - Allow-set of node kinds (default {@link ENCLOSING_SYMBOL_KINDS}).
 * @returns A map from file path to that file's sorted rows.
 */
export function indexNodesByFile(
  rows: readonly NodeRow[],
  kinds: ReadonlySet<NodeKind> = ENCLOSING_SYMBOL_KINDS,
): NodesByFile {
  const grouped = new Map<string, NodeRow[]>();

  for (const node of rows) {
    const usable =
      kinds.has(node.kind) && Number.isFinite(node.startLine) && Number.isFinite(node.endLine);
    if (!usable) continue;

    const existing = grouped.get(node.filePath);
    if (existing !== undefined) {
      existing.push(node);
    } else {
      grouped.set(node.filePath, [node]);
    }
  }

  // Both fields are finite here, so a zero difference means "equal" and `||`
  // falls through to the endLine tie-break.
  grouped.forEach((bucket) => {
    bucket.sort((lhs, rhs) => lhs.startLine - rhs.startLine || lhs.endLine - rhs.endLine);
  });

  return grouped;
}

/**
 * Return the id of the tightest-span node in `nodesByFile[filePath]` that
 * encloses `line` (`startLine <= line <= endLine`, both ends inclusive).
 * "Tightest" means smallest `endLine - startLine`, so a nested method wins
 * over its containing class. When several enclosing candidates share the
 * same span, the one that comes first in the bucket wins.
 *
 * @param nodesByFile - Index as produced by `indexNodesByFile`. Each bucket
 *   must be sorted by `startLine` ascending, because the scan stops at the
 *   first candidate that starts after `line`.
 * @param filePath - Bucket key; must equal the `filePath` the rows were
 *   indexed under.
 * @param line - Line to resolve, in the same numbering as the rows'
 *   `startLine` / `endLine`.
 * @returns The enclosing node's id, or `undefined` when the file is unknown,
 *   `line` is `NaN`, or no candidate contains the line.
 */
export function findEnclosingSymbolId(
  nodesByFile: NodesByFile,
  filePath: string,
  line: number,
): NodeId | undefined {
  // Every ordered comparison against NaN is false, so without this guard a
  // NaN line would trigger neither the early break nor the "ends before the
  // line" skip below, and every candidate in the file would count as
  // enclosing — returning an unrelated symbol instead of "no match".
  if (Number.isNaN(line)) return undefined;

  const candidates = nodesByFile.get(filePath);
  if (candidates === undefined) return undefined;

  let best: NodeRow | undefined;
  let bestSpan = Number.POSITIVE_INFINITY;
  for (const rec of candidates) {
    // Sorted by startLine: nothing from here on can start at or before `line`.
    if (rec.startLine > line) break;
    // Starts early enough but ends before the line: not enclosing.
    if (rec.endLine < line) continue;
    const span = rec.endLine - rec.startLine;
    // Strict `<` keeps the earliest candidate on equal spans.
    if (span < bestSpan) {
      best = rec;
      bestSpan = span;
    }
  }
  return best?.id;
}
7 changes: 7 additions & 0 deletions packages/analysis/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ export {
symbolKinds as deadCodeSymbolKinds,
} from "./dead-code.js";
export { runDetectChanges } from "./detect-changes.js";
export type { NodeRow, NodesByFile } from "./enclosing-symbol.js";
export {
ENCLOSING_SYMBOL_KINDS,
findEnclosingSymbolId,
indexNodesByFile,
SCIP_SYMBOL_KINDS,
} from "./enclosing-symbol.js";
export { createNodeFs } from "./fs.js";
export {
gitDiffHunks,
Expand Down
160 changes: 154 additions & 6 deletions packages/analysis/src/scan-enrich.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,20 @@ import type { SarifLog } from "@opencodehub/sarif";
import { buildScanEnrichment } from "./scan-enrich.js";
import { FakeStore } from "./test-utils.js";

/** Minimal SARIF result with a primary-location uri + fingerprint. */
function result(uri: string, fingerprint: string) {
/** Minimal SARIF result with a primary-location uri + fingerprint (+ line). */
function result(uri: string, fingerprint: string, startLine?: number) {
return {
ruleId: "demo-rule",
level: "warning",
message: { text: "x" },
locations: [{ physicalLocation: { artifactLocation: { uri } } }],
locations: [
{
physicalLocation: {
artifactLocation: { uri },
...(startLine !== undefined ? { region: { startLine } } : {}),
},
},
],
partialFingerprints: { primaryLocationLineHash: fingerprint },
};
}
Expand Down Expand Up @@ -49,7 +56,7 @@ test("buildScanEnrichment maps a result to its File node signals by fingerprint"
assert.ok(byFp !== undefined, "byResultFingerprint must be present");
assert.deepEqual(byFp?.get("fp-a"), { busFactor: 2, temporalFixDensity: 0.5 });
// Run-level stamp is deterministic (no clock / run id).
assert.deepEqual(enrichment.run, { enrichmentVersion: "1", sources: ["graph"] });
assert.deepEqual(enrichment.run, { enrichmentVersion: "2", sources: ["graph"] });
});

test("buildScanEnrichment normalizes an absolute result uri to the repo-relative node id", async () => {
Expand Down Expand Up @@ -83,7 +90,7 @@ test("buildScanEnrichment omits results whose file has no materialized signals",
const enrichment = await buildScanEnrichment(store, log, "/repo");
// No signals → no per-result map, but the run-level stamp still returns.
assert.equal(enrichment.byResultFingerprint, undefined);
assert.deepEqual(enrichment.run, { enrichmentVersion: "1", sources: ["graph"] });
assert.deepEqual(enrichment.run, { enrichmentVersion: "2", sources: ["graph"] });
});

test("buildScanEnrichment is byte-stable across two runs (no clock/run id)", async () => {
Expand All @@ -106,5 +113,146 @@ test("buildScanEnrichment returns only the run stamp for an empty log", async ()
const store = new FakeStore();
const enrichment = await buildScanEnrichment(store, logWith([]), "/repo");
assert.equal(enrichment.byResultFingerprint, undefined);
assert.deepEqual(enrichment.run, { enrichmentVersion: "1", sources: ["graph"] });
assert.deepEqual(enrichment.run, { enrichmentVersion: "2", sources: ["graph"] });
});

// ---------------------------------------------------------------------------
// Symbol-level signals: blastRadius (upstream runImpact) + community.
// ---------------------------------------------------------------------------

/**
 * Test fixture helper: registers a `File` node for `file` and then a
 * `Function` node named "target" with id `fnId` that spans lines 1-20 of it.
 */
function addFileWithFn(store: FakeStore, file: string, fnId: string): void {
  const fileNode = {
    id: `File:${file}:${file}`,
    kind: "File",
    name: file,
    filePath: file,
  };
  const enclosingFn = {
    id: fnId,
    kind: "Function",
    name: "target",
    filePath: file,
    startLine: 1,
    endLine: 20,
  };
  // File node first, then the function, matching the order the tests rely on.
  store.addNode(fileNode);
  store.addNode(enclosingFn);
}

test("buildScanEnrichment attaches blastRadius from the finding's enclosing symbol", async () => {
  const targetId = "Function:src/a.ts:target#0";
  const callerId = "Function:src/b.ts:caller#0";

  const store = new FakeStore();
  addFileWithFn(store, "src/a.ts", targetId);
  // A single CALLS edge into the target gives it exactly one upstream dependant.
  store.addNode({
    id: callerId,
    kind: "Function",
    name: "caller",
    filePath: "src/b.ts",
    startLine: 1,
    endLine: 5,
  });
  store.addEdge({ fromId: callerId, toId: targetId, type: "CALLS", confidence: 0.9 });

  // Line 10 falls inside the target's 1-20 span.
  const log = logWith([result("src/a.ts", "fp-x", 10)]);
  const enrichment = await buildScanEnrichment(store, log, "/repo");

  const signals = enrichment.byResultFingerprint?.get("fp-x");
  assert.equal(signals?.blastRadius, 1);
});

test("buildScanEnrichment attaches community label from MEMBER_OF", async () => {
  const targetId = "Function:src/a.ts:target#0";
  const communityId = "Community:1";

  const store = new FakeStore();
  addFileWithFn(store, "src/a.ts", targetId);
  store.addNode({
    id: communityId,
    kind: "Community",
    name: "auth",
    filePath: "<communities>",
    inferredLabel: "auth-core",
  });
  // Membership edge from the enclosing function to its community.
  store.addEdge({ fromId: targetId, toId: communityId, type: "MEMBER_OF", confidence: 1 });

  const log = logWith([result("src/a.ts", "fp-x", 10)]);
  const enrichment = await buildScanEnrichment(store, log, "/repo");

  // The expected label is the community's inferredLabel, not its name.
  const signals = enrichment.byResultFingerprint?.get("fp-x");
  assert.equal(signals?.community, "auth-core");
});

test("buildScanEnrichment merges file + symbol signals on one result", async () => {
  const filePath = "src/a.ts";
  const targetId = "Function:src/a.ts:target#0";
  const communityId = "Community:1";

  const store = new FakeStore();
  // File node carries the file-level signal (busFactor).
  store.addNode({
    id: "File:src/a.ts:src/a.ts",
    kind: "File",
    name: "a.ts",
    filePath,
    busFactor: 3,
  });
  // Enclosing function for the finding; it has no callers.
  store.addNode({
    id: targetId,
    kind: "Function",
    name: "target",
    filePath,
    startLine: 1,
    endLine: 20,
  });
  store.addNode({
    id: communityId,
    kind: "Community",
    name: "auth",
    filePath: "<communities>",
    inferredLabel: "auth-core",
  });
  store.addEdge({ fromId: targetId, toId: communityId, type: "MEMBER_OF", confidence: 1 });

  const log = logWith([result(filePath, "fp-x", 10)]);
  const enrichment = await buildScanEnrichment(store, log, "/repo");

  // Expect the file signal and both symbol signals together. blastRadius is an
  // explicit 0 here: the symbol was resolved and simply has no callers, which
  // is distinct from the field being absent because nothing was computed.
  const expected = {
    busFactor: 3,
    blastRadius: 0,
    community: "auth-core",
  };
  assert.deepEqual(enrichment.byResultFingerprint?.get("fp-x"), expected);
});

test("buildScanEnrichment leaves a finding with no enclosing symbol at file signals only", async () => {
  const filePath = "src/a.ts";

  const store = new FakeStore();
  store.addNode({
    id: "File:src/a.ts:src/a.ts",
    kind: "File",
    name: "a.ts",
    filePath,
    busFactor: 2,
  });
  // The only symbol in the file covers lines 1-5.
  store.addNode({
    id: "Function:src/a.ts:target#0",
    kind: "Function",
    name: "target",
    filePath,
    startLine: 1,
    endLine: 5,
  });

  // Line 99 lies outside that span, so only the file-level signal should appear.
  const log = logWith([result(filePath, "fp-x", 99)]);
  const enrichment = await buildScanEnrichment(store, log, "/repo");

  const signals = enrichment.byResultFingerprint?.get("fp-x");
  assert.deepEqual(signals, { busFactor: 2 });
});
Loading
Loading