Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions crates/codegraph-core/src/extractors/julia.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,14 @@ fn handle_module_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) -> O
/// `function_definition` / `macro_definition` in a `signature` node whose
/// first child is the `call_expression` — `find_child` only inspects direct
/// children, so we unwrap one level explicitly.
///
/// Grammar assumption: every `function_definition` / `macro_definition` emits
/// a `signature` child in the current tree-sitter-julia grammar. The fallback
/// to `find_child(node, "call_expression")` exists only as a defensive measure
/// for grammar drift — if it ever fires on a real definition, that fallback
/// would silently match the first body call_expression and mis-record the
/// function name. Callers must therefore treat a missing `signature` as a
/// parser/grammar mismatch worth investigating, not as a routine code path.
fn signature_call<'a>(node: &Node<'a>) -> Option<Node<'a>> {
if let Some(sig) = find_child(node, "signature") {
return find_child(&sig, "call_expression");
Expand Down Expand Up @@ -297,9 +305,17 @@ fn handle_abstract_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
///
/// Handles plain identifiers, `Name <: Super` binary expressions, and
/// parameterized forms like `Name{T}` / `Name{T} <: Super{T,1}` by recursing
/// into common wrapper kinds (binary expressions, parametrized identifiers,
/// type-parameter lists). Returns `None` when no identifier can be located —
/// into wrapper kinds the Julia grammar actually emits for type heads
/// (binary expressions, parametrized type expressions, parameterized
/// identifiers). Returns `None` when no identifier can be located —
/// callers should skip emitting a definition in that case.
///
/// Note: `type_parameter_list` / `type_argument_list` are intentionally
/// excluded — Julia's grammar uses `curly_expression` for `{T}` constructs,
/// not those node kinds. Including them would risk recursing into a
/// type-parameter list and returning a type variable (e.g. `T`) instead of
/// the struct name if `find_base_name` were ever called on a node lacking a
/// direct `identifier` child.
fn find_base_name<'a>(node: &Node<'a>) -> Option<Node<'a>> {
// The node itself may already be the identifier (e.g. when called on a
// direct side of a binary_expression like `Point <: AbstractPoint`).
Expand All @@ -317,9 +333,7 @@ fn find_base_name<'a>(node: &Node<'a>) -> Option<Node<'a>> {
match child.kind() {
"binary_expression"
| "parametrized_type_expression"
| "parameterized_identifier"
| "type_parameter_list"
| "type_argument_list" => {
| "parameterized_identifier" => {
if let Some(found) = find_base_name(&child) {
return Some(found);
}
Expand Down
207 changes: 164 additions & 43 deletions src/extractors/julia.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,17 +83,49 @@ function handleModuleDef(node: TreeSitterNode, ctx: ExtractorOutput): string | n
return nameNode.text;
}

/**
 * Build the recorded name for a definition found inside an optional module.
 *
 * A base name that already contains a `.` (for example `Base.show`, taken from
 * a `scoped_identifier` on the left-hand side of a definition) is returned
 * untouched, so an enclosing `module Foo` never yields `Foo.Base.show`.
 * Otherwise the enclosing module name, when there is one, is prepended with a
 * dot separator.
 *
 * @param base - Name text as it appears in the definition.
 * @param currentModule - Enclosing module name, or `null` at top level.
 * @returns `currentModule.base` for unqualified names inside a module,
 *   otherwise `base` unchanged.
 */
function qualifyName(base: string, currentModule: string | null): string {
  const alreadyQualified = base.includes('.');
  if (!currentModule || alreadyQualified) return base;
  return `${currentModule}.${base}`;
}

/**
 * Return the `call_expression` that carries a function or macro definition's
 * name and parameters.
 *
 * tree-sitter-julia places the signature under a `signature` child, and
 * `findChild` looks at direct children only, so the lookup has to descend one
 * level by hand. Searching the definition node directly would instead pick up
 * the first `call_expression` of the *body* (e.g. a `println(...)` call).
 *
 * Lookup order:
 *  1. If the node has a `signature` child, search that child — and only that
 *     child. A `signature` without a `call_expression` yields `null`.
 *  2. Otherwise search the node itself. This branch is a guard against grammar
 *     drift, not a normal path: the current grammar is expected to emit
 *     `signature` for every `function_definition` / `macro_definition`, and
 *     if this branch ever runs on a real definition it can return a body call
 *     and record the wrong name. Treat a missing `signature` as a grammar
 *     mismatch to investigate.
 *
 * @param node - A `function_definition` or `macro_definition` node.
 * @returns The signature's `call_expression`, or `null` if none is found.
 */
function signatureCall(node: TreeSitterNode): TreeSitterNode | null {
  const signature = findChild(node, 'signature');
  const searchRoot = signature ?? node;
  return findChild(searchRoot, 'call_expression');
}

function handleFunctionDef(
node: TreeSitterNode,
ctx: ExtractorOutput,
currentModule: string | null,
): void {
// function_definition may have a call_expression child as the signature
const callSig = findChild(node, 'call_expression');
const callSig = signatureCall(node);
if (callSig) {
const funcNameNode = callSig.child(0);
if (funcNameNode) {
const name = currentModule ? `${currentModule}.${funcNameNode.text}` : funcNameNode.text;
const name = qualifyName(funcNameNode.text, currentModule);
const params = extractJuliaParams(callSig);
ctx.definitions.push({
name,
Expand All @@ -110,9 +142,8 @@ function handleFunctionDef(
const nameNode = node.childForFieldName('name') || findChild(node, 'identifier');
if (!nameNode) return;

const name = currentModule ? `${currentModule}.${nameNode.text}` : nameNode.text;
ctx.definitions.push({
name,
name: qualifyName(nameNode.text, currentModule),
kind: 'function',
line: node.startPosition.row + 1,
endLine: nodeEndLine(node),
Expand All @@ -133,11 +164,10 @@ function handleAssignment(
const funcNameNode = lhs.child(0);
if (!funcNameNode) return;

const name = currentModule ? `${currentModule}.${funcNameNode.text}` : funcNameNode.text;
const params = extractJuliaParams(lhs);

ctx.definitions.push({
name,
name: qualifyName(funcNameNode.text, currentModule),
kind: 'function',
line: node.startPosition.row + 1,
endLine: nodeEndLine(node),
Expand All @@ -146,16 +176,74 @@ function handleAssignment(
}
}

/**
 * Node kinds that `findBaseName` is allowed to descend into while looking for
 * a type's base-name identifier: binary expressions (`Name <: Super`),
 * parametrized type expressions, and parameterized identifiers.
 *
 * `type_parameter_list` / `type_argument_list` are left out on purpose. The
 * Julia grammar represents `{T}` with `curly_expression`, not those kinds, and
 * listing them would open the door to descending into a parameter list and
 * returning a type variable such as `T` in place of the struct name.
 */
const TYPE_HEAD_WRAPPERS: ReadonlySet<string> = new Set([
  'binary_expression',
  'parametrized_type_expression',
  'parameterized_identifier',
]);

/**
 * Find the identifier naming a type inside a `type_head` (or a sub-node of
 * one).
 *
 * Resolution order:
 *  1. The node itself, when it already is an `identifier`.
 *  2. The node's direct `identifier` child, as reported by `findChild`.
 *  3. A depth-first search, in child order, through children whose kind is in
 *     `TYPE_HEAD_WRAPPERS`; the first hit wins.
 *
 * This covers plain names, `Name <: Super`, and parameterized forms such as
 * `Name{T}` / `Name{T} <: Super{T,1}`.
 *
 * @param node - The `type_head` node or a node nested within it.
 * @returns The base-name identifier node, or `null` when none can be located;
 *   callers should skip emitting a definition in that case.
 */
function findBaseName(node: TreeSitterNode): TreeSitterNode | null {
  if (node.type === 'identifier') return node;

  const ownIdentifier = findChild(node, 'identifier');
  if (ownIdentifier) return ownIdentifier;

  for (let idx = 0; idx < node.childCount; idx += 1) {
    const wrapper = node.child(idx);
    // Only recognised wrapper kinds are searched; everything else is skipped.
    if (!wrapper || !TYPE_HEAD_WRAPPERS.has(wrapper.type)) continue;
    const nested = findBaseName(wrapper);
    if (nested) return nested;
  }

  return null;
}
Comment on lines +196 to +215
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 type_parameter_list / type_argument_list in TYPE_HEAD_WRAPPERS can yield the wrong identifier

findBaseName checks findChild(node, 'identifier') before recursing, so in practice the struct name is found before the loop reaches a type_parameter_list or type_argument_list. However, if findBaseName is ever called with a node that lacks a direct identifier child and does have one of those wrapper types as a child — for example, a future call site or an unusual parameterized form — the function will recurse into type_parameter_list and return the first type-parameter identifier (e.g. T) instead of the struct name. Removing those two entries from TYPE_HEAD_WRAPPERS would eliminate the risk without affecting correctness.

Fix in Claude Code

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 8c2e148 — removed type_parameter_list and type_argument_list from TYPE_HEAD_WRAPPERS in both the WASM and native engines (preserving dual-engine parity per CLAUDE.md). AST inspection confirmed Julia's grammar uses curly_expression for {T} constructs, not those node kinds, so the entries were dead code. Removing them eliminates the risk of recursing into a type-parameter list and returning a type variable as the struct name, as you noted.


function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
// struct_definition: struct type_head fields... end
// type_head wraps the name and optional supertype. The name may be a
// bare `identifier`, a parameterized form (e.g. `Vec{T}`), or either
// of those nested inside a `binary_expression` (`Name <: Super`).
const typeHead = findChild(node, 'type_head');
const nameNode = typeHead
? (findChild(typeHead, 'identifier') ?? typeHead)
: findChild(node, 'identifier');
if (!typeHead) return;

let nameNode: TreeSitterNode | null;
let supertypeNode: TreeSitterNode | null = null;

const binary = findChild(typeHead, 'binary_expression');
if (binary) {
// Walk into each side of the binary expression to find the base-name
// identifier — handles parameterized forms like `Vec{T} <: AbstractArray{T,1}`.
const sides: TreeSitterNode[] = [];
for (let i = 0; i < binary.childCount; i++) {
const c = binary.child(i);
if (c && c.type !== 'operator') sides.push(c);
}
nameNode = sides[0] ? findBaseName(sides[0]) : null;
supertypeNode = sides[1] ? findBaseName(sides[1]) : null;
} else {
nameNode = findBaseName(typeHead);
}
Comment on lines +228 to +241
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Missing test for non-parameterized struct inheritance

The old code explicitly looked for a subtype_expression node (findChild(typeHead, 'subtype_expression')) to detect Point <: AbstractPoint. That path has been entirely removed and replaced with a binary_expression lookup. If the tree-sitter-julia grammar represents simple non-parameterized inheritance with a subtype_expression node (rather than binary_expression), findChild(typeHead, 'binary_expression') returns null, findBaseName(typeHead) recurses without entering subtype_expression (not in TYPE_HEAD_WRAPPERS), and returns null — so the entire struct is silently dropped from ctx.definitions. The new parameterized test (Vec{T} <: AbstractArray{T,1}) confirms the grammar uses binary_expression for that form, but there is no test for the simple case struct Point <: AbstractPoint to verify the same grammar node is used and the extends relationship is still recorded.

Fix in Claude Code

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 8c2e148 — added a test for non-parameterized struct inheritance (struct Point <: AbstractPoint) in tests/parsers/julia.test.ts. Confirmed via AST inspection that the Julia grammar wraps both the simple and parameterized cases in a binary_expression node, so the new code path handles both correctly. The native engine already had this test (crates/codegraph-core/src/extractors/julia.rs:592), so this brings WASM to parity.


if (!nameNode) return;
const structName = nameNode.text;

const children: SubDeclaration[] = [];
// Fields are typed_expression children of struct_definition
for (let i = 0; i < node.childCount; i++) {
const child = node.child(i);
if (!child) continue;
Expand All @@ -168,33 +256,24 @@ function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
line: child.startPosition.row + 1,
});
}
}
// Plain identifier fields (no type annotation)
if (child.type === 'identifier' && child !== nameNode && typeHead && child !== typeHead) {
} else if (child.type === 'identifier') {
// Plain identifier fields (no type annotation) appear as direct
// identifier children of struct_definition. The type_head is a
// separate node so there is nothing to filter out here.
children.push({ name: child.text, kind: 'property', line: child.startPosition.row + 1 });
}
}

// Check for supertype in type_head (Point <: AbstractPoint)
if (typeHead) {
const subtypeExpr = findChild(typeHead, 'subtype_expression');
if (subtypeExpr) {
// Find the supertype identifier
for (let i = 0; i < subtypeExpr.childCount; i++) {
const child = subtypeExpr.child(i);
if (child?.type === 'identifier' && i > 0) {
ctx.classes.push({
name: nameNode.text,
extends: child.text,
line: node.startPosition.row + 1,
});
}
}
}
if (supertypeNode) {
ctx.classes.push({
name: structName,
extends: supertypeNode.text,
line: node.startPosition.row + 1,
});
}

ctx.definitions.push({
name: nameNode.text,
name: structName,
kind: 'struct',
line: node.startPosition.row + 1,
endLine: nodeEndLine(node),
Expand All @@ -203,7 +282,14 @@ function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
}

function handleAbstractDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
const nameNode = node.childForFieldName('name') || findChild(node, 'identifier');
// abstract_definition: `abstract type` type_head `end`
// The identifier is nested inside `type_head` — possibly wrapped in a
// `Name <: Super` binary_expression or a `Name{T,...}` parameterized form.
// Mirror handleStructDef and skip rather than emit a garbled name when no
// base identifier can be located.
const typeHead = findChild(node, 'type_head');
if (!typeHead) return;
const nameNode = findBaseName(typeHead);
if (!nameNode) return;

ctx.definitions.push({
Expand All @@ -219,10 +305,17 @@ function handleMacroDef(
ctx: ExtractorOutput,
currentModule: string | null,
): void {
const nameNode = node.childForFieldName('name') || findChild(node, 'identifier');
// macro_definition: `macro` signature/call_expression body `end`.
// The name lives in the same shape as a function signature — unwrap via
// signatureCall so we don't pick up an identifier from the body (e.g.
// `macro mymac(x) x end` would otherwise resolve to `@x`).
const callSig = signatureCall(node);
const nameNode =
callSig?.child(0) ?? node.childForFieldName('name') ?? findChild(node, 'identifier');
if (!nameNode) return;

const name = currentModule ? `${currentModule}.@${nameNode.text}` : `@${nameNode.text}`;
const base = nameNode.text;
const name = currentModule ? `${currentModule}.@${base}` : `@${base}`;
ctx.definitions.push({
name,
kind: 'function',
Expand All @@ -232,19 +325,40 @@ function handleMacroDef(
}

function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void {
// tree-sitter-julia shapes:
// `using LinearAlgebra` → using_statement [ using, identifier ]
// `import Foo.Bar` → import_statement [ import, scoped_identifier ]
// `import Base: show` → import_statement [ import, selected_import[Base, show] ]
// `import Foo.Bar: baz` → import_statement [ import, selected_import[scoped_identifier, baz] ]
const names: string[] = [];
let source = '';

for (let i = 0; i < node.childCount; i++) {
const child = node.child(i);
if (!child) continue;
if (
child.type === 'identifier' ||
child.type === 'scoped_identifier' ||
child.type === 'selected_import'
) {
if (!source) source = child.text;
names.push(child.text.split('.').pop() || child.text);
if (child.type === 'identifier' || child.type === 'scoped_identifier') {
const txt = child.text;
if (!source) source = txt;
names.push(txt.split('.').pop() || txt);
} else if (child.type === 'selected_import') {
// First identifier-bearing node is the source module; the rest are
// imported names. The module may itself be a `scoped_identifier`
// (e.g. `import Foo.Bar: baz`) — handle it alongside bare
// `identifier` and use the trailing segment as the display name,
// mirroring the outer loop.
let first = true;
for (let j = 0; j < child.childCount; j++) {
const part = child.child(j);
if (!part) continue;
if (part.type !== 'identifier' && part.type !== 'scoped_identifier') continue;
const txt = part.text;
if (first) {
if (!source) source = txt;
first = false;
} else {
names.push(txt.split('.').pop() || txt);
}
}
}
}

Expand All @@ -260,8 +374,15 @@ function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void {
function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
// Don't record if parent is assignment LHS (that's a function definition)
if (node.parent?.type === 'assignment' && node === node.parent.child(0)) return;
// Don't record if parent is function_definition (that's a signature)
if (node.parent?.type === 'function_definition') return;
// Skip when this call is the signature of a function/macro definition.
// tree-sitter-julia wraps the signature in a `signature` node whose parent
// is `function_definition` or `macro_definition`. Body calls (e.g.
// `println(name)` inside `function greet ... end`) appear as descendants of
// the body, not as direct children of `signature`, so they are unaffected.
if (node.parent?.type === 'signature') {
const grand = node.parent.parent;
if (grand?.type === 'function_definition' || grand?.type === 'macro_definition') return;
}

const funcNode = node.child(0);
if (!funcNode) return;
Expand Down
Loading
Loading