Skip to content

Commit da4f4c7

Browse files
jahooma and claude committed
Pass tree-sitter wasm bytes to Parser.init on Windows
The previous fix (d79d758) embedded the wasm path on process.env and let init-node.ts resolve it via fs.existsSync. That worked on Linux/mac but kept crashing on Windows: bun --compile reports the embedded asset as `B:\~BUN\root\tree-sitter.wasm`, and fs.existsSync returns false for that path even though fs.readFileSync succeeds. resolveTreeSitterWasm fell through every branch and threw "Internal error: tree-sitter.wasm not found".

Read the bytes once in pre-init via fs.readFileSync (which works on Windows bunfs) and stash them on globalThis. init-node.ts now passes them straight to Parser.init({ wasmBinary }), bypassing locateFile and filesystem path resolution entirely.

globalThis is the cross-bundle channel: the SDK pre-built bundle inlines its own copy of init-node.ts, so a module-level variable in this package isn't visible to the singleton initialized via the SDK. The path-based fallback is preserved for external SDK consumers that don't pre-load.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 869e34b commit da4f4c7

2 files changed

Lines changed: 55 additions & 18 deletions

File tree

Lines changed: 26 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,15 @@
1-
// Embed tree-sitter.wasm into the bun-compile binary at a bunfs path the runtime
2-
// can find. Must be the very first import in `index.tsx`: subsequent imports
3-
// (the SDK / code-map) eagerly construct a tree-sitter parser singleton, and its
4-
// `locateFile` callback reads `CODEBUFF_TREE_SITTER_WASM_PATH` from `process.env`.
1+
// Embed tree-sitter.wasm into the bun-compile binary so the SDK's tree-sitter
2+
// parser singleton can find it at runtime. Must be the very first import in
3+
// `index.tsx`: subsequent imports (the SDK / code-map) eagerly construct the
4+
// parser, and its init reads what we publish here on `globalThis` and `process.env`.
55
//
6-
// Without this, web-tree-sitter@0.25.10 falls back to `require.resolve` which —
7-
// per the package's split `import`/`require` exports map — returns the build-time
8-
// absolute path of `tree-sitter.cjs` and fails on user machines.
6+
// Why not just `locateFile` + a path? On Windows, bun --compile reports the
7+
// embedded path as `B:\~BUN\root\...`, and `fs.existsSync` returns false for
8+
// that path inside the running binary even though `fs.readFileSync` works. So
9+
// we read the bytes once at startup and pass them straight to `Parser.init`
10+
// via `wasmBinary`, sidestepping filesystem resolution entirely.
11+
12+
import * as fs from 'fs'
913

1014
// @ts-expect-error - Bun's `with { type: 'file' }` returns a string path; TS resolves
1115
// the .wasm file via web-tree-sitter's exports map and has no loader for it.
@@ -14,5 +18,20 @@ import treeSitterWasmPath from 'web-tree-sitter/tree-sitter.wasm' with {
1418
}
1519

1620
if (treeSitterWasmPath) {
21+
// Path stays for any consumer (tests, dev runs) that still resolves via fs.
1722
process.env.CODEBUFF_TREE_SITTER_WASM_PATH = treeSitterWasmPath
23+
24+
try {
25+
const binary = fs.readFileSync(treeSitterWasmPath)
26+
// globalThis is the only cross-bundle channel: the SDK pre-built bundle
27+
// inlines its own copy of `init-node.ts`, so a module-level variable in
28+
// the source package wouldn't be visible to the singleton initialized
29+
// via the SDK.
30+
;(globalThis as { __CODEBUFF_TREE_SITTER_WASM_BINARY__?: Uint8Array }).__CODEBUFF_TREE_SITTER_WASM_BINARY__ =
31+
new Uint8Array(binary.buffer, binary.byteOffset, binary.byteLength)
32+
} catch {
33+
// readFileSync failure is unexpected (the file is supposed to be embedded)
34+
// but we let init-node.ts fall back to path-based resolution and surface
35+
// a clearer error if that also fails.
36+
}
1837
}

packages/code-map/src/init-node.ts

Lines changed: 29 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -4,26 +4,32 @@ import * as path from 'path'
44
import { Parser } from 'web-tree-sitter'
55

66
const TREE_SITTER_WASM_ENV_VAR = 'CODEBUFF_TREE_SITTER_WASM_PATH'
7+
const WASM_BINARY_GLOBAL_KEY = '__CODEBUFF_TREE_SITTER_WASM_BINARY__'
78

89
/**
910
* Override the path to `tree-sitter.wasm` used during {@link initTreeSitterForNode}.
1011
*
11-
* Needed for `bun build --compile` binaries: the embedded `tree-sitter.js` reports a
12-
* `scriptDir` like `/$bunfs/root/`, but the runtime wasm isn't auto-embedded next to
13-
* it, and `require.resolve('web-tree-sitter')` resolves to the build-time absolute
14-
* path of `tree-sitter.cjs` (per the package's `require` exports condition added in
15-
* 0.25.10), which doesn't exist on the end user's machine. Callers building binaries
16-
* should embed the wasm via Bun's `import ... with { type: 'file' }` and pass the
17-
* resulting path here before any tree-sitter use.
12+
* Path-based fallback for environments that can't pre-load the wasm bytes (e.g.
13+
* external SDK consumers using a custom layout). The CLI binary instead pre-loads
14+
* bytes onto `globalThis.__CODEBUFF_TREE_SITTER_WASM_BINARY__` because Windows
15+
* bunfs paths (`B:\~BUN\root\...`) round-trip inconsistently through
16+
* `fs.existsSync` even when `fs.readFileSync` succeeds.
1817
*
19-
* Stored on `process.env` so it reaches every copy of this module — the SDK
20-
* pre-built bundle inlines its own copy of `init-node.ts`, so a module-level
21-
* variable here wouldn't be visible to the singleton initialized via the SDK.
18+
* Stored on `process.env` (not a module-level var) so the value reaches every
19+
* copy of this module — the SDK pre-built bundle inlines its own copy of
20+
* `init-node.ts`, so a local variable here wouldn't be visible to the singleton
21+
* initialized via the SDK.
2222
*/
2323
export function setTreeSitterWasmPath(wasmPath: string): void {
2424
process.env[TREE_SITTER_WASM_ENV_VAR] = wasmPath
2525
}
2626

27+
function getEmbeddedWasmBinary(): Uint8Array | undefined {
28+
return (
29+
globalThis as { [WASM_BINARY_GLOBAL_KEY]?: Uint8Array }
30+
)[WASM_BINARY_GLOBAL_KEY]
31+
}
32+
2733
function resolveTreeSitterWasm(scriptDir: string): string {
2834
const override = process.env[TREE_SITTER_WASM_ENV_VAR]
2935
if (override && fs.existsSync(override)) {
@@ -45,15 +51,27 @@ function resolveTreeSitterWasm(scriptDir: string): string {
4551
// Package not resolvable; fall through.
4652
}
4753

54+
const overrideDiagnostic = override
55+
? ` (env ${TREE_SITTER_WASM_ENV_VAR}=${override} did not exist)`
56+
: ''
4857
throw new Error(
49-
`Internal error: tree-sitter.wasm not found (looked at scriptDir=${scriptDir} and via web-tree-sitter package). Set ${TREE_SITTER_WASM_ENV_VAR} or ensure the file is included in your deployment bundle.`,
58+
`Internal error: tree-sitter.wasm not found (looked at scriptDir=${scriptDir} and via web-tree-sitter package${overrideDiagnostic}). Set ${TREE_SITTER_WASM_ENV_VAR} or ensure the file is included in your deployment bundle.`,
5059
)
5160
}
5261

5362
/**
5463
* Initialize web-tree-sitter for Node.js environments with proper WASM file location
5564
*/
5665
export async function initTreeSitterForNode(): Promise<void> {
66+
const embedded = getEmbeddedWasmBinary()
67+
if (embedded) {
68+
// Pass the bytes directly so emscripten's `getBinarySync` returns them
69+
// without ever calling `locateFile`. This avoids the path-resolution
70+
// failure mode entirely and is the path the CLI binary takes.
71+
await Parser.init({ wasmBinary: embedded })
72+
return
73+
}
74+
5775
// Use locateFile to override where the runtime looks for tree-sitter.wasm
5876
await Parser.init({
5977
locateFile: (name: string, scriptDir: string) => {

0 commit comments

Comments
 (0)