@@ -145,9 +145,10 @@ async function main() {
145145 patchOpenTuiAssetPaths ( )
146146 await ensureOpenTuiNativeBundle ( targetInfo )
147147
148- const wasmCopy = stagePreInitWasm ( )
149- // Even on a build-script crash, leave the developer's working tree clean.
150- process . on ( 'exit' , wasmCopy . cleanup )
148+ const treeSitterEmbed = embedTreeSitterWasmAsChunks ( )
149+ // Even on a build-script crash, restore the empty stub so a developer's
150+ // working tree doesn't end up with a multi-MB diff.
151+ process . on ( 'exit' , treeSitterEmbed . restore )
151152
152153 const outputFilename =
153154 targetInfo . platform === 'win32' ? `${ binaryName } .exe` : binaryName
@@ -190,17 +191,16 @@ async function main() {
190191
191192 runCommand ( 'bun' , buildArgs , { cwd : cliRoot } )
192193
193- // Remove the staged pre-init wasm now that the build has read it . Eager
194- // cleanup keeps a successful build clean; the exit handler above is a
195- // backstop for crashes between stage and now.
196- wasmCopy . cleanup ( )
194+ // Restore the empty stub now that the build read the chunks . Eager
195+ // cleanup keeps a successful build clean; the exit handler is a
196+ // backstop for crashes between embed and now.
197+ treeSitterEmbed . restore ( )
197198
198- // Fail the build if the wasm asset didn't actually make it into the
199- // compiled binary. The pre-init imports tree-sitter.wasm with `with {
200- // type: 'file' }`, which Bun should embed; this scan catches silent
201- // regressions (e.g. tree-shaking eliminating the import) before we ship
202- // a broken artifact.
203- verifyTreeSitterWasmEmbedded ( outputFile )
199+ // Fail the build if the chunks didn't actually make it into the
200+ // compiled binary. Catches silent regressions (tree-shaking, minifier
201+ // dropping literals, file-write timing) before we upload an artifact
202+ // that would crash for users.
203+ verifyTreeSitterWasmEmbedded ( outputFile , treeSitterEmbed . sampleChunks )
204204
205205 if ( targetInfo . platform !== 'win32' ) {
206206 chmodSync ( outputFile , 0o755 )
@@ -247,82 +247,107 @@ function findWebTreeSitterWasm(): string {
247247}
248248
/**
 * Inline `tree-sitter.wasm` into the binary as base64-encoded string
 * literals, split into many small chunks. A single 274KB string literal
 * got dropped/transformed by bun's Windows minifier in an earlier
 * attempt; small chunks are individually unremarkable to the minifier
 * and survive intact. The pre-init joins them at runtime and decodes
 * back to the wasm bytes.
 *
 * Side effect: overwrites `src/pre-init/tree-sitter-wasm-bytes.ts` with
 * the generated chunk table. The previous file content is captured
 * first so `restore` can put it back.
 *
 * @returns `restore`, which writes the original stub content back (safe
 *   to call more than once; it retries if an earlier attempt failed),
 *   and `sampleChunks`, a few distinct chunks from the start / middle /
 *   end of the table for the post-build verification step to look for
 *   in the compiled binary. Always invoke `restore` (eagerly + on exit)
 *   so a developer's working tree doesn't end up with a multi-MB diff
 *   after a build.
 * @throws If the stub or the wasm cannot be read, or the wasm is empty.
 */
function embedTreeSitterWasmAsChunks(): {
  restore: () => void
  sampleChunks: string[]
} {
  const stubPath = join(cliRoot, 'src', 'pre-init', 'tree-sitter-wasm-bytes.ts')
  const originalStub = readFileSync(stubPath, 'utf8')
  let restored = false
  const restore = (): void => {
    if (restored) return
    try {
      writeFileSync(stubPath, originalStub)
      // Latch only after a successful write: if the eager restore fails,
      // the exit-handler backstop must still get a chance to retry.
      restored = true
    } catch (error) {
      console.error('Failed to restore tree-sitter-wasm-bytes stub:', error)
    }
  }

  const sourceWasm = findWebTreeSitterWasm()
  const wasmBytes = readFileSync(sourceWasm)
  if (wasmBytes.length === 0) {
    // Fail before the stub is overwritten and before a full compile is
    // spent on an artifact that could never pass verification.
    throw new Error(
      `tree-sitter.wasm at ${sourceWasm} is empty; refusing to embed it.`,
    )
  }
  const fullBase64 = wasmBytes.toString('base64')

  // ~1KB per chunk: well under any plausible minifier-dropped-literal
  // threshold, and small enough that even a heavy-handed inliner would
  // emit them as runtime references rather than evaluating the whole
  // .join() at compile time. Keeps total chunk count manageable too
  // (~270 chunks for a 205KB wasm).
  const CHUNK_SIZE = 1024
  const chunks: string[] = []
  for (let i = 0; i < fullBase64.length; i += CHUNK_SIZE) {
    chunks.push(fullBase64.slice(i, i + CHUNK_SIZE))
  }

  const generated =
    `// AUTO-GENERATED by cli/scripts/build-binary.ts during \`bun build --compile\`.\n` +
    `// Restored to the empty stub after the build finishes — do not commit a\n` +
    `// non-empty value here.\n` +
    `export const TREE_SITTER_WASM_BASE64_CHUNKS: readonly string[] = [\n` +
    chunks.map((c) => `  ${JSON.stringify(c)},`).join('\n') +
    `\n]\n`

  writeFileSync(stubPath, generated)
  logAlways(
    `Embedded tree-sitter.wasm from ${sourceWasm} (${wasmBytes.length} bytes → ${chunks.length} chunks of ~${CHUNK_SIZE} chars).`,
  )

  // Pull sample chunks from the start, middle, and end for the post-build
  // verification scan. If any one is missing in the compiled binary,
  // something dropped or transformed the literals.
  //
  // The final chunk only holds the remainder and can be as short as four
  // characters; a needle that short can match unrelated bytes in the
  // binary. When it is too short to be distinctive, sample the preceding
  // (full-size) chunk instead.
  const MIN_SAMPLE_LENGTH = 64
  const lastIndex = chunks.length - 1
  const lastIsDistinctive =
    (chunks[lastIndex]?.length ?? 0) >= MIN_SAMPLE_LENGTH
  const tailIndex = lastIsDistinctive || lastIndex === 0 ? lastIndex : lastIndex - 1

  // A Set collapses colliding indices (tiny inputs) so the verifier does
  // not scan the binary for the same needle twice.
  const sampleIndices = Array.from(
    new Set([0, Math.floor(chunks.length / 2), tailIndex]),
  )
  const samples = sampleIndices
    .map((index) => chunks[index])
    .filter((c): c is string => Boolean(c))

  return { restore, sampleChunks: samples }
}
286319
/**
 * Sanity-check that the compiled binary actually contains the chunked
 * base64 that was embedded for tree-sitter.wasm. The caller passes a few
 * sample chunks from the start / middle / end of the chunk table; each
 * must appear verbatim (as UTF-8 bytes) somewhere in the binary. If any
 * one is missing, the bundler dropped or inlined-away part of the
 * literal table, and the runtime decode would produce garbage.
 *
 * @param outputFile Path of the compiled binary to scan.
 * @param sampleChunks Non-empty list of non-empty chunk strings to look for.
 * @throws If `sampleChunks` is empty, contains an empty string, or any
 *   sample is not present in `outputFile`.
 */
function verifyTreeSitterWasmEmbedded(
  outputFile: string,
  sampleChunks: readonly string[],
): void {
  if (sampleChunks.length === 0) {
    throw new Error('verifyTreeSitterWasmEmbedded called with no sample chunks')
  }
  // An empty needle "matches" at offset 0 of any buffer, which would make
  // this check pass without proving anything. Reject it up front.
  if (sampleChunks.some((chunk) => chunk.length === 0)) {
    throw new Error(
      'verifyTreeSitterWasmEmbedded called with an empty sample chunk',
    )
  }

  const binary = readFileSync(outputFile)
  for (const chunk of sampleChunks) {
    const needle = Buffer.from(chunk, 'utf8')
    const idx = binary.indexOf(needle)
    if (idx === -1) {
      throw new Error(
        `Embedded tree-sitter wasm chunk not found in ${outputFile}.\n` +
          `Missing chunk (first 80 chars): ${chunk.slice(0, 80)}…\n` +
          `Either the \`tree-sitter-wasm-bytes.ts\` literals were tree-shaken,\n` +
          `the minifier transformed them away, or the pre-init's import wasn't\n` +
          `actually consumed. The runtime tree-sitter init would fail with\n` +
          `"Internal error: tree-sitter.wasm not found".`,
      )
    }
  }
  logAlways(
    `Verified ${sampleChunks.length} embedded base64 chunks in compiled binary.`,
  )
}
328353
0 commit comments