From 4c36c8d6e27f355ceb7ffd07351a41e9bc569399 Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Fri, 29 May 2026 01:18:18 +0200 Subject: [PATCH 1/9] Move the book builder code into book/ folder out of docs/ --- .github/workflows/jekyll-gh-pages.yml | 2 +- WIP.md | 2 +- {docs => book}/lib/fast-array-onebuf.mjs | 0 {docs => book}/lib/fast-decode-name.mjs | 0 {docs => book}/lib/fast-dict-array.mjs | 0 {docs => book}/lib/fast-dict-iter.mjs | 0 {docs => book}/lib/fast-dict-onebuf.mjs | 0 {docs => book}/lib/fast-indirect-objects.mjs | 0 {docs => book}/lib/fast-inflate.mjs | 0 {docs => book}/lib/fast-number-to-string.mjs | 0 {docs => book}/lib/fast-parse-dict.mjs | 0 {docs => book}/lib/fast-parse-name.mjs | 0 {docs => book}/lib/fast-parse-number.mjs | 0 {docs => book}/lib/fast-parse-object.mjs | 0 {docs => book}/lib/fast-pdfnumber-pool.mjs | 0 {docs => book}/lib/fast-refs-class.mjs | 0 {docs => book}/lib/fast-refs.mjs | 0 {docs => book}/lib/fast-size-in-bytes.mjs | 0 {docs => book}/lib/fast-sync-load.mjs | 0 {docs => book}/lib/measure-pass.mjs | 0 {docs => book}/lib/outline.mjs | 0 {docs => book}/lib/paged.browser.js | 0 {docs => book}/lib/parallel-deflate.mjs | 0 {docs => book}/lib/postprocesser.mjs | 0 {docs => book}/lib/progress-handler.js | 0 {docs => book}/render-book.mjs | 2 +- docs/Documentation/Builder.md | 6 +-- docs/Documentation/Fixes-PDFLib.md | 2 +- docs/Documentation/Fixes-PagedJS.md | 2 +- docs/Documentation/Fixes.md | 4 +- docs/Documentation/PDF-Generation.md | 10 ++-- docs/Documentation/Tools.md | 7 +-- docs/book.bat | 2 +- perf/instrument-objclasses.mjs | 26 ++++----- perf/instrument-pioh.mjs | 26 ++++----- perf/instrument-slot-types.mjs | 2 +- perf/measure.mjs | 56 ++++++++++---------- perf/phase0-measure.mjs | 24 ++++----- perf/probe-parallel.mjs | 2 +- perf/probe-renderer-mem.mjs | 2 +- 40 files changed, 89 insertions(+), 88 deletions(-) rename {docs => book}/lib/fast-array-onebuf.mjs (100%) rename {docs => book}/lib/fast-decode-name.mjs 
(100%) rename {docs => book}/lib/fast-dict-array.mjs (100%) rename {docs => book}/lib/fast-dict-iter.mjs (100%) rename {docs => book}/lib/fast-dict-onebuf.mjs (100%) rename {docs => book}/lib/fast-indirect-objects.mjs (100%) rename {docs => book}/lib/fast-inflate.mjs (100%) rename {docs => book}/lib/fast-number-to-string.mjs (100%) rename {docs => book}/lib/fast-parse-dict.mjs (100%) rename {docs => book}/lib/fast-parse-name.mjs (100%) rename {docs => book}/lib/fast-parse-number.mjs (100%) rename {docs => book}/lib/fast-parse-object.mjs (100%) rename {docs => book}/lib/fast-pdfnumber-pool.mjs (100%) rename {docs => book}/lib/fast-refs-class.mjs (100%) rename {docs => book}/lib/fast-refs.mjs (100%) rename {docs => book}/lib/fast-size-in-bytes.mjs (100%) rename {docs => book}/lib/fast-sync-load.mjs (100%) rename {docs => book}/lib/measure-pass.mjs (100%) rename {docs => book}/lib/outline.mjs (100%) rename {docs => book}/lib/paged.browser.js (100%) rename {docs => book}/lib/parallel-deflate.mjs (100%) rename {docs => book}/lib/postprocesser.mjs (100%) rename {docs => book}/lib/progress-handler.js (100%) rename {docs => book}/render-book.mjs (99%) diff --git a/.github/workflows/jekyll-gh-pages.yml b/.github/workflows/jekyll-gh-pages.yml index f2524974..0366dcf3 100644 --- a/.github/workflows/jekyll-gh-pages.yml +++ b/.github/workflows/jekyll-gh-pages.yml @@ -91,7 +91,7 @@ jobs: - name: Render book PDF run: | mkdir -p _pdf - node render-book.mjs _site-pdf/book.html -o "_pdf/twinBASIC Book.pdf" --outline-tags h1,h2,h3,h4 --additional-script ../perf/detach-pages.js + node ../book/render-book.mjs _site-pdf/book.html -o "_pdf/twinBASIC Book.pdf" --outline-tags h1,h2,h3,h4 --additional-script ../perf/detach-pages.js working-directory: ./docs - name: Upload Pages artifact uses: actions/upload-pages-artifact@v5 diff --git a/WIP.md b/WIP.md index 44b2d31e..595c269c 100644 --- a/WIP.md +++ b/WIP.md @@ -437,7 +437,7 @@ From `docs/`: - `build.bat` — runs `node 
..\builder\tbdocs.mjs --src .` which produces three trees in one pass: the online copy at `_site/`, a `file://`-browsable copy at `_site-offline/`, and the sparse pagedjs source at `_site-pdf/`. The offline pass adds ~700 ms and the PDF pass adds ~150 ms on top of the ~2 s online build. Toggle `also_build_offline` / `also_build_pdf` in `_config.yml` (or pass `--no-offline` / `--no-pdf`) to skip a sibling output. - `serve.bat` — runs `tbdocs --serve`: initial build, then a long-lived process with watcher, debounced rebuilds, and SSE-driven browser auto-reload. Ctrl+C to stop. - `check.bat` — link + integrity check (offline `scripts/check_links.mjs` against `_site/` and `_site-offline/`; the offline pass also runs `--forbid 'https://docs.twinbasic.com'` to catch surviving live-site links). -- `book.bat` — renders the PDF from `_site-pdf/book.html` via `node render-book.mjs` into `_pdf/book.pdf`. Run `build.bat` first to populate `_site-pdf/`. +- `book.bat` — renders the PDF from `_site-pdf/book.html` via `node ..\book\render-book.mjs` into `_pdf/twinBASIC Book.pdf`. Run `build.bat` first to populate `_site-pdf/`. 
## Site integrity check diff --git a/docs/lib/fast-array-onebuf.mjs b/book/lib/fast-array-onebuf.mjs similarity index 100% rename from docs/lib/fast-array-onebuf.mjs rename to book/lib/fast-array-onebuf.mjs diff --git a/docs/lib/fast-decode-name.mjs b/book/lib/fast-decode-name.mjs similarity index 100% rename from docs/lib/fast-decode-name.mjs rename to book/lib/fast-decode-name.mjs diff --git a/docs/lib/fast-dict-array.mjs b/book/lib/fast-dict-array.mjs similarity index 100% rename from docs/lib/fast-dict-array.mjs rename to book/lib/fast-dict-array.mjs diff --git a/docs/lib/fast-dict-iter.mjs b/book/lib/fast-dict-iter.mjs similarity index 100% rename from docs/lib/fast-dict-iter.mjs rename to book/lib/fast-dict-iter.mjs diff --git a/docs/lib/fast-dict-onebuf.mjs b/book/lib/fast-dict-onebuf.mjs similarity index 100% rename from docs/lib/fast-dict-onebuf.mjs rename to book/lib/fast-dict-onebuf.mjs diff --git a/docs/lib/fast-indirect-objects.mjs b/book/lib/fast-indirect-objects.mjs similarity index 100% rename from docs/lib/fast-indirect-objects.mjs rename to book/lib/fast-indirect-objects.mjs diff --git a/docs/lib/fast-inflate.mjs b/book/lib/fast-inflate.mjs similarity index 100% rename from docs/lib/fast-inflate.mjs rename to book/lib/fast-inflate.mjs diff --git a/docs/lib/fast-number-to-string.mjs b/book/lib/fast-number-to-string.mjs similarity index 100% rename from docs/lib/fast-number-to-string.mjs rename to book/lib/fast-number-to-string.mjs diff --git a/docs/lib/fast-parse-dict.mjs b/book/lib/fast-parse-dict.mjs similarity index 100% rename from docs/lib/fast-parse-dict.mjs rename to book/lib/fast-parse-dict.mjs diff --git a/docs/lib/fast-parse-name.mjs b/book/lib/fast-parse-name.mjs similarity index 100% rename from docs/lib/fast-parse-name.mjs rename to book/lib/fast-parse-name.mjs diff --git a/docs/lib/fast-parse-number.mjs b/book/lib/fast-parse-number.mjs similarity index 100% rename from docs/lib/fast-parse-number.mjs rename to 
book/lib/fast-parse-number.mjs diff --git a/docs/lib/fast-parse-object.mjs b/book/lib/fast-parse-object.mjs similarity index 100% rename from docs/lib/fast-parse-object.mjs rename to book/lib/fast-parse-object.mjs diff --git a/docs/lib/fast-pdfnumber-pool.mjs b/book/lib/fast-pdfnumber-pool.mjs similarity index 100% rename from docs/lib/fast-pdfnumber-pool.mjs rename to book/lib/fast-pdfnumber-pool.mjs diff --git a/docs/lib/fast-refs-class.mjs b/book/lib/fast-refs-class.mjs similarity index 100% rename from docs/lib/fast-refs-class.mjs rename to book/lib/fast-refs-class.mjs diff --git a/docs/lib/fast-refs.mjs b/book/lib/fast-refs.mjs similarity index 100% rename from docs/lib/fast-refs.mjs rename to book/lib/fast-refs.mjs diff --git a/docs/lib/fast-size-in-bytes.mjs b/book/lib/fast-size-in-bytes.mjs similarity index 100% rename from docs/lib/fast-size-in-bytes.mjs rename to book/lib/fast-size-in-bytes.mjs diff --git a/docs/lib/fast-sync-load.mjs b/book/lib/fast-sync-load.mjs similarity index 100% rename from docs/lib/fast-sync-load.mjs rename to book/lib/fast-sync-load.mjs diff --git a/docs/lib/measure-pass.mjs b/book/lib/measure-pass.mjs similarity index 100% rename from docs/lib/measure-pass.mjs rename to book/lib/measure-pass.mjs diff --git a/docs/lib/outline.mjs b/book/lib/outline.mjs similarity index 100% rename from docs/lib/outline.mjs rename to book/lib/outline.mjs diff --git a/docs/lib/paged.browser.js b/book/lib/paged.browser.js similarity index 100% rename from docs/lib/paged.browser.js rename to book/lib/paged.browser.js diff --git a/docs/lib/parallel-deflate.mjs b/book/lib/parallel-deflate.mjs similarity index 100% rename from docs/lib/parallel-deflate.mjs rename to book/lib/parallel-deflate.mjs diff --git a/docs/lib/postprocesser.mjs b/book/lib/postprocesser.mjs similarity index 100% rename from docs/lib/postprocesser.mjs rename to book/lib/postprocesser.mjs diff --git a/docs/lib/progress-handler.js b/book/lib/progress-handler.js similarity index 100% 
rename from docs/lib/progress-handler.js rename to book/lib/progress-handler.js diff --git a/docs/render-book.mjs b/book/render-book.mjs similarity index 99% rename from docs/render-book.mjs rename to book/render-book.mjs index 08117d54..427586b0 100644 --- a/docs/render-book.mjs +++ b/book/render-book.mjs @@ -155,7 +155,7 @@ import { PDFDocument } from 'pdf-lib'; // heap traffic drops ~13 % (123 MB -> 107 MB). PDFNumber is // immutable so sharing is safe. // measure-pass (Phase 1) -- no-allocate byte walker -// (docs/lib/measure-pass.mjs) that runs in front of +// (lib/measure-pass.mjs) that runs in front of // PDFDocument.load on the raw Chrome PDF and counts dictSlots // + arraySlots. The counts drive setExpectedDictSlots() on // fast-dict-onebuf and setExpectedArraySlots() on diff --git a/docs/Documentation/Builder.md b/docs/Documentation/Builder.md index 39775046..64600bab 100644 --- a/docs/Documentation/Builder.md +++ b/docs/Documentation/Builder.md @@ -106,7 +106,7 @@ A single `package.json` at the repo root carries everything --- the static site } ``` -No template engine, no framework, no bundler. `acorn` + `acorn-walk` parse the upstream `just-the-docs.js` so the offline patcher can target the AST instead of regex-matching strings; `markdown-it-{attrs,deflist,footnote}` cover the kramdown extensions the legacy renderer supported; `shiki` does the syntax highlighting; `lunr` powers the search index. `mermaid` and `puppeteer` together drive the `.mmd` → `.svg` pre-phase (one headless Chromium per batch, replacing the old per-diagram `npx mmdc` fork); `puppeteer` is shared with the PDF renderer (`docs/render-book.mjs`). `pdf-lib` + `html-entities` + `htmlparser2` are the PDF renderer's own toolchain. The `postinstall` runs `builder/scripts/patch-dagre.mjs`, which rewrites mermaid's bundled dagre adapter --- see [Mermaid Dagre Patches](Fixes/Dagre). +No template engine, no framework, no bundler. 
`acorn` + `acorn-walk` parse the upstream `just-the-docs.js` so the offline patcher can target the AST instead of regex-matching strings; `markdown-it-{attrs,deflist,footnote}` cover the kramdown extensions the legacy renderer supported; `shiki` does the syntax highlighting; `lunr` powers the search index. `mermaid` and `puppeteer` together drive the `.mmd` → `.svg` pre-phase (one headless Chromium per batch, replacing the old per-diagram `npx mmdc` fork); `puppeteer` is shared with the PDF renderer (`book/render-book.mjs`). `pdf-lib` + `html-entities` + `htmlparser2` are the PDF renderer's own toolchain. The `postinstall` runs `builder/scripts/patch-dagre.mjs`, which rewrites mermaid's bundled dagre adapter --- see [Mermaid Dagre Patches](Fixes/Dagre). `mermaid` is **exact-pinned** (`"11.15.0"`, not `"^11.15.0"`). The dagre patches target a chunk filename whose hash component (`dagre-ZXKKJJHT.mjs`) is regenerated on each mermaid release, so a floated range could break the postinstall on a transparent patch bump. @@ -162,7 +162,7 @@ The largest module by line count (~990 lines), split into two clearly-labelled h 4. shift heading levels by `n in [0, 3]` capped at `h7-stub`; 5. prefix every heading id and intra-chapter `href="#"` with the chapter anchor. -Each part and chapter divider page contains the entry's title as an H1/H2 heading (or a silent `

` when `no_outline_entry:` is set), which becomes the PDF bookmark target. When `landing_is_target:` is set on an entry, the heading is instead injected directly into the landing-page article so the PDF bookmark navigates there rather than to the blank divider page; `rewriteBookHrefs`'s landing-H1 strip skips the injected heading via a `data-divider-heading` attribute. `outline_closed:` stamps `data-pdf-bookmark-closed` on the heading (or on the first content article for `no_outline_entry` entries), and `parseOutline` in `docs/lib/outline.mjs` reads the attribute to write a negative PDF `/Count` for that bookmark node. Full schema is documented in the `_data/book.yml` file header. +Each part and chapter divider page contains the entry's title as an H1/H2 heading (or a silent `

` when `no_outline_entry:` is set), which becomes the PDF bookmark target. When `landing_is_target:` is set on an entry, the heading is instead injected directly into the landing-page article so the PDF bookmark navigates there rather than to the blank divider page; `rewriteBookHrefs`'s landing-H1 strip skips the injected heading via a `data-divider-heading` attribute. `outline_closed:` stamps `data-pdf-bookmark-closed` on the heading (or on the first content article for `no_outline_entry` entries), and `parseOutline` in `book/lib/outline.mjs` reads the attribute to write a negative PDF `/Count` for that bookmark node. Full schema is documented in the `_data/book.yml` file header. `augmentWithRedirectStubs` synthesises virtual `Page` records from each real page's `redirect_from` so the cross-ref rewriter still captures legacy URLs the way Jekyll's `jekyll-redirect-from` did (its stubs appeared in `site.pages` and got swept into the lookup table). `chapterAnchorFromUrl` is the URL → `ch-…` slug helper that generates both `id="..."` and the `#…` href targets. @@ -295,7 +295,7 @@ so an accidental discovery-rule regression that silently drops pages appears as Some build-adjacent code lives at the repo root rather than under `builder/`: -- **PDF rendering** --- `docs/render-book.mjs` plus its `docs/lib/*.mjs` helpers and the `paged.browser.js` bundle. `tbdocs` produces `_site-pdf/book.html`; the actual PDF render runs separately via `book.bat`. The driver is intentionally not part of the site generator: `pdf-lib` is a heavy dep used only at PDF time. `puppeteer` is shared between `render-book.mjs` and `builder/mermaid.mjs` (one Chromium binary, two consumers). See [PDF Generation](PDF-Generation) for the full internals. +- **PDF rendering** --- `book/render-book.mjs` plus its `book/lib/*.mjs` helpers and the `paged.browser.js` bundle. `tbdocs` produces `_site-pdf/book.html`; the actual PDF render runs separately via `book.bat`. 
The driver is intentionally not part of the site generator: `pdf-lib` is a heavy dep used only at PDF time. `puppeteer` is shared between `render-book.mjs` and `builder/mermaid.mjs` (one Chromium binary, two consumers). See [PDF Generation](PDF-Generation) for the full internals. - **Link checking** --- `scripts/check_links.mjs` reads from disk after the build; not part of the generator. - **External link crawling** --- `scripts/crawl_check.mjs` reads from HTTP; not part of the generator. - **Mermaid source files** --- `docs/assets/images/mmd/*.mmd` are source, `*.svg` are build artifacts that `tbdocs` regenerates as needed. diff --git a/docs/Documentation/Fixes-PDFLib.md b/docs/Documentation/Fixes-PDFLib.md index b2af0d6e..1abe6509 100644 --- a/docs/Documentation/Fixes-PDFLib.md +++ b/docs/Documentation/Fixes-PDFLib.md @@ -9,7 +9,7 @@ permalink: /Documentation/Development/Fixes/PDFLib # pdf-lib Patches {: .no_toc } -The files under `docs/lib/fast-*.mjs` and `docs/lib/parallel-deflate.mjs` are side-effecting ES modules that patch pdf-lib's live exports. All are imported at the top of `render-book.mjs` before any pdf-lib operation runs; they are mutually compatible and idempotent (each guards its installation with a flag on the patched prototype or module). Together they reduce the process phase --- parsing Chromium's raw PDF output, adding bookmarks and metadata, and serialising the result --- from ~40 seconds to ~1.6 seconds on the 1651-page book. +The files under `book/lib/fast-*.mjs` and `book/lib/parallel-deflate.mjs` are side-effecting ES modules that patch pdf-lib's live exports. All are imported at the top of `render-book.mjs` before any pdf-lib operation runs; they are mutually compatible and idempotent (each guards its installation with a flag on the patched prototype or module). 
Together they reduce the process phase --- parsing Chromium's raw PDF output, adding bookmarks and metadata, and serialising the result --- from ~40 seconds to ~1.6 seconds on the 1651-page book. The root cause of the need for all these patches is the same: pdf-lib is designed for general-purpose use in both browsers and Node, and optimises for generality rather than throughput on a single large document. diff --git a/docs/Documentation/Fixes-PagedJS.md b/docs/Documentation/Fixes-PagedJS.md index 7b5d660e..ba808974 100644 --- a/docs/Documentation/Fixes-PagedJS.md +++ b/docs/Documentation/Fixes-PagedJS.md @@ -9,7 +9,7 @@ permalink: /Documentation/Development/Fixes/PagedJS # Paged.js Patches {: .no_toc } -`docs/lib/paged.browser.js` is a vendored, patched copy of paged.js v0.4.3 (MIT). Upstream paged.js is designed for interactive browsers: it yields to the event loop to keep pages responsive during long renders, uses async functions throughout, and registers observation and resize callbacks. None of that is useful in a headless, non-interactive Chromium process where the only goal is to produce a PDF as fast as possible. This page documents every patch and its rationale. +`book/lib/paged.browser.js` is a vendored, patched copy of paged.js v0.4.3 (MIT). Upstream paged.js is designed for interactive browsers: it yields to the event loop to keep pages responsive during long renders, uses async functions throughout, and registers observation and resize callbacks. None of that is useful in a headless, non-interactive Chromium process where the only goal is to produce a PDF as fast as possible. This page documents every patch and its rationale. 
* TOC goes here {:toc} diff --git a/docs/Documentation/Fixes.md b/docs/Documentation/Fixes.md index 9ca4813f..0d229ba5 100644 --- a/docs/Documentation/Fixes.md +++ b/docs/Documentation/Fixes.md @@ -10,10 +10,10 @@ permalink: /Documentation/Development/Fixes # Library Patches {: .no_toc } -Several third-party libraries carry in-tree modifications. `docs/lib/paged.browser.js` is a patched copy of paged.js v0.4.3 (MIT); the thirteen `fast-*.mjs` files there are side-effecting shims applied to pdf-lib's live exports before each PDF process phase; and `builder/scripts/patch-dagre.mjs` is a `postinstall` hook that rewrites mermaid's bundled dagre adapter to fix per-cluster layout. This section documents every change: what the upstream behaviour was, why it was unsuitable for the build pipeline, and what was changed. +Several third-party libraries carry in-tree modifications. `book/lib/paged.browser.js` is a patched copy of paged.js v0.4.3 (MIT); the thirteen `fast-*.mjs` files there are side-effecting shims applied to pdf-lib's live exports before each PDF process phase; and `builder/scripts/patch-dagre.mjs` is a `postinstall` hook that rewrites mermaid's bundled dagre adapter to fix per-cluster layout. This section documents every change: what the upstream behaviour was, why it was unsuitable for the build pipeline, and what was changed. ## Sub-pages -- [Paged.js Patches](Fixes/PagedJS) --- changes to `docs/lib/paged.browser.js`: the synchronous execution chain, hook dispatch fast-paths, DOM lookup optimizations, layout correctness fixes, and miscellaneous headless-specific changes. +- [Paged.js Patches](Fixes/PagedJS) --- changes to `book/lib/paged.browser.js`: the synchronous execution chain, hook dispatch fast-paths, DOM lookup optimizations, layout correctness fixes, and miscellaneous headless-specific changes. 
- [pdf-lib Patches](Fixes/PDFLib) --- the thirteen `fast-*.mjs` shims and `parallel-deflate.mjs` that retune pdf-lib's parser, object model, and serializer for the process phase. - [Mermaid Dagre Patches](Fixes/Dagre) --- five patches to `node_modules/mermaid/dist/chunks/mermaid.esm/dagre-ZXKKJJHT.mjs` that make `direction LR` subgraphs work correctly when they have cross-cluster edges or no internal edges at all. diff --git a/docs/Documentation/PDF-Generation.md b/docs/Documentation/PDF-Generation.md index 3948b006..c5991c2d 100644 --- a/docs/Documentation/PDF-Generation.md +++ b/docs/Documentation/PDF-Generation.md @@ -8,7 +8,7 @@ permalink: /Documentation/Development/PDF-Generation # PDF Generation {: .no_toc } -Internals of the two-stage PDF pipeline: `tbdocs` Phase 8 assembles a sparse `_site-pdf/` source tree, then `docs/render-book.mjs` renders it into `_pdf/twinBASIC Book.pdf` via headless Chromium + paged.js + pdf-lib. Read this when modifying the renderer, the print stylesheet, or the paged.js bundle. +Internals of the two-stage PDF pipeline: `tbdocs` Phase 8 assembles a sparse `_site-pdf/` source tree, then `book/render-book.mjs` renders it into `_pdf/twinBASIC Book.pdf` via headless Chromium + paged.js + pdf-lib. Read this when modifying the renderer, the print stylesheet, or the paged.js bundle. * TOC goes here {:toc} @@ -22,7 +22,7 @@ The two stages are decoupled: `tbdocs` builds `_site-pdf/` as part of its normal ## Running the renderer ``` -node render-book.mjs -o +node book/render-book.mjs -o [--outline-tags h1,h2,h3,h4] [-t ] [--additional-script ]... 
@@ -39,7 +39,7 @@ node render-book.mjs -o `book.bat` runs the standard production invocation: ```batch -node render-book.mjs _site-pdf\book.html -o "_pdf\twinBASIC Book.pdf" ^ +node ..\book\render-book.mjs _site-pdf\book.html -o "_pdf\twinBASIC Book.pdf" ^ --outline-tags h1,h2,h3,h4 ^ --additional-script ..\perf\detach-pages.js ``` @@ -48,7 +48,7 @@ Always run `build.bat` first to populate `_site-pdf/`. ## render-book.mjs -`docs/render-book.mjs` drives the three phases. Its helpers live in `docs/lib/`. +`book/render-book.mjs` drives the three phases. Its helpers live in `book/lib/`. ### Phase 1: Render @@ -227,7 +227,7 @@ Paged.registerHandlers(ProgressHandler); ## paged.browser.js -`docs/lib/paged.browser.js` is a vendored, lightly patched copy of [Paged.js](https://pagedjs.org/) v0.4.3 (MIT). Paged.js is a CSS Paged Media polyfill: it reads `@page` rules from the linked stylesheet, breaks the document into discrete DOM pages, resolves CSS counters, and copies running headers and footers from `string-set` declarations into each page's margin boxes. Chromium then renders the resulting DOM into a PDF. +`book/lib/paged.browser.js` is a vendored, lightly patched copy of [Paged.js](https://pagedjs.org/) v0.4.3 (MIT). Paged.js is a CSS Paged Media polyfill: it reads `@page` rules from the linked stylesheet, breaks the document into discrete DOM pages, resolves CSS counters, and copies running headers and footers from `string-set` declarations into each page's margin boxes. Chromium then renders the resulting DOM into a PDF. ### Global API diff --git a/docs/Documentation/Tools.md b/docs/Documentation/Tools.md index 4431d28b..9a52cd47 100644 --- a/docs/Documentation/Tools.md +++ b/docs/Documentation/Tools.md @@ -38,7 +38,7 @@ Runs `scripts/check_links.mjs` against the rendered `_site/` and `_site-offline/ docs\book.bat -Renders the PDF book from `_site-pdf/book.html` into `_pdf/twinBASIC Book.pdf`. Calls `node render-book.mjs` (see [below](#docsrender-bookmjs)). 
Requires `build.bat` to have populated `_site-pdf/` and a Chromium install from `npx puppeteer browsers install chrome`. The first invocation auto-runs `npm install` at the repository root if `puppeteer` is missing. The output filename is set by the `-o` argument here; to rename the PDF, update it in `docs/book.bat` and in `.github/workflows/jekyll-gh-pages.yml`. +Renders the PDF book from `_site-pdf/book.html` into `_pdf/twinBASIC Book.pdf`. Calls `node ..\book\render-book.mjs` (see [below](#bookrender-bookmjs)). Requires `build.bat` to have populated `_site-pdf/` and a Chromium install from `npx puppeteer browsers install chrome`. The first invocation auto-runs `npm install` at the repository root if `puppeteer` is missing. The output filename is set by the `-o` argument here; to rename the PDF, update it in `docs/book.bat` and in `.github/workflows/jekyll-gh-pages.yml`. ## CLI tools @@ -107,9 +107,10 @@ Online link crawler for the deployed site. Starts at ``, GETs every s Normalises literal en-dash / em-dash characters in markdown source under `docs/` to their kramdown smart-quotes ASCII source form (`--` for en-dash, `---` for em-dash). The site forbids literal `–` / `—` in source --- this is the canonical fixer if any slip back in. Skips fenced code blocks and inline code spans. -### docs/render-book.mjs +### book/render-book.mjs +{: #bookrender-bookmjs } - node docs/render-book.mjs -o [options] + node book/render-book.mjs -o [options] The PDF renderer that `book.bat` calls. It is a generic HTML-to-PDF converter: it takes the pre-built `_site-pdf/book.html` as its sole document input and has no knowledge of `_data/book.yml` --- all chapter structure, heading levels, and outline entries are already embedded in the HTML by `tbdocs` Phase 8. 
Uses `puppeteer` + `paged.js` + `pdf-lib` directly, so it controls `pdf-lib`'s `parseSpeed` (the default yields the event loop between every 100 objects on load, adding ~32 seconds to a 100-second build for no reason in Node --- see [perf/README.md](https://github.com/twinbasic/documentation/blob/main/perf/README.md) for the diagnosis). Replaces an earlier `npx pagedjs-cli ...` invocation. diff --git a/docs/book.bat b/docs/book.bat index cbfe2cc2..c9a243c0 100644 --- a/docs/book.bat +++ b/docs/book.bat @@ -25,4 +25,4 @@ if not exist ..\node_modules\puppeteer\package.json ( if errorlevel 1 exit /b 1 ) if not exist _pdf mkdir _pdf -node render-book.mjs _site-pdf\book.html -o "_pdf\twinBASIC Book.pdf" --outline-tags h1,h2,h3,h4 --additional-script ..\perf\detach-pages.js +node ..\book\render-book.mjs _site-pdf\book.html -o "_pdf\twinBASIC Book.pdf" --outline-tags h1,h2,h3,h4 --additional-script ..\perf\detach-pages.js diff --git a/perf/instrument-objclasses.mjs b/perf/instrument-objclasses.mjs index 3c2860cb..14395d8d 100644 --- a/perf/instrument-objclasses.mjs +++ b/perf/instrument-objclasses.mjs @@ -38,19 +38,19 @@ // // Run: node perf/instrument-objclasses.mjs -import '../docs/lib/fast-refs-class.mjs'; -import '../docs/lib/fast-inflate.mjs'; -import '../docs/lib/fast-parse-number.mjs'; -import '../docs/lib/fast-decode-name.mjs'; -import '../docs/lib/fast-number-to-string.mjs'; -import '../docs/lib/fast-size-in-bytes.mjs'; -import '../docs/lib/fast-parse-object.mjs'; -import '../docs/lib/fast-sync-load.mjs'; -import '../docs/lib/fast-indirect-objects.mjs'; -import '../docs/lib/fast-pdfnumber-pool.mjs'; -import { setExpectedDictSlots } from '../docs/lib/fast-dict-onebuf.mjs'; -import { setExpectedArraySlots } from '../docs/lib/fast-array-onebuf.mjs'; -import { measure as measureRawPdf } from '../docs/lib/measure-pass.mjs'; +import '../book/lib/fast-refs-class.mjs'; +import '../book/lib/fast-inflate.mjs'; +import '../book/lib/fast-parse-number.mjs'; +import 
'../book/lib/fast-decode-name.mjs'; +import '../book/lib/fast-number-to-string.mjs'; +import '../book/lib/fast-size-in-bytes.mjs'; +import '../book/lib/fast-parse-object.mjs'; +import '../book/lib/fast-sync-load.mjs'; +import '../book/lib/fast-indirect-objects.mjs'; +import '../book/lib/fast-pdfnumber-pool.mjs'; +import { setExpectedDictSlots } from '../book/lib/fast-dict-onebuf.mjs'; +import { setExpectedArraySlots } from '../book/lib/fast-array-onebuf.mjs'; +import { measure as measureRawPdf } from '../book/lib/measure-pass.mjs'; import { PDFDocument } from 'pdf-lib'; import { createRequire } from 'node:module'; import { readFileSync } from 'node:fs'; diff --git a/perf/instrument-pioh.mjs b/perf/instrument-pioh.mjs index 30cc6e7e..deb790e7 100644 --- a/perf/instrument-pioh.mjs +++ b/perf/instrument-pioh.mjs @@ -27,19 +27,19 @@ // // Run: node --expose-gc perf/instrument-pioh.mjs -import '../docs/lib/fast-refs-class.mjs'; -import '../docs/lib/fast-inflate.mjs'; -import '../docs/lib/fast-parse-number.mjs'; -import '../docs/lib/fast-decode-name.mjs'; -import '../docs/lib/fast-number-to-string.mjs'; -import '../docs/lib/fast-size-in-bytes.mjs'; -import '../docs/lib/fast-parse-object.mjs'; -import '../docs/lib/fast-sync-load.mjs'; -import '../docs/lib/fast-indirect-objects.mjs'; -import '../docs/lib/fast-pdfnumber-pool.mjs'; -import { setExpectedDictSlots } from '../docs/lib/fast-dict-onebuf.mjs'; -import { setExpectedArraySlots } from '../docs/lib/fast-array-onebuf.mjs'; -import { measure as measureRawPdf } from '../docs/lib/measure-pass.mjs'; +import '../book/lib/fast-refs-class.mjs'; +import '../book/lib/fast-inflate.mjs'; +import '../book/lib/fast-parse-number.mjs'; +import '../book/lib/fast-decode-name.mjs'; +import '../book/lib/fast-number-to-string.mjs'; +import '../book/lib/fast-size-in-bytes.mjs'; +import '../book/lib/fast-parse-object.mjs'; +import '../book/lib/fast-sync-load.mjs'; +import '../book/lib/fast-indirect-objects.mjs'; +import 
'../book/lib/fast-pdfnumber-pool.mjs'; +import { setExpectedDictSlots } from '../book/lib/fast-dict-onebuf.mjs'; +import { setExpectedArraySlots } from '../book/lib/fast-array-onebuf.mjs'; +import { measure as measureRawPdf } from '../book/lib/measure-pass.mjs'; import { PDFDocument } from 'pdf-lib'; import { createRequire } from 'node:module'; import { readFileSync } from 'node:fs'; diff --git a/perf/instrument-slot-types.mjs b/perf/instrument-slot-types.mjs index 5b33b5db..25b639ee 100644 --- a/perf/instrument-slot-types.mjs +++ b/perf/instrument-slot-types.mjs @@ -14,7 +14,7 @@ // in that shim). import { createRequire } from 'node:module'; -import { main, getMainLen } from '../docs/lib/fast-dict-onebuf.mjs'; +import { main, getMainLen } from '../book/lib/fast-dict-onebuf.mjs'; const require = createRequire(import.meta.url); const PDFName = require('pdf-lib/cjs/core/objects/PDFName.js').default; diff --git a/perf/measure.mjs b/perf/measure.mjs index cb6e5108..99eec99a 100644 --- a/perf/measure.mjs +++ b/perf/measure.mjs @@ -113,7 +113,7 @@ // for compressed objects) pass through unchanged. // // --parallel-deflate replaces pdfDoc.save() with parallelSave from -// docs/lib/parallel-deflate.mjs: object streams are pre-deflated in +// book/lib/parallel-deflate.mjs: object streams are pre-deflated in // parallel on libuv's thread pool with objectsPerStream=500 (vs // pdf-lib's serial save with default 50). Moves ~300 ms of zlib work // off the main thread on the book. @@ -212,7 +212,7 @@ // immutable so sharing is safe. Production runs through it. // // --measure-pass runs the no-allocate measure pass from -// docs/lib/measure-pass.mjs against the raw Chrome PDF before +// book/lib/measure-pass.mjs against the raw Chrome PDF before // pdf-lib's load, and uses the measured dict-slot count to // pre-size fast-dict-onebuf's mainBuf to exact demand (no // V8-amortized growth, no slack). 
Phase 1 of the two-pass @@ -245,15 +245,15 @@ import { mkdirSync, writeFileSync, existsSync } from 'node:fs'; import { Session } from 'node:inspector/promises'; import puppeteer from 'puppeteer'; import { PDFDocument } from 'pdf-lib'; -// Shared with docs/render-book.mjs -- the helpers and the paged.js -// bundle live under docs/lib/ now that we've dropped the pagedjs-cli +// Shared with book/render-book.mjs -- the helpers and the paged.js +// bundle live under book/lib/ now that we've dropped the pagedjs-cli // dependency. Importing from there guarantees the harness measures the // same code that production runs. -import { parseOutline, setOutline } from '../docs/lib/outline.mjs'; -import { setMetadata } from '../docs/lib/postprocesser.mjs'; +import { parseOutline, setOutline } from '../book/lib/outline.mjs'; +import { setMetadata } from '../book/lib/postprocesser.mjs'; import { applyOutlineAndMetadataIncremental } from './incremental-pdf.mjs'; import { pinCpuIfWindows } from './pin-cpu.mjs'; -import { parallelSave } from '../docs/lib/parallel-deflate.mjs'; +import { parallelSave } from '../book/lib/parallel-deflate.mjs'; // On Windows, re-launch under `start /affinity 0x5500 /high` to stabilise // CPU sample-time. See pin-cpu.mjs. 
Cuts run-to-run variance from @@ -425,71 +425,71 @@ if (fastRefs && fastRefsClass) { process.exit(2); } if (fastRefs) { - await import('../docs/lib/fast-refs.mjs'); + await import('../book/lib/fast-refs.mjs'); console.log('[harness] fast-refs: PDFRef.of dense-array cache for gen=0'); } if (fastRefsClass) { - await import('../docs/lib/fast-refs-class.mjs'); + await import('../book/lib/fast-refs-class.mjs'); console.log('[harness] fast-refs-class: PDFRef.of dense-array cache + class-constructor shape'); } if (fastDecodeName) { - await import('../docs/lib/fast-decode-name.mjs'); + await import('../book/lib/fast-decode-name.mjs'); console.log('[harness] fast-decode-name: skip decodeName regex when name has no #'); } if (fastNumberToString) { - await import('../docs/lib/fast-number-to-string.mjs'); + await import('../book/lib/fast-number-to-string.mjs'); console.log('[harness] fast-number-to-string: skip redundant toString/split when no exponential'); } if (fastSizeInBytes) { - await import('../docs/lib/fast-size-in-bytes.mjs'); + await import('../book/lib/fast-size-in-bytes.mjs'); console.log('[harness] fast-size-in-bytes: non-allocating ladder for xref byte-width'); } if (fastInflate) { - await import('../docs/lib/fast-inflate.mjs'); + await import('../book/lib/fast-inflate.mjs'); console.log('[harness] fast-inflate: swap pako.inflate for node:zlib.inflateSync'); } if (fastParseNumber) { - await import('../docs/lib/fast-parse-number.mjs'); + await import('../book/lib/fast-parse-number.mjs'); console.log('[harness] fast-parse-number: direct-integer accumulator for parseRawNumber/parseRawInt'); } if (fastDictIter) { - await import('../docs/lib/fast-dict-iter.mjs'); + await import('../book/lib/fast-dict-iter.mjs'); console.log('[harness] fast-dict-iter: in-place Map.forEach for PDFDict.sizeInBytes/copyBytesInto'); } if (fastParseDict) { - await import('../docs/lib/fast-parse-dict.mjs'); + await import('../book/lib/fast-parse-dict.mjs'); console.log('[harness] 
fast-parse-dict: hoist Type/Catalog/Pages/Page sentinel PDFNames out of parseDict'); } if (fastParseObject) { - await import('../docs/lib/fast-parse-object.mjs'); + await import('../book/lib/fast-parse-object.mjs'); console.log('[harness] fast-parse-object: first-byte dispatch in parseObject, gate true/false/null matchKeyword behind byte check'); } if (fastParseName) { - await import('../docs/lib/fast-parse-name.mjs'); + await import('../book/lib/fast-parse-name.mjs'); console.log('[harness] fast-parse-name: byte-slice + String.fromCharCode build for PDFObjectParser.parseName'); } if (fastSyncLoad) { - await import('../docs/lib/fast-sync-load.mjs'); + await import('../book/lib/fast-sync-load.mjs'); console.log('[harness] fast-sync-load: synchronify PDFParser load path, strip waitForTick machinery'); } if (fastDictArray) { - await import('../docs/lib/fast-dict-array.mjs'); + await import('../book/lib/fast-dict-array.mjs'); console.log('[harness] fast-dict-array: PDFDict backed by flat alternating array (subsumes fast-parse-dict + fast-dict-iter)'); } if (fastIndirectObjects) { - await import('../docs/lib/fast-indirect-objects.mjs'); + await import('../book/lib/fast-indirect-objects.mjs'); console.log('[harness] fast-indirect-objects: PDFContext.indirectObjects dense-array cache for gen=0 PDFRefs'); } if (fastPdfnumberPool) { - await import('../docs/lib/fast-pdfnumber-pool.mjs'); + await import('../book/lib/fast-pdfnumber-pool.mjs'); console.log('[harness] fast-pdfnumber-pool: value-keyed cache in front of PDFNumber.of'); } if (fastDictOnebuf) { - await import('../docs/lib/fast-dict-onebuf.mjs'); + await import('../book/lib/fast-dict-onebuf.mjs'); console.log('[harness] fast-dict-onebuf: ONE long-lived buffer for all PDFDict entries + small per-parser temp'); } if (fastArrayOnebuf) { - await import('../docs/lib/fast-array-onebuf.mjs'); + await import('../book/lib/fast-array-onebuf.mjs'); console.log('[harness] fast-array-onebuf: ONE long-lived buffer for all PDFArray 
elements + small per-parser temp'); } if (instrumentParsedict) { @@ -500,11 +500,11 @@ if (instrumentParsedict) { // invoked in-flight (after rawPdf is in hand, before PDFDocument.load). let _runMeasurePass = null; if (measurePass) { - const { measure } = await import('../docs/lib/measure-pass.mjs'); - const { setExpectedDictSlots } = await import('../docs/lib/fast-dict-onebuf.mjs'); + const { measure } = await import('../book/lib/measure-pass.mjs'); + const { setExpectedDictSlots } = await import('../book/lib/fast-dict-onebuf.mjs'); let setExpectedArraySlots = null; if (fastArrayOnebuf) { - const ma = await import('../docs/lib/fast-array-onebuf.mjs'); + const ma = await import('../book/lib/fast-array-onebuf.mjs'); setExpectedArraySlots = ma.setExpectedArraySlots; } _runMeasurePass = (bytes) => { @@ -556,7 +556,7 @@ const browser = await puppeteer.launch({ // the latter being present at launch. // // --disable-gpu + --disable-software-rasterizer mirror production - // (docs/render-book.mjs). Shrinks the GPU process from ~100 MB to + // (book/render-book.mjs). Shrinks the GPU process from ~100 MB to // ~16 MB and the renderer ~120 MB; generate ~5 s faster; PDF byte- // identical. See perf/README.md "Disabling the GPU process". args: [ diff --git a/perf/phase0-measure.mjs b/perf/phase0-measure.mjs index 40139458..28b9d2a0 100644 --- a/perf/phase0-measure.mjs +++ b/perf/phase0-measure.mjs @@ -24,18 +24,18 @@ import { inflateSync } from 'node:zlib'; import { performance } from 'node:perf_hooks'; import { createRequire } from 'node:module'; -// Production-equivalent shim wiring (same order as docs/render-book.mjs). 
-await import('../docs/lib/fast-refs.mjs'); -await import('../docs/lib/fast-inflate.mjs'); -await import('../docs/lib/fast-parse-number.mjs'); -await import('../docs/lib/fast-decode-name.mjs'); -await import('../docs/lib/fast-number-to-string.mjs'); -await import('../docs/lib/fast-size-in-bytes.mjs'); -await import('../docs/lib/fast-dict-onebuf.mjs'); -await import('../docs/lib/fast-parse-object.mjs'); -await import('../docs/lib/fast-sync-load.mjs'); -await import('../docs/lib/fast-indirect-objects.mjs'); -await import('../docs/lib/fast-pdfnumber-pool.mjs'); +// Production-equivalent shim wiring (same order as book/render-book.mjs). +await import('../book/lib/fast-refs.mjs'); +await import('../book/lib/fast-inflate.mjs'); +await import('../book/lib/fast-parse-number.mjs'); +await import('../book/lib/fast-decode-name.mjs'); +await import('../book/lib/fast-number-to-string.mjs'); +await import('../book/lib/fast-size-in-bytes.mjs'); +await import('../book/lib/fast-dict-onebuf.mjs'); +await import('../book/lib/fast-parse-object.mjs'); +await import('../book/lib/fast-sync-load.mjs'); +await import('../book/lib/fast-indirect-objects.mjs'); +await import('../book/lib/fast-pdfnumber-pool.mjs'); const require = createRequire(import.meta.url); const { PDFDocument } = require('pdf-lib'); diff --git a/perf/probe-parallel.mjs b/perf/probe-parallel.mjs index cc67b5b1..74e5c83e 100644 --- a/perf/probe-parallel.mjs +++ b/perf/probe-parallel.mjs @@ -70,7 +70,7 @@ console.log(`[probe] shards : ${shardCount}`); async function runShard(shardIndex) { const tStart = Date.now(); const browser = await puppeteer.launch({ - // Matches docs/render-book.mjs (production path). + // Matches book/render-book.mjs (production path). 
headless: true, args: [ '--no-sandbox', diff --git a/perf/probe-renderer-mem.mjs b/perf/probe-renderer-mem.mjs index 96478a7c..78c6ba2d 100644 --- a/perf/probe-renderer-mem.mjs +++ b/perf/probe-renderer-mem.mjs @@ -90,7 +90,7 @@ console.log(`[probe] output : ${outDir}`); console.log(`[probe] gc-passes : ${forceGc ? gcPasses : '(off)'}`); console.log(`[probe] heap-snapshot: ${heapSnap}`); -// Match production launch args (docs/render-book.mjs). --expose-gc +// Match production launch args (book/render-book.mjs). --expose-gc // is added when --gc is set so window.gc() inside the page works; // pinning V8 to that flag has no measurable cost on render or generate. const chromeArgs = [ From 15b93f3de37cfdba167ca89511fa2bf99f1eb732 Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Fri, 29 May 2026 01:25:00 +0200 Subject: [PATCH 2/9] Move the ignored files list to _config.yml, Jekyll-style. --- builder/discover.mjs | 27 ++---------------------- builder/tbdocs.mjs | 11 +++++----- docs/Documentation/Builder.md | 4 ++-- docs/Documentation/Pipeline-Stages.md | 6 +++--- docs/Documentation/Tools.md | 2 +- docs/_config.yml | 30 +++++++++++++++++++++++++++ 6 files changed, 44 insertions(+), 36 deletions(-) diff --git a/builder/discover.mjs b/builder/discover.mjs index 7304c3c9..ab680354 100644 --- a/builder/discover.mjs +++ b/builder/discover.mjs @@ -12,36 +12,13 @@ import { permalinkToDestPath } from "./paths.mjs"; const PAGE_EXT = /\.(md|html)$/i; const IMAGE_SCOPE = /(^|\/)Images\//; -// Files that look like pages but are toolchain artifacts. -const IGNORE = [ - // Underscored directories at the root and at any depth -- catches - // _site, _site-offline, _site-pdf, _pdf, _data, _includes, _layouts, - // _sass, _plugins, _profile, and every _Images at any depth. - "_*/**", - "**/_*/**", - // Defensive: caches and unrelated trees that should never be in docs/. 
- "**/.git/**", - "**/node_modules/**", - "**/.jekyll-cache/**", - "**/.sass-cache/**", - // Theme assets ship prebuilt from builder/assets/ instead. - "assets/css/**", - "assets/js/**", - // Top-level Jekyll / toolchain files. - "Gemfile", - "Gemfile.lock", - "_config.yml", - "*.bat", - "redirects.json", -]; - -export async function discover(srcRoot) { +export async function discover(srcRoot, ignore = []) { const allFiles = await fg("**/*", { cwd: srcRoot, dot: false, onlyFiles: true, followSymbolicLinks: false, - ignore: IGNORE, + ignore, }); allFiles.sort(); diff --git a/builder/tbdocs.mjs b/builder/tbdocs.mjs index 01f3d8a4..526e8e7f 100644 --- a/builder/tbdocs.mjs +++ b/builder/tbdocs.mjs @@ -129,16 +129,17 @@ export async function runBuild(opts) { process.exitCode = 1; } - const { pages, staticFiles } = await discover(srcRoot); - t.lap("discover"); + const config = yaml.load(await fs.readFile(path.join(srcRoot, "_config.yml"), "utf8")); + if (opts.baseurl != null) config.baseurl = opts.baseurl; + if (opts.url != null) config.url = opts.url; // Issue build-info immediately so the git shell-outs overlap with the // CPU-bound nav work. const buildInfoPromise = captureBuildInfo(); - const config = yaml.load(await fs.readFile(path.join(srcRoot, "_config.yml"), "utf8")); - if (opts.baseurl != null) config.baseurl = opts.baseurl; - if (opts.url != null) config.url = opts.url; + const { pages, staticFiles } = await discover(srcRoot, config.exclude ?? 
[]); + t.lap("discover"); + const { navTree } = computeNav(pages, config); t.lap("nav"); diff --git a/docs/Documentation/Builder.md b/docs/Documentation/Builder.md index 64600bab..c7e8caa3 100644 --- a/docs/Documentation/Builder.md +++ b/docs/Documentation/Builder.md @@ -116,7 +116,7 @@ Each subsection covers the design rationale and implementation details for one m ### [tbdocs.mjs](https://github.com/twinbasic/documentation/blob/main/builder/tbdocs.mjs) --- entry point and orchestrator -`captureBuildInfo()` is launched as a promise immediately after discover so the two `git` shell-outs overlap with the CPU-bound nav computation that follows; the result is `await`ed only once Phase 2's other substeps are done. The shared markdown-it instance is built once via `initHighlighter` + `createMarkdownIt` and stored on `site.markdown` so Phase 2's SEO precompute and Phase 3's body renderer use the same configured pipeline --- titles run through the same dash, quote, and footnote-stripping rules as page body text. +`_config.yml` is loaded first so its `exclude:` list can be passed to `discover()`. `captureBuildInfo()` is launched as a promise immediately after the config load so the two `git` shell-outs overlap with the I/O-bound discover and the CPU-bound nav computation that follows; the result is `await`ed only once Phase 2's other substeps are done. The shared markdown-it instance is built once via `initHighlighter` + `createMarkdownIt` and stored on `site.markdown` so Phase 2's SEO precompute and Phase 3's body renderer use the same configured pipeline --- titles run through the same dash, quote, and footnote-stripping rules as page body text. The drift guard at the end (`if (pages.length < 836)`) sets `process.exitCode = 1` when discover loses pages --- a discovery-rule regression that silently drops content appears as a non-zero exit even though the build itself "succeeded". @@ -128,7 +128,7 @@ The 300 ms debounce coalesces rapid file changes into a single rebuild. 
A lightw ### [discover.mjs](https://github.com/twinbasic/documentation/blob/main/builder/discover.mjs) --- Phase 1 -The IGNORE rules skip every underscored directory (catches `_site/`, `_site-offline/`, `_site-pdf/`, `_data/`, every `_Images/` at any depth), the prebuilt theme trees under `assets/css/` and `assets/js/` (sourced from `builder/assets/` instead), top-level toolchain files (`Gemfile`, `_config.yml`, `*.bat`), and the obvious cache dirs. +The `exclude:` list from `_config.yml` is passed in as the `ignore` parameter and forwarded directly to `fast-glob`. It skips every underscored directory (catches `_site/`, `_site-offline/`, `_site-pdf/`, `_data/`, every `_Images/` at any depth), the prebuilt theme trees under `assets/css/` and `assets/js/` (sourced from `builder/assets/` instead), top-level toolchain files (`Gemfile`, `_config.yml`, `*.bat`), Mermaid source files (`**/*.mmd`), and the obvious cache dirs. The final `pages.sort(byName)` mirrors Jekyll's `site.pages.sort_by!(&:name)` --- sort by basename, leaving fast-glob's input order to break ties (which `nav_order` then resolves deterministically in Phase 2). diff --git a/docs/Documentation/Pipeline-Stages.md b/docs/Documentation/Pipeline-Stages.md index a648435b..b77df53c 100644 --- a/docs/Documentation/Pipeline-Stages.md +++ b/docs/Documentation/Pipeline-Stages.md @@ -116,10 +116,10 @@ Traverses the source tree and produces the `pages` and `staticFiles` arrays cons **Entry point** ```js -discover(srcRoot: string): Promise<{ pages: Page[], staticFiles: StaticFile[] }> +discover(srcRoot: string, ignore: string[]): Promise<{ pages: Page[], staticFiles: StaticFile[] }> ``` -Runs a single `fast-glob` call over `srcRoot` with the hardcoded `IGNORE` exclude list (underscore-prefixed directories, prebuilt theme assets, toolchain files). For each `.md` or `.html` file, attempts to parse YAML frontmatter. Files with parseable frontmatter become page objects; everything else becomes static file objects. 
Pages are sorted by basename (mirroring Jekyll's reader); static files by relative path. +Runs a single `fast-glob` call over `srcRoot` with the `exclude:` list read from `_config.yml` and passed in by the orchestrator. For each `.md` or `.html` file, attempts to parse YAML frontmatter. Files with parseable frontmatter become page objects; everything else becomes static file objects. Pages are sorted by basename (mirroring Jekyll's reader); static files by relative path. **Reads:** source files under `srcRoot`. **Writes (page fields):** `srcPath`, `srcRel`, `ext`, `frontmatter`, `rawContent`, `permalink`, `destPath`, `layoutDefault`, `imageScope`. @@ -128,7 +128,7 @@ Runs a single `fast-glob` call over `srcRoot` with the hardcoded `IGNORE` exclud | Symbol | Signature | Description | |---|---|---| -| `discover` | `(srcRoot) → Promise<{ pages, staticFiles }>` | Main entry point. | +| `discover` | `(srcRoot, ignore) → Promise<{ pages, staticFiles }>` | Main entry point. | --- diff --git a/docs/Documentation/Tools.md b/docs/Documentation/Tools.md index 9a52cd47..b9a569f4 100644 --- a/docs/Documentation/Tools.md +++ b/docs/Documentation/Tools.md @@ -128,7 +128,7 @@ The build pipeline also reads a handful of declarative files. They are not execu | File | Effect | |---|---| -| `docs/_config.yml` | Site config. `tbdocs` reads `url`, `baseurl`, `title`, `logo`, `also_build_offline`, `also_build_pdf`, `offline_exclude`, `enable_copy_code_button`, the footer / aux-link knobs, the GitHub edit-link knobs, and the offline-download-link knobs. Jekyll-only keys (`markdown`, `kramdown`, `theme`, `highlighter`, the `defaults` block, the `compress_html` block, the `exclude` list) are ignored. | +| `docs/_config.yml` | Site config. `tbdocs` reads `url`, `baseurl`, `title`, `logo`, `also_build_offline`, `also_build_pdf`, `offline_exclude`, `exclude`, `enable_copy_code_button`, the footer / aux-link knobs, the GitHub edit-link knobs, and the offline-download-link knobs. 
Jekyll-only keys (`markdown`, `kramdown`, `theme`, `highlighter`, the `defaults` block, the `compress_html` block) are ignored. | | `docs/_data/book.yml` | The PDF book's chapter manifest. Entries are resolved to pages via the selector schema (`page` / `pages` / `nav_page` / `nav_pages` / `no_descent`) and control PDF outline behaviour via `landing_page:`, `landing_is_target:`, `no_outline_entry:`, `no_heading_shift:`, and `outline_closed:`. Full schema is documented in the file header. Phase 2 resolves chapter arrays; Phase 8 assembles `book.html`. | | `builder/themes/Light.theme`, `Dark.theme`, `Classic.theme` | twinBASIC IDE theme files, vendored from the BETA installer. `builder/highlight-theme.mjs` parses them into a Symbol-keyed palette that drives both the renderer's scope-to-class mapping and the generated `tb-highlight.css`. Refresh from the installer when the IDE adds new palette entries. | | `builder/twinbasic.tmLanguage.json` | TextMate grammar for the twinBASIC language. Shiki uses it to tokenise every ` ```tb ` code block. | diff --git a/docs/_config.yml b/docs/_config.yml index 5cc7bc56..4dabe01d 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -74,3 +74,33 @@ offline_exclude: - robots.txt - sitemap.xml - book.html + +# Files and patterns that tbdocs's discover phase excludes from the source +# tree. Uses fast-glob syntax: ** crosses directory separators, * stays +# within one level. Same role as Jekyll's built-in `exclude:` list. +# +# Patterns that start with * must be quoted so YAML does not interpret them +# as alias references. +exclude: + # Underscored directories at the root and at any depth -- catches + # _site, _site-offline, _site-pdf, _pdf, _data, _includes, _layouts, + # _sass, _plugins, _profile, and every _Images at any depth. + - _*/** + - "**/_*/**" + # Defensive: caches and unrelated trees that should never be in docs/. 
+ - "**/.git/**" + - "**/node_modules/**" + - "**/.jekyll-cache/**" + - "**/.sass-cache/**" + # Theme assets ship prebuilt from builder/assets/ instead. + - assets/css/** + - assets/js/** + # Top-level Jekyll / toolchain files. + - Gemfile + - Gemfile.lock + - _config.yml + - "*.bat" + - redirects.json + # Mermaid source files -- the build outputs .svg counterparts; the .mmd + # sources are not part of the published site. + - "**/*.mmd" From 26670c17a26ed531dcebf9729dec798d8fedb8d6 Mon Sep 17 00:00:00 2001 From: Kuba Sunderland-Ober Date: Fri, 29 May 2026 01:40:49 +0200 Subject: [PATCH 3/9] Don't include the online-only search-data.json in the offline site assets. --- builder/offline.mjs | 19 +++---------------- docs/Documentation/Builder.md | 2 +- docs/Documentation/Pipeline-Stages.md | 2 +- docs/_config.yml | 1 + 4 files changed, 6 insertions(+), 18 deletions(-) diff --git a/builder/offline.mjs b/builder/offline.mjs index 1db09a5f..bf3f0264 100644 --- a/builder/offline.mjs +++ b/builder/offline.mjs @@ -112,13 +112,12 @@ export async function writeOffline(pages, staticFiles, site, destRoot, { auxStat // the parallel await keeps the timing report honest. if (subT) { const t0Pages = Date.now(); - let dPages = 0, dRedirects = 0, dStatics = 0, dThemes = 0, dSearch = 0; + let dPages = 0, dRedirects = 0, dStatics = 0, dThemes = 0; const branches = [ writeOfflinePages(pages, deps).then(() => { dPages = Date.now() - t0Pages; }), writeOfflineRedirects(auxStats?.redirects?.stubs ?? [], deps).then(() => { dRedirects = Date.now() - t0Pages; }), copyOfflineStatics(staticFiles, deps).then(() => { dStatics = Date.now() - t0Pages; }), copyOfflineThemeAssets(deps).then(() => { dThemes = Date.now() - t0Pages; }), - copyOfflineSearchData(auxStats?.search?.json ?? 
null, deps).then(() => { dSearch = Date.now() - t0Pages; }), ]; await Promise.all(branches); subT.lap("parallel"); @@ -126,14 +125,12 @@ export async function writeOffline(pages, staticFiles, site, destRoot, { auxStat console.log(` offline.redirects (concurrent): ${dRedirects} ms`); console.log(` offline.statics (concurrent): ${dStatics} ms`); console.log(` offline.themeAssets (concurrent): ${dThemes} ms`); - console.log(` offline.searchDataCopy (concurrent): ${dSearch} ms`); } else { await Promise.all([ writeOfflinePages(pages, deps), writeOfflineRedirects(auxStats?.redirects?.stubs ?? [], deps), copyOfflineStatics(staticFiles, deps), copyOfflineThemeAssets(deps), - copyOfflineSearchData(auxStats?.search?.json ?? null, deps), ]); } @@ -366,6 +363,8 @@ async function copyOfflineThemeAssets(deps) { await runLimited(themeEntries, LIMIT, async (e) => { if (e.isJtdJs) return; + const relAsset = "assets/" + e.relUnderAssets; + if (offlineExcluded(relAsset, deps.excludePatterns)) return; const dest = path.join(offlineRoot, "assets", e.relUnderAssets); if (e.isCss) { const cssIn = await fs.readFile(e.srcAbs, "utf8"); @@ -393,14 +392,6 @@ export function deriveOfflineCss(cssIn, themeRel, state) { return { css: rewritten, misses }; } -// §5.6 copyOfflineSearchData -- verbatim copy of search-data.json. 
-async function copyOfflineSearchData(jsonBytes, deps) { - if (jsonBytes == null) return; - const dest = path.join(deps.offlineRoot, "assets/js/search-data.json"); - await writeFileMkdirp(dest, jsonBytes); - deps.counters.assets += 1; -} - // --------------------------------------------------------------------------- // §B Site-paths set // --------------------------------------------------------------------------- @@ -439,10 +430,6 @@ async function buildSitePaths(pages, staticFiles, destRoot, excludePatterns, stu paths.add("/" + rel); } } - // Defensive: the search-data.json Phase 6 writes isn't in pages[] - // or staticFiles[]; add it so a stray link from somewhere resolves - // instead of becoming an unresolved miss. - paths.add("/assets/js/search-data.json"); return paths; } diff --git a/docs/Documentation/Builder.md b/docs/Documentation/Builder.md index c7e8caa3..864ac756 100644 --- a/docs/Documentation/Builder.md +++ b/docs/Documentation/Builder.md @@ -262,7 +262,7 @@ The per-page sidebar nav block is byte-identical across every page (it doesn't d The just-the-docs.js patcher is AST-based as of Phase 11 (B11): `deriveOfflineJtdJs` parses the upstream source with `acorn`, scans for `FunctionDeclaration` nodes named `navLink` and `initSearch`, and slices in the two replacement implementations (`JTD_NAVLINK_REPLACEMENT`, `JTD_INITSEARCH_FN_REPLACEMENT`). The non-patched regions stay byte-identical to the upstream source, and cosmetic upstream edits (variable renames, whitespace inside the patched bodies) survive --- the prior anchored-regex patcher would have broken on either. A parse error at build time is a clear signal that re-extraction produced something acorn can't read; no defensive fallback ships because just-the-docs.js is only re-extracted on deliberate gem-bump operations. -`deriveOfflineSearchDataJs` wraps `search-data.json` as `window.SEARCH_DATA = {...}` (a `