diff --git a/packages/super-editor/src/editors/v1/core/Editor.ts b/packages/super-editor/src/editors/v1/core/Editor.ts index 16e66dbd28..b2a5aa6c5e 100644 --- a/packages/super-editor/src/editors/v1/core/Editor.ts +++ b/packages/super-editor/src/editors/v1/core/Editor.ts @@ -216,8 +216,7 @@ const rangeIsTrackedInsertionOnly = (doc: PmNode, from: number, to: number): boo const getSingleTrackedInsertionMarkInRange = (doc: PmNode, from: number, to: number): PmMark | null => { if (!rangeIsTrackedInsertionOnly(doc, from, to)) return null; - /** @type {import('prosemirror-model').Mark[]} */ - const insertionMarks = []; + const insertionMarks: PmMark[] = []; const seenIds = new Set(); doc.nodesBetween(from, to, (node, pos) => { if (!node.isInline || !node.isLeaf) return; diff --git a/packages/super-editor/src/editors/v1/core/super-converter/field-references/fld-preprocessors/tc-preprocessor.js b/packages/super-editor/src/editors/v1/core/super-converter/field-references/fld-preprocessors/tc-preprocessor.js index 54223fcd26..629e6c0c8e 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/field-references/fld-preprocessors/tc-preprocessor.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/field-references/fld-preprocessors/tc-preprocessor.js @@ -7,7 +7,25 @@ * @returns {import('../../v2/types/index.js').OpenXmlNode[]} */ export function preProcessTcInstruction(nodesToCombine, instrText, _docx, instructionTokens = null) { + // SD-3227 / SD-3229: a `_Toc...` bookmark embedded inside the TC field + // instruction gets swallowed by the synthesized `sd:tableOfContentsEntry` + // atom wrapper — `buildPositionMap` does not visit its descendants, so the + // bookmark name is never indexed (breaks Section link navigation) and the + // resulting PM node tends to be dropped (breaks TOC rebuild). Hoist + // `w:bookmarkStart` / `w:bookmarkEnd` nodes back out as paragraph-level + // siblings: starts go before the entry, ends after — same logical position + // they had inside the field, but visible to the rest of the importer. + const startBookmarks = []; + const endBookmarks = []; + const innerNodes = []; + for (const child of nodesToCombine) { + if (child?.name === 'w:bookmarkStart') startBookmarks.push(child); + else if (child?.name === 'w:bookmarkEnd') endBookmarks.push(child); + else innerNodes.push(child); + } + return [ + ...startBookmarks, { name: 'sd:tableOfContentsEntry', type: 'element', @@ -15,7 +33,8 @@ export function preProcessTcInstruction(nodesToCombine, instrText, _docx, instru instruction: instrText, ...(instructionTokens ? { instructionTokens } : {}), }, - elements: nodesToCombine, + elements: innerNodes, }, + ...endBookmarks, ]; } diff --git a/packages/super-editor/src/editors/v1/core/super-converter/field-references/shared/toc-switches.ts b/packages/super-editor/src/editors/v1/core/super-converter/field-references/shared/toc-switches.ts index c3c23e11c3..ef8bdb7fd5 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/field-references/shared/toc-switches.ts +++ b/packages/super-editor/src/editors/v1/core/super-converter/field-references/shared/toc-switches.ts @@ -65,8 +65,14 @@ export const DEFAULT_TOC_INSTRUCTION = 'TOC \\o "1-3" \\u \\h \\z'; // Parser // --------------------------------------------------------------------------- -/** Regex to match a switch and its optional quoted argument. */ -const SWITCH_PATTERN = /\\([a-z])\s*(?:"([^"]*)")?/gi; +/** + * Regex to match a switch and its optional argument. Word emits both quoted + * (`\t "Heading 1,1"`) and unquoted (`\f C`) forms — capture both so switches + * like `\f C` survive parsing instead of being read as bare flags. + * + * Group 2 = quoted arg, group 3 = unquoted arg. + */ +const SWITCH_PATTERN = /\\([a-z])(?:\s*(?:"([^"]*)"|([^\s\\]+)))?/gi; function parseLevelRange(value: string): { from: number; to: number } | undefined { const match = value.match(/^(\d+)-(\d+)$/); @@ -132,7 +138,10 @@ export function parseTocInstruction(instruction: string): TocSwitchConfig { SWITCH_PATTERN.lastIndex = 0; while ((match = SWITCH_PATTERN.exec(instruction)) !== null) { const switchChar = match[1].toLowerCase(); - const rawArg = match[2]; + // Group 2 = quoted arg, group 3 = unquoted arg. Track which form was used + // so `\p ""` (an explicit empty arg) stays distinguishable from `\p` + // (the switch with no arg at all). + const rawArg = match[2] !== undefined ? match[2] : match[3]; const arg = rawArg ?? ''; switch (switchChar) { diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v2/importer/docxImporter.js b/packages/super-editor/src/editors/v1/core/super-converter/v2/importer/docxImporter.js index ca8c9cf1a7..f5c0dc76b8 100644 --- a/packages/super-editor/src/editors/v1/core/super-converter/v2/importer/docxImporter.js +++ b/packages/super-editor/src/editors/v1/core/super-converter/v2/importer/docxImporter.js @@ -30,6 +30,7 @@ import { footnoteReferenceHandlerEntity } from './footnoteReferenceImporter.js'; import { endnoteReferenceHandlerEntity } from './endnoteReferenceImporter.js'; import { tableNodeHandlerEntity } from './tableImporter.js'; import { tableOfContentsHandlerEntity } from './tableOfContentsImporter.js'; +import { tableOfContentsEntryEntity } from './tableOfContentsEntryImporter.js'; import { indexHandlerEntity, indexEntryHandlerEntity } from './indexImporter.js'; import { bibliographyHandlerEntity } from './bibliographyImporter.js'; import { preProcessNodesForFldChar } from '../../field-references'; @@ -340,6 +341,7 @@ export const defaultNodeListHandler = () => { tabNodeEntityHandler, noBreakHyphenNodeEntityHandler, tableOfContentsHandlerEntity, + tableOfContentsEntryEntity, indexHandlerEntity, bibliographyHandlerEntity, indexEntryHandlerEntity, diff --git a/packages/super-editor/src/editors/v1/core/super-converter/v2/importer/tableOfContentsEntryImporter.js b/packages/super-editor/src/editors/v1/core/super-converter/v2/importer/tableOfContentsEntryImporter.js new file mode 100644 index 0000000000..8e1521b7f1 --- /dev/null +++ b/packages/super-editor/src/editors/v1/core/super-converter/v2/importer/tableOfContentsEntryImporter.js @@ -0,0 +1,11 @@ +import { generateV2HandlerEntity } from '@core/super-converter/v3/handlers/utils'; +import { translator } from '../../v3/handlers/sd/tableOfContentsEntry/tableOfContentsEntry-translator.js'; + +/** + * Bridges the v3 `sd:tableOfContentsEntry` translator into the v2 node-list + * pipeline so TC fields synthesized by `tc-preprocessor` are materialized as + * PM `tableOfContentsEntry` nodes during import. + * + * @type {import("./docxImporter").NodeHandlerEntry} + */ +export const tableOfContentsEntryEntity = generateV2HandlerEntity('tableOfContentsEntryNodeHandler', translator); diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/toc-bookmark-sync.ts b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/toc-bookmark-sync.ts index e57bac0055..6325f8bcb8 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/toc-bookmark-sync.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/toc-bookmark-sync.ts @@ -76,11 +76,20 @@ function encodeBlockId(input: string): string { * - All required bookmarks already exist * - The schema lacks bookmark node types (headless/test environments) */ -export function syncTocBookmarks(editor: Editor, sources: Array<{ sdBlockId: string }>): void { +export function syncTocBookmarks(editor: Editor, sources: Array<{ sdBlockId: string; bodyAnchor?: string }>): void { const { schema, doc } = editor.state; if (!schema.nodes.bookmarkStart || !schema.nodes.bookmarkEnd) return; - const needed = deduplicateByBlockId(sources); + // SD-3229: when a source already has a `_Toc...` bookmark in the body + // (preserved by the importer), the rebuilder reuses that name as the + // entry's anchor. Generating a *new* synthetic bookmark on top would + // litter the document with duplicate `_Toc` markers next to the + // original `_Toc230123326`, etc. Drop sources whose anchor is already + // satisfied by an existing body bookmark. + const sourcesNeedingSync = sources.filter((s) => !s.bodyAnchor); + if (sourcesNeedingSync.length === 0) return; + + const needed = deduplicateByBlockId(sourcesNeedingSync); const existing = collectExistingTocBookmarkNames(doc); const missing = needed.filter((t) => !existing.has(t.bookmarkName)); if (missing.length === 0) return; diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/toc-entry-builder.test.ts b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/toc-entry-builder.test.ts index bedddbccc1..d3c5def818 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/toc-entry-builder.test.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/toc-entry-builder.test.ts @@ -27,12 +27,19 @@ function titleTextOf(paragraphs: ReturnType): Te return titleRun.content?.[0] ?? {}; } -/** Find the page-number text node (carries the tocPageNumber mark) inside any run. */ +/** + * Find the page-number text inside the entry's `pageReference` node — the + * builder now emits a real PAGEREF field instead of a `tocPageNumber` mark. + */ function pageNumberTextOf(paragraphs: ReturnType): TextLike { - const runs = paragraphs[0]!.content as Array<{ content?: TextLike[] }>; - for (const run of runs) { - const child = run.content?.find((c) => Array.isArray(c.marks) && c.marks.some((m) => m.type === 'tocPageNumber')); - if (child) return child; + const nodes = paragraphs[0]!.content as Array<{ type?: string; content?: TextLike[] }>; + for (const node of nodes) { + if (node.type !== 'pageReference') continue; + const innerRuns = (node.content ?? []) as Array<{ content?: TextLike[] }>; + for (const run of innerRuns) { + const text = run.content?.find((c) => c.type === 'text'); + if (text) return text; + } } return {}; } @@ -125,22 +132,27 @@ describe('buildTocEntryParagraphs', () => { expect(linkMark?.attrs?.anchor).toBe(generateTocBookmarkName(BASE_SOURCE.sdBlockId)); }); - it('wraps each text run in a `run` node so wrapTextInRunsPlugin does not clobber marks', () => { + it('wraps text in `run` nodes and emits a real pageReference for the page number', () => { const paragraphs = buildTocEntryParagraphs([BASE_SOURCE], makeConfig({ hyperlinks: true })); - const runs = paragraphs[0]!.content as Array<{ type: string }>; - // Title run + tab run + page-number run = 3 runs (no \p, no omit). - expect(runs.length).toBe(3); - runs.forEach((r) => expect(r.type).toBe('run')); + const nodes = paragraphs[0]!.content as Array<{ type: string }>; + // Title run + tab run + pageReference node = 3 children. + expect(nodes.map((n) => n.type)).toEqual(['run', 'run', 'pageReference']); }); - it('carries allowed character marks (bold, italic, underline, color, highlight, fontFamily, textStyle.fontFamily) from the source heading', () => { + it("does not propagate the heading paragraph's character marks into the rebuilt entry (SD-3229)", () => { + // Per ECMA-376 §17.16.5.68, Word rebuilds heading-driven TOC entries + // (\o / \u / \t) from the heading text plus the linked TOC{n} style's + // typography. The heading's own bold/underline/font marks must NOT + // bleed through — otherwise the entry shows "ARTICLE 1 BASIC + // INFORMATION" in Heading1's bold/Times-New-Roman-Bold instead of the + // TOC1 style's lighter weight. const sourceWithMarks: TocSource = { ...BASE_SOURCE, segments: [ { text: 'Heading', marks: [ - { type: 'textStyle', attrs: { fontFamily: 'Aptos', fontSize: '24pt' } }, // fontSize must be scrubbed + { type: 'textStyle', attrs: { fontFamily: 'Aptos', fontSize: '24pt' } }, { type: 'bold' }, { type: 'italic' }, { type: 'underline' }, @@ -153,22 +165,10 @@ describe('buildTocEntryParagraphs', () => { }; const paragraphs = buildTocEntryParagraphs([sourceWithMarks], makeConfig({ hyperlinks: true })); const text = titleTextOf(paragraphs); - expect(text.marks!.map((m) => m.type)).toEqual([ - 'textStyle', - 'bold', - 'italic', - 'underline', - 'color', - 'highlight', - 'fontFamily', - 'link', - ]); - // textStyle keeps fontFamily, drops fontSize. - const textStyleMark = text.marks!.find((m) => m.type === 'textStyle'); - expect(textStyleMark!.attrs).toEqual({ fontFamily: 'Aptos' }); + expect(text.marks!.map((m) => m.type)).toEqual(['link']); }); - it('drops disallowed marks (fontSize, strike, link, comments, track-changes, tocPageNumber)', () => { + it('only the link mark is attached to heading-source title runs', () => { const sourceWithDisallowed: TocSource = { ...BASE_SOURCE, segments: [ @@ -188,11 +188,38 @@ describe('buildTocEntryParagraphs', () => { }; const paragraphs = buildTocEntryParagraphs([sourceWithDisallowed], makeConfig({ hyperlinks: true })); const text = titleTextOf(paragraphs); - // Only the allowed `bold` survives, plus the rebuilt `link` to the source bookmark. - expect(text.marks!.map((m) => m.type)).toEqual(['bold', 'link']); + expect(text.marks!.map((m) => m.type)).toEqual(['link']); const linkMark = text.marks!.find((m) => m.type === 'link'); expect(linkMark!.attrs!.anchor).toBe(generateTocBookmarkName(BASE_SOURCE.sdBlockId)); - expect(linkMark!.attrs!.href).toBeUndefined(); + // The rebuilt link points at the synthetic in-document anchor; the source's + // `href: "https://example.com"` is dropped (we route through the anchor). + expect(linkMark!.attrs!.href).toBe(`#${generateTocBookmarkName(BASE_SOURCE.sdBlockId)}`); + }); + + it('TC-field entries carry bold/italic/underline (but not textStyle) from the surrounding Heading2 (SD-3229)', () => { + // Word's update-field inherits character formatting from the body run + // that surrounds the TC field's title — but only the visible style + // marks (bold/italic/underline). Inheriting `textStyle` overrides the + // TOC2 paragraph style's font, which is wrong. + const tcSource: TocSource = { + text: 'Section 1.1\tCertain Basic Terms', + level: 2, + sdBlockId: 'h2-1', + kind: 'tcField', + titleMarks: [ + { type: 'bold' }, + { type: 'underline' }, + { type: 'textStyle', attrs: { fontFamily: 'Times New Roman, serif' } }, + ], + }; + const paragraphs = buildTocEntryParagraphs([tcSource], makeConfig({ hyperlinks: true })); + // Section-number run (first run) — plain link only, no inherited marks. + const numberRun = paragraphs[0]!.content[0] as { content?: TextLike[] }; + expect(numberRun.content?.[0]?.marks?.map((m) => m.type)).toEqual(['link']); + // Title run (third run, after number / tab) — bold + underline survive, + // textStyle is dropped so the TOC2 style picks the font. + const titleRun = paragraphs[0]!.content[2] as { content?: TextLike[] }; + expect(titleRun.content?.[0]?.marks?.map((m) => m.type)).toEqual(['bold', 'underline', 'link']); }); }); @@ -220,6 +247,7 @@ interface MockParagraph { text: string; styleId?: string; outlineLevel?: number; + listMarkerText?: string; } function mockDoc(paragraphs: MockParagraph[]) { @@ -239,6 +267,7 @@ function mockDoc(paragraphs: MockParagraph[]) { ...(p.styleId ? { styleId: p.styleId } : {}), ...(p.outlineLevel !== undefined ? { outlineLevel: p.outlineLevel } : {}), }, + ...(p.listMarkerText !== undefined ? { listRendering: { markerText: p.listMarkerText } } : {}), }, isText: false, descendants: (cb: (node: unknown, pos: number) => boolean | void) => { @@ -432,4 +461,82 @@ describe('collectTocSources', () => { expect(sources.map((s) => s.text)).toEqual(['Part 3', 'Part 4']); expect(sources[1].sdBlockId).toMatch(/^para-auto-/); }); + + describe('custom-style mapping (\\t)', () => { + // SD-3229: a TOC whose instruction relies solely on \t (and \f with no + // imported TC nodes) used to rebuild as the "No table of contents entries + // found." placeholder because collectTocSources ignored \t mappings. + it('collects paragraphs whose styleId matches a \\t custom-style mapping', () => { + const doc = mockDoc([ + { sdBlockId: 'p1', text: 'Article 1', styleId: 'Heading1' }, + { sdBlockId: 'p2', text: 'Article 2', styleId: 'Heading1' }, + { sdBlockId: 'p3', text: 'Body', styleId: 'Normal' }, + ]); + + const config: TocSwitchConfig = { + source: { tcFieldIdentifier: 'C' }, + display: { hyperlinks: true }, + // styleName "Heading 1" (with space) must match styleId "Heading1" + preserved: { customStyles: [{ styleName: 'Heading 1', level: 1 }] }, + }; + + const sources = collectTocSources(doc, config); + expect(sources.map((s) => s.text)).toEqual(['Article 1', 'Article 2']); + expect(sources.every((s) => s.kind === 'customStyle')).toBe(true); + expect(sources.every((s) => s.level === 1)).toBe(true); + }); + + it('prefers \\o heading collection over \\t when both match the same paragraph', () => { + const doc = mockDoc([{ sdBlockId: 'p1', text: 'Intro', styleId: 'Heading1' }]); + + const config: TocSwitchConfig = { + source: { outlineLevels: { from: 1, to: 3 } }, + display: {}, + preserved: { customStyles: [{ styleName: 'Heading 1', level: 2 }] }, + }; + + const sources = collectTocSources(doc, config); + expect(sources).toHaveLength(1); + expect(sources[0].kind).toBe('heading'); + expect(sources[0].level).toBe(1); + }); + + it('exposes the rendered list marker so the builder can emit it as its own run', () => { + // SD-3229 PSA repro: Heading1 paragraphs only contain "BASIC INFORMATION" + // / "PROPERTY" in their text content. The "ARTICLE 1" / "ARTICLE 2" + // prefix is auto-numbered by the Heading1 style (lvlText "ARTICLE %1") + // and surfaces on the paragraph as listRendering.markerText after layout. + const doc = mockDoc([ + { sdBlockId: 'p1', text: 'BASIC INFORMATION', styleId: 'Heading1', listMarkerText: 'ARTICLE 1' }, + { sdBlockId: 'p2', text: 'PROPERTY', styleId: 'Heading1', listMarkerText: 'ARTICLE 2' }, + ]); + + const config: TocSwitchConfig = { + source: { tcFieldIdentifier: 'C' }, + display: { hyperlinks: true }, + preserved: { customStyles: [{ styleName: 'Heading 1', level: 1 }] }, + }; + + const sources = collectTocSources(doc, config); + expect(sources.map((s) => s.text)).toEqual(['BASIC INFORMATION', 'PROPERTY']); + // Marker is reported alongside the segments so the builder can wrap it + // in its own run (matching Word's two-run TOC1 shape) instead of + // smuggling the prefix into the heading text. + expect(sources.map((s) => s.markerText)).toEqual(['ARTICLE 1', 'ARTICLE 2']); + }); + + it('respects an explicit \\o range when filtering \\t matches', () => { + const doc = mockDoc([{ sdBlockId: 'p1', text: 'Article 1', styleId: 'CustomHeading' }]); + + const config: TocSwitchConfig = { + source: { outlineLevels: { from: 1, to: 1 } }, + display: {}, + preserved: { customStyles: [{ styleName: 'CustomHeading', level: 2 }] }, + }; + + // Mapping says level 2 but \o range is 1-1 → excluded. + const sources = collectTocSources(doc, config); + expect(sources).toHaveLength(0); + }); + }); }); diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/toc-entry-builder.ts b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/toc-entry-builder.ts index d5545eda50..e80390dd19 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/toc-entry-builder.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/toc-entry-builder.ts @@ -27,6 +27,13 @@ export interface TocSource { * plain string is available from the field instruction. */ segments?: TocTextSegment[]; + /** + * Auto-numbered marker prefix (e.g. "ARTICLE 1") resolved from the source + * paragraph's `listRendering.markerText`. Emitted as a separate run before + * the heading text so Word's two-run TOC1 shape is preserved on rebuild. + * Undefined for paragraphs without auto-numbering and for TC entries. + */ + markerText?: string; /** TOC level (1-based). */ level: number; /** @@ -36,9 +43,24 @@ export interface TocSource { */ sdBlockId: string; /** Source type for diagnostic purposes. */ - kind: 'heading' | 'appliedOutline' | 'tcField'; + kind: 'heading' | 'appliedOutline' | 'tcField' | 'customStyle'; /** Whether to omit the page number for this specific entry (TC \n switch). */ omitPageNumber?: boolean; + /** + * Existing `_Toc...` bookmark name on the source paragraph (when present). + * Reused as the rebuilt entry's link anchor so the rebuild does not invent + * synthetic bookmark names for headings/sections that Word has already + * tagged. Undefined when no such bookmark exists yet — in that case the + * entry builder falls back to a deterministic synthetic name. + */ + bodyAnchor?: string; + /** + * Marks captured from the body source for the *title* portion of a TC + * entry (the text after the embedded `\t`). Lets the rebuilt section row + * inherit the bold/underline that Word applies in Heading2 paragraphs. + * Undefined for non-TC sources. + */ + titleMarks?: EntryTextMark[]; } /** A run of source text with its surviving character marks. */ @@ -94,12 +116,95 @@ function sanitizeSourceMark(mark: EntryTextMark): EntryTextMark | null { // Source collection // --------------------------------------------------------------------------- +/** Normalises a style name/styleId for case- and whitespace-insensitive comparison. */ +function normalizeStyleKey(value: string | undefined | null): string { + return value ? value.replace(/\s+/g, '').toLowerCase() : ''; +} + +/** + * Cleans up the text inside a TC entry. The field preprocessor concatenates + * each `` run with a trailing space, which leaves stray gaps + * around tabs and before punctuation (`" Section 1.1 \tCertain Basic Terms . "`). + * Tabs are meaningful (they separate the section number from the title) so + * we keep them; spaces collapse to a single space and trailing space before + * a `.` or `:` is removed. + */ +function normalizeTcEntryText(text: string): string { + return text + .replace(/ +\t/g, '\t') + .replace(/\t +/g, '\t') + .replace(/ {2,}/g, ' ') + .replace(/ +([.,;:!?])/g, '$1') + .trim(); +} + +/** + * Pulls the rendered list-marker (e.g. "ARTICLE 1") from a paragraph's + * `listRendering` attribute. The layout pass populates this with the resolved + * marker text so we don't have to re-evaluate the numbering definition here. + */ +function readListMarker(node: ProseMirrorNode): string | undefined { + const lr = (node.attrs as Record | undefined)?.listRendering as + | { markerText?: string | null } + | null + | undefined; + const marker = lr?.markerText; + if (!marker) return undefined; + const trimmed = marker.replace(/\s+$/, ''); + return trimmed.length > 0 ? trimmed : undefined; +} + +/** + * Returns the last `_Toc...` bookmark name attached to the given paragraph + * (scanning its descendants). Word emits a new TOC bookmark for each TOC + * regeneration and tends to leave the older one in the document, so the + * *last* one is the anchor the current TOC's hyperlinks point at. + */ +function findBodyTocAnchor(node: ProseMirrorNode): string | undefined { + let last: string | undefined; + node.descendants((child) => { + if (child.type.name === 'bookmarkStart') { + const name = (child.attrs as Record | undefined)?.name as string | undefined; + if (name?.startsWith('_Toc')) last = name; + } + return true; + }); + return last; +} + +/** + * Inspects a paragraph for the character marks that should flow onto the + * "title" portion of a TC entry (i.e. the text after the embedded `\t`). + * Word's TC field doesn't carry character formatting in its instruction + * string — it inherits from the body run that surrounds the title. + * We capture the marks of the longest non-empty bold/italic/underline text + * node to keep the title visually consistent with how Word renders it. + */ +function findTitleMarksOnParagraph(node: ProseMirrorNode): EntryTextMark[] | undefined { + let best: { length: number; marks: EntryTextMark[] } | undefined; + node.descendants((child) => { + if (!child.isText || !child.text) return true; + const captured: EntryTextMark[] = []; + for (const mark of child.marks ?? []) { + const raw: EntryTextMark = { type: mark.type?.name ?? '' }; + if (mark.attrs && Object.keys(mark.attrs).length > 0) raw.attrs = { ...mark.attrs }; + const sanitized = sanitizeSourceMark(raw); + if (sanitized) captured.push(sanitized); + } + if (!captured.some((m) => m.type === 'bold' || m.type === 'italic' || m.type === 'underline')) return true; + if (!best || child.text.length > best.length) best = { length: child.text.length, marks: captured }; + return true; + }); + return best?.marks; +} + /** * Collects all document nodes that qualify as TOC entry sources. * * Sources are collected based on the instruction's active switches: * - \o (outlineLevels): heading nodes whose level falls within the range * - \u (useAppliedOutlineLevel): paragraph nodes with explicit outlineLevel + * - \t (customStyles): paragraph nodes whose styleId matches a custom-style mapping * - \f (tcFieldIdentifier): TC field nodes with matching identifier * - \l (tcFieldLevels): TC field nodes within the level range * @@ -112,8 +217,20 @@ export function collectTocSources(doc: ProseMirrorNode, config: TocSwitchConfig) const useApplied = useAppliedOutlineLevel ?? false; const collectTcFields = tcFieldIdentifier !== undefined || tcFieldLevels !== undefined; + // Build a lookup from normalized custom-style name → TOC level. Word's \t + // switch matches against the style *name*, but the PM document only stores + // styleId. For built-in styles the two differ only by whitespace (e.g. + // styleId "Heading1" vs name "Heading 1"), so normalizing both sides handles + // the common case without needing a styles-table lookup. + const customStyleLevels = new Map(); + for (const mapping of config.preserved?.customStyles ?? []) { + const key = normalizeStyleKey(mapping.styleName); + if (key && Number.isFinite(mapping.level)) customStyleLevels.set(key, mapping.level); + } + // Track the current paragraph context for TC field collection let currentParagraphSdBlockId: string | undefined; + let currentParagraphNode: ProseMirrorNode | undefined; doc.descendants((node, pos) => { // Skip TOC nodes themselves — don't collect entries from within a TOC @@ -129,6 +246,7 @@ export function collectTocSources(doc: ProseMirrorNode, config: TocSwitchConfig) const sdBlockId = ((attrs?.sdBlockId ?? attrs?.paraId) as string | undefined) ?? buildFallbackBlockNodeId('paragraph', pos); currentParagraphSdBlockId = sdBlockId; + currentParagraphNode = node; if (!sdBlockId) return true; const text = flattenText(node); @@ -136,11 +254,23 @@ export function collectTocSources(doc: ProseMirrorNode, config: TocSwitchConfig) // (page-break spacers, empty stubs). if (text.trim().length === 0) return true; + const markerText = readListMarker(node); + const bodyAnchor = findBodyTocAnchor(node); + const segments = extractTextSegments(node); + // \o switch — heading-style level if (outlineLevels) { const headingLevel = getHeadingLevel(styleId); if (headingLevel != null && headingLevel >= outlineLevels.from && headingLevel <= outlineLevels.to) { - sources.push({ text, segments: extractTextSegments(node), level: headingLevel, sdBlockId, kind: 'heading' }); + sources.push({ + text, + segments, + markerText, + level: headingLevel, + sdBlockId, + kind: 'heading', + bodyAnchor, + }); return true; // descend so TC fields inside this paragraph are still collected } } @@ -154,10 +284,33 @@ export function collectTocSources(doc: ProseMirrorNode, config: TocSwitchConfig) if (tocLevel >= effectiveLevels.from && tocLevel <= effectiveLevels.to) { sources.push({ text, - segments: extractTextSegments(node), + segments, + markerText, level: tocLevel, sdBlockId, kind: 'appliedOutline', + bodyAnchor, + }); + return true; + } + } + } + + // \t switch — custom-style mapping. Falls through after \o/\u so a + // heading-styled paragraph is preferred as a heading source. + if (customStyleLevels.size > 0) { + const tocLevel = customStyleLevels.get(normalizeStyleKey(styleId)); + if (tocLevel != null) { + const effectiveLevels = outlineLevels ?? { from: 1, to: 9 }; + if (tocLevel >= effectiveLevels.from && tocLevel <= effectiveLevels.to) { + sources.push({ + text, + segments, + markerText, + level: tocLevel, + sdBlockId, + kind: 'customStyle', + bodyAnchor, }); return true; } @@ -184,12 +337,20 @@ export function collectTocSources(doc: ProseMirrorNode, config: TocSwitchConfig) } } + // The TC instruction lives inside the containing paragraph; reuse its + // bookmark + character marks so the rebuilt entry retains the same + // anchor and bold/underline that Word renders for the section title. + const bodyAnchor = currentParagraphNode ? findBodyTocAnchor(currentParagraphNode) : undefined; + const titleMarks = currentParagraphNode ? findTitleMarksOnParagraph(currentParagraphNode) : undefined; + sources.push({ - text: tcConfig.text, + text: normalizeTcEntryText(tcConfig.text), level: tcConfig.level, sdBlockId: currentParagraphSdBlockId, kind: 'tcField', omitPageNumber: tcConfig.omitPageNumber || undefined, + bodyAnchor, + titleMarks, }); return false; @@ -313,44 +474,170 @@ function asRun(children: Array>): RecordPAGEREF` fields — an atom with `instruction` + * + a single result run carrying the resolved page number. Word's TOC + * entries reference the heading via `PAGEREF \h`; we reproduce + * the same shape so updating the TOC keeps the field intact instead of + * downgrading it to a plain text run with a `tocPageNumber` mark. + */ +function buildPageReferenceNode( + anchor: string, + resolvedPage: number | undefined, + linkMark: EntryTextMark | undefined, +): Record { + const pageText = resolvedPage != null ? String(resolvedPage) : '0'; + const marksAsAttrs = linkMark ? [{ type: 'link', attrs: { anchor, history: true, href: `#${anchor}` } }] : []; + return { + type: 'pageReference', + attrs: { + marksAsAttrs, + instruction: `PAGEREF ${anchor} \\h`, + }, + content: [asRun([{ type: 'text', text: pageText }])], + }; +} + +/** Builds the link mark JSON used for every text/tab node in a TOC entry. */ +function buildLinkMark(anchor: string): EntryTextMark { + return { + type: 'link', + attrs: { + anchor, + history: true, + href: `#${anchor}`, + rel: 'noopener noreferrer nofollow', + }, + }; +} + +/** Filters source segments through the allow-list at build time. */ +function sanitizeSegment(segment: TocTextSegment): EntryTextMark[] { + return (segment.marks ?? []).map((m) => sanitizeSourceMark(m)).filter((m): m is EntryTextMark => m !== null); +} + +/** + * Marks Word's "Update field" propagates from the body source onto a TC + * entry's title run — bold / italic / underline only. Per ECMA-376 + * §17.16.5.68 the TOC{n} paragraph style supplies typography (font family, + * size, weight defaults); we deliberately drop `textStyle` and any colour + * marks so the heading's Times-New-Roman text doesn't override the TOC2 + * style's theme font. + */ +const TC_TITLE_INHERITED_MARK_TYPES = new Set(['bold', 'italic', 'underline']); - // Title text. Character-level marks (bold, italic, color, font…) are - // carried over from the *source heading* — never sampled from the existing - // TOC entry, which would leak entry-1's direct formatting onto every - // rebuilt entry (Word rebuilds entries from the linked TOC1, TOC2, … - // paragraph styles, plus character formatting from the source). - // Each text node is wrapped in a `run` so wrapTextInRunsPlugin does not - // re-wrap and merge the paragraph style's run properties via addToSet. - const linkMark: EntryTextMark | undefined = display.hyperlinks - ? { type: 'link', attrs: { anchor: generateTocBookmarkName(source.sdBlockId), rId: null, history: true } } - : undefined; +function filterTitleMarks(marks: EntryTextMark[] | undefined): EntryTextMark[] { + if (!marks) return []; + return marks + .map((m) => sanitizeSourceMark(m)) + .filter((m): m is EntryTextMark => m !== null && TC_TITLE_INHERITED_MARK_TYPES.has(m.type)); +} +/** + * Builds the inline content for a non-TC entry (heading / customStyle / + * appliedOutline). When the source has an auto-numbered marker we split it + * into a marker run + a title run so the rebuild matches the two-run shape + * Word emits ("ARTICLE 1" + " BASIC INFORMATION"). + * + * Per ECMA-376 §17.16.5.68, Word builds these entries by combining the + * heading paragraph's *text* with the linked TOC{n} style's typography — + * the heading's own character marks (bold/underline/font from Heading1, + * etc.) are not carried into the TOC entry. We mirror that behaviour by + * emitting plain text runs and letting the rebuilt paragraph's `styleId` + * drive font/weight via the style cascade. + */ +function buildHeadingContent(source: TocSource, linkMark: EntryTextMark | undefined): Array> { const segments: TocTextSegment[] = source.segments && source.segments.length > 0 ? source.segments : [{ text: source.text || ' ' }]; - const titleTextNodes: Array> = segments.map((segment) => { - // Re-apply the allowlist at build time so callers passing hand-built - // segments cannot smuggle in disallowed marks (font-size, link, comments, - // track-changes, etc.). collectTocSources also sanitizes, but the - // builder is the contract boundary that users of buildTocEntryParagraphs - // hit directly — defending here keeps the rule in one place. - const sourceMarks = (segment.marks ?? []) - .map((m) => sanitizeSourceMark(m)) - .filter((m): m is EntryTextMark => m !== null); - const marks: EntryTextMark[] = [...sourceMarks]; - if (linkMark) marks.push(linkMark); - const node: Record = { type: 'text', text: segment.text || ' ' }; + const wrapTextNode = (text: string): Record => { + const marks = linkMark ? [linkMark] : []; + const node: Record = { type: 'text', text }; if (marks.length > 0) node.marks = marks; return node; - }); + }; + + const runs: Array> = []; - const content: Array> = [asRun(titleTextNodes)]; + if (source.markerText) { + runs.push(asRun([wrapTextNode(source.markerText)])); + + // Heading body text — prefixed by a space matching Word's separator + // between the numbered marker and the heading text in the TOC entry. + const headingNodes: Array> = []; + let first = true; + for (const segment of segments) { + const text = first ? ` ${segment.text}` : segment.text; + headingNodes.push(wrapTextNode(text || ' ')); + first = false; + } + runs.push(asRun(headingNodes)); + } else { + runs.push(asRun(segments.map((segment) => wrapTextNode(segment.text || ' ')))); + } + + return runs; +} + +/** + * Builds the inline content for a TC-field entry. Word emits the TC's + * instruction text split by an embedded tab — the part before the tab is + * the section number ("Section 1.1") and the part after is the title + * ("Certain Basic Terms"). We mirror that with three runs: number / tab / + * title (with bold/underline if the surrounding Heading2 carried those + * marks). + */ +function buildTcContent(source: TocSource, linkMark: EntryTextMark | undefined): Array> { + const text = source.text ?? ''; + const tabIndex = text.indexOf('\t'); + const wrapTextNode = (value: string, marks: EntryTextMark[]): Record => { + const allMarks = linkMark ? [...marks, linkMark] : [...marks]; + const node: Record = { type: 'text', text: value }; + if (allMarks.length > 0) node.marks = allMarks; + return node; + }; + const wrapTabNode = (): Record => { + const marks = linkMark ? [linkMark] : []; + const node: Record = { type: 'tab' }; + if (marks.length > 0) node.marks = marks; + return node; + }; + + if (tabIndex < 0) { + // No tab inside the TC instruction — single text run, no split. + return [asRun([wrapTextNode(text || ' ', [])])]; + } + + const numberPart = text.slice(0, tabIndex); + const titlePart = text.slice(tabIndex + 1); + // Inherit only bold/italic/underline from the Heading2 body — letting the + // body's `textStyle` (Times New Roman, etc.) flow into the TOC2 entry + // overrides whatever font the TOC2 paragraph style would otherwise provide. + const titleMarks = filterTitleMarks(source.titleMarks); + + const runs: Array> = []; + runs.push(asRun([wrapTextNode(numberPart || ' ', [])])); + runs.push(asRun([wrapTabNode()])); + runs.push(asRun([wrapTextNode(titlePart || ' ', titleMarks)])); + return runs; +} + +function buildEntryParagraph( + source: TocSource, + config: TocSwitchConfig, + options: BuildTocEntryOptions = {}, +): EntryParagraphJson { + const { display } = config; + + // Reuse an existing `_Toc...` body bookmark when present so navigation and + // round-trips with Word stay aligned. Fall back to a deterministic synthetic + // name only when the source paragraph has no TOC bookmark yet. + const anchor = source.bodyAnchor ?? generateTocBookmarkName(source.sdBlockId); + const linkMark: EntryTextMark | undefined = display.hyperlinks ? buildLinkMark(anchor) : undefined; + + const content: Array> = + source.kind === 'tcField' ? buildTcContent(source, linkMark) : buildHeadingContent(source, linkMark); // Determine whether to omit page number for this entry. const omitRange = display.omitPageNumberLevels; @@ -359,22 +646,17 @@ function buildEntryParagraph( ); if (!omitPageNumber) { - // Separator: custom \p text or default tab. - content.push(asRun([display.separator ? { type: 'text', text: display.separator } : { type: 'tab' }])); - - // Page number — resolved from the page map when available; '0' placeholder - // otherwise (e.g. freshly-pasted heading whose synthetic id hasn't been - // seen by a layout cycle yet). + // Tab separator before the page number — carries the link mark like the + // surrounding text runs so the entire entry is one hyperlink target. + const tabMarks = linkMark ? [linkMark] : []; + const tabNode: Record = { type: 'tab' }; + if (tabMarks.length > 0) tabNode.marks = tabMarks; + content.push(asRun([tabNode])); + + // Real PAGEREF field, matching what the importer materializes for the + // page-number column of a TOC entry. const resolvedPage = options.pageMap?.get(source.sdBlockId); - content.push( - asRun([ - { - type: 'text', - text: resolvedPage != null ? String(resolvedPage) : '0', - marks: [{ type: 'tocPageNumber' }], - }, - ]), - ); + content.push(buildPageReferenceNode(anchor, resolvedPage, linkMark)); } const paragraphProperties: Record = { styleId: `TOC${source.level}` }; @@ -387,7 +669,11 @@ function buildEntryParagraph( const leader = display.tabLeader === 'none' ? undefined : (display.tabLeader && TAB_LEADER_MAP[display.tabLeader]) || 'dot'; const pos = options.tabPos ?? DEFAULT_RIGHT_TAB_POS; - paragraphProperties.tabStops = [{ tab: { tabType: 'right', pos, ...(leader ? { leader } : {}) } }]; + const rightStop: Record = { tab: { tabType: 'right', pos, ...(leader ? { leader } : {}) } }; + // TOC2+ entries in Word also carry a left tab at 1440 twips so the title + // column lines up. TOC1 doesn't (the article number sits at the margin). + paragraphProperties.tabStops = + source.level >= 2 ? [{ tab: { tabType: 'left', pos: 1440 } }, rightStop] : [rightStop]; } return { diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/toc-wrappers.ts b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/toc-wrappers.ts index 374c0edb96..46599d1619 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/toc-wrappers.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/toc-wrappers.ts @@ -669,7 +669,10 @@ function tocUpdatePageNumbers(editor: Editor, input: TocUpdateInput, options?: M /** * Walks the TOC node's children and produces updated paragraph JSON where - * tocPageNumber-marked text runs are replaced with resolved page numbers. + * `pageReference` nodes have their resolved page number refreshed from the + * layout's page map. Also still recognises the legacy `tocPageNumber` mark + * for backwards compatibility with TOCs rebuilt by an older code path that + * predates the pageReference rewrite (SD-3229). */ function buildPageNumberUpdatedContent( tocNode: ProseMirrorNode, @@ -691,27 +694,39 @@ function buildPageNumberUpdatedContent( let paragraphChanged = false; - // Walk recursively — the rebuilt paragraph wraps its runs in `run` nodes, - // so the tocPageNumber mark sits one level below the paragraph's direct - // children. A flat scan over `paragraph.content` would miss it and fall - // through to PAGE_NUMBERS_NOT_MATERIALIZED. + const replacePageNumberText = (node: Record): Record => { + if (!tocSourceId) return node; + const pageNumber = pageMap.get(tocSourceId); + const newText = pageNumber !== undefined ? String(pageNumber) : '??'; + if (node.text !== newText) { + paragraphChanged = true; + return { ...node, text: newText }; + } + return node; + }; + const visit = (node: Record): Record => { + // `pageReference` — the importer-compatible shape we emit since SD-3229. + // The resolved page number is the first text descendant inside the field. + if (node.type === 'pageReference') { + hasPageNumberMarks = true; + const inner = node.content as Array> | undefined; + if (!Array.isArray(inner) || inner.length === 0) return node; + const updatedInner = inner.map((c) => visit(c)); + return updatedInner.some((next, idx) => next !== inner[idx]) ? { ...node, content: updatedInner } : node; + } + const marks = node.marks as Array<{ type: string }> | undefined; const hasTocPageNumberMark = marks?.some((m) => m.type === 'tocPageNumber'); - if (hasTocPageNumberMark) { hasPageNumberMarks = true; + return replacePageNumberText(node); + } - if (!tocSourceId) return node; - - const pageNumber = pageMap.get(tocSourceId); - const newText = pageNumber !== undefined ? String(pageNumber) : '??'; - - if (node.text !== newText) { - paragraphChanged = true; - return { ...node, text: newText }; - } - return node; + // Inside a `pageReference`, the text descendant carries the page number + // without any mark — replace it once we're in that subtree. + if (node.type === 'text') { + return replacePageNumberText(node); } const nested = node.content as Array> | undefined; @@ -721,7 +736,23 @@ function buildPageNumberUpdatedContent( return replaced ? { ...node, content: visited } : node; }; - const updatedContentArray = (childJson.content ?? []).map(visit); + // Only enter a `pageReference` subtree from the top-level walk — a plain + // text node sitting in a title run must not be rewritten as a page number. + const visitTop = (node: Record): Record => { + if (node.type === 'pageReference') return visit(node); + const marks = node.marks as Array<{ type: string }> | undefined; + if (marks?.some((m) => m.type === 'tocPageNumber')) { + hasPageNumberMarks = true; + return replacePageNumberText(node); + } + const nested = node.content as Array> | undefined; + if (!Array.isArray(nested) || nested.length === 0) return node; + const visited = nested.map(visitTop); + const replaced = visited.some((next, idx) => next !== nested[idx]); + return replaced ? { ...node, content: visited } : node; + }; + + const updatedContentArray = (childJson.content ?? []).map(visitTop); if (paragraphChanged) { anyChanged = true; diff --git a/packages/super-editor/src/editors/v1/tests/data/SD-3229.docx b/packages/super-editor/src/editors/v1/tests/data/SD-3229.docx new file mode 100644 index 0000000000..b1d3e67291 Binary files /dev/null and b/packages/super-editor/src/editors/v1/tests/data/SD-3229.docx differ diff --git a/packages/super-editor/src/editors/v1/tests/sd-3229-psa-toc.test.js b/packages/super-editor/src/editors/v1/tests/sd-3229-psa-toc.test.js new file mode 100644 index 0000000000..7f514af882 --- /dev/null +++ b/packages/super-editor/src/editors/v1/tests/sd-3229-psa-toc.test.js @@ -0,0 +1,141 @@ +import { describe, expect, it } from 'vitest'; +import { loadTestDataForEditorTests, initTestEditor } from './helpers/helpers.js'; +import { SuperConverter } from '@core/super-converter/SuperConverter.js'; +import { parseTocInstruction } from '@core/super-converter/field-references/shared/toc-switches.ts'; +import { collectTocSources, buildTocEntryParagraphs } from '../document-api-adapters/helpers/toc-entry-builder.ts'; + +/** + * SD-3229 end-to-end regression: loading the SD-3229.docx fixture + * (a mixed-source TOC backed by `\t "Heading 1,1"` for articles and `\f C` + * for sections) and rebuilding/installing the TOC content must mirror the + * shape the importer emits — body-bookmark anchors (`_Toc230123326` …), + * multi-run entries, and real `pageReference` nodes rather than bare + * `tocPageNumber` marks. Re-installing the rebuilt content via the PM + * command also verifies the encoder accepts the new shape. + */ +describe('SD-3229 mixed-source TOC repro', () => { + async function loadEditor() { + const { docx, media, mediaFiles, fonts } = await loadTestDataForEditorTests('SD-3229.docx'); + const converter = new SuperConverter({ docx, media, mediaFiles, fonts }); + const { editor } = initTestEditor({ converter, loadFromSchema: false }); + return editor; + } + + function findToc(doc) { + let toc; + doc.descendants((node) => { + if (!toc && node.type.name === 'tableOfContents') toc = node; + return !toc; + }); + return toc; + } + + /** Walks an entry paragraph and reports its anchor + child-type sequence + text marks. */ + function describeEntryParagraph(paragraph) { + const styleId = paragraph?.attrs?.paragraphProperties?.styleId; + if (paragraph.type.name !== 'paragraph' || !/^TOC[1-9]$/.test(String(styleId ?? ''))) { + return { kind: 'non-entry' }; + } + const childTypes = []; + const textMarksPerRun = []; + let anchor; + let hasPageReference = false; + let hasTocPageNumberMark = false; + paragraph.descendants((node) => { + if (node.type.name === 'pageReference') { + hasPageReference = true; + childTypes.push('pageReference'); + return false; + } + if (node.marks?.some((m) => m.type.name === 'tocPageNumber')) hasTocPageNumberMark = true; + if (node.type.name === 'text') { + const link = node.marks?.find((m) => m.type.name === 'link'); + if (link && !anchor) anchor = link.attrs?.anchor; + childTypes.push('text'); + textMarksPerRun.push({ + text: node.text, + markTypes: (node.marks ?? []).map((m) => m.type.name), + }); + } else if (node.type.name === 'tab') { + childTypes.push('tab'); + } + return true; + }); + return { styleId, anchor, childTypes, textMarksPerRun, hasPageReference, hasTocPageNumberMark }; + } + + function describeTocNode(tocNode) { + const entries = []; + tocNode.forEach((child) => entries.push(describeEntryParagraph(child))); + return entries; + } + + it('rebuild output mirrors the importer shape (anchors, multi-run entries, pageReference field)', async () => { + const editor = await loadEditor(); + const toc = findToc(editor.state.doc); + expect(toc).toBeDefined(); + + const config = parseTocInstruction(toc.attrs?.instruction ?? ''); + const sources = collectTocSources(editor.state.doc, config); + expect(sources.length).toBe(5); + + const entries = buildTocEntryParagraphs(sources, config); + expect(entries.length).toBe(5); + + // Install the rebuilt content via the PM command — this is the same code + // path `tocUpdateWrapper` exercises in production, minus the plan-engine + // wrapper. Driving it directly catches any schema-validation issues in + // the rebuilt JSON. + const tocId = toc.attrs.sdBlockId; + const replaced = editor.commands.replaceTableOfContentsContentById({ sdBlockId: tocId, content: entries }); + expect(replaced).toBe(true); + + const tocAfter = findToc(editor.state.doc); + const after = describeTocNode(tocAfter); + const styledEntries = after.filter((e) => e.styleId); + expect(styledEntries.length).toBe(5); + + // Anchors must reuse the existing body bookmarks, not synthetic `_Toc` names. + expect(styledEntries.map((e) => e.anchor)).toEqual([ + '_Toc230123326', + '_Toc230123327', + '_Toc230123328', + '_Toc230123329', + '_Toc230123330', + ]); + + // Every entry has a real pageReference field; nothing relies on the legacy tocPageNumber mark. + expect(styledEntries.every((e) => e.hasPageReference)).toBe(true); + expect(styledEntries.every((e) => !e.hasTocPageNumberMark)).toBe(true); + + // TOC1 (Articles): marker run + heading text run + tab + pageReference. + const articles = styledEntries.filter((e) => e.styleId === 'TOC1'); + expect(articles.length).toBe(2); + for (const e of articles) { + expect(e.childTypes).toEqual(['text', 'text', 'tab', 'pageReference']); + // Heading marks (bold / textStyle / underline) must NOT leak into the + // TOC1 entry — TOC1 paragraph style supplies the typography. + for (const run of e.textMarksPerRun) { + expect(run.markTypes).not.toContain('bold'); + expect(run.markTypes).not.toContain('underline'); + expect(run.markTypes).not.toContain('textStyle'); + } + } + + // TOC2 (Sections): section number + tab + title + tab + pageReference. + const sections = styledEntries.filter((e) => e.styleId === 'TOC2'); + expect(sections.length).toBe(3); + for (const e of sections) { + expect(e.childTypes).toEqual(['text', 'tab', 'text', 'tab', 'pageReference']); + // The section title text (second `text` entry) inherits bold/underline + // from the Heading2 source — but never `textStyle`, which would override + // the TOC2 style's font. + const numberRun = e.textMarksPerRun[0]; + const titleRun = e.textMarksPerRun[1]; + expect(numberRun.markTypes).toEqual(['link']); + expect(titleRun.markTypes).toContain('bold'); + expect(titleRun.markTypes).toContain('underline'); + expect(titleRun.markTypes).not.toContain('textStyle'); + } + }); +});