Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions packages/super-editor/src/editors/v1/core/Editor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,7 @@ const rangeIsTrackedInsertionOnly = (doc: PmNode, from: number, to: number): boo
const getSingleTrackedInsertionMarkInRange = (doc: PmNode, from: number, to: number): PmMark | null => {
if (!rangeIsTrackedInsertionOnly(doc, from, to)) return null;

/** @type {import('prosemirror-model').Mark[]} */
const insertionMarks = [];
const insertionMarks: PmMark[] = [];
const seenIds = new Set<string>();
doc.nodesBetween(from, to, (node, pos) => {
if (!node.isInline || !node.isLeaf) return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,34 @@
* @returns {import('../../v2/types/index.js').OpenXmlNode[]}
*/
export function preProcessTcInstruction(nodesToCombine, instrText, _docx, instructionTokens = null) {
// SD-3227 / SD-3229: a `_Toc...` bookmark embedded inside the TC field
// instruction gets swallowed by the synthesized `sd:tableOfContentsEntry`
// atom wrapper — `buildPositionMap` does not visit its descendants, so the
// bookmark name is never indexed (breaks Section link navigation) and the
// resulting PM node tends to be dropped (breaks TOC rebuild). Hoist
// `w:bookmarkStart` / `w:bookmarkEnd` nodes back out as paragraph-level
// siblings: starts go before the entry, ends after — same logical position
// they had inside the field, but visible to the rest of the importer.
const startBookmarks = [];
const endBookmarks = [];
const innerNodes = [];
for (const child of nodesToCombine) {
if (child?.name === 'w:bookmarkStart') startBookmarks.push(child);
else if (child?.name === 'w:bookmarkEnd') endBookmarks.push(child);
else innerNodes.push(child);
}

return [
...startBookmarks,
{
name: 'sd:tableOfContentsEntry',
type: 'element',
attributes: {
instruction: instrText,
...(instructionTokens ? { instructionTokens } : {}),
},
elements: nodesToCombine,
elements: innerNodes,
},
...endBookmarks,
];
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,14 @@ export const DEFAULT_TOC_INSTRUCTION = 'TOC \\o "1-3" \\u \\h \\z';
// Parser
// ---------------------------------------------------------------------------

/**
 * Regex to match a switch and its optional argument. Word emits both quoted
 * (`\t "Heading 1,1"`) and unquoted (`\f C`) forms — capture both so switches
 * like `\f C` survive parsing instead of being read as bare flags.
 *
 * Group 1 = switch letter, group 2 = quoted arg, group 3 = unquoted arg.
 * An unquoted arg stops at whitespace or the next backslash, so a following
 * switch is never consumed as the previous switch's argument.
 *
 * The pattern is global (`g`), which makes `lastIndex` stateful: reset it to
 * 0 before starting a new `exec` loop.
 */
const SWITCH_PATTERN = /\\([a-z])(?:\s*(?:"([^"]*)"|([^\s\\]+)))?/gi;

function parseLevelRange(value: string): { from: number; to: number } | undefined {
const match = value.match(/^(\d+)-(\d+)$/);
Expand Down Expand Up @@ -132,7 +138,10 @@ export function parseTocInstruction(instruction: string): TocSwitchConfig {
SWITCH_PATTERN.lastIndex = 0;
while ((match = SWITCH_PATTERN.exec(instruction)) !== null) {
const switchChar = match[1].toLowerCase();
const rawArg = match[2];
// Group 2 = quoted arg, group 3 = unquoted arg. Track which form was used
// so `\p ""` (an explicit empty arg) stays distinguishable from `\p`
// (the switch with no arg at all).
const rawArg = match[2] !== undefined ? match[2] : match[3];
const arg = rawArg ?? '';

switch (switchChar) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import { footnoteReferenceHandlerEntity } from './footnoteReferenceImporter.js';
import { endnoteReferenceHandlerEntity } from './endnoteReferenceImporter.js';
import { tableNodeHandlerEntity } from './tableImporter.js';
import { tableOfContentsHandlerEntity } from './tableOfContentsImporter.js';
import { tableOfContentsEntryEntity } from './tableOfContentsEntryImporter.js';
import { indexHandlerEntity, indexEntryHandlerEntity } from './indexImporter.js';
import { bibliographyHandlerEntity } from './bibliographyImporter.js';
import { preProcessNodesForFldChar } from '../../field-references';
Expand Down Expand Up @@ -340,6 +341,7 @@ export const defaultNodeListHandler = () => {
tabNodeEntityHandler,
noBreakHyphenNodeEntityHandler,
tableOfContentsHandlerEntity,
tableOfContentsEntryEntity,
indexHandlerEntity,
bibliographyHandlerEntity,
indexEntryHandlerEntity,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import { generateV2HandlerEntity } from '@core/super-converter/v3/handlers/utils';
import { translator } from '../../v3/handlers/sd/tableOfContentsEntry/tableOfContentsEntry-translator.js';

/**
 * Bridges the v3 `sd:tableOfContentsEntry` translator into the v2 node-list
 * pipeline so TC fields synthesized by `tc-preprocessor` are materialized as
 * PM `tableOfContentsEntry` nodes during import.
 *
 * The entity is produced by `generateV2HandlerEntity`, which is given the
 * handler name `tableOfContentsEntryNodeHandler` and the imported v3
 * `translator`.
 *
 * @type {import("./docxImporter").NodeHandlerEntry}
 */
export const tableOfContentsEntryEntity = generateV2HandlerEntity('tableOfContentsEntryNodeHandler', translator);
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,20 @@ function encodeBlockId(input: string): string {
* - All required bookmarks already exist
* - The schema lacks bookmark node types (headless/test environments)
*/
export function syncTocBookmarks(editor: Editor, sources: Array<{ sdBlockId: string }>): void {
export function syncTocBookmarks(editor: Editor, sources: Array<{ sdBlockId: string; bodyAnchor?: string }>): void {
const { schema, doc } = editor.state;
if (!schema.nodes.bookmarkStart || !schema.nodes.bookmarkEnd) return;

const needed = deduplicateByBlockId(sources);
// SD-3229: when a source already has a `_Toc...` bookmark in the body
// (preserved by the importer), the rebuilder reuses that name as the
// entry's anchor. Generating a *new* synthetic bookmark on top would
// litter the document with duplicate `_Toc<uuid>` markers next to the
// original `_Toc230123326`, etc. Drop sources whose anchor is already
// satisfied by an existing body bookmark.
const sourcesNeedingSync = sources.filter((s) => !s.bodyAnchor);
if (sourcesNeedingSync.length === 0) return;

const needed = deduplicateByBlockId(sourcesNeedingSync);
const existing = collectExistingTocBookmarkNames(doc);
const missing = needed.filter((t) => !existing.has(t.bookmarkName));
if (missing.length === 0) return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,19 @@ function titleTextOf(paragraphs: ReturnType<typeof buildTocEntryParagraphs>): Te
return titleRun.content?.[0] ?? {};
}

/**
 * Find the page-number text inside the entry's `pageReference` node — the
 * builder emits a real PAGEREF field instead of a `tocPageNumber` mark.
 *
 * Only the first paragraph is inspected. Its children are scanned in order;
 * for each `pageReference` child, the first `text` node found inside any of
 * its runs is returned.
 *
 * @returns The first matching text node, or an empty object when the first
 *   paragraph has no `pageReference` child containing text.
 */
function pageNumberTextOf(paragraphs: ReturnType<typeof buildTocEntryParagraphs>): TextLike {
  const nodes = paragraphs[0]!.content as Array<{ type?: string; content?: TextLike[] }>;
  for (const node of nodes) {
    // Title and tab runs are siblings of the page reference; skip them.
    if (node.type !== 'pageReference') continue;
    const innerRuns = (node.content ?? []) as Array<{ content?: TextLike[] }>;
    for (const run of innerRuns) {
      const text = run.content?.find((c) => c.type === 'text');
      if (text) return text;
    }
  }
  return {};
}
Expand Down Expand Up @@ -125,22 +132,27 @@ describe('buildTocEntryParagraphs', () => {
expect(linkMark?.attrs?.anchor).toBe(generateTocBookmarkName(BASE_SOURCE.sdBlockId));
});

it('wraps text in `run` nodes and emits a real pageReference for the page number', () => {
  const paragraphs = buildTocEntryParagraphs([BASE_SOURCE], makeConfig({ hyperlinks: true }));
  const nodes = paragraphs[0]!.content as Array<{ type: string }>;
  // Title run + tab run + pageReference node = 3 children.
  expect(nodes.map((n) => n.type)).toEqual(['run', 'run', 'pageReference']);
});

it('carries allowed character marks (bold, italic, underline, color, highlight, fontFamily, textStyle.fontFamily) from the source heading', () => {
it("does not propagate the heading paragraph's character marks into the rebuilt entry (SD-3229)", () => {
// Per ECMA-376 §17.16.5.68, Word rebuilds heading-driven TOC entries
// (\o / \u / \t) from the heading text plus the linked TOC{n} style's
// typography. The heading's own bold/underline/font marks must NOT
// bleed through — otherwise the entry shows "ARTICLE 1 BASIC
// INFORMATION" in Heading1's bold/Times-New-Roman-Bold instead of the
// TOC1 style's lighter weight.
const sourceWithMarks: TocSource = {
...BASE_SOURCE,
segments: [
{
text: 'Heading',
marks: [
{ type: 'textStyle', attrs: { fontFamily: 'Aptos', fontSize: '24pt' } }, // fontSize must be scrubbed
{ type: 'textStyle', attrs: { fontFamily: 'Aptos', fontSize: '24pt' } },
{ type: 'bold' },
{ type: 'italic' },
{ type: 'underline' },
Expand All @@ -153,22 +165,10 @@ describe('buildTocEntryParagraphs', () => {
};
const paragraphs = buildTocEntryParagraphs([sourceWithMarks], makeConfig({ hyperlinks: true }));
const text = titleTextOf(paragraphs);
expect(text.marks!.map((m) => m.type)).toEqual([
'textStyle',
'bold',
'italic',
'underline',
'color',
'highlight',
'fontFamily',
'link',
]);
// textStyle keeps fontFamily, drops fontSize.
const textStyleMark = text.marks!.find((m) => m.type === 'textStyle');
expect(textStyleMark!.attrs).toEqual({ fontFamily: 'Aptos' });
expect(text.marks!.map((m) => m.type)).toEqual(['link']);
});

it('drops disallowed marks (fontSize, strike, link, comments, track-changes, tocPageNumber)', () => {
it('only the link mark is attached to heading-source title runs', () => {
const sourceWithDisallowed: TocSource = {
...BASE_SOURCE,
segments: [
Expand All @@ -188,11 +188,38 @@ describe('buildTocEntryParagraphs', () => {
};
const paragraphs = buildTocEntryParagraphs([sourceWithDisallowed], makeConfig({ hyperlinks: true }));
const text = titleTextOf(paragraphs);
// Only the allowed `bold` survives, plus the rebuilt `link` to the source bookmark.
expect(text.marks!.map((m) => m.type)).toEqual(['bold', 'link']);
expect(text.marks!.map((m) => m.type)).toEqual(['link']);
const linkMark = text.marks!.find((m) => m.type === 'link');
expect(linkMark!.attrs!.anchor).toBe(generateTocBookmarkName(BASE_SOURCE.sdBlockId));
expect(linkMark!.attrs!.href).toBeUndefined();
// The rebuilt link points at the synthetic in-document anchor; the source's
// `href: "https://example.com"` is dropped (we route through the anchor).
expect(linkMark!.attrs!.href).toBe(`#${generateTocBookmarkName(BASE_SOURCE.sdBlockId)}`);
});

it('TC-field entries carry bold/italic/underline (but not textStyle) from the surrounding Heading2 (SD-3229)', () => {
  // When Word updates the field, the TC title picks up character formatting
  // from the body run around it — limited to the visible style marks
  // (bold/italic/underline). A carried-over `textStyle` would override the
  // font chosen by the TOC2 paragraph style, which is wrong.
  const fieldSource: TocSource = {
    text: 'Section 1.1\tCertain Basic Terms',
    level: 2,
    sdBlockId: 'h2-1',
    kind: 'tcField',
    titleMarks: [
      { type: 'bold' },
      { type: 'underline' },
      { type: 'textStyle', attrs: { fontFamily: 'Times New Roman, serif' } },
    ],
  };
  const built = buildTocEntryParagraphs([fieldSource], makeConfig({ hyperlinks: true }));
  const entryChildren = built[0]!.content;

  // Mark type names on the first text node of a run.
  const markTypesOf = (run: { content?: TextLike[] }) => run.content?.[0]?.marks?.map((mark) => mark.type);

  // Child 0 is the section-number run: link only, nothing inherited.
  const sectionNumberRun = entryChildren[0] as { content?: TextLike[] };
  expect(markTypesOf(sectionNumberRun)).toEqual(['link']);

  // Child 2 is the title run (number, tab, title): bold + underline are
  // kept, textStyle is removed so the TOC2 style decides the font.
  const headingTitleRun = entryChildren[2] as { content?: TextLike[] };
  expect(markTypesOf(headingTitleRun)).toEqual(['bold', 'underline', 'link']);
});
});

Expand Down Expand Up @@ -220,6 +247,7 @@ interface MockParagraph {
text: string;
styleId?: string;
outlineLevel?: number;
listMarkerText?: string;
}

function mockDoc(paragraphs: MockParagraph[]) {
Expand All @@ -239,6 +267,7 @@ function mockDoc(paragraphs: MockParagraph[]) {
...(p.styleId ? { styleId: p.styleId } : {}),
...(p.outlineLevel !== undefined ? { outlineLevel: p.outlineLevel } : {}),
},
...(p.listMarkerText !== undefined ? { listRendering: { markerText: p.listMarkerText } } : {}),
},
isText: false,
descendants: (cb: (node: unknown, pos: number) => boolean | void) => {
Expand Down Expand Up @@ -432,4 +461,82 @@ describe('collectTocSources', () => {
expect(sources.map((s) => s.text)).toEqual(['Part 3', 'Part 4']);
expect(sources[1].sdBlockId).toMatch(/^para-auto-/);
});

describe('custom-style mapping (\\t)', () => {
  // SD-3229: a TOC instruction that depends only on \t (plus \f without any
  // imported TC nodes) used to rebuild into the "No table of contents entries
  // found." placeholder, because collectTocSources ignored \t mappings.

  // Builds a fresh config per call: \f C as the only source switch, with a
  // single \t mapping. styleName "Heading 1" (with space) must match
  // styleId "Heading1".
  const heading1MappingConfig = (): TocSwitchConfig => ({
    source: { tcFieldIdentifier: 'C' },
    display: { hyperlinks: true },
    preserved: { customStyles: [{ styleName: 'Heading 1', level: 1 }] },
  });

  it('collects paragraphs whose styleId matches a \\t custom-style mapping', () => {
    const body = mockDoc([
      { sdBlockId: 'p1', text: 'Article 1', styleId: 'Heading1' },
      { sdBlockId: 'p2', text: 'Article 2', styleId: 'Heading1' },
      { sdBlockId: 'p3', text: 'Body', styleId: 'Normal' },
    ]);

    const collected = collectTocSources(body, heading1MappingConfig());

    expect(collected.map((entry) => entry.text)).toEqual(['Article 1', 'Article 2']);
    expect(collected.every((entry) => entry.kind === 'customStyle')).toBe(true);
    expect(collected.every((entry) => entry.level === 1)).toBe(true);
  });

  it('prefers \\o heading collection over \\t when both match the same paragraph', () => {
    const body = mockDoc([{ sdBlockId: 'p1', text: 'Intro', styleId: 'Heading1' }]);
    const outlineAndMapping: TocSwitchConfig = {
      source: { outlineLevels: { from: 1, to: 3 } },
      display: {},
      preserved: { customStyles: [{ styleName: 'Heading 1', level: 2 }] },
    };

    const collected = collectTocSources(body, outlineAndMapping);

    expect(collected).toHaveLength(1);
    const [only] = collected;
    expect(only.kind).toBe('heading');
    expect(only.level).toBe(1);
  });

  it('exposes the rendered list marker so the builder can emit it as its own run', () => {
    // SD-3229 PSA repro: the Heading1 paragraphs hold only "BASIC INFORMATION"
    // / "PROPERTY" as text content. The "ARTICLE 1" / "ARTICLE 2" prefix is
    // auto-numbered by the Heading1 style (lvlText "ARTICLE %1") and shows up
    // on the paragraph as listRendering.markerText after layout.
    const body = mockDoc([
      { sdBlockId: 'p1', text: 'BASIC INFORMATION', styleId: 'Heading1', listMarkerText: 'ARTICLE 1' },
      { sdBlockId: 'p2', text: 'PROPERTY', styleId: 'Heading1', listMarkerText: 'ARTICLE 2' },
    ]);

    const collected = collectTocSources(body, heading1MappingConfig());

    expect(collected.map((entry) => entry.text)).toEqual(['BASIC INFORMATION', 'PROPERTY']);
    // The marker travels next to the segments, letting the builder give it a
    // run of its own (Word's two-run TOC1 shape) rather than folding the
    // prefix into the heading text.
    expect(collected.map((entry) => entry.markerText)).toEqual(['ARTICLE 1', 'ARTICLE 2']);
  });

  it('respects an explicit \\o range when filtering \\t matches', () => {
    const body = mockDoc([{ sdBlockId: 'p1', text: 'Article 1', styleId: 'CustomHeading' }]);
    const narrowOutlineRange: TocSwitchConfig = {
      source: { outlineLevels: { from: 1, to: 1 } },
      display: {},
      preserved: { customStyles: [{ styleName: 'CustomHeading', level: 2 }] },
    };

    // The mapping assigns level 2 while the \o range is 1-1, so the paragraph
    // is excluded.
    const collected = collectTocSources(body, narrowOutlineRange);

    expect(collected).toHaveLength(0);
  });
});
});
Loading
Loading