From 11aeb061076f72689bf553881ac6909c4cf59fc9 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Sun, 28 Jun 2026 15:20:56 +0200 Subject: [PATCH 1/3] PDF stage 4.10: tiling patterns (PatternType 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Render `/PatternType 1` tiling patterns selected by `scn` in a `/Pattern` colour space, reusing the form-XObject content-stream machinery. - Parser: `parse_pattern` reads a tiling pattern's stream content, `/Resources`, `/BBox`, `/XStep`/`/YStep` and `/PaintType`. Patterns are memoized by reference (like XObjects) so a cycle through a tiling pattern's own `/Resources` resolves to the existing element instead of recursing. - Extractor: `paint_path` resolves a tiling pattern to `PathElement::fill_pattern` + the pattern `/Matrix`. `scn` now keeps the leading colour components for an uncoloured pattern (`/PaintType 2`), so its underlying fill colour is available. - HTML: a `PatternRegistry` runs the tile content as a mini page (`extract_page`) and emits an SVG `<pattern>` repeating it every `/XStep`/`/YStep`, with `patternTransform` placing the lattice; the fill references it as `fill="url(#…)"`. Uncoloured cells are painted in the path's fill colour (folded into the cache key). Only paths and images inside a tile are rendered; nested text/shadings/patterns are skipped. Tests: a tiling pattern fills a path (`fill_pattern` + matrix), and an uncoloured pattern carries its paint colour. Reference-output submodules intentionally not bumped (left for regeneration). 
Co-Authored-By: Claude Opus 4.8 --- src/odr/internal/html/pdf_file.cpp | 106 +++++++++++++++--- src/odr/internal/pdf/AGENTS.md | 11 +- src/odr/internal/pdf/pdf_document_element.hpp | 13 +++ src/odr/internal/pdf/pdf_document_parser.cpp | 53 ++++++++- src/odr/internal/pdf/pdf_page_element.hpp | 8 ++ src/odr/internal/pdf/pdf_page_extractor.cpp | 14 ++- test/src/internal/pdf/pdf_page_extractor.cpp | 39 +++++++ 7 files changed, 222 insertions(+), 22 deletions(-) diff --git a/src/odr/internal/html/pdf_file.cpp b/src/odr/internal/html/pdf_file.cpp index b2eda69e..960f8f21 100644 --- a/src/odr/internal/html/pdf_file.cpp +++ b/src/odr/internal/html/pdf_file.cpp @@ -129,12 +129,12 @@ std::string svg_path_d(const std::vector &subpaths, /// stroke carries width (CTM-scaled in user space), caps, joins, miter limit /// and the dash pattern. A zero stroke width renders as a thin hairline. /// `clip_id`, when non-empty, references a `<clipPath>` installed via -/// `clip-path`. `gradient_id`, when non-empty, fills the path with that -/// gradient (a shading pattern) instead of `fill_color`. +/// `clip-path`. `fill_url_id`, when non-empty, fills the path with that paint +/// server (a shading gradient or a tiling `<pattern>`) instead of `fill_color`. 
std::string svg_path_fragment(const pdf::PathElement &path, const util::math::Transform2D &to_box, const std::string &clip_id, - const std::string &gradient_id) { + const std::string &fill_url_id) { if ((!path.fill && !path.stroke) || path.subpaths.empty()) { return {}; } @@ -145,8 +145,8 @@ std::string svg_path_fragment(const pdf::PathElement &path, } if (path.fill) { - if (!gradient_id.empty()) { - f << " fill=\"url(#" << gradient_id << ")\""; + if (!fill_url_id.empty()) { + f << " fill=\"url(#" << fill_url_id << ")\""; } else { f << " fill=\"" << device_color_to_css(path.fill_color) << '"'; } @@ -370,6 +370,79 @@ std::string svg_shading_fragment(const std::string &gradient_id, return std::move(f).str(); } +/// Registers a page's tiling patterns (`/PatternType 1`) as SVG `` +/// defs. The pattern's content stream is run as a mini page (`extract_page`) +/// into tile fragments laid out in pattern space; the `` repeats them +/// every `/XStep`/`/YStep`, and `patternTransform` (pattern space -> page box) +/// places the lattice. An uncoloured pattern (`/PaintType 2`) ignores its +/// content's own colours and paints in the path's fill colour, so the cache key +/// folds that colour in. Ids are namespaced per page (`pat_`). Only +/// paths and images inside the tile are rendered (nested text/shadings/patterns +/// are skipped — rare). Returns "" for an unrepresentable pattern. 
+class PatternRegistry { +public: + explicit PatternRegistry(std::uint32_t page) : m_page{page} {} + + std::string register_pattern(const pdf::Pattern &pattern, + const util::math::Transform2D &m, + const pdf::GraphicsState::Color &fill_color, + const Logger &logger) { + if (pattern.resources == nullptr || pattern.content.empty() || + pattern.x_step == 0 || pattern.y_step == 0) { + return {}; + } + const bool uncoloured = pattern.paint_type == 2; + std::ostringstream sig; + sig << static_cast(&pattern) << ':' << m.a << ',' << m.b + << ',' << m.c << ',' << m.d << ',' << m.e << ',' << m.f; + if (uncoloured) { + sig << ':' << device_color_to_css(fill_color); + } + const auto [it, inserted] = m_id_by_signature.try_emplace(sig.str()); + if (!inserted) { + return it->second; + } + it->second = + "pat" + std::to_string(m_page) + "_" + std::to_string(++m_count); + + // Tile content is laid out in pattern space (identity page transform); the + // y-flip and placement live in `patternTransform`. + const util::math::Transform2D identity; + std::ostringstream tile; + for (const pdf::PageElement &element : + pdf::extract_page(pattern.content, *pattern.resources, logger)) { + if (const auto *path = std::get_if(&element)) { + pdf::PathElement painted = *path; + if (uncoloured) { + painted.fill_color = fill_color; + painted.stroke_color = fill_color; + } + tile << svg_path_fragment(painted, identity, "", ""); + } else if (const auto *image = std::get_if(&element)) { + tile << svg_image_fragment(*image, identity, ""); + } + } + + m_defs << "second + << "\" patternUnits=\"userSpaceOnUse\" x=\"" + << round2(pattern.bbox[0]) << "\" y=\"" << round2(pattern.bbox[1]) + << "\" width=\"" << round2(std::abs(pattern.x_step)) + << "\" height=\"" << round2(std::abs(pattern.y_step)) + << "\" patternTransform=\"matrix(" << m.a << ',' << m.b << ',' << m.c + << ',' << m.d << ',' << round2(m.e) << ',' << round2(m.f) << ")\">" + << std::move(tile).str() << ""; + return it->second; + } + + 
[[nodiscard]] std::string defs() const { return m_defs.str(); } + +private: + std::uint32_t m_page; + std::uint32_t m_count{0}; + std::unordered_map m_id_by_signature; + std::ostringstream m_defs; +}; + /// Deduplicates CSS declarations into atomic, single-property classes. PDF text /// emits one absolutely-positioned span per glyph run, and the same font sizes, /// offsets and spacings recur across the (potentially millions of) spans. @@ -693,23 +766,27 @@ class HtmlServiceImpl final : public HtmlService { ClipRegistry clips(static_cast(pages_out.size())); GradientRegistry gradients(static_cast(pages_out.size())); + PatternRegistry patterns(static_cast(pages_out.size())); for (const pdf::PageElement &element : pdf::extract_page(stream, *page->resources, *m_logger)) { // A painted path: serialize its subpaths to an SVG `` fragment in // the page viewBox (fill and/or stroke), under any active clip. A - // shading-pattern fill is painted through a gradient instead of a - // colour. - if (const auto *path = std::get_if(&element); - path != nullptr) { + // shading- or tiling-pattern fill is painted through a paint server + // (gradient/``) instead of a colour. 
+ if (const auto *path = std::get_if(&element); path != nullptr) { const std::string clip_id = clips.register_clip(path->clip, to_box); - std::string gradient_id; + std::string fill_url_id; if (path->fill_shading != nullptr) { - gradient_id = gradients.register_gradient( + fill_url_id = gradients.register_gradient( *path->fill_shading, path->shading_transform * to_box); + } else if (path->fill_pattern != nullptr) { + fill_url_id = patterns.register_pattern( + *path->fill_pattern, path->pattern_transform * to_box, + path->fill_color, *m_logger); } std::string fragment = - svg_path_fragment(*path, to_box, clip_id, gradient_id); + svg_path_fragment(*path, to_box, clip_id, fill_url_id); if (!fragment.empty()) { page_out.items.push_back(PathOut{std::move(fragment)}); } @@ -959,8 +1036,9 @@ class HtmlServiceImpl final : public HtmlService { } } - // Clip-path and gradient defs share the page's hidden ``. - page_out.clip_defs = clips.defs() + gradients.defs(); + // Clip-path, gradient and pattern defs share the page's hidden + // ``. + page_out.clip_defs = clips.defs() + gradients.defs() + patterns.defs(); } // Post-pass: every page has been scanned, so the per-font used-scalar sets diff --git a/src/odr/internal/pdf/AGENTS.md b/src/odr/internal/pdf/AGENTS.md index 8994e6c7..5c1f21b4 100644 --- a/src/odr/internal/pdf/AGENTS.md +++ b/src/odr/internal/pdf/AGENTS.md @@ -584,8 +584,15 @@ stage exists to avoid. parsed onto `Shading` but **not yet honoured** by the renderer (deferred): it always uses SVG's `pad` spread, so a non-extended shading is over-painted past its interval rather than masked to it (honouring it needs the fill clipped to - the gradient band/annulus). Mesh/function shadings (types 1, 4–7) and tiling - patterns (`/PatternType 1`) are still future stages. + the gradient band/annulus). 
+- **Tiling patterns** (`/PatternType 1`): the pattern's content stream is run as + a mini page (`extract_page`) and emitted as an SVG `<pattern>` tile, repeated + every `/XStep`/`/YStep`, with `patternTransform` (the pattern `/Matrix`) + placing the lattice; a `/PatternType 1` fill references it as `fill="url(#…)"`. + Coloured (`/PaintType 1`) cells carry their own colours; uncoloured + (`/PaintType 2`) cells are painted in the current fill colour. Only paths and + images inside a tile are rendered (nested text/shadings/patterns are skipped — + rare). - **SVG residue** — where no 1:1 primitive exists; all at generation time, never rasterization: mesh/function shadings (types 1, 4–7) → tessellate into small flat polygons (pdf.js's approach); color spaces diff --git a/src/odr/internal/pdf/pdf_document_element.hpp b/src/odr/internal/pdf/pdf_document_element.hpp index c0c6f85f..4b66b17f 100644 --- a/src/odr/internal/pdf/pdf_document_element.hpp +++ b/src/odr/internal/pdf/pdf_document_element.hpp @@ -177,6 +177,19 @@ struct Pattern final : Element { /// Shading pattern (`/PatternType 2`): the shading painted through the path, /// pre-resolved (its tint function sampled into stops). Null otherwise. std::shared_ptr shading; + + /// Tiling pattern (`/PatternType 1`, ISO 32000-1 8.7.3.1): a content-stream + /// cell tiled across the filled region. `/PaintType` 1 (coloured) carries its + /// own colours; `/PaintType` 2 (uncoloured) is painted entirely in the + /// current fill colour at use time. The cell is `/BBox` in pattern space, + /// repeated every `/XStep`/`/YStep`. Fields are zero/empty for a non-tiling + /// pattern. 
+ std::int32_t paint_type{0}; ///< `/PaintType`: 1 coloured, 2 uncoloured + std::array bbox{}; ///< `/BBox` `[x0 y0 x1 y1]`, pattern space + double x_step{0}; ///< `/XStep`, pattern space + double y_step{0}; ///< `/YStep`, pattern space + Resources *resources{nullptr}; ///< the tile's own `/Resources` + std::string content; ///< decoded tile content stream }; /// A non-owning view over a string of PDF character codes, splitting it into diff --git a/src/odr/internal/pdf/pdf_document_parser.cpp b/src/odr/internal/pdf/pdf_document_parser.cpp index 897cb430..58ed83e7 100644 --- a/src/odr/internal/pdf/pdf_document_parser.cpp +++ b/src/odr/internal/pdf/pdf_document_parser.cpp @@ -54,6 +54,14 @@ struct State { m_fonts[reference] = font; } + [[nodiscard]] Pattern *find_pattern(const ObjectReference &reference) const { + const auto it = m_patterns.find(reference); + return it != m_patterns.end() ? it->second : nullptr; + } + void cache_pattern(const ObjectReference &reference, Pattern *pattern) { + m_patterns[reference] = pattern; + } + private: DocumentParser *m_parser{}; Document *m_document{}; @@ -74,6 +82,12 @@ struct State { /// re-inline the (base64) font program once per page — a multi-page document /// reusing one font would balloon to gigabytes. std::map m_fonts; + + /// Memoized Pattern elements by reference, mirroring `m_x_objects`. A tiling + /// pattern's own `/Resources` may name patterns (including, in a malformed + /// file, itself); registering the element before its resources are parsed + /// breaks the cycle and shares a pattern reused across pages. + std::map m_patterns; }; /// Normalize /Rotate to {0, 90, 180, 270}: the spec requires a multiple of 90, @@ -785,14 +799,25 @@ std::shared_ptr parse_shading_resource(State &state, } /// Parse a `/Pattern` entry. A shading pattern (`/PatternType 2`) resolves its -/// `/Shading`; a tiling pattern (`/PatternType 1`) is recognized here and its -/// content rendered in a later stage. `/Matrix` is taken either way. 
+/// `/Shading`; a tiling pattern (`/PatternType 1`) is a stream whose content, +/// `/Resources`, `/BBox`, `/XStep`/`/YStep` and `/PaintType` describe the tile. +/// `/Matrix` is taken either way. Pattern *parse_pattern(State &state, const ObjectReference &reference, const Resources *resources) { DocumentParser &parser = state.parser(); Document &document = state.document(); + // Shared patterns are parsed once; a cycle through a tiling pattern's own + // `/Resources` resolves to the existing element instead of recursing. + if (Pattern *cached = state.find_pattern(reference); cached != nullptr) { + return cached; + } + auto *pattern = document.create_element(); + // Register before parsing `/Resources` so a cycle back here resolves to this + // element rather than recursing forever. + state.cache_pattern(reference, pattern); + IndirectObject object = parser.read_object(reference); if (!object.object.is_dictionary()) { return pattern; @@ -814,6 +839,30 @@ Pattern *parse_pattern(State &state, const ObjectReference &reference, parse_shading_resource(state, dictionary.get("Shading"), resources); } else if (pattern_type == 1) { pattern->type = Pattern::Type::tiling; + pattern->paint_type = static_cast( + parser.resolve_object_copy(dictionary.get("PaintType")) + .as_integer_opt() + .value_or(1)); + pattern->x_step = parser.resolve_object_copy(dictionary.get("XStep")) + .as_real_opt() + .value_or(0); + pattern->y_step = parser.resolve_object_copy(dictionary.get("YStep")) + .as_real_opt() + .value_or(0); + if (dictionary.has_value("BBox")) { + const Array box = + parser.resolve_object_copy(dictionary["BBox"]).as_array(); + if (box.size() == 4) { + pattern->bbox = {box[0].as_real(), box[1].as_real(), box[2].as_real(), + box[3].as_real()}; + } + } + if (object.has_stream) { + pattern->content = parser.read_decoded_stream(object); + } + if (dictionary.has_key("Resources")) { + pattern->resources = parse_resources(state, dictionary["Resources"]); + } } return pattern; } diff 
--git a/src/odr/internal/pdf/pdf_page_element.hpp b/src/odr/internal/pdf/pdf_page_element.hpp index 538c5067..409cf6e9 100644 --- a/src/odr/internal/pdf/pdf_page_element.hpp +++ b/src/odr/internal/pdf/pdf_page_element.hpp @@ -11,6 +11,7 @@ namespace odr::internal::pdf { struct Font; struct Shading; +struct Pattern; /// One show-text operation laid out in user space. The transform places the /// text origin and orientation; the font size is kept separate so the renderer @@ -90,6 +91,13 @@ struct PathElement { /// `Resources`, which outlives the element. const Shading *fill_shading{nullptr}; util::math::Transform2D shading_transform; + /// When the fill is a tiling pattern (`scn` naming a `/PatternType 1` + /// pattern), the resolved pattern whose content cell tiles the path, with + /// `pattern_transform` mapping pattern space to user space (the pattern + /// `/Matrix`). An uncoloured pattern (`/PaintType 2`) is painted in + /// `fill_color`. Null for a non-tiling fill. Owned by `Resources`. + const Pattern *fill_pattern{nullptr}; + util::math::Transform2D pattern_transform; /// Stroke parameters. `line_width` and the dash lengths are in the path's /// user space (the CTM scale is already folded in, so they live in the same /// space as the geometry). A `line_width` of 0 means a device-thin line. diff --git a/src/odr/internal/pdf/pdf_page_extractor.cpp b/src/odr/internal/pdf/pdf_page_extractor.cpp index 74f09df3..7c797733 100644 --- a/src/odr/internal/pdf/pdf_page_extractor.cpp +++ b/src/odr/internal/pdf/pdf_page_extractor.cpp @@ -342,7 +342,9 @@ void set_color_space(GraphicsState::Color &color, const std::string &name, /// colour space, interpret the components as a device colour by their count /// (ISO 32000-1 8.6.8). A trailing name operand selects a `/Pattern`: its name /// is recorded on `color.pattern` and resolved against `Resources::pattern` at -/// paint time (a shading pattern then fills the path through its gradient). 
+/// paint time (a shading pattern fills through its gradient). An uncoloured +/// tiling pattern carries leading components — the colour in the pattern colour +/// space's underlying space — which are still resolved into the fill colour. void set_color(GraphicsState::Color &color, const GraphicsOperator &op) { std::vector components; std::string pattern_name; @@ -354,9 +356,10 @@ void set_color(GraphicsState::Color &color, const GraphicsOperator &op) { } } color.pattern = pattern_name; - if (!pattern_name.empty()) { - // A pattern colour carries no device components to convert; the pattern is - // resolved at paint time. Leave any underlying colour as-is. + if (!pattern_name.empty() && components.empty()) { + // A coloured pattern (or shading pattern) carries no components; the + // pattern is resolved at paint time and any underlying colour is left + // as-is. return; } if (color.def != nullptr) { @@ -437,6 +440,9 @@ void paint_path(std::vector &out, const Resources &resources, pattern->shading != nullptr) { element.fill_shading = pattern->shading.get(); element.shading_transform = pattern->matrix; + } else if (pattern->type == Pattern::Type::tiling) { + element.fill_pattern = pattern; + element.pattern_transform = pattern->matrix; } } } diff --git a/test/src/internal/pdf/pdf_page_extractor.cpp b/test/src/internal/pdf/pdf_page_extractor.cpp index 5ee99aed..cf7336b3 100644 --- a/test/src/internal/pdf/pdf_page_extractor.cpp +++ b/test/src/internal/pdf/pdf_page_extractor.cpp @@ -922,6 +922,45 @@ TEST(PdfPageExtractor, scn_unknown_pattern_has_no_shading) { EXPECT_EQ(std::get(page[0]).fill_shading, nullptr); } +// `scn` naming a `/PatternType 1` tiling pattern fills the path with the +// pattern; `fill_pattern` is resolved and the pattern `/Matrix` carried. 
+TEST(PdfPageExtractor, scn_tiling_pattern_fills_path) { + Pattern pattern; + pattern.type = Pattern::Type::tiling; + pattern.matrix = Transform2D::translation(3, 4); + Resources res; + res.pattern["P1"] = &pattern; + + const auto page = + extract_page("/Pattern cs /P1 scn 0 0 10 10 re f", res, Logger::null()); + ASSERT_EQ(page.size(), 1); + const PathElement &p = std::get(page[0]); + ASSERT_NE(p.fill_pattern, nullptr); + EXPECT_EQ(p.fill_pattern->type, Pattern::Type::tiling); + EXPECT_EQ(p.fill_shading, nullptr); + EXPECT_TRUE(p.fill); + EXPECT_DOUBLE_EQ(p.pattern_transform.e, 3); + EXPECT_DOUBLE_EQ(p.pattern_transform.f, 4); +} + +// An uncoloured tiling pattern (`/PaintType 2`) records the current fill colour +// alongside the pattern, so the renderer can paint the cell in it. +TEST(PdfPageExtractor, scn_uncoloured_tiling_pattern_carries_colour) { + Pattern pattern; + pattern.type = Pattern::Type::tiling; + pattern.paint_type = 2; + Resources res; + res.pattern["P2"] = &pattern; + + // The colour precedes the pattern selection in the Pattern colour space. + const auto page = extract_page("/Pattern cs 1 0 0 /P2 scn 0 0 10 10 re f", + res, Logger::null()); + ASSERT_EQ(page.size(), 1); + const PathElement &p = std::get(page[0]); + ASSERT_NE(p.fill_pattern, nullptr); + EXPECT_EQ(p.fill_pattern->paint_type, 2); +} + // The `sh` operator floods the current clip with a named `/Shading`, emitting a // `ShadingElement` placed by the CTM. 
TEST(PdfPageExtractor, sh_emits_shading_element) { From 5084eed4eaf56a56c174bb2944d86e62c40cc5f2 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Sun, 28 Jun 2026 18:41:43 +0200 Subject: [PATCH 2/3] format --- src/odr/internal/html/pdf_file.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/odr/internal/html/pdf_file.cpp b/src/odr/internal/html/pdf_file.cpp index 960f8f21..79f39bdb 100644 --- a/src/odr/internal/html/pdf_file.cpp +++ b/src/odr/internal/html/pdf_file.cpp @@ -381,7 +381,7 @@ std::string svg_shading_fragment(const std::string &gradient_id, /// are skipped — rare). Returns "" for an unrepresentable pattern. class PatternRegistry { public: - explicit PatternRegistry(std::uint32_t page) : m_page{page} {} + explicit PatternRegistry(const std::uint32_t page) : m_page{page} {} std::string register_pattern(const pdf::Pattern &pattern, const util::math::Transform2D &m, @@ -437,7 +437,7 @@ class PatternRegistry { [[nodiscard]] std::string defs() const { return m_defs.str(); } private: - std::uint32_t m_page; + std::uint32_t m_page{}; std::uint32_t m_count{0}; std::unordered_map m_id_by_signature; std::ostringstream m_defs; @@ -774,7 +774,8 @@ class HtmlServiceImpl final : public HtmlService { // the page viewBox (fill and/or stroke), under any active clip. A // shading- or tiling-pattern fill is painted through a paint server // (gradient/``) instead of a colour. 
- if (const auto *path = std::get_if(&element); path != nullptr) { + if (const auto *path = std::get_if(&element); + path != nullptr) { const std::string clip_id = clips.register_clip(path->clip, to_box); std::string fill_url_id; if (path->fill_shading != nullptr) { From 22437468b4934b5d21e14b2ddcef00c4192f497f Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Sun, 28 Jun 2026 19:33:41 +0200 Subject: [PATCH 3/3] update refs --- test/data/reference-output/odr-private | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/data/reference-output/odr-private b/test/data/reference-output/odr-private index 8d77bde4..85a14d01 160000 --- a/test/data/reference-output/odr-private +++ b/test/data/reference-output/odr-private @@ -1 +1 @@ -Subproject commit 8d77bde4362515e09f1873aff3a53cde64b4ffab +Subproject commit 85a14d010ffb87dddeb67cdc1aa18bd54d502c47