diff --git a/src/odr/internal/html/pdf_file.cpp b/src/odr/internal/html/pdf_file.cpp index 79f39bdb..07a250d1 100644 --- a/src/odr/internal/html/pdf_file.cpp +++ b/src/odr/internal/html/pdf_file.cpp @@ -41,6 +41,18 @@ namespace { /// the extra digits add up across a page full of path data. double round2(const double v) { return std::round(v * 100.0) / 100.0; } +/// Serialize a transform as an SVG `matrix(...)`. Only the translation (e, f) +/// is rounded — it lives in page-box units where 1/100 px is plenty; the linear +/// part (a..d) keeps full precision so small scale/skew factors aren't +/// quantized to zero. Used for `transform`, `gradientTransform` and +/// `patternTransform`. +std::string svg_matrix(const util::math::Transform2D &m) { + std::ostringstream f; + f << "matrix(" << m.a << ',' << m.b << ',' << m.c << ',' << m.d << ',' + << round2(m.e) << ',' << round2(m.f) << ')'; + return std::move(f).str(); +} + /// Convert a PDF device color to a CSS `rgb(...)` string. Non-device color /// spaces (Separation/ICCBased/… — stage 4.4) and the unknown space fall back /// to black, the PDF initial color. @@ -222,9 +234,8 @@ std::string svg_image_fragment(const pdf::ImageElement &image, if (!clip_id.empty()) { f << ""; } - f << R"("; if (!clip_id.empty()) { f << ""; @@ -232,15 +243,51 @@ std::string svg_image_fragment(const pdf::ImageElement &image, return std::move(f).str(); } +/// Shared bookkeeping for the per-page `` registries below (clips, +/// gradients, tiling patterns): a signature->id cache that deduplicates +/// repeated definitions, a per-page monotonic id counter, and the accumulated +/// `` markup (emitted once into the page's hidden ``). Ids are +/// namespaced per page as `_`. +class DefsRegistry { +public: + explicit DefsRegistry(const std::uint32_t page) : m_page{page} {} + + [[nodiscard]] std::string defs() const { return m_defs.str(); } + +protected: + /// The id for `signature`, minting `_` the first time it is + /// seen. 
`inserted` is true only on that first sight — when the caller still + /// needs to emit the definition into `m_defs`. + struct Entry { + std::string id; + bool inserted; + }; + Entry intern(const std::string &signature, const char *prefix) { + const auto [it, inserted] = m_id_by_signature.try_emplace(signature); + if (inserted) { + it->second = std::string(prefix) + std::to_string(m_page) + "_" + + std::to_string(++m_count); + } + return {it->second, inserted}; + } + + std::ostringstream m_defs; + +private: + std::uint32_t m_page; + std::uint32_t m_count{0}; + std::unordered_map m_id_by_signature; +}; + /// Registers a page's clip regions as nested `` defs, deduplicating /// shared prefixes. PDF's current clip is the *intersection* of an ordered list /// of regions; SVG expresses intersection by chaining `clip-path` from one /// `` to the next, so region i's clipPath references region i-1's and /// the painted element references the last. Ids are namespaced per page -/// (`c_`); `defs()` is emitted once in a hidden `` for the page. -class ClipRegistry { +/// (`c_`). +class ClipRegistry : public DefsRegistry { public: - explicit ClipRegistry(std::uint32_t page) : m_page{page} {} + using DefsRegistry::DefsRegistry; /// The clipPath id to reference on a path painted under `clip`, registering /// any not-yet-seen regions. Empty when `clip` is empty (unclipped). @@ -253,11 +300,9 @@ class ClipRegistry { signature += region.even_odd ? 
'E' : 'N'; signature += d; signature += ';'; - const auto [it, inserted] = m_id_by_signature.try_emplace(signature); + const auto [id, inserted] = intern(signature, "c"); if (inserted) { - it->second = - "c" + std::to_string(m_page) + "_" + std::to_string(++m_count); - m_defs << "second << '"'; + m_defs << ""; } - parent = it->second; + parent = id; } return parent; } - - [[nodiscard]] std::string defs() const { return m_defs.str(); } - -private: - std::uint32_t m_page; - std::uint32_t m_count{0}; - std::unordered_map m_id_by_signature; - std::ostringstream m_defs; }; /// Registers a page's shadings (axial/radial) as ``/ @@ -293,9 +330,9 @@ class ClipRegistry { /// shading is over-painted beyond its interval instead of being masked to it; /// `Shading::background` and `Shading::bbox` are likewise not yet honoured. /// Honouring them needs the fill clipped to the gradient band/annulus. -class GradientRegistry { +class GradientRegistry : public DefsRegistry { public: - explicit GradientRegistry(const std::uint32_t page) : m_page{page} {} + using DefsRegistry::DefsRegistry; /// The gradient id to reference via `fill="url(#id)"` for `shading` placed by /// `m` (shading space -> page box). Empty for an unrepresentable shading. 
@@ -308,12 +345,10 @@ class GradientRegistry { sig << shading.type << ':' << static_cast(&shading) << ':' << m.a << ',' << m.b << ',' << m.c << ',' << m.d << ',' << m.e << ',' << m.f; - const auto [it, inserted] = m_id_by_signature.try_emplace(sig.str()); + const auto [id, inserted] = intern(sig.str(), "g"); if (!inserted) { - return it->second; + return id; } - it->second = "g" + std::to_string(m_page) + "_" + std::to_string(++m_count); - const std::string &id = it->second; const std::array &c = shading.coords; if (shading.type == 2) { @@ -327,12 +362,8 @@ class GradientRegistry { << "\" cy=\"" << c[4] << "\" r=\"" << c[5] << "\" fx=\"" << c[0] << "\" fy=\"" << c[1] << "\" fr=\"" << c[2] << '"'; } - // Only the translation (e, f) is rounded — it lives in page-box units where - // 1/100 px is plenty; the linear part (a..d) keeps full precision so small - // scale/skew factors aren't quantized to zero. - m_defs << " gradientUnits=\"userSpaceOnUse\" gradientTransform=\"matrix(" - << m.a << ',' << m.b << ',' << m.c << ',' << m.d << ',' - << round2(m.e) << ',' << round2(m.f) << ")\">"; + m_defs << " gradientUnits=\"userSpaceOnUse\" gradientTransform=\"" + << svg_matrix(m) << "\">"; for (const pdf::GradientStop &stop : shading.stops) { m_defs << ""; @@ -340,14 +371,6 @@ class GradientRegistry { m_defs << (shading.type == 2 ? "" : ""); return id; } - - [[nodiscard]] std::string defs() const { return m_defs.str(); } - -private: - std::uint32_t m_page{}; - std::uint32_t m_count{0}; - std::unordered_map m_id_by_signature; - std::ostringstream m_defs; }; /// Serialize an `sh` shading flood to an SVG `` covering the page box, @@ -376,12 +399,14 @@ std::string svg_shading_fragment(const std::string &gradient_id, /// every `/XStep`/`/YStep`, and `patternTransform` (pattern space -> page box) /// places the lattice. An uncoloured pattern (`/PaintType 2`) ignores its /// content's own colours and paints in the path's fill colour, so the cache key -/// folds that colour in. 
Ids are namespaced per page (`pat_`). Only -/// paths and images inside the tile are rendered (nested text/shadings/patterns -/// are skipped — rare). Returns "" for an unrepresentable pattern. -class PatternRegistry { +/// folds that colour in. Each cell is clipped to its `/BBox` so marks outside +/// the cell (or in the gap when a step exceeds the BBox) don't leak into the +/// tile. Ids are namespaced per page (`pat_`). Only paths and images +/// inside the tile are rendered (nested text/shadings/patterns are skipped — +/// rare). Returns "" for an unrepresentable pattern. +class PatternRegistry : public DefsRegistry { public: - explicit PatternRegistry(const std::uint32_t page) : m_page{page} {} + using DefsRegistry::DefsRegistry; std::string register_pattern(const pdf::Pattern &pattern, const util::math::Transform2D &m, @@ -398,12 +423,10 @@ class PatternRegistry { if (uncoloured) { sig << ':' << device_color_to_css(fill_color); } - const auto [it, inserted] = m_id_by_signature.try_emplace(sig.str()); + const auto [id, inserted] = intern(sig.str(), "pat"); if (!inserted) { - return it->second; + return id; } - it->second = - "pat" + std::to_string(m_page) + "_" + std::to_string(++m_count); // Tile content is laid out in pattern space (identity page transform); the // y-flip and placement live in `patternTransform`. @@ -423,24 +446,29 @@ class PatternRegistry { } } - m_defs << "second + m_defs << "" - << std::move(tile).str() << ""; - return it->second; + << "\" patternTransform=\"" << svg_matrix(m) << "\">"; + // Clip each cell to its `/BBox` (ISO 32000-1 8.7.3.1). An overlapping + // lattice (a step smaller than the BBox) can't be expressed as a single SVG + // `` and is not reproduced. 
+ const double bbox_w = pattern.bbox[2] - pattern.bbox[0]; + const double bbox_h = pattern.bbox[3] - pattern.bbox[1]; + if (bbox_w > 0 && bbox_h > 0) { + m_defs << "" << std::move(tile).str() << ""; + } else { + m_defs << std::move(tile).str(); + } + m_defs << ""; + return id; } - - [[nodiscard]] std::string defs() const { return m_defs.str(); } - -private: - std::uint32_t m_page{}; - std::uint32_t m_count{0}; - std::unordered_map m_id_by_signature; - std::ostringstream m_defs; }; /// Deduplicates CSS declarations into atomic, single-property classes. PDF text diff --git a/src/odr/internal/pdf/AGENTS.md b/src/odr/internal/pdf/AGENTS.md index 5c1f21b4..1a042398 100644 --- a/src/odr/internal/pdf/AGENTS.md +++ b/src/odr/internal/pdf/AGENTS.md @@ -590,9 +590,12 @@ stage exists to avoid. every `/XStep`/`/YStep`, with `patternTransform` (the pattern `/Matrix`) placing the lattice; a `/PatternType 1` fill references it as `fill="url(#…)"`. Coloured (`/PaintType 1`) cells carry their own colours; uncoloured - (`/PaintType 2`) cells are painted in the current fill colour. Only paths and - images inside a tile are rendered (nested text/shadings/patterns are skipped — - rare). + (`/PaintType 2`) cells are painted in the current fill colour (resolved + through the Pattern colour space's base, so `[/Pattern /DeviceRGB]` keeps its + tint). Each cell is clipped to its `/BBox`; an overlapping lattice (a step + smaller than the BBox) can't be expressed as one SVG `<pattern>` and is not + reproduced. Only paths and images inside a tile are rendered (nested + text/shadings/patterns are skipped — rare). 
- **SVG residue** — where no 1:1 primitive exists; all at generation time, never rasterization: mesh/function shadings (types 1, 4–7) → tessellate into small flat polygons (pdf.js's approach); color spaces diff --git a/src/odr/internal/pdf/pdf_color.cpp b/src/odr/internal/pdf/pdf_color.cpp index 9e7d1967..de4fdb22 100644 --- a/src/odr/internal/pdf/pdf_color.cpp +++ b/src/odr/internal/pdf/pdf_color.cpp @@ -148,6 +148,13 @@ ColorSpaceDef::to_rgb(const std::vector &c) const { return alternate->to_rgb(tint->eval(c)); } case ColorSpaceKind::pattern: + // An uncoloured pattern (`/PaintType 2`) carries its colour in the Pattern + // space's underlying base (e.g. `[/Pattern /DeviceRGB]`); convert through + // it. Without a base there is no device colour to convert. + if (base != nullptr) { + return base->to_rgb(c); + } + return {0, 0, 0}; case ColorSpaceKind::unknown: return {0, 0, 0}; } diff --git a/src/odr/internal/pdf/pdf_document_parser.cpp b/src/odr/internal/pdf/pdf_document_parser.cpp index 58ed83e7..f00b5a26 100644 --- a/src/odr/internal/pdf/pdf_document_parser.cpp +++ b/src/odr/internal/pdf/pdf_document_parser.cpp @@ -860,7 +860,7 @@ Pattern *parse_pattern(State &state, const ObjectReference &reference, if (object.has_stream) { pattern->content = parser.read_decoded_stream(object); } - if (dictionary.has_key("Resources")) { + if (dictionary.has_value("Resources")) { pattern->resources = parse_resources(state, dictionary["Resources"]); } } diff --git a/test/src/internal/pdf/pdf_page_extractor.cpp b/test/src/internal/pdf/pdf_page_extractor.cpp index cf7336b3..35ab9b42 100644 --- a/test/src/internal/pdf/pdf_page_extractor.cpp +++ b/test/src/internal/pdf/pdf_page_extractor.cpp @@ -961,6 +961,37 @@ TEST(PdfPageExtractor, scn_uncoloured_tiling_pattern_carries_colour) { EXPECT_EQ(p.fill_pattern->paint_type, 2); } +// An uncoloured pattern selected through a *named* Pattern colour space with an +// underlying base (`[/Pattern /DeviceRGB]`) resolves its leading components 
+// through that base — `1 0 0` is red, not black (the base would be ignored if +// the Pattern space's `to_rgb` dropped it). +TEST(PdfPageExtractor, scn_uncoloured_tiling_pattern_colour_through_base) { + Pattern pattern; + pattern.type = Pattern::Type::tiling; + pattern.paint_type = 2; + + std::vector array{Object(Name{"Pattern"}), Object(Name{"DeviceRGB"})}; + ColorSpaceContext ctx; + ctx.resolve = [](const Object &o) { return o; }; + ctx.load_stream = [](const Object &) { return std::string{}; }; + ctx.named = nullptr; + + Resources res; + res.color_space["CS1"] = + parse_color_space(Object(Array(std::move(array))), ctx); + res.pattern["P2"] = &pattern; + + const auto page = + extract_page("/CS1 cs 1 0 0 /P2 scn 0 0 10 10 re f", res, Logger::null()); + ASSERT_EQ(page.size(), 1); + const PathElement &p = std::get(page[0]); + ASSERT_NE(p.fill_pattern, nullptr); + EXPECT_EQ(p.fill_color.space, ColorSpace::device_rgb); + EXPECT_DOUBLE_EQ(p.fill_color.rgb[0], 1.0); + EXPECT_DOUBLE_EQ(p.fill_color.rgb[1], 0.0); + EXPECT_DOUBLE_EQ(p.fill_color.rgb[2], 0.0); +} + // The `sh` operator floods the current clip with a named `/Shading`, emitting a // `ShadingElement` placed by the CTM. TEST(PdfPageExtractor, sh_emits_shading_element) {