Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 89 additions & 61 deletions src/odr/internal/html/pdf_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,18 @@ namespace {
/// the extra digits add up across a page full of path data.
double round2(const double v) {
  // Scale up, round to the nearest integer, scale back down: two decimals.
  constexpr double scale = 100.0;
  return std::round(v * scale) / scale;
}

/// Format `m` as an SVG `matrix(a,b,c,d,e,f)` string, suitable for the
/// `transform`, `gradientTransform` and `patternTransform` attributes.
///
/// The translation components (e, f) go through `round2`: they are in page-box
/// units, where 1/100 px is plenty. The linear components (a..d) are streamed
/// without that rounding so small scale/skew factors aren't quantized to zero.
std::string svg_matrix(const util::math::Transform2D &m) {
  std::ostringstream out;
  out << "matrix(";
  // Linear part: deliberately not passed through round2.
  out << m.a << ',' << m.b << ',' << m.c << ',' << m.d << ',';
  // Translation part: two decimals are enough.
  out << round2(m.e) << ',' << round2(m.f);
  out << ')';
  return std::move(out).str();
}

/// Convert a PDF device color to a CSS `rgb(...)` string. Non-device color
/// spaces (Separation/ICCBased/… — stage 4.4) and the unknown space fall back
/// to black, the PDF initial color.
Expand Down Expand Up @@ -222,25 +234,60 @@ std::string svg_image_fragment(const pdf::ImageElement &image,
if (!clip_id.empty()) {
f << "<g clip-path=\"url(#" << clip_id << ")\">";
}
f << R"(<image width="1" height="1" preserveAspectRatio="none" transform="matrix()"
<< m.a << ',' << m.b << ',' << m.c << ',' << m.d << ',' << round2(m.e)
<< ',' << round2(m.f) << ")\"";
f << R"(<image width="1" height="1" preserveAspectRatio="none" transform=")"
<< svg_matrix(m) << '"';
f << " href=\"" << file_to_url(image.data, image.mime) << "\"/>";
if (!clip_id.empty()) {
f << "</g>";
}
return std::move(f).str();
}

/// Common state behind the per-page `<defs>` registries (clips, gradients,
/// tiling patterns). It bundles three things:
///  - a signature->id cache, so a repeated definition is only written once,
///  - a per-page counter that only ever grows, used to mint new ids,
///  - the `<defs>` markup collected so far (emitted once into the page's
///    hidden `<svg>`).
/// Ids are namespaced per page as `<prefix><page>_<n>`.
class DefsRegistry {
public:
  explicit DefsRegistry(const std::uint32_t page) : m_page{page} {}

  /// A copy of the `<defs>` markup accumulated so far.
  [[nodiscard]] std::string defs() const { return m_defs.str(); }

protected:
  /// Result of `intern`: the id bound to a signature, plus whether this call
  /// created the binding.
  struct Entry {
    std::string id;
    bool inserted;
  };

  /// Look up the id for `signature`, minting `<prefix><page>_<n>` the first
  /// time the signature is seen. `inserted` is true only on that first sight —
  /// i.e. when the caller still has to write the definition into `m_defs`.
  Entry intern(const std::string &signature, const char *prefix) {
    const auto result = m_id_by_signature.try_emplace(signature);
    std::string &slot = result.first->second;
    const bool is_new = result.second;
    if (is_new) {
      // First sight: bump the per-page counter and build the id in place.
      ++m_count;
      slot = prefix;
      slot += std::to_string(m_page);
      slot += '_';
      slot += std::to_string(m_count);
    }
    return Entry{slot, is_new};
  }

  /// Accumulated `<defs>` markup; derived registries append to it.
  std::ostringstream m_defs;

private:
  std::uint32_t m_page;
  std::uint32_t m_count{0};
  std::unordered_map<std::string, std::string> m_id_by_signature;
};

/// Registers a page's clip regions as nested `<clipPath>` defs, deduplicating
/// shared prefixes. PDF's current clip is the *intersection* of an ordered list
/// of regions; SVG expresses intersection by chaining `clip-path` from one
/// `<clipPath>` to the next, so region i's clipPath references region i-1's and
/// the painted element references the last. Ids are namespaced per page
/// (`c<page>_<n>`); `defs()` is emitted once in a hidden `<svg>` for the page.
class ClipRegistry {
/// (`c<page>_<n>`).
class ClipRegistry : public DefsRegistry {
public:
explicit ClipRegistry(std::uint32_t page) : m_page{page} {}
using DefsRegistry::DefsRegistry;

/// The clipPath id to reference on a path painted under `clip`, registering
/// any not-yet-seen regions. Empty when `clip` is empty (unclipped).
Expand All @@ -253,11 +300,9 @@ class ClipRegistry {
signature += region.even_odd ? 'E' : 'N';
signature += d;
signature += ';';
const auto [it, inserted] = m_id_by_signature.try_emplace(signature);
const auto [id, inserted] = intern(signature, "c");
if (inserted) {
it->second =
"c" + std::to_string(m_page) + "_" + std::to_string(++m_count);
m_defs << "<clipPath id=\"" << it->second << '"';
m_defs << "<clipPath id=\"" << id << '"';
if (!parent.empty()) {
m_defs << " clip-path=\"url(#" << parent << ")\"";
}
Expand All @@ -267,18 +312,10 @@ class ClipRegistry {
}
m_defs << "/></clipPath>";
}
parent = it->second;
parent = id;
}
return parent;
}

[[nodiscard]] std::string defs() const { return m_defs.str(); }

private:
std::uint32_t m_page;
std::uint32_t m_count{0};
std::unordered_map<std::string, std::string> m_id_by_signature;
std::ostringstream m_defs;
};

/// Registers a page's shadings (axial/radial) as `<linearGradient>`/
Expand All @@ -293,9 +330,9 @@ class ClipRegistry {
/// shading is over-painted beyond its interval instead of being masked to it;
/// `Shading::background` and `Shading::bbox` are likewise not yet honoured.
/// Honouring them needs the fill clipped to the gradient band/annulus.
class GradientRegistry {
class GradientRegistry : public DefsRegistry {
public:
explicit GradientRegistry(const std::uint32_t page) : m_page{page} {}
using DefsRegistry::DefsRegistry;

/// The gradient id to reference via `fill="url(#id)"` for `shading` placed by
/// `m` (shading space -> page box). Empty for an unrepresentable shading.
Expand All @@ -308,12 +345,10 @@ class GradientRegistry {
sig << shading.type << ':' << static_cast<const void *>(&shading) << ':'
<< m.a << ',' << m.b << ',' << m.c << ',' << m.d << ',' << m.e << ','
<< m.f;
const auto [it, inserted] = m_id_by_signature.try_emplace(sig.str());
const auto [id, inserted] = intern(sig.str(), "g");
if (!inserted) {
return it->second;
return id;
}
it->second = "g" + std::to_string(m_page) + "_" + std::to_string(++m_count);
const std::string &id = it->second;

const std::array<double, 6> &c = shading.coords;
if (shading.type == 2) {
Expand All @@ -327,27 +362,15 @@ class GradientRegistry {
<< "\" cy=\"" << c[4] << "\" r=\"" << c[5] << "\" fx=\"" << c[0]
<< "\" fy=\"" << c[1] << "\" fr=\"" << c[2] << '"';
}
// Only the translation (e, f) is rounded — it lives in page-box units where
// 1/100 px is plenty; the linear part (a..d) keeps full precision so small
// scale/skew factors aren't quantized to zero.
m_defs << " gradientUnits=\"userSpaceOnUse\" gradientTransform=\"matrix("
<< m.a << ',' << m.b << ',' << m.c << ',' << m.d << ','
<< round2(m.e) << ',' << round2(m.f) << ")\">";
m_defs << " gradientUnits=\"userSpaceOnUse\" gradientTransform=\""
<< svg_matrix(m) << "\">";
for (const pdf::GradientStop &stop : shading.stops) {
m_defs << "<stop offset=\"" << round2(stop.offset) << "\" stop-color=\""
<< rgb_to_css(stop.rgb) << "\"/>";
}
m_defs << (shading.type == 2 ? "</linearGradient>" : "</radialGradient>");
return id;
}

[[nodiscard]] std::string defs() const { return m_defs.str(); }

private:
std::uint32_t m_page{};
std::uint32_t m_count{0};
std::unordered_map<std::string, std::string> m_id_by_signature;
std::ostringstream m_defs;
};

/// Serialize an `sh` shading flood to an SVG `<rect>` covering the page box,
Expand Down Expand Up @@ -376,12 +399,14 @@ std::string svg_shading_fragment(const std::string &gradient_id,
/// every `/XStep`/`/YStep`, and `patternTransform` (pattern space -> page box)
/// places the lattice. An uncoloured pattern (`/PaintType 2`) ignores its
/// content's own colours and paints in the path's fill colour, so the cache key
/// folds that colour in. Ids are namespaced per page (`pat<page>_<n>`). Only
/// paths and images inside the tile are rendered (nested text/shadings/patterns
/// are skipped — rare). Returns "" for an unrepresentable pattern.
class PatternRegistry {
/// folds that colour in. Each cell is clipped to its `/BBox` so marks outside
/// the cell (or in the gap when a step exceeds the BBox) don't leak into the
/// tile. Ids are namespaced per page (`pat<page>_<n>`). Only paths and images
/// inside the tile are rendered (nested text/shadings/patterns are skipped —
/// rare). Returns "" for an unrepresentable pattern.
class PatternRegistry : public DefsRegistry {
public:
explicit PatternRegistry(const std::uint32_t page) : m_page{page} {}
using DefsRegistry::DefsRegistry;

std::string register_pattern(const pdf::Pattern &pattern,
const util::math::Transform2D &m,
Expand All @@ -398,12 +423,10 @@ class PatternRegistry {
if (uncoloured) {
sig << ':' << device_color_to_css(fill_color);
}
const auto [it, inserted] = m_id_by_signature.try_emplace(sig.str());
const auto [id, inserted] = intern(sig.str(), "pat");
if (!inserted) {
return it->second;
return id;
}
it->second =
"pat" + std::to_string(m_page) + "_" + std::to_string(++m_count);

// Tile content is laid out in pattern space (identity page transform); the
// y-flip and placement live in `patternTransform`.
Expand All @@ -423,24 +446,29 @@ class PatternRegistry {
}
}

m_defs << "<pattern id=\"" << it->second
m_defs << "<pattern id=\"" << id
<< "\" patternUnits=\"userSpaceOnUse\" x=\""
<< round2(pattern.bbox[0]) << "\" y=\"" << round2(pattern.bbox[1])
<< "\" width=\"" << round2(std::abs(pattern.x_step))
<< "\" height=\"" << round2(std::abs(pattern.y_step))
<< "\" patternTransform=\"matrix(" << m.a << ',' << m.b << ',' << m.c
<< ',' << m.d << ',' << round2(m.e) << ',' << round2(m.f) << ")\">"
<< std::move(tile).str() << "</pattern>";
return it->second;
<< "\" patternTransform=\"" << svg_matrix(m) << "\">";
// Clip each cell to its `/BBox` (ISO 32000-1 8.7.3.1). An overlapping
// lattice (a step smaller than the BBox) can't be expressed as a single SVG
// `<pattern>` and is not reproduced.
const double bbox_w = pattern.bbox[2] - pattern.bbox[0];
const double bbox_h = pattern.bbox[3] - pattern.bbox[1];
if (bbox_w > 0 && bbox_h > 0) {
m_defs << "<clipPath id=\"" << id << "c\"><rect x=\""
<< round2(pattern.bbox[0]) << "\" y=\"" << round2(pattern.bbox[1])
<< "\" width=\"" << round2(bbox_w) << "\" height=\""
<< round2(bbox_h) << "\"/></clipPath><g clip-path=\"url(#" << id
<< "c)\">" << std::move(tile).str() << "</g>";
} else {
m_defs << std::move(tile).str();
}
m_defs << "</pattern>";
return id;
}

[[nodiscard]] std::string defs() const { return m_defs.str(); }

private:
std::uint32_t m_page{};
std::uint32_t m_count{0};
std::unordered_map<std::string, std::string> m_id_by_signature;
std::ostringstream m_defs;
};

/// Deduplicates CSS declarations into atomic, single-property classes. PDF text
Expand Down
9 changes: 6 additions & 3 deletions src/odr/internal/pdf/AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -590,9 +590,12 @@ stage exists to avoid.
every `/XStep`/`/YStep`, with `patternTransform` (the pattern `/Matrix`)
placing the lattice; a `/PatternType 1` fill references it as `fill="url(#…)"`.
Coloured (`/PaintType 1`) cells carry their own colours; uncoloured
(`/PaintType 2`) cells are painted in the current fill colour. Only paths and
images inside a tile are rendered (nested text/shadings/patterns are skipped —
rare).
(`/PaintType 2`) cells are painted in the current fill colour (resolved
through the Pattern colour space's base, so `[/Pattern /DeviceRGB]` keeps its
tint). Each cell is clipped to its `/BBox`; an overlapping lattice (a step
smaller than the BBox) can't be expressed as one SVG `<pattern>` and is not
reproduced. Only paths and images inside a tile are rendered (nested
text/shadings/patterns are skipped — rare).
- **SVG residue** — where no 1:1 primitive exists; all at generation time, never
rasterization: mesh/function shadings (types 1, 4–7) → tessellate into small
flat polygons (pdf.js's approach); color spaces
Expand Down
7 changes: 7 additions & 0 deletions src/odr/internal/pdf/pdf_color.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,13 @@ ColorSpaceDef::to_rgb(const std::vector<double> &c) const {
return alternate->to_rgb(tint->eval(c));
}
case ColorSpaceKind::pattern:
// An uncoloured pattern (`/PaintType 2`) carries its colour in the Pattern
// space's underlying base (e.g. `[/Pattern /DeviceRGB]`); convert through
// it. Without a base there is no device colour to convert.
if (base != nullptr) {
return base->to_rgb(c);
}
return {0, 0, 0};
case ColorSpaceKind::unknown:
return {0, 0, 0};
}
Expand Down
2 changes: 1 addition & 1 deletion src/odr/internal/pdf/pdf_document_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -860,7 +860,7 @@ Pattern *parse_pattern(State &state, const ObjectReference &reference,
if (object.has_stream) {
pattern->content = parser.read_decoded_stream(object);
}
if (dictionary.has_key("Resources")) {
if (dictionary.has_value("Resources")) {
pattern->resources = parse_resources(state, dictionary["Resources"]);
}
}
Expand Down
31 changes: 31 additions & 0 deletions test/src/internal/pdf/pdf_page_extractor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -961,6 +961,37 @@ TEST(PdfPageExtractor, scn_uncoloured_tiling_pattern_carries_colour) {
EXPECT_EQ(p.fill_pattern->paint_type, 2);
}

// Selecting an uncoloured pattern via a *named* Pattern colour space that has
// an underlying base (`[/Pattern /DeviceRGB]`) must resolve the leading colour
// components through that base: `1 0 0` comes out red, not black (black is
// what you would get if the Pattern space's `to_rgb` dropped the base).
TEST(PdfPageExtractor, scn_uncoloured_tiling_pattern_colour_through_base) {
  // Colour-space context with pass-through resolution and no named spaces.
  ColorSpaceContext context;
  context.named = nullptr;
  context.resolve = [](const Object &object) { return object; };
  context.load_stream = [](const Object &) { return std::string{}; };

  // `[/Pattern /DeviceRGB]`
  std::vector<Object> space_array{Object(Name{"Pattern"}),
                                  Object(Name{"DeviceRGB"})};

  // An uncoloured (`/PaintType 2`) tiling pattern.
  Pattern tiling;
  tiling.type = Pattern::Type::tiling;
  tiling.paint_type = 2;

  Resources resources;
  resources.pattern["P2"] = &tiling;
  resources.color_space["CS1"] =
      parse_color_space(Object(Array(std::move(space_array))), context);

  const auto elements = extract_page("/CS1 cs 1 0 0 /P2 scn 0 0 10 10 re f",
                                     resources, Logger::null());
  ASSERT_EQ(elements.size(), 1);

  const PathElement &painted = std::get<PathElement>(elements[0]);
  ASSERT_NE(painted.fill_pattern, nullptr);
  EXPECT_EQ(painted.fill_color.space, ColorSpace::device_rgb);
  EXPECT_DOUBLE_EQ(painted.fill_color.rgb[0], 1.0);
  EXPECT_DOUBLE_EQ(painted.fill_color.rgb[1], 0.0);
  EXPECT_DOUBLE_EQ(painted.fill_color.rgb[2], 0.0);
}

// The `sh` operator floods the current clip with a named `/Shading`, emitting a
// `ShadingElement` placed by the CTM.
TEST(PdfPageExtractor, sh_emits_shading_element) {
Expand Down
Loading