Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 91 additions & 12 deletions src/odr/internal/html/pdf_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,12 +129,12 @@ std::string svg_path_d(const std::vector<pdf::Subpath> &subpaths,
/// stroke carries width (CTM-scaled in user space), caps, joins, miter limit
/// and the dash pattern. A zero stroke width renders as a thin hairline.
/// `clip_id`, when non-empty, references a `<clipPath>` installed via
/// `clip-path`. `gradient_id`, when non-empty, fills the path with that
/// gradient (a shading pattern) instead of `fill_color`.
/// `clip-path`. `fill_url_id`, when non-empty, fills the path with that paint
/// server (a shading gradient or a tiling `<pattern>`) instead of `fill_color`.
std::string svg_path_fragment(const pdf::PathElement &path,
const util::math::Transform2D &to_box,
const std::string &clip_id,
const std::string &gradient_id) {
const std::string &fill_url_id) {
if ((!path.fill && !path.stroke) || path.subpaths.empty()) {
return {};
}
Expand All @@ -145,8 +145,8 @@ std::string svg_path_fragment(const pdf::PathElement &path,
}

if (path.fill) {
if (!gradient_id.empty()) {
f << " fill=\"url(#" << gradient_id << ")\"";
if (!fill_url_id.empty()) {
f << " fill=\"url(#" << fill_url_id << ")\"";
} else {
f << " fill=\"" << device_color_to_css(path.fill_color) << '"';
}
Expand Down Expand Up @@ -370,6 +370,79 @@ std::string svg_shading_fragment(const std::string &gradient_id,
return std::move(f).str();
}

/// Registers a page's tiling patterns (`/PatternType 1`) as SVG `<pattern>`
/// defs. The pattern's content stream is run as a mini page (`extract_page`)
/// into tile fragments laid out in pattern space; the `<pattern>` repeats them
/// every `/XStep`/`/YStep`, and `patternTransform` (pattern space -> page box)
/// places the lattice. An uncoloured pattern (`/PaintType 2`) ignores its
/// content's own colours and paints in the path's fill colour, so the cache key
/// folds that colour in. Ids are namespaced per page (`pat<page>_<n>`, with
/// `n` counting from 1). Only path and image elements of the tile are emitted;
/// every other element kind produced by `extract_page` is dropped.
class PatternRegistry {
public:
  /// `page` is the number folded into every id this registry hands out.
  explicit PatternRegistry(const std::uint32_t page) : m_page{page} {}

  /// Returns the id of the `<pattern>` def for `pattern` placed by `m`
  /// (pattern space -> page box), creating the def on first use.
  ///
  /// `fill_color` is only used for an uncoloured pattern (`/PaintType 2`),
  /// where it replaces both the fill and the stroke colour of every tile path.
  /// `logger` is forwarded to `extract_page`.
  ///
  /// Returns "" when the pattern cannot be represented: it has no resources,
  /// an empty content stream, or a zero `/XStep` or `/YStep`.
  std::string register_pattern(const pdf::Pattern &pattern,
                               const util::math::Transform2D &m,
                               const pdf::GraphicsState::Color &fill_color,
                               const Logger &logger) {
    if (pattern.resources == nullptr || pattern.content.empty() ||
        pattern.x_step == 0 || pattern.y_step == 0) {
      return {};
    }
    const bool uncoloured = pattern.paint_type == 2;

    // Cache key: the pattern object's address, its placement and, for an
    // uncoloured pattern, the colour it is painted in.
    std::ostringstream sig;
    sig << static_cast<const void *>(&pattern) << ':' << m.a << ',' << m.b
        << ',' << m.c << ',' << m.d << ',' << m.e << ',' << m.f;
    if (uncoloured) {
      sig << ':' << device_color_to_css(fill_color);
    }
    const auto [it, inserted] = m_id_by_signature.try_emplace(sig.str());
    if (!inserted) {
      return it->second;
    }
    it->second =
        "pat" + std::to_string(m_page) + "_" + std::to_string(++m_count);

    // Tile content is laid out in pattern space (identity page transform); the
    // y-flip and placement live in `patternTransform`.
    const util::math::Transform2D identity;
    std::ostringstream tile;
    for (const pdf::PageElement &element :
         pdf::extract_page(pattern.content, *pattern.resources, logger)) {
      if (const auto *path = std::get_if<pdf::PathElement>(&element)) {
        pdf::PathElement painted = *path;
        if (uncoloured) {
          painted.fill_color = fill_color;
          painted.stroke_color = fill_color;
        }
        tile << svg_path_fragment(painted, identity, "", "");
      } else if (const auto *image = std::get_if<pdf::ImageElement>(&element)) {
        tile << svg_image_fragment(*image, identity, "");
      }
    }

    // A PDF rectangle may list any two diagonally opposite corners, and the
    // parser stores `/BBox` as written. Anchor the tile at the minimum corner
    // so a reversed `/BBox` does not shift the tile off its own content.
    const double tile_x =
        pattern.bbox[0] < pattern.bbox[2] ? pattern.bbox[0] : pattern.bbox[2];
    const double tile_y =
        pattern.bbox[1] < pattern.bbox[3] ? pattern.bbox[1] : pattern.bbox[3];

    m_defs << "<pattern id=\"" << it->second
           << "\" patternUnits=\"userSpaceOnUse\" x=\"" << round2(tile_x)
           << "\" y=\"" << round2(tile_y) << "\" width=\""
           << round2(std::abs(pattern.x_step)) << "\" height=\""
           << round2(std::abs(pattern.y_step))
           << "\" patternTransform=\"matrix(" << m.a << ',' << m.b << ',' << m.c
           << ',' << m.d << ',' << round2(m.e) << ',' << round2(m.f) << ")\">"
           << std::move(tile).str() << "</pattern>";
    return it->second;
  }

  /// All `<pattern>` defs registered so far, concatenated in registration
  /// order.
  [[nodiscard]] std::string defs() const { return m_defs.str(); }

private:
  std::uint32_t m_page{};    ///< page number used in generated ids
  std::uint32_t m_count{0};  ///< number of defs emitted so far
  /// Cache key (see `register_pattern`) -> generated pattern id.
  std::unordered_map<std::string, std::string> m_id_by_signature;
  std::ostringstream m_defs; ///< accumulated `<pattern>` markup
};

/// Deduplicates CSS declarations into atomic, single-property classes. PDF text
/// emits one absolutely-positioned span per glyph run, and the same font sizes,
/// offsets and spacings recur across the (potentially millions of) spans.
Expand Down Expand Up @@ -693,23 +766,28 @@ class HtmlServiceImpl final : public HtmlService {

ClipRegistry clips(static_cast<std::uint32_t>(pages_out.size()));
GradientRegistry gradients(static_cast<std::uint32_t>(pages_out.size()));
PatternRegistry patterns(static_cast<std::uint32_t>(pages_out.size()));

for (const pdf::PageElement &element :
pdf::extract_page(stream, *page->resources, *m_logger)) {
// A painted path: serialize its subpaths to an SVG `<path>` fragment in
// the page viewBox (fill and/or stroke), under any active clip. A
// shading-pattern fill is painted through a gradient instead of a
// colour.
// shading- or tiling-pattern fill is painted through a paint server
// (gradient/`<pattern>`) instead of a colour.
if (const auto *path = std::get_if<pdf::PathElement>(&element);
path != nullptr) {
const std::string clip_id = clips.register_clip(path->clip, to_box);
std::string gradient_id;
std::string fill_url_id;
if (path->fill_shading != nullptr) {
gradient_id = gradients.register_gradient(
fill_url_id = gradients.register_gradient(
*path->fill_shading, path->shading_transform * to_box);
} else if (path->fill_pattern != nullptr) {
fill_url_id = patterns.register_pattern(
*path->fill_pattern, path->pattern_transform * to_box,
path->fill_color, *m_logger);
}
std::string fragment =
svg_path_fragment(*path, to_box, clip_id, gradient_id);
svg_path_fragment(*path, to_box, clip_id, fill_url_id);
if (!fragment.empty()) {
page_out.items.push_back(PathOut{std::move(fragment)});
}
Expand Down Expand Up @@ -959,8 +1037,9 @@ class HtmlServiceImpl final : public HtmlService {
}
}

// Clip-path and gradient defs share the page's hidden `<svg><defs>`.
page_out.clip_defs = clips.defs() + gradients.defs();
// Clip-path, gradient and pattern defs share the page's hidden
// `<svg><defs>`.
page_out.clip_defs = clips.defs() + gradients.defs() + patterns.defs();
}

// Post-pass: every page has been scanned, so the per-font used-scalar sets
Expand Down
11 changes: 9 additions & 2 deletions src/odr/internal/pdf/AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -584,8 +584,15 @@ stage exists to avoid.
parsed onto `Shading` but **not yet honoured** by the renderer (deferred): it
always uses SVG's `pad` spread, so a non-extended shading is over-painted past
its interval rather than masked to it (honouring it needs the fill clipped to
the gradient band/annulus). Mesh/function shadings (types 1, 4–7) and tiling
patterns (`/PatternType 1`) are still future stages.
the gradient band/annulus).
- **Tiling patterns** (`/PatternType 1`): the pattern's content stream is run as
a mini page (`extract_page`) and emitted as an SVG `<pattern>` tile, repeated
every `/XStep`/`/YStep`, with `patternTransform` (the pattern `/Matrix`
composed with the page-box transform) placing the lattice; a `/PatternType 1`
fill references it as `fill="url(#…)"`. Coloured (`/PaintType 1`) cells carry
their own colours; uncoloured (`/PaintType 2`) cells are painted in the current
fill colour. Only paths and images inside a tile are rendered; nested text,
shadings and patterns are rare and are skipped.
- **SVG residue** — where no 1:1 primitive exists; all at generation time, never
rasterization: mesh/function shadings (types 1, 4–7) → tessellate into small
flat polygons (pdf.js's approach); color spaces
Expand Down
13 changes: 13 additions & 0 deletions src/odr/internal/pdf/pdf_document_element.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,19 @@ struct Pattern final : Element {
/// Shading pattern (`/PatternType 2`): the shading painted through the path,
/// pre-resolved (its tint function sampled into stops). Null otherwise.
std::shared_ptr<Shading> shading;

/// Tiling pattern (`/PatternType 1`, ISO 32000-1 8.7.3.1): a content-stream
/// cell tiled across the filled region. `/PaintType` 1 (coloured) carries its
/// own colours; `/PaintType` 2 (uncoloured) is painted entirely in the
/// current fill colour at use time. The cell is `/BBox` in pattern space,
/// repeated every `/XStep`/`/YStep`. Fields are zero/empty for a non-tiling
/// pattern.
std::int32_t paint_type{0}; ///< `/PaintType`: 1 coloured, 2 uncoloured
std::array<double, 4> bbox{}; ///< `/BBox` `[x0 y0 x1 y1]`, pattern space
double x_step{0}; ///< `/XStep`, pattern space
double y_step{0}; ///< `/YStep`, pattern space
Resources *resources{nullptr}; ///< the tile's own `/Resources`
std::string content; ///< decoded tile content stream
};

/// A non-owning view over a string of PDF character codes, splitting it into
Expand Down
53 changes: 51 additions & 2 deletions src/odr/internal/pdf/pdf_document_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,14 @@ struct State {
m_fonts[reference] = font;
}

/// Looks up a previously cached Pattern element by its object reference.
/// Returns nullptr when no element has been cached for `reference`.
[[nodiscard]] Pattern *find_pattern(const ObjectReference &reference) const {
  if (const auto entry = m_patterns.find(reference);
      entry != m_patterns.end()) {
    return entry->second;
  }
  return nullptr;
}
/// Records `pattern` as the element for `reference`, replacing any element
/// cached for it before.
void cache_pattern(const ObjectReference &reference, Pattern *pattern) {
  m_patterns.insert_or_assign(reference, pattern);
}

private:
DocumentParser *m_parser{};
Document *m_document{};
Expand All @@ -74,6 +82,12 @@ struct State {
/// re-inline the (base64) font program once per page — a multi-page document
/// reusing one font would balloon to gigabytes.
std::map<ObjectReference, Font *> m_fonts;

/// Memoized Pattern elements by reference, mirroring `m_x_objects`. A tiling
/// pattern's own `/Resources` may name patterns (including, in a malformed
/// file, itself); registering the element before its resources are parsed
/// breaks the cycle and shares a pattern reused across pages.
std::map<ObjectReference, Pattern *> m_patterns;
};

/// Normalize /Rotate to {0, 90, 180, 270}: the spec requires a multiple of 90,
Expand Down Expand Up @@ -785,14 +799,25 @@ std::shared_ptr<Shading> parse_shading_resource(State &state,
}

/// Parse a `/Pattern` entry. A shading pattern (`/PatternType 2`) resolves its
/// `/Shading`; a tiling pattern (`/PatternType 1`) is recognized here and its
/// content rendered in a later stage. `/Matrix` is taken either way.
/// `/Shading`; a tiling pattern (`/PatternType 1`) is a stream whose content,
/// `/Resources`, `/BBox`, `/XStep`/`/YStep` and `/PaintType` describe the tile.
/// `/Matrix` is taken either way.
Pattern *parse_pattern(State &state, const ObjectReference &reference,
const Resources *resources) {
DocumentParser &parser = state.parser();
Document &document = state.document();

// Shared patterns are parsed once; a cycle through a tiling pattern's own
// `/Resources` resolves to the existing element instead of recursing.
if (Pattern *cached = state.find_pattern(reference); cached != nullptr) {
return cached;
}

auto *pattern = document.create_element<Pattern>();
// Register before parsing `/Resources` so a cycle back here resolves to this
// element rather than recursing forever.
state.cache_pattern(reference, pattern);

IndirectObject object = parser.read_object(reference);
if (!object.object.is_dictionary()) {
return pattern;
Expand All @@ -814,6 +839,30 @@ Pattern *parse_pattern(State &state, const ObjectReference &reference,
parse_shading_resource(state, dictionary.get("Shading"), resources);
} else if (pattern_type == 1) {
pattern->type = Pattern::Type::tiling;
pattern->paint_type = static_cast<std::int32_t>(
parser.resolve_object_copy(dictionary.get("PaintType"))
.as_integer_opt()
.value_or(1));
pattern->x_step = parser.resolve_object_copy(dictionary.get("XStep"))
.as_real_opt()
.value_or(0);
pattern->y_step = parser.resolve_object_copy(dictionary.get("YStep"))
.as_real_opt()
.value_or(0);
if (dictionary.has_value("BBox")) {
const Array box =
parser.resolve_object_copy(dictionary["BBox"]).as_array();
if (box.size() == 4) {
pattern->bbox = {box[0].as_real(), box[1].as_real(), box[2].as_real(),
box[3].as_real()};
}
}
if (object.has_stream) {
pattern->content = parser.read_decoded_stream(object);
}
if (dictionary.has_key("Resources")) {
pattern->resources = parse_resources(state, dictionary["Resources"]);
}
}
return pattern;
}
Expand Down
8 changes: 8 additions & 0 deletions src/odr/internal/pdf/pdf_page_element.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ namespace odr::internal::pdf {

struct Font;
struct Shading;
struct Pattern;

/// One show-text operation laid out in user space. The transform places the
/// text origin and orientation; the font size is kept separate so the renderer
Expand Down Expand Up @@ -90,6 +91,13 @@ struct PathElement {
/// `Resources`, which outlives the element.
const Shading *fill_shading{nullptr};
util::math::Transform2D shading_transform;
/// When the fill is a tiling pattern (`scn` naming a `/PatternType 1`
/// pattern), the resolved pattern whose content cell tiles the path, with
/// `pattern_transform` mapping pattern space to user space (the pattern
/// `/Matrix`). An uncoloured pattern (`/PaintType 2`) is painted in
/// `fill_color`. Null for a non-tiling fill. Owned by `Resources`.
const Pattern *fill_pattern{nullptr};
util::math::Transform2D pattern_transform;
/// Stroke parameters. `line_width` and the dash lengths are in the path's
/// user space (the CTM scale is already folded in, so they live in the same
/// space as the geometry). A `line_width` of 0 means a device-thin line.
Expand Down
14 changes: 10 additions & 4 deletions src/odr/internal/pdf/pdf_page_extractor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,9 @@ void set_color_space(GraphicsState::Color &color, const std::string &name,
/// colour space, interpret the components as a device colour by their count
/// (ISO 32000-1 8.6.8). A trailing name operand selects a `/Pattern`: its name
/// is recorded on `color.pattern` and resolved against `Resources::pattern` at
/// paint time (a shading pattern then fills the path through its gradient).
/// paint time (a shading pattern fills through its gradient). An uncoloured
/// tiling pattern carries leading components — the colour in the pattern colour
/// space's underlying space — which are still resolved into the fill colour.
void set_color(GraphicsState::Color &color, const GraphicsOperator &op) {
std::vector<double> components;
std::string pattern_name;
Expand All @@ -354,9 +356,10 @@ void set_color(GraphicsState::Color &color, const GraphicsOperator &op) {
}
}
color.pattern = pattern_name;
if (!pattern_name.empty()) {
// A pattern colour carries no device components to convert; the pattern is
// resolved at paint time. Leave any underlying colour as-is.
if (!pattern_name.empty() && components.empty()) {
// A coloured pattern (or shading pattern) carries no components; the
// pattern is resolved at paint time and any underlying colour is left
// as-is.
return;
Comment thread
andiwand marked this conversation as resolved.
}
if (color.def != nullptr) {
Expand Down Expand Up @@ -437,6 +440,9 @@ void paint_path(std::vector<PageElement> &out, const Resources &resources,
pattern->shading != nullptr) {
element.fill_shading = pattern->shading.get();
element.shading_transform = pattern->matrix;
} else if (pattern->type == Pattern::Type::tiling) {
element.fill_pattern = pattern;
element.pattern_transform = pattern->matrix;
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion test/data/reference-output/odr-private
39 changes: 39 additions & 0 deletions test/src/internal/pdf/pdf_page_extractor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -922,6 +922,45 @@ TEST(PdfPageExtractor, scn_unknown_pattern_has_no_shading) {
EXPECT_EQ(std::get<PathElement>(page[0]).fill_shading, nullptr);
}

// Selecting a `/PatternType 1` tiling pattern with `scn` and then filling must
// yield a path whose `fill_pattern` points at that pattern (and no shading),
// with the pattern `/Matrix` copied into `pattern_transform`.
TEST(PdfPageExtractor, scn_tiling_pattern_fills_path) {
  Pattern tiling;
  tiling.type = Pattern::Type::tiling;
  tiling.matrix = Transform2D::translation(3, 4);

  Resources resources;
  resources.pattern["P1"] = &tiling;

  const auto elements = extract_page("/Pattern cs /P1 scn 0 0 10 10 re f",
                                     resources, Logger::null());
  ASSERT_EQ(elements.size(), 1);

  const auto &filled = std::get<PathElement>(elements[0]);
  ASSERT_NE(filled.fill_pattern, nullptr);
  EXPECT_EQ(filled.fill_pattern->type, Pattern::Type::tiling);
  EXPECT_EQ(filled.fill_shading, nullptr);
  EXPECT_TRUE(filled.fill);
  // The translation (3, 4) set on the pattern matrix above.
  EXPECT_DOUBLE_EQ(filled.pattern_transform.e, 3);
  EXPECT_DOUBLE_EQ(filled.pattern_transform.f, 4);
}

// With an uncoloured tiling pattern (`/PaintType 2`), `scn` takes colour
// components ahead of the pattern name; the pattern must still be resolved onto
// the filled path.
// NOTE(review): only the pattern selection is asserted here; the recorded fill
// colour itself is not checked, despite the test name. Consider adding that.
TEST(PdfPageExtractor, scn_uncoloured_tiling_pattern_carries_colour) {
  Pattern uncoloured;
  uncoloured.type = Pattern::Type::tiling;
  uncoloured.paint_type = 2;

  Resources resources;
  resources.pattern["P2"] = &uncoloured;

  // Operand order: colour components first, then the pattern name.
  const auto elements = extract_page(
      "/Pattern cs 1 0 0 /P2 scn 0 0 10 10 re f", resources, Logger::null());
  ASSERT_EQ(elements.size(), 1);

  const auto &filled = std::get<PathElement>(elements[0]);
  ASSERT_NE(filled.fill_pattern, nullptr);
  EXPECT_EQ(filled.fill_pattern->paint_type, 2);
}

// The `sh` operator floods the current clip with a named `/Shading`, emitting a
// `ShadingElement` placed by the CTM.
TEST(PdfPageExtractor, sh_emits_shading_element) {
Expand Down
Loading