Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ set(ODR_SOURCE_FILES
"src/odr/internal/pdf/pdf_object.cpp"
"src/odr/internal/pdf/pdf_object_parser.cpp"
"src/odr/internal/pdf/pdf_page_extractor.cpp"
"src/odr/internal/pdf/pdf_shading.cpp"

"src/odr/internal/font/cff_builder.cpp"
"src/odr/internal/font/cff_font.cpp"
Expand Down
160 changes: 149 additions & 11 deletions src/odr/internal/html/pdf_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,18 @@ std::string device_color_to_css(const pdf::GraphicsState::Color &color) {
return std::move(s).str();
}

/// Convert an sRGB triple (a shading colour stop) to a CSS `rgb(r,g,b)` value
/// with integer channels in [0, 255].
///
/// Each component is clamped to [0, 1], scaled by 255 and rounded to the
/// nearest integer (halves round away from zero, per `std::lround`).
/// Out-of-range and infinite components saturate at 0 or 255. A NaN component
/// is emitted as 0: `std::clamp` hands NaN through unchanged and the result of
/// `std::lround` on NaN is unspecified, so NaN is filtered out before rounding.
///
/// @param rgb red, green and blue components, nominally in [0, 1].
/// @return the colour as `rgb(<r>,<g>,<b>)` with no whitespace.
std::string rgb_to_css(const std::array<double, 3> &rgb) {
  const auto to255 = [](const double v) {
    if (std::isnan(v)) {
      return 0;
    }
    return static_cast<int>(std::lround(std::clamp(v, 0.0, 1.0) * 255.0));
  };

  // Channels are plain integers in [0, 255], so `std::to_string` is enough and
  // avoids building a stream just to format three numbers.
  std::string css = "rgb(";
  css += std::to_string(to255(rgb[0]));
  css += ',';
  css += std::to_string(to255(rgb[1]));
  css += ',';
  css += std::to_string(to255(rgb[2]));
  css += ')';
  return css;
}

/// Build an SVG `d` attribute from a path's subpaths, each point mapped through
/// `to_box` (PDF user space -> the page box, y-down). Lines become `L`, cubic
/// Béziers `C`, and an explicitly closed subpath ends with `Z`.
Expand Down Expand Up @@ -117,10 +129,12 @@ std::string svg_path_d(const std::vector<pdf::Subpath> &subpaths,
/// stroke carries width (CTM-scaled in user space), caps, joins, miter limit
/// and the dash pattern. A zero stroke width renders as a thin hairline.
/// `clip_id`, when non-empty, references a `<clipPath>` installed via
/// `clip-path`.
/// `clip-path`. `gradient_id`, when non-empty, fills the path with that
/// gradient (a shading pattern) instead of `fill_color`.
std::string svg_path_fragment(const pdf::PathElement &path,
const util::math::Transform2D &to_box,
const std::string &clip_id) {
const std::string &clip_id,
const std::string &gradient_id) {
if ((!path.fill && !path.stroke) || path.subpaths.empty()) {
return {};
}
Expand All @@ -131,7 +145,11 @@ std::string svg_path_fragment(const pdf::PathElement &path,
}

if (path.fill) {
f << " fill=\"" << device_color_to_css(path.fill_color) << '"';
if (!gradient_id.empty()) {
f << " fill=\"url(#" << gradient_id << ")\"";
} else {
f << " fill=\"" << device_color_to_css(path.fill_color) << '"';
}
if (path.even_odd) {
f << " fill-rule=\"evenodd\"";
}
Expand Down Expand Up @@ -263,6 +281,95 @@ class ClipRegistry {
std::ostringstream m_defs;
};

/// Collects the gradient definitions for one page. Axial (type 2) and radial
/// (type 3) shadings are written as `<linearGradient>` / `<radialGradient>`
/// elements; a shading registered again with the same placement reuses the id
/// it was given the first time. Each of the shading's colour stops becomes a
/// `<stop>`, and the placement matrix is written as `gradientTransform` next to
/// `gradientUnits="userSpaceOnUse"`. Ids have the form `g<page>_<n>`, with `n`
/// counting up from 1 within the page.
///
/// DEFERRED (out of scope for this stage): PDF `/Extend` is approximated by
/// SVG's default `pad` spread (the end stops extend outward), so a non-extended
/// shading is over-painted beyond its interval instead of being masked to it;
/// `Shading::background` and `Shading::bbox` are likewise not yet honoured.
/// Honouring them needs the fill clipped to the gradient band/annulus.
class GradientRegistry {
public:
  explicit GradientRegistry(const std::uint32_t page) : m_page{page} {}

  /// Register `shading` as placed by `m` (shading space -> page box) and return
  /// the id to use in `fill="url(#id)"`. Returns an empty string when the
  /// shading is neither type 2 nor type 3, or has no colour stops.
  std::string register_gradient(const pdf::Shading &shading,
                                const util::math::Transform2D &m) {
    const bool axial = shading.type == 2;
    const bool radial = shading.type == 3;
    if (!(axial || radial) || shading.stops.empty()) {
      return {};
    }

    // Deduplication key: shading type, the shading object's address and the
    // six matrix entries as the stream formats them.
    std::ostringstream key;
    key << shading.type << ':' << static_cast<const void *>(&shading) << ':'
        << m.a << ',' << m.b << ',' << m.c << ',' << m.d << ',' << m.e << ','
        << m.f;
    const auto lookup = m_id_by_signature.try_emplace(key.str());
    std::string &id = lookup.first->second;
    if (!lookup.second) {
      // Already registered: hand back the id assigned earlier.
      return id;
    }
    ++m_count;
    id = "g" + std::to_string(m_page) + "_" + std::to_string(m_count);

    const std::array<double, 6> &coords = shading.coords;
    const char *const tag = axial ? "linearGradient" : "radialGradient";
    m_defs << '<' << tag << " id=\"" << id << '"';
    if (axial) {
      // coords = (x0, y0, x1, y1): the gradient axis.
      m_defs << " x1=\"" << coords[0] << '"';
      m_defs << " y1=\"" << coords[1] << '"';
      m_defs << " x2=\"" << coords[2] << '"';
      m_defs << " y2=\"" << coords[3] << '"';
    } else {
      // coords = (x0, y0, r0, x1, y1, r1): the outer circle maps to SVG's
      // (cx, cy, r), the inner circle to the focal point and focal radius
      // (`fr` is an SVG2 attribute).
      m_defs << " cx=\"" << coords[3] << '"';
      m_defs << " cy=\"" << coords[4] << '"';
      m_defs << " r=\"" << coords[5] << '"';
      m_defs << " fx=\"" << coords[0] << '"';
      m_defs << " fy=\"" << coords[1] << '"';
      m_defs << " fr=\"" << coords[2] << '"';
    }

    // Only the translation (e, f) goes through `round2`; the linear part
    // (a..d) is streamed unrounded so small scale/skew factors are not
    // quantized to zero.
    m_defs << " gradientUnits=\"userSpaceOnUse\" gradientTransform=\"matrix("
           << m.a << ',' << m.b << ',' << m.c << ',' << m.d << ','
           << round2(m.e) << ',' << round2(m.f) << ")\">";

    for (const pdf::GradientStop &stop : shading.stops) {
      m_defs << "<stop offset=\"" << round2(stop.offset) << '"';
      m_defs << " stop-color=\"" << rgb_to_css(stop.rgb) << "\"/>";
    }
    m_defs << "</" << tag << '>';
    return id;
  }

  /// All gradient elements registered so far, concatenated in registration
  /// order.
  [[nodiscard]] std::string defs() const { return m_defs.str(); }

private:
  std::uint32_t m_page{};   ///< page number used in the id prefix
  std::uint32_t m_count{0}; ///< number of distinct gradients registered
  /// Deduplication key -> assigned gradient id.
  std::unordered_map<std::string, std::string> m_id_by_signature;
  std::ostringstream m_defs; ///< accumulated gradient markup
};

/// Build the SVG `<rect>` that paints an `sh` shading flood. The rect starts at
/// the origin and is `width` x `height` (both passed through `round2`), is
/// filled with `url(#gradient_id)` and, when `clip_id` is non-empty, carries
/// `clip-path="url(#clip_id)"` (the clip in force at `sh` time). The rect
/// always spans the whole page box; the clip and the gradient's own extent are
/// what bound the paint.
///
/// Returns an empty string when `gradient_id` is empty, i.e. the shading
/// produced no gradient.
std::string svg_shading_fragment(const std::string &gradient_id,
                                 const std::string &clip_id, const double width,
                                 const double height) {
  const bool has_gradient = !gradient_id.empty();
  if (!has_gradient) {
    return {};
  }

  std::ostringstream rect;
  rect << "<rect x=\"0\" y=\"0\"";
  rect << " width=\"" << round2(width) << '"';
  rect << " height=\"" << round2(height) << '"';
  rect << " fill=\"url(#" << gradient_id << ")\"";
  const bool has_clip = !clip_id.empty();
  if (has_clip) {
    rect << " clip-path=\"url(#" << clip_id << ")\"";
  }
  rect << "/>";
  return std::move(rect).str();
}

/// Deduplicates CSS declarations into atomic, single-property classes. PDF text
/// emits one absolutely-positioned span per glyph run, and the same font sizes,
/// offsets and spacings recur across the (potentially millions of) spans.
Expand Down Expand Up @@ -585,14 +692,43 @@ class HtmlServiceImpl final : public HtmlService {
util::math::Transform2D::scaling_translation(1, -1, 0, height);

ClipRegistry clips(static_cast<std::uint32_t>(pages_out.size()));
GradientRegistry gradients(static_cast<std::uint32_t>(pages_out.size()));

for (const pdf::PageElement &element :
pdf::extract_page(stream, *page->resources, *m_logger)) {
// A painted path: serialize its subpaths to an SVG `<path>` fragment in
// the page viewBox (fill and/or stroke), under any active clip.
if (const auto *path = std::get_if<pdf::PathElement>(&element)) {
// the page viewBox (fill and/or stroke), under any active clip. A
// shading-pattern fill is painted through a gradient instead of a
// colour.
if (const auto *path = std::get_if<pdf::PathElement>(&element);
path != nullptr) {
const std::string clip_id = clips.register_clip(path->clip, to_box);
std::string fragment = svg_path_fragment(*path, to_box, clip_id);
std::string gradient_id;
if (path->fill_shading != nullptr) {
gradient_id = gradients.register_gradient(
*path->fill_shading, path->shading_transform * to_box);
}
std::string fragment =
svg_path_fragment(*path, to_box, clip_id, gradient_id);
if (!fragment.empty()) {
page_out.items.push_back(PathOut{std::move(fragment)});
}
continue;
}

// An `sh` shading flood: a `<rect>` over the page box filled with the
// shading's gradient, bounded by the clip in force at `sh` time.
if (const auto *shading = std::get_if<pdf::ShadingElement>(&element);
shading != nullptr) {
if (shading->shading == nullptr) {
continue;
}
const std::string clip_id =
clips.register_clip(shading->clip, to_box);
const std::string gradient_id = gradients.register_gradient(
*shading->shading, shading->transform * to_box);
std::string fragment =
svg_shading_fragment(gradient_id, clip_id, width, height);
if (!fragment.empty()) {
page_out.items.push_back(PathOut{std::move(fragment)});
}
Expand Down Expand Up @@ -823,7 +959,8 @@ class HtmlServiceImpl final : public HtmlService {
}
}

page_out.clip_defs = clips.defs();
// Clip-path and gradient defs share the page's hidden `<svg><defs>`.
page_out.clip_defs = clips.defs() + gradients.defs();
}

// Post-pass: every page has been scanned, so the per-font used-scalar sets
Expand Down Expand Up @@ -969,10 +1106,11 @@ class HtmlServiceImpl final : public HtmlService {
for (const PageOut &page : pages_out) {
out.write_element_begin("div",
HtmlElementOptions().set_class(page.classes));
// Clip-path defs for this page, in a hidden zero-size `<svg>`. They are
// referenced by id from the page's path fragments; `clipPathUnits`
// defaults to `userSpaceOnUse`, so the geometry is read in the user space
// of the referencing element (the page viewBox), not this `<svg>`.
// Clip-path and gradient defs for this page, in a hidden zero-size
// `<svg>`. They are referenced by id from the page's fragments;
// `clipPathUnits`/`gradientUnits` are `userSpaceOnUse`, so the geometry
// is read in the user space of the referencing element (the page
// viewBox), not this `<svg>`.
if (!page.clip_defs.empty()) {
out.write_raw(
"<svg width=\"0\" height=\"0\" style=\"position:absolute\">"
Expand Down
12 changes: 12 additions & 0 deletions src/odr/internal/pdf/AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,18 @@ stage exists to avoid.
current fill colour; `/SMask` and `/Mask` (stencil + colour-key) composited
into RGBA on the raster path (a mask on a JPEG base is ignored — decoding the
JPEG to composite is out of scope).
- **Shadings & shading patterns** (axial type 2, radial type 3): `parse_shading`
pre-samples the tint `/Function` across `/Domain` into 32 sRGB colour stops, so
the renderer needs no function evaluator. The `sh` operator floods the current
clip (a `ShadingElement` → `<rect>` filled with the gradient); a `/PatternType
2` shading pattern selected by `scn` fills a path (`PathElement::fill_shading`
+ the pattern `/Matrix`). Both emit SVG `<linearGradient>`/`<radialGradient>`
with `gradientUnits="userSpaceOnUse"`. `/Extend`, `/Background` and `/BBox` are
parsed onto `Shading` but **not yet honoured** by the renderer (deferred): it
always uses SVG's `pad` spread, so a non-extended shading is over-painted past
its interval rather than masked to it (honouring it needs the fill clipped to
the gradient band/annulus). Mesh/function shadings (types 1, 4–7) and tiling
patterns (`/PatternType 1`) are still future stages.
- **SVG residue** — where no 1:1 primitive exists; all at generation time, never
rasterization: mesh/function shadings (types 1, 4–7) → tessellate into small
flat polygons (pdf.js's approach); color spaces
Expand Down
34 changes: 34 additions & 0 deletions src/odr/internal/pdf/pdf_document_element.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <odr/internal/pdf/pdf_cmap.hpp>
#include <odr/internal/pdf/pdf_encoding.hpp>
#include <odr/internal/pdf/pdf_object.hpp>
#include <odr/internal/pdf/pdf_shading.hpp>
#include <odr/internal/util/math_util.hpp>

#include <array>
Expand All @@ -28,6 +29,7 @@ struct Annotation;
struct Resources;
struct Font;
struct XObject;
struct Pattern;
struct ColorSpaceDef;

struct Element {
Expand Down Expand Up @@ -95,6 +97,17 @@ struct Resources final : Element {
/// referenced by `BDC`. Each value is the resolved property-list dictionary
/// `Object`; used to recover `/ActualText` for a `BDC /Tag /Name` sequence.
std::unordered_map<std::string, Object> properties;
/// The `/Shading` subdictionary (ISO 32000-1 8.7.4.3): named shadings painted
/// by the `sh` operator. Resolved eagerly (the tint function sampled into
/// colour stops) so extraction needs no parser handle. Held by `shared_ptr`
/// because `Shading` is a plain value type, not a document `Element` (a
/// shading pattern shares ownership of the same `Shading`).
std::unordered_map<std::string, std::shared_ptr<Shading>> shading;
/// The `/Pattern` subdictionary (ISO 32000-1 8.7.3.3): named tiling/shading
/// patterns selected as a colour by `scn`/`SCN` in a `/Pattern` colour space.
/// A non-owning pointer: `Pattern` is a document `Element`, owned by the
/// `Document` graph like the other resource elements (`Font`, `XObject`).
std::unordered_map<std::string, Pattern *> pattern;
};

/// An external object referenced by `Do` and listed in a resource dictionary's
Expand Down Expand Up @@ -145,6 +158,27 @@ struct XObject final : Element {
std::vector<double> stencil_decode; ///< `/Decode`, empty = default `[0 1]`
};

/// A pattern listed in a resource dictionary's `/Pattern` subdictionary
/// (ISO 32000-1 8.7.3), selected as a colour by `scn`/`SCN` in a `/Pattern`
/// colour space. Shading patterns (`/PatternType 2`) paint a gradient through
/// the path; tiling patterns (`/PatternType 1`) repeat a content-stream cell.
struct Pattern final : Element {
  /// The kind of pattern, mirroring the dictionary's `/PatternType` entry.
  enum class Type {
    unknown, ///< the default; neither of the two pattern types below
    tiling,  ///< `/PatternType 1`
    shading, ///< `/PatternType 2`
  };
  /// Which kind of pattern this is; stays `Type::unknown` until set.
  Type type{Type::unknown};

  /// `/Matrix` mapping pattern space to the default coordinate system of the
  /// pattern's parent content stream (8.7.3.1); default identity.
  util::math::Transform2D matrix;

  /// Shading pattern (`/PatternType 2`): the shading painted through the path,
  /// pre-resolved (its tint function sampled into stops). Null otherwise.
  /// Held by `shared_ptr`, so the same `Shading` object can also be owned
  /// elsewhere.
  std::shared_ptr<Shading> shading;
};

/// A non-owning view over a string of PDF character codes, splitting it into
/// fixed-width (`Font::code_byte_width()`) big-endian codes on iteration. Holds
/// only a `string_view`, so it must not outlive the underlying bytes; iterate
Expand Down
Loading
Loading