diff --git a/src/odr/internal/pdf/AGENTS.md b/src/odr/internal/pdf/AGENTS.md index 58b5b546..ccf3aa8f 100644 --- a/src/odr/internal/pdf/AGENTS.md +++ b/src/odr/internal/pdf/AGENTS.md @@ -570,8 +570,10 @@ stage exists to avoid. usually ship none — the deferred AFM-widths item) as a generated data table. Glyph shapes are the browser's fallback font. - **Images**: `DCTDecode` → `` JPEG pass-through; Flate/LZW raster → PNG - encode; inline images (`BI`/`ID`/`EI` — currently not even tokenized correctly - past `ID`); image masks and SMasks later. + encode; inline images (`BI`/`ID`/`EI`); `/ImageMask` stencils painted in the + current fill colour; `/SMask` and `/Mask` (stencil + colour-key) composited + into RGBA on the raster path (a mask on a JPEG base is ignored — decoding the + JPEG to composite is out of scope). - **SVG residue** — where no 1:1 primitive exists; all at generation time, never rasterization: mesh/function shadings (types 1, 4–7) → tessellate into small flat polygons (pdf.js's approach); color spaces diff --git a/src/odr/internal/pdf/pdf_document_element.hpp b/src/odr/internal/pdf/pdf_document_element.hpp index 4f1f7223..2c7bf58f 100644 --- a/src/odr/internal/pdf/pdf_document_element.hpp +++ b/src/odr/internal/pdf/pdf_document_element.hpp @@ -126,11 +126,23 @@ struct XObject final : Element { /// Image XObject only: the encoded image bytes for the browser — a JPEG /// passed through (`DCTDecode`) or a raster re-encoded as PNG (Flate/LZW/raw - /// samples assembled through the colour space) — with `image_mime` naming the - /// format. Empty for a codec not yet handled (JPX/CCITT/JBIG2), an image mask - /// (later stages) and non-image XObjects, so `Do` skips it. + /// samples assembled through the colour space, with a `/SMask`/`/Mask` + /// composited into RGBA) — with `image_mime` naming the format. Empty for a + /// codec not yet handled (JPX/CCITT/JBIG2), a stencil mask (see below) and + /// non-image XObjects, so `Do` skips it. std::string image_data; std::string image_mime; + + /// Image XObject `/ImageMask true` (ISO 32000-1 8.9.6.2): a 1-bpc stencil + /// painted in the *current fill colour*, which is known only at `Do` time. So + /// the decoded bitmap and its geometry are carried here for the page + /// extractor to recolour (`encode_stencil_png`); `image_data` stays empty. + /// `false` for a normal image. + bool stencil_mask{false}; + std::string stencil_samples; ///< decoded 1-bpc bitmap, rows byte-aligned + std::int32_t stencil_width{0}; ///< `/Width` + std::int32_t stencil_height{0}; ///< `/Height` + std::vector stencil_decode; ///< `/Decode`, empty = default `[0 1]` }; /// A non-owning view over a string of PDF character codes, splitting it into diff --git a/src/odr/internal/pdf/pdf_document_parser.cpp b/src/odr/internal/pdf/pdf_document_parser.cpp index 56a7fe1f..da84bbca 100644 --- a/src/odr/internal/pdf/pdf_document_parser.cpp +++ b/src/odr/internal/pdf/pdf_document_parser.cpp @@ -525,12 +525,110 @@ Element *parse_page_or_pages(State &state, const ObjectReference &reference, // back the in-progress element, so the in-memory graph mirrors the file. Resources *parse_resources(State &state, const Object &object); +/// Read an integer image-dictionary entry (e.g. `/Width`), resolving an +/// indirect reference, defaulting to `fallback`. +std::int32_t image_int(DocumentParser &parser, const Dictionary &dictionary, + const std::string &key, const std::int32_t fallback) { + return static_cast( + parser.resolve_object_copy(dictionary.get(key)) + .as_integer_opt() + .value_or(fallback)); +} + +/// The `/Decode` array of an image dictionary as doubles ([] when absent). +std::vector image_decode(DocumentParser &parser, + const Dictionary &dictionary) { + std::vector decode; + const Object decode_object = + parser.resolve_object_copy(dictionary.get("Decode")); + if (decode_object.is_array()) { + for (const Object &item : decode_object.as_array()) { + decode.push_back(item.as_real()); + } + } + return decode; +} + +/// Resolve a `/SMask` (soft mask) or stencil `/Mask` sub-image referenced by +/// `mask` into a base-sized alpha plane (ISO 32000-1 11.6.5.2 / 8.9.6.3). The +/// sub-image is a single-component raster: decode its `/Filter` chain, then map +/// its samples to coverage (`decode_mask_alpha`). Returns empty when `mask` is +/// not a stream reference or its codec is not decodable (CCITT/JBIG2/JPX), so +/// the base image stays opaque. +std::vector resolve_mask_alpha(DocumentParser &parser, + const Object &mask, + const std::int32_t base_width, + const std::int32_t base_height, + const bool stencil) { + if (!mask.is_reference()) { + return {}; + } + const IndirectObject &object = parser.read_object(mask.as_reference()); + if (!object.object.is_dictionary()) { + return {}; + } + const Dictionary &dictionary = object.object.as_dictionary(); + Object filter; + if (dictionary.has_key("Filter")) { + filter = parser.deep_resolve_object_copy(dictionary["Filter"]); + } + Object decode_parms; + if (dictionary.has_key("DecodeParms")) { + decode_parms = parser.deep_resolve_object_copy(dictionary["DecodeParms"]); + } + DecodeResult result = + decode(filter, decode_parms, parser.read_object_stream(object)); + if (result.stopped_at_filter.has_value()) { + return {}; // an image codec we cannot decode (CCITT/JBIG2/JPX) + } + return decode_mask_alpha( + result.data, image_int(parser, dictionary, "Width", 0), + image_int(parser, dictionary, "Height", 0), + image_int(parser, dictionary, "BitsPerComponent", stencil ? 1 : 8), + image_decode(parser, dictionary), stencil, base_width, base_height); +} + +/// Carry an `/ImageMask true` stencil's decoded bitmap and geometry onto +/// `x_object` (ISO 32000-1 8.9.6.2). The stencil is painted in the current fill +/// colour, known only at `Do` time, so the page extractor recolours it; here we +/// only decode and stash. An undecodable codec leaves `stencil_mask` false so +/// `Do` skips it. +void parse_stencil_mask(DocumentParser &parser, const Dictionary &dictionary, + const IndirectObject &object, XObject &x_object) { + Object filter; + if (dictionary.has_key("Filter")) { + filter = parser.deep_resolve_object_copy(dictionary["Filter"]); + } + Object decode_parms; + if (dictionary.has_key("DecodeParms")) { + decode_parms = parser.deep_resolve_object_copy(dictionary["DecodeParms"]); + } + DecodeResult result = + decode(filter, decode_parms, parser.read_object_stream(object)); + if (result.stopped_at_filter.has_value()) { + return; // CCITT/JBIG2 fax stencils are not yet decodable + } + const std::int32_t width = image_int(parser, dictionary, "Width", 0); + const std::int32_t height = image_int(parser, dictionary, "Height", 0); + if (width <= 0 || height <= 0) { + return; + } + x_object.stencil_mask = true; + x_object.stencil_samples = std::move(result.data); + x_object.stencil_width = width; + x_object.stencil_height = height; + x_object.stencil_decode = image_decode(parser, dictionary); +} + /// Build the browser-ready bytes of an image XObject (ISO 32000-1 8.9). A JPEG /// (`DCTDecode`) passes through undecoded; a fully decodable raster /// (Flate/LZW/RunLength/ASCII/raw) is decoded, its samples assembled through -/// the image's colour space and re-encoded as an 8-bit RGB PNG. Codecs we +/// the image's colour space and re-encoded as a PNG — RGBA when a `/SMask`, +/// stencil `/Mask` or colour-key `/Mask` supplies transparency. Codecs we /// cannot yet hand off (JPXDecode, CCITTFaxDecode, JBIG2Decode) and unresolved -/// colour spaces leave the bytes empty, so `Do` skips the image. +/// colour spaces leave the bytes empty, so `Do` skips the image. A `/SMask` or +/// `/Mask` on a JPEG base is ignored (decoding the JPEG to composite is out of +/// scope). void parse_image_data(DocumentParser &parser, const Dictionary &dictionary, const IndirectObject &object, XObject &x_object) { Object filter; @@ -558,31 +656,38 @@ void parse_image_data(DocumentParser &parser, const Dictionary &dictionary, }; color_space = parse_color_space(dictionary.get("ColorSpace"), context); } - const auto width = static_cast( - parser.resolve_object_copy(dictionary.get("Width")) - .as_integer_opt() - .value_or(0)); - const auto height = static_cast( - parser.resolve_object_copy(dictionary.get("Height")) - .as_integer_opt() - .value_or(0)); - const auto bits_per_component = static_cast( - parser.resolve_object_copy(dictionary.get("BitsPerComponent")) - .as_integer_opt() - .value_or(8)); - - std::vector decode_array; - const Object decode_object = - parser.resolve_object_copy(dictionary.get("Decode")); - if (decode_object.is_array()) { - for (const Object &item : decode_object.as_array()) { - decode_array.push_back(item.as_real()); + const std::int32_t width = image_int(parser, dictionary, "Width", 0); + const std::int32_t height = image_int(parser, dictionary, "Height", 0); + const std::int32_t bits_per_component = + image_int(parser, dictionary, "BitsPerComponent", 8); + const std::vector decode_array = image_decode(parser, dictionary); + + // Transparency (8.9.6 / 11.6.5.2): a `/SMask` (alpha) takes precedence over a + // `/Mask`, which is either a stencil sub-image (a reference) or a colour-key + // array. Each resolves to a base-sized alpha plane or a colour-key range, + // which `encode_image` composites into an RGBA PNG on the raster path. + std::vector alpha; + std::vector color_key; + if (dictionary.has_value("SMask")) { + alpha = resolve_mask_alpha(parser, dictionary["SMask"], width, height, + /*stencil=*/false); + } + if (alpha.empty() && dictionary.has_value("Mask")) { + const Object mask = parser.resolve_object_copy(dictionary["Mask"]); + if (mask.is_array()) { + for (const Object &item : mask.as_array()) { + color_key.push_back(item.as_real()); + } + } else if (dictionary["Mask"].is_reference()) { + alpha = resolve_mask_alpha(parser, dictionary["Mask"], width, height, + /*stencil=*/true); } } - if (std::optional encoded = encode_image( - parser.read_object_stream(object), filter, decode_parms, width, - height, bits_per_component, color_space.get(), decode_array)) { + if (std::optional encoded = + encode_image(parser.read_object_stream(object), filter, decode_parms, + width, height, bits_per_component, color_space.get(), + decode_array, alpha, color_key)) { x_object.image_data = std::move(encoded->data); x_object.image_mime = std::move(encoded->mime); } @@ -614,12 +719,15 @@ XObject *parse_x_object(State &state, const ObjectReference &reference) { : ""; if (subtype == "Image") { x_object->subtype = XObject::Subtype::image; - // `/ImageMask` stencils and colour-key masks are a later stage (4.8); leave - // their bytes empty so `Do` skips them. Everything else is handed to the - // browser as JPEG (pass-through) or PNG (raster), or skipped. + // An `/ImageMask true` stencil is painted in the current fill colour (known + // only at `Do` time), so its bitmap is stashed for the page extractor to + // recolour; everything else is encoded to JPEG/PNG bytes here (with any + // `/SMask`/`/Mask` transparency), or skipped. const bool image_mask = dictionary.get("ImageMask").as_bool_opt().value_or(false); - if (!image_mask) { + if (image_mask) { + parse_stencil_mask(parser, dictionary, object, *x_object); + } else { parse_image_data(parser, dictionary, object, *x_object); } return x_object; diff --git a/src/odr/internal/pdf/pdf_image.cpp b/src/odr/internal/pdf/pdf_image.cpp index 3e9825d6..872807f5 100644 --- a/src/odr/internal/pdf/pdf_image.cpp +++ b/src/odr/internal/pdf/pdf_image.cpp @@ -74,13 +74,15 @@ std::uint8_t to_byte(const double v) { namespace odr::internal { -std::string pdf::write_png_rgb(const std::string &rgb, const std::int32_t width, - const std::int32_t height) { - if (width <= 0 || height <= 0) { +std::string pdf::write_png(const std::string &pixels, const std::int32_t width, + const std::int32_t height, + const std::int32_t channels) { + if (width <= 0 || height <= 0 || (channels != 3 && channels != 4)) { return {}; } - const auto stride = static_cast(width) * 3; - if (rgb.size() < stride * static_cast(height)) { + const auto stride = + static_cast(width) * static_cast(channels); + if (pixels.size() < stride * static_cast(height)) { return {}; } @@ -90,7 +92,7 @@ std::string pdf::write_png_rgb(const std::string &rgb, const std::int32_t width, raw.reserve((stride + 1) * static_cast(height)); for (std::int32_t y = 0; y < height; ++y) { raw.push_back(0); - raw.append(rgb, static_cast(y) * stride, stride); + raw.append(pixels, static_cast(y) * stride, stride); } std::string out; @@ -103,7 +105,8 @@ std::string pdf::write_png_rgb(const std::string &rgb, const std::int32_t width, util::byte_string::put_u32_be(ihdr, static_cast(width)); util::byte_string::put_u32_be(ihdr, static_cast(height)); ihdr.push_back(8); // bit depth - ihdr.push_back(2); // colour type: truecolour (RGB) + // Colour type: 2 = truecolour (RGB), 6 = truecolour with alpha (RGBA). + ihdr.push_back(channels == 4 ? 6 : 2); ihdr.push_back(0); // compression: deflate ihdr.push_back(0); // filter method: adaptive ihdr.push_back(0); // interlace: none @@ -118,7 +121,9 @@ std::string pdf::encode_image_png(const std::string &samples, const std::int32_t height, const std::int32_t bits_per_component, const ColorSpaceDef &color_space, - const std::vector &decode) { + const std::vector &decode, + const std::vector &alpha, + const std::vector &color_key) { const std::int32_t components = color_space.components; if (width <= 0 || height <= 0 || components <= 0 || bits_per_component <= 0 || bits_per_component > 16) { @@ -136,18 +141,29 @@ std::string pdf::encode_image_png(const std::string &samples, static_cast(bits_per_component); const std::size_t row_bytes = (row_bits + 7) / 8; - std::string rgb; - rgb.resize(static_cast(width) * - static_cast(height) * 3); + const auto pixel_count = + static_cast(width) * static_cast(height); + // A colour-key array (8.9.6.4) or an alpha plane (an SMask / stencil /Mask) + // makes the output RGBA; otherwise it stays the compact 3-byte RGB. + const bool has_color_key = + color_key.size() >= 2 * static_cast(components); + const bool has_alpha = alpha.size() == pixel_count || has_color_key; + const std::size_t channels = has_alpha ? 4 : 3; + + std::string out; + out.resize(pixel_count * channels); std::vector component_values(static_cast(components)); + std::vector raw_samples(static_cast(components)); std::size_t out_index = 0; + std::size_t pixel_index = 0; for (std::int32_t y = 0; y < height; ++y) { BitReader reader(samples, static_cast(y) * row_bytes); for (std::int32_t x = 0; x < width; ++x) { for (std::int32_t j = 0; j < components; ++j) { const std::uint32_t sample = reader.read(bits_per_component); const auto k = static_cast(j); + raw_samples[k] = sample; if (decode.size() >= 2 * (k + 1)) { const double d_min = decode[2 * k]; const double d_max = decode[2 * k + 1]; @@ -161,20 +177,134 @@ std::string pdf::encode_image_png(const std::string &samples, } } const std::array pixel = color_space.to_rgb(component_values); - rgb[out_index++] = static_cast(to_byte(pixel[0])); - rgb[out_index++] = static_cast(to_byte(pixel[1])); - rgb[out_index++] = static_cast(to_byte(pixel[2])); + out[out_index++] = static_cast(to_byte(pixel[0])); + out[out_index++] = static_cast(to_byte(pixel[1])); + out[out_index++] = static_cast(to_byte(pixel[2])); + if (has_alpha) { + std::uint8_t a = + alpha.size() == pixel_count ? alpha[pixel_index] : 0xFF; + if (has_color_key) { + bool keyed = true; + for (std::int32_t j = 0; j < components && keyed; ++j) { + const auto k = static_cast(j); + if (raw_samples[k] < color_key[2 * k] || + raw_samples[k] > color_key[2 * k + 1]) { + keyed = false; + } + } + if (keyed) { + a = 0; + } + } + out[out_index++] = static_cast(a); + } + ++pixel_index; + } + } + + return write_png(out, width, height, has_alpha ? 4 : 3); +} + +std::vector pdf::decode_mask_alpha( + const std::string &samples, const std::int32_t width, + const std::int32_t height, const std::int32_t bits_per_component, + const std::vector &decode, const bool stencil, + const std::int32_t base_width, const std::int32_t base_height) { + if (width <= 0 || height <= 0 || base_width <= 0 || base_height <= 0 || + bits_per_component <= 0 || bits_per_component > 16) { + return {}; + } + const std::uint32_t max_sample = + (1u << static_cast(bits_per_component)) - 1u; + const std::size_t row_bytes = + (static_cast(width) * + static_cast(bits_per_component) + + 7) / + 8; + // A /Decode of [1 0] inverts the mask sense (8.9.5.4). + const bool invert = decode.size() >= 2 && decode[0] > decode[1]; + + // Decode the mask at its native resolution first, then resample. + std::vector native(static_cast(width) * + static_cast(height)); + std::size_t i = 0; + for (std::int32_t y = 0; y < height; ++y) { + BitReader reader(samples, static_cast(y) * row_bytes); + for (std::int32_t x = 0; x < width; ++x) { + const std::uint32_t sample = reader.read(bits_per_component); + double value = static_cast(sample) / max_sample; + if (invert) { + value = 1.0 - value; + } + if (stencil) { + // An explicit stencil /Mask: a decoded 1 masks the base pixel out. + native[i++] = value >= 0.5 ? 0x00 : 0xFF; + } else { + // A soft mask: the grey level is the coverage directly. + native[i++] = to_byte(value); + } + } + } + + if (width == base_width && height == base_height) { + return native; + } + std::vector out(static_cast(base_width) * + static_cast(base_height)); + std::size_t o = 0; + for (std::int32_t by = 0; by < base_height; ++by) { + const std::int32_t my = std::min(height - 1, by * height / base_height); + for (std::int32_t bx = 0; bx < base_width; ++bx) { + const std::int32_t mx = std::min(width - 1, bx * width / base_width); + out[o++] = native[static_cast(my) * width + mx]; } } + return out; +} + +std::string pdf::encode_stencil_png(const std::string &samples, + const std::int32_t width, + const std::int32_t height, + const std::array &color, + const std::vector &decode) { + if (width <= 0 || height <= 0) { + return {}; + } + // 1 bpc, one component; rows byte-aligned (8.9.5.2). + const std::size_t row_bytes = (static_cast(width) + 7) / 8; + // Default /Decode is [0 1]: a sample of 0 paints, 1 is transparent. A /Decode + // of [1 0] swaps that — paint when the decoded value rounds to 0. + const bool invert = decode.size() >= 2 && decode[0] > decode[1]; + + const std::uint8_t r = to_byte(color[0]); + const std::uint8_t g = to_byte(color[1]); + const std::uint8_t b = to_byte(color[2]); - return write_png_rgb(rgb, width, height); + std::string rgba; + rgba.resize(static_cast(width) * + static_cast(height) * 4); + std::size_t out_index = 0; + for (std::int32_t y = 0; y < height; ++y) { + BitReader reader(samples, static_cast(y) * row_bytes); + for (std::int32_t x = 0; x < width; ++x) { + const bool one = reader.read(1) != 0; + const bool paint = invert ? one : !one; + rgba[out_index++] = static_cast(r); + rgba[out_index++] = static_cast(g); + rgba[out_index++] = static_cast(b); + rgba[out_index++] = static_cast(paint ? 0xFF : 0x00); + } + } + return write_png(rgba, width, height, 4); } std::optional pdf::encode_image( std::string raw, const Object &filter, const Object &decode_parms, const std::int32_t width, const std::int32_t height, const std::int32_t bits_per_component, const ColorSpaceDef *color_space, - const std::vector &decode_array) { + const std::vector &decode_array, + const std::vector &alpha, + const std::vector &color_key) { const std::optional terminal = terminal_image_codec(filter); if (terminal == "DCTDecode") { @@ -199,7 +329,7 @@ std::optional pdf::encode_image( } std::string png = encode_image_png(result.data, width, height, bits_per_component, - *color_space, decode_array); + *color_space, decode_array, alpha, color_key); if (png.empty()) { return std::nullopt; } diff --git a/src/odr/internal/pdf/pdf_image.hpp b/src/odr/internal/pdf/pdf_image.hpp index 0bc0f019..586c092f 100644 --- a/src/odr/internal/pdf/pdf_image.hpp +++ b/src/odr/internal/pdf/pdf_image.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -22,14 +23,21 @@ struct EncodedImage { /// raster (Flate/LZW/RunLength/ASCII/raw) is decoded and re-encoded as PNG /// through `color_space`. `filter`/`decode_parms` are the resolved /// `/Filter`/`/DecodeParms`; `color_space` may be null (used only by the raster -/// path — a null there yields nullopt). Returns nullopt for a codec not yet -/// handled (JPXDecode/CCITTFaxDecode/JBIG2Decode) or an inconsistent raster. -/// Shared by image XObjects and inline images. +/// path — a null there yields nullopt). `alpha` is an optional per-pixel +/// coverage plane (one byte per base pixel, row-major; 0 = transparent, +/// 255 = opaque) resolved from a `/SMask` or stencil `/Mask`; `color_key` is an +/// optional `/Mask` colour-key array ([min0 max0 ...] in raw sample units) — +/// when either is present the raster path emits an RGBA PNG (both are ignored +/// by the JPEG pass-through). Returns nullopt for a codec not yet handled +/// (JPXDecode/CCITTFaxDecode/JBIG2Decode) or an inconsistent raster. Shared by +/// image XObjects and inline images. std::optional encode_image(std::string raw, const Object &filter, const Object &decode_parms, std::int32_t width, std::int32_t height, std::int32_t bits_per_component, const ColorSpaceDef *color_space, - const std::vector &decode); + const std::vector &decode, + const std::vector &alpha = {}, + const std::vector &color_key = {}); /// Assemble a raster image's decoded samples into a base-level (8-bit, RGB) /// PNG (ISO 32000-1 8.9.5 image samples -> a browser-ready raster). `samples` @@ -39,16 +47,49 @@ encode_image(std::string raw, const Object &filter, const Object &decode_parms, /// conversion (Indexed palettes included). `decode`, when non-empty, is the /// `/Decode` array ([min, max] per component) remapping the sample range. /// Returns "" for an inconsistent configuration, so the caller skips the image. +/// `alpha` (one byte per pixel, row-major) and `color_key` ([min0 max0 ...] in +/// raw sample units) add transparency: when either is given the output is an +/// RGBA PNG, a pixel made transparent where its coverage is 0 or every +/// component falls inside the colour-key ranges (ISO 32000-1 8.9.6). std::string encode_image_png(const std::string &samples, std::int32_t width, std::int32_t height, std::int32_t bits_per_component, const ColorSpaceDef &color_space, - const std::vector &decode); + const std::vector &decode, + const std::vector &alpha = {}, + const std::vector &color_key = {}); -/// Wrap 8-bit RGB pixels (`3 * width * height` bytes, row-major, no padding) -/// into a PNG (single `IDAT`, no interlacing). The compression core; exposed -/// for testing the container independently of sample assembly. -std::string write_png_rgb(const std::string &rgb, std::int32_t width, - std::int32_t height); +/// Resolve a `/SMask` or stencil `/Mask` sub-image (a single-component raster) +/// into a coverage plane sized to the *base* image (one byte per base pixel, +/// row-major), nearest-neighbour resampled from the mask's own `width`/`height` +/// (the two need not match — ISO 32000-1 8.9.5.4 / 11.6.5.2). `samples` is the +/// mask's filter-decoded bytes. For a soft mask (`stencil` false) the decoded +/// grey value becomes the alpha; for an explicit stencil `/Mask` (`stencil` +/// true, 1 bpc) a sample decoding to 1 is masked out (alpha 0). `decode`, when +/// non-empty, is the mask's `/Decode`. Returns empty for an inconsistent mask. +std::vector +decode_mask_alpha(const std::string &samples, std::int32_t width, + std::int32_t height, std::int32_t bits_per_component, + const std::vector &decode, bool stencil, + std::int32_t base_width, std::int32_t base_height); + +/// Wrap 8-bit pixels (`channels * width * height` bytes, row-major, no padding) +/// into a PNG (single `IDAT`, no interlacing). `channels` is 3 for RGB (colour +/// type 2) or 4 for RGBA (colour type 6); any other value yields "". The +/// compression core; exposed for testing the container independently of sample +/// assembly. +std::string write_png(const std::string &pixels, std::int32_t width, + std::int32_t height, std::int32_t channels); + +/// Paint a 1-bit stencil image mask (ISO 32000-1 8.9.6.2) in `color` (sRGB in +/// [0, 1]): a sample whose decoded value is 0 paints `color` opaquely, a 1 is +/// transparent — inverted by a `/Decode` of `[1 0]`. `samples` is the decoded +/// 1-bpc bitmap (rows byte-aligned, MSB first). The stencil's paint colour is +/// the graphics-state fill colour at draw time, so this is resolved by the page +/// extractor, not at parse time. Returns an RGBA PNG, or "" when inconsistent. +std::string encode_stencil_png(const std::string &samples, std::int32_t width, + std::int32_t height, + const std::array &color, + const std::vector &decode); } // namespace odr::internal::pdf diff --git a/src/odr/internal/pdf/pdf_page_extractor.cpp b/src/odr/internal/pdf/pdf_page_extractor.cpp index 60596451..fbc4e6d4 100644 --- a/src/odr/internal/pdf/pdf_page_extractor.cpp +++ b/src/odr/internal/pdf/pdf_page_extractor.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -38,7 +39,7 @@ Font *lookup_font(const Resources &resources, const std::string &name, /// diacritic block (0x18–0x1F), the typographic block (0x80–0x9E), and the /// euro (0xA0) need overriding; every other byte stands for the code point of /// the same value. The few undefined slots (0x7F, 0x9F, 0xAD) pass through. -char32_t pdf_doc_encoding_to_unicode(std::uint8_t byte) { +char32_t pdf_doc_encoding_to_unicode(const std::uint8_t byte) { switch (byte) { case 0x18: return U'˘'; // breve @@ -377,11 +378,35 @@ void set_color(GraphicsState::Color &color, const GraphicsOperator &op) { } } +/// Resolve a graphics-state colour to sRGB in [0, 1]. Non-device spaces have +/// already been converted to `rgb` by `set_color`/`set_color_space` (they set +/// `space` to `device_rgb`); CMYK uses the same naive conversion as the HTML +/// emitter. Used to paint a stencil image mask in the current fill colour. +std::array color_to_rgb(const GraphicsState::Color &color) { + switch (color.space) { + case ColorSpace::device_grey: + return {color.grey, color.grey, color.grey}; + case ColorSpace::device_rgb: + return {color.rgb[0], color.rgb[1], color.rgb[2]}; + case ColorSpace::device_cmyk: { + const double c = color.cmyk[0]; + const double m = color.cmyk[1]; + const double y = color.cmyk[2]; + const double k = color.cmyk[3]; + return {(1 - c) * (1 - k), (1 - m) * (1 - k), (1 - y) * (1 - k)}; + } + case ColorSpace::unknown: + break; + } + return {0, 0, 0}; +} + /// Emit a path-painting element from the path accumulated in `state` and the /// current paint state, then clear the path (as every painting operator does). /// `close` first closes the current subpath (the `s`/`b`/`b*` variants). -void paint_path(std::vector &out, GraphicsState &state, bool fill, - bool stroke, bool even_odd, bool close) { +void paint_path(std::vector &out, GraphicsState &state, + const bool fill, const bool stroke, const bool even_odd, + const bool close) { if (close) { state.path_close(); } @@ -468,8 +493,8 @@ resolve_inline_color_space(const Object &color_space, /// Emit an `ImageElement` for an inline image (`BI`/`ID`/`EI`). The operator /// carries the inline dictionary and the raw bytes; both are routed through the /// same image emission as an image XObject (placed by the CTM, under the -/// current clip). Image masks are deferred (4.8); an unsupported codec or -/// colour space yields nothing. +/// current clip). An `/ImageMask true` stencil is painted in the current fill +/// colour; an unsupported codec or colour space yields nothing. void emit_inline_image(const GraphicsOperator &op, const Resources &resources, GraphicsState &state, std::vector &out) { if (op.arguments.size() < 2 || !op.arguments[0].is_dictionary() || @@ -479,18 +504,10 @@ void emit_inline_image(const GraphicsOperator &op, const Resources &resources, const Dictionary dictionary = normalize_inline_image(op.arguments[0].as_dictionary()); - if (dictionary.get("ImageMask").as_bool_opt().value_or(false)) { - return; - } - - const std::shared_ptr color_space = - resolve_inline_color_space(dictionary.get("ColorSpace"), resources); const auto width = static_cast( dictionary.get("Width").as_integer_opt().value_or(0)); const auto height = static_cast( dictionary.get("Height").as_integer_opt().value_or(0)); - const auto bits_per_component = static_cast( - dictionary.get("BitsPerComponent").as_integer_opt().value_or(8)); std::vector decode_array; if (const Object &d = dictionary.get("Decode"); d.is_array()) { for (const Object &item : d.as_array()) { @@ -498,6 +515,36 @@ void emit_inline_image(const GraphicsOperator &op, const Resources &resources, } } + // An inline `/ImageMask true` stencil: decode the 1-bpc bitmap and paint it + // in the current fill colour, as for a stencil image XObject (ISO + // 32000-1 8.9.7). + if (dictionary.get("ImageMask").as_bool_opt().value_or(false)) { + DecodeResult mask = + decode(dictionary.get("Filter"), dictionary.get("DecodeParms"), + op.arguments[1].as_string()); + if (mask.stopped_at_filter.has_value()) { + return; + } + std::string png = encode_stencil_png( + mask.data, width, height, color_to_rgb(state.current().other_color), + decode_array); + if (png.empty()) { + return; + } + ImageElement image; + image.transform = state.current().general.transform_matrix; + image.clip = state.current().clip; + image.data = std::move(png); + image.mime = "image/png"; + out.push_back(std::move(image)); + return; + } + + const std::shared_ptr color_space = + resolve_inline_color_space(dictionary.get("ColorSpace"), resources); + const auto bits_per_component = static_cast( + dictionary.get("BitsPerComponent").as_integer_opt().value_or(8)); + std::optional encoded = encode_image(op.arguments[1].as_string(), dictionary.get("Filter"), dictionary.get("DecodeParms"), width, height, @@ -570,16 +617,31 @@ void invoke_x_object(const std::string &name, const Resources &resources, const XObject *x_object = it->second; if (x_object->subtype == XObject::Subtype::image) { // An image is placed by the CTM in effect (its unit square maps to user - // space), under the current clip. Only codecs with bytes ready for the - // browser carry `image_data` (stage 4.5: JPEG); the rest are skipped. - if (!x_object->image_data.empty()) { - ImageElement image; - image.transform = state.current().general.transform_matrix; - image.clip = state.current().clip; + // space), under the current clip. + ImageElement image; + image.transform = state.current().general.transform_matrix; + image.clip = state.current().clip; + if (x_object->stencil_mask) { + // A 1-bpc stencil painted in the current fill colour (resolved here, not + // at parse time, since the colour lives in the graphics state). + std::string png = encode_stencil_png( + x_object->stencil_samples, x_object->stencil_width, + x_object->stencil_height, color_to_rgb(state.current().other_color), + x_object->stencil_decode); + if (png.empty()) { + return; + } + image.data = std::move(png); + image.mime = "image/png"; + } else if (!x_object->image_data.empty()) { + // A normal image: bytes ready for the browser (JPEG pass-through or a + // PNG-encoded raster). Codecs we cannot hand off carry none, so skip. image.data = x_object->image_data; image.mime = x_object->image_mime; - out.push_back(std::move(image)); + } else { + return; } + out.push_back(std::move(image)); return; } if (x_object->subtype != XObject::Subtype::form) { diff --git a/test/data/reference-output/odr-private b/test/data/reference-output/odr-private index 7943f5ad..8d77bde4 160000 --- a/test/data/reference-output/odr-private +++ b/test/data/reference-output/odr-private @@ -1 +1 @@ -Subproject commit 7943f5ad038d8cd6081dcd0133f84ba6f50a93e9 +Subproject commit 8d77bde4362515e09f1873aff3a53cde64b4ffab diff --git a/test/src/internal/pdf/pdf_image.cpp b/test/src/internal/pdf/pdf_image.cpp index 0fec0bf2..78db1384 100644 --- a/test/src/internal/pdf/pdf_image.cpp +++ b/test/src/internal/pdf/pdf_image.cpp @@ -5,6 +5,7 @@ #include #include +#include #include @@ -76,6 +77,50 @@ std::string rgb_pixel(const std::string &rgb, const std::int32_t width, return rgb.substr((static_cast(y) * width + x) * 3, 3); } +/// Like `decode_png` but for the RGBA encoder output (colour type 6): keeps the +/// alpha channel, yielding 4 bytes per pixel. +struct DecodedPngRgba { + std::int32_t width{0}; + std::int32_t height{0}; + std::string rgba; +}; + +DecodedPngRgba decode_png_rgba(const std::string &png) { + EXPECT_GE(png.size(), 8u); + DecodedPngRgba result; + std::string idat; + std::size_t p = 8; + while (p + 12 <= png.size()) { + const std::uint32_t length = be32(png, p); + const std::string type = png.substr(p + 4, 4); + const std::string data = png.substr(p + 8, length); + if (type == "IHDR") { + result.width = static_cast(be32(data, 0)); + result.height = static_cast(be32(data, 4)); + EXPECT_EQ(static_cast(data[8]), 8); // bit depth + EXPECT_EQ(static_cast(data[9]), 6); // colour type RGBA + } else if (type == "IDAT") { + idat += data; + } else if (type == "IEND") { + break; + } + p += 12 + length; + } + const std::string raw = odr::internal::crypto::util::zlib_inflate(idat); + const auto stride = static_cast(result.width) * 4; + for (std::int32_t y = 0; y < result.height; ++y) { + const std::size_t row = static_cast(y) * (stride + 1); + EXPECT_EQ(static_cast(raw[row]), 0); // filter type None + result.rgba.append(raw, row + 1, stride); + } + return result; +} + +std::string rgba_pixel(const std::string &rgba, const std::int32_t width, + const std::int32_t x, const std::int32_t y) { + return rgba.substr((static_cast(y) * width + x) * 4, 4); +} + ColorSpaceDef device_rgb() { ColorSpaceDef def; def.kind = ColorSpaceKind::device_rgb; @@ -104,15 +149,15 @@ TEST(PdfImage, write_png_rgb_round_trip) { // 2x2: red, green / blue, white. const std::string rgb = bytes({255, 0, 0, 0, 255, 0, 0, 0, 255, 255, 255, 255}); - const DecodedPng png = decode_png(write_png_rgb(rgb, 2, 2)); + const DecodedPng png = decode_png(write_png(rgb, 2, 2, 3)); EXPECT_EQ(png.width, 2); EXPECT_EQ(png.height, 2); EXPECT_EQ(png.rgb, rgb); } TEST(PdfImage, write_png_rgb_rejects_short_buffer) { - EXPECT_TRUE(write_png_rgb(bytes({255, 0, 0}), 2, 2).empty()); - EXPECT_TRUE(write_png_rgb("", 0, 0).empty()); + EXPECT_TRUE(write_png(bytes({255, 0, 0}), 2, 2, 3).empty()); + EXPECT_TRUE(write_png("", 0, 0, 3).empty()); } TEST(PdfImage, encode_rgb_8bpc) { @@ -195,3 +240,75 @@ TEST(PdfImage, encode_rejects_bad_parameters) { zero.components = 0; EXPECT_TRUE(encode_image_png("", 1, 1, 8, zero, {}).empty()); } + +TEST(PdfImage, write_png_rgba_round_trip) { + // 2x1: opaque red, half-transparent green. + const std::string rgba = bytes({255, 0, 0, 255, 0, 255, 0, 128}); + const DecodedPngRgba png = decode_png_rgba(write_png(rgba, 2, 1, 4)); + EXPECT_EQ(png.width, 2); + EXPECT_EQ(png.height, 1); + EXPECT_EQ(png.rgba, rgba); +} + +TEST(PdfImage, encode_with_alpha_plane_emits_rgba) { + // DeviceGray 2x1, samples black/white, alpha plane opaque/transparent. + const std::string samples = bytes({0, 255}); + const std::vector alpha = {255, 0}; + const DecodedPngRgba png = decode_png_rgba( + encode_image_png(samples, 2, 1, 8, device_gray(), {}, alpha)); + EXPECT_EQ(rgba_pixel(png.rgba, 2, 0, 0), bytes({0, 0, 0, 255})); + EXPECT_EQ(rgba_pixel(png.rgba, 2, 1, 0), bytes({255, 255, 255, 0})); +} + +TEST(PdfImage, encode_with_colour_key_masks_matching_pixels) { + // DeviceRGB 2x1: pure red is keyed out, the other pixel stays opaque. + const std::string samples = bytes({255, 0, 0, 10, 20, 30}); + const std::vector color_key = {255, 255, 0, 0, 0, 0}; + const DecodedPngRgba png = decode_png_rgba( + encode_image_png(samples, 2, 1, 8, device_rgb(), {}, {}, color_key)); + EXPECT_EQ(rgba_pixel(png.rgba, 2, 0, 0), bytes({255, 0, 0, 0})); + EXPECT_EQ(rgba_pixel(png.rgba, 2, 1, 0), bytes({10, 20, 30, 255})); +} + +TEST(PdfImage, encode_stencil_paints_fill_colour_through_mask) { + // 1 bpc, 2x1: bits 0,1 -> 0b01000000 (row padded to a byte). Default /Decode + // [0 1]: a 0 paints the fill colour, a 1 is transparent. + const std::string samples = bytes({0b01000000}); + const DecodedPngRgba png = + decode_png_rgba(encode_stencil_png(samples, 2, 1, {1.0, 0.0, 0.0}, {})); + EXPECT_EQ(rgba_pixel(png.rgba, 2, 0, 0), bytes({255, 0, 0, 255})); + EXPECT_EQ(static_cast(rgba_pixel(png.rgba, 2, 1, 0)[3]), 0); +} + +TEST(PdfImage, encode_stencil_decode_inverts) { + // /Decode [1 0] swaps which sample paints: now the 1 paints, the 0 is clear. + const std::string samples = bytes({0b01000000}); + const DecodedPngRgba png = decode_png_rgba( + encode_stencil_png(samples, 2, 1, {0.0, 0.0, 1.0}, {1.0, 0.0})); + EXPECT_EQ(static_cast(rgba_pixel(png.rgba, 2, 0, 0)[3]), 0); + EXPECT_EQ(rgba_pixel(png.rgba, 2, 1, 0), bytes({0, 0, 255, 255})); +} + +TEST(PdfImage, decode_mask_alpha_soft_mask_grey_to_alpha) { + // 8-bpc DeviceGray soft mask, same size as the base: grey level is alpha. + const std::string samples = bytes({0, 128, 255, 64}); + const std::vector alpha = + decode_mask_alpha(samples, 2, 2, 8, {}, /*stencil=*/false, 2, 2); + EXPECT_EQ(alpha, (std::vector{0, 128, 255, 64})); +} + +TEST(PdfImage, decode_mask_alpha_stencil_masks_set_bits) { + // 1-bpc stencil /Mask, 2x1: a decoded 1 masks the base pixel out (alpha 0). + const std::string samples = bytes({0b01000000}); + const std::vector alpha = + decode_mask_alpha(samples, 2, 1, 1, {}, /*stencil=*/true, 2, 1); + EXPECT_EQ(alpha, (std::vector{255, 0})); +} + +TEST(PdfImage, decode_mask_alpha_resamples_to_base) { + // A 1x1 mask scaled up to 2x2: nearest-neighbour fills every base pixel. + const std::string samples = bytes({200}); + const std::vector alpha = + decode_mask_alpha(samples, 1, 1, 8, {}, /*stencil=*/false, 2, 2); + EXPECT_EQ(alpha, (std::vector{200, 200, 200, 200})); +} diff --git a/test/src/internal/pdf/pdf_page_extractor.cpp b/test/src/internal/pdf/pdf_page_extractor.cpp index 99d26064..b72bd528 100644 --- a/test/src/internal/pdf/pdf_page_extractor.cpp +++ b/test/src/internal/pdf/pdf_page_extractor.cpp @@ -299,14 +299,35 @@ TEST(PdfPageExtractor, form_xobject_nested) { EXPECT_EQ(texts[0].codes, "in"); } -// Image XObjects are recognized but not rendered: `Do` is a no-op. +// Image XObjects with no browser-ready bytes (an unhandled codec) are skipped: +// `Do` is a no-op. TEST(PdfPageExtractor, image_xobject_ignored) { XObject image; image.subtype = XObject::Subtype::image; Resources res; res.x_object["Im0"] = ℑ - EXPECT_TRUE(run("/Im0 Do", res).empty()); + EXPECT_TRUE(extract_page("/Im0 Do", res, Logger::null()).empty()); +} + +// A stencil image mask (`/ImageMask true`) is painted in the current fill +// colour at `Do` time, producing a recoloured RGBA PNG `ImageElement`. +TEST(PdfPageExtractor, stencil_image_xobject_recoloured) { + XObject mask; + mask.subtype = XObject::Subtype::image; + mask.stencil_mask = true; + mask.stencil_width = 2; + mask.stencil_height = 1; + mask.stencil_samples = std::string(1, '\x40'); // 1 bpc, bits 0,1 (padded) + Resources res; + res.x_object["Im0"] = &mask; + + const auto elements = extract_page("1 0 0 rg /Im0 Do", res, Logger::null()); + ASSERT_EQ(elements.size(), 1); + const auto *image = std::get_if(&elements[0]); + ASSERT_NE(image, nullptr); + EXPECT_EQ(image->mime, "image/png"); + EXPECT_FALSE(image->data.empty()); } // An unknown XObject name is skipped without throwing. @@ -1032,10 +1053,15 @@ TEST(PdfPageExtractor, inline_image_named_resource_color_space) { EXPECT_EQ(std::get(page[0]).mime, "image/png"); } -// An inline image mask (`/IM true`) is a stencil, deferred to a later stage: -// emitted as nothing for now. -TEST(PdfPageExtractor, inline_image_mask_skipped) { +// An inline image mask (`/IM true`) is a 1-bpc stencil painted in the current +// fill colour, emitted as a recoloured RGBA PNG `ImageElement`. +TEST(PdfPageExtractor, inline_image_mask_recoloured) { const std::string content = - "BI /W 8 /H 1 /IM true /BPC 1 ID " + raw_bytes({0xAA}) + "\nEI"; - EXPECT_TRUE(extract_page(content, Resources{}, Logger::null()).empty()); + "0 0 1 rg BI /W 8 /H 1 /IM true /BPC 1 ID " + raw_bytes({0xAA}) + "\nEI"; + const auto page = extract_page(content, Resources{}, Logger::null()); + ASSERT_EQ(page.size(), 1); + const auto *image = std::get_if(&page[0]); + ASSERT_NE(image, nullptr); + EXPECT_EQ(image->mime, "image/png"); + EXPECT_FALSE(image->data.empty()); }