From 639498a4305d631d1b864c3d61f88a610a5493d3 Mon Sep 17 00:00:00 2001
From: Lucas Smith <me@lucasjamessmith.me>
Date: Thu, 2 Jul 2026 14:21:53 +1000
Subject: [PATCH] fix: handle malformed objects more gracefully

---
 src/parser/document-parser.test.ts        | 110 +++++++++++++++++++++-
 src/parser/document-parser.ts             | 104 ++++++++++++--------
 src/parser/indirect-object-parser.test.ts |  98 ++++++++++++++++++-
 src/parser/indirect-object-parser.ts      |  85 +++++++++++++++--
 src/parser/object-parser.test.ts          |  60 ++++++++++++
 src/parser/object-parser.ts               |   6 ++
 src/parser/object-stream-parser.test.ts   |  31 ++++++
 src/parser/object-stream-parser.ts        |  24 ++++-
 8 files changed, 464 insertions(+), 54 deletions(-)

diff --git a/src/parser/document-parser.test.ts b/src/parser/document-parser.test.ts
index b86d276..605a721 100644
--- a/src/parser/document-parser.test.ts
+++ b/src/parser/document-parser.test.ts
@@ -16,6 +16,7 @@ function createMinimalPdf(options: {
   garbageBeforeHeader?: string;
   objects?: Array<{ objNum: number; content: string }>;
   xrefEntries?: Array<{ objNum: number; offset: number; gen?: number; free?: boolean }>;
+  extraXrefEntries?: Array<{ objNum: number; offset: number; gen?: number; free?: boolean }>;
   trailer?: Record<string, string>;
 }): Uint8Array {
   const parts: string[] = [];
@@ -30,9 +31,12 @@ function createMinimalPdf(options: {
   parts.push(`%PDF-${version}\n`);
   parts.push("%\x80\x81\x82\x83\n"); // Binary marker
 
-  // Track offsets for xref
+  // Track offsets for xref (byte lengths, not string lengths — the binary
+  // marker encodes to multiple UTF-8 bytes per char)
+  const byteLength = (s: string): number => new TextEncoder().encode(s).length;
+
   const offsets: Array<{ objNum: number; offset: number; gen: number; free: boolean }> = [];
-  let currentOffset = parts.join("").length;
+  let currentOffset = byteLength(parts.join(""));
 
   // Objects
   const objects = options.objects ?? [
@@ -44,7 +48,7 @@ function createMinimalPdf(options: {
     offsets.push({ objNum: obj.objNum, offset: currentOffset, gen: 0, free: false });
     const objStr = `${obj.objNum} 0 obj\n${obj.content}\nendobj\n`;
     parts.push(objStr);
-    currentOffset += objStr.length;
+    currentOffset += byteLength(objStr);
   }
 
   // Use provided xref entries or build from objects
@@ -53,6 +57,10 @@ function createMinimalPdf(options: {
     ...offsets,
   ];
 
+  if (options.extraXrefEntries) {
+    xrefEntries.push(...options.extraXrefEntries);
+  }
+
   // XRef table
   const xrefOffset = currentOffset;
   parts.push("xref\n");
@@ -779,6 +787,102 @@ describe("DocumentParser", () => {
     });
   });
 
+  describe("malformed object recovery", () => {
+    it("recovers dict with missing value (lenient default)", () => {
+      const bytes = createMinimalPdf({
+        objects: [
+          { objNum: 1, content: "<< /Type /Catalog /Pages 2 0 R >>" },
+          { objNum: 2, content: "<< /Type /Pages /Kids [] /Count 0 >>" },
+          { objNum: 3, content: "<< /S /GoTo /D >>" },
+        ],
+      });
+      const scanner = new Scanner(bytes);
+      const doc = new DocumentParser(scanner).parse();
+
+      const obj = doc.getObject(PdfRef.of(3, 0));
+
+      expect(obj).toBeInstanceOf(PdfDict);
+      expect((obj as PdfDict).getName("S")?.value).toBe("GoTo");
+      expect((obj as PdfDict).has("D")).toBe(false);
+      expect(doc.warnings.some(w => w.includes("Missing value for key D"))).toBe(true);
+    });
+
+    it("throws on dict with missing value in strict mode", () => {
+      const bytes = createMinimalPdf({
+        objects: [
+          { objNum: 1, content: "<< /Type /Catalog /Pages 2 0 R >>" },
+          { objNum: 2, content: "<< /Type /Pages /Kids [] /Count 0 >>" },
+          { objNum: 3, content: "<< /S /GoTo /D >>" },
+        ],
+      });
+      const scanner = new Scanner(bytes);
+      const doc = new DocumentParser(scanner, { lenient: false }).parse();
+
+      expect(() => doc.getObject(PdfRef.of(3, 0))).toThrow("Missing value for key D");
+    });
+
+    it("recovers stream with wrong /Length via endstream scan", () => {
+      const bytes = createMinimalPdf({
+        objects: [
+          { objNum: 1, content: "<< /Type /Catalog /Pages 2 0 R >>" },
+          { objNum: 2, content: "<< /Type /Pages /Kids [] /Count 0 >>" },
+          { objNum: 3, content: "<< /Length 999 >>\nstream\nHello\nendstream" },
+        ],
+      });
+      const scanner = new Scanner(bytes);
+      const doc = new DocumentParser(scanner).parse();
+
+      const obj = doc.getObject(PdfRef.of(3, 0));
+
+      expect(obj).toBeInstanceOf(PdfStream);
+      expect(new TextDecoder().decode((obj as PdfStream).data)).toBe("Hello");
+      expect(doc.warnings.some(w => w.includes("/Length 999"))).toBe(true);
+    });
+
+    it("returns null for unparseable object instead of throwing (lenient)", () => {
+      const bytes = createMinimalPdf({
+        // Object 3 points at garbage (way past EOF)
+        extraXrefEntries: [{ objNum: 3, offset: 999999 }],
+      });
+      const scanner = new Scanner(bytes);
+      const doc = new DocumentParser(scanner).parse();
+
+      const obj = doc.getObject(PdfRef.of(3, 0));
+
+      expect(obj).toBeNull();
+      expect(doc.warnings.some(w => w.includes("Failed to parse object 3 0"))).toBe(true);
+
+      // Rest of the document still works
+      expect(doc.getCatalog()).not.toBeNull();
+      expect(doc.getPageCount()).toBe(0);
+    });
+
+    it("caches parse failures and does not re-warn on repeated lookups", () => {
+      const bytes = createMinimalPdf({
+        extraXrefEntries: [{ objNum: 3, offset: 999999 }],
+      });
+      const scanner = new Scanner(bytes);
+      const doc = new DocumentParser(scanner).parse();
+
+      expect(doc.getObject(PdfRef.of(3, 0))).toBeNull();
+
+      const warningCount = doc.warnings.length;
+
+      expect(doc.getObject(PdfRef.of(3, 0))).toBeNull();
+      expect(doc.warnings.length).toBe(warningCount);
+    });
+
+    it("throws for unparseable object in strict mode", () => {
+      const bytes = createMinimalPdf({
+        extraXrefEntries: [{ objNum: 3, offset: 999999 }],
+      });
+      const scanner = new Scanner(bytes);
+      const doc = new DocumentParser(scanner, { lenient: false }).parse();
+
+      expect(() => doc.getObject(PdfRef.of(3, 0))).toThrow();
+    });
+  });
+
   describe("error handling", () => {
     it("throws in strict mode for invalid xref", async () => {
       const malformedPdf = new TextEncoder().encode(
diff --git a/src/parser/document-parser.ts b/src/parser/document-parser.ts
index a67111b..9027a4d 100644
--- a/src/parser/document-parser.ts
+++ b/src/parser/document-parser.ts
@@ -429,8 +429,17 @@ export class DocumentParser {
     trailer: PdfDict,
     recoveredViaBruteForce: boolean,
   ): ParsedDocument {
-    // Object cache: "objNum genNum" -> PdfObject
-    const cache = new Map<string, PdfObject>();
+    // Object cache: "objNum genNum" -> PdfObject (null = known-unparseable)
+    const cache = new Map<string, PdfObject | null>();
+
+    const lenient = this.options.lenient ?? true;
+
+    const recoveryOptions = {
+      recoveryMode: lenient,
+      onWarning: (message: string, position: number) => {
+        this.warnings.push(`Object parse warning at offset ${position}: ${message}`);
+      },
+    };
 
     // Object stream cache: streamObjNum -> ObjectStreamParser
     const objectStreamCache = new Map<number, ObjectStreamParser>();
@@ -644,8 +653,7 @@ export class DocumentParser {
 
       // Check cache
       if (cache.has(key)) {
-        // biome-ignore lint/style/noNonNullAssertion: checked with .has(...)
-        return cache.get(key)!;
+        return cache.get(key) ?? null;
       }
 
       // Look up in xref
@@ -657,59 +665,77 @@ export class DocumentParser {
 
       let obj: PdfObject | null = null;
 
-      switch (entry.type) {
-        case "free":
-          return null;
+      try {
+        switch (entry.type) {
+          case "free":
+            return null;
 
-        case "uncompressed": {
-          const parser = new IndirectObjectParser(this.scanner, lengthResolver);
+          case "uncompressed": {
+            const parser = new IndirectObjectParser(this.scanner, lengthResolver, recoveryOptions);
 
-          const result = parser.parseObjectAt(entry.offset);
+            const result = parser.parseObjectAt(entry.offset);
 
-          // Verify generation matches
-          if (result.genNum !== ref.generation) {
-            this.warnings.push(
-              `Generation mismatch for object ${ref.objectNumber}: expected ${ref.generation}, got ${result.genNum}`,
-            );
-          }
+            // Verify generation matches
+            if (result.genNum !== ref.generation) {
+              this.warnings.push(
+                `Generation mismatch for object ${ref.objectNumber}: expected ${ref.generation}, got ${result.genNum}`,
+              );
+            }
+
+            obj = result.value;
 
-          obj = result.value;
+            // Decrypt the object
+            if (securityHandler?.isAuthenticated) {
+              obj = decryptObject(obj, ref.objectNumber, ref.generation);
+            }
 
-          // Decrypt the object
-          if (securityHandler?.isAuthenticated) {
-            obj = decryptObject(obj, ref.objectNumber, ref.generation);
+            break;
           }
 
-          break;
-        }
+          case "compressed": {
+            // Get or create object stream parser
+            let streamParser = objectStreamCache.get(entry.streamObjNum);
 
-        case "compressed": {
-          // Get or create object stream parser
-          let streamParser = objectStreamCache.get(entry.streamObjNum);
+            if (!streamParser) {
+              // Load the object stream
+              const streamRef = PdfRef.of(entry.streamObjNum, 0);
+              const streamObj = getObject(streamRef);
 
-          if (!streamParser) {
-            // Load the object stream
-            const streamRef = PdfRef.of(entry.streamObjNum, 0);
-            const streamObj = getObject(streamRef);
+              if (!streamObj || !(streamObj instanceof PdfStream)) {
+                this.warnings.push(`Object stream ${entry.streamObjNum} not found or invalid`);
 
-            if (!streamObj || !(streamObj instanceof PdfStream)) {
-              this.warnings.push(`Object stream ${entry.streamObjNum} not found or invalid`);
+                return null;
+              }
 
-              return null;
+              streamParser = new ObjectStreamParser(streamObj, recoveryOptions);
+
+              objectStreamCache.set(entry.streamObjNum, streamParser);
             }
 
-            streamParser = new ObjectStreamParser(streamObj);
+            obj = streamParser.getObject(entry.indexInStream);
+
+            // Objects in object streams don't need individual decryption
+            // because the stream itself was decrypted
 
-            objectStreamCache.set(entry.streamObjNum, streamParser);
+            break;
           }
+        }
+      } catch (error) {
+        // Error boundary: a single malformed object must not crash reads
+        // or saves. In lenient mode, record a warning and treat the object
+        // as missing (like pdf.js and PDFBox).
+        if (!lenient) {
+          throw error;
+        }
 
-          obj = streamParser.getObject(entry.indexInStream);
+        const message = error instanceof Error ? error.message : String(error);
 
-          // Objects in object streams don't need individual decryption
-          // because the stream itself was decrypted
+        this.warnings.push(`Failed to parse object ${key}: ${message}`);
 
-          break;
-        }
+        // Cache the failure so repeated lookups don't re-parse and re-warn
+        cache.set(key, null);
+
+        return null;
       }
 
       // Cache the result
diff --git a/src/parser/indirect-object-parser.test.ts b/src/parser/indirect-object-parser.test.ts
index 6b1fc41..f21cd4d 100644
--- a/src/parser/indirect-object-parser.test.ts
+++ b/src/parser/indirect-object-parser.test.ts
@@ -8,7 +8,7 @@ import { PdfStream } from "#src/objects/pdf-stream";
 import { PdfString } from "#src/objects/pdf-string";
 import { describe, expect, it } from "vitest";
 
-import { IndirectObjectParser } from "./indirect-object-parser";
+import { IndirectObjectParser, type IndirectObjectParserOptions } from "./indirect-object-parser";
 
 /**
  * Helper to create an IndirectObjectParser from a string.
@@ -16,11 +16,12 @@ import { IndirectObjectParser } from "./indirect-object-parser";
 function parser(
   input: string,
   lengthResolver?: (ref: PdfRef) => number | null,
+  options?: IndirectObjectParserOptions,
 ): IndirectObjectParser {
   const bytes = new TextEncoder().encode(input);
   const scanner = new Scanner(bytes);
 
-  return new IndirectObjectParser(scanner, lengthResolver);
+  return new IndirectObjectParser(scanner, lengthResolver, options);
 }
 
 describe("IndirectObjectParser", () => {
@@ -317,6 +318,99 @@ endobj`);
       expect(() => p.parseObject()).toThrow(/obj/i);
     });
 
+    it("recovers stream with /Length too small via endstream scan", () => {
+      const warnings: string[] = [];
+      const p = parser(
+        `1 0 obj
+<< /Length 3 >>
+stream
+Hello World
+endstream
+endobj`,
+        undefined,
+        { onWarning: msg => warnings.push(msg) },
+      );
+      const result = p.parseObject();
+
+      const stream = result.value as PdfStream;
+      expect(new TextDecoder().decode(stream.data)).toBe("Hello World");
+      expect(warnings.length).toBe(1);
+      expect(warnings[0]).toContain("/Length 3");
+    });
+
+    it("recovers stream with /Length too large via endstream scan", () => {
+      const warnings: string[] = [];
+      const p = parser(
+        `1 0 obj
+<< /Length 9999 >>
+stream
+Hello
+endstream
+endobj`,
+        undefined,
+        { onWarning: msg => warnings.push(msg) },
+      );
+      const result = p.parseObject();
+
+      const stream = result.value as PdfStream;
+      expect(new TextDecoder().decode(stream.data)).toBe("Hello");
+      expect(warnings.length).toBe(1);
+    });
+
+    it("accepts correct /Length without warnings", () => {
+      const warnings: string[] = [];
+      const p = parser(
+        `1 0 obj
+<< /Length 5 >>
+stream
+Hello
+endstream
+endobj`,
+        undefined,
+        { onWarning: msg => warnings.push(msg) },
+      );
+      const result = p.parseObject();
+
+      const stream = result.value as PdfStream;
+      expect(new TextDecoder().decode(stream.data)).toBe("Hello");
+      expect(warnings.length).toBe(0);
+    });
+
+    it("throws when stream has no endstream at all", () => {
+      const p = parser(`1 0 obj
+<< /Length 9999 >>
+stream
+Hello and then the file just ends`);
+
+      expect(() => p.parseObject()).toThrow(/no endstream found/);
+    });
+
+    it("recovers dict with missing value in recovery mode", () => {
+      const warnings: string[] = [];
+      const p = parser(
+        `1 0 obj
+<< /S /GoTo /D >>
+endobj`,
+        undefined,
+        { recoveryMode: true, onWarning: msg => warnings.push(msg) },
+      );
+      const result = p.parseObject();
+
+      const dict = result.value as PdfDict;
+      expect(dict.getName("S")?.value).toBe("GoTo");
+      expect(dict.has("D")).toBe(false);
+      expect(warnings.length).toBe(1);
+      expect(warnings[0]).toContain("Missing value for key D");
+    });
+
+    it("throws on dict with missing value without recovery mode", () => {
+      const p = parser(`1 0 obj
+<< /S /GoTo /D >>
+endobj`);
+
+      expect(() => p.parseObject()).toThrow("Missing value for key D");
+    });
+
     it("recovers stream with missing /Length via endstream scan", () => {
       const p = parser(`1 0 obj
 << /Type /XObject >>
diff --git a/src/parser/indirect-object-parser.ts b/src/parser/indirect-object-parser.ts
index 411ee41..828572e 100644
--- a/src/parser/indirect-object-parser.ts
+++ b/src/parser/indirect-object-parser.ts
@@ -1,4 +1,4 @@
-import { CR, LF, SPACE, TAB } from "#src/helpers/chars";
+import { CR, isWhitespace, LF } from "#src/helpers/chars";
 import type { Scanner } from "#src/io/scanner";
 import type { PdfDict } from "#src/objects/pdf-dict";
 import type { PdfObject } from "#src/objects/pdf-object";
@@ -6,7 +6,7 @@ import type { PdfRef } from "#src/objects/pdf-ref";
 import { PdfStream } from "#src/objects/pdf-stream";
 
 import { ObjectParseError } from "./errors";
-import { ObjectParser } from "./object-parser";
+import { ObjectParser, type WarningCallback } from "./object-parser";
 import { TokenReader } from "./token-reader";
 
 /**
@@ -24,6 +24,20 @@ export interface IndirectObject {
  */
 export type LengthResolver = (ref: PdfRef) => number | null;
 
+/**
+ * Options for IndirectObjectParser.
+ */
+export interface IndirectObjectParserOptions {
+  /**
+   * Enable recovery mode for lenient parsing of object contents.
+   * Malformed dicts/arrays produce partial results instead of throwing.
+   */
+  recoveryMode?: boolean;
+
+  /** Callback for warnings emitted during lenient parsing. */
+  onWarning?: WarningCallback;
+}
+
 /**
  * Parser for indirect object definitions.
  *
@@ -34,8 +48,13 @@ export class IndirectObjectParser {
   constructor(
     private scanner: Scanner,
     private lengthResolver?: LengthResolver,
+    private options: IndirectObjectParserOptions = {},
   ) {}
 
+  private warn(message: string, position: number): void {
+    this.options.onWarning?.(message, position);
+  }
+
   /**
    * Parse indirect object at current scanner position.
    */
@@ -69,6 +88,10 @@ export class IndirectObjectParser {
 
     // Parse the object value
     const objectParser = new ObjectParser(reader);
+
+    objectParser.recoveryMode = this.options.recoveryMode ?? false;
+    objectParser.onWarning = this.options.onWarning ?? null;
+
     const result = objectParser.parseObject();
 
     if (result === null) {
@@ -134,16 +157,30 @@ export class IndirectObjectParser {
     // Try to resolve /Length from the dict. If that fails (e.g. indirect
     // ref during brute-force recovery with no resolver), fall back to
     // scanning for the "endstream" keyword to determine the length.
-    let length: number;
+    let length = -1;
 
     try {
       length = this.resolveLength(dict);
     } catch {
-      length = this.findEndStream(startPos);
+      length = -1;
+    }
 
-      if (length < 0) {
-        throw new ObjectParseError("Stream missing /Length and no endstream found");
-      }
+    // Validate /Length: the "endstream" keyword must follow the data.
+    // Malformed PDFs often carry a wrong /Length — recover by scanning
+    // for endstream instead of trusting the declared value.
+    if (length >= 0 && !this.isEndstreamAt(startPos + length)) {
+      this.warn(
+        `Stream /Length ${length} does not point at endstream, scanning for actual end`,
+        startPos,
+      );
+
+      length = -1;
+    }
+
+    length = length < 0 ? this.findEndStream(startPos) : length;
+
+    if (length < 0) {
+      throw new ObjectParseError("Stream has no valid /Length and no endstream found");
     }
 
     // Read exactly `length` bytes.
@@ -162,6 +199,38 @@ export class IndirectObjectParser {
     return new PdfStream(dict, data);
   }
 
+  /**
+   * Check whether the "endstream" keyword appears at the given position,
+   * allowing optional whitespace before it.
+   */
+  private isEndstreamAt(pos: number): boolean {
+    const bytes = this.scanner.bytes;
+
+    if (pos > bytes.length) {
+      return false;
+    }
+
+    let i = pos;
+
+    while (i < bytes.length && isWhitespace(bytes[i])) {
+      i++;
+    }
+
+    const keyword = "endstream";
+
+    if (i + keyword.length > bytes.length) {
+      return false;
+    }
+
+    for (let j = 0; j < keyword.length; j++) {
+      if (bytes[i + j] !== keyword.charCodeAt(j)) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
   /**
    * Expect and consume a keyword at current scanner position.
    */
@@ -190,7 +259,7 @@ export class IndirectObjectParser {
       if (byte === -1) {
         break;
       }
-      if (byte === SPACE || byte === TAB || byte === LF || byte === CR) {
+      if (isWhitespace(byte)) {
         this.scanner.advance();
       } else {
         break;
diff --git a/src/parser/object-parser.test.ts b/src/parser/object-parser.test.ts
index 73051f4..5e5c7b1 100644
--- a/src/parser/object-parser.test.ts
+++ b/src/parser/object-parser.test.ts
@@ -363,6 +363,66 @@ describe("ObjectParser", () => {
       expect(dict.get("Type")).toBe(PdfName.of("Page"));
       expect(warnings.length).toBeGreaterThan(0);
     });
+
+    it("throws on missing value for key in normal mode", () => {
+      const p = parser("<< /S /GoTo /D >>");
+
+      expect(() => p.parseObject()).toThrow("Missing value for key D");
+    });
+
+    it("drops key with missing value in recovery mode", () => {
+      const p = parser("<< /S /GoTo /D >>");
+      const warnings: string[] = [];
+
+      p.recoveryMode = true;
+      p.onWarning = msg => warnings.push(msg);
+
+      const result = p.parseObject();
+      const dict = result!.object as PdfDict;
+
+      expect(dict.get("S")).toBe(PdfName.of("GoTo"));
+      expect(dict.has("D")).toBe(false);
+      expect(warnings.length).toBe(1);
+      expect(warnings[0]).toContain("Missing value for key D");
+    });
+  });
+
+  describe("unknown keywords", () => {
+    it("throws on unknown keyword in normal mode", () => {
+      const p = parser("garbage");
+
+      expect(() => p.parseObject()).toThrow("Unexpected keyword: garbage");
+    });
+
+    it("treats unknown keyword as null in recovery mode", () => {
+      const p = parser("garbage");
+      const warnings: string[] = [];
+
+      p.recoveryMode = true;
+      p.onWarning = msg => warnings.push(msg);
+
+      const result = p.parseObject();
+
+      expect(result!.object).toBe(PdfNull.instance);
+      expect(warnings.length).toBe(1);
+      expect(warnings[0]).toContain("garbage");
+    });
+
+    it("recovers dict with unknown keyword value", () => {
+      const p = parser("<< /Type /Page /Foo bogus /Count 2 >>");
+      const warnings: string[] = [];
+
+      p.recoveryMode = true;
+      p.onWarning = msg => warnings.push(msg);
+
+      const result = p.parseObject();
+      const dict = result!.object as PdfDict;
+
+      expect(dict.get("Type")).toBe(PdfName.of("Page"));
+      expect(dict.get("Foo")).toBe(PdfNull.instance);
+      expect(dict.getNumber("Count")?.value).toBe(2);
+      expect(warnings.length).toBe(1);
+    });
   });
 
   describe("stream detection", () => {
diff --git a/src/parser/object-parser.ts b/src/parser/object-parser.ts
index b15c873..c29fab7 100644
--- a/src/parser/object-parser.ts
+++ b/src/parser/object-parser.ts
@@ -157,6 +157,12 @@ export class ObjectParser {
         return { object: PdfBool.FALSE, hasStream: false };
 
       default:
+        if (this.recoveryMode) {
+          this.warn(`Unexpected keyword: ${value}, treating as null`);
+
+          return { object: PdfNull.instance, hasStream: false };
+        }
+
         throw new ObjectParseError(`Unexpected keyword: ${value}`);
     }
   }
diff --git a/src/parser/object-stream-parser.test.ts b/src/parser/object-stream-parser.test.ts
index 043493f..ab19987 100644
--- a/src/parser/object-stream-parser.test.ts
+++ b/src/parser/object-stream-parser.test.ts
@@ -272,6 +272,37 @@ describe("ObjectStreamParser", () => {
     });
   });
 
+  describe("recovery mode", () => {
+    it("throws on malformed object without recovery mode", () => {
+      const stream = createObjectStream([{ objNum: 1, text: "<< /S /GoTo /D >>" }]);
+
+      const parser = new ObjectStreamParser(stream);
+
+      expect(() => parser.getObject(0)).toThrow("Missing value for key D");
+    });
+
+    it("recovers malformed object with recovery mode", () => {
+      const warnings: string[] = [];
+      const stream = createObjectStream([
+        { objNum: 1, text: "<< /S /GoTo /D >>" },
+        { objNum: 2, text: "<< /Type /Page >>" },
+      ]);
+
+      const parser = new ObjectStreamParser(stream, {
+        recoveryMode: true,
+        onWarning: msg => warnings.push(msg),
+      });
+
+      const first = parser.getObject(0) as PdfDict;
+      expect(first.getName("S")?.value).toBe("GoTo");
+      expect(first.has("D")).toBe(false);
+      expect(warnings.length).toBe(1);
+
+      const second = parser.getObject(1) as PdfDict;
+      expect(second.getName("Type")?.value).toBe("Page");
+    });
+  });
+
   describe("edge cases", () => {
     it("handles empty stream (n=0)", async () => {
       const stream = new PdfStream(
diff --git a/src/parser/object-stream-parser.ts b/src/parser/object-stream-parser.ts
index 4e30396..e5897cc 100644
--- a/src/parser/object-stream-parser.ts
+++ b/src/parser/object-stream-parser.ts
@@ -3,7 +3,7 @@ import type { PdfObject } from "#src/objects/pdf-object";
 import type { PdfStream } from "#src/objects/pdf-stream";
 
 import { ObjectParseError } from "./errors";
-import { ObjectParser } from "./object-parser";
+import { ObjectParser, type WarningCallback } from "./object-parser";
 import { TokenReader } from "./token-reader";
 
 /**
@@ -16,6 +16,20 @@ interface ObjectStreamEntry {
   offset: number;
 }
 
+/**
+ * Options for ObjectStreamParser.
+ */
+export interface ObjectStreamParserOptions {
+  /**
+   * Enable recovery mode for lenient parsing of contained objects.
+   * Malformed objects produce partial results instead of throwing.
+   */
+  recoveryMode?: boolean;
+
+  /** Callback for warnings emitted during lenient parsing. */
+  onWarning?: WarningCallback;
+}
+
 /**
  * Parser for PDF object streams (/Type /ObjStm).
  *
@@ -43,7 +57,10 @@ export class ObjectStreamParser {
   private readonly first: number;
   private readonly n: number;
 
-  constructor(private stream: PdfStream) {
+  constructor(
+    private stream: PdfStream,
+    private options: ObjectStreamParserOptions = {},
+  ) {
     // Validate stream type
     const type = stream.getName("Type");
 
@@ -156,6 +173,9 @@ export class ObjectStreamParser {
     const reader = new TokenReader(scanner);
     const parser = new ObjectParser(reader);
 
+    parser.recoveryMode = this.options.recoveryMode ?? false;
+    parser.onWarning = this.options.onWarning ?? null;
+
     const result = parser.parseObject();
 
     return result?.object ?? null;