diff --git a/.github/workflows/_publish-code.yml b/.github/workflows/_publish-code.yml index cb7b728e..9ccb1a0d 100644 --- a/.github/workflows/_publish-code.yml +++ b/.github/workflows/_publish-code.yml @@ -11,7 +11,7 @@ jobs: steps: - name: Check out Git repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Install Dependencies run: sudo apt-get install pcregrep diff --git a/.github/workflows/_publish-docs.yml b/.github/workflows/_publish-docs.yml index 6617f7c4..81d92d1a 100644 --- a/.github/workflows/_publish-docs.yml +++ b/.github/workflows/_publish-docs.yml @@ -11,7 +11,7 @@ jobs: steps: - name: Check out Git repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Set up Node.js uses: actions/setup-node@v6 diff --git a/.github/workflows/_static-analysis.yml b/.github/workflows/_static-analysis.yml index 9dce9e20..3aea4f43 100644 --- a/.github/workflows/_static-analysis.yml +++ b/.github/workflows/_static-analysis.yml @@ -10,7 +10,7 @@ jobs: steps: - name: Check out Git repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Set up Node.js uses: actions/setup-node@v6 diff --git a/.github/workflows/_test-integrations.yml b/.github/workflows/_test-integrations.yml index bceb234a..855d5ce1 100644 --- a/.github/workflows/_test-integrations.yml +++ b/.github/workflows/_test-integrations.yml @@ -33,7 +33,7 @@ jobs: steps: - name: Check out Git repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: submodules: recursive @@ -84,7 +84,7 @@ jobs: steps: - name: Check out Git repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: submodules: recursive diff --git a/.github/workflows/_test-smoke.yml b/.github/workflows/_test-smoke.yml index b847bda5..b87164d4 100644 --- a/.github/workflows/_test-smoke.yml +++ b/.github/workflows/_test-smoke.yml @@ -27,7 +27,7 @@ jobs: steps: - name: Check out Git repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: submodules: 
recursive diff --git a/.github/workflows/_test-units.yml b/.github/workflows/_test-units.yml index aafbf97b..f93c6a4f 100644 --- a/.github/workflows/_test-units.yml +++ b/.github/workflows/_test-units.yml @@ -18,7 +18,7 @@ jobs: steps: - name: Check out Git repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: submodules: recursive @@ -56,7 +56,7 @@ jobs: steps: - name: Check out Git repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: submodules: recursive diff --git a/src/v2/fileOperations/cropFiles.ts b/src/image/extractedImages.ts similarity index 66% rename from src/v2/fileOperations/cropFiles.ts rename to src/image/extractedImages.ts index 61d3ff22..2839523f 100644 --- a/src/v2/fileOperations/cropFiles.ts +++ b/src/image/extractedImages.ts @@ -1,6 +1,6 @@ import { ExtractedImage } from "@/image/index.js"; -export class CropFiles extends Array { +export class ExtractedImages extends Array { constructor(...items: ExtractedImage[]) { super(...items); } diff --git a/src/image/index.ts b/src/image/index.ts index 5ace9a21..2b1a8dde 100644 --- a/src/image/index.ts +++ b/src/image/index.ts @@ -1,3 +1,4 @@ export { compressImage } from "./imageCompressor.js"; export { ExtractedImage } from "./extractedImage.js"; +export { ExtractedImages } from "./extractedImages.js"; export { extractFromPage } from "./imageExtractor.js"; diff --git a/src/pdf/extractedPdfs.ts b/src/pdf/extractedPdfs.ts new file mode 100644 index 00000000..19771b0f --- /dev/null +++ b/src/pdf/extractedPdfs.ts @@ -0,0 +1,7 @@ +import { ExtractedPdf } from "@/pdf/extractedPdf.js"; + +export class ExtractedPdfs extends Array { + constructor(...items: ExtractedPdf[]) { + super(...items); + } +} diff --git a/src/pdf/index.ts b/src/pdf/index.ts index 752c5ffd..182ce9d9 100644 --- a/src/pdf/index.ts +++ b/src/pdf/index.ts @@ -2,3 +2,5 @@ export { extractPages, countPages } from "./pdfOperation.js"; export type { SplitPdf } from "./pdfOperation.js"; export { compressPdf } 
from "./pdfCompressor.js"; export { hasSourceText } from "./pdfUtils.js"; +export { ExtractedPdf } from "./extractedPdf.js"; +export { ExtractedPdfs } from "./extractedPdfs.js"; diff --git a/src/v2/fileOperations/crop.ts b/src/v2/fileOperations/crop.ts index 764a0947..f178d431 100644 --- a/src/v2/fileOperations/crop.ts +++ b/src/v2/fileOperations/crop.ts @@ -3,8 +3,7 @@ import { CropItem } from "@/v2/product/crop/index.js"; import { MindeeError } from "@/errors/index.js"; import { extractImagesFromPolygon } from "@/image/imageExtractor.js"; import { Polygon } from "@/geometry/index.js"; -import { CropFiles } from "@/v2/fileOperations/cropFiles.js"; -import { ExtractedImage } from "@/image/index.js"; +import { ExtractedImage, ExtractedImages } from "@/image/index.js"; import { logger } from "@/logger.js"; @@ -12,9 +11,12 @@ import { logger } from "@/logger.js"; * Extracts a single specified crop from a given input source. * @param inputSource Local input source. * @param crop Crop to extract. + * @param quality JPEG quality of extracted image. */ -export async function extractSingleCrop(inputSource: LocalInputSource, crop: CropItem): Promise { - return (await extractCrops(inputSource, [crop]))[0]; +export async function extractSingleCrop( + inputSource: LocalInputSource, crop: CropItem, quality?: number +): Promise { + return (await extractMultipleCrops(inputSource, [crop], quality))[0]; } @@ -25,11 +27,11 @@ export async function extractSingleCrop(inputSource: LocalInputSource, crop: Cro * @param quality JPEG quality of extracted images. * @return a list of extracted files, as a CropFiles object. 
*/ -export async function extractCrops( +export async function extractMultipleCrops( inputSource: LocalInputSource, crops: CropItem[], quality?: number , -): Promise { +): Promise { if (crops.length === 0) { throw new MindeeError("No crop indexes provided."); } @@ -44,5 +46,5 @@ export async function extractCrops( polygonsByPage.get(pageId)!.push(crop.location.polygon); } const extractedCrops = await extractImagesFromPolygon(inputSource, polygonsByPage, quality); - return new CropFiles(...extractedCrops); + return new ExtractedImages(...extractedCrops); } diff --git a/src/v2/fileOperations/split.ts b/src/v2/fileOperations/split.ts index 2370befc..da4a3fd9 100644 --- a/src/v2/fileOperations/split.ts +++ b/src/v2/fileOperations/split.ts @@ -1,9 +1,9 @@ import { LocalInputSource } from "@/input/index.js"; import { MindeeError } from "@/errors/index.js"; import { PdfExtractor } from "@/pdf/pdfExtractor.js"; -import { SplitFiles } from "@/v2/fileOperations/splitFiles.js"; import { logger } from "@/logger.js"; import { ExtractedPdf } from "@/pdf/extractedPdf.js"; +import { ExtractedPdfs } from "@/pdf/extractedPdfs.js"; /** * Extracts a single specified split from a @@ -11,7 +11,7 @@ import { ExtractedPdf } from "@/pdf/extractedPdf.js"; * @param split */ export async function extractSingleSplit(inputSource: LocalInputSource, split: number[]) { - return await extractSplits(inputSource, [split]); + return await extractMultipleSplits(inputSource, [split]); } /** @@ -21,7 +21,7 @@ export async function extractSingleSplit(inputSource: LocalInputSource, split: n * @return a list of extracted files. * @throws MindeeError if no indexes are provided. 
*/ -export async function extractSplits(inputSource: LocalInputSource, splits: number[][]): Promise { +export async function extractMultipleSplits(inputSource: LocalInputSource, splits: number[][]): Promise { const pageGroups = splits.filter(e => e.length > 0); if (pageGroups.length === 0) { throw new MindeeError("No valid split indexes provided."); @@ -32,14 +32,14 @@ export async function extractSplits(inputSource: LocalInputSource, splits: numbe await pdfExtractor.init(); if (splits.length === 0) { - return new SplitFiles(); + return new ExtractedPdfs(); } const pageCount = await pdfExtractor.getPageCount(); if (splits.length === 1 && splits[0].at(-1) === pageCount-1) { - return new SplitFiles(new ExtractedPdf(inputSource.fileObject as Buffer, inputSource.filename, pageCount)); + return new ExtractedPdfs(new ExtractedPdf(inputSource.fileObject as Buffer, inputSource.filename, pageCount)); } const subDocuments = await pdfExtractor.extractSubDocuments(pageGroups); - return new SplitFiles(...subDocuments); + return new ExtractedPdfs(...subDocuments); } /** diff --git a/src/v2/fileOperations/splitFiles.ts b/src/v2/fileOperations/splitFiles.ts deleted file mode 100644 index 14dcb755..00000000 --- a/src/v2/fileOperations/splitFiles.ts +++ /dev/null @@ -1,8 +0,0 @@ -import { ExtractedPdf } from "@/pdf/extractedPdf.js"; - -export class SplitFiles extends Array { - - constructor(...args: ExtractedPdf[]) { - super(...args); - } -} diff --git a/src/v2/product/crop/cropItem.ts b/src/v2/product/crop/cropItem.ts index e0b06d1c..cce46560 100644 --- a/src/v2/product/crop/cropItem.ts +++ b/src/v2/product/crop/cropItem.ts @@ -1,7 +1,7 @@ import { FieldLocation } from "@/v2/parsing/inference/field/index.js"; import { StringDict } from "@/parsing/index.js"; import { LocalInputSource } from "@/input/index.js"; -import { extractCrops } from "@/v2/fileOperations/crop.js"; +import { extractSingleCrop } from "@/v2/fileOperations/crop.js"; import { ExtractedImage } from 
"@/image/index.js"; import { ExtractionResponse } from "@/v2/product/index.js"; @@ -36,7 +36,7 @@ export class CropItem { * @param inputSource The input file to extract from. * @param quality Optional quality parameter for image extraction, default is undefined (full quality). */ - async extractFromFile(inputSource: LocalInputSource, quality: number = 1): Promise{ - return (await extractCrops(inputSource, [this], quality))[0]; + async extractFromInputSource(inputSource: LocalInputSource, quality: number = 1): Promise{ + return (await extractSingleCrop(inputSource, this, quality)); } } diff --git a/src/v2/product/crop/cropResponse.ts b/src/v2/product/crop/cropResponse.ts index 0f63e316..e9f7d2a3 100644 --- a/src/v2/product/crop/cropResponse.ts +++ b/src/v2/product/crop/cropResponse.ts @@ -1,7 +1,4 @@ -import { LocalInputSource } from "@/input/index.js"; import { StringDict } from "@/parsing/stringDict.js"; -import { extractCrops } from "@/v2/fileOperations/crop.js"; -import { CropFiles } from "@/v2/fileOperations/cropFiles.js"; import { BaseResponse } from "@/v2/parsing/index.js"; import { CropInference } from "./cropInference.js"; @@ -18,13 +15,4 @@ export class CropResponse extends BaseResponse { super(serverResponse); this.inference = new CropInference(serverResponse["inference"]); } - - /** - * Extracts all crops from an input. - * @param inputSource The input file to extract from. - * @param quality Optional quality parameter for image extraction, default is undefined (full quality). 
- */ - async extractFromFile(inputSource: LocalInputSource, quality: number = 1): Promise { - return await extractCrops(inputSource, this.inference.result.crops, quality); - } } diff --git a/src/v2/product/crop/cropResult.ts b/src/v2/product/crop/cropResult.ts index 9a0c527a..d6d1957d 100644 --- a/src/v2/product/crop/cropResult.ts +++ b/src/v2/product/crop/cropResult.ts @@ -1,5 +1,8 @@ import { StringDict } from "@/parsing/stringDict.js"; import { CropItem } from "@/v2/product/crop/cropItem.js"; +import { LocalInputSource } from "@/input/index.js"; +import { extractMultipleCrops } from "@/v2/fileOperations/crop.js"; +import { ExtractedImages } from "@/image/extractedImages.js"; export class CropResult { /** @@ -15,4 +18,13 @@ export class CropResult { const crops = this.crops.map(item => item.toString()).join("\n"); return `Crops\n=====\n${crops}`; } + + /** + * Extracts all crops from an input. + * @param inputSource The input file to extract from. + * @param quality Optional quality parameter for image extraction, defaults to 1 (full quality). + */ + async extractFromInputSource(inputSource: LocalInputSource, quality: number = 1): Promise{ + return (await extractMultipleCrops(inputSource, this.crops, quality)); + } } diff --git a/src/v2/product/split/splitRange.ts b/src/v2/product/split/splitRange.ts index 27018cdf..30bb9209 100644 --- a/src/v2/product/split/splitRange.ts +++ b/src/v2/product/split/splitRange.ts @@ -1,7 +1,8 @@ import { StringDict } from "@/parsing/index.js"; import { LocalInputSource } from "@/input/index.js"; -import { expandRange, extractSplits } from "@/v2/fileOperations/split.js"; +import { expandRange, extractMultipleSplits } from "@/v2/fileOperations/split.js"; import { ExtractionResponse } from "@/v2/product/index.js"; +import { ExtractedPdf } from "@/pdf/index.js"; /** * Split inference result. @@ -38,8 +39,8 @@ export class SplitRange { * Extracts a single split from the input file.
* @param inputSource The input file to extract from. */ - async extractFromFile(inputSource: LocalInputSource) { + async extractFromFile(inputSource: LocalInputSource): Promise { const pageRange = [expandRange(this.pageRange as [number, number])]; - return (await extractSplits(inputSource, pageRange))[0]; + return (await extractMultipleSplits(inputSource, pageRange))[0]; } } diff --git a/src/v2/product/split/splitResponse.ts b/src/v2/product/split/splitResponse.ts index 123ab286..891cb30d 100644 --- a/src/v2/product/split/splitResponse.ts +++ b/src/v2/product/split/splitResponse.ts @@ -1,9 +1,6 @@ import { StringDict } from "@/parsing/stringDict.js"; import { SplitInference } from "./splitInference.js"; import { BaseResponse } from "@/v2/parsing/index.js"; -import { LocalInputSource } from "@/input/index.js"; -import { expandRange, extractSplits } from "@/v2/fileOperations/split.js"; -import { SplitFiles } from "@/v2/fileOperations/splitFiles.js"; export class SplitResponse extends BaseResponse { /** @@ -18,16 +15,4 @@ export class SplitResponse extends BaseResponse { super(serverResponse); this.inference = new SplitInference(serverResponse["inference"]); } - - /** - * Extracts all splits from an input PDF. - * @param inputSource The input file to extract from. 
- */ - async extractFromFile(inputSource: LocalInputSource): Promise{ - const splits: number[][] = []; - for (const split of this.inference.result.splits) { - splits.push(expandRange(split.pageRange as [number, number])); - } - return await extractSplits(inputSource, splits); - } } diff --git a/src/v2/product/split/splitResult.ts b/src/v2/product/split/splitResult.ts index af3b4ec7..08411d8a 100644 --- a/src/v2/product/split/splitResult.ts +++ b/src/v2/product/split/splitResult.ts @@ -1,5 +1,8 @@ import { SplitRange } from "./splitRange.js"; import { StringDict } from "@/parsing/index.js"; +import { LocalInputSource } from "@/input/index.js"; +import { ExtractedPdfs } from "@/pdf/index.js"; +import { extractMultipleSplits, expandRange } from "@/v2/fileOperations/split.js"; /** * Split result info. @@ -14,6 +17,18 @@ export class SplitResult { this.splits = rawResponse.splits.map((split: StringDict) => new SplitRange(split)); } + /** + * Extracts all splits from an input PDF. + * @param inputSource The input file to extract from. 
+ */ + async extractFromInputSource(inputSource: LocalInputSource): Promise{ + const splits: number[][] = []; + for (const split of this.splits) { + splits.push(expandRange(split.pageRange as [number, number])); + } + return await extractMultipleSplits(inputSource, splits); + } + toString(): string { let splits = "\n"; if (this.splits.length > 0) { diff --git a/tests/data b/tests/data index 13093f3a..2d7fcf8f 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 13093f3a48de212ef26889df71199c1a2a9d1478 +Subproject commit 2d7fcf8f591f6d7f40e39862965325e6a8a21874 diff --git a/tests/v2/fileOperations/crop.spec.ts b/tests/v2/fileOperations/crop.spec.ts index eaa84fb4..819a63bd 100644 --- a/tests/v2/fileOperations/crop.spec.ts +++ b/tests/v2/fileOperations/crop.spec.ts @@ -1,7 +1,6 @@ import { loadOptionalDependency } from "@/dependency/index.js"; import { ExtractedImage } from "@/image/index.js"; import { PathInput } from "@/index.js"; -import { extractCrops } from "@/v2/fileOperations/crop.js"; import { LocalResponse } from "@/v2/parsing/index.js"; import { CropResponse } from "@/v2/product/crop/cropResponse.js"; @@ -58,7 +57,7 @@ describe("MindeeV2 - FileOperation - Crop #OptionalDepsRequired", async () => { path.join(cropPath, "default_sample.json") ); - const extractedCrops = await response.extractFromFile(inputSample); + const extractedCrops = await response.inference.result.extractFromInputSource(inputSample); assert.strictEqual(extractedCrops.length, 2); @@ -66,7 +65,8 @@ describe("MindeeV2 - FileOperation - Crop #OptionalDepsRequired", async () => { const dimensions = await getFileDimensions(extractedCrops[0].buffer, sharp); assert.strictEqual(Math.round(dimensions.width), 2201); assert.strictEqual(Math.round(dimensions.height), 4314); - const localExtract: ExtractedImage = await response.inference.result.crops[0].extractFromFile(inputSample); + const cropItem = response.inference.result.crops[0]; + const localExtract: ExtractedImage = await 
cropItem.extractFromInputSource(inputSample); assert.ok(localExtract.buffer.equals(extractedCrops[0].buffer)); }); @@ -79,7 +79,7 @@ describe("MindeeV2 - FileOperation - Crop #OptionalDepsRequired", async () => { path.join(cropPath, "default_sample.json") ); - const extractedCrops = await response.extractFromFile(inputSample, 0.5); + const extractedCrops = await response.inference.result.extractFromInputSource(inputSample, 0.5); assert.strictEqual(extractedCrops.length, 2); @@ -87,7 +87,8 @@ describe("MindeeV2 - FileOperation - Crop #OptionalDepsRequired", async () => { const dimensions = await getFileDimensions(extractedCrops[0].buffer, sharp); assert.strictEqual(Math.round(dimensions.width), Math.round(2201 * 0.5)); assert.strictEqual(Math.round(dimensions.height), Math.round(4314 * 0.5)); - const localExtract: ExtractedImage = await response.inference.result.crops[0].extractFromFile(inputSample, 0.5); + const cropItem = response.inference.result.crops[0]; + const localExtract: ExtractedImage = await cropItem.extractFromInputSource(inputSample, 0.5); assert.ok(localExtract.buffer.equals(extractedCrops[0].buffer)); }); @@ -100,10 +101,7 @@ describe("MindeeV2 - FileOperation - Crop #OptionalDepsRequired", async () => { path.join(cropPath, "crop_multiple.json") ); - const extractedCrops = await extractCrops( - inputSample, - response.inference.result.crops - ); + const extractedCrops = await response.inference.result.extractFromInputSource(inputSample); assert.strictEqual(extractedCrops.length, 2); diff --git a/tests/v2/fileOperations/split.spec.ts b/tests/v2/fileOperations/split.spec.ts index fdedf699..fd5baf22 100644 --- a/tests/v2/fileOperations/split.spec.ts +++ b/tests/v2/fileOperations/split.spec.ts @@ -1,8 +1,5 @@ import { PathInput } from "@/index.js"; -import { ExtractedPdf } from "@/pdf/extractedPdf.js"; -import { extractSplits } from "@/v2/fileOperations/split.js"; -import { SplitFiles } from "@/v2/fileOperations/splitFiles.js"; - +import { 
extractMultipleSplits } from "@/v2/fileOperations/split.js"; import { LocalResponse } from "@/v2/parsing/index.js"; import { SplitResponse } from "@/v2/product/split/splitResponse.js"; import assert from "node:assert/strict"; @@ -29,7 +26,7 @@ describe("MindeeV2 - Product - SplitResponse #OptionalDepsRequired", async () => path.join(splitPath, "split_single.json") ); - const extractedSplits = await response.extractFromFile(inputSample); + const extractedSplits = await response.inference.result.extractFromInputSource(inputSample); assert.strictEqual(extractedSplits.length, 1); @@ -49,7 +46,7 @@ describe("MindeeV2 - Product - SplitResponse #OptionalDepsRequired", async () => path.join(splitPath, "split_multiple.json") ); - const extractedSplits = await response.extractFromFile(inputSample); + const extractedSplits = await response.inference.result.extractFromInputSource(inputSample); assert.strictEqual(extractedSplits.length, 3); @@ -67,7 +64,7 @@ describe("MindeeV2 - Product - SplitResponse #OptionalDepsRequired", async () => const bufferInput2 = extractedSplits[2].asSource(); const count2 = await bufferInput2.getPageCount(); assert.strictEqual(count2, 1); - const localExtract: ExtractedPdf = await response.inference.result.splits[0].extractFromFile(inputSample); + const localExtract = await response.inference.result.splits[0].extractFromFile(inputSample); assert.ok(extractedSplits[0].buffer.equals(localExtract.buffer)); }); @@ -75,7 +72,7 @@ describe("MindeeV2 - Product - SplitResponse #OptionalDepsRequired", async () => const inputSample = new PathInput({ inputPath: path.join(splitPath, "invoice_5p.pdf") }); - const splitFiles: SplitFiles = await extractSplits(inputSample, [[0, 1, 2, 3, 4]]); + const splitFiles = await extractMultipleSplits(inputSample, [[0, 1, 2, 3, 4]]); assert(splitFiles.length === 1); assert(splitFiles[0].pageCount === 5); assert(splitFiles[0].buffer === inputSample.fileObject);