Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/_publish-code.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:

steps:
- name: Check out Git repository
uses: actions/checkout@v4
uses: actions/checkout@v6

- name: Install Dependencies
run: sudo apt-get install pcregrep
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/_publish-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:

steps:
- name: Check out Git repository
uses: actions/checkout@v4
uses: actions/checkout@v6

- name: Set up Node.js
uses: actions/setup-node@v6
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/_static-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:

steps:
- name: Check out Git repository
uses: actions/checkout@v4
uses: actions/checkout@v6

- name: Set up Node.js
uses: actions/setup-node@v6
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/_test-integrations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:

steps:
- name: Check out Git repository
uses: actions/checkout@v4
uses: actions/checkout@v6
with:
submodules: recursive

Expand Down Expand Up @@ -84,7 +84,7 @@ jobs:

steps:
- name: Check out Git repository
uses: actions/checkout@v4
uses: actions/checkout@v6
with:
submodules: recursive

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/_test-smoke.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:

steps:
- name: Check out Git repository
uses: actions/checkout@v4
uses: actions/checkout@v6
with:
submodules: recursive

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/_test-units.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:

steps:
- name: Check out Git repository
uses: actions/checkout@v4
uses: actions/checkout@v6
with:
submodules: recursive

Expand Down Expand Up @@ -56,7 +56,7 @@ jobs:

steps:
- name: Check out Git repository
uses: actions/checkout@v4
uses: actions/checkout@v6
with:
submodules: recursive

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { ExtractedImage } from "@/image/index.js";

export class CropFiles extends Array<ExtractedImage> {
export class ExtractedImages extends Array<ExtractedImage> {
constructor(...items: ExtractedImage[]) {
super(...items);
}
Expand Down
1 change: 1 addition & 0 deletions src/image/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
export { compressImage } from "./imageCompressor.js";
export { ExtractedImage } from "./extractedImage.js";
export { ExtractedImages } from "./extractedImages.js";
export { extractFromPage } from "./imageExtractor.js";
7 changes: 7 additions & 0 deletions src/pdf/extractedPdfs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import { ExtractedPdf } from "@/pdf/extractedPdf.js";

/**
* Array of ExtractedPdf items.
*/
export class ExtractedPdfs extends Array<ExtractedPdf> {
/**
* @param items Elements of the list, forwarded unchanged to the Array constructor.
*/
constructor(...items: ExtractedPdf[]) {
super(...items);
}
}
2 changes: 2 additions & 0 deletions src/pdf/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@ export { extractPages, countPages } from "./pdfOperation.js";
export type { SplitPdf } from "./pdfOperation.js";
export { compressPdf } from "./pdfCompressor.js";
export { hasSourceText } from "./pdfUtils.js";
export { ExtractedPdf } from "./extractedPdf.js";
export { ExtractedPdfs } from "./extractedPdfs.js";
16 changes: 9 additions & 7 deletions src/v2/fileOperations/crop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,20 @@ import { CropItem } from "@/v2/product/crop/index.js";
import { MindeeError } from "@/errors/index.js";
import { extractImagesFromPolygon } from "@/image/imageExtractor.js";
import { Polygon } from "@/geometry/index.js";
import { CropFiles } from "@/v2/fileOperations/cropFiles.js";
import { ExtractedImage } from "@/image/index.js";
import { ExtractedImage, ExtractedImages } from "@/image/index.js";
import { logger } from "@/logger.js";


/**
* Extracts a single specified crop from a given input source.
* @param inputSource Local input source.
* @param crop Crop to extract.
* @param quality JPEG quality of extracted image.
*/
export async function extractSingleCrop(inputSource: LocalInputSource, crop: CropItem): Promise<ExtractedImage> {
return (await extractCrops(inputSource, [crop]))[0];
export async function extractSingleCrop(
inputSource: LocalInputSource, crop: CropItem, quality?: number
): Promise<ExtractedImage> {
return (await extractMultipleCrops(inputSource, [crop], quality))[0];
}


Expand All @@ -25,11 +27,11 @@ export async function extractSingleCrop(inputSource: LocalInputSource, crop: Cro
* @param quality JPEG quality of extracted images.
* @return a list of extracted files, as an ExtractedImages object.
*/
export async function extractCrops(
export async function extractMultipleCrops(
inputSource: LocalInputSource,
crops: CropItem[],
quality?: number ,
): Promise<CropFiles> {
): Promise<ExtractedImages> {
if (crops.length === 0) {
throw new MindeeError("No crop indexes provided.");
}
Expand All @@ -44,5 +46,5 @@ export async function extractCrops(
polygonsByPage.get(pageId)!.push(crop.location.polygon);
}
const extractedCrops = await extractImagesFromPolygon(inputSource, polygonsByPage, quality);
return new CropFiles(...extractedCrops);
return new ExtractedImages(...extractedCrops);
}
12 changes: 6 additions & 6 deletions src/v2/fileOperations/split.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
import { LocalInputSource } from "@/input/index.js";
import { MindeeError } from "@/errors/index.js";
import { PdfExtractor } from "@/pdf/pdfExtractor.js";
import { SplitFiles } from "@/v2/fileOperations/splitFiles.js";
import { logger } from "@/logger.js";
import { ExtractedPdf } from "@/pdf/extractedPdf.js";
import { ExtractedPdfs } from "@/pdf/extractedPdfs.js";

/**
* Extracts a single specified split from a given input source.
* @param inputSource Local input source.
* @param split Page indexes making up the split to extract.
*/
export async function extractSingleSplit(inputSource: LocalInputSource, split: number[]) {
return await extractSplits(inputSource, [split]);
return await extractMultipleSplits(inputSource, [split]);
}

/**
Expand All @@ -21,7 +21,7 @@ export async function extractSingleSplit(inputSource: LocalInputSource, split: n
* @return a list of extracted files.
* @throws MindeeError if no indexes are provided.
*/
export async function extractSplits(inputSource: LocalInputSource, splits: number[][]): Promise<SplitFiles> {
export async function extractMultipleSplits(inputSource: LocalInputSource, splits: number[][]): Promise<ExtractedPdfs> {
const pageGroups = splits.filter(e => e.length > 0);
if (pageGroups.length === 0) {
throw new MindeeError("No valid split indexes provided.");
Expand All @@ -32,14 +32,14 @@ export async function extractSplits(inputSource: LocalInputSource, splits: numbe
await pdfExtractor.init();

if (splits.length === 0) {
return new SplitFiles();
return new ExtractedPdfs();
}
const pageCount = await pdfExtractor.getPageCount();
if (splits.length === 1 && splits[0].at(-1) === pageCount-1) {
return new SplitFiles(new ExtractedPdf(inputSource.fileObject as Buffer, inputSource.filename, pageCount));
return new ExtractedPdfs(new ExtractedPdf(inputSource.fileObject as Buffer, inputSource.filename, pageCount));
}
const subDocuments = await pdfExtractor.extractSubDocuments(pageGroups);
return new SplitFiles(...subDocuments);
return new ExtractedPdfs(...subDocuments);
}

/**
Expand Down
8 changes: 0 additions & 8 deletions src/v2/fileOperations/splitFiles.ts

This file was deleted.

6 changes: 3 additions & 3 deletions src/v2/product/crop/cropItem.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { FieldLocation } from "@/v2/parsing/inference/field/index.js";
import { StringDict } from "@/parsing/index.js";
import { LocalInputSource } from "@/input/index.js";
import { extractCrops } from "@/v2/fileOperations/crop.js";
import { extractSingleCrop } from "@/v2/fileOperations/crop.js";
import { ExtractedImage } from "@/image/index.js";
import { ExtractionResponse } from "@/v2/product/index.js";

Expand Down Expand Up @@ -36,7 +36,7 @@ export class CropItem {
* @param inputSource The input file to extract from.
* @param quality Optional quality parameter for image extraction, defaults to 1.
*/
async extractFromFile(inputSource: LocalInputSource, quality: number = 1): Promise<ExtractedImage>{
return (await extractCrops(inputSource, [this], quality))[0];
async extractFromInputSource(inputSource: LocalInputSource, quality: number = 1): Promise<ExtractedImage>{
return (await extractSingleCrop(inputSource, this, quality));
}
}
12 changes: 0 additions & 12 deletions src/v2/product/crop/cropResponse.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
import { LocalInputSource } from "@/input/index.js";
import { StringDict } from "@/parsing/stringDict.js";
import { extractCrops } from "@/v2/fileOperations/crop.js";
import { CropFiles } from "@/v2/fileOperations/cropFiles.js";
import { BaseResponse } from "@/v2/parsing/index.js";
import { CropInference } from "./cropInference.js";

Expand All @@ -18,13 +15,4 @@ export class CropResponse extends BaseResponse {
super(serverResponse);
this.inference = new CropInference(serverResponse["inference"]);
}

/**
* Extracts all crops from an input.
* @param inputSource The input file to extract from.
* @param quality Optional quality parameter for image extraction, default is undefined (full quality).
*/
async extractFromFile(inputSource: LocalInputSource, quality: number = 1): Promise<CropFiles> {
return await extractCrops(inputSource, this.inference.result.crops, quality);
}
}
12 changes: 12 additions & 0 deletions src/v2/product/crop/cropResult.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import { StringDict } from "@/parsing/stringDict.js";
import { CropItem } from "@/v2/product/crop/cropItem.js";
import { LocalInputSource } from "@/input/index.js";
import { extractMultipleCrops } from "@/v2/fileOperations/crop.js";
import { ExtractedImages } from "@/image/extractedImages.js";

export class CropResult {
/**
Expand All @@ -15,4 +18,13 @@ export class CropResult {
const crops = this.crops.map(item => item.toString()).join("\n");
return `Crops\n=====\n${crops}`;
}

/**
 * Extracts all crops of this result from an input.
 * @param inputSource The input file to extract from.
 * @param quality Optional quality parameter for image extraction, defaults to 1.
 *   NOTE(review): the accepted scale is defined by the image extractor, which is not visible here — confirm.
 * @return the extracted images, obtained by passing every item of `this.crops` to extractMultipleCrops.
 * @throws MindeeError raised by extractMultipleCrops when `this.crops` is empty.
 */
async extractFromInputSource(inputSource: LocalInputSource, quality: number = 1): Promise<ExtractedImages> {
  return await extractMultipleCrops(inputSource, this.crops, quality);
}
}
7 changes: 4 additions & 3 deletions src/v2/product/split/splitRange.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { StringDict } from "@/parsing/index.js";
import { LocalInputSource } from "@/input/index.js";
import { expandRange, extractSplits } from "@/v2/fileOperations/split.js";
import { expandRange, extractMultipleSplits } from "@/v2/fileOperations/split.js";
import { ExtractionResponse } from "@/v2/product/index.js";
import { ExtractedPdf } from "@/pdf/index.js";

/**
* Split inference result.
Expand Down Expand Up @@ -38,8 +39,8 @@ export class SplitRange {
* Extracts a single split from the input file.
* @param inputSource The input file to extract from.
*/
async extractFromFile(inputSource: LocalInputSource) {
async extractFromFile(inputSource: LocalInputSource): Promise<ExtractedPdf> {
const pageRange = [expandRange(this.pageRange as [number, number])];
return (await extractSplits(inputSource, pageRange))[0];
return (await extractMultipleSplits(inputSource, pageRange))[0];
}
}
15 changes: 0 additions & 15 deletions src/v2/product/split/splitResponse.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import { StringDict } from "@/parsing/stringDict.js";
import { SplitInference } from "./splitInference.js";
import { BaseResponse } from "@/v2/parsing/index.js";
import { LocalInputSource } from "@/input/index.js";
import { expandRange, extractSplits } from "@/v2/fileOperations/split.js";
import { SplitFiles } from "@/v2/fileOperations/splitFiles.js";

export class SplitResponse extends BaseResponse {
/**
Expand All @@ -18,16 +15,4 @@ export class SplitResponse extends BaseResponse {
super(serverResponse);
this.inference = new SplitInference(serverResponse["inference"]);
}

/**
* Extracts all splits from an input PDF.
* @param inputSource The input file to extract from.
*/
async extractFromFile(inputSource: LocalInputSource): Promise<SplitFiles>{
const splits: number[][] = [];
for (const split of this.inference.result.splits) {
splits.push(expandRange(split.pageRange as [number, number]));
}
return await extractSplits(inputSource, splits);
}
}
15 changes: 15 additions & 0 deletions src/v2/product/split/splitResult.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import { SplitRange } from "./splitRange.js";
import { StringDict } from "@/parsing/index.js";
import { LocalInputSource } from "@/input/index.js";
import { ExtractedPdfs } from "@/pdf/index.js";
import { extractMultipleSplits, expandRange } from "@/v2/fileOperations/split.js";

/**
* Split result info.
Expand All @@ -14,6 +17,18 @@ export class SplitResult {
this.splits = rawResponse.splits.map((split: StringDict) => new SplitRange(split));
}

/**
 * Extracts every split of this result from an input PDF.
 * @param inputSource The input file to extract from.
 * @return the extracted PDFs, as returned by extractMultipleSplits.
 */
async extractFromInputSource(inputSource: LocalInputSource): Promise<ExtractedPdfs> {
  // Turn each split's two-number pageRange into the number[] form that extractMultipleSplits takes.
  const pageGroups: number[][] = this.splits.map(
    ({ pageRange }) => expandRange(pageRange as [number, number])
  );
  return await extractMultipleSplits(inputSource, pageGroups);
}

toString(): string {
let splits = "\n";
if (this.splits.length > 0) {
Expand Down
2 changes: 1 addition & 1 deletion tests/data
Loading
Loading