From 104e07f4cfea677ba4a2523d3ac43ff98722fc46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ianar=C3=A9=20S=C3=A9vi?= Date: Wed, 24 Jun 2026 18:18:54 +0200 Subject: [PATCH] :bug: harmonize extractors --- .../java/com/mindee/image/ExtractedImage.java | 17 ++-- .../java/com/mindee/image/ImageExtractor.java | 84 +++++++------------ .../java/com/mindee/pdf/BasePDFExtractor.java | 4 +- .../java/com/mindee/pdf/ExtractedPDF.java | 7 +- .../com/mindee/v2/fileoperations/Crop.java | 41 +++++++-- .../java/com/mindee/TestingUtilities.java | 11 +++ .../com/mindee/image/ImageExtractorTest.java | 20 ++--- .../InvoiceSplitterExtractionIT.java | 6 +- .../InvoiceSplitterExtractionTest.java | 15 ++-- .../mindee/v2/fileoperations/CropTest.java | 74 ++++++++-------- .../mindee/v2/fileoperations/SplitTest.java | 23 ++++- .../java/com/mindee/v2/product/CropTest.java | 29 ++++--- .../java/com/mindee/v2/product/SplitTest.java | 29 ++++--- 13 files changed, 207 insertions(+), 153 deletions(-) diff --git a/src/main/java/com/mindee/image/ExtractedImage.java b/src/main/java/com/mindee/image/ExtractedImage.java index cae0e9d79..0767a381b 100644 --- a/src/main/java/com/mindee/image/ExtractedImage.java +++ b/src/main/java/com/mindee/image/ExtractedImage.java @@ -19,6 +19,7 @@ public class ExtractedImage { private final String filename; private final String saveFormat; private final int pageId; + private final int elementId; /** * Default constructor. @@ -27,11 +28,18 @@ public class ExtractedImage { * @param filename Name of the extracted image. * @param saveFormat Format to save the image as, defaults to PNG. */ - public ExtractedImage(BufferedImage image, String filename, String saveFormat, int pageId) { + public ExtractedImage( + BufferedImage image, + String filename, + String saveFormat, + int pageId, + int elementId + ) { this.image = image; this.filename = filename; this.saveFormat = saveFormat; this.pageId = pageId; + this.elementId = elementId; } /** @@ -53,11 +61,10 @@ public void writeToFile(String outputPath) throws IOException { * @throws IOException Throws if the file can't be accessed. */ public void writeToFile(Path outputPath) throws IOException { - if (Files.isDirectory(outputPath)) { - outputPath = outputPath.resolve(this.filename); + if (!Files.isDirectory(outputPath)) { + throw new IllegalArgumentException("Provided path is not a directory."); } - var outputfile = outputPath.toFile(); - ImageIO.write(this.image, this.saveFormat, outputfile); + ImageIO.write(this.image, this.saveFormat, outputPath.resolve(this.filename).toFile()); } /** diff --git a/src/main/java/com/mindee/image/ImageExtractor.java b/src/main/java/com/mindee/image/ImageExtractor.java index 82c8981b3..1ac103272 100644 --- a/src/main/java/com/mindee/image/ImageExtractor.java +++ b/src/main/java/com/mindee/image/ImageExtractor.java @@ -22,6 +22,7 @@ public class ImageExtractor { public ImageExtractor(LocalInputSource source) throws IOException { this.pageImages = new ArrayList<>(); + this.filename = source.getFilename(); if (source.isPDF()) { this.saveFormat = "jpg"; @@ -29,12 +30,9 @@ public ImageExtractor(LocalInputSource source) throws IOException { for (PDFPageImage pdfPageImage : pdfPageImages) { this.pageImages.add(pdfPageImage.getImage()); } - this.filename = source.getFilename() + "." + this.saveFormat; } else { - this.filename = source.getFilename(); String[] splitName = InputSourceUtils.splitNameStrict(this.filename); this.saveFormat = splitName[1].toLowerCase(); - var input = new ByteArrayInputStream(source.getFile()); this.pageImages.add(ImageIO.read(input)); } @@ -64,53 +62,29 @@ public int getPageCount() { * * @param Type of field (needs to support positioning data). * @param fields List of Fields to extract. - * @param pageIndex The page index to extract, begins at 0. + * @param pageId The page index to extract, begins at 0. * @return A list of {@link ExtractedImage}. */ public ExtractedImages extractImagesFromPage( List fields, - int pageIndex + int pageId ) { - return extractImagesFromPage(fields, pageIndex, this.filename); - } - - /** - * Extract multiple images on a given page from a list of fields having position data. - * - * @param Type of field (needs to support positioning data). - * @param fields List of Fields to extract. - * @param pageIndex The page index to extract, begins at 0. - * @param outputName The base output filename, must have an image extension. - * @return A list of {@link ExtractedImage}. - */ - public ExtractedImages extractImagesFromPage( - List fields, - int pageIndex, - String outputName - ) { - String filename; - if (this.getPageCount() > 1) { - String[] splitName = InputSourceUtils.splitNameStrict(outputName); - filename = splitName[0] + "." + this.saveFormat; - } else { - filename = outputName; - } - return extractFromPage(fields, pageIndex, filename); + return extractFromPage(fields, pageId, this.filename); } private ExtractedImages extractFromPage( List fields, - int pageIndex, + int pageId, String outputName ) { - String[] splitName = InputSourceUtils.splitNameStrict(outputName); - var filename = String - .format("%s_page-%3s.%s", splitName[0], pageIndex + 1, splitName[1]) - .replace(" ", "0"); - var extractedImages = new ExtractedImages(); - for (int i = 0; i < fields.size(); i++) { - ExtractedImage extractedImage = extractImage(fields.get(i), pageIndex, i + 1, filename); + for (int elementId = 0; elementId < fields.size(); elementId++) { + ExtractedImage extractedImage = extractImage( + fields.get(elementId), + pageId, + elementId, + outputName + ); if (extractedImage != null) { extractedImages.add(extractedImage); } @@ -123,33 +97,31 @@ private ExtractedImages extractFromPage( * * @param Type of field (needs to support positioning data). * @param field The field to extract. - * @param index The index to use for naming the extracted image. + * @param elementId The index to use for naming the extracted image. * @param filename Name of the file. - * @param pageIndex The page index to extract, begins at 0. + * @param pageId The page index to extract, begins at 0. * @return The {@link ExtractedImage}, or null if the field does not have valid * position data. */ public ExtractedImage extractImage( FieldT field, - int pageIndex, - int index, + int pageId, + int elementId, String filename ) { String[] splitName = InputSourceUtils.splitNameStrict(filename); - String saveFormat = splitName[1].toLowerCase(); var polygon = field.getPolygon(); if (polygon == null) { return null; } - String fieldFilename = splitName[0] - + String.format("_%3s", index).replace(" ", "0") - + "." - + saveFormat; return new ExtractedImage( - extractImage(polygon.getAsBbox(), pageIndex), - fieldFilename, - saveFormat, - pageIndex + extractImage(polygon.getAsBbox(), pageId), + String + .format("%s_page-%3s-item-%3s.%s", splitName[0], pageId + 1, elementId + 1, this.saveFormat) + .replace(" ", "0"), + this.saveFormat, + pageId, + elementId ); } @@ -158,17 +130,17 @@ public ExtractedImage extractImage( * * @param Type of field (needs to support positioning data). * @param field The field to extract. - * @param index The index to use for naming the extracted image. - * @param pageIndex The 0-based page index to extract. + * @param elementId The index to use for naming the extracted image. + * @param pageId The 0-based page index to extract. * @return The {@link ExtractedImage}, or null if the field does not have valid * position data. */ public ExtractedImage extractImage( FieldT field, - int pageIndex, - int index + int pageId, + int elementId ) { - return extractImage(field, pageIndex, index, this.filename); + return extractImage(field, pageId, elementId, this.filename); } private BufferedImage extractImage(Bbox bbox, int pageIndex) { diff --git a/src/main/java/com/mindee/pdf/BasePDFExtractor.java b/src/main/java/com/mindee/pdf/BasePDFExtractor.java index 39a567aeb..da55a863c 100644 --- a/src/main/java/com/mindee/pdf/BasePDFExtractor.java +++ b/src/main/java/com/mindee/pdf/BasePDFExtractor.java @@ -88,9 +88,9 @@ public ExtractedPDFs extractSubDocuments(List> pageIndexes) throws protected String makeFilename(List pageNumbers) { String[] splitName = InputSourceUtils.splitNameStrict(filename); return splitName[0] - + String.format("_%3s", pageNumbers.get(0)).replace(" ", "0") + + String.format("_pages-%3s", pageNumbers.get(0) + 1).replace(" ", "0") + "-" - + String.format("%3s", pageNumbers.get(pageNumbers.size() - 1)).replace(" ", "0") + + String.format("%3s", pageNumbers.get(pageNumbers.size() - 1) + 1).replace(" ", "0") + "." + splitName[1]; } diff --git a/src/main/java/com/mindee/pdf/ExtractedPDF.java b/src/main/java/com/mindee/pdf/ExtractedPDF.java index 5280a8f68..52f7fc10f 100644 --- a/src/main/java/com/mindee/pdf/ExtractedPDF.java +++ b/src/main/java/com/mindee/pdf/ExtractedPDF.java @@ -33,10 +33,11 @@ public ExtractedPDF(byte[] fileBytes, String filename) { * @throws IOException Throws if the file can't be accessed. */ public void writeToFile(Path outputPath) throws IOException { - if (Files.isDirectory(outputPath)) { - outputPath = outputPath.resolve(this.filename); + if (!Files.isDirectory(outputPath)) { + throw new IllegalArgumentException("Provided path is not a directory."); } - Files.write(outputPath, this.fileBytes); + + Files.write(outputPath.resolve(this.filename), this.fileBytes); } /** diff --git a/src/main/java/com/mindee/v2/fileoperations/Crop.java b/src/main/java/com/mindee/v2/fileoperations/Crop.java index 11885e743..64d1bd616 100644 --- a/src/main/java/com/mindee/v2/fileoperations/Crop.java +++ b/src/main/java/com/mindee/v2/fileoperations/Crop.java @@ -7,6 +7,9 @@ import com.mindee.v2.product.crop.CropItem; import java.io.IOException; import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.IntStream; public class Crop { private final ImageExtractor imageExtractor; @@ -15,21 +18,41 @@ public Crop(LocalInputSource inputSource) throws IOException { this.imageExtractor = new ImageExtractor(inputSource); } - public ExtractedImage extractSingleCrop(CropItem cropItem) throws IOException { + public ExtractedImage extractSingleCrop(CropItem cropItem) { return this.imageExtractor .extractImage(cropItem.getLocation(), cropItem.getLocation().getPage(), 0); } public ExtractedImages extractMultipleCrops(List cropItems) { - var extractedImages = new ExtractedImages(); - for (int i = 0; i < cropItems.size(); i++) { - var cropItem = cropItems.get(i); - extractedImages - .add( - this.imageExtractor - .extractImage(cropItem.getLocation(), cropItem.getLocation().getPage(), i + 1) - ); + if (cropItems == null || cropItems.isEmpty()) { + return new ExtractedImages(); } + + // Group crops by page, preserving insertion order + Map> cropsByPage = cropItems + .stream() + .collect( + Collectors + .groupingBy( + item -> item.getLocation().getPage(), + java.util.LinkedHashMap::new, + Collectors.toList() + ) + ); + + var extractedImages = new ExtractedImages(); + cropsByPage + .forEach( + (page, pageCrops) -> IntStream + .range(0, pageCrops.size()) + .forEach( + elementId -> extractedImages + .add( + this.imageExtractor + .extractImage(pageCrops.get(elementId).getLocation(), page, elementId) + ) + ) + ); return extractedImages; } } diff --git a/src/test/java/com/mindee/TestingUtilities.java b/src/test/java/com/mindee/TestingUtilities.java index c9e70e8a9..de9a9a980 100644 --- a/src/test/java/com/mindee/TestingUtilities.java +++ b/src/test/java/com/mindee/TestingUtilities.java @@ -9,6 +9,17 @@ import org.junit.jupiter.api.Assertions; public class TestingUtilities { + public static void deleteRecursively(Path path) throws IOException { + if (Files.exists(path)) { + try (var entries = Files.walk(path)) { + entries + .sorted(java.util.Comparator.reverseOrder()) + .map(Path::toFile) + .forEach(java.io.File::delete); + } + } + } + public static Path getResourcePath(String filePath) { return Paths.get("src/test/resources/" + filePath); } diff --git a/src/test/java/com/mindee/image/ImageExtractorTest.java b/src/test/java/com/mindee/image/ImageExtractorTest.java index 89db4cbaf..809c9b122 100644 --- a/src/test/java/com/mindee/image/ImageExtractorTest.java +++ b/src/test/java/com/mindee/image/ImageExtractorTest.java @@ -74,7 +74,9 @@ public void givenAnImage_shouldExtractPositionFields() throws IOException { LocalInputSource source = extractedImage.asInputSource(); Assertions .assertEquals( - String.format("default_sample_page-001_%3s.jpg", i + 1).replace(" ", "0"), + String + .format("default_sample_page-%3s-item-%3s.jpg", page.getPageId() + 1, i + 1) + .replace(" ", "0"), source.getFilename() ); } @@ -93,28 +95,22 @@ public void givenAnImage_shouldExtractValueFields() throws IOException { for (Page page : inference.getPages()) { List codes1D = extractor - .extractImagesFromPage( - page.getPrediction().getCodes1D(), - page.getPageId(), - "barcodes_1D.png" - ); + .extractImagesFromPage(page.getPrediction().getCodes1D(), page.getPageId()); for (int i = 0; i < codes1D.size(); i++) { ExtractedImage extractedImage = codes1D.get(i); Assertions.assertNotNull(extractedImage.getImage()); LocalInputSource source = extractedImage.asInputSource(); Assertions .assertEquals( - String.format("barcodes_1D_page-001_%3s.png", i + 1).replace(" ", "0"), + String + .format("default_sample_page-%3s-item-%3s.jpg", page.getPageId() + 1, i + 1) + .replace(" ", "0"), source.getFilename() ); extractedImage.writeToFile(getResourcePath("output/")); } List codes2D = extractor - .extractImagesFromPage( - page.getPrediction().getCodes2D(), - page.getPageId(), - "barcodes_2D.png" - ); + .extractImagesFromPage(page.getPrediction().getCodes2D(), page.getPageId()); for (ExtractedImage extractedImage : codes2D) { Assertions.assertNotNull(extractedImage.getImage()); extractedImage.writeToFile(getResourcePath("output/")); diff --git a/src/test/java/com/mindee/v1/fileoperations/InvoiceSplitterExtractionIT.java b/src/test/java/com/mindee/v1/fileoperations/InvoiceSplitterExtractionIT.java index 79a8bb0b3..f0cf09409 100644 --- a/src/test/java/com/mindee/v1/fileoperations/InvoiceSplitterExtractionIT.java +++ b/src/test/java/com/mindee/v1/fileoperations/InvoiceSplitterExtractionIT.java @@ -70,8 +70,10 @@ public void givenAPDF_shouldExtractInvoices() throws IOException, InterruptedExc List extractedPDFsStrict = extractor .extractInvoices(inference.getPrediction().getInvoicePageGroups(), false); Assertions.assertEquals(2, extractedPDFsStrict.size()); - Assertions.assertEquals("default_sample_000-000.pdf", extractedPDFsStrict.get(0).getFilename()); - Assertions.assertEquals("default_sample_001-001.pdf", extractedPDFsStrict.get(1).getFilename()); + Assertions + .assertEquals("default_sample_pages-001-001.pdf", extractedPDFsStrict.get(0).getFilename()); + Assertions + .assertEquals("default_sample_pages-002-002.pdf", extractedPDFsStrict.get(1).getFilename()); PredictResponse invoice0 = getInvoicePrediction( extractedPDFsStrict.get(0).asInputSource() diff --git a/src/test/java/com/mindee/v1/fileoperations/InvoiceSplitterExtractionTest.java b/src/test/java/com/mindee/v1/fileoperations/InvoiceSplitterExtractionTest.java index 648961d42..a45c7ee9b 100644 --- a/src/test/java/com/mindee/v1/fileoperations/InvoiceSplitterExtractionTest.java +++ b/src/test/java/com/mindee/v1/fileoperations/InvoiceSplitterExtractionTest.java @@ -31,9 +31,12 @@ public void givenAPDF_shouldExtractInvoicesNoStrict() throws IOException { var extractedPDFSNoStrict = extractor .extractInvoices(inference.getPrediction().getInvoicePageGroups(), false); Assertions.assertEquals(3, extractedPDFSNoStrict.size()); - Assertions.assertEquals("invoice_5p_000-000.pdf", extractedPDFSNoStrict.get(0).getFilename()); - Assertions.assertEquals("invoice_5p_001-003.pdf", extractedPDFSNoStrict.get(1).getFilename()); - Assertions.assertEquals("invoice_5p_004-004.pdf", extractedPDFSNoStrict.get(2).getFilename()); + Assertions + .assertEquals("invoice_5p_pages-001-001.pdf", extractedPDFSNoStrict.get(0).getFilename()); + Assertions + .assertEquals("invoice_5p_pages-002-004.pdf", extractedPDFSNoStrict.get(1).getFilename()); + Assertions + .assertEquals("invoice_5p_pages-005-005.pdf", extractedPDFSNoStrict.get(2).getFilename()); } @Test @@ -48,7 +51,9 @@ public void givenAPDF_shouldExtractInvoicesStrict() throws IOException { var extractedPDFStrict = extractor .extractInvoices(inference.getPrediction().getInvoicePageGroups(), true); Assertions.assertEquals(2, extractedPDFStrict.size()); - Assertions.assertEquals("invoice_5p_000-000.pdf", extractedPDFStrict.get(0).getFilename()); - Assertions.assertEquals("invoice_5p_001-004.pdf", extractedPDFStrict.get(1).getFilename()); + Assertions + .assertEquals("invoice_5p_pages-001-001.pdf", extractedPDFStrict.get(0).getFilename()); + Assertions + .assertEquals("invoice_5p_pages-002-005.pdf", extractedPDFStrict.get(1).getFilename()); } } diff --git a/src/test/java/com/mindee/v2/fileoperations/CropTest.java b/src/test/java/com/mindee/v2/fileoperations/CropTest.java index 3a515f191..0148ef159 100644 --- a/src/test/java/com/mindee/v2/fileoperations/CropTest.java +++ b/src/test/java/com/mindee/v2/fileoperations/CropTest.java @@ -1,5 +1,6 @@ package com.mindee.v2.fileoperations; +import static com.mindee.TestingUtilities.deleteRecursively; import static com.mindee.TestingUtilities.getResourcePath; import static com.mindee.TestingUtilities.getV2ResourcePath; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -8,28 +9,23 @@ import com.mindee.input.LocalInputSource; import com.mindee.v2.parsing.LocalResponse; import com.mindee.v2.product.crop.CropResponse; +import java.io.IOException; import java.nio.file.Files; +import java.nio.file.Path; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; class CropTest { - @Test - void singlePageSingleCrop_cropsCorrectly() throws Exception { - var inputSample = new LocalInputSource(getV2ResourcePath("products/crop/default_sample.jpg")); - var localResponse = new LocalResponse(getV2ResourcePath("products/crop/default_sample.json")); - var doc = localResponse.deserializeResponse(CropResponse.class); - - var extractedCrop = new Crop(inputSample) - .extractSingleCrop(doc.getInference().getResult().getCrops().get(0)); + private static final Path outputPath = getResourcePath("output/v2/file_operations/crop"); - assertEquals(0, extractedCrop.getPageId()); - assertEquals("default_sample_000.jpg", extractedCrop.getFilename()); - - assertEquals(1056, extractedCrop.getImage().getWidth()); - assertEquals(2070, extractedCrop.getImage().getHeight()); + @BeforeAll + public static void setup() throws IOException { + deleteRecursively(outputPath); + Files.createDirectories(outputPath); } @Test - void singlePageMultiCrop_cropsCorrectly() throws Exception { + void singlePageCrop_cropsCorrectly() throws Exception { var inputSample = new LocalInputSource(getV2ResourcePath("products/crop/default_sample.jpg")); var localResponse = new LocalResponse(getV2ResourcePath("products/crop/default_sample.json")); var doc = localResponse.deserializeResponse(CropResponse.class); @@ -39,21 +35,27 @@ void singlePageMultiCrop_cropsCorrectly() throws Exception { assertEquals(2, extractedCrops.size()); + extractedCrops.saveAllToDisk(outputPath); + var crop0 = extractedCrops.get(0); assertEquals(0, crop0.getPageId()); - assertEquals("default_sample_001.jpg", crop0.getFilename()); - - assertEquals(1056, crop0.getImage().getWidth()); + assertEquals(0, crop0.getElementId()); + assertEquals("default_sample_page-001-item-001.jpg", crop0.getFilename()); assertEquals(2070, crop0.getImage().getHeight()); - - var outputPath = getResourcePath("output"); - extractedCrops.saveAllToDisk(outputPath); - assertTrue(Files.exists(outputPath.resolve("default_sample_001.jpg"))); - assertTrue(Files.exists(outputPath.resolve("default_sample_002.jpg"))); + assertEquals(1056, crop0.getImage().getWidth()); + assertTrue(Files.exists(outputPath.resolve("default_sample_page-001-item-001.jpg"))); + + var crop1 = extractedCrops.get(1); + assertEquals(0, crop1.getPageId()); + assertEquals(1, crop1.getElementId()); + assertEquals("default_sample_page-001-item-002.jpg", crop1.getFilename()); + assertEquals(1868, crop1.getImage().getHeight()); + assertEquals(1298, crop1.getImage().getWidth()); + assertTrue(Files.exists(outputPath.resolve("default_sample_page-001-item-002.jpg"))); } @Test - void multiPageMultiCrop_cropsCorrectly() throws Exception { + void multiPageCrop_cropsCorrectly() throws Exception { var inputSample = new LocalInputSource(getV2ResourcePath("products/crop/multipage_sample.pdf")); var localResponse = new LocalResponse(getV2ResourcePath("products/crop/multipage_sample.json")); var doc = localResponse.deserializeResponse(CropResponse.class); @@ -63,21 +65,21 @@ void multiPageMultiCrop_cropsCorrectly() throws Exception { assertEquals(5, extractedCrops.size()); + extractedCrops.saveAllToDisk(outputPath); + var crop0 = extractedCrops.get(0); assertEquals(0, crop0.getPageId()); - assertEquals("multipage_sample.pdf_001.jpg", crop0.getFilename()); - assertEquals(555, crop0.getImage().getWidth()); + assertEquals("multipage_sample_page-001-item-001.jpg", crop0.getFilename()); assertEquals(1533, crop0.getImage().getHeight()); - - var crop3 = extractedCrops.get(3); - assertEquals(1, crop3.getPageId()); - assertEquals("multipage_sample.pdf_004.jpg", crop3.getFilename()); - assertEquals(562, crop3.getImage().getWidth()); - assertEquals(974, crop3.getImage().getHeight()); - - var outputPath = getResourcePath("output"); - extractedCrops.saveAllToDisk(outputPath); - assertTrue(Files.exists(outputPath.resolve("multipage_sample.pdf_001.jpg"))); - assertTrue(Files.exists(outputPath.resolve("multipage_sample.pdf_005.jpg"))); + assertEquals(555, crop0.getImage().getWidth()); + assertTrue(Files.exists(outputPath.resolve("multipage_sample_page-001-item-002.jpg"))); + + var crop4 = extractedCrops.get(4); + assertEquals(1, crop4.getPageId()); + assertEquals(1, crop4.getElementId()); + assertEquals("multipage_sample_page-002-item-002.jpg", crop4.getFilename()); + assertEquals(1445, crop4.getImage().getHeight()); + assertEquals(547, crop4.getImage().getWidth()); + assertTrue(Files.exists(outputPath.resolve("multipage_sample_page-002-item-002.jpg"))); } } diff --git a/src/test/java/com/mindee/v2/fileoperations/SplitTest.java b/src/test/java/com/mindee/v2/fileoperations/SplitTest.java index b229c0955..0e25e99a4 100644 --- a/src/test/java/com/mindee/v2/fileoperations/SplitTest.java +++ b/src/test/java/com/mindee/v2/fileoperations/SplitTest.java @@ -1,5 +1,7 @@ package com.mindee.v2.fileoperations; +import static com.mindee.TestingUtilities.deleteRecursively; +import static com.mindee.TestingUtilities.getResourcePath; import static com.mindee.TestingUtilities.getV2ResourcePath; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -7,9 +9,20 @@ import com.mindee.v2.parsing.LocalResponse; import com.mindee.v2.product.split.SplitResponse; import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; public class SplitTest { + private static final Path outputPath = getResourcePath("output/v2/file_operations/split"); + + @BeforeAll + public static void setup() throws IOException { + deleteRecursively(outputPath); + Files.createDirectories(outputPath); + } + @Test void singlePage_splitsCorrectly() throws IOException { var inputSample = new LocalInputSource(getV2ResourcePath("products/split/default_sample.pdf")); @@ -20,9 +33,11 @@ void singlePage_splitsCorrectly() throws IOException { var extractedSplit = new Split(inputSample) .extractSingleSplit(doc.getInference().getResult().getSplits().get(0)); - assertEquals("default_sample_000-000.pdf", extractedSplit.getFilename()); + assertEquals("default_sample_pages-001-001.pdf", extractedSplit.getFilename()); var asInputSource = extractedSplit.asInputSource(); assertEquals(1, asInputSource.getPageCount()); + + extractedSplit.writeToFile(outputPath); } @Test @@ -38,13 +53,15 @@ void multiplePages_splitsCorrectly() throws IOException { assertEquals(2, extractedSplits.size()); var split0 = extractedSplits.get(0); - assertEquals("default_sample_000-000.pdf", split0.getFilename()); + assertEquals("default_sample_pages-001-001.pdf", split0.getFilename()); var asInputSource0 = split0.asInputSource(); assertEquals(1, asInputSource0.getPageCount()); var split1 = extractedSplits.get(1); - assertEquals("default_sample_001-001.pdf", split1.getFilename()); + assertEquals("default_sample_pages-002-002.pdf", split1.getFilename()); var asInputSource1 = split1.asInputSource(); assertEquals(1, asInputSource1.getPageCount()); + + extractedSplits.saveAllToDisk(outputPath); } } diff --git a/src/test/java/com/mindee/v2/product/CropTest.java b/src/test/java/com/mindee/v2/product/CropTest.java index 8b022afa4..0a185f55b 100644 --- a/src/test/java/com/mindee/v2/product/CropTest.java +++ b/src/test/java/com/mindee/v2/product/CropTest.java @@ -1,10 +1,12 @@ package com.mindee.v2.product; import static com.mindee.TestingUtilities.assertStringEqualsFile; +import static com.mindee.TestingUtilities.deleteRecursively; import static com.mindee.TestingUtilities.getResourcePath; import static com.mindee.TestingUtilities.getV2ResourcePath; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import com.mindee.input.LocalInputSource; import com.mindee.v2.fileoperations.Crop; @@ -13,12 +15,22 @@ import com.mindee.v2.product.extraction.ExtractionResponse; import java.io.IOException; import java.nio.file.Files; +import java.nio.file.Path; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; @DisplayName("MindeeV2 - Crop Model Tests") public class CropTest { + private static final Path outputPath = getResourcePath("output/v2/product/crop"); + + @BeforeAll + public static void setup() throws IOException { + deleteRecursively(outputPath); + Files.createDirectories(outputPath); + } + private CropResponse loadResponse(String filePath) throws IOException { var localResponse = new LocalResponse(getV2ResourcePath(filePath)); return localResponse.deserializeResponse(CropResponse.class); @@ -147,14 +159,12 @@ void extractMultipleCrops() throws IOException { var methodExtract = response.getInference().getResult().extractFromInputSource(inputSource); assertEquals(classExtract.size(), methodExtract.size()); - var outputPath = getResourcePath("output"); classExtract.saveAllToDisk(outputPath.toString()); - assert Files.exists(outputPath.resolve("default_sample_001.jpg")); - assert Files.size(outputPath.resolve("default_sample_001.jpg")) >= 1500; - - assert Files.exists(outputPath.resolve("default_sample_002.jpg")); - assert Files.size(outputPath.resolve("default_sample_002.jpg")) >= 1500; + assertTrue(Files.exists(outputPath.resolve("default_sample_page-001-item-001.jpg"))); + assertTrue(Files.size(outputPath.resolve("default_sample_page-001-item-001.jpg")) >= 1500); + assertTrue(Files.exists(outputPath.resolve("default_sample_page-001-item-002.jpg"))); + assertTrue(Files.size(outputPath.resolve("default_sample_page-001-item-002.jpg")) >= 1500); } @Test @@ -172,11 +182,10 @@ void extractSingleCrop() throws IOException { .get(0) .extractFromInputSource(inputSource); - var outputPath = getResourcePath("output"); - extractedCrop.writeToFile(outputPath.resolve("default_sample_999.jpg")); + extractedCrop.writeToFile(outputPath); - assert Files.exists(outputPath.resolve("default_sample_999.jpg")); - assert Files.size(outputPath.resolve("default_sample_999.jpg")) >= 1500; + assertTrue(Files.exists(outputPath.resolve("default_sample_page-001-item-001.jpg"))); + assertTrue(Files.size(outputPath.resolve("default_sample_page-001-item-001.jpg")) >= 1500); } } } diff --git a/src/test/java/com/mindee/v2/product/SplitTest.java b/src/test/java/com/mindee/v2/product/SplitTest.java index 73b9c121e..787ce6755 100644 --- a/src/test/java/com/mindee/v2/product/SplitTest.java +++ b/src/test/java/com/mindee/v2/product/SplitTest.java @@ -1,9 +1,11 @@ package com.mindee.v2.product; +import static com.mindee.TestingUtilities.deleteRecursively; import static com.mindee.TestingUtilities.getResourcePath; import static com.mindee.TestingUtilities.getV2ResourcePath; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import com.mindee.input.LocalInputSource; import com.mindee.v2.fileoperations.Split; @@ -13,12 +15,22 @@ import com.mindee.v2.product.split.SplitResponse; import java.io.IOException; import java.nio.file.Files; +import java.nio.file.Path; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; @DisplayName("MindeeV2 - Split Model Tests") public class SplitTest { + private static final Path outputPath = getResourcePath("output/v2/product/split"); + + @BeforeAll + public static void setup() throws IOException { + deleteRecursively(outputPath); + Files.createDirectories(outputPath); + } + private SplitResponse loadResponse(String filePath) throws IOException { var localResponse = new LocalResponse(getV2ResourcePath(filePath)); return localResponse.deserializeResponse(SplitResponse.class); @@ -127,14 +139,12 @@ void extractMultipleSplits() throws IOException { var methodExtract = response.getInference().getResult().extractFromInputSource(inputSource); assertEquals(classExtract.size(), methodExtract.size()); - var outputPath = getResourcePath("output"); classExtract.saveAllToDisk(outputPath.toString()); - assert Files.exists(outputPath.resolve("default_sample_000-000.pdf")); - assert Files.size(outputPath.resolve("default_sample_000-000.pdf")) >= 1500; - - assert Files.exists(outputPath.resolve("default_sample_001-001.pdf")); - assert Files.size(outputPath.resolve("default_sample_001-001.pdf")) >= 1500; + assertTrue(Files.exists(outputPath.resolve("default_sample_pages-001-001.pdf"))); + assertTrue(Files.size(outputPath.resolve("default_sample_pages-001-001.pdf")) >= 1500); + assertTrue(Files.exists(outputPath.resolve("default_sample_pages-002-002.pdf"))); + assertTrue(Files.size(outputPath.resolve("default_sample_pages-002-002.pdf")) >= 1500); } @Test @@ -154,11 +164,10 @@ void extractSingleSplit() throws IOException { .get(0) .extractFromInputSource(inputSource); - var outputPath = getResourcePath("output"); - extractedSplit.writeToFile(outputPath.resolve("default_sample_999.pdf")); + extractedSplit.writeToFile(outputPath); - assert Files.exists(outputPath.resolve("default_sample_999.pdf")); - assert Files.size(outputPath.resolve("default_sample_999.pdf")) >= 1500; + assertTrue(Files.exists(outputPath.resolve("default_sample_pages-001-001.pdf"))); + assertTrue(Files.size(outputPath.resolve("default_sample_pages-001-001.pdf")) >= 1500); } } }