@@ -42,23 +42,36 @@ def __init__(self, local_input: LocalInputSource):
4242 pdf_image .save (self ._source_pdf , format = "PDF" )
4343
@requires_pypdfium2
def extract_single_document(self, page_indexes: list[int]) -> ExtractedPDF:
    """
    Create a new PDF from a selection of pages of the source PDF.

    :param page_indexes: Zero-based indexes of the pages to copy from the original PDF,
        in the order they should appear in the new document.
    :return: An ExtractedPDF holding the new PDF's byte stream, a filename generated
        from the first and last requested indexes, and the requested indexes.
    :raises MindeeError: If ``page_indexes`` is empty, or if any index falls outside
        the range ``0 .. page count - 1``.
    """
    if not page_indexes:
        raise MindeeError("Empty indexes aren't allowed for extraction.")
    for page_index in page_indexes:
        # Indexes are zero-based, so the last valid one is ``_page_count - 1``;
        # an index equal to the page count (or a negative one) must be rejected
        # here rather than surfacing later as a pdfium import failure.
        if not 0 <= page_index < self._page_count:
            raise MindeeError(f"Index {page_index} is out of range.")

    # Rewind the source stream so pdfium reads the document from its start.
    self._source_pdf.seek(0)
    new_pdf = pdfium.PdfDocument.new()
    pdf = pdfium.PdfDocument(self._source_pdf)
    new_pdf.import_pages(pdf, page_indexes)
    bytes_io = io.BytesIO()
    new_pdf.save(bytes_io)

    return ExtractedPDF(
        pdf_byte_stream=bytes_io,
        filename=self._make_filename(page_indexes[0], page_indexes[-1]),
        page_indexes=page_indexes,
    )
5972
6073 @requires_pypdfium2
61- def extract_sub_documents (
74+ def extract_multiple_documents (
6275 self , page_indexes : list [list [int ]]
6376 ) -> list [ExtractedPDF ]:
6477 """
@@ -67,35 +80,13 @@ def extract_sub_documents(
6780 :param page_indexes: 2D list of numbers, representing page indexes.
6881 :return: A list of created PDFS.
6982 """
83+ if len (page_indexes ) < 1 :
84+ raise MindeeError ("No indexes provided." )
7085 extracted_pdfs : list [ExtractedPDF ] = []
71- extension = Path (self ._filename ).suffix
72- stem = Path (self ._filename ).stem
7386 for page_index_elem in page_indexes :
74- if not page_index_elem or len (page_index_elem ) == 0 :
75- raise MindeeError ("Empty indexes aren't allowed for extraction." )
76- for page_index in page_index_elem :
77- if page_index > self ._page_count :
78- raise MindeeError (f"Index { page_index } is out of range." )
79- first_page = page_index_elem [0 ]
80- last_page = page_index_elem [len (page_index_elem ) - 1 ]
81- extracted_pdf = ExtractedPDF (
82- self .cut_pages (page_index_elem ),
83- f"{ stem } _pages-{ (first_page + 1 ):03d} -{ (last_page + 1 ):03d} { extension } " ,
84- (first_page , last_page ),
85- )
86- extracted_pdfs .append (extracted_pdf )
87+ extracted_pdfs .append (self .extract_single_document (page_index_elem ))
8788 return extracted_pdfs
8889
89- def extract_documents (
90- self ,
91- page_indexes : list [list [int ]],
92- ) -> list [ExtractedPDF ]:
93- """
94- Extracts complete PDFs from the document.
95-
96- :param page_indexes: List of sub-lists of pages to keep.
97- :return: A list of extracted invoices.
98- """
99- if len (page_indexes ) < 1 :
100- raise MindeeError ("No indexes provided." )
101- return self .extract_sub_documents (page_indexes )
def _make_filename(self, first_page: int, last_page: int) -> str:
    """
    Build the filename given to a PDF extracted from a range of pages.

    :param first_page: Index of the first extracted page; rendered incremented by one.
    :param last_page: Index of the last extracted page; rendered incremented by one.
    :return: The stem of the source filename, followed by the page range padded to
        at least three digits, with a ``.pdf`` extension.
    """
    base_name = Path(self._filename).stem
    start_label = f"{first_page + 1:03d}"
    end_label = f"{last_page + 1:03d}"
    return f"{base_name}_pages-{start_label}-{end_label}.pdf"
0 commit comments