@@ -2134,6 +2134,52 @@ def test_repack_overlapping_blocks(self):
21342134 with self .assertRaisesRegex (zipfile .BadZipFile , 'Overlapped entries' ):
21352135 zh .repack ()
21362136
def test_repack_scan_unsigned_data_descriptor(self):
    """Check how repack() treats an entry with an unsigned data descriptor.

    With the default (strict_descriptor=True) the scan does not reclaim
    an unreferenced entry written with an unsigned data descriptor, yet
    the archive stays valid.  With strict_descriptor=False the entry is
    reclaimed, so the resulting archive is smaller.
    """
    target, _ = self.test_files[1]
    expected_names = [name for name, _ in self.test_files if name != target]

    # Produce entries that use *unsigned* data descriptors: write through
    # an unseekable stream while struct.pack is patched so that the
    # descriptor signature is stripped.
    raw = io.BytesIO()
    with mock.patch.object(struct, 'pack', side_effect=struct_pack_no_dd_sig), \
            zipfile.ZipFile(Unseekable(raw), 'w', self.compression) as zh:
        for member, payload in self.test_files:
            with zh.open(member, 'w') as fh:
                fh.write(payload)
    archive = raw.getvalue()

    # Sanity check: the entry to be removed really uses a data descriptor
    # (general purpose flag bit 3); it is unsigned by construction above.
    with zipfile.ZipFile(io.BytesIO(archive)) as zh:
        self.assertTrue(zh.getinfo(target).flag_bits & 0x08)

    def remove_and_repack(**repack_options):
        # Work on a fresh copy of the archive: drop the target entry,
        # repack with the given options, then confirm that the remaining
        # members are intact.  Returns the size of the repacked archive.
        stream = io.BytesIO(archive)
        with zipfile.ZipFile(stream, 'a', self.compression) as zh:
            zh.remove(target)
            zh.repack(**repack_options)
        size = len(stream.getvalue())
        with zipfile.ZipFile(stream) as zh:
            self.assertEqual(zh.namelist(), expected_names)
            self.assertIsNone(zh.testzip())
        return size

    # Default repack(): strict_descriptor=True does not locate the
    # unsigned data descriptor, so the local data is preserved.
    default_size = remove_and_repack()

    # strict_descriptor=False: the unsigned data descriptor is detected,
    # so the local data is reclaimed and the archive shrinks.
    strict_false_size = remove_and_repack(strict_descriptor=False)

    self.assertLess(strict_false_size, default_size)
2182+
21372183 def test_repack_removed_basic (self ):
21382184 """Should remove local file entries for provided deleted files."""
21392185 ln = len (self .test_files )
@@ -2628,7 +2674,9 @@ def test_validate_local_file_entry_zstd(self):
26282674 self ._test_validate_local_file_entry (method = zipfile .ZIP_ZSTANDARD )
26292675
26302676 def _test_validate_local_file_entry (self , method ):
2631- repacker = zipfile ._ZipRepacker ()
2677+ # strict_descriptor=False to exercise unsigned data descriptor scanning
2678+ # (the default is strict_descriptor=True, tested separately below)
2679+ repacker = zipfile ._ZipRepacker (strict_descriptor = False )
26322680
26332681 # basic
26342682 bytes_ = self ._generate_local_file_entry (
@@ -2799,7 +2847,9 @@ def test_validate_local_file_entry_zip64_zstd(self):
27992847 self ._test_validate_local_file_entry_zip64 (method = zipfile .ZIP_ZSTANDARD )
28002848
28012849 def _test_validate_local_file_entry_zip64 (self , method ):
2802- repacker = zipfile ._ZipRepacker ()
2850+ # strict_descriptor=False to exercise unsigned data descriptor scanning
2851+ # (the default is strict_descriptor=True, tested separately below)
2852+ repacker = zipfile ._ZipRepacker (strict_descriptor = False )
28032853
28042854 # zip64
28052855 bytes_ = self ._generate_local_file_entry (
@@ -2870,7 +2920,9 @@ def _test_validate_local_file_entry_zip64(self, method):
28702920 m_sddns .assert_not_called ()
28712921
28722922 def test_validate_local_file_entry_encrypted (self ):
2873- repacker = zipfile ._ZipRepacker ()
2923+ # strict_descriptor=False to exercise unsigned data descriptor scanning
2924+ # of an encrypted entry (the default strict_descriptor=True is tested below)
2925+ repacker = zipfile ._ZipRepacker (strict_descriptor = False )
28742926
28752927 bytes_ = (
28762928 b'PK\x03 \x04 '
@@ -2903,6 +2955,21 @@ def test_validate_local_file_entry_encrypted(self):
29032955 m_sddnsbd .assert_not_called ()
29042956 m_sddns .assert_called_once_with (fz , 38 , len (bytes_ ), False )
29052957
2958+ # return None for the unsigned data descriptor if `strict_descriptor=True`
2959+ repacker = zipfile ._ZipRepacker (strict_descriptor = True )
2960+ fz = io .BytesIO (bytes_ )
2961+ with mock .patch .object (repacker , '_scan_data_descriptor' ,
2962+ wraps = repacker ._scan_data_descriptor ) as m_sdd , \
2963+ mock .patch .object (repacker , '_scan_data_descriptor_no_sig_by_decompression' ,
2964+ wraps = repacker ._scan_data_descriptor_no_sig_by_decompression ) as m_sddnsbd , \
2965+ mock .patch .object (repacker , '_scan_data_descriptor_no_sig' ,
2966+ wraps = repacker ._scan_data_descriptor_no_sig ) as m_sddns :
2967+ result = repacker ._validate_local_file_entry (fz , 0 , len (bytes_ ))
2968+ self .assertEqual (result , None )
2969+ m_sdd .assert_called_once_with (fz , 38 , len (bytes_ ), False )
2970+ m_sddnsbd .assert_not_called ()
2971+ m_sddns .assert_not_called ()
2972+
29062973 def test_iter_scan_signature (self ):
29072974 bytes_ = b'sig__sig__sig__sig'
29082975 ln = len (bytes_ )
0 commit comments