Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit a836182

Browse files
committed
Support zip64 in zipimport
* Reads zip64 files as produced by zipfile
* Includes tests (somewhat slow, however, because of the need to create "large" zips)
* About the same amount of strictness reading invalid zip files as zipfile has
1 parent 47e3562 commit a836182

File tree

4 files changed

+141
-37
lines changed

4 files changed

+141
-37
lines changed

Doc/library/zipimport.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ Any files may be present in the ZIP archive, but importers are only invoked for
3030
corresponding :file:`.pyc` file, meaning that if a ZIP archive
3131
doesn't contain :file:`.pyc` files, importing may be rather slow.
3232

33+
.. versionchanged:: 3.12
34+
ZIP64 is supported
35+
3336
.. versionchanged:: 3.8
3437
Previously, ZIP archives with an archive comment were not supported.
3538

Lib/test/test_zipimport.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,16 @@ def testLargestPossibleComment(self):
776776
files = {TESTMOD + ".py": (NOW, test_src)}
777777
self.doTest(".py", files, TESTMOD, comment=b"c" * ((1 << 16) - 1))
778778

779+
def testZip64(self):
780+
# This is the simplest way to make zipfile generate the zip64 EOCD block
781+
files = {f"f{n}.py": (NOW, test_src) for n in range(65537)}
782+
self.doTest(".py", files, "f6")
783+
784+
def testZip64CruftAndComment(self):
785+
# This is the simplest way to make zipfile generate the zip64 EOCD block
786+
files = {f"f{n}.py": (NOW, test_src) for n in range(65537)}
787+
self.doTest(".py", files, "f65536", comment=b"c" * ((1 << 16) - 1))
788+
779789

780790
@support.requires_zlib()
781791
class CompressedZipImportTestCase(UncompressedZipImportTestCase):

Lib/zipimport.py

Lines changed: 127 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,14 @@ class ZipImportError(ImportError):
4040
_module_type = type(sys)
4141

4242
END_CENTRAL_DIR_SIZE = 22
43-
STRING_END_ARCHIVE = b'PK\x05\x06'
43+
END_CENTRAL_DIR_SIZE_64 = 56
44+
END_CENTRAL_DIR_LOCATOR_SIZE_64 = 20
45+
STRING_END_ARCHIVE = b'PK\x05\x06' # standard EOCD signature
46+
STRING_END_LOCATOR_64 = b'PK\x06\x07' # Zip64 EOCD Locator signature
47+
STRING_END_ZIP_64 = b'PK\x06\x06' # Zip64 EOCD signature
4448
MAX_COMMENT_LEN = (1 << 16) - 1
49+
MAX_UINT32 = 0xffffffff
50+
ZIP64_EXTRA_TAG = 0x1
4551

4652
class zipimporter(_bootstrap_external._LoaderBasics):
4753
"""zipimporter(archivepath) -> zipimporter object
@@ -406,49 +412,69 @@ def _read_directory(archive):
406412
raise ZipImportError(f"can't open Zip file: {archive!r}", path=archive)
407413

408414
with fp:
415+
# Check if there's a comment.
409416
try:
410-
fp.seek(-END_CENTRAL_DIR_SIZE, 2)
411-
header_position = fp.tell()
412-
buffer = fp.read(END_CENTRAL_DIR_SIZE)
417+
fp.seek(0, 2)
418+
file_size = fp.tell()
413419
except OSError:
414-
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
415-
if len(buffer) != END_CENTRAL_DIR_SIZE:
416-
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
417-
if buffer[:4] != STRING_END_ARCHIVE:
418-
# Bad: End of Central Dir signature
419-
# Check if there's a comment.
420-
try:
421-
fp.seek(0, 2)
422-
file_size = fp.tell()
423-
except OSError:
424-
raise ZipImportError(f"can't read Zip file: {archive!r}",
425-
path=archive)
426-
max_comment_start = max(file_size - MAX_COMMENT_LEN -
427-
END_CENTRAL_DIR_SIZE, 0)
428-
try:
429-
fp.seek(max_comment_start)
430-
data = fp.read()
431-
except OSError:
432-
raise ZipImportError(f"can't read Zip file: {archive!r}",
433-
path=archive)
434-
pos = data.rfind(STRING_END_ARCHIVE)
435-
if pos < 0:
436-
raise ZipImportError(f'not a Zip file: {archive!r}',
420+
raise ZipImportError(f"can't read Zip file: {archive!r}",
421+
path=archive)
422+
max_comment_start = max(file_size - MAX_COMMENT_LEN -
423+
END_CENTRAL_DIR_SIZE - END_CENTRAL_DIR_SIZE_64 -
424+
END_CENTRAL_DIR_LOCATOR_SIZE_64, 0)
425+
try:
426+
fp.seek(max_comment_start)
427+
data = fp.read()
428+
except OSError:
429+
raise ZipImportError(f"can't read Zip file: {archive!r}",
430+
path=archive)
431+
pos = data.rfind(STRING_END_ARCHIVE)
432+
pos64 = data.rfind(STRING_END_ZIP_64)
433+
434+
if (pos64 >= 0 and pos64+END_CENTRAL_DIR_SIZE_64+END_CENTRAL_DIR_LOCATOR_SIZE_64==pos):
435+
# Zip64 at "correct" offset from standard EOCD
436+
buffer = data[pos64:pos64 + END_CENTRAL_DIR_SIZE_64]
437+
if len(buffer) != END_CENTRAL_DIR_SIZE_64:
438+
raise ZipImportError(f"corrupt Zip64 file: {archive!r}",
437439
path=archive)
440+
header_position = file_size - len(data) + pos64
441+
442+
central_directory_size = int.from_bytes(buffer[40:48], 'little')
443+
central_directory_position = int.from_bytes(buffer[48:56], 'little')
444+
num_entries = int.from_bytes(buffer[24:32], 'little')
445+
elif pos >= 0:
438446
buffer = data[pos:pos+END_CENTRAL_DIR_SIZE]
439447
if len(buffer) != END_CENTRAL_DIR_SIZE:
440448
raise ZipImportError(f"corrupt Zip file: {archive!r}",
441449
path=archive)
450+
442451
header_position = file_size - len(data) + pos
443452

444-
header_size = _unpack_uint32(buffer[12:16])
445-
header_offset = _unpack_uint32(buffer[16:20])
446-
if header_position < header_size:
453+
# Buffer now contains a valid EOCD, and header_position gives the
454+
# starting position of it.
455+
central_directory_size = _unpack_uint32(buffer[12:16])
456+
central_directory_position = _unpack_uint32(buffer[16:20])
457+
num_entries = _unpack_uint16(buffer[8:10])
458+
459+
# N.b. if someday you want to prefer the standard (non-zip64) EOCD,
460+
# you need to adjust position by 76 for arc to be 0.
461+
else:
462+
raise ZipImportError(f'not a Zip file: {archive!r}',
463+
path=archive)
464+
465+
# Buffer now contains a valid EOCD, and header_position gives the
466+
# starting position of it.
467+
# XXX: These are cursory checks but are not as exact or strict as they
468+
# could be. Checking the arc-adjusted value is probably good too.
469+
if header_position < central_directory_size:
447470
raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
448-
if header_position < header_offset:
471+
if header_position < central_directory_position:
449472
raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
450-
header_position -= header_size
451-
arc_offset = header_position - header_offset
473+
header_position -= central_directory_size
474+
# On just-a-zipfile these values are the same and arc_offset is zero; if
475+
# the file has some bytes prepended, `arc_offset` is the number of such
476+
# bytes. This is used for pex as well as self-extracting .exe.
477+
arc_offset = header_position - central_directory_position
452478
if arc_offset < 0:
453479
raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)
454480

@@ -465,6 +491,11 @@ def _read_directory(archive):
465491
raise EOFError('EOF read where not expected')
466492
# Start of file header
467493
if buffer[:4] != b'PK\x01\x02':
494+
if count != num_entries:
495+
raise ZipImportError(
496+
f"mismatched num_entries: {count} should be {num_entries} in {archive!r}",
497+
path=archive,
498+
)
468499
break # Bad: Central Dir File Header
469500
if len(buffer) != 46:
470501
raise EOFError('EOF read where not expected')
@@ -480,9 +511,6 @@ def _read_directory(archive):
480511
comment_size = _unpack_uint16(buffer[32:34])
481512
file_offset = _unpack_uint32(buffer[42:46])
482513
header_size = name_size + extra_size + comment_size
483-
if file_offset > header_offset:
484-
raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
485-
file_offset += arc_offset
486514

487515
try:
488516
name = fp.read(name_size)
@@ -494,7 +522,10 @@ def _read_directory(archive):
494522
# slower than reading the data because fseek flushes stdio's
495523
# internal buffers. See issue #8745.
496524
try:
497-
if len(fp.read(header_size - name_size)) != header_size - name_size:
525+
extra_data_len = header_size - name_size
526+
extra_data = fp.read(extra_data_len)
527+
528+
if len(extra_data) != extra_data_len:
498529
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
499530
except OSError:
500531
raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
@@ -511,6 +542,65 @@ def _read_directory(archive):
511542

512543
name = name.replace('/', path_sep)
513544
path = _bootstrap_external._path_join(archive, name)
545+
546+
# Ordering matches unpacking below.
547+
if (
548+
file_size == MAX_UINT32 or
549+
data_size == MAX_UINT32 or
550+
file_offset == MAX_UINT32
551+
):
552+
# need to decode extra_data looking for a zip64 extra (which might not
553+
# be present)
554+
while extra_data:
555+
if len(extra_data) < 4:
556+
raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
557+
tag = _unpack_uint16(extra_data[:2])
558+
size = _unpack_uint16(extra_data[2:4])
559+
if len(extra_data) < 4 + size:
560+
raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
561+
if tag == ZIP64_EXTRA_TAG:
562+
if (len(extra_data) - 4) % 8 != 0:
563+
raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
564+
values = [
565+
int.from_bytes(extra_data[i:i+8], 'little')
566+
for i in range(4, len(extra_data), 8)
567+
]
568+
569+
# N.b. Here be dragons: the ordering of these is different than
570+
# the header fields, and it's really easy to get it wrong since
571+
# naturally-occuring zips that use all 3 are >4GB and not
572+
# something that would be checked-in.
573+
# The tests include a binary-edited zip that uses zip64
574+
# (unnecessarily) for all three.
575+
if file_size == MAX_UINT32:
576+
file_size = values.pop(0)
577+
if data_size == MAX_UINT32:
578+
data_size = values.pop(0)
579+
if file_offset == MAX_UINT32:
580+
file_offset = values.pop(0)
581+
582+
if values:
583+
raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
584+
585+
break
586+
587+
# For a typical zip, this bytes-slicing only happens 2-3 times, on
588+
# small data like timestamps and filesizes.
589+
extra_data = extra_data[4+size:]
590+
else:
591+
_bootstrap._verbose_message(
592+
"zipimport: suspected zip64 but no zip64 extra for {!r}",
593+
path,
594+
)
595+
# XXX These two statements seem swapped because `central_directory_position` is a
596+
# position within the actual file, but `file_offset` (when compared) is
597+
# as encoded in the entry, not adjusted for this file.
598+
# N.b. this must be after we've potentially read the zip64 extra which can
599+
# change `file_offset`.
600+
if file_offset > central_directory_position:
601+
raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
602+
file_offset += arc_offset
603+
514604
t = (path, compress, data_size, file_size, file_offset, time, date, crc)
515605
files[name] = t
516606
count += 1
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
The ``zipimport`` module can now read ZIP64 files.

0 commit comments

Comments
 (0)