@@ -40,8 +40,14 @@ class ZipImportError(ImportError):
40
40
_module_type = type (sys )
41
41
42
42
END_CENTRAL_DIR_SIZE = 22
43
- STRING_END_ARCHIVE = b'PK\x05 \x06 '
43
+ END_CENTRAL_DIR_SIZE_64 = 56
44
+ END_CENTRAL_DIR_LOCATOR_SIZE_64 = 20
45
+ STRING_END_ARCHIVE = b'PK\x05 \x06 ' # standard EOCD signature
46
+ STRING_END_LOCATOR_64 = b'PK\x06 \x07 ' # Zip64 EOCD Locator signature
47
+ STRING_END_ZIP_64 = b'PK\x06 \x06 ' # Zip64 EOCD signature
44
48
MAX_COMMENT_LEN = (1 << 16 ) - 1
49
+ MAX_UINT32 = 0xffffffff
50
+ ZIP64_EXTRA_TAG = 0x1
45
51
46
52
class zipimporter (_bootstrap_external ._LoaderBasics ):
47
53
"""zipimporter(archivepath) -> zipimporter object
@@ -406,49 +412,69 @@ def _read_directory(archive):
406
412
raise ZipImportError (f"can't open Zip file: { archive !r} " , path = archive )
407
413
408
414
with fp :
415
+ # Check if there's a comment.
409
416
try :
410
- fp .seek (- END_CENTRAL_DIR_SIZE , 2 )
411
- header_position = fp .tell ()
412
- buffer = fp .read (END_CENTRAL_DIR_SIZE )
417
+ fp .seek (0 , 2 )
418
+ file_size = fp .tell ()
413
419
except OSError :
414
- raise ZipImportError (f"can't read Zip file: { archive !r} " , path = archive )
415
- if len (buffer ) != END_CENTRAL_DIR_SIZE :
416
- raise ZipImportError (f"can't read Zip file: { archive !r} " , path = archive )
417
- if buffer [:4 ] != STRING_END_ARCHIVE :
418
- # Bad: End of Central Dir signature
419
- # Check if there's a comment.
420
- try :
421
- fp .seek (0 , 2 )
422
- file_size = fp .tell ()
423
- except OSError :
424
- raise ZipImportError (f"can't read Zip file: { archive !r} " ,
425
- path = archive )
426
- max_comment_start = max (file_size - MAX_COMMENT_LEN -
427
- END_CENTRAL_DIR_SIZE , 0 )
428
- try :
429
- fp .seek (max_comment_start )
430
- data = fp .read ()
431
- except OSError :
432
- raise ZipImportError (f"can't read Zip file: { archive !r} " ,
433
- path = archive )
434
- pos = data .rfind (STRING_END_ARCHIVE )
435
- if pos < 0 :
436
- raise ZipImportError (f'not a Zip file: { archive !r} ' ,
420
+ raise ZipImportError (f"can't read Zip file: { archive !r} " ,
421
+ path = archive )
422
+ max_comment_start = max (file_size - MAX_COMMENT_LEN -
423
+ END_CENTRAL_DIR_SIZE - END_CENTRAL_DIR_SIZE_64 -
424
+ END_CENTRAL_DIR_LOCATOR_SIZE_64 , 0 )
425
+ try :
426
+ fp .seek (max_comment_start )
427
+ data = fp .read ()
428
+ except OSError :
429
+ raise ZipImportError (f"can't read Zip file: { archive !r} " ,
430
+ path = archive )
431
+ pos = data .rfind (STRING_END_ARCHIVE )
432
+ pos64 = data .rfind (STRING_END_ZIP_64 )
433
+
434
+ if (pos64 >= 0 and pos64 + END_CENTRAL_DIR_SIZE_64 + END_CENTRAL_DIR_LOCATOR_SIZE_64 == pos ):
435
+ # Zip64 at "correct" offset from standard EOCD
436
+ buffer = data [pos64 :pos64 + END_CENTRAL_DIR_SIZE_64 ]
437
+ if len (buffer ) != END_CENTRAL_DIR_SIZE_64 :
438
+ raise ZipImportError (f"corrupt Zip64 file: { archive !r} " ,
437
439
path = archive )
440
+ header_position = file_size - len (data ) + pos64
441
+
442
+ central_directory_size = int .from_bytes (buffer [40 :48 ], 'little' )
443
+ central_directory_position = int .from_bytes (buffer [48 :56 ], 'little' )
444
+ num_entries = int .from_bytes (buffer [24 :32 ], 'little' )
445
+ elif pos >= 0 :
438
446
buffer = data [pos :pos + END_CENTRAL_DIR_SIZE ]
439
447
if len (buffer ) != END_CENTRAL_DIR_SIZE :
440
448
raise ZipImportError (f"corrupt Zip file: { archive !r} " ,
441
449
path = archive )
450
+
442
451
header_position = file_size - len (data ) + pos
443
452
444
- header_size = _unpack_uint32 (buffer [12 :16 ])
445
- header_offset = _unpack_uint32 (buffer [16 :20 ])
446
- if header_position < header_size :
453
+ # Buffer now contains a valid EOCD, and header_position gives the
454
+ # starting position of it.
455
+ central_directory_size = _unpack_uint32 (buffer [12 :16 ])
456
+ central_directory_position = _unpack_uint32 (buffer [16 :20 ])
457
+ num_entries = _unpack_uint16 (buffer [8 :10 ])
458
+
459
+ # N.b. if someday you want to prefer the standard (non-zip64) EOCD,
460
+ # you need to adjust position by 76 for arc to be 0.
461
+ else :
462
+ raise ZipImportError (f'not a Zip file: { archive !r} ' ,
463
+ path = archive )
464
+
465
+ # Buffer now contains a valid EOCD, and header_position gives the
466
+ # starting position of it.
467
+ # XXX: These are cursory checks but are not as exact or strict as they
468
+ # could be. Checking the arc-adjusted value is probably good too.
469
+ if header_position < central_directory_size :
447
470
raise ZipImportError (f'bad central directory size: { archive !r} ' , path = archive )
448
- if header_position < header_offset :
471
+ if header_position < central_directory_position :
449
472
raise ZipImportError (f'bad central directory offset: { archive !r} ' , path = archive )
450
- header_position -= header_size
451
- arc_offset = header_position - header_offset
473
+ header_position -= central_directory_size
474
+ # On just-a-zipfile these values are the same and arc_offset is zero; if
475
+ # the file has some bytes prepended, `arc_offset` is the number of such
476
+ # bytes. This is used for pex as well as self-extracting .exe.
477
+ arc_offset = header_position - central_directory_position
452
478
if arc_offset < 0 :
453
479
raise ZipImportError (f'bad central directory size or offset: { archive !r} ' , path = archive )
454
480
@@ -465,6 +491,11 @@ def _read_directory(archive):
465
491
raise EOFError ('EOF read where not expected' )
466
492
# Start of file header
467
493
if buffer [:4 ] != b'PK\x01 \x02 ' :
494
+ if count != num_entries :
495
+ raise ZipImportError (
496
+ f"mismatched num_entries: { count } should be { num_entries } in { archive !r} " ,
497
+ path = archive ,
498
+ )
468
499
break # Bad: Central Dir File Header
469
500
if len (buffer ) != 46 :
470
501
raise EOFError ('EOF read where not expected' )
@@ -480,9 +511,6 @@ def _read_directory(archive):
480
511
comment_size = _unpack_uint16 (buffer [32 :34 ])
481
512
file_offset = _unpack_uint32 (buffer [42 :46 ])
482
513
header_size = name_size + extra_size + comment_size
483
- if file_offset > header_offset :
484
- raise ZipImportError (f'bad local header offset: { archive !r} ' , path = archive )
485
- file_offset += arc_offset
486
514
487
515
try :
488
516
name = fp .read (name_size )
@@ -494,7 +522,10 @@ def _read_directory(archive):
494
522
# slower than reading the data because fseek flushes stdio's
495
523
# internal buffers. See issue #8745.
496
524
try :
497
- if len (fp .read (header_size - name_size )) != header_size - name_size :
525
+ extra_data_len = header_size - name_size
526
+ extra_data = fp .read (extra_data_len )
527
+
528
+ if len (extra_data ) != extra_data_len :
498
529
raise ZipImportError (f"can't read Zip file: { archive !r} " , path = archive )
499
530
except OSError :
500
531
raise ZipImportError (f"can't read Zip file: { archive !r} " , path = archive )
@@ -511,6 +542,65 @@ def _read_directory(archive):
511
542
512
543
name = name .replace ('/' , path_sep )
513
544
path = _bootstrap_external ._path_join (archive , name )
545
+
546
+ # Ordering matches unpacking below.
547
+ if (
548
+ file_size == MAX_UINT32 or
549
+ data_size == MAX_UINT32 or
550
+ file_offset == MAX_UINT32
551
+ ):
552
+ # need to decode extra_data looking for a zip64 extra (which might not
553
+ # be present)
554
+ while extra_data :
555
+ if len (extra_data ) < 4 :
556
+ raise ZipImportError (f"can't read header extra: { archive !r} " , path = archive )
557
+ tag = _unpack_uint16 (extra_data [:2 ])
558
+ size = _unpack_uint16 (extra_data [2 :4 ])
559
+ if len (extra_data ) < 4 + size :
560
+ raise ZipImportError (f"can't read header extra: { archive !r} " , path = archive )
561
+ if tag == ZIP64_EXTRA_TAG :
562
+ if (len (extra_data ) - 4 ) % 8 != 0 :
563
+ raise ZipImportError (f"can't read header extra: { archive !r} " , path = archive )
564
+ values = [
565
+ int .from_bytes (extra_data [i :i + 8 ], 'little' )
566
+ for i in range (4 , len (extra_data ), 8 )
567
+ ]
568
+
569
+ # N.b. Here be dragons: the ordering of these is different than
570
+ # the header fields, and it's really easy to get it wrong since
571
+ # naturally-occurring zips that use all 3 are >4GB and not
572
+ # something that would be checked-in.
573
+ # The tests include a binary-edited zip that uses zip64
574
+ # (unnecessarily) for all three.
575
+ if file_size == MAX_UINT32 :
576
+ file_size = values .pop (0 )
577
+ if data_size == MAX_UINT32 :
578
+ data_size = values .pop (0 )
579
+ if file_offset == MAX_UINT32 :
580
+ file_offset = values .pop (0 )
581
+
582
+ if values :
583
+ raise ZipImportError (f"can't read header extra: { archive !r} " , path = archive )
584
+
585
+ break
586
+
587
+ # For a typical zip, this bytes-slicing only happens 2-3 times, on
588
+ # small data like timestamps and filesizes.
589
+ extra_data = extra_data [4 + size :]
590
+ else :
591
+ _bootstrap ._verbose_message (
592
+ "zipimport: suspected zip64 but no zip64 extra for {!r}" ,
593
+ path ,
594
+ )
595
+ # XXX These two statements seem swapped because `central_directory_position` is a
596
+ # position within the actual file, but `file_offset` (when compared) is
597
+ # as encoded in the entry, not adjusted for this file.
598
+ # N.b. this must be after we've potentially read the zip64 extra which can
599
+ # change `file_offset`.
600
+ if file_offset > central_directory_position :
601
+ raise ZipImportError (f'bad local header offset: { archive !r} ' , path = archive )
602
+ file_offset += arc_offset
603
+
514
604
t = (path , compress , data_size , file_size , file_offset , time , date , crc )
515
605
files [name ] = t
516
606
count += 1
0 commit comments