diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index e23f5c2a8556f2..73c6b0185a1a0e 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -1616,6 +1616,33 @@ def test_write_unicode_filenames(self): self.assertEqual(zf.filelist[0].filename, "foo.txt") self.assertEqual(zf.filelist[1].filename, "\xf6.txt") + @requires_zlib() + def test_read_zipfile_containing_unicode_path_extra_field(self): + with zipfile.ZipFile(TESTFN, mode='w') as zf: + # create a file with a non-ASCII name + filename = '이름.txt' + filename_encoded = filename.encode('utf-8') + + # create a ZipInfo object with Unicode path extra field + zip_info = zipfile.ZipInfo(filename) + + tag_for_unicode_path = b'\x75\x70' + version_of_unicode_path = b'\x01' + + import zlib + filename_crc = struct.pack('= 0: + filename = filename[0:null_byte] + # This is used to ensure paths in generated ZIP files always use + # forward slashes as the directory separator, as required by the + # ZIP format specification. + if os.sep != "/" and os.sep in filename: + filename = filename.replace(os.sep, "/") + return filename + class ZipInfo (object): """Class with attributes describing each file in the ZIP archive.""" @@ -368,16 +384,9 @@ class ZipInfo (object): def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): self.orig_filename = filename # Original file name in archive - # Terminate the file name at the first null byte. Null bytes in file - # names are used as tricks by viruses in archives. - null_byte = filename.find(chr(0)) - if null_byte >= 0: - filename = filename[0:null_byte] - # This is used to ensure paths in generated ZIP files always use - # forward slashes as the directory separator, as required by the - # ZIP format specification. 
- if os.sep != "/" and os.sep in filename: - filename = filename.replace(os.sep, "/") + # Terminate the file name at the first null byte and + # ensure paths always use forward slashes as the directory separator. + filename = _sanitize_filename(filename) self.filename = filename # Normalized file name self.date_time = date_time # year, month, day, hour, min, sec @@ -482,7 +491,7 @@ def _encodeFilenameFlags(self): except UnicodeEncodeError: return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME - def _decodeExtra(self): + def _decodeExtra(self, filename_crc): # Try to decode the extra field. extra = self.extra unpack = struct.unpack @@ -508,6 +517,21 @@ def _decodeExtra(self): except struct.error: raise BadZipFile(f"Corrupt zip64 extra field. " f"{field} not found.") from None + elif tp == 0x7075: + data = extra[4:ln+4] + # Unicode Path Extra Field + try: + up_version, up_name_crc = unpack(' 2: print(centdir) filename = fp.read(centdir[_CD_FILENAME_LENGTH]) + orig_filename_crc = crc32(filename) flags = centdir[_CD_FLAG_BITS] if flags & _MASK_UTF_FILENAME: # UTF-8 file names extension @@ -1432,8 +1457,7 @@ def _RealGetContents(self): x._raw_time = t x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) - - x._decodeExtra() + x._decodeExtra(orig_filename_crc) x.header_offset = x.header_offset + concat self.filelist.append(x) self.NameToInfo[x.filename] = x diff --git a/Misc/ACKS b/Misc/ACKS index 7bbde3af99782b..89a4ed40a24998 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -627,6 +627,7 @@ Julian Gindi Yannick Gingras Neil Girdhar Matt Giuca +Andrea Giudiceandrea Franz Glasner Wim Glenn Michael Goderbauer diff --git a/Misc/NEWS.d/next/Documentation/2023-03-10-04-59-35.gh-issue-86094.zOYdy8.rst b/Misc/NEWS.d/next/Documentation/2023-03-10-04-59-35.gh-issue-86094.zOYdy8.rst new file mode 100644 index 00000000000000..39461f3f84c9ac --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2023-03-10-04-59-35.gh-issue-86094.zOYdy8.rst 
@@ -0,0 +1,2 @@ +Add support for the Unicode Path Extra Field in ZipFile. Patch by Yeojin Kim +and Andrea Giudiceandrea.