Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 77d8997

Browse files
Issue #23252: Added support for writing ZIP files to unseekable streams.
1 parent f07a4b6 commit 77d8997

5 files changed

Lines changed: 120 additions & 35 deletions

File tree

Doc/library/zipfile.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ ZipFile Objects
140140
ZIP file, then a new ZIP archive is appended to the file. This is meant for
141141
adding a ZIP archive to another file (such as :file:`python.exe`). If
142142
*mode* is ``a`` and the file does not exist at all, it is created.
143+
If *mode* is ``r`` or ``a``, the file should be seekable.
143144
*compression* is the ZIP compression method to use when writing the archive,
144145
and should be :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`,
145146
:const:`ZIP_BZIP2` or :const:`ZIP_LZMA`; unrecognized
@@ -171,6 +172,9 @@ ZipFile Objects
171172
.. versionchanged:: 3.4
172173
ZIP64 extensions are enabled by default.
173174

175+
.. versionchanged:: 3.5
176+
Added support for writing to unseekable streams.
177+
174178

175179
.. method:: ZipFile.close()
176180

@@ -328,7 +332,6 @@ ZipFile Objects
328332
If ``arcname`` (or ``filename``, if ``arcname`` is not given) contains a null
329333
byte, the name of the file in the archive will be truncated at the null byte.
330334

331-
332335
.. method:: ZipFile.writestr(zinfo_or_arcname, bytes[, compress_type])
333336

334337
Write the string *bytes* to the archive; *zinfo_or_arcname* is either the file

Doc/whatsnew/3.5.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,12 @@ faulthandler
448448
:func:`~faulthandler.dump_traceback_later` functions now accept file
449449
descriptors. (Contributed by Wei Wu in :issue:`23566`.)
450450

451+
zipfile
452+
-------
453+
454+
* Added support for writing ZIP files to unseekable streams.
455+
(Contributed by Serhiy Storchaka in :issue:`23252`.)
456+
451457

452458
Optimizations
453459
=============

Lib/test/test_zipfile.py

Lines changed: 50 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1685,25 +1685,63 @@ def __init__(self, fp):
16851685
self.offset = 0
16861686

16871687
def write(self, data):
1688-
self.offset += self.fp.write(data)
1688+
n = self.fp.write(data)
1689+
self.offset += n
1690+
return n
16891691

16901692
def tell(self):
16911693
return self.offset
16921694

16931695
def flush(self):
1694-
pass
1696+
self.fp.flush()
1697+
1698+
class Unseekable:
1699+
def __init__(self, fp):
1700+
self.fp = fp
1701+
1702+
def write(self, data):
1703+
return self.fp.write(data)
1704+
1705+
def flush(self):
1706+
self.fp.flush()
16951707

16961708
class UnseekableTests(unittest.TestCase):
1697-
def test_writestr_tellable(self):
1698-
f = io.BytesIO()
1699-
with zipfile.ZipFile(Tellable(f), 'w', zipfile.ZIP_STORED) as zipfp:
1700-
zipfp.writestr('ones', b'111')
1701-
zipfp.writestr('twos', b'222')
1702-
with zipfile.ZipFile(f, mode='r') as zipf:
1703-
with zipf.open('ones') as zopen:
1704-
self.assertEqual(zopen.read(), b'111')
1705-
with zipf.open('twos') as zopen:
1706-
self.assertEqual(zopen.read(), b'222')
1709+
def test_writestr(self):
1710+
for wrapper in (lambda f: f), Tellable, Unseekable:
1711+
with self.subTest(wrapper=wrapper):
1712+
f = io.BytesIO()
1713+
f.write(b'abc')
1714+
bf = io.BufferedWriter(f)
1715+
with zipfile.ZipFile(wrapper(bf), 'w', zipfile.ZIP_STORED) as zipfp:
1716+
zipfp.writestr('ones', b'111')
1717+
zipfp.writestr('twos', b'222')
1718+
self.assertEqual(f.getvalue()[:5], b'abcPK')
1719+
with zipfile.ZipFile(f, mode='r') as zipf:
1720+
with zipf.open('ones') as zopen:
1721+
self.assertEqual(zopen.read(), b'111')
1722+
with zipf.open('twos') as zopen:
1723+
self.assertEqual(zopen.read(), b'222')
1724+
1725+
def test_write(self):
1726+
for wrapper in (lambda f: f), Tellable, Unseekable:
1727+
with self.subTest(wrapper=wrapper):
1728+
f = io.BytesIO()
1729+
f.write(b'abc')
1730+
bf = io.BufferedWriter(f)
1731+
with zipfile.ZipFile(wrapper(bf), 'w', zipfile.ZIP_STORED) as zipfp:
1732+
self.addCleanup(unlink, TESTFN)
1733+
with open(TESTFN, 'wb') as f2:
1734+
f2.write(b'111')
1735+
zipfp.write(TESTFN, 'ones')
1736+
with open(TESTFN, 'wb') as f2:
1737+
f2.write(b'222')
1738+
zipfp.write(TESTFN, 'twos')
1739+
self.assertEqual(f.getvalue()[:5], b'abcPK')
1740+
with zipfile.ZipFile(f, mode='r') as zipf:
1741+
with zipf.open('ones') as zopen:
1742+
self.assertEqual(zopen.read(), b'111')
1743+
with zipf.open('twos') as zopen:
1744+
self.assertEqual(zopen.read(), b'222')
17071745

17081746

17091747
@requires_zlib

Lib/zipfile.py

Lines changed: 58 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,26 @@ def close(self):
667667
self._file = None
668668
self._close(fileobj)
669669

670+
# Provide the tell() method for an unseekable stream
671+
class _Tellable:
672+
def __init__(self, fp):
673+
self.fp = fp
674+
self.offset = 0
675+
676+
def write(self, data):
677+
n = self.fp.write(data)
678+
self.offset += n
679+
return n
680+
681+
def tell(self):
682+
return self.offset
683+
684+
def flush(self):
685+
self.fp.flush()
686+
687+
def close(self):
688+
self.fp.close()
689+
670690

671691
class ZipExtFile(io.BufferedIOBase):
672692
"""File-like object for reading an archive member.
@@ -994,6 +1014,7 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
9941014
self.filename = getattr(file, 'name', None)
9951015
self._fileRefCnt = 1
9961016
self._lock = threading.RLock()
1017+
self._seekable = True
9971018

9981019
try:
9991020
if mode == 'r':
@@ -1002,13 +1023,24 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
10021023
# set the modified flag so central directory gets written
10031024
# even if no files are added to the archive
10041025
self._didModify = True
1005-
self.start_dir = self.fp.tell()
1026+
try:
1027+
self.start_dir = self.fp.tell()
1028+
except (AttributeError, OSError):
1029+
self.fp = _Tellable(self.fp)
1030+
self.start_dir = 0
1031+
self._seekable = False
1032+
else:
1033+
# Some file-like objects can provide tell() but not seek()
1034+
try:
1035+
self.fp.seek(self.start_dir)
1036+
except (AttributeError, OSError):
1037+
self._seekable = False
10061038
elif mode == 'a':
10071039
try:
10081040
# See if file is a zip file
10091041
self._RealGetContents()
10101042
# seek to start of directory and overwrite
1011-
self.fp.seek(self.start_dir, 0)
1043+
self.fp.seek(self.start_dir)
10121044
except BadZipFile:
10131045
# file is not a zip file, just append
10141046
self.fp.seek(0, 2)
@@ -1415,7 +1447,8 @@ def write(self, filename, arcname=None, compress_type=None):
14151447
zinfo.file_size = st.st_size
14161448
zinfo.flag_bits = 0x00
14171449
with self._lock:
1418-
self.fp.seek(self.start_dir, 0)
1450+
if self._seekable:
1451+
self.fp.seek(self.start_dir)
14191452
zinfo.header_offset = self.fp.tell() # Start of header bytes
14201453
if zinfo.compress_type == ZIP_LZMA:
14211454
# Compressed data includes an end-of-stream (EOS) marker
@@ -1436,6 +1469,8 @@ def write(self, filename, arcname=None, compress_type=None):
14361469
return
14371470

14381471
cmpr = _get_compressor(zinfo.compress_type)
1472+
if not self._seekable:
1473+
zinfo.flag_bits |= 0x08
14391474
with open(filename, "rb") as fp:
14401475
# Must overwrite CRC and sizes with correct data later
14411476
zinfo.CRC = CRC = 0
@@ -1464,17 +1499,24 @@ def write(self, filename, arcname=None, compress_type=None):
14641499
zinfo.compress_size = file_size
14651500
zinfo.CRC = CRC
14661501
zinfo.file_size = file_size
1467-
if not zip64 and self._allowZip64:
1468-
if file_size > ZIP64_LIMIT:
1469-
raise RuntimeError('File size has increased during compressing')
1470-
if compress_size > ZIP64_LIMIT:
1471-
raise RuntimeError('Compressed size larger than uncompressed size')
1472-
# Seek backwards and write file header (which will now include
1473-
# correct CRC and file sizes)
1474-
self.start_dir = self.fp.tell() # Preserve current position in file
1475-
self.fp.seek(zinfo.header_offset, 0)
1476-
self.fp.write(zinfo.FileHeader(zip64))
1477-
self.fp.seek(self.start_dir, 0)
1502+
if zinfo.flag_bits & 0x08:
1503+
# Write CRC and file sizes after the file data
1504+
fmt = '<LQQ' if zip64 else '<LLL'
1505+
self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
1506+
zinfo.file_size))
1507+
self.start_dir = self.fp.tell()
1508+
else:
1509+
if not zip64 and self._allowZip64:
1510+
if file_size > ZIP64_LIMIT:
1511+
raise RuntimeError('File size has increased during compressing')
1512+
if compress_size > ZIP64_LIMIT:
1513+
raise RuntimeError('Compressed size larger than uncompressed size')
1514+
# Seek backwards and write file header (which will now include
1515+
# correct CRC and file sizes)
1516+
self.start_dir = self.fp.tell() # Preserve current position in file
1517+
self.fp.seek(zinfo.header_offset)
1518+
self.fp.write(zinfo.FileHeader(zip64))
1519+
self.fp.seek(self.start_dir)
14781520
self.filelist.append(zinfo)
14791521
self.NameToInfo[zinfo.filename] = zinfo
14801522

@@ -1504,11 +1546,8 @@ def writestr(self, zinfo_or_arcname, data, compress_type=None):
15041546

15051547
zinfo.file_size = len(data) # Uncompressed size
15061548
with self._lock:
1507-
try:
1549+
if self._seekable:
15081550
self.fp.seek(self.start_dir)
1509-
except (AttributeError, io.UnsupportedOperation):
1510-
# Some file-like objects can provide tell() but not seek()
1511-
pass
15121551
zinfo.header_offset = self.fp.tell() # Start of header data
15131552
if compress_type is not None:
15141553
zinfo.compress_type = compress_type
@@ -1557,11 +1596,8 @@ def close(self):
15571596
try:
15581597
if self.mode in ("w", "a") and self._didModify: # write ending records
15591598
with self._lock:
1560-
try:
1599+
if self._seekable:
15611600
self.fp.seek(self.start_dir)
1562-
except (AttributeError, io.UnsupportedOperation):
1563-
# Some file-like objects can provide tell() but not seek()
1564-
pass
15651601
self._write_end_record()
15661602
finally:
15671603
fp = self.fp

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ Core and Builtins
2323
Library
2424
-------
2525

26+
- Issue #23252: Added support for writing ZIP files to unseekable streams.
27+
2628
- Issue #21526: Tkinter now supports new boolean type in Tcl 8.5.
2729

2830
- Issue #23647: Increase imaplib's MAXLINE to accommodate modern mailbox sizes.

0 commit comments

Comments
 (0)