Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 4e18ac8

Browse files
committed
Merge heads
2 parents 98fe1a0 + 200e00a commit 4e18ac8

4 files changed

Lines changed: 192 additions & 23 deletions

File tree

Doc/library/bz2.rst

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,18 @@ All of the classes in this module may safely be accessed from multiple threads.
3737
*fileobj*), or operate directly on a named file (named by *filename*).
3838
Exactly one of these two parameters should be provided.
3939

40-
The *mode* argument can be either ``'r'`` for reading (default), or ``'w'``
41-
for writing.
40+
The *mode* argument can be either ``'r'`` for reading (default), ``'w'`` for
41+
overwriting, or ``'a'`` for appending. If *fileobj* is provided, a mode of
42+
``'w'`` does not truncate the file, and is instead equivalent to ``'a'``.
4243

4344
The *buffering* argument is ignored. Its use is deprecated.
4445

45-
If *mode* is ``'w'``, *compresslevel* can be a number between ``1`` and
46-
``9`` specifying the level of compression: ``1`` produces the least
47-
compression, and ``9`` (default) produces the most compression.
46+
If *mode* is ``'w'`` or ``'a'``, *compresslevel* can be a number between
47+
``1`` and ``9`` specifying the level of compression: ``1`` produces the
48+
least compression, and ``9`` (default) produces the most compression.
49+
50+
If *mode* is ``'r'``, the input file may be the concatenation of multiple
51+
compressed streams.
4852

4953
:class:`BZ2File` provides all of the members specified by the
5054
:class:`io.BufferedIOBase`, except for :meth:`detach` and :meth:`truncate`.
@@ -70,6 +74,10 @@ All of the classes in this module may safely be accessed from multiple threads.
7074
.. versionchanged:: 3.3
7175
The *fileobj* argument to the constructor was added.
7276

77+
.. versionchanged:: 3.3
78+
The ``'a'`` (append) mode was added, along with support for reading
79+
multi-stream files.
80+
7381

7482
Incremental (de)compression
7583
---------------------------
@@ -106,14 +114,20 @@ Incremental (de)compression
106114
incrementally. For one-shot decompression, use the :func:`decompress` function
107115
instead.
108116

117+
.. note::
118+
This class does not transparently handle inputs containing multiple
119+
compressed streams, unlike :func:`decompress` and :class:`BZ2File`. If
120+
you need to decompress a multi-stream input with :class:`BZ2Decompressor`,
121+
you must use a new decompressor for each stream.
122+
109123
.. method:: decompress(data)
110124

111125
Provide data to the decompressor object. Returns a chunk of decompressed
112126
data if possible, or an empty byte string otherwise.
113127

114-
Attempting to decompress data after the end of stream is reached raises
115-
an :exc:`EOFError`. If any data is found after the end of the stream, it
116-
is ignored and saved in the :attr:`unused_data` attribute.
128+
Attempting to decompress data after the end of the current stream is
129+
reached raises an :exc:`EOFError`. If any data is found after the end of
130+
the stream, it is ignored and saved in the :attr:`unused_data` attribute.
117131

118132

119133
.. attribute:: eof
@@ -127,6 +141,9 @@ Incremental (de)compression
127141

128142
Data found after the end of the compressed stream.
129143

144+
If this attribute is accessed before the end of the stream has been
145+
reached, its value will be ``b''``.
146+
130147

131148
One-shot (de)compression
132149
------------------------
@@ -145,5 +162,11 @@ One-shot (de)compression
145162

146163
Decompress *data*.
147164

165+
If *data* is the concatenation of multiple compressed streams, decompress
166+
all of the streams.
167+
148168
For incremental decompression, use a :class:`BZ2Decompressor` instead.
149169

170+
.. versionchanged:: 3.3
171+
Support for multi-stream inputs was added.
172+

Lib/bz2.py

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ def __init__(self, filename=None, mode="r", buffering=None,
7676
mode = "wb"
7777
mode_code = _MODE_WRITE
7878
self._compressor = BZ2Compressor()
79+
elif mode in ("a", "ab"):
80+
mode = "ab"
81+
mode_code = _MODE_WRITE
82+
self._compressor = BZ2Compressor()
7983
else:
8084
raise ValueError("Invalid mode: {!r}".format(mode))
8185

@@ -161,14 +165,25 @@ def _check_can_seek(self):
161165
def _fill_buffer(self):
162166
if self._buffer:
163167
return True
164-
if self._decompressor.eof:
165-
self._mode = _MODE_READ_EOF
166-
self._size = self._pos
167-
return False
168-
rawblock = self._fp.read(_BUFFER_SIZE)
168+
169+
if self._decompressor.unused_data:
170+
rawblock = self._decompressor.unused_data
171+
else:
172+
rawblock = self._fp.read(_BUFFER_SIZE)
173+
169174
if not rawblock:
170-
raise EOFError("Compressed file ended before the "
171-
"end-of-stream marker was reached")
175+
if self._decompressor.eof:
176+
self._mode = _MODE_READ_EOF
177+
self._size = self._pos
178+
return False
179+
else:
180+
raise EOFError("Compressed file ended before the "
181+
"end-of-stream marker was reached")
182+
183+
# Continue to next stream.
184+
if self._decompressor.eof:
185+
self._decompressor = BZ2Decompressor()
186+
172187
self._buffer = self._decompressor.decompress(rawblock)
173188
return True
174189

@@ -384,9 +399,15 @@ def decompress(data):
384399
"""
385400
if len(data) == 0:
386401
return b""
387-
decomp = BZ2Decompressor()
388-
result = decomp.decompress(data)
389-
if not decomp.eof:
390-
raise ValueError("Compressed data ended before the "
391-
"end-of-stream marker was reached")
392-
return result
402+
403+
result = b""
404+
while True:
405+
decomp = BZ2Decompressor()
406+
result += decomp.decompress(data)
407+
if not decomp.eof:
408+
raise ValueError("Compressed data ended before the "
409+
"end-of-stream marker was reached")
410+
if not decomp.unused_data:
411+
return result
412+
# There is unused data left over. Proceed to next stream.
413+
data = decomp.unused_data

Lib/test/test_bz2.py

Lines changed: 124 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,9 @@ def getData(self, crlf=False):
8484
else:
8585
return self.DATA
8686

87-
def createTempFile(self, crlf=False):
87+
def createTempFile(self, crlf=False, streams=1):
8888
with open(self.filename, "wb") as f:
89-
f.write(self.getData(crlf))
89+
f.write(self.getData(crlf) * streams)
9090

9191
def testRead(self):
9292
# "Test BZ2File.read()"
@@ -95,6 +95,26 @@ def testRead(self):
9595
self.assertRaises(TypeError, bz2f.read, None)
9696
self.assertEqual(bz2f.read(), self.TEXT)
9797

98+
def testReadMultiStream(self):
99+
# "Test BZ2File.read() with a multi stream archive"
100+
self.createTempFile(streams=5)
101+
with BZ2File(self.filename) as bz2f:
102+
self.assertRaises(TypeError, bz2f.read, None)
103+
self.assertEqual(bz2f.read(), self.TEXT * 5)
104+
105+
def testReadMonkeyMultiStream(self):
106+
# "Test BZ2File.read() with a multi stream archive in which stream"
107+
# "end is aligned with internal buffer size"
108+
buffer_size = bz2._BUFFER_SIZE
109+
bz2._BUFFER_SIZE = len(self.DATA)
110+
try:
111+
self.createTempFile(streams=5)
112+
with BZ2File(self.filename) as bz2f:
113+
self.assertRaises(TypeError, bz2f.read, None)
114+
self.assertEqual(bz2f.read(), self.TEXT * 5)
115+
finally:
116+
bz2._BUFFER_SIZE = buffer_size
117+
98118
def testRead0(self):
99119
# "Test BZ2File.read(0)"
100120
self.createTempFile()
@@ -114,6 +134,18 @@ def testReadChunk10(self):
114134
text += str
115135
self.assertEqual(text, self.TEXT)
116136

137+
def testReadChunk10MultiStream(self):
138+
# "Test BZ2File.read() in chunks of 10 bytes with a multi stream archive"
139+
self.createTempFile(streams=5)
140+
with BZ2File(self.filename) as bz2f:
141+
text = b''
142+
while 1:
143+
str = bz2f.read(10)
144+
if not str:
145+
break
146+
text += str
147+
self.assertEqual(text, self.TEXT * 5)
148+
117149
def testRead100(self):
118150
# "Test BZ2File.read(100)"
119151
self.createTempFile()
@@ -151,6 +183,15 @@ def testReadLine(self):
151183
for line in sio.readlines():
152184
self.assertEqual(bz2f.readline(), line)
153185

186+
def testReadLineMultiStream(self):
187+
# "Test BZ2File.readline() with a multi stream archive"
188+
self.createTempFile(streams=5)
189+
with BZ2File(self.filename) as bz2f:
190+
self.assertRaises(TypeError, bz2f.readline, None)
191+
sio = BytesIO(self.TEXT * 5)
192+
for line in sio.readlines():
193+
self.assertEqual(bz2f.readline(), line)
194+
154195
def testReadLines(self):
155196
# "Test BZ2File.readlines()"
156197
self.createTempFile()
@@ -159,13 +200,28 @@ def testReadLines(self):
159200
sio = BytesIO(self.TEXT)
160201
self.assertEqual(bz2f.readlines(), sio.readlines())
161202

203+
def testReadLinesMultiStream(self):
204+
# "Test BZ2File.readlines() with a multi stream archive"
205+
self.createTempFile(streams=5)
206+
with BZ2File(self.filename) as bz2f:
207+
self.assertRaises(TypeError, bz2f.readlines, None)
208+
sio = BytesIO(self.TEXT * 5)
209+
self.assertEqual(bz2f.readlines(), sio.readlines())
210+
162211
def testIterator(self):
163212
# "Test iter(BZ2File)"
164213
self.createTempFile()
165214
with BZ2File(self.filename) as bz2f:
166215
sio = BytesIO(self.TEXT)
167216
self.assertEqual(list(iter(bz2f)), sio.readlines())
168217

218+
def testIteratorMultiStream(self):
219+
# "Test iter(BZ2File) with a multi stream archive"
220+
self.createTempFile(streams=5)
221+
with BZ2File(self.filename) as bz2f:
222+
sio = BytesIO(self.TEXT * 5)
223+
self.assertEqual(list(iter(bz2f)), sio.readlines())
224+
169225
def testClosedIteratorDeadlock(self):
170226
# "Test that iteration on a closed bz2file releases the lock."
171227
# http://bugs.python.org/issue3309
@@ -217,6 +273,17 @@ def testWriteMethodsOnReadOnlyFile(self):
217273
self.assertRaises(IOError, bz2f.write, b"a")
218274
self.assertRaises(IOError, bz2f.writelines, [b"a"])
219275

276+
def testAppend(self):
277+
# "Test BZ2File.write() in append mode"
278+
with BZ2File(self.filename, "w") as bz2f:
279+
self.assertRaises(TypeError, bz2f.write)
280+
bz2f.write(self.TEXT)
281+
with BZ2File(self.filename, "a") as bz2f:
282+
self.assertRaises(TypeError, bz2f.write)
283+
bz2f.write(self.TEXT)
284+
with open(self.filename, 'rb') as f:
285+
self.assertEqual(self.decompress(f.read()), self.TEXT * 2)
286+
220287
def testSeekForward(self):
221288
# "Test BZ2File.seek(150, 0)"
222289
self.createTempFile()
@@ -225,6 +292,14 @@ def testSeekForward(self):
225292
bz2f.seek(150)
226293
self.assertEqual(bz2f.read(), self.TEXT[150:])
227294

295+
def testSeekForwardMultiStream(self):
296+
# "Test BZ2File.seek(150, 0) across stream boundaries"
297+
self.createTempFile(streams=2)
298+
with BZ2File(self.filename) as bz2f:
299+
self.assertRaises(TypeError, bz2f.seek)
300+
bz2f.seek(len(self.TEXT) + 150)
301+
self.assertEqual(bz2f.read(), self.TEXT[150:])
302+
228303
def testSeekBackwards(self):
229304
# "Test BZ2File.seek(-150, 1)"
230305
self.createTempFile()
@@ -233,13 +308,30 @@ def testSeekBackwards(self):
233308
bz2f.seek(-150, 1)
234309
self.assertEqual(bz2f.read(), self.TEXT[500-150:])
235310

311+
def testSeekBackwardsMultiStream(self):
312+
# "Test BZ2File.seek(-150, 1) across stream boundaries"
313+
self.createTempFile(streams=2)
314+
with BZ2File(self.filename) as bz2f:
315+
readto = len(self.TEXT) + 100
316+
while readto > 0:
317+
readto -= len(bz2f.read(readto))
318+
bz2f.seek(-150, 1)
319+
self.assertEqual(bz2f.read(), self.TEXT[100-150:] + self.TEXT)
320+
236321
def testSeekBackwardsFromEnd(self):
237322
# "Test BZ2File.seek(-150, 2)"
238323
self.createTempFile()
239324
with BZ2File(self.filename) as bz2f:
240325
bz2f.seek(-150, 2)
241326
self.assertEqual(bz2f.read(), self.TEXT[len(self.TEXT)-150:])
242327

328+
def testSeekBackwardsFromEndMultiStream(self):
329+
# "Test BZ2File.seek(-1000, 2) across stream boundaries"
330+
self.createTempFile(streams=2)
331+
with BZ2File(self.filename) as bz2f:
332+
bz2f.seek(-1000, 2)
333+
self.assertEqual(bz2f.read(), (self.TEXT * 2)[-1000:])
334+
243335
def testSeekPostEnd(self):
244336
# "Test BZ2File.seek(150000)"
245337
self.createTempFile()
@@ -248,6 +340,14 @@ def testSeekPostEnd(self):
248340
self.assertEqual(bz2f.tell(), len(self.TEXT))
249341
self.assertEqual(bz2f.read(), b"")
250342

343+
def testSeekPostEndMultiStream(self):
344+
# "Test BZ2File.seek(150000) with a multi stream archive"
345+
self.createTempFile(streams=5)
346+
with BZ2File(self.filename) as bz2f:
347+
bz2f.seek(150000)
348+
self.assertEqual(bz2f.tell(), len(self.TEXT) * 5)
349+
self.assertEqual(bz2f.read(), b"")
350+
251351
def testSeekPostEndTwice(self):
252352
# "Test BZ2File.seek(150000) twice"
253353
self.createTempFile()
@@ -257,6 +357,15 @@ def testSeekPostEndTwice(self):
257357
self.assertEqual(bz2f.tell(), len(self.TEXT))
258358
self.assertEqual(bz2f.read(), b"")
259359

360+
def testSeekPostEndTwiceMultiStream(self):
361+
# "Test BZ2File.seek(150000) twice with a multi stream archive"
362+
self.createTempFile(streams=5)
363+
with BZ2File(self.filename) as bz2f:
364+
bz2f.seek(150000)
365+
bz2f.seek(150000)
366+
self.assertEqual(bz2f.tell(), len(self.TEXT) * 5)
367+
self.assertEqual(bz2f.read(), b"")
368+
260369
def testSeekPreStart(self):
261370
# "Test BZ2File.seek(-150, 0)"
262371
self.createTempFile()
@@ -265,6 +374,14 @@ def testSeekPreStart(self):
265374
self.assertEqual(bz2f.tell(), 0)
266375
self.assertEqual(bz2f.read(), self.TEXT)
267376

377+
def testSeekPreStartMultiStream(self):
378+
# "Test BZ2File.seek(-150, 0) with a multi stream archive"
379+
self.createTempFile(streams=2)
380+
with BZ2File(self.filename) as bz2f:
381+
bz2f.seek(-150)
382+
self.assertEqual(bz2f.tell(), 0)
383+
self.assertEqual(bz2f.read(), self.TEXT * 2)
384+
268385
def testFileno(self):
269386
# "Test BZ2File.fileno()"
270387
self.createTempFile()
@@ -510,6 +627,11 @@ def testDecompressIncomplete(self):
510627
# "Test decompress() function with incomplete data"
511628
self.assertRaises(ValueError, bz2.decompress, self.DATA[:-10])
512629

630+
def testDecompressMultiStream(self):
631+
# "Test decompress() function for data with multiple streams"
632+
text = bz2.decompress(self.DATA * 5)
633+
self.assertEqual(text, self.TEXT * 5)
634+
513635
def test_main():
514636
support.run_unittest(
515637
BZ2FileTest,

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,9 @@ Core and Builtins
161161
Library
162162
-------
163163

164+
- Issue #1625: BZ2File and bz2.decompress() now support multi-stream files.
165+
Initial patch by Nir Aides.
166+
164167
- Issue #8796: codecs.open() calls the builtin open() function instead of using
165168
StreamReaderWriter. Deprecate StreamReader, StreamWriter, StreamReaderWriter,
166169
StreamRecoder and EncodedFile() of the codec module. Use the builtin open()

0 commit comments

Comments
 (0)