Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 0a9dd2f

Browse files
committed
Issue #5689: Add support for lzma compression to the tarfile module.
1 parent ce2af33 commit 0a9dd2f

4 files changed

Lines changed: 146 additions & 22 deletions

File tree

Doc/library/tarfile.rst

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@
1313
--------------
1414

1515
The :mod:`tarfile` module makes it possible to read and write tar
16-
archives, including those using gzip or bz2 compression.
16+
archives, including those using gzip, bz2 and lzma compression.
1717
(:file:`.zip` files can be read and written using the :mod:`zipfile` module.)
1818

1919
Some facts and figures:
2020

21-
* reads and writes :mod:`gzip` and :mod:`bz2` compressed archives.
21+
* reads and writes :mod:`gzip`, :mod:`bz2` and :mod:`lzma` compressed archives.
2222

2323
* read/write support for the POSIX.1-1988 (ustar) format.
2424

@@ -55,6 +55,8 @@ Some facts and figures:
5555
+------------------+---------------------------------------------+
5656
| ``'r:bz2'`` | Open for reading with bzip2 compression. |
5757
+------------------+---------------------------------------------+
58+
| ``'r:xz'`` | Open for reading with lzma compression. |
59+
+------------------+---------------------------------------------+
5860
| ``'a' or 'a:'`` | Open for appending with no compression. The |
5961
| | file is created if it does not exist. |
6062
+------------------+---------------------------------------------+
@@ -64,11 +66,13 @@ Some facts and figures:
6466
+------------------+---------------------------------------------+
6567
| ``'w:bz2'`` | Open for bzip2 compressed writing. |
6668
+------------------+---------------------------------------------+
69+
| ``'w:xz'`` | Open for lzma compressed writing. |
70+
+------------------+---------------------------------------------+
6771

68-
Note that ``'a:gz'`` or ``'a:bz2'`` is not possible. If *mode* is not suitable
69-
to open a certain (compressed) file for reading, :exc:`ReadError` is raised. Use
70-
*mode* ``'r'`` to avoid this. If a compression method is not supported,
71-
:exc:`CompressionError` is raised.
72+
Note that ``'a:gz'``, ``'a:bz2'`` or ``'a:xz'`` is not possible. If *mode*
73+
is not suitable to open a certain (compressed) file for reading,
74+
:exc:`ReadError` is raised. Use *mode* ``'r'`` to avoid this. If a
75+
compression method is not supported, :exc:`CompressionError` is raised.
7276

7377
If *fileobj* is specified, it is used as an alternative to a :term:`file object`
7478
opened in binary mode for *name*. It is supposed to be at position 0.
@@ -99,6 +103,9 @@ Some facts and figures:
99103
| ``'r|bz2'`` | Open a bzip2 compressed *stream* for |
100104
| | reading. |
101105
+-------------+--------------------------------------------+
106+
| ``'r|xz'`` | Open an lzma compressed *stream* for |
107+
| | reading. |
108+
+-------------+--------------------------------------------+
102109
| ``'w|'`` | Open an uncompressed *stream* for writing. |
103110
+-------------+--------------------------------------------+
104111
| ``'w|gz'`` | Open a gzip compressed *stream* for |
@@ -107,6 +114,9 @@ Some facts and figures:
107114
| ``'w|bz2'`` | Open a bzip2 compressed *stream* for |
108115
| | writing. |
109116
+-------------+--------------------------------------------+
117+
| ``'w|xz'`` | Open an lzma compressed *stream* for |
118+
| | writing. |
119+
+-------------+--------------------------------------------+
110120

111121

112122
.. class:: TarFile

Lib/tarfile.py

Lines changed: 60 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -420,19 +420,37 @@ def __init__(self, name, mode, comptype, fileobj, bufsize):
420420
self.crc = zlib.crc32(b"")
421421
if mode == "r":
422422
self._init_read_gz()
423+
self.exception = zlib.error
423424
else:
424425
self._init_write_gz()
425426

426-
if comptype == "bz2":
427+
elif comptype == "bz2":
427428
try:
428429
import bz2
429430
except ImportError:
430431
raise CompressionError("bz2 module is not available")
431432
if mode == "r":
432433
self.dbuf = b""
433434
self.cmp = bz2.BZ2Decompressor()
435+
self.exception = IOError
434436
else:
435437
self.cmp = bz2.BZ2Compressor()
438+
439+
elif comptype == "xz":
440+
try:
441+
import lzma
442+
except ImportError:
443+
raise CompressionError("lzma module is not available")
444+
if mode == "r":
445+
self.dbuf = b""
446+
self.cmp = lzma.LZMADecompressor()
447+
self.exception = lzma.LZMAError
448+
else:
449+
self.cmp = lzma.LZMACompressor()
450+
451+
elif comptype != "tar":
452+
raise CompressionError("unknown compression type %r" % comptype)
453+
436454
except:
437455
if not self._extfileobj:
438456
self.fileobj.close()
@@ -584,7 +602,7 @@ def _read(self, size):
584602
break
585603
try:
586604
buf = self.cmp.decompress(buf)
587-
except IOError:
605+
except self.exception:
588606
raise ReadError("invalid compressed data")
589607
self.dbuf += buf
590608
c += len(buf)
@@ -622,11 +640,14 @@ def read(self, size):
622640
return self.buf
623641

624642
def getcomptype(self):
625-
if self.buf.startswith(b"\037\213\010"):
643+
if self.buf.startswith(b"\x1f\x8b\x08"):
626644
return "gz"
627-
if self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
645+
elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
628646
return "bz2"
629-
return "tar"
647+
elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
648+
return "xz"
649+
else:
650+
return "tar"
630651

631652
def close(self):
632653
self.fileobj.close()
@@ -1651,18 +1672,22 @@ def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
16511672
'r:' open for reading exclusively uncompressed
16521673
'r:gz' open for reading with gzip compression
16531674
'r:bz2' open for reading with bzip2 compression
1675+
'r:xz' open for reading with lzma compression
16541676
'a' or 'a:' open for appending, creating the file if necessary
16551677
'w' or 'w:' open for writing without compression
16561678
'w:gz' open for writing with gzip compression
16571679
'w:bz2' open for writing with bzip2 compression
1680+
'w:xz' open for writing with lzma compression
16581681
16591682
'r|*' open a stream of tar blocks with transparent compression
16601683
'r|' open an uncompressed stream of tar blocks for reading
16611684
'r|gz' open a gzip compressed stream of tar blocks
16621685
'r|bz2' open a bzip2 compressed stream of tar blocks
1686+
'r|xz' open an lzma compressed stream of tar blocks
16631687
'w|' open an uncompressed stream for writing
16641688
'w|gz' open a gzip compressed stream for writing
16651689
'w|bz2' open a bzip2 compressed stream for writing
1690+
'w|xz' open an lzma compressed stream for writing
16661691
"""
16671692

16681693
if not name and not fileobj:
@@ -1780,11 +1805,40 @@ def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
17801805
t._extfileobj = False
17811806
return t
17821807

1808+
@classmethod
1809+
def xzopen(cls, name, mode="r", fileobj=None, preset=9, **kwargs):
1810+
"""Open lzma compressed tar archive name for reading or writing.
1811+
Appending is not allowed.
1812+
"""
1813+
if mode not in ("r", "w"):
1814+
raise ValueError("mode must be 'r' or 'w'")
1815+
1816+
try:
1817+
import lzma
1818+
except ImportError:
1819+
raise CompressionError("lzma module is not available")
1820+
1821+
if mode == "r":
1822+
# LZMAFile complains about a preset argument in read mode.
1823+
preset = None
1824+
1825+
fileobj = lzma.LZMAFile(filename=name if fileobj is None else None,
1826+
mode=mode, fileobj=fileobj, preset=preset)
1827+
1828+
try:
1829+
t = cls.taropen(name, mode, fileobj, **kwargs)
1830+
except (lzma.LZMAError, EOFError):
1831+
fileobj.close()
1832+
raise ReadError("not an lzma file")
1833+
t._extfileobj = False
1834+
return t
1835+
17831836
# All *open() methods are registered here.
17841837
OPEN_METH = {
17851838
"tar": "taropen", # uncompressed tar
17861839
"gz": "gzopen", # gzip compressed tar
1787-
"bz2": "bz2open" # bzip2 compressed tar
1840+
"bz2": "bz2open", # bzip2 compressed tar
1841+
"xz": "xzopen" # lzma compressed tar
17881842
}
17891843

17901844
#--------------------------------------------------------------------------

Lib/test/test_tarfile.py

Lines changed: 68 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
import bz2
2222
except ImportError:
2323
bz2 = None
24+
try:
25+
import lzma
26+
except ImportError:
27+
lzma = None
2428

2529
def md5sum(data):
2630
return md5(data).hexdigest()
@@ -29,6 +33,7 @@ def md5sum(data):
2933
tarname = support.findfile("testtar.tar")
3034
gzipname = os.path.join(TEMPDIR, "testtar.tar.gz")
3135
bz2name = os.path.join(TEMPDIR, "testtar.tar.bz2")
36+
xzname = os.path.join(TEMPDIR, "testtar.tar.xz")
3237
tmpname = os.path.join(TEMPDIR, "tmp.tar")
3338

3439
md5_regtype = "65f477c818ad9e15f7feab0c6d37742f"
@@ -201,13 +206,15 @@ def test_ignore_zeros(self):
201206
_open = gzip.GzipFile
202207
elif self.mode.endswith(":bz2"):
203208
_open = bz2.BZ2File
209+
elif self.mode.endswith(":xz"):
210+
_open = lzma.LZMAFile
204211
else:
205-
_open = open
212+
_open = io.FileIO
206213

207214
for char in (b'\0', b'a'):
208215
# Test if EOFHeaderError ('\0') and InvalidHeaderError ('a')
209216
# are ignored correctly.
210-
with _open(tmpname, "wb") as fobj:
217+
with _open(tmpname, "w") as fobj:
211218
fobj.write(char * 1024)
212219
fobj.write(tarfile.TarInfo("foo").tobuf())
213220

@@ -222,9 +229,10 @@ def test_ignore_zeros(self):
222229
class MiscReadTest(CommonReadTest):
223230

224231
def test_no_name_argument(self):
225-
if self.mode.endswith("bz2"):
226-
# BZ2File has no name attribute.
227-
return
232+
if self.mode.endswith(("bz2", "xz")):
233+
# BZ2File and LZMAFile have no name attribute.
234+
self.skipTest("no name attribute")
235+
228236
with open(self.tarname, "rb") as fobj:
229237
tar = tarfile.open(fileobj=fobj, mode=self.mode)
230238
self.assertEqual(tar.name, os.path.abspath(fobj.name))
@@ -265,10 +273,12 @@ def test_fileobj_with_offset(self):
265273
_open = gzip.GzipFile
266274
elif self.mode.endswith(":bz2"):
267275
_open = bz2.BZ2File
276+
elif self.mode.endswith(":xz"):
277+
_open = lzma.LZMAFile
268278
else:
269-
_open = open
270-
fobj = _open(self.tarname, "rb")
271-
try:
279+
_open = io.FileIO
280+
281+
with _open(self.tarname) as fobj:
272282
fobj.seek(offset)
273283

274284
# Test if the tarfile starts with the second member.
@@ -281,8 +291,6 @@ def test_fileobj_with_offset(self):
281291
self.assertEqual(tar.extractfile(t).read(), data,
282292
"seek back did not work")
283293
tar.close()
284-
finally:
285-
fobj.close()
286294

287295
def test_fail_comp(self):
288296
# For Gzip and Bz2 Tests: fail with a ReadError on an uncompressed file.
@@ -526,6 +534,18 @@ def _test_modes(self, testfunc):
526534
testfunc(bz2name, "r|*")
527535
testfunc(bz2name, "r|bz2")
528536

537+
if lzma:
538+
self.assertRaises(tarfile.ReadError, tarfile.open, tarname, mode="r:xz")
539+
self.assertRaises(tarfile.ReadError, tarfile.open, tarname, mode="r|xz")
540+
self.assertRaises(tarfile.ReadError, tarfile.open, xzname, mode="r:")
541+
self.assertRaises(tarfile.ReadError, tarfile.open, xzname, mode="r|")
542+
543+
testfunc(xzname, "r")
544+
testfunc(xzname, "r:*")
545+
testfunc(xzname, "r:xz")
546+
testfunc(xzname, "r|*")
547+
testfunc(xzname, "r|xz")
548+
529549
def test_detect_file(self):
530550
self._test_modes(self._testfunc_file)
531551

@@ -1096,6 +1116,9 @@ def test_stream_padding(self):
10961116
data = dec.decompress(data)
10971117
self.assertTrue(len(dec.unused_data) == 0,
10981118
"found trailing data")
1119+
elif self.mode.endswith("xz"):
1120+
with lzma.LZMAFile(tmpname) as fobj:
1121+
data = fobj.read()
10991122
else:
11001123
with open(tmpname, "rb") as fobj:
11011124
data = fobj.read()
@@ -1510,6 +1533,12 @@ def test_append_bz2(self):
15101533
self._create_testtar("w:bz2")
15111534
self.assertRaises(tarfile.ReadError, tarfile.open, tmpname, "a")
15121535

1536+
def test_append_lzma(self):
1537+
if lzma is None:
1538+
self.skipTest("lzma module not available")
1539+
self._create_testtar("w:xz")
1540+
self.assertRaises(tarfile.ReadError, tarfile.open, tmpname, "a")
1541+
15131542
# Append mode is supposed to fail if the tarfile to append to
15141543
# does not end with a zero block.
15151544
def _test_error(self, data):
@@ -1788,6 +1817,21 @@ def test_partial_input_bz2(self):
17881817
self._test_partial_input("r:bz2")
17891818

17901819

1820+
class LzmaMiscReadTest(MiscReadTest):
1821+
tarname = xzname
1822+
mode = "r:xz"
1823+
class LzmaUstarReadTest(UstarReadTest):
1824+
tarname = xzname
1825+
mode = "r:xz"
1826+
class LzmaStreamReadTest(StreamReadTest):
1827+
tarname = xzname
1828+
mode = "r|xz"
1829+
class LzmaWriteTest(WriteTest):
1830+
mode = "w:xz"
1831+
class LzmaStreamWriteTest(StreamWriteTest):
1832+
mode = "w|xz"
1833+
1834+
17911835
def test_main():
17921836
support.unlink(TEMPDIR)
17931837
os.makedirs(TEMPDIR)
@@ -1850,6 +1894,20 @@ def test_main():
18501894
Bz2PartialReadTest,
18511895
]
18521896

1897+
if lzma:
1898+
# Create testtar.tar.xz and add lzma-specific tests.
1899+
support.unlink(xzname)
1900+
with lzma.LZMAFile(xzname, "w") as tar:
1901+
tar.write(data)
1902+
1903+
tests += [
1904+
LzmaMiscReadTest,
1905+
LzmaUstarReadTest,
1906+
LzmaStreamReadTest,
1907+
LzmaWriteTest,
1908+
LzmaStreamWriteTest,
1909+
]
1910+
18531911
try:
18541912
support.run_unittest(*tests)
18551913
finally:

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,8 @@ Core and Builtins
406406
Library
407407
-------
408408

409+
- Issue #5689: Add support for lzma compression to the tarfile module.
410+
409411
- Issue #13248: Turn 3.2's PendingDeprecationWarning into 3.3's
410412
DeprecationWarning. It covers 'cgi.escape', 'importlib.abc.PyLoader',
411413
'importlib.abc.PyPycLoader', 'nntplib.NNTP.xgtitle', 'nntplib.NNTP.xpath',

0 commit comments

Comments
 (0)