Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 01277d1

Browse files
committed
Merge with 3.2: Issue #13158: Fix decoding and encoding of base-256 number fields in tarfile.
The nti() function, which converts a number field from a tar header to a number, failed to decode GNU tar-specific base-256 fields. I also added support for decoding and encoding negative base-256 number fields.
2 parents 0e7e715 + ac3d137 commit 01277d1

3 files changed

Lines changed: 49 additions & 21 deletions

File tree

Lib/tarfile.py

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -194,16 +194,18 @@ def nti(s):
194194
"""
195195
# There are two possible encodings for a number field, see
196196
# itn() below.
197-
if s[0] != chr(0o200):
197+
if s[0] in (0o200, 0o377):
198+
n = 0
199+
for i in range(len(s) - 1):
200+
n <<= 8
201+
n += s[i + 1]
202+
if s[0] == 0o377:
203+
n = -(256 ** (len(s) - 1) - n)
204+
else:
198205
try:
199206
n = int(nts(s, "ascii", "strict") or "0", 8)
200207
except ValueError:
201208
raise InvalidHeaderError("invalid header")
202-
else:
203-
n = 0
204-
for i in range(len(s) - 1):
205-
n <<= 8
206-
n += ord(s[i + 1])
207209
return n
208210

209211
def itn(n, digits=8, format=DEFAULT_FORMAT):
@@ -212,25 +214,26 @@ def itn(n, digits=8, format=DEFAULT_FORMAT):
212214
# POSIX 1003.1-1988 requires numbers to be encoded as a string of
213215
# octal digits followed by a null-byte, this allows values up to
214216
# (8**(digits-1))-1. GNU tar allows storing numbers greater than
215-
# that if necessary. A leading 0o200 byte indicates this particular
216-
# encoding, the following digits-1 bytes are a big-endian
217-
# representation. This allows values up to (256**(digits-1))-1.
217+
# that if necessary. A leading 0o200 or 0o377 byte indicates this
218+
# particular encoding, the following digits-1 bytes are a big-endian
219+
# base-256 representation. This allows values up to (256**(digits-1))-1.
220+
# A 0o200 byte indicates a positive number, a 0o377 byte a negative
221+
# number.
218222
if 0 <= n < 8 ** (digits - 1):
219223
s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
220-
else:
221-
if format != GNU_FORMAT or n >= 256 ** (digits - 1):
222-
raise ValueError("overflow in number field")
223-
224-
if n < 0:
225-
# XXX We mimic GNU tar's behaviour with negative numbers,
226-
# this could raise OverflowError.
227-
n = struct.unpack("L", struct.pack("l", n))[0]
224+
elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
225+
if n >= 0:
226+
s = bytearray([0o200])
227+
else:
228+
s = bytearray([0o377])
229+
n = 256 ** digits + n
228230

229-
s = bytearray()
230231
for i in range(digits - 1):
231-
s.insert(0, n & 0o377)
232+
s.insert(1, n & 0o377)
232233
n >>= 8
233-
s.insert(0, 0o200)
234+
else:
235+
raise ValueError("overflow in number field")
236+
234237
return s
235238

236239
def calc_chksums(buf):

Lib/test/test_tarfile.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1582,9 +1582,31 @@ def test_char_fields(self):
15821582
self.assertEqual(tarfile.nts(b"foo\0\0\0\0\0", "ascii", "strict"), "foo")
15831583
self.assertEqual(tarfile.nts(b"foo\0bar\0", "ascii", "strict"), "foo")
15841584

1585-
def test_number_fields(self):
1585+
def test_read_number_fields(self):
1586+
# Issue 13158: Test if GNU tar specific base-256 number fields
1587+
# are decoded correctly.
1588+
self.assertEqual(tarfile.nti(b"0000001\x00"), 1)
1589+
self.assertEqual(tarfile.nti(b"7777777\x00"), 0o7777777)
1590+
self.assertEqual(tarfile.nti(b"\x80\x00\x00\x00\x00\x20\x00\x00"), 0o10000000)
1591+
self.assertEqual(tarfile.nti(b"\x80\x00\x00\x00\xff\xff\xff\xff"), 0xffffffff)
1592+
self.assertEqual(tarfile.nti(b"\xff\xff\xff\xff\xff\xff\xff\xff"), -1)
1593+
self.assertEqual(tarfile.nti(b"\xff\xff\xff\xff\xff\xff\xff\x9c"), -100)
1594+
self.assertEqual(tarfile.nti(b"\xff\x00\x00\x00\x00\x00\x00\x00"), -0x100000000000000)
1595+
1596+
def test_write_number_fields(self):
15861597
self.assertEqual(tarfile.itn(1), b"0000001\x00")
1598+
self.assertEqual(tarfile.itn(0o7777777), b"7777777\x00")
1599+
self.assertEqual(tarfile.itn(0o10000000), b"\x80\x00\x00\x00\x00\x20\x00\x00")
15871600
self.assertEqual(tarfile.itn(0xffffffff), b"\x80\x00\x00\x00\xff\xff\xff\xff")
1601+
self.assertEqual(tarfile.itn(-1), b"\xff\xff\xff\xff\xff\xff\xff\xff")
1602+
self.assertEqual(tarfile.itn(-100), b"\xff\xff\xff\xff\xff\xff\xff\x9c")
1603+
self.assertEqual(tarfile.itn(-0x100000000000000), b"\xff\x00\x00\x00\x00\x00\x00\x00")
1604+
1605+
def test_number_field_limits(self):
1606+
self.assertRaises(ValueError, tarfile.itn, -1, 8, tarfile.USTAR_FORMAT)
1607+
self.assertRaises(ValueError, tarfile.itn, 0o10000000, 8, tarfile.USTAR_FORMAT)
1608+
self.assertRaises(ValueError, tarfile.itn, -0x10000000001, 6, tarfile.GNU_FORMAT)
1609+
self.assertRaises(ValueError, tarfile.itn, 0x10000000000, 6, tarfile.GNU_FORMAT)
15881610

15891611

15901612
class ContextManagerTest(unittest.TestCase):

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,9 @@ Core and Builtins
305305
Library
306306
-------
307307

308+
- Issue #13158: Fix decoding and encoding of GNU tar specific base-256 number
309+
fields in tarfile.
310+
308311
- Issue #13025: mimetypes is now reading MIME types using the UTF-8 encoding,
309312
instead of the locale encoding.
310313

0 commit comments

Comments
 (0)