Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 09aa752

Browse files
committed
Refactor the recently added bugfix into more testable code by moving
Windows file name sanitization into its own method. Split the unit test into several tests based on platform.
1 parent 6d29628 commit 09aa752

2 files changed

Lines changed: 48 additions & 19 deletions

File tree

Lib/test/test_zipfile.py

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -538,8 +538,15 @@ def check_file(self, filename, content):
538538
with open(filename, 'rb') as f:
539539
self.assertEqual(f.read(), content)
540540

541-
def test_extract_hackers_arcnames(self):
542-
hacknames = [
541+
def test_sanitize_windows_name(self):
542+
san = zipfile.ZipFile._sanitize_windows_name
543+
# Passing pathsep in allows this test to work regardless of platform.
544+
self.assertEqual(san(r',,?,C:,foo,bar/z', ','), r'_,C_,foo,bar/z')
545+
self.assertEqual(san(r'a\b,c<d>e|f"g?h*i', ','), r'a\b,c_d_e_f_g_h_i')
546+
self.assertEqual(san('../../foo../../ba..r', '/'), r'foo/ba..r')
547+
548+
def test_extract_hackers_arcnames_common_cases(self):
549+
common_hacknames = [
543550
('../foo/bar', 'foo/bar'),
544551
('foo/../bar', 'foo/bar'),
545552
('foo/../../bar', 'foo/bar'),
@@ -549,8 +556,12 @@ def test_extract_hackers_arcnames(self):
549556
('/foo/../bar', 'foo/bar'),
550557
('/foo/../../bar', 'foo/bar'),
551558
]
552-
if os.path.sep == '\\': # Windows.
553-
hacknames.extend([
559+
self._test_extract_hackers_arcnames(common_hacknames)
560+
561+
@unittest.skipIf(os.path.sep != '\\', 'Requires \\ as path separator.')
562+
def test_extract_hackers_arcnames_windows_only(self):
563+
"""Test combination of path fixing and windows name sanitization."""
564+
windows_hacknames = [
554565
(r'..\foo\bar', 'foo/bar'),
555566
(r'..\/foo\/bar', 'foo/bar'),
556567
(r'foo/\..\/bar', 'foo/bar'),
@@ -570,14 +581,19 @@ def test_extract_hackers_arcnames(self):
570581
(r'C:/../C:/foo/bar', 'C_/foo/bar'),
571582
(r'a:b\c<d>e|f"g?h*i', 'b/c_d_e_f_g_h_i'),
572583
('../../foo../../ba..r', 'foo/ba..r'),
573-
])
574-
else: # Unix
575-
hacknames.extend([
576-
('//foo/bar', 'foo/bar'),
577-
('../../foo../../ba..r', 'foo../ba..r'),
578-
(r'foo/..\bar', r'foo/..\bar'),
579-
])
584+
]
585+
self._test_extract_hackers_arcnames(windows_hacknames)
586+
587+
@unittest.skipIf(os.path.sep != '/', r'Requires / as path separator.')
588+
def test_extract_hackers_arcnames_posix_only(self):
589+
posix_hacknames = [
590+
('//foo/bar', 'foo/bar'),
591+
('../../foo../../ba..r', 'foo../ba..r'),
592+
(r'foo/..\bar', r'foo/..\bar'),
593+
]
594+
self._test_extract_hackers_arcnames(posix_hacknames)
580595

596+
def _test_extract_hackers_arcnames(self, hacknames):
581597
for arcname, fixedname in hacknames:
582598
content = b'foobar' + arcname.encode()
583599
with zipfile.ZipFile(TESTFN2, 'w', zipfile.ZIP_STORED) as zipfp:
@@ -594,7 +610,8 @@ def test_extract_hackers_arcnames(self):
594610
with zipfile.ZipFile(TESTFN2, 'r') as zipfp:
595611
writtenfile = zipfp.extract(arcname, targetpath)
596612
self.assertEqual(writtenfile, correctfile,
597-
msg="extract %r" % arcname)
613+
msg='extract %r: %r != %r' %
614+
(arcname, writtenfile, correctfile))
598615
self.check_file(correctfile, content)
599616
shutil.rmtree('target')
600617

Lib/zipfile.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -883,6 +883,7 @@ class ZipFile:
883883
"""
884884

885885
fp = None # Set here since __del__ checks it
886+
_windows_illegal_name_trans_table = None
886887

887888
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
888889
"""Open the ZIP file with mode read "r", write "w" or append "a"."""
@@ -1223,6 +1224,21 @@ def extractall(self, path=None, members=None, pwd=None):
12231224
for zipinfo in members:
12241225
self.extract(zipinfo, path, pwd)
12251226

1227+
@classmethod
1228+
def _sanitize_windows_name(cls, arcname, pathsep):
1229+
"""Replace bad characters and remove trailing dots from parts."""
1230+
table = cls._windows_illegal_name_trans_table
1231+
if not table:
1232+
illegal = ':<>|"?*'
1233+
table = str.maketrans(illegal, '_' * len(illegal))
1234+
cls._windows_illegal_name_trans_table = table
1235+
arcname = arcname.translate(table)
1236+
# remove trailing dots
1237+
arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1238+
# rejoin, removing empty parts.
1239+
arcname = pathsep.join(x for x in arcname if x)
1240+
return arcname
1241+
12261242
def _extract_member(self, member, targetpath, pwd):
12271243
"""Extract the ZipInfo object 'member' to a physical
12281244
file on the path targetpath.
@@ -1236,16 +1252,12 @@ def _extract_member(self, member, targetpath, pwd):
12361252
# interpret absolute pathname as relative, remove drive letter or
12371253
# UNC path, redundant separators, "." and ".." components.
12381254
arcname = os.path.splitdrive(arcname)[1]
1255+
invalid_path_parts = ('', os.path.curdir, os.path.pardir)
12391256
arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1240-
if x not in ('', os.path.curdir, os.path.pardir))
1257+
if x not in invalid_path_parts)
12411258
if os.path.sep == '\\':
12421259
# filter illegal characters on Windows
1243-
illegal = ':<>|"?*'
1244-
table = str.maketrans(illegal, '_' * len(illegal))
1245-
arcname = arcname.translate(table)
1246-
# remove trailing dots
1247-
arcname = (x.rstrip('.') for x in arcname.split(os.path.sep))
1248-
arcname = os.path.sep.join(x for x in arcname if x)
1260+
arcname = self._sanitize_windows_name(arcname, os.path.sep)
12491261

12501262
targetpath = os.path.join(targetpath, arcname)
12511263
targetpath = os.path.normpath(targetpath)

0 commit comments

Comments
 (0)