Thanks to visit codestin.com
Credit goes to github.com

Skip to content

bpo-36305: Fixes to path handling and parsing in pathlib #12361

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 52 additions & 28 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,38 +60,49 @@ class _Flavour(object):
def __init__(self):
self.join = self.sep.join

def _split_part(self, part):
"""
Return the drive, root and path parts from a given part.
If the part is a tuple, it already contains these values and therefore is returned.
Otherwise, splitroot is used to parse the part.
"""
if isinstance(part, tuple):
return part
elif isinstance(part, str):
if self.altsep:
part = part.replace(self.altsep, self.sep)
drv, root, rel = self.splitroot(part)
return drv, root, rel.split(self.sep)
else:
raise TypeError(f'argument should be a tuple or an str object, not {type(part)}')

def parse_parts(self, parts):
"""
Parse and join multiple path strings, and
return a tuple of the final drive, root and path parts.
The given parts can be either strings of paths,
or tuples that represent paths, containing the drive, root and list of path parts.
The option for passing a tuple is needed, as the part 'a:b' could be interpreted
either as the relative path 'b' with the drive 'a:',
or as a file 'a' with the NTFS data-stream 'b'.
For example, passing either ('a:', '', ['b']) or ('', '', ['a:b']) instead of 'a:b'
will allow parse_parts to behave properly in these cases.
"""
parsed = []
sep = self.sep
altsep = self.altsep
drv = root = ''
it = reversed(parts)
for part in it:
if not part:
continue
if altsep:
part = part.replace(altsep, sep)
drv, root, rel = self.splitroot(part)
if sep in rel:
for x in reversed(rel.split(sep)):
current_drv, current_root, rel_parts = self._split_part(part)
if not drv:
drv = current_drv
if not root:
root = current_root
for x in reversed(rel_parts):
if x and x != '.':
parsed.append(sys.intern(x))
else:
if rel and rel != '.':
parsed.append(sys.intern(rel))
if drv or root:
if not drv:
# If no drive is present, try to find one in the previous
# parts. This makes the result of parsing e.g.
# ("C:", "/", "a") reasonably intuitive.
for part in it:
if not part:
continue
if altsep:
part = part.replace(altsep, sep)
drv = self.splitroot(part)[0]
if drv:
break
if root and drv:
break
if drv or root:
parsed.append(drv + root)
Expand All @@ -115,6 +126,9 @@ def join_parsed_parts(self, drv, root, parts, drv2, root2, parts2):
return drv, root, parts + parts2
return drv2, root2, parts2

def has_drive(self, part):
return self.splitroot(part)[0] != ''


class _WindowsFlavour(_Flavour):
# Reference for Windows paths can be found at
Expand Down Expand Up @@ -194,18 +208,21 @@ def resolve(self, path, strict=False):
s = str(path)
if not s:
return os.getcwd()
previous_s = None
if _getfinalpathname is not None:
if strict:
return self._ext_to_normal(_getfinalpathname(s))
else:
previous_s = None
tail_parts = [] # End of the path after the first one not found
while True:
try:
s = self._ext_to_normal(_getfinalpathname(s))
except FileNotFoundError:
previous_s = s
s, tail = os.path.split(s)
if self.has_drive(tail):
# To avoid confusing between a filename with a data-stream and a drive letter
tail = f'.{self.sep}{tail}'
tail_parts.append(tail)
if previous_s == s:
return path
Expand Down Expand Up @@ -653,7 +670,10 @@ def _parse_args(cls, args):
parts = []
for a in args:
if isinstance(a, PurePath):
parts += a._parts
path_parts = a._parts
if a._drv or a._root:
path_parts = path_parts[1:]
parts.append((a._drv, a._root, path_parts))
else:
a = os.fspath(a)
if isinstance(a, str):
Expand Down Expand Up @@ -691,6 +711,10 @@ def _from_parsed_parts(cls, drv, root, parts, init=True):

@classmethod
def _format_parsed_parts(cls, drv, root, parts):
if parts and not drv and cls._flavour.has_drive(parts[0]):
# In case there is no drive, and the first part might be interpreted as a drive,
# we add a dot to clarify the first part is not a drive.
parts = ['.'] + parts
if drv or root:
return drv + root + cls._flavour.join(parts[1:])
else:
Expand Down Expand Up @@ -941,7 +965,7 @@ def __truediv__(self, key):

def __rtruediv__(self, key):
try:
return self._from_parts([key] + self._parts)
return self._from_parts([key, self])
except TypeError:
return NotImplemented

Expand Down Expand Up @@ -1171,7 +1195,7 @@ def absolute(self):
return self
# FIXME this must defer to the specific flavour (and, under Windows,
# use nt._getfullpathname())
obj = self._from_parts([os.getcwd()] + self._parts, init=False)
obj = self._from_parts([os.getcwd(), self], init=False)
obj._init(template=self)
return obj

Expand Down Expand Up @@ -1553,7 +1577,7 @@ def expanduser(self):
if (not (self._drv or self._root) and
self._parts and self._parts[0][:1] == '~'):
homedir = self._flavour.gethomedir(self._parts[0][1:])
return self._from_parts([homedir] + self._parts[1:])
return self._from_parts([homedir, self.relative_to(self._parts[0])])

return self

Expand Down
40 changes: 37 additions & 3 deletions Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@ def test_parse_parts(self):
check(['//a/b/'], ('\\\\a\\b', '\\', ['\\\\a\\b\\']))
check(['//a/b/c'], ('\\\\a\\b', '\\', ['\\\\a\\b\\', 'c']))
# Second part is anchored, so that the first part is ignored.
check(['a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'b', 'c']))
check(['a', 'Z:/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c']))
# UNC paths.
check(['a', '//b/c', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd']))
Expand All @@ -133,6 +132,16 @@ def test_parse_parts(self):
check(['a', '/b', 'c'], ('', '\\', ['\\', 'b', 'c']))
check(['Z:/a', '/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c']))
check(['//?/Z:/a', '/b', 'c'], ('\\\\?\\Z:', '\\', ['\\\\?\\Z:\\', 'b', 'c']))
# Second part has a drive but not root.
check(['a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'a', 'b', 'c']))
check(['Y:a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'a', 'b', 'c']))
Comment on lines +136 to +137
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't look right - if we switch drives mid-way through we should discard everything before. This is what os.path.join() does.

# Paths to files with NTFS alternate data streams
check(['./c:s'], ('', '', ['c:s']))
check(['cc:s'], ('', '', ['cc:s']))
check(['C:c:s'], ('C:', '', ['C:', 'c:s']))
check(['C:/c:s'], ('C:', '\\', ['C:\\', 'c:s']))
check(['D:a', './c:b'], ('D:', '', ['D:', 'a', 'c:b']))
check(['D:/a', './c:b'], ('D:', '\\', ['D:\\', 'a', 'c:b']))

def test_splitroot(self):
f = self.flavour.splitroot
Expand Down Expand Up @@ -201,6 +210,7 @@ def test_constructor_common(self):
self.assertEqual(P(P('a'), 'b'), P('a/b'))
self.assertEqual(P(P('a'), P('b')), P('a/b'))
self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c")))
self.assertEqual(P(P('./a:b')), P('./a:b'))

def _check_str_subclass(self, *args):
# Issue #21127: it should be possible to construct a PurePath object
Expand Down Expand Up @@ -746,7 +756,9 @@ class PureWindowsPathTest(_BasePurePathTest, unittest.TestCase):

equivalences = _BasePurePathTest.equivalences.copy()
equivalences.update({
'c:a': [ ('c:', 'a'), ('c:', 'a/'), ('/', 'c:', 'a') ],
'./a:b': [ ('./a:b',) ],
'a:b:c': [ ('./b:c', 'a:'), ('b:', 'a:b:c') ],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to the above, I don't see why ('./b:c', 'a:') shouldn't resolve to a:.

'c:a': [ ('c:', 'a'), ('c:', 'a/'), ('.', 'c:', 'a') ],
'c:/a': [
('c:/', 'a'), ('c:', '/', 'a'), ('c:', '/a'),
('/z', 'c:/', 'a'), ('//x/y', 'c:/', 'a'),
Expand All @@ -770,6 +782,7 @@ def test_str(self):
self.assertEqual(str(p), '\\\\a\\b\\c\\d')

def test_str_subclass(self):
self._check_str_subclass('.\\a:b')
self._check_str_subclass('c:')
self._check_str_subclass('c:a')
self._check_str_subclass('c:a\\b.txt')
Expand Down Expand Up @@ -916,6 +929,7 @@ def test_drive(self):
self.assertEqual(P('//a/b').drive, '\\\\a\\b')
self.assertEqual(P('//a/b/').drive, '\\\\a\\b')
self.assertEqual(P('//a/b/c/d').drive, '\\\\a\\b')
self.assertEqual(P('./c:a').drive, '')

def test_root(self):
P = self.cls
Expand Down Expand Up @@ -1191,6 +1205,14 @@ def test_join(self):
self.assertEqual(pp, P('C:/a/b/x/y'))
pp = p.joinpath('c:/x/y')
self.assertEqual(pp, P('C:/x/y'))
# Joining with files with NTFS data streams => the filename should
# not be parsed as a drive letter
pp = p.joinpath(P('./d:s'))
self.assertEqual(pp, P('C:/a/b/d:s'))
pp = p.joinpath(P('./dd:s'))
self.assertEqual(pp, P('C:/a/b/dd:s'))
pp = p.joinpath(P('E:d:s'))
self.assertEqual(pp, P('E:d:s'))

def test_div(self):
# Basically the same as joinpath().
Expand All @@ -1211,6 +1233,11 @@ def test_div(self):
# the second path is relative.
self.assertEqual(p / 'c:x/y', P('C:/a/b/x/y'))
self.assertEqual(p / 'c:/x/y', P('C:/x/y'))
# Joining with files with NTFS data streams => the filename should
# not be parsed as a drive letter
self.assertEqual(p / P('./d:s'), P('C:/a/b/d:s'))
self.assertEqual(p / P('./dd:s'), P('C:/a/b/dd:s'))
self.assertEqual(p / P('E:d:s'), P('E:d:s'))

def test_is_reserved(self):
P = self.cls
Expand Down Expand Up @@ -1431,6 +1458,8 @@ def test_expanduser_common(self):
self.assertEqual(p.expanduser(), p)
p = P(P('').absolute().anchor) / '~'
self.assertEqual(p.expanduser(), p)
p = P('~/a:b')
self.assertEqual(p.expanduser(), P(os.path.expanduser('~'), './a:b'))

def test_exists(self):
P = self.cls
Expand Down Expand Up @@ -2467,11 +2496,16 @@ def check():
env.pop('HOMEPATH', None)
env['USERPROFILE'] = 'C:\\Users\\alice'
check()

# bpo-38883: ignore `HOME` when set on windows
env['HOME'] = 'C:\\Users\\eve'
check()

def test_resolve(self):
P = self.cls
p = P(BASE, './a:b')
self.assertEqual(str(p.resolve(strict=False)), f'{BASE}\\a:b')


class CompatiblePathTest(unittest.TestCase):
"""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a ``pathlib`` inconsistency in handling of paths containing colons.