diff --git a/Lib/pathlib.py b/Lib/pathlib.py index cfa574af6e8bab..6a24bba95a09ef 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -60,38 +60,49 @@ class _Flavour(object): def __init__(self): self.join = self.sep.join + def _split_part(self, part): + """ + Return the drive, root and path parts from a given part. + If the part is a tuple, it already contains these values and therefore is returned. + Otherwise, splitroot is used to parse the part. + """ + if isinstance(part, tuple): + return part + elif isinstance(part, str): + if self.altsep: + part = part.replace(self.altsep, self.sep) + drv, root, rel = self.splitroot(part) + return drv, root, rel.split(self.sep) + else: + raise TypeError(f'argument should be a tuple or an str object, not {type(part)}') + def parse_parts(self, parts): + """ + Parse and join multiple path strings, and + return a tuple of the final drive, root and path parts. + The given parts can be either strings of paths, + or tuples that represent paths, containing the drive, root and list of path parts. + The option for passing a tuple is needed, as the part 'a:b' could be interpreted + either as the relative path 'b' with the drive 'a:', + or as a file 'a' with the NTFS data-stream 'b'. + For example, passing either ('a:', '', ['b']) or ('', '', ['a:b']) instead of 'a:b' + will allow parse_parts to behave properly in these cases. + """ parsed = [] - sep = self.sep - altsep = self.altsep drv = root = '' it = reversed(parts) for part in it: if not part: continue - if altsep: - part = part.replace(altsep, sep) - drv, root, rel = self.splitroot(part) - if sep in rel: - for x in reversed(rel.split(sep)): + current_drv, current_root, rel_parts = self._split_part(part) + if not drv: + drv = current_drv + if not root: + root = current_root + for x in reversed(rel_parts): if x and x != '.': parsed.append(sys.intern(x)) - else: - if rel and rel != '.': - parsed.append(sys.intern(rel)) - if drv or root: - if not drv: - # If no drive is present, try to find one in the previous - # parts. This makes the result of parsing e.g. - # ("C:", "/", "a") reasonably intuitive. - for part in it: - if not part: - continue - if altsep: - part = part.replace(altsep, sep) - drv = self.splitroot(part)[0] - if drv: - break + if root and drv: break if drv or root: parsed.append(drv + root) @@ -115,6 +126,9 @@ def join_parsed_parts(self, drv, root, parts, drv2, root2, parts2): return drv, root, parts + parts2 return drv2, root2, parts2 + def has_drive(self, part): + return self.splitroot(part)[0] != '' + class _WindowsFlavour(_Flavour): # Reference for Windows paths can be found at @@ -194,11 +208,11 @@ def resolve(self, path, strict=False): s = str(path) if not s: return os.getcwd() - previous_s = None if _getfinalpathname is not None: if strict: return self._ext_to_normal(_getfinalpathname(s)) else: + previous_s = None tail_parts = [] # End of the path after the first one not found while True: try: @@ -206,6 +220,9 @@ def resolve(self, path, strict=False): except FileNotFoundError: previous_s = s s, tail = os.path.split(s) + if self.has_drive(tail): + # To avoid confusing between a filename with a data-stream and a drive letter + tail = f'.{self.sep}{tail}' tail_parts.append(tail) if previous_s == s: return path @@ -653,7 +670,10 @@ def _parse_args(cls, args): parts = [] for a in args: if isinstance(a, PurePath): - parts += a._parts + path_parts = a._parts + if a._drv or a._root: + path_parts = path_parts[1:] + parts.append((a._drv, a._root, path_parts)) else: a = os.fspath(a) if isinstance(a, str): @@ -691,6 +711,10 @@ def _from_parsed_parts(cls, drv, root, parts, init=True): @classmethod def _format_parsed_parts(cls, drv, root, parts): + if parts and not drv and cls._flavour.has_drive(parts[0]): + # In case there is no drive, and the first part might be interpreted as a drive, + # we add a dot to clarify the first part is not a drive. + parts = ['.'] + parts if drv or root: return drv + root + cls._flavour.join(parts[1:]) else: @@ -941,7 +965,7 @@ def __truediv__(self, key): def __rtruediv__(self, key): try: - return self._from_parts([key] + self._parts) + return self._from_parts([key, self]) except TypeError: return NotImplemented @@ -1171,7 +1195,7 @@ def absolute(self): return self # FIXME this must defer to the specific flavour (and, under Windows, # use nt._getfullpathname()) - obj = self._from_parts([os.getcwd()] + self._parts, init=False) + obj = self._from_parts([os.getcwd(), self], init=False) obj._init(template=self) return obj @@ -1553,7 +1577,7 @@ def expanduser(self): if (not (self._drv or self._root) and self._parts and self._parts[0][:1] == '~'): homedir = self._flavour.gethomedir(self._parts[0][1:]) - return self._from_parts([homedir] + self._parts[1:]) + return self._from_parts([homedir, self.relative_to(self._parts[0])]) return self diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index a50dce01718080..eacf61ec689460 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -114,7 +114,6 @@ def test_parse_parts(self): check(['//a/b/'], ('\\\\a\\b', '\\', ['\\\\a\\b\\'])) check(['//a/b/c'], ('\\\\a\\b', '\\', ['\\\\a\\b\\', 'c'])) # Second part is anchored, so that the first part is ignored. - check(['a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'b', 'c'])) check(['a', 'Z:/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c'])) # UNC paths. check(['a', '//b/c', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd'])) @@ -133,6 +132,16 @@ def test_parse_parts(self): check(['a', '/b', 'c'], ('', '\\', ['\\', 'b', 'c'])) check(['Z:/a', '/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c'])) check(['//?/Z:/a', '/b', 'c'], ('\\\\?\\Z:', '\\', ['\\\\?\\Z:\\', 'b', 'c'])) + # Second part has a drive but not root. + check(['a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'a', 'b', 'c'])) + check(['Y:a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'a', 'b', 'c'])) + # Paths to files with NTFS alternate data streams + check(['./c:s'], ('', '', ['c:s'])) + check(['cc:s'], ('', '', ['cc:s'])) + check(['C:c:s'], ('C:', '', ['C:', 'c:s'])) + check(['C:/c:s'], ('C:', '\\', ['C:\\', 'c:s'])) + check(['D:a', './c:b'], ('D:', '', ['D:', 'a', 'c:b'])) + check(['D:/a', './c:b'], ('D:', '\\', ['D:\\', 'a', 'c:b'])) def test_splitroot(self): f = self.flavour.splitroot @@ -201,6 +210,7 @@ def test_constructor_common(self): self.assertEqual(P(P('a'), 'b'), P('a/b')) self.assertEqual(P(P('a'), P('b')), P('a/b')) self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c"))) + self.assertEqual(P(P('./a:b')), P('./a:b')) def _check_str_subclass(self, *args): # Issue #21127: it should be possible to construct a PurePath object @@ -746,7 +756,9 @@ class PureWindowsPathTest(_BasePurePathTest, unittest.TestCase): equivalences = _BasePurePathTest.equivalences.copy() equivalences.update({ - 'c:a': [ ('c:', 'a'), ('c:', 'a/'), ('/', 'c:', 'a') ], + './a:b': [ ('./a:b',) ], + 'a:b:c': [ ('./b:c', 'a:'), ('b:', 'a:b:c') ], + 'c:a': [ ('c:', 'a'), ('c:', 'a/'), ('.', 'c:', 'a') ], 'c:/a': [ ('c:/', 'a'), ('c:', '/', 'a'), ('c:', '/a'), ('/z', 'c:/', 'a'), ('//x/y', 'c:/', 'a'), @@ -770,6 +782,7 @@ def test_str(self): self.assertEqual(str(p), '\\\\a\\b\\c\\d') def test_str_subclass(self): + self._check_str_subclass('.\\a:b') self._check_str_subclass('c:') self._check_str_subclass('c:a') self._check_str_subclass('c:a\\b.txt') @@ -916,6 +929,7 @@ def test_drive(self): self.assertEqual(P('//a/b').drive, '\\\\a\\b') self.assertEqual(P('//a/b/').drive, '\\\\a\\b') self.assertEqual(P('//a/b/c/d').drive, '\\\\a\\b') + self.assertEqual(P('./c:a').drive, '') def test_root(self): P = self.cls @@ -1191,6 +1205,14 @@ def test_join(self): self.assertEqual(pp, P('C:/a/b/x/y')) pp = p.joinpath('c:/x/y') self.assertEqual(pp, P('C:/x/y')) + # Joining with files with NTFS data streams => the filename should + # not be parsed as a drive letter + pp = p.joinpath(P('./d:s')) + self.assertEqual(pp, P('C:/a/b/d:s')) + pp = p.joinpath(P('./dd:s')) + self.assertEqual(pp, P('C:/a/b/dd:s')) + pp = p.joinpath(P('E:d:s')) + self.assertEqual(pp, P('E:d:s')) def test_div(self): # Basically the same as joinpath(). @@ -1211,6 +1233,11 @@ def test_div(self): # the second path is relative. self.assertEqual(p / 'c:x/y', P('C:/a/b/x/y')) self.assertEqual(p / 'c:/x/y', P('C:/x/y')) + # Joining with files with NTFS data streams => the filename should + # not be parsed as a drive letter + self.assertEqual(p / P('./d:s'), P('C:/a/b/d:s')) + self.assertEqual(p / P('./dd:s'), P('C:/a/b/dd:s')) + self.assertEqual(p / P('E:d:s'), P('E:d:s')) def test_is_reserved(self): P = self.cls @@ -1431,6 +1458,8 @@ def test_expanduser_common(self): self.assertEqual(p.expanduser(), p) p = P(P('').absolute().anchor) / '~' self.assertEqual(p.expanduser(), p) + p = P('~/a:b') + self.assertEqual(p.expanduser(), P(os.path.expanduser('~'), './a:b')) def test_exists(self): P = self.cls @@ -2467,11 +2496,16 @@ def check(): env.pop('HOMEPATH', None) env['USERPROFILE'] = 'C:\\Users\\alice' check() - + # bpo-38883: ignore `HOME` when set on windows env['HOME'] = 'C:\\Users\\eve' check() + def test_resolve(self): + P = self.cls + p = P(BASE, './a:b') + self.assertEqual(str(p.resolve(strict=False)), f'{BASE}\\a:b') + class CompatiblePathTest(unittest.TestCase): """ diff --git a/Misc/NEWS.d/next/Library/2019-03-15-22-50-27.bpo-36305.Pbkv6u.rst b/Misc/NEWS.d/next/Library/2019-03-15-22-50-27.bpo-36305.Pbkv6u.rst new file mode 100644 index 00000000000000..9b280655aad7af --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-03-15-22-50-27.bpo-36305.Pbkv6u.rst @@ -0,0 +1 @@ +Fix a ``pathlib`` inconsistency in handling of paths containing colons. \ No newline at end of file