Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 99 additions & 1 deletion Lib/test/test_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,11 @@ def checkJoin(self, base, relurl, expected, *, relroundtrip=True):
self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
relurlb = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurlb))
self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
else:
relurl = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurl))
self.assertNotEqual(urllib.parse.urljoin(base, relurl), expected)
relurlb = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurlb))
self.assertNotEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)

def test_unparse_parse(self):
str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
Expand Down Expand Up @@ -568,6 +573,9 @@ def test_urljoins(self):
# slashes
self.checkJoin('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/')
self.checkJoin('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/')
self.checkJoin('http://a/b/c/d/e//', '../../f/g/', 'http://a/b/c/d/f/g/')
self.checkJoin('http://a/b/c/d/e///', '../../f/g/', 'http://a/b/c/d/e/f/g/')
self.checkJoin('http://a/b/c/d/e////', '../../f/g/', 'http://a/b/c/d/e//f/g/')
self.checkJoin('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/')
self.checkJoin('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/')
self.checkJoin('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g')
Expand Down Expand Up @@ -645,6 +653,16 @@ def test_urljoins_relative_base(self):
self.checkJoin('//', '/w', '///w')
self.checkJoin('//', '///w', '///w')
self.checkJoin('//', 'w', '///w')
self.checkJoin('//', '../w', '///w')
self.checkJoin('//', './w', '///w')
self.checkJoin('//', '..//w', '///w')
self.checkJoin('//', './/w', '///w')
self.checkJoin('//', '..', '//')
self.checkJoin('//', '.', '//')
self.checkJoin('//', '../', '//')
self.checkJoin('//', './', '//')
self.checkJoin('//', '..//', '///')
self.checkJoin('//', './/', '///')

self.checkJoin('//a', '', '//a')
self.checkJoin('//a', '//', '//a')
Expand All @@ -653,6 +671,16 @@ def test_urljoins_relative_base(self):
self.checkJoin('//a', '/w', '//a/w')
self.checkJoin('//a', '///w', '//a/w')
self.checkJoin('//a', 'w', '//a/w')
self.checkJoin('//a', '../w', '//a/w')
self.checkJoin('//a', './w', '//a/w')
self.checkJoin('//a', '..//w', '//a/w')
self.checkJoin('//a', './/w', '//a/w')
self.checkJoin('//a', '..', '//a')
self.checkJoin('//a', '.', '//a')
self.checkJoin('//a', '../', '//a')
self.checkJoin('//a', './', '//a')
self.checkJoin('//a', '..//', '//a/')
self.checkJoin('//a', './/', '//a/')

for scheme in '', 'http:':
self.checkJoin('http:', scheme + '', 'http:')
Expand All @@ -661,7 +689,21 @@ def test_urljoins_relative_base(self):
self.checkJoin('http:', scheme + '//v/w', 'http://v/w')
self.checkJoin('http:', scheme + '/w', 'http:/w')
self.checkJoin('http:', scheme + '///w', 'http:/w')
self.checkJoin('http:', scheme + 'w', 'http:/w')
self.checkJoin('http:', scheme + 'w', 'http:w')
self.checkJoin('http:', scheme + '../w', 'http:w')
self.checkJoin('http:', scheme + './w', 'http:w')
self.checkJoin('http:', scheme + '..//w', 'http:/w')
self.checkJoin('http:', scheme + './/w', 'http:/w')
self.checkJoin('http:', scheme + '..///w', 'http:////w')
self.checkJoin('http:', scheme + './//w', 'http:////w')
self.checkJoin('http:', scheme + '..', 'http:')
self.checkJoin('http:', scheme + '.', 'http:')
self.checkJoin('http:', scheme + '../', 'http:')
self.checkJoin('http:', scheme + './', 'http:')
self.checkJoin('http:', scheme + '..//', 'http:/')
self.checkJoin('http:', scheme + './/', 'http:/')
self.checkJoin('http:', scheme + '..///', 'http:////')
self.checkJoin('http:', scheme + './//', 'http:////')

self.checkJoin('http://', scheme + '', 'http://')
self.checkJoin('http://', scheme + '//', 'http://')
Expand All @@ -670,6 +712,20 @@ def test_urljoins_relative_base(self):
self.checkJoin('http://', scheme + '/w', 'http:///w')
self.checkJoin('http://', scheme + '///w', 'http:///w')
self.checkJoin('http://', scheme + 'w', 'http:///w')
self.checkJoin('http://', scheme + '../w', 'http:///w')
self.checkJoin('http://', scheme + './w', 'http:///w')
self.checkJoin('http://', scheme + '..//w', 'http:///w')
self.checkJoin('http://', scheme + './/w', 'http:///w')
self.checkJoin('http://', scheme + '..///w', 'http:////w')
self.checkJoin('http://', scheme + './//w', 'http:////w')
self.checkJoin('http://', scheme + '..', 'http://')
self.checkJoin('http://', scheme + '.', 'http://')
self.checkJoin('http://', scheme + '../', 'http://')
self.checkJoin('http://', scheme + './', 'http://')
self.checkJoin('http://', scheme + '..//', 'http:///')
self.checkJoin('http://', scheme + './/', 'http:///')
self.checkJoin('http://', scheme + '..///', 'http:////')
self.checkJoin('http://', scheme + './//', 'http:////')

self.checkJoin('http://a', scheme + '', 'http://a')
self.checkJoin('http://a', scheme + '//', 'http://a')
Expand All @@ -678,6 +734,38 @@ def test_urljoins_relative_base(self):
self.checkJoin('http://a', scheme + '/w', 'http://a/w')
self.checkJoin('http://a', scheme + '///w', 'http://a/w')
self.checkJoin('http://a', scheme + 'w', 'http://a/w')
self.checkJoin('http://a', scheme + '../w', 'http://a/w')
self.checkJoin('http://a', scheme + './w', 'http://a/w')
self.checkJoin('http://a', scheme + '..//w', 'http://a/w')
self.checkJoin('http://a', scheme + './/w', 'http://a/w')
self.checkJoin('http://a', scheme + '..///w', 'http://a//w')
self.checkJoin('http://a', scheme + './//w', 'http://a//w')
self.checkJoin('http://a', scheme + '..', 'http://a')
self.checkJoin('http://a', scheme + '.', 'http://a')
self.checkJoin('http://a', scheme + '../', 'http://a')
self.checkJoin('http://a', scheme + './', 'http://a')
self.checkJoin('http://a', scheme + '..//', 'http://a/')
self.checkJoin('http://a', scheme + './/', 'http://a/')
self.checkJoin('http://a', scheme + '..///', 'http://a//')
self.checkJoin('http://a', scheme + './//', 'http://a//')

self.checkJoin('b/c', '', 'b/c')
self.checkJoin('b/c', '//', 'b/c')
Copy link
Contributor

@andersk andersk Nov 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

RFC 3986 carefully distinguishes between an undefined component and an empty one. The reference `//` has an empty authority, not an undefined one, so it should take the `if defined(R.authority)` branch of the pseudocode in §5.2.2. The result should be `//`.

(This is independent of the discussion of #96015, I think.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I know. I deliberately left undefined and empty components undistinguished for backward compatibility. We will likely change this in a separate issue.

self.checkJoin('b/c', '//v', '//v')
self.checkJoin('b/c', '//v/w', '//v/w')
self.checkJoin('b/c', '/w', '/w')
self.checkJoin('b/c', '///w', '/w')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here: `///w` has an empty (not undefined) authority, so the result should be `///w`.

self.checkJoin('b/c', 'w', 'b/w')
self.checkJoin('b/c', '../w', 'w')
self.checkJoin('b/c', '../../w', 'w')
self.checkJoin('b/c', '../../../w', 'w')
self.checkJoin('b/c', 'w/.', 'b/w/')
self.checkJoin('b/c', '../w/.', 'w/')
self.checkJoin('b/c', '../../w/.', 'w/')
self.checkJoin('b/c', '../../../w/.', 'w/')
self.checkJoin('b/c', '..', '')
self.checkJoin('b/c', '../..', '')
self.checkJoin('b/c', '../../..', '')
Comment on lines +760 to +768
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

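Although these cases fall outside the direct scope of the pseudocode defined in RFC 3986, because `b/c` is not an absolute base URI, they violate the obvious expectation that `urljoin` should be associative. For example, `urljoin(urljoin('a/', 'b/c'), '../../w')` gives `'w'`, but with the expectations above `urljoin('a/', urljoin('b/c', '../../w'))` gives `'a/w'`.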

Given non–RFC 3986 input where the base URI is path-relative (undefined scheme, undefined authority, and a path not beginning with `/`), we should preserve extra initial `..` components in the output:

Suggested change
self.checkJoin('b/c', '../../w', 'w')
self.checkJoin('b/c', '../../../w', 'w')
self.checkJoin('b/c', 'w/.', 'b/w/')
self.checkJoin('b/c', '../w/.', 'w/')
self.checkJoin('b/c', '../../w/.', 'w/')
self.checkJoin('b/c', '../../../w/.', 'w/')
self.checkJoin('b/c', '..', '')
self.checkJoin('b/c', '../..', '')
self.checkJoin('b/c', '../../..', '')
self.checkJoin('b/c', '../../w', '../w')
self.checkJoin('b/c', '../../../w', '../../w')
self.checkJoin('b/c', 'w/.', 'b/w/')
self.checkJoin('b/c', '../w/.', 'w/')
self.checkJoin('b/c', '../../w/.', '../w/')
self.checkJoin('b/c', '../../../w/.', '../../w/')
self.checkJoin('b/c', '..', '')
self.checkJoin('b/c', '../..', '..')
self.checkJoin('b/c', '../../..', '../..')


self.checkJoin('/b/c', '', '/b/c')
self.checkJoin('/b/c', '//', '/b/c')
Expand All @@ -686,6 +774,16 @@ def test_urljoins_relative_base(self):
self.checkJoin('/b/c', '/w', '/w')
self.checkJoin('/b/c', '///w', '/w')
self.checkJoin('/b/c', 'w', '/b/w')
self.checkJoin('/b/c', '../w', '/w')
self.checkJoin('/b/c', '../../w', '/w')
self.checkJoin('/b/c', '../../../w', '/w')
self.checkJoin('/b/c', 'w/.', '/b/w/')
self.checkJoin('/b/c', '../w/.', '/w/')
self.checkJoin('/b/c', '../../w/.', '/w/')
self.checkJoin('/b/c', '../../../w/.', '/w/')
self.checkJoin('/b/c', '..', '/')
self.checkJoin('/b/c', '../..', '/')
self.checkJoin('/b/c', '../../..', '/')

self.checkJoin('///b/c', '', '///b/c')
self.checkJoin('///b/c', '//', '///b/c')
Expand Down
33 changes: 11 additions & 22 deletions Lib/urllib/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,31 +610,22 @@ def urljoin(base, url, allow_fragments=True):
return _coerce_result(_urlunsplit(scheme, netloc, path,
query, fragment))

base_parts = bpath.split('/')
if base_parts[-1] != '':
# the last item is not a directory, so will not be taken into account
# in resolving the relative path
del base_parts[-1]

# for rfc3986, ignore all base path should the first character be root.
if path[:1] == '/':
segments = path.split('/')
else:
segments = base_parts + path.split('/')
# filter out elements that would cause redundant slashes on re-joining
# the resolved_path
segments[1:-1] = filter(None, segments[1:-1])
if path[:1] != '/' and '/' in bpath:
path = bpath.rsplit('/', 1)[0] + '/' + path

resolved_path = []
path = _remove_dot_segments(path)
return _coerce_result(_urlunsplit(scheme, netloc, path, query, fragment))

def _remove_dot_segments(path):
segments = path.split('/')
min_len = 0 if segments[0] else 1

resolved_path = []
for seg in segments:
if seg == '..':
try:
if len(resolved_path) > min_len:
resolved_path.pop()
except IndexError:
# ignore any .. segments that would otherwise cause an IndexError
# when popped from resolved_path if resolving for rfc3986
pass
elif seg == '.':
continue
else:
Expand All @@ -645,9 +636,7 @@ def urljoin(base, url, allow_fragments=True):
# then we need to append the trailing '/'
resolved_path.append('')

return _coerce_result(_urlunsplit(scheme, netloc, '/'.join(
resolved_path) or '/', query, fragment))

return '/'.join(resolved_path)

def urldefrag(url):
"""Removes any existing fragment from URL.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix :func:`urllib.parse.urljoin` for the case when the base path is relative
and the relative reference path starts with ``..``.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Preserve double slashes in the path in :func:`urllib.parse.urljoin`.