Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 84c7d9f

Browse files
committed
Fix Issue754016 - urlparse goes wrong with IP:port without scheme
1 parent 4aa0d4d commit 84c7d9f

3 files changed

Lines changed: 44 additions & 5 deletions

File tree

Doc/library/urllib.parse.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,23 @@ The :mod:`urllib.parse` module defines the following functions:
4848
>>> o.geturl()
4949
'http://www.cwi.nl:80/%7Eguido/Python.html'
5050

51+
If the scheme value is not specified, urlparse, following the syntax
52+
specifications from RFC 1808, expects the netloc value to start with '//'.
53+
Otherwise, it is not possible to distinguish between the netloc and path
54+
components, so the indistinguishable component is classified as a path, as in
55+
a relative URL.
56+
57+
>>> from urllib.parse import urlparse
58+
>>> urlparse('//www.cwi.nl:80/%7Eguido/Python.html')
59+
ParseResult(scheme='', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html',
60+
params='', query='', fragment='')
61+
>>> urlparse('www.cwi.nl:80/%7Eguido/Python.html')
62+
ParseResult(scheme='', netloc='', path='www.cwi.nl:80/%7Eguido/Python.html',
63+
params='', query='', fragment='')
64+
>>> urlparse('help/Python.html')
65+
ParseResult(scheme='', netloc='', path='help/Python.html', params='',
66+
query='', fragment='')
67+
5168
If the *scheme* argument is specified, it gives the default addressing
5269
scheme, to be used only if the URL does not specify one. The default value for
5370
this argument is the empty string.

Lib/test/test_urlparse.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,27 @@ def test_noslash(self):
461461
self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
462462
('http', 'example.com', '', '', 'blahblah=/foo', ''))
463463

464+
def test_withoutscheme(self):
465+
# Test urlparse without scheme
466+
# Issue 754016: urlparse goes wrong with IP:port without scheme
467+
# RFC 1808 specifies that netloc should start with //, urlparse expects
468+
# the same, otherwise it classifies the portion of url as path.
469+
self.assertEqual(urllib.parse.urlparse("path"),
470+
('','','path','','',''))
471+
self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
472+
('','www.python.org:80','','','',''))
473+
self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
474+
('http','www.python.org:80','','','',''))
475+
476+
def test_portseparator(self):
477+
# Issue 754016 makes changes for port separator ':' from scheme separator
478+
self.assertEqual(urllib.parse.urlparse("path:80"),
479+
('','','path:80','','',''))
480+
self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
481+
self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
482+
self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
483+
('http','www.python.org:80','','','',''))
484+
464485
def test_usingsys(self):
465486
# Issue 3314: sys module is used in the error
466487
self.assertRaises(TypeError, urllib.parse.urlencode, "foo")

Lib/urllib/parse.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -192,11 +192,12 @@ def urlsplit(url, scheme='', allow_fragments=True):
192192
v = SplitResult(scheme, netloc, url, query, fragment)
193193
_parse_cache[key] = v
194194
return v
195-
for c in url[:i]:
196-
if c not in scheme_chars:
197-
break
198-
else:
199-
scheme, url = url[:i].lower(), url[i+1:]
195+
if url.endswith(':') or not url[i+1].isdigit():
196+
for c in url[:i]:
197+
if c not in scheme_chars:
198+
break
199+
else:
200+
scheme, url = url[:i].lower(), url[i+1:]
200201
if url[:2] == '//':
201202
netloc, url = _splitnetloc(url, 2)
202203
if (('[' in netloc and ']' not in netloc) or

0 commit comments

Comments
 (0)