Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 1be320e

Browse files
committed
Issue #9374 - Generic parsing of query and fragment portions of URLs for any scheme
1 parent 8d88604 commit 1be320e

3 files changed

Lines changed: 14 additions & 9 deletions

File tree

Lib/test/test_urlparse.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -636,11 +636,20 @@ def test_anyscheme(self):
636636
('s3', 'foo.com', '/stuff', '', '', ''))
637637
self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
638638
('x-newscheme', 'foo.com', '/stuff', '', '', ''))
639+
self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
640+
('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
641+
self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
642+
('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
643+
639644
# And for bytes...
640645
self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
641646
(b's3', b'foo.com', b'/stuff', b'', b'', b''))
642647
self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
643648
(b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
649+
self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
650+
(b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
651+
self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
652+
(b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
644653

645654
def test_mixed_types_rejected(self):
646655
# Several functions that process either strings or ASCII encoded bytes

Lib/urllib/parse.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,16 +44,9 @@
4444
'imap', 'wais', 'file', 'mms', 'https', 'shttp',
4545
'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
4646
'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh']
47-
non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
48-
'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
4947
uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
5048
'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
5149
'mms', '', 'sftp']
52-
uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
53-
'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
54-
uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
55-
'nntp', 'wais', 'https', 'shttp', 'snews',
56-
'file', 'prospero', '']
5750

5851
# Characters valid in scheme names
5952
scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
@@ -357,9 +350,9 @@ def urlsplit(url, scheme='', allow_fragments=True):
357350
if (('[' in netloc and ']' not in netloc) or
358351
(']' in netloc and '[' not in netloc)):
359352
raise ValueError("Invalid IPv6 URL")
360-
if allow_fragments and scheme in uses_fragment and '#' in url:
353+
if allow_fragments and '#' in url:
361354
url, fragment = url.split('#', 1)
362-
if scheme in uses_query and '?' in url:
355+
if '?' in url:
363356
url, query = url.split('?', 1)
364357
v = SplitResult(scheme, netloc, url, query, fragment)
365358
_parse_cache[key] = v

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ Core and Builtins
6363
Library
6464
-------
6565

66+
- Issue #9374: Generic parsing of query and fragment portions of URLs for any
67+
scheme. Supported by both RFC 3986 and RFC 2396.
68+
6669
- Issue #14798: Fix the functions in pyclbr to raise an ImportError
6770
when the first part of a dotted name is not a package. Patch by
6871
Xavier de Gaye.

0 commit comments

Comments
 (0)