Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 3fd32ec

Browse files
committed
optimizations due to Fred Drake; added urldefrag() function
1 parent 1acbffe commit 3fd32ec

File tree

1 file changed

+35
-18
lines changed

1 file changed

+35
-18
lines changed

Lib/urlparse.py

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
# Standard/builtin Python modules
55
import string
6+
from string import joinfields, splitfields, find, rfind
67

78
# A classification of schemes ('' means apply by default)
89
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'wais', 'file',
@@ -18,17 +19,23 @@
1819
# Characters valid in scheme names
1920
scheme_chars = string.letters + string.digits + '+-.'
2021

22+
# Cache of urlparse() results.  Keys are the (url, scheme, allow_framents)
# argument tuples and values are the 6-tuples urlparse() returned for them.
# The cache is emptied by clear_cache().
_parse_cache = {}
23+
24+
# Discard every cached urlparse() result by rebinding the module-level
# _parse_cache name to a new, empty dictionary.  Takes no arguments and
# returns nothing.
def clear_cache():
25+
# 'global' is needed so the assignment below rebinds the module-level
# name instead of creating a local variable.
global _parse_cache
26+
_parse_cache = {}
27+
28+
2129
# Parse a URL into 6 components:
2230
# <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
2331
# Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
2432
# Note that we don't break the components up in smaller bits
2533
# (e.g. netloc is a single string) and we don't expand % escapes.
2634
def urlparse(url, scheme = '', allow_framents = 1):
27-
netloc = ''
28-
path = ''
29-
params = ''
30-
query = ''
31-
fragment = ''
35+
key = url, scheme, allow_framents
36+
if _parse_cache.has_key(key):
37+
return _parse_cache[key]
38+
netloc = path = params = query = fragment = ''
3239
i = string.find(url, ':')
3340
if i > 0:
3441
for c in url[:i]:
@@ -54,7 +61,9 @@ def urlparse(url, scheme = '', allow_framents = 1):
5461
i = string.find(url, ';')
5562
if i >= 0:
5663
url, params = url[:i], url[i+1:]
57-
return scheme, netloc, url, params, query, fragment
64+
tuple = scheme, netloc, url, params, query, fragment
65+
_parse_cache[key] = tuple
66+
return tuple
5867

5968
# Put a parsed URL back together again. This may result in a slightly
6069
# different, but equivalent URL, if the URL that was parsed originally
@@ -80,7 +89,7 @@ def urljoin(base, url, allow_framents = 1):
8089
if not base:
8190
return url
8291
bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
83-
urlparse(base, '', allow_framents)
92+
urlparse(base, '', allow_framents)
8493
scheme, netloc, path, params, query, fragment = \
8594
urlparse(url, bscheme, allow_framents)
8695
# XXX Unofficial hack: default netloc to bnetloc even if
@@ -90,9 +99,9 @@ def urljoin(base, url, allow_framents = 1):
9099
scheme in uses_netloc and bscheme in uses_netloc:
91100
netloc = bnetloc
92101
# Strip the port number
93-
i = string.find(netloc, '@')
102+
i = find(netloc, '@')
94103
if i < 0: i = 0
95-
i = string.find(netloc, ':', i)
104+
i = find(netloc, ':', i)
96105
if i >= 0:
97106
netloc = netloc[:i]
98107
if scheme != bscheme or scheme not in uses_relative:
@@ -107,15 +116,12 @@ def urljoin(base, url, allow_framents = 1):
107116
return urlunparse((scheme, netloc, path,
108117
params, query, fragment))
109118
if not path:
110-
path = bpath
111-
if not query:
112-
query = bquery
113-
return urlunparse((scheme, netloc, path,
114-
params, query, fragment))
115-
i = string.rfind(bpath, '/')
119+
return urlunparse((scheme, netloc, bpath,
120+
params, query or bquery, fragment))
121+
i = rfind(bpath, '/')
116122
if i >= 0:
117123
path = bpath[:i] + '/' + path
118-
segments = string.splitfields(path, '/')
124+
segments = splitfields(path, '/')
119125
if segments[-1] == '.':
120126
segments[-1] = ''
121127
while '.' in segments:
@@ -132,10 +138,21 @@ def urljoin(base, url, allow_framents = 1):
132138
break
133139
if len(segments) >= 2 and segments[-1] == '..':
134140
segments[-2:] = ['']
135-
path = string.joinfields(segments, '/')
136-
return urlunparse((scheme, netloc, path,
141+
return urlunparse((scheme, netloc, joinfields(segments, '/'),
137142
params, query, fragment))
138143

144+
# Split a URL into (URL without its fragment, fragment).  The URL is parsed
# with urlparse() and reassembled with urlunparse() using an empty fragment.
def urldefrag(url):
145+
"""Removes any existing fragment from URL.
146+
147+
Returns a tuple of the defragmented URL and the fragment.  If
148+
the URL contained no fragments, the second element is the
149+
empty string.
150+
"""
151+
# urlparse() is called with its default scheme and fragment handling;
# only the sixth component (the fragment) is kept separately.
s, n, p, a, q, frag = urlparse(url)
152+
# Rebuild the URL from the other five components with '' as the fragment.
# NOTE(review): urlunparse() is described elsewhere in this file as possibly
# yielding a slightly different but equivalent URL, so the result may not
# be a literal prefix of the input -- confirm if callers rely on that.
defrag = urlunparse((s, n, p, a, q, ''))
153+
return defrag, frag
154+
155+
139156
test_input = """
140157
http://a/b/c/d
141158

0 commit comments

Comments
 (0)