Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 7ae51bf

Browse files
committed
Remove "," from the list of always_safe characters. It is a reserved
character according to RFC 2396. Add some text to quote doc string that explains the quoting rules better. This closes SF Bug #114427. Add _fast_quote operation that uses a dictionary instead of a list when the standard set of safe characters is used.
1 parent d94f707 commit 7ae51bf

2 files changed

Lines changed: 62 additions & 8 deletions

File tree

Lib/test/test_urllib.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,21 @@
1212

1313
test = urllib.quote(chars)
1414
assert test == expected, "urllib.quote problem"
15+
test2 = urllib.unquote(expected)
16+
assert test2 == chars
17+
18+
in1 = "abc/def"
19+
out1_1 = "abc/def"
20+
out1_2 = "abc%2fdef"
21+
22+
assert urllib.quote(in1) == out1_1, "urllib.quote problem"
23+
assert urllib.quote(in1, '') == out1_2, "urllib.quote problem"
24+
25+
in2 = "abc?def"
26+
out2_1 = "abc%3fdef"
27+
out2_2 = "abc?def"
28+
29+
assert urllib.quote(in2) == out2_1, "urllib.quote problem"
30+
assert urllib.quote(in2, '?') == out2_2, "urllib.quote problem"
31+
32+

Lib/urllib.py

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,7 @@ def open_ftp(self, url):
426426
dirs, file = dirs[:-1], dirs[-1]
427427
if dirs and not dirs[0]: dirs = dirs[1:]
428428
if dirs and not dirs[0]: dirs[0] = '/'
429-
key = (user, host, port, string.joinfields(dirs, '/'))
429+
key = user, host, port, string.join(dirs, '/')
430430
# XXX thread unsafe!
431431
if len(self.ftpcache) > MAXFTPCACHE:
432432
# Prune the cache, rather arbitrarily
@@ -1013,22 +1013,58 @@ def unquote_plus(s):
10131013

10141014
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
10151015
'abcdefghijklmnopqrstuvwxyz'
1016-
'0123456789' '_,.-')
1016+
'0123456789' '_.-')
1017+
1018+
_fast_safe_test = always_safe + '/'
1019+
_fast_safe = None
1020+
1021+
def _fast_quote(s):
1022+
global _fast_safe
1023+
if _fast_safe is None:
1024+
_fast_safe = {}
1025+
for c in _fast_safe_test:
1026+
_fast_safe[c] = c
1027+
res = list(s)
1028+
for i in range(len(res)):
1029+
c = res[i]
1030+
if not _fast_safe.has_key(c):
1031+
res[i] = '%%%02x' % ord(c)
1032+
return string.join(res, '')
1033+
10171034
def quote(s, safe = '/'):
1018-
"""quote('abc def') -> 'abc%20def'."""
1019-
# XXX Can speed this up an order of magnitude
1035+
"""quote('abc def') -> 'abc%20def'
1036+
1037+
Each part of a URL, e.g. the path info, the query, etc., has a
1038+
different set of reserved characters that must be quoted.
1039+
1040+
RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1041+
the following reserved characters.
1042+
1043+
reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1044+
"$" | ","
1045+
1046+
Each of these characters is reserved in some component of a URL,
1047+
but not necessarily in all of them.
1048+
1049+
By default, the quote function is intended for quoting the path
1050+
section of a URL. Thus, it will not encode '/'. This character
1051+
is reserved, but in typical usage the quote function is being
1052+
called on a path where the existing slash characters are used as
1053+
reserved characters.
1054+
"""
10201055
safe = always_safe + safe
1056+
if _fast_safe_test == safe:
1057+
return _fast_quote(s)
10211058
res = list(s)
10221059
for i in range(len(res)):
10231060
c = res[i]
10241061
if c not in safe:
10251062
res[i] = '%%%02x' % ord(c)
1026-
return string.joinfields(res, '')
1063+
return string.join(res, '')
10271064

1028-
def quote_plus(s, safe = '/'):
1029-
# XXX Can speed this up an order of magnitude
1065+
def quote_plus(s, safe = ''):
1066+
"""Quote the query fragment of a URL; replacing ' ' with '+'"""
10301067
if ' ' in s:
1031-
# replace ' ' with '+'
10321068
l = string.split(s, ' ')
10331069
for i in range(len(l)):
10341070
l[i] = quote(l[i], safe)

0 commit comments

Comments
 (0)