Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit df9f1ec

Browse files
committed
Revert accidentally committed files. Oops!
1 parent 10faf6a commit df9f1ec

2 files changed

Lines changed: 70 additions & 61 deletions

File tree

Lib/test/test_urllib.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,7 @@ def test_unquoting_plus(self):
465465

466466
def test_unquote_with_unicode(self):
467467
r = urllib.parse.unquote('br%C3%BCckner_sapporo_20050930.doc')
468-
self.assertEqual(r, 'br\u00FCckner_sapporo_20050930.doc')
468+
self.assertEqual(r, 'br\xc3\xbcckner_sapporo_20050930.doc')
469469

470470
class urlencode_Tests(unittest.TestCase):
471471
"""Tests for urlencode()"""

Lib/urllib/parse.py

Lines changed: 69 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -261,74 +261,84 @@ def urldefrag(url):
261261
return url, ''
262262

263263

264-
def unquote_as_string (s, plus=False, charset=None):
265-
if charset is None:
266-
charset = "UTF-8"
267-
return str(unquote_as_bytes(s, plus=plus), charset, 'strict')
264+
_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
265+
_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
268266

269-
def unquote_as_bytes (s, plus=False):
267+
def unquote(s):
270268
"""unquote('abc%20def') -> 'abc def'."""
271-
if plus:
272-
s = s.replace('+', ' ')
273269
res = s.split('%')
274-
res[0] = res[0].encode('ASCII', 'strict')
275270
for i in range(1, len(res)):
276-
res[i] = (bytes.fromhex(res[i][:2]) +
277-
res[i][2:].encode('ASCII', 'strict'))
278-
return b''.join(res)
279-
280-
_always_safe = (b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
281-
b'abcdefghijklmnopqrstuvwxyz'
282-
b'0123456789'
283-
b'_.-')
284-
285-
_percent_code = ord('%')
286-
287-
_hextable = b'0123456789ABCDEF'
288-
289-
def quote_as_bytes(s, safe = '/', plus=False):
290-
"""quote(b'abc@def') -> 'abc%40def'"""
291-
292-
if isinstance(s, str):
293-
s = s.encode("UTF-8", "strict")
294-
if not (isinstance(s, bytes) or isinstance(s, bytearray)):
295-
raise ValueError("Argument to quote must be either bytes "
296-
"or bytearray; string arguments will be "
297-
"converted to UTF-8 bytes")
298-
299-
safeset = _always_safe + safe.encode('ASCII', 'strict')
300-
if plus:
301-
safeset += b' '
302-
303-
result = bytearray()
304-
for i in s:
305-
if i not in safeset:
306-
result.append(_percent_code)
307-
result.append(_hextable[(i >> 4) & 0xF])
308-
result.append(_hextable[i & 0xF])
309-
else:
310-
result.append(i)
311-
if plus:
312-
result = result.replace(b' ', b'+')
313-
return result
271+
item = res[i]
272+
try:
273+
res[i] = _hextochr[item[:2]] + item[2:]
274+
except KeyError:
275+
res[i] = '%' + item
276+
except UnicodeDecodeError:
277+
res[i] = chr(int(item[:2], 16)) + item[2:]
278+
return "".join(res)
314279

315-
def quote_as_string(s, safe = '/', plus=False):
316-
return str(quote_as_bytes(s, safe=safe, plus=plus), 'ASCII', 'strict')
280+
def unquote_plus(s):
281+
"""unquote('%7e/abc+def') -> '~/abc def'"""
282+
s = s.replace('+', ' ')
283+
return unquote(s)
317284

318-
# finally, define defaults for 'quote' and 'unquote'
285+
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
286+
'abcdefghijklmnopqrstuvwxyz'
287+
'0123456789' '_.-')
288+
_safe_quoters= {}
319289

320-
def quote(s, safe='/'):
321-
return quote_as_string(s, safe=safe)
290+
class Quoter:
291+
def __init__(self, safe):
292+
self.cache = {}
293+
self.safe = safe + always_safe
322294

323-
def quote_plus(s, safe=''):
324-
return quote_as_string(s, safe=safe, plus=True)
295+
def __call__(self, c):
296+
try:
297+
return self.cache[c]
298+
except KeyError:
299+
if ord(c) < 256:
300+
res = (c in self.safe) and c or ('%%%02X' % ord(c))
301+
self.cache[c] = res
302+
return res
303+
else:
304+
return "".join(['%%%02X' % i for i in c.encode("utf-8")])
325305

326-
def unquote(s):
327-
return unquote_as_string(s)
306+
def quote(s, safe = '/'):
307+
"""quote('abc def') -> 'abc%20def'
328308
329-
def unquote_plus(s):
330-
return unquote_as_string(s, plus=True)
309+
Each part of a URL, e.g. the path info, the query, etc., has a
310+
different set of reserved characters that must be quoted.
311+
312+
RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
313+
the following reserved characters.
331314
315+
reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
316+
"$" | ","
317+
318+
Each of these characters is reserved in some component of a URL,
319+
but not necessarily in all of them.
320+
321+
By default, the quote function is intended for quoting the path
322+
section of a URL. Thus, it will not encode '/'. This character
323+
is reserved, but in typical usage the quote function is being
324+
called on a path where the existing slash characters are used as
325+
reserved characters.
326+
"""
327+
cachekey = (safe, always_safe)
328+
try:
329+
quoter = _safe_quoters[cachekey]
330+
except KeyError:
331+
quoter = Quoter(safe)
332+
_safe_quoters[cachekey] = quoter
333+
res = map(quoter, s)
334+
return ''.join(res)
335+
336+
def quote_plus(s, safe = ''):
337+
"""Quote the query fragment of a URL; replacing ' ' with '+'"""
338+
if ' ' in s:
339+
s = quote(s, safe + ' ')
340+
return s.replace(' ', '+')
341+
return quote(s, safe)
332342

333343
def urlencode(query,doseq=0):
334344
"""Encode a sequence of two-element tuples or dictionary into a URL query string.
@@ -377,7 +387,7 @@ def urlencode(query,doseq=0):
377387
# is there a reasonable way to convert to ASCII?
378388
# encode generates a string, but "replace" or "ignore"
379389
# lose information and "strict" can raise UnicodeError
380-
v = quote_plus(v)
390+
v = quote_plus(v.encode("ASCII","replace"))
381391
l.append(k + '=' + v)
382392
else:
383393
try:
@@ -464,8 +474,7 @@ def splituser(host):
464474
_userprog = re.compile('^(.*)@(.*)$')
465475

466476
match = _userprog.match(host)
467-
if match:
468-
return map(unquote, match.group(1, 2))
477+
if match: return map(unquote, match.group(1, 2))
469478
return None, host
470479

471480
_passwdprog = None

0 commit comments

Comments
 (0)