@@ -261,74 +261,84 @@ def urldefrag(url):
261261 return url , ''
262262
263263
264- def unquote_as_string (s , plus = False , charset = None ):
265- if charset is None :
266- charset = "UTF-8"
267- return str (unquote_as_bytes (s , plus = plus ), charset , 'strict' )
# Lookup table from a two-character hex escape (the "XX" of "%XX") to the
# character with that code point; unquote() indexes it with the two
# characters that follow each '%'.
#
# Hex digits are case-insensitive, so every combination of upper- and
# lower-case digits is a key: 'ab', 'AB', 'aB' and 'Ab' all map to '\xab'.
_hextochr = {
    hi + lo: chr(int(hi + lo, 16))
    for hi in '0123456789ABCDEFabcdef'
    for lo in '0123456789ABCDEFabcdef'
}
268266
269- def unquote_as_bytes ( s , plus = False ):
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Replace each '%XX' escape in the string *s* with the character that
    `_hextochr` maps 'XX' to.  A '%' that is not followed by two characters
    found in `_hextochr` (for example '%zz' or a trailing '%') is left in
    the result unchanged.
    """
    head, *tail = s.split('%')
    decoded = [head]
    # Every element of `tail` is the text that followed one '%' in `s`.
    for item in tail:
        try:
            decoded.append(_hextochr[item[:2]] + item[2:])
        except KeyError:
            # Not a recognised escape: put the '%' back and keep the text.
            decoded.append('%' + item)
    return ''.join(decoded)
314279
315- def quote_as_string (s , safe = '/' , plus = False ):
316- return str (quote_as_bytes (s , safe = safe , plus = plus ), 'ASCII' , 'strict' )
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' in *s* is first turned into a space.
    The replacement happens before decoding, so an escaped plus sign
    ('%2B') still decodes to a literal '+'.
    """
    return unquote(s.replace('+', ' '))
317284
318- # finally, define defaults for 'quote' and 'unquote'
# Characters that are added to the `safe` set of every Quoter and are
# therefore never percent-encoded: ASCII letters, digits and '_', '.', '-'.
always_safe = (
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    + 'abcdefghijklmnopqrstuvwxyz'
    + '0123456789'
    + '_.-'
)

# Quoter instances already built by quote(), keyed by (safe, always_safe).
_safe_quoters = dict()
319289
320- def quote (s , safe = '/' ):
321- return quote_as_string (s , safe = safe )
class Quoter:
    """Callable that percent-encodes one character at a time.

    An instance is built with the string of extra characters to leave
    unescaped; the module-level `always_safe` characters are added to it.
    Results for characters with a code point below 256 are memoised in
    `self.cache`.
    """

    def __init__(self, safe):
        # Maps a character to its already-computed quoted form.
        self.cache = {}
        # Every character that must be passed through unchanged.
        self.safe = safe + always_safe

    def __call__(self, c):
        """Return the quoted form of the single character *c*.

        Characters below code point 256 are returned as-is when they are in
        `self.safe`, otherwise as one '%XX' escape of the code point.
        Characters at or above 256 are returned as the '%XX' escapes of
        their UTF-8 bytes and are not cached.
        """
        try:
            return self.cache[c]
        except KeyError:
            pass

        code = ord(c)
        if code >= 256:
            return ''.join('%%%02X' % byte for byte in c.encode('utf-8'))

        res = c if c in self.safe else '%%%02X' % code
        self.cache[c] = res
        return res
325305
326- def unquote ( s ):
327- return unquote_as_string ( s )
def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Percent-encode the characters of *s*, leaving untouched those listed in
    *safe* as well as the ones in `always_safe`.

    Which characters are reserved depends on the part of the URL being
    built (path info, query, ...).  RFC 2396, "Uniform Resource Identifiers
    (URI): Generic Syntax", gives this list of reserved characters:

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of them is reserved in some URL component, though not necessarily
    in every one.

    The default value of *safe* targets the path section of a URL, so '/'
    is not encoded: it is a reserved character, but when quoting a path the
    slashes already present are normally the separators and must be kept.
    """
    # One Quoter is built per distinct `safe` string and then reused.
    key = (safe, always_safe)
    if key not in _safe_quoters:
        _safe_quoters[key] = Quoter(safe)
    encode_char = _safe_quoters[key]
    return ''.join(encode_char(ch) for ch in s)
335+
def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'

    Works like quote() with *safe* as the set of characters to keep,
    except that each space in *s* comes out as '+' rather than '%20'.
    """
    if ' ' not in s:
        return quote(s, safe)
    # Let spaces through quote() unescaped, then swap them for '+'.
    return quote(s, safe + ' ').replace(' ', '+')
332342
333343def urlencode (query ,doseq = 0 ):
334344 """Encode a sequence of two-element tuples or dictionary into a URL query string.
@@ -377,7 +387,7 @@ def urlencode(query,doseq=0):
377387 # is there a reasonable way to convert to ASCII?
378388 # encode generates a string, but "replace" or "ignore"
379389 # lose information and "strict" can raise UnicodeError
380- v = quote_plus (v )
390+ v = quote_plus (v . encode ( "ASCII" , "replace" ) )
381391 l .append (k + '=' + v )
382392 else :
383393 try :
@@ -464,8 +474,7 @@ def splituser(host):
464474 _userprog = re .compile ('^(.*)@(.*)$' )
465475
466476 match = _userprog .match (host )
467- if match :
468- return map (unquote , match .group (1 , 2 ))
477+ if match : return map (unquote , match .group (1 , 2 ))
469478 return None , host
470479
471480_passwdprog = None
0 commit comments