27
27
test_urlparse.py provides a good indicator of parsing behavior.
28
28
"""
29
29
30
+ from collections import namedtuple
31
+ import functools
30
32
import re
31
33
import sys
32
34
import types
33
- import collections
34
35
import warnings
35
36
36
37
__all__ = ["urlparse" , "urlunparse" , "urljoin" , "urldefrag" ,
81
82
# Unsafe bytes to be removed per WHATWG spec.
# Each entry must be exactly one character (tab, CR, LF): a stray space inside
# a literal would make str.replace() miss the bare control character.
_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']
83
84
84
- # XXX: Consider replacing with functools.lru_cache
85
- MAX_CACHE_SIZE = 20
86
- _parse_cache = {}
87
-
88
85
def clear_cache():
    """Clear internal performance caches. Undocumented; some tests want it.

    Resets the functools.lru_cache attached to urlsplit() and the one
    attached to _byte_quoter_factory().
    """
    urlsplit.cache_clear()
    _byte_quoter_factory.cache_clear()
93
89
94
90
# Helpers for bytes handling
95
91
# For 3.2, we deliberately require applications that
@@ -243,8 +239,6 @@ def _hostinfo(self):
243
239
return hostname , port
244
240
245
241
246
- from collections import namedtuple
247
-
248
242
_DefragResultBase = namedtuple ('DefragResult' , 'url fragment' )
249
243
_SplitResultBase = namedtuple (
250
244
'SplitResult' , 'scheme netloc path query fragment' )
@@ -434,6 +428,9 @@ def _checknetloc(netloc):
434
428
raise ValueError ("netloc '" + netloc + "' contains invalid " +
435
429
"characters under NFKC normalization" )
436
430
431
+ # typed=True avoids BytesWarnings being emitted during cache key
432
+ # comparison since this API supports both bytes and str input.
433
+ @functools .lru_cache (typed = True )
437
434
def urlsplit (url , scheme = '' , allow_fragments = True ):
438
435
"""Parse a URL into 5 components:
439
436
<scheme>://<netloc>/<path>?<query>#<fragment>
@@ -462,12 +459,6 @@ def urlsplit(url, scheme='', allow_fragments=True):
462
459
scheme = scheme .replace (b , "" )
463
460
464
461
allow_fragments = bool (allow_fragments )
465
- key = url , scheme , allow_fragments , type (url ), type (scheme )
466
- cached = _parse_cache .get (key , None )
467
- if cached :
468
- return _coerce_result (cached )
469
- if len (_parse_cache ) >= MAX_CACHE_SIZE : # avoid runaway growth
470
- clear_cache ()
471
462
netloc = query = fragment = ''
472
463
i = url .find (':' )
473
464
if i > 0 :
@@ -488,7 +479,6 @@ def urlsplit(url, scheme='', allow_fragments=True):
488
479
url , query = url .split ('?' , 1 )
489
480
_checknetloc (netloc )
490
481
v = SplitResult (scheme , netloc , url , query , fragment )
491
- _parse_cache [key ] = v
492
482
return _coerce_result (v )
493
483
494
484
def urlunparse (components ):
@@ -791,23 +781,30 @@ def unquote_plus(string, encoding='utf-8', errors='replace'):
791
781
b'0123456789'
792
782
b'_.-~' )
793
783
_ALWAYS_SAFE_BYTES = bytes (_ALWAYS_SAFE )
794
- _safe_quoters = {}
795
784
796
- class Quoter (collections .defaultdict ):
797
- """A mapping from bytes (in range(0,256)) to strings.
785
def __getattr__(name):
    """Module-level attribute hook for names not found by normal lookup.

    Only 'Quoter' is served: accessing it emits a DeprecationWarning and
    returns the private _Quoter class.  Any other name raises AttributeError.
    """
    if name == 'Quoter':
        # stacklevel=2 attributes the warning to the code that accessed the
        # attribute rather than to this hook.
        warnings.warn('Deprecated in 3.11. '
                      'urllib.parse.Quoter will be removed in Python 3.14. '
                      'It was not intended to be a public API.',
                      DeprecationWarning, stacklevel=2)
        return _Quoter
    raise AttributeError(f'module {__name__!r} has no attribute {name!r}')
793
+
794
+ class _Quoter (dict ):
795
+ """A mapping from bytes numbers (in range(0,256)) to strings.
798
796
799
797
String values are percent-encoded byte values, unless the key < 128, and
800
- in the "safe" set ( either the specified safe set, or default set) .
798
+ in either of the specified safe set, or the always safe set.
801
799
"""
802
- # Keeps a cache internally, using defaultdict , for efficiency (lookups
800
+ # Keeps a cache internally, via __missing__ , for efficiency (lookups
803
801
# of cached keys don't call Python code at all).
804
802
def __init__ (self , safe ):
805
803
"""safe: bytes object."""
806
804
self .safe = _ALWAYS_SAFE .union (safe )
807
805
808
806
def __repr__ (self ):
809
- # Without this, will just display as a defaultdict
810
- return "<%s %r>" % (self .__class__ .__name__ , dict (self ))
807
+ return f"<Quoter { dict (self )!r} >"
811
808
812
809
def __missing__ (self , b ):
813
810
# Handle a cache miss. Store quoted string in cache and return.
@@ -886,6 +883,11 @@ def quote_plus(string, safe='', encoding=None, errors=None):
886
883
string = quote (string , safe + space , encoding , errors )
887
884
return string .replace (' ' , '+' )
888
885
886
# Expectation: A typical program is unlikely to create more than 5 of these.
@functools.lru_cache
def _byte_quoter_factory(safe):
    """Return the cached item-lookup callable of a _Quoter built for *safe*."""
    quoter = _Quoter(safe)
    return quoter.__getitem__
890
+
889
891
def quote_from_bytes (bs , safe = '/' ):
890
892
"""Like quote(), but accepts a bytes object rather than a str, and does
891
893
not perform string-to-bytes encoding. It always returns an ASCII string.
@@ -899,13 +901,11 @@ def quote_from_bytes(bs, safe='/'):
899
901
# Normalize 'safe' by converting to bytes and removing non-ASCII chars
900
902
safe = safe .encode ('ascii' , 'ignore' )
901
903
else :
904
+ # List comprehensions are faster than generator expressions.
902
905
safe = bytes ([c for c in safe if c < 128 ])
903
906
if not bs .rstrip (_ALWAYS_SAFE_BYTES + safe ):
904
907
return bs .decode ()
905
- try :
906
- quoter = _safe_quoters [safe ]
907
- except KeyError :
908
- _safe_quoters [safe ] = quoter = Quoter (safe ).__getitem__
908
+ quoter = _byte_quoter_factory (safe )
909
909
return '' .join ([quoter (char ) for char in bs ])
910
910
911
911
def urlencode (query , doseq = False , safe = '' , encoding = None , errors = None ,
0 commit comments