2727test_urlparse.py provides a good indicator of parsing behavior.
2828"""
2929
30+ from collections import namedtuple
31+ import functools
3032import re
3133import sys
3234import types
33- import collections
3435import warnings
3536
3637__all__ = ["urlparse" , "urlunparse" , "urljoin" , "urldefrag" ,
8182# Unsafe bytes to be removed per WHATWG spec
8283_UNSAFE_URL_BYTES_TO_REMOVE = ['\t ' , '\r ' , '\n ' ]
8384
84- # XXX: Consider replacing with functools.lru_cache
85- MAX_CACHE_SIZE = 20
86- _parse_cache = {}
87-
def clear_cache():
    """Reset the module's internal memoization caches.

    Not part of the documented API; kept because some tests rely on it.
    """
    # Both callables are wrapped by functools.lru_cache, so each one
    # exposes cache_clear().
    for cached_func in (urlsplit, _byte_quoter_factory):
        cached_func.cache_clear()
9389
9490# Helpers for bytes handling
9591# For 3.2, we deliberately require applications that
@@ -243,8 +239,6 @@ def _hostinfo(self):
243239 return hostname , port
244240
245241
246- from collections import namedtuple
247-
248242_DefragResultBase = namedtuple ('DefragResult' , 'url fragment' )
249243_SplitResultBase = namedtuple (
250244 'SplitResult' , 'scheme netloc path query fragment' )
@@ -434,6 +428,9 @@ def _checknetloc(netloc):
434428 raise ValueError ("netloc '" + netloc + "' contains invalid " +
435429 "characters under NFKC normalization" )
436430
431+ # typed=True avoids BytesWarnings being emitted during cache key
432+ # comparison since this API supports both bytes and str input.
433+ @functools .lru_cache (typed = True )
437434def urlsplit (url , scheme = '' , allow_fragments = True ):
438435 """Parse a URL into 5 components:
439436 <scheme>://<netloc>/<path>?<query>#<fragment>
@@ -462,12 +459,6 @@ def urlsplit(url, scheme='', allow_fragments=True):
462459 scheme = scheme .replace (b , "" )
463460
464461 allow_fragments = bool (allow_fragments )
465- key = url , scheme , allow_fragments , type (url ), type (scheme )
466- cached = _parse_cache .get (key , None )
467- if cached :
468- return _coerce_result (cached )
469- if len (_parse_cache ) >= MAX_CACHE_SIZE : # avoid runaway growth
470- clear_cache ()
471462 netloc = query = fragment = ''
472463 i = url .find (':' )
473464 if i > 0 :
@@ -488,7 +479,6 @@ def urlsplit(url, scheme='', allow_fragments=True):
488479 url , query = url .split ('?' , 1 )
489480 _checknetloc (netloc )
490481 v = SplitResult (scheme , netloc , url , query , fragment )
491- _parse_cache [key ] = v
492482 return _coerce_result (v )
493483
494484def urlunparse (components ):
@@ -791,23 +781,30 @@ def unquote_plus(string, encoding='utf-8', errors='replace'):
791781 b'0123456789'
792782 b'_.-~' )
793783_ALWAYS_SAFE_BYTES = bytes (_ALWAYS_SAFE )
794- _safe_quoters = {}
795784
796- class Quoter (collections .defaultdict ):
797- """A mapping from bytes (in range(0,256)) to strings.
def __getattr__(name):
    """Module-level attribute hook that serves the deprecated ``Quoter`` name.

    Looking up ``Quoter`` emits a DeprecationWarning and returns the private
    ``_Quoter`` class.  Any other name raises AttributeError.
    """
    if name != 'Quoter':
        raise AttributeError(f'module {__name__!r} has no attribute {name!r}')
    # stacklevel=2 points the warning at the caller of this hook rather
    # than at this function itself.
    warnings.warn('Deprecated in 3.11. '
                  'urllib.parse.Quoter will be removed in Python 3.14. '
                  'It was not intended to be a public API.',
                  DeprecationWarning, stacklevel=2)
    return _Quoter
793+
794+ class _Quoter (dict ):
795+ """A mapping from bytes numbers (in range(0,256)) to strings.
798796
799797 String values are percent-encoded byte values, unless the key < 128, and
800- in the "safe" set ( either the specified safe set, or default set) .
798+ in either of the specified safe set, or the always safe set.
801799 """
802- # Keeps a cache internally, using defaultdict , for efficiency (lookups
800+ # Keeps a cache internally, via __missing__ , for efficiency (lookups
803801 # of cached keys don't call Python code at all).
804802 def __init__ (self , safe ):
805803 """safe: bytes object."""
806804 self .safe = _ALWAYS_SAFE .union (safe )
807805
808806 def __repr__ (self ):
809- # Without this, will just display as a defaultdict
810- return "<%s %r>" % (self .__class__ .__name__ , dict (self ))
807+ return f"<Quoter { dict (self )!r} >"
811808
812809 def __missing__ (self , b ):
813810 # Handle a cache miss. Store quoted string in cache and return.
@@ -886,6 +883,11 @@ def quote_plus(string, safe='', encoding=None, errors=None):
886883 string = quote (string , safe + space , encoding , errors )
887884 return string .replace (' ' , '+' )
888885
886+ # Expectation: A typical program is unlikely to create more than 5 of these.
@functools.lru_cache
def _byte_quoter_factory(safe):
    """Return the item-lookup callable of a ``_Quoter`` built for *safe*.

    The result is memoized by lru_cache, so repeated calls with an equal
    *safe* value reuse the same quoter instead of building a new one.
    """
    quoter = _Quoter(safe)
    return quoter.__getitem__
890+
889891def quote_from_bytes (bs , safe = '/' ):
890892 """Like quote(), but accepts a bytes object rather than a str, and does
891893 not perform string-to-bytes encoding. It always returns an ASCII string.
@@ -899,13 +901,11 @@ def quote_from_bytes(bs, safe='/'):
899901 # Normalize 'safe' by converting to bytes and removing non-ASCII chars
900902 safe = safe .encode ('ascii' , 'ignore' )
901903 else :
904+ # List comprehensions are faster than generator expressions.
902905 safe = bytes ([c for c in safe if c < 128 ])
903906 if not bs .rstrip (_ALWAYS_SAFE_BYTES + safe ):
904907 return bs .decode ()
905- try :
906- quoter = _safe_quoters [safe ]
907- except KeyError :
908- _safe_quoters [safe ] = quoter = Quoter (safe ).__getitem__
908+ quoter = _byte_quoter_factory (safe )
909909 return '' .join ([quoter (char ) for char in bs ])
910910
911911def urlencode (query , doseq = False , safe = '' , encoding = None , errors = None ,
0 commit comments