Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit d597fdc

Browse files
authored
bpo-44002: Switch to lru_cache in urllib.parse. (GH-25798)
Switch to lru_cache in urllib.parse. urllib.parse now uses functools.lru_cache for its internal URL splitting and quoting caches instead of rolling its own like it's the '90s. The undocumented internal Quoter class API is now deprecated, as it had no reason to be public and no existing OSS users were found. The clear_cache() API remains undocumented but gets an explicit test, as it is used in a few projects' (twisted, gevent) tests as well as our own regrtest.
1 parent e9d7f88 commit d597fdc

File tree

3 files changed

+50
-31
lines changed

3 files changed

+50
-31
lines changed

Lib/test/test_urlparse.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1044,16 +1044,24 @@ def test_telurl_params(self):
10441044
self.assertEqual(p1.params, 'phone-context=+1-914-555')
10451045

10461046
def test_Quoter_repr(self):
1047-
quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE)
1047+
quoter = urllib.parse._Quoter(urllib.parse._ALWAYS_SAFE)
10481048
self.assertIn('Quoter', repr(quoter))
10491049

1050+
def test_clear_cache_for_code_coverage(self):
1051+
urllib.parse.clear_cache()
1052+
1053+
def test_urllib_parse_getattr_failure(self):
1054+
"""Test that urllib.parse.__getattr__() fails correctly."""
1055+
with self.assertRaises(AttributeError):
1056+
unused = urllib.parse.this_does_not_exist
1057+
10501058
def test_all(self):
10511059
expected = []
10521060
undocumented = {
10531061
'splitattr', 'splithost', 'splitnport', 'splitpasswd',
10541062
'splitport', 'splitquery', 'splittag', 'splittype', 'splituser',
10551063
'splitvalue',
1056-
'Quoter', 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap',
1064+
'ResultBase', 'clear_cache', 'to_bytes', 'unwrap',
10571065
}
10581066
for name in dir(urllib.parse):
10591067
if name.startswith('_') or name in undocumented:
@@ -1245,6 +1253,12 @@ def test_unwrap(self):
12451253

12461254
class DeprecationTest(unittest.TestCase):
12471255

1256+
def test_Quoter_deprecation(self):
1257+
with self.assertWarns(DeprecationWarning) as cm:
1258+
old_class = urllib.parse.Quoter
1259+
self.assertIs(old_class, urllib.parse._Quoter)
1260+
self.assertIn('Quoter will be removed', str(cm.warning))
1261+
12481262
def test_splittype_deprecation(self):
12491263
with self.assertWarns(DeprecationWarning) as cm:
12501264
urllib.parse.splittype('')

Lib/urllib/parse.py

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,11 @@
2727
test_urlparse.py provides a good indicator of parsing behavior.
2828
"""
2929

30+
from collections import namedtuple
31+
import functools
3032
import re
3133
import sys
3234
import types
33-
import collections
3435
import warnings
3536

3637
__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
@@ -81,15 +82,10 @@
8182
# Unsafe bytes to be removed per WHATWG spec
8283
_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']
8384

84-
# XXX: Consider replacing with functools.lru_cache
85-
MAX_CACHE_SIZE = 20
86-
_parse_cache = {}
87-
8885
def clear_cache():
89-
"""Clear the parse cache and the quoters cache."""
90-
_parse_cache.clear()
91-
_safe_quoters.clear()
92-
86+
"""Clear internal performance caches. Undocumented; some tests want it."""
87+
urlsplit.cache_clear()
88+
_byte_quoter_factory.cache_clear()
9389

9490
# Helpers for bytes handling
9591
# For 3.2, we deliberately require applications that
@@ -243,8 +239,6 @@ def _hostinfo(self):
243239
return hostname, port
244240

245241

246-
from collections import namedtuple
247-
248242
_DefragResultBase = namedtuple('DefragResult', 'url fragment')
249243
_SplitResultBase = namedtuple(
250244
'SplitResult', 'scheme netloc path query fragment')
@@ -434,6 +428,9 @@ def _checknetloc(netloc):
434428
raise ValueError("netloc '" + netloc + "' contains invalid " +
435429
"characters under NFKC normalization")
436430

431+
# typed=True avoids BytesWarnings being emitted during cache key
432+
# comparison since this API supports both bytes and str input.
433+
@functools.lru_cache(typed=True)
437434
def urlsplit(url, scheme='', allow_fragments=True):
438435
"""Parse a URL into 5 components:
439436
<scheme>://<netloc>/<path>?<query>#<fragment>
@@ -462,12 +459,6 @@ def urlsplit(url, scheme='', allow_fragments=True):
462459
scheme = scheme.replace(b, "")
463460

464461
allow_fragments = bool(allow_fragments)
465-
key = url, scheme, allow_fragments, type(url), type(scheme)
466-
cached = _parse_cache.get(key, None)
467-
if cached:
468-
return _coerce_result(cached)
469-
if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
470-
clear_cache()
471462
netloc = query = fragment = ''
472463
i = url.find(':')
473464
if i > 0:
@@ -488,7 +479,6 @@ def urlsplit(url, scheme='', allow_fragments=True):
488479
url, query = url.split('?', 1)
489480
_checknetloc(netloc)
490481
v = SplitResult(scheme, netloc, url, query, fragment)
491-
_parse_cache[key] = v
492482
return _coerce_result(v)
493483

494484
def urlunparse(components):
@@ -791,23 +781,30 @@ def unquote_plus(string, encoding='utf-8', errors='replace'):
791781
b'0123456789'
792782
b'_.-~')
793783
_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE)
794-
_safe_quoters = {}
795784

796-
class Quoter(collections.defaultdict):
797-
"""A mapping from bytes (in range(0,256)) to strings.
785+
def __getattr__(name):
786+
if name == 'Quoter':
787+
warnings.warn('Deprecated in 3.11. '
788+
'urllib.parse.Quoter will be removed in Python 3.14. '
789+
'It was not intended to be a public API.',
790+
DeprecationWarning, stacklevel=2)
791+
return _Quoter
792+
raise AttributeError(f'module {__name__!r} has no attribute {name!r}')
793+
794+
class _Quoter(dict):
795+
"""A mapping from bytes numbers (in range(0,256)) to strings.
798796
799797
String values are percent-encoded byte values, unless the key < 128, and
800-
in the "safe" set (either the specified safe set, or default set).
798+
in either of the specified safe set, or the always safe set.
801799
"""
802-
# Keeps a cache internally, using defaultdict, for efficiency (lookups
800+
# Keeps a cache internally, via __missing__, for efficiency (lookups
803801
# of cached keys don't call Python code at all).
804802
def __init__(self, safe):
805803
"""safe: bytes object."""
806804
self.safe = _ALWAYS_SAFE.union(safe)
807805

808806
def __repr__(self):
809-
# Without this, will just display as a defaultdict
810-
return "<%s %r>" % (self.__class__.__name__, dict(self))
807+
return f"<Quoter {dict(self)!r}>"
811808

812809
def __missing__(self, b):
813810
# Handle a cache miss. Store quoted string in cache and return.
@@ -886,6 +883,11 @@ def quote_plus(string, safe='', encoding=None, errors=None):
886883
string = quote(string, safe + space, encoding, errors)
887884
return string.replace(' ', '+')
888885

886+
# Expectation: A typical program is unlikely to create more than 5 of these.
887+
@functools.lru_cache
888+
def _byte_quoter_factory(safe):
889+
return _Quoter(safe).__getitem__
890+
889891
def quote_from_bytes(bs, safe='/'):
890892
"""Like quote(), but accepts a bytes object rather than a str, and does
891893
not perform string-to-bytes encoding. It always returns an ASCII string.
@@ -899,13 +901,11 @@ def quote_from_bytes(bs, safe='/'):
899901
# Normalize 'safe' by converting to bytes and removing non-ASCII chars
900902
safe = safe.encode('ascii', 'ignore')
901903
else:
904+
# List comprehensions are faster than generator expressions.
902905
safe = bytes([c for c in safe if c < 128])
903906
if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
904907
return bs.decode()
905-
try:
906-
quoter = _safe_quoters[safe]
907-
except KeyError:
908-
_safe_quoters[safe] = quoter = Quoter(safe).__getitem__
908+
quoter = _byte_quoter_factory(safe)
909909
return ''.join([quoter(char) for char in bs])
910910

911911
def urlencode(query, doseq=False, safe='', encoding=None, errors=None,
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
:mod:`urllib.parse` now uses :func:`functools.lru_cache` for its internal URL
2+
splitting and quoting caches instead of rolling its own like it's the '90s.
3+
4+
The undocumented internal :mod:`urllib.parse` ``Quoter`` class API is now
5+
deprecated, for removal in 3.14.

0 commit comments

Comments
 (0)