Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 9f347ea

Browse files
committed
Reapply the change that was reverted in r83875
Now that _collections is statically built, the build dependencies are in the proper order and the build works fine. Commit log from r83874: Issue 9396. Apply functools.lru_cache in place of the random flushing cache in the re module.
1 parent 5c87c1a commit 9f347ea

3 files changed

Lines changed: 14 additions & 84 deletions

File tree

Lib/re.py

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@
118118
import sys
119119
import sre_compile
120120
import sre_parse
121+
import functools
121122

122123
# public symbols
123124
__all__ = [ "match", "search", "sub", "subn", "split", "findall",
@@ -205,9 +206,9 @@ def compile(pattern, flags=0):
205206
return _compile(pattern, flags)
206207

207208
def purge():
208-
"Clear the regular expression cache"
209-
_cache.clear()
210-
_cache_repl.clear()
209+
"Clear the regular expression caches"
210+
_compile_typed.clear()
211+
_compile_repl.clear()
211212

212213
def template(pattern, flags=0):
213214
"Compile a template pattern, returning a pattern object"
@@ -289,12 +290,12 @@ def _shrink_cache(cache_dict, max_length, divisor=5):
289290
# Ignore problems if the cache changed from another thread.
290291
pass
291292

292-
def _compile(*key):
293+
def _compile(*args):
294+
return _compile_typed(type(args[0]), *args)
295+
296+
@functools.lru_cache(maxsize=_MAXCACHE)
297+
def _compile_typed(type, *key):
293298
# internal: compile pattern
294-
cachekey = (type(key[0]),) + key
295-
p = _cache.get(cachekey)
296-
if p is not None:
297-
return p
298299
pattern, flags = key
299300
if isinstance(pattern, _pattern_type):
300301
if flags:
@@ -303,23 +304,14 @@ def _compile(*key):
303304
return pattern
304305
if not sre_compile.isstring(pattern):
305306
raise TypeError("first argument must be string or compiled pattern")
306-
p = sre_compile.compile(pattern, flags)
307-
if len(_cache) >= _MAXCACHE:
308-
_shrink_cache(_cache, _MAXCACHE)
309-
_cache[cachekey] = p
307+
return sre_compile.compile(pattern, flags)
310308
return p
311309

310+
@functools.lru_cache(maxsize=_MAXCACHE)
312311
def _compile_repl(*key):
313312
# internal: compile replacement pattern
314-
p = _cache_repl.get(key)
315-
if p is not None:
316-
return p
317313
repl, pattern = key
318-
p = sre_parse.parse_template(repl, pattern)
319-
if len(_cache_repl) >= _MAXCACHE:
320-
_shrink_cache(_cache_repl, _MAXCACHE)
321-
_cache_repl[key] = p
322-
return p
314+
return sre_parse.parse_template(repl, pattern)
323315

324316
def _expand(pattern, match, template):
325317
# internal: match.expand implementation hook

Lib/test/test_re.py

Lines changed: 0 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -875,70 +875,8 @@ def run_re_tests():
875875
print('=== Fails on unicode-sensitive match', t)
876876

877877

878-
class ReCacheTests(unittest.TestCase):
879-
"""These tests are specific to the re._shrink_cache implementation."""
880-
881-
def setUp(self):
882-
self._orig_maxcache = re._MAXCACHE
883-
884-
def tearDown(self):
885-
re._MAXCACHE = self._orig_maxcache
886-
887-
def test_compile_cache_overflow(self):
888-
# NOTE: If a profiler or debugger is tracing code and compiling
889-
# regular expressions while tracing through this test... expect
890-
# the test to fail. This test is not concurrency safe.
891-
892-
# Explicitly fill the caches.
893-
re._MAXCACHE = 20
894-
max_cache = re._MAXCACHE
895-
unique_chars = tuple(chr(char_num) for char_num in
896-
range(b'a'[0], b'a'[0]+max_cache))
897-
re._cache.clear()
898-
for char in unique_chars:
899-
re._compile(char, 0)
900-
self.assertEqual(max_cache, len(re._cache))
901-
re._cache_repl.clear()
902-
for char in unique_chars:
903-
re._compile_repl(char*2, char)
904-
self.assertEqual(max_cache, len(re._cache_repl))
905-
906-
# Overflow both caches and make sure they have extra room left
907-
# afterwards as well as having more than a single entry.
908-
re._compile('A', 0)
909-
self.assertLess(len(re._cache), max_cache)
910-
self.assertGreater(len(re._cache), 1)
911-
re._compile_repl('A', 'A')
912-
self.assertLess(len(re._cache_repl), max_cache)
913-
self.assertGreater(len(re._cache_repl), 1)
914-
915-
def test_shrink_cache_at_limit(self):
916-
cache = dict(zip(range(6), range(6)))
917-
re._shrink_cache(cache, 6, divisor=3)
918-
self.assertEqual(4, len(cache))
919-
920-
def test_shrink_cache_empty(self):
921-
cache = {}
922-
re._shrink_cache(cache, 6, divisor=3)
923-
# Cache was empty, make sure we didn't raise an exception.
924-
self.assertEqual(0, len(cache))
925-
926-
def test_shrink_cache_overflowing(self):
927-
cache = dict(zip(range(6), range(6)))
928-
re._shrink_cache(cache, 4, divisor=2)
929-
# Cache was larger than the maximum, be sure we shrunk to smaller.
930-
self.assertEqual(2, len(cache))
931-
932-
def test_shrink_cache_underflow(self):
933-
cache = dict(zip(range(6), range(6)))
934-
# No shrinking to do.
935-
re._shrink_cache(cache, 9, divisor=3)
936-
self.assertEqual(6, len(cache))
937-
938-
939878
def test_main():
940879
run_unittest(ReTests)
941-
run_unittest(ReCacheTests)
942880
run_re_tests()
943881

944882
if __name__ == "__main__":

Misc/NEWS

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -697,8 +697,8 @@ Library
697697

698698
- The default size of the re module's compiled regular expression cache has been
699699
increased from 100 to 500 and the cache replacement policy has changed from
700-
simply clearing the entire cache on overflow to randomly forgetting 20% of the
701-
existing cached compiled regular expressions. This is a performance win for
700+
simply clearing the entire cache on overflow to forgetting the least recently
701+
used cached compiled regular expressions. This is a performance win for
702702
applications that use a lot of regular expressions and limits the impact of
703703
the performance hit anytime the cache is exceeded.
704704

0 commit comments

Comments
 (0)