From d65cab040a9dbed8c5d4f68dee22a2332218a457 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 4 Sep 2021 13:50:45 -0500 Subject: [PATCH 01/12] New recipe better demonstrates OrderedDict use cases and use patterns --- Doc/library/collections.rst | 78 ++++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 27 deletions(-) diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index b1779a5b2382e5..0155191557cc85 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -1175,39 +1175,63 @@ variants of :func:`functools.lru_cache`: .. testcode:: - class LRU: + class MultiHitLRUCache: + """ Variant of an LRU cache that defers caching a result until + it has been requested multiple times. - def __init__(self, func, maxsize=128): + To avoid flushing LRU caches with one-time requests, we don't + cache until a request has been made more than once. + + """ + + def __init__(self, func, *, maxsize=128, maxrequests=4096, cache_after=1): + self.requests = OrderedDict() # { uncached_key : request_count } + self.cache = OrderedDict() # { cached_key : function_result } self.func = func - self.maxsize = maxsize - self.cache = OrderedDict() - - def __call__(self, *args): - if args in self.cache: - value = self.cache[args] - self.cache.move_to_end(args) - return value - value = self.func(*args) - if len(self.cache) >= self.maxsize: - self.cache.popitem(False) - self.cache[args] = value - return value + self.maxrequests = maxrequests # max number of uncached request counts + self.maxsize = maxsize # max number of stored return values + self.cache_after = cache_after + + def __call__(self, x): + cache = self.cache + if x in cache: + y = cache[x] + cache.move_to_end(x) + return y + y = self.func(x) + requests = self.requests + requests[x] = requests.get(x, 0) + 1 + if requests[x] <= self.cache_after: + requests.move_to_end(x) + if len(requests) > self.maxrequests: + requests.popitem(0) + else: + requests.pop(x, None) + 
cache[x] = y + if len(cache) > self.maxsize: + cache.popitem(0) + return y .. doctest:: :hide: - >>> def square(x): - ... return x ** 2 - ... - >>> s = LRU(square, maxsize=5) - >>> actual = [(s(x), s(x)) for x in range(20)] - >>> expected = [(x**2, x**2) for x in range(20)] - >>> actual == expected - True - >>> actual = list(s.cache.items()) - >>> expected = [((x,), x**2) for x in range(15, 20)] - >>> actual == expected - True + >>> f = MultiHitLRUCache(square, maxsize=4, maxrequests=6) + >>> list(map(f, range(10))) # First requests, don't cache + [0, 1, 4, 9, 16, 25, 36, 49, 64, 81] + >>> f(4) # Cache the second request + 16 + >>> f(6) # Cache the second request + 36 + >>> f(2) # The first request aged out, so don't cache + 4 + >>> f(6) # Cache hit + 36 + >>> f(4) # Cache hit and move to front + 16 + >>> f.requests + OrderedDict([(5, 1), (7, 1), (8, 1), (9, 1), (2, 1)]) + >>> f.cache + OrderedDict([(6, 36), (4, 16)]) :class:`UserDict` objects From e5238ee5db786cca173a5aa936da1cbabaa53a3e Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 4 Sep 2021 14:17:11 -0500 Subject: [PATCH 02/12] More tests --- Doc/library/collections.rst | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index 0155191557cc85..0fa2cd5005a35b 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -1179,8 +1179,8 @@ variants of :func:`functools.lru_cache`: """ Variant of an LRU cache that defers caching a result until it has been requested multiple times. - To avoid flushing LRU caches with one-time requests, we don't - cache until a request has been made more than once. + To avoid flushing the LRU cache with one-time requests, + we don't cache until a request has been made more than once. """ @@ -1215,6 +1215,9 @@ variants of :func:`functools.lru_cache`: .. doctest:: :hide: + >>> def square(x): + ... return x * x + ... 
>>> f = MultiHitLRUCache(square, maxsize=4, maxrequests=6) >>> list(map(f, range(10))) # First requests, don't cache [0, 1, 4, 9, 16, 25, 36, 49, 64, 81] @@ -1232,7 +1235,18 @@ variants of :func:`functools.lru_cache`: OrderedDict([(5, 1), (7, 1), (8, 1), (9, 1), (2, 1)]) >>> f.cache OrderedDict([(6, 36), (4, 16)]) - + >>> set(f.requests).isdisjoint(f.cache) + True + >>> list(map(f, [9, 8, 7])) # Cache these second requests + [81, 64, 49] + >>> list(map(f, [7, 9])) # Cache hits + [49, 81] + >>> f.requests + OrderedDict([(5, 1), (2, 1)]) + >>> f.cache + OrderedDict([(4, 16), (8, 64), (7, 49), (9, 81)]) + >>> set(f.requests).isdisjoint(f.cache) + True :class:`UserDict` objects ------------------------- From ec03ce48724061e880b27308b68fdcafbf4c456e Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 4 Sep 2021 22:08:22 -0500 Subject: [PATCH 03/12] Restore Simple version --- Doc/library/collections.rst | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index 0fa2cd5005a35b..2ee7e2412988c5 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -1173,6 +1173,42 @@ original insertion position is changed and moved to the end:: An :class:`OrderedDict` would also be useful for implementing variants of :func:`functools.lru_cache`: +.. testcode:: + + class SimpleLRU: + + def __init__(self, func, maxsize=128): + self.func = func + self.maxsize = maxsize + self.cache = OrderedDict() + + def __call__(self, *args): + if args in self.cache: + value = self.cache[args] + self.cache.move_to_end(args) + return value + value = self.func(*args) + if len(self.cache) >= self.maxsize: + self.cache.popitem(False) + self.cache[args] = value + return value + +.. doctest:: + :hide: + + >>> def square(x): + ... return x ** 2 + ... 
+ >>> s = SimpleLRU(square, maxsize=5) + >>> actual = [(s(x), s(x)) for x in range(20)] + >>> expected = [(x**2, x**2) for x in range(20)] + >>> actual == expected + True + >>> actual = list(s.cache.items()) + >>> expected = [((x,), x**2) for x in range(15, 20)] + >>> actual == expected + True + .. testcode:: class MultiHitLRUCache: From 19460f2daa365ede8ea3d4f2fbdf3c0eb3671296 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 4 Sep 2021 22:11:47 -0500 Subject: [PATCH 04/12] Add time bounded LRU variant --- Doc/library/collections.rst | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index 2ee7e2412988c5..39b6f1d6814de1 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -1209,6 +1209,32 @@ variants of :func:`functools.lru_cache`: >>> actual == expected True +.. testcode: + + from time import time + + class TimeBoundedLRU: + 'Variant of an LRU Cache that invalidates and refreshes old entries' + + def __init__(self, func, *, maxsize=128, maxage=30): + self.func = func + self.maxsize = maxsize + self.maxage = maxage + self.cache = OrderedDict() # { args : (timestamp, result)} + + def __call__(self, args): + if args in self.cache: + self.cache.move_to_end(args) + timestamp, result = self.cache[args] + if time() - timestamp <= self.maxage: + return result + result = self.func(args) + self.cache[args] = time(), result + if len(self.cache) > self.maxsize: + self.cache.popitem(0) + return result + + .. 
testcode:: class MultiHitLRUCache: From 62b4bf9b88f7501be2791409f687b6d22a136bbe Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 4 Sep 2021 22:16:07 -0500 Subject: [PATCH 05/12] Fix markup --- Doc/library/collections.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index 39b6f1d6814de1..1ef76864f4d9cb 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -1209,12 +1209,12 @@ variants of :func:`functools.lru_cache`: >>> actual == expected True -.. testcode: +.. testcode:: from time import time class TimeBoundedLRU: - 'Variant of an LRU Cache that invalidates and refreshes old entries' + "Variant of an LRU Cache that invalidates and refreshes old entries." def __init__(self, func, *, maxsize=128, maxage=30): self.func = func From 09b822685ed9d498ccea84a20d5a41b8304cce89 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 4 Sep 2021 22:26:56 -0500 Subject: [PATCH 06/12] Make the recipes more consistent --- Doc/library/collections.rst | 44 ++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index 1ef76864f4d9cb..d8990845b19dce 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -1178,20 +1178,20 @@ variants of :func:`functools.lru_cache`: class SimpleLRU: def __init__(self, func, maxsize=128): + self.cache = OrderedDict() self.func = func self.maxsize = maxsize - self.cache = OrderedDict() def __call__(self, *args): if args in self.cache: - value = self.cache[args] + result = self.cache[args] self.cache.move_to_end(args) - return value - value = self.func(*args) + return result + result = self.func(*args) if len(self.cache) >= self.maxsize: - self.cache.popitem(False) - self.cache[args] = value - return value + self.cache.popitem(0) + self.cache[args] = result + return result .. 
doctest:: :hide: @@ -1217,18 +1217,18 @@ variants of :func:`functools.lru_cache`: "Variant of an LRU Cache that invalidates and refreshes old entries." def __init__(self, func, *, maxsize=128, maxage=30): + self.cache = OrderedDict() # { args : (timestamp, result)} self.func = func self.maxsize = maxsize self.maxage = maxage - self.cache = OrderedDict() # { args : (timestamp, result)} - def __call__(self, args): + def __call__(self, *args): if args in self.cache: self.cache.move_to_end(args) timestamp, result = self.cache[args] if time() - timestamp <= self.maxage: return result - result = self.func(args) + result = self.func(*args) self.cache[args] = time(), result if len(self.cache) > self.maxsize: self.cache.popitem(0) @@ -1254,25 +1254,25 @@ variants of :func:`functools.lru_cache`: self.maxsize = maxsize # max number of stored return values self.cache_after = cache_after - def __call__(self, x): + def __call__(self, *args): cache = self.cache - if x in cache: - y = cache[x] - cache.move_to_end(x) - return y - y = self.func(x) + if args in self.cache: + result = cache[args] + cache.move_to_end(args) + return result + result = self.func(*args) requests = self.requests - requests[x] = requests.get(x, 0) + 1 - if requests[x] <= self.cache_after: - requests.move_to_end(x) + requests[args] = requests.get(args, 0) + 1 + if requests[args] <= self.cache_after: + requests.move_to_end(args) if len(requests) > self.maxrequests: requests.popitem(0) else: - requests.pop(x, None) - cache[x] = y + requests.pop(args, None) + cache[args] = result if len(cache) > self.maxsize: cache.popitem(0) - return y + return result .. 
doctest:: :hide: From e3ffd4acbcdffb7412e293903ac3cf3a6fc20137 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 4 Sep 2021 22:34:27 -0500 Subject: [PATCH 07/12] Tighten docstrings --- Doc/library/collections.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index d8990845b19dce..8af126221baf49 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -1184,13 +1184,13 @@ variants of :func:`functools.lru_cache`: def __call__(self, *args): if args in self.cache: - result = self.cache[args] self.cache.move_to_end(args) + result = self.cache[args] return result result = self.func(*args) - if len(self.cache) >= self.maxsize: - self.cache.popitem(0) self.cache[args] = result + if len(self.cache) > self.maxsize: + self.cache.popitem(0) return result .. doctest:: @@ -1214,7 +1214,7 @@ variants of :func:`functools.lru_cache`: from time import time class TimeBoundedLRU: - "Variant of an LRU Cache that invalidates and refreshes old entries." + "LRU Cache that invalidates and refreshes old entries." def __init__(self, func, *, maxsize=128, maxage=30): self.cache = OrderedDict() # { args : (timestamp, result)} @@ -1238,7 +1238,7 @@ variants of :func:`functools.lru_cache`: .. testcode:: class MultiHitLRUCache: - """ Variant of an LRU cache that defers caching a result until + """ LRU cache that defers caching a result until it has been requested multiple times. 
To avoid flushing the LRU cache with one-time requests, @@ -1250,7 +1250,7 @@ variants of :func:`functools.lru_cache`: self.requests = OrderedDict() # { uncached_key : request_count } self.cache = OrderedDict() # { cached_key : function_result } self.func = func - self.maxrequests = maxrequests # max number of uncached request counts + self.maxrequests = maxrequests # max number of uncached requests self.maxsize = maxsize # max number of stored return values self.cache_after = cache_after From d379e288033bad726bcf2785540c4736cc73be6a Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 4 Sep 2021 22:35:25 -0500 Subject: [PATCH 08/12] Remove the SimpleLRU which is too much like the TimeBounded variant --- Doc/library/collections.rst | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index 8af126221baf49..ca7991edd22da4 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -1173,42 +1173,6 @@ original insertion position is changed and moved to the end:: An :class:`OrderedDict` would also be useful for implementing variants of :func:`functools.lru_cache`: -.. testcode:: - - class SimpleLRU: - - def __init__(self, func, maxsize=128): - self.cache = OrderedDict() - self.func = func - self.maxsize = maxsize - - def __call__(self, *args): - if args in self.cache: - self.cache.move_to_end(args) - result = self.cache[args] - return result - result = self.func(*args) - self.cache[args] = result - if len(self.cache) > self.maxsize: - self.cache.popitem(0) - return result - -.. doctest:: - :hide: - - >>> def square(x): - ... return x ** 2 - ... - >>> s = SimpleLRU(square, maxsize=5) - >>> actual = [(s(x), s(x)) for x in range(20)] - >>> expected = [(x**2, x**2) for x in range(20)] - >>> actual == expected - True - >>> actual = list(s.cache.items()) - >>> expected = [((x,), x**2) for x in range(15, 20)] - >>> actual == expected - True - .. 
testcode:: from time import time From 49fdb772e00586533a3ed0e1453de2d4c7121afb Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 4 Sep 2021 22:45:35 -0500 Subject: [PATCH 09/12] Simplify by removing the keyword-only annotation --- Doc/library/collections.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index ca7991edd22da4..8652dd44a91a23 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -1180,7 +1180,7 @@ variants of :func:`functools.lru_cache`: class TimeBoundedLRU: "LRU Cache that invalidates and refreshes old entries." - def __init__(self, func, *, maxsize=128, maxage=30): + def __init__(self, func, maxsize=128, maxage=30): self.cache = OrderedDict() # { args : (timestamp, result)} self.func = func self.maxsize = maxsize @@ -1210,7 +1210,7 @@ variants of :func:`functools.lru_cache`: """ - def __init__(self, func, *, maxsize=128, maxrequests=4096, cache_after=1): + def __init__(self, func, maxsize=128, maxrequests=4096, cache_after=1): self.requests = OrderedDict() # { uncached_key : request_count } self.cache = OrderedDict() # { cached_key : function_result } self.func = func From 545304d3b35340822c2af8c73a4c9f57eec672d3 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 4 Sep 2021 22:47:16 -0500 Subject: [PATCH 10/12] Further harmonize the recipes --- Doc/library/collections.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index 8652dd44a91a23..766c9f8b8be072 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -1221,8 +1221,8 @@ variants of :func:`functools.lru_cache`: def __call__(self, *args): cache = self.cache if args in self.cache: - result = cache[args] cache.move_to_end(args) + result = cache[args] return result result = self.func(*args) requests = self.requests From 312b0214b7326d130c13576d0572390b25657f48 Mon Sep 17
00:00:00 2001 From: Raymond Hettinger Date: Sat, 4 Sep 2021 23:00:25 -0500 Subject: [PATCH 11/12] Update the test for *args --- Doc/library/collections.rst | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index 766c9f8b8be072..83afbf7ec8161c 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -1257,20 +1257,16 @@ variants of :func:`functools.lru_cache`: 36 >>> f(4) # Cache hit and move to front 16 - >>> f.requests - OrderedDict([(5, 1), (7, 1), (8, 1), (9, 1), (2, 1)]) - >>> f.cache - OrderedDict([(6, 36), (4, 16)]) + >>> list(f.cache.values()) + [36, 16] >>> set(f.requests).isdisjoint(f.cache) True >>> list(map(f, [9, 8, 7])) # Cache these second requests [81, 64, 49] >>> list(map(f, [7, 9])) # Cache hits [49, 81] - >>> f.requests - OrderedDict([(5, 1), (2, 1)]) - >>> f.cache - OrderedDict([(4, 16), (8, 64), (7, 49), (9, 81)]) + >>> list(f.cache.values()) + [16, 64, 49, 81] >>> set(f.requests).isdisjoint(f.cache) True From 8655a48f6e8ed7aabea912003cab1937fd4e0d31 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 4 Sep 2021 23:43:50 -0500 Subject: [PATCH 12/12] Remove the local variable optimization --- Doc/library/collections.rst | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index 83afbf7ec8161c..4ba197e11e97bd 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -1181,7 +1181,7 @@ variants of :func:`functools.lru_cache`: "LRU Cache that invalidates and refreshes old entries." 
def __init__(self, func, maxsize=128, maxage=30): - self.cache = OrderedDict() # { args : (timestamp, result)} + self.cache = OrderedDict() # { args : (timestamp, result)} self.func = func self.maxsize = maxsize self.maxage = maxage @@ -1219,23 +1219,20 @@ variants of :func:`functools.lru_cache`: self.cache_after = cache_after def __call__(self, *args): - cache = self.cache if args in self.cache: - cache.move_to_end(args) - result = cache[args] - return result + self.cache.move_to_end(args) + return self.cache[args] result = self.func(*args) - requests = self.requests - requests[args] = requests.get(args, 0) + 1 - if requests[args] <= self.cache_after: - requests.move_to_end(args) - if len(requests) > self.maxrequests: - requests.popitem(0) + self.requests[args] = self.requests.get(args, 0) + 1 + if self.requests[args] <= self.cache_after: + self.requests.move_to_end(args) + if len(self.requests) > self.maxrequests: + self.requests.popitem(0) else: - requests.pop(args, None) - cache[args] = result - if len(cache) > self.maxsize: - cache.popitem(0) + self.requests.pop(args, None) + self.cache[args] = result + if len(self.cache) > self.maxsize: + self.cache.popitem(0) return result .. doctest::