Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 7ab3d15

Browse files
tim-one authored and rhettinger committed
Rework tuple hash tests. (GH-10161)
Add tooling that will be useful in future updates, paying particular attention to difficult cases where only the upper bits on the input vary.
1 parent 5741c45 commit 7ab3d15

2 files changed

Lines changed: 308 additions & 84 deletions

File tree

Lib/test/support/__init__.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2944,3 +2944,44 @@ def __fspath__(self):
29442944
def maybe_get_event_loop_policy():
    """Return asyncio's global event loop policy, or None if none is set.

    The value is read straight from ``asyncio.events._event_loop_policy``.
    """
    policy = asyncio.events._event_loop_policy
    return policy
2947+
2948+
# Helpers for testing hashing.
2949+
# Width of hash() results on this build, read from sys.hash_info.
NHASHBITS = sys.hash_info.width # number of bits in hash() result
2950+
# Fail at import time if the hash width is neither 32 nor 64 bits.
assert NHASHBITS in (32, 64)
2951+
2952+
# Return mean and sdev of number of collisions when tossing nballs balls
2953+
# uniformly at random into nbins bins. By definition, the number of
2954+
# collisions is the number of balls minus the number of occupied bins at
2955+
# the end.
2956+
def collision_stats(nbins, nballs):
    """Return (mean, sdev) of the collision count for random ball tosses.

    Models tossing `nballs` balls uniformly at random into `nbins` bins.
    The number of collisions is the number of balls minus the number of
    occupied bins at the end.

    Args:
        nbins: number of bins; an int >= 1.
        nballs: number of balls tossed; an int >= 0.

    Returns:
        A 2-tuple of floats: the expected number of collisions and the
        standard deviation of the number of collisions.

    Raises:
        ValueError: if `nbins` < 1 or `nballs` < 0.
    """
    n, k = nbins, nballs
    if n < 1:
        raise ValueError("nbins must be at least 1")
    if k < 0:
        raise ValueError("nballs must be non-negative")
    if k == 0:
        # With no balls there can be no collisions.  Handling this up
        # front also avoids Decimal(0) ** 0 (an InvalidOperation) in the
        # formulas below when nbins is 1 or 2.
        return 0.0, 0.0

    # prob a bin empty after k trials = (1 - 1/n)**k
    # mean # empty is then n * (1 - 1/n)**k
    # so mean # occupied is n - n * (1 - 1/n)**k
    # so collisions = k - (n - n*(1 - 1/n)**k)
    #
    # For the variance:
    # n*(n-1)*(1-2/n)**k + meanempty - meanempty**2 =
    # n*(n-1)*(1-2/n)**k + meanempty * (1 - meanempty)
    #
    # Massive cancellation occurs, and, e.g., for a 64-bit hash code
    # 1-1/2**64 rounds uselessly to 1.0.  Rather than make heroic (and
    # error-prone) efforts to rework the naive formulas to avoid those,
    # we use the `decimal` module to get plenty of extra precision.
    #
    # Note:  the exact values are straightforward to compute with
    # rationals, but in context that's unbearably slow, requiring
    # multi-million bit arithmetic.
    import decimal
    with decimal.localcontext() as ctx:
        bits = n.bit_length() * 2  # bits in n**2
        # At least that many bits will likely cancel out.
        # Use that many decimal digits instead.
        ctx.prec = max(bits, 30)
        dn = decimal.Decimal(n)
        p1empty = ((dn - 1) / dn) ** k
        meanempty = n * p1empty
        occupied = n - meanempty
        collisions = k - occupied
        var = dn*(dn-1)*((dn-2)/dn)**k + meanempty * (1 - meanempty)
        if var < 0:
            # A variance is never negative; a tiny negative value can only
            # be rounding residue, and sqrt() would raise on it.
            var = decimal.Decimal(0)
        return float(collisions), float(var.sqrt())

0 commit comments

Comments
 (0)