@@ -2944,3 +2944,44 @@ def __fspath__(self):
def maybe_get_event_loop_policy():
    """Return the global event loop policy if one is set, else return None."""
    # Reads asyncio's module-level policy slot directly and returns whatever
    # is currently stored there.
    return asyncio.events._event_loop_policy

# Helpers for testing hashing.
NHASHBITS = sys.hash_info.width  # number of bits in hash() result
# The hashing helpers only expect 32-bit or 64-bit hash codes.
assert NHASHBITS in (32, 64)

def collision_stats(nbins, nballs):
    """Return (mean, sdev) of the number of collisions, as two floats.

    Models tossing *nballs* balls uniformly at random into *nbins* bins.
    By definition, the number of collisions is the number of balls minus
    the number of occupied bins at the end.

    *nbins* must be an int: its bit_length() is used to size the working
    decimal precision.
    """
    n, k = nbins, nballs
    # prob a bin empty after k trials = (1 - 1/n)**k
    # mean # empty is then n * (1 - 1/n)**k
    # so mean # occupied is n - n * (1 - 1/n)**k
    # so collisions = k - (n - n*(1 - 1/n)**k)
    #
    # For the variance:
    # n*(n-1)*(1-2/n)**k + meanempty - meanempty**2 =
    # n*(n-1)*(1-2/n)**k + meanempty * (1 - meanempty)
    #
    # Massive cancellation occurs, and, e.g., for a 64-bit hash code
    # 1-1/2**64 rounds uselessly to 1.0.  Rather than make heroic (and
    # error-prone) efforts to rework the naive formulas to avoid those,
    # we use the `decimal` module to get plenty of extra precision.
    #
    # Note:  the exact values are straightforward to compute with
    # rationals, but in context that's unbearably slow, requiring
    # multi-million bit arithmetic.
    import decimal
    # localcontext() keeps the raised precision scoped to this computation.
    with decimal.localcontext() as ctx:
        bits = n.bit_length() * 2  # bits in n**2
        # At least that many bits will likely cancel out.
        # Use that many decimal digits instead.
        ctx.prec = max(bits, 30)
        dn = decimal.Decimal(n)
        p1empty = ((dn - 1) / dn) ** k
        meanempty = n * p1empty
        occupied = n - meanempty
        collisions = k - occupied
        var = dn * (dn - 1) * ((dn - 2) / dn) ** k + meanempty * (1 - meanempty)
        # sqrt() is taken while the high-precision context is still active;
        # only the final results are narrowed to float.
        return float(collisions), float(var.sqrt())
0 commit comments