@@ -31,6 +31,58 @@ k a second time. Theory can be used to find such polys efficiently, but the
3131operational defn. of "works" is sufficient to find them in reasonable time
3232via brute force program (hint: any poly that has an even number of 1 bits
3333cannot work; ditto any poly with low bit 0; exploit those).
34+
35+ Some major subtleties: Most hash schemes depend on having a "good" hash
36+ function, in the sense of simulating randomness. Python doesn't: some of
37+ its hash functions are trivial, such as hash(i) == i for ints i (excepting
38+ i == -1, because -1 is the "error occurred" return value from tp_hash).
39+
40+ This isn't necessarily bad! To the contrary, that our hash tables are powers
41+ of 2 in size, and that we take the low-order bits as the initial table index,
42+ means that there are no collisions at all for dicts indexed by a contiguous
43+ range of ints. This is "better than random" behavior, and that's very
44+ desirable.
45+
46+ On the other hand, when collisions occur, the tendency to fill contiguous
47+ slices of the hash table makes a good collision resolution strategy crucial;
48+ e.g., linear probing is right out.
49+
50+ Reimer Behrends contributed the idea of using a polynomial-based approach,
51+ using repeated multiplication by x in GF(2**n) where a polynomial is chosen
52+ such that x is a primitive root. This visits every table location exactly
53+ once, and the sequence of locations probed is highly non-linear.
54+
55+ The same is also largely true of quadratic probing for power-of-2 tables, of
56+ the specific
57+
58+ (i + comb(1, 2)) mod size
59+ (i + comb(2, 2)) mod size
60+ (i + comb(3, 2)) mod size
61+ (i + comb(4, 2)) mod size
62+ ...
63+ (i + comb(j, 2)) mod size
64+
65+ flavor. The polynomial approach "scrambles" the probe indices better, but
66+ more importantly allows us to get *some* additional bits of the hash code into
67+ play via computing the initial increment, thus giving a weak form of double
68+ hashing. Quadratic probing cannot be extended that way (the first probe
69+ offset must be 1, the second 3, the third 6, etc).
70+
71+ Christian Tismer later contributed the idea of using polynomial division
72+ instead of multiplication. The problem is that the multiplicative method
73+ can't get *all* the bits of the hash code into play without expensive
74+ computations that slow down the initial index and/or initial increment
75+ computation. For a set of keys like [i << 16 for i in range(20000)], under
76+ the multiplicative method the initial index and increment were the same for
77+ all keys, so every key followed exactly the same probe sequence, and so
78+ this degenerated into a (very slow) linear search. The division method uses
79+ all the bits of the hash code naturally in the increment, although it *may*
80+ visit locations more than once until such time as all the high bits of the
81+ increment have been shifted away. It's also impossible to tell in advance
82+ whether incr is congruent to 0 modulo poly, so each iteration of the loop has
83+ to guard against incr becoming 0. These are minor costs, as we usually don't
84+ get into the probe loop, and when we do we usually get out on its first
85+ iteration.
3486*/
3587
3688static long polys [] = {
@@ -204,7 +256,7 @@ static dictentry *
204256lookdict (dictobject * mp , PyObject * key , register long hash )
205257{
206258 register int i ;
207- register unsigned incr ;
259+ register unsigned int incr ;
208260 register dictentry * freeslot ;
209261 register unsigned int mask = mp -> ma_size - 1 ;
210262 dictentry * ep0 = mp -> ma_table ;
@@ -244,13 +296,14 @@ lookdict(dictobject *mp, PyObject *key, register long hash)
244296 }
245297 /* Derive incr from hash, just to make it more arbitrary. Note that
246298 incr must not be 0, or we will get into an infinite loop.*/
247- incr = (hash ^ ((unsigned long )hash >> 3 )) & mask ;
248- if (!incr )
249- incr = mask ;
299+ incr = hash ^ ((unsigned long )hash >> 3 );
300+
250301 /* In the loop, me_key == dummy is by far (factor of 100s) the
251302 least likely outcome, so test for that last. */
252303 for (;;) {
253- ep = & ep0 [(i + incr )& mask ];
304+ if (!incr )
305+ incr = 1 ; /* and incr will never be 0 again */
306+ ep = & ep0 [(i + incr ) & mask ];
254307 if (ep -> me_key == NULL ) {
255308 if (restore_error )
256309 PyErr_Restore (err_type , err_value , err_tb );
@@ -282,10 +335,10 @@ lookdict(dictobject *mp, PyObject *key, register long hash)
282335 }
283336 else if (ep -> me_key == dummy && freeslot == NULL )
284337 freeslot = ep ;
285- /* Cycle through GF(2^n)-{0} */
286- incr <<= 1 ;
287- if ( incr > mask )
288- incr ^= mp -> ma_poly ; /* clears the highest bit */
338+ /* Cycle through GF(2**n). */
339+ if ( incr & 1 )
340+ incr ^= mp -> ma_poly ; /* clears the lowest bit */
341+ incr >>= 1 ;
289342 }
290343}
291344
@@ -303,7 +356,7 @@ static dictentry *
303356lookdict_string (dictobject * mp , PyObject * key , register long hash )
304357{
305358 register int i ;
306- register unsigned incr ;
359+ register unsigned int incr ;
307360 register dictentry * freeslot ;
308361 register unsigned int mask = mp -> ma_size - 1 ;
309362 dictentry * ep0 = mp -> ma_table ;
@@ -334,13 +387,14 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash)
334387 }
335388 /* Derive incr from hash, just to make it more arbitrary. Note that
336389 incr must not be 0, or we will get into an infinite loop.*/
337- incr = (hash ^ ((unsigned long )hash >> 3 )) & mask ;
338- if (!incr )
339- incr = mask ;
390+ incr = hash ^ ((unsigned long )hash >> 3 );
391+
340392 /* In the loop, me_key == dummy is by far (factor of 100s) the
341393 least likely outcome, so test for that last. */
342394 for (;;) {
343- ep = & ep0 [(i + incr )& mask ];
395+ if (!incr )
396+ incr = 1 ; /* and incr will never be 0 again */
397+ ep = & ep0 [(i + incr ) & mask ];
344398 if (ep -> me_key == NULL )
345399 return freeslot == NULL ? ep : freeslot ;
346400 if (ep -> me_key == key
@@ -350,10 +404,10 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash)
350404 return ep ;
351405 if (ep -> me_key == dummy && freeslot == NULL )
352406 freeslot = ep ;
353- /* Cycle through GF(2^n)-{0} */
354- incr <<= 1 ;
355- if ( incr > mask )
356- incr ^= mp -> ma_poly ; /* clears the highest bit */
407+ /* Cycle through GF(2**n). */
408+ if ( incr & 1 )
409+ incr ^= mp -> ma_poly ; /* clears the lowest bit */
410+ incr >>= 1 ;
357411 }
358412}
359413
0 commit comments