Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 13634cf

Browse files
committed
This patch addresses two main issues: (1) There exist some non-fatal
errors in some of the hash algorithms. For example, in float_hash and complex_hash a certain part of the value is not included in the hash calculation. See Tim's, Guido's, and my discussion of this on python-dev in May under the title "fix float_hash and complex_hash for 64-bit *nix". (2) The hash algorithms that use pointers (e.g. func_hash, code_hash) are universally not correct on Win64 (they assume that sizeof(long) == sizeof(void*)). As well, this patch significantly cleans up the hash code. It adds the two functions _Py_HashDouble and _Py_HashPointer, which the various hashing routines are changed to use. These help maintain the hash function invariant: (a==b) => (hash(a)==hash(b)). I have added Lib/test/test_hash.py and Lib/test/output/test_hash to test this for some cases.
1 parent b46696c commit 13634cf

10 files changed

Lines changed: 126 additions & 49 deletions

File tree

Include/object.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,10 @@ extern DL_IMPORT(void) Py_ReprLeave Py_PROTO((PyObject *));
293293
/* tstate dict key for PyObject_Compare helper */
294294
extern PyObject *_PyCompareState_Key;
295295

296+
/* Helpers for hash functions */
297+
extern DL_IMPORT(long) _Py_HashDouble Py_PROTO((double));
298+
extern DL_IMPORT(long) _Py_HashPointer Py_PROTO((void*));
299+
296300
/* Flag bits for printing: */
297301
#define Py_PRINT_RAW 1 /* No string quotes etc. */
298302

Lib/test/output/test_hash

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
test_hash

Lib/test/test_hash.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# test the invariant that
2+
# if a==b then hash(a)==hash(b)
3+
#
4+
5+
import test_support
6+
7+
8+
def same_hash(*objlist):
9+
# hash each object given and raise TestFailed if
10+
# the hash values are not all the same
11+
hashed = map(hash, objlist)
12+
for h in hashed[1:]:
13+
if h != hashed[0]:
14+
raise TestFailed, "hashed values differ: %s" % `objlist`
15+
16+
17+
18+
same_hash(1, 1L, 1.0, 1.0+0.0j)
19+
same_hash(int(1), long(1), float(1), complex(1))
20+
21+
same_hash(long(1.23e300), float(1.23e300))
22+
23+
same_hash(float(0.5), complex(0.5, 0.0))
24+
25+
26+

Objects/classobject.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -864,10 +864,7 @@ instance_hash(inst)
864864
func = instance_getattr(inst, cmpstr);
865865
if (func == NULL) {
866866
PyErr_Clear();
867-
outcome = (long)inst;
868-
if (outcome == -1)
869-
outcome = -2;
870-
return outcome;
867+
return _Py_HashPointer(inst);
871868
}
872869
PyErr_SetString(PyExc_TypeError, "unhashable instance");
873870
return -1;

Objects/complexobject.c

Lines changed: 9 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -285,8 +285,7 @@ complex_hash(v)
285285
PyComplexObject *v;
286286
{
287287
double intpart, fractpart;
288-
int expo;
289-
long hipart, x;
288+
long x;
290289
/* This is designed so that Python numbers with the same
291290
value hash to the same value, otherwise comparisons
292291
of mapping keys will turn out weird */
@@ -302,7 +301,7 @@ complex_hash(v)
302301
#endif
303302

304303
if (fractpart == 0.0 && v->cval.imag == 0.0) {
305-
if (intpart > 0x7fffffffL || -intpart > 0x7fffffffL) {
304+
if (intpart > LONG_MAX || -intpart > LONG_MAX) {
306305
/* Convert to long int and use its hash... */
307306
PyObject *w = PyLong_FromDouble(v->cval.real);
308307
if (w == NULL)
@@ -314,36 +313,18 @@ complex_hash(v)
314313
x = (long)intpart;
315314
}
316315
else {
317-
fractpart = frexp(fractpart, &expo);
318-
fractpart = fractpart * 2147483648.0; /* 2**31 */
319-
hipart = (long)fractpart; /* Take the top 32 bits */
320-
fractpart = (fractpart - (double)hipart) * 2147483648.0;
321-
/* Get the next 32 bits */
322-
x = hipart + (long)fractpart + (long)intpart + (expo << 15);
323-
/* Combine everything */
316+
x = _Py_HashDouble(v->cval.real);
317+
if (x == -1)
318+
return -1;
324319

325320
if (v->cval.imag != 0.0) { /* Hash the imaginary part */
326321
/* XXX Note that this hashes complex(x, y)
327322
to the same value as complex(y, x).
328323
Still better than it used to be :-) */
329-
#ifdef MPW
330-
{
331-
extended e;
332-
fractpart = modf(v->cval.imag, &e);
333-
intpart = e;
334-
}
335-
#else
336-
fractpart = modf(v->cval.imag, &intpart);
337-
#endif
338-
fractpart = frexp(fractpart, &expo);
339-
fractpart = fractpart * 2147483648.0; /* 2**31 */
340-
hipart = (long)fractpart; /* Take the top 32 bits */
341-
fractpart =
342-
(fractpart - (double)hipart) * 2147483648.0;
343-
/* Get the next 32 bits */
344-
x ^= hipart + (long)fractpart +
345-
(long)intpart + (expo << 15);
346-
/* Combine everything */
324+
long y = _Py_HashDouble(v->cval.imag);
325+
if (y == -1)
326+
return -1;
327+
x += y;
347328
}
348329
}
349330
if (x == -1)

Objects/floatobject.c

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,13 @@ PERFORMANCE OF THIS SOFTWARE.
5959
#endif
6060

6161
#ifndef LONG_MAX
62+
#if SIZEOF_LONG == 4
6263
#define LONG_MAX 0X7FFFFFFFL
64+
#elif SIZEOF_LONG == 8
65+
#define LONG_MAX 0X7FFFFFFFFFFFFFFFL
66+
#else
67+
#error "could not set LONG_MAX"
68+
#endif
6369
#endif
6470

6571
#ifndef LONG_MIN
@@ -357,12 +363,12 @@ float_compare(v, w)
357363
return (i < j) ? -1 : (i > j) ? 1 : 0;
358364
}
359365

366+
360367
static long
361368
float_hash(v)
362369
PyFloatObject *v;
363370
{
364371
double intpart, fractpart;
365-
int expo;
366372
long x;
367373
/* This is designed so that Python numbers with the same
368374
value hash to the same value, otherwise comparisons
@@ -379,7 +385,7 @@ float_hash(v)
379385
#endif
380386

381387
if (fractpart == 0.0) {
382-
if (intpart > 0x7fffffffL || -intpart > 0x7fffffffL) {
388+
if (intpart > LONG_MAX || -intpart > LONG_MAX) {
383389
/* Convert to long int and use its hash... */
384390
PyObject *w = PyLong_FromDouble(v->ob_fval);
385391
if (w == NULL)
@@ -393,14 +399,9 @@ float_hash(v)
393399
else {
394400
/* Note -- if you change this code, also change the copy
395401
in complexobject.c */
396-
long hipart;
397-
fractpart = frexp(fractpart, &expo);
398-
fractpart = fractpart * 2147483648.0; /* 2**31 */
399-
hipart = (long)fractpart; /* Take the top 32 bits */
400-
fractpart = (fractpart - (double)hipart) * 2147483648.0;
401-
/* Get the next 32 bits */
402-
x = hipart + (long)fractpart + (long)intpart + (expo << 15);
403-
/* Combine everything */
402+
x = _Py_HashDouble(v->ob_fval);
403+
if (x == -1)
404+
return -1;
404405
}
405406
if (x == -1)
406407
x = -2;

Objects/funcobject.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,10 +231,12 @@ static long
231231
func_hash(f)
232232
PyFunctionObject *f;
233233
{
234-
long h;
234+
long h,x;
235235
h = PyObject_Hash(f->func_code);
236236
if (h == -1) return h;
237-
h = h ^ (long)f->func_globals;
237+
x = _Py_HashPointer(f->func_globals);
238+
if (x == -1) return x;
239+
h ^= x;
238240
if (h == -1) h = -2;
239241
return h;
240242
}

Objects/methodobject.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,15 +172,21 @@ static long
172172
meth_hash(a)
173173
PyCFunctionObject *a;
174174
{
175-
long x;
175+
long x,y;
176176
if (a->m_self == NULL)
177177
x = 0;
178178
else {
179179
x = PyObject_Hash(a->m_self);
180180
if (x == -1)
181181
return -1;
182182
}
183-
return x ^ (long) a->m_ml->ml_meth;
183+
y = _Py_HashPointer(a->m_ml->ml_meth);
184+
if (y == -1)
185+
return -1;
186+
x ^= y;
187+
if (x == -1)
188+
x = -2;
189+
return x;
184190
}
185191

186192
PyTypeObject PyCFunction_Type = {

Objects/object.c

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ PERFORMANCE OF THIS SOFTWARE.
3333

3434
#include "Python.h"
3535

36+
#include "mymath.h"
37+
3638
/* just for trashcan: */
3739
#include "compile.h"
3840
#include "frameobject.h"
@@ -507,15 +509,72 @@ PyObject_Compare(v, w)
507509
return result;
508510
}
509511

512+
513+
/* Set of hash utility functions to help maintaining the invariant that
514+
if a==b then hash(a)==hash(b)
515+
516+
All the utility functions (_Py_Hash*()) return "-1" to signify an error.
517+
*/
518+
519+
long
520+
_Py_HashDouble(v)
521+
double v;
522+
{
523+
/* Use frexp to get at the bits in the double.
524+
* Since the VAX D double format has 56 mantissa bits, which is the
525+
* most of any double format in use, each of these parts may have as
526+
* many as (but no more than) 56 significant bits.
527+
* So, assuming sizeof(long) >= 4, each part can be broken into two longs;
528+
* frexp and multiplication are used to do that.
529+
* Also, since the Cray double format has 15 exponent bits, which is the
530+
* most of any double format in use, shifting the exponent field left by
531+
* 15 won't overflow a long (again assuming sizeof(long) >= 4).
532+
*/
533+
int expo;
534+
long hipart;
535+
536+
v = frexp(v, &expo);
537+
v = v * 2147483648.0; /* 2**31 */
538+
hipart = (long)v; /* Take the top 32 bits */
539+
v = (v - (double)hipart) * 2147483648.0; /* Get the next 32 bits */
540+
541+
return hipart + (long)v + (expo << 15); /* Combine everything */
542+
}
543+
544+
long
545+
_Py_HashPointer(p)
546+
void *p;
547+
{
548+
#if SIZEOF_LONG >= SIZEOF_VOID_P
549+
return (long)p;
550+
#else
551+
/* convert to a Python long and hash that */
552+
PyObject* longobj;
553+
long x;
554+
555+
if ((longobj = PyLong_FromVoidPtr(p)) == NULL) {
556+
x = -1;
557+
goto finally;
558+
}
559+
x = PyObject_Hash(longobj);
560+
561+
finally:
562+
Py_XDECREF(longobj);
563+
return x;
564+
#endif
565+
}
566+
567+
510568
long
511569
PyObject_Hash(v)
512570
PyObject *v;
513571
{
514572
PyTypeObject *tp = v->ob_type;
515573
if (tp->tp_hash != NULL)
516574
return (*tp->tp_hash)(v);
517-
if (tp->tp_compare == NULL)
518-
return (long) v; /* Use address as hash value */
575+
if (tp->tp_compare == NULL) {
576+
return _Py_HashPointer(v); /* Use address as hash value */
577+
}
519578
/* If there's a cmp but no hash defined, the object can't be hashed */
520579
PyErr_SetString(PyExc_TypeError, "unhashable type");
521580
return -1;

PC/_winreg.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ PyHKEY_hashFunc(PyObject *ob)
423423
/* Just use the address.
424424
XXX - should we use the handle value?
425425
*/
426-
return (long)ob;
426+
return _Py_HashPointer(ob);
427427
}
428428

429429

0 commit comments

Comments
 (0)