Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 57e683e

Browse files
committed
Issue #1621: Fix undefined behaviour in bytes.__hash__, str.__hash__, tuple.__hash__, frozenset.__hash__ and set indexing operations.
1 parent 0390151 commit 57e683e

5 files changed

Lines changed: 26 additions & 25 deletions

File tree

Objects/bytesobject.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -869,16 +869,16 @@ bytes_hash(PyBytesObject *a)
869869
{
870870
register Py_ssize_t len;
871871
register unsigned char *p;
872-
register Py_hash_t x;
872+
register Py_uhash_t x;
873873

874874
if (a->ob_shash != -1)
875875
return a->ob_shash;
876876
len = Py_SIZE(a);
877877
p = (unsigned char *) a->ob_sval;
878-
x = *p << 7;
878+
x = (Py_uhash_t)*p << 7;
879879
while (--len >= 0)
880-
x = (1000003*x) ^ *p++;
881-
x ^= Py_SIZE(a);
880+
x = (1000003U*x) ^ (Py_uhash_t)*p++;
881+
x ^= (Py_uhash_t)Py_SIZE(a);
882882
if (x == -1)
883883
x = -2;
884884
a->ob_shash = x;

Objects/dictobject.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ lookdict_unicode(PyDictObject *mp, PyObject *key, register Py_hash_t hash)
418418
mp->ma_lookup = lookdict;
419419
return lookdict(mp, key, hash);
420420
}
421-
i = hash & mask;
421+
i = (size_t)hash & mask;
422422
ep = &ep0[i];
423423
if (ep->me_key == NULL || ep->me_key == key)
424424
return ep;
@@ -572,7 +572,7 @@ insertdict_clean(register PyDictObject *mp, PyObject *key, Py_hash_t hash,
572572
register PyDictEntry *ep;
573573

574574
MAINTAIN_TRACKING(mp, key, value);
575-
i = hash & mask;
575+
i = (size_t)hash & mask;
576576
ep = &ep0[i];
577577
for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
578578
i = (i << 2) + i + perturb + 1;

Objects/setobject.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ NULL if the rich comparison returns an error.
7777
static setentry *
7878
set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash)
7979
{
80-
register Py_ssize_t i;
80+
register size_t i;
8181
register size_t perturb;
8282
register setentry *freeslot;
8383
register size_t mask = so->mask;
@@ -86,7 +86,7 @@ set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash)
8686
register int cmp;
8787
PyObject *startkey;
8888

89-
i = hash & mask;
89+
i = (size_t)hash & mask;
9090
entry = &table[i];
9191
if (entry->key == NULL || entry->key == key)
9292
return entry;
@@ -159,7 +159,7 @@ set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash)
159159
static setentry *
160160
set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash)
161161
{
162-
register Py_ssize_t i;
162+
register size_t i;
163163
register size_t perturb;
164164
register setentry *freeslot;
165165
register size_t mask = so->mask;
@@ -174,7 +174,7 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash)
174174
so->lookup = set_lookkey;
175175
return set_lookkey(so, key, hash);
176176
}
177-
i = hash & mask;
177+
i = (size_t)hash & mask;
178178
entry = &table[i];
179179
if (entry->key == NULL || entry->key == key)
180180
return entry;
@@ -256,7 +256,7 @@ set_insert_clean(register PySetObject *so, PyObject *key, Py_hash_t hash)
256256
setentry *table = so->table;
257257
register setentry *entry;
258258

259-
i = hash & mask;
259+
i = (size_t)hash & mask;
260260
entry = &table[i];
261261
for (perturb = hash; entry->key != NULL; perturb >>= PERTURB_SHIFT) {
262262
i = (i << 2) + i + perturb + 1;
@@ -770,26 +770,26 @@ static Py_hash_t
770770
frozenset_hash(PyObject *self)
771771
{
772772
PySetObject *so = (PySetObject *)self;
773-
Py_hash_t h, hash = 1927868237L;
773+
Py_uhash_t h, hash = 1927868237U;
774774
setentry *entry;
775775
Py_ssize_t pos = 0;
776776

777777
if (so->hash != -1)
778778
return so->hash;
779779

780-
hash *= PySet_GET_SIZE(self) + 1;
780+
hash *= (Py_uhash_t)PySet_GET_SIZE(self) + 1;
781781
while (set_next(so, &pos, &entry)) {
782782
/* Work to increase the bit dispersion for closely spaced hash
783783
values. This is important because some use cases have many
784784
combinations of a small number of elements with nearby
785785
hashes so that many distinct combinations collapse to only
786786
a handful of distinct hash values. */
787787
h = entry->hash;
788-
hash ^= (h ^ (h << 16) ^ 89869747L) * 3644798167u;
788+
hash ^= (h ^ (h << 16) ^ 89869747U) * 3644798167U;
789789
}
790-
hash = hash * 69069L + 907133923L;
790+
hash = hash * 69069U + 907133923U;
791791
if (hash == -1)
792-
hash = 590923713L;
792+
hash = 590923713U;
793793
so->hash = hash;
794794
return hash;
795795
}

Objects/tupleobject.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -315,11 +315,12 @@ tuplerepr(PyTupleObject *v)
315315
static Py_hash_t
316316
tuplehash(PyTupleObject *v)
317317
{
318-
register Py_hash_t x, y;
318+
register Py_uhash_t x;
319+
register Py_hash_t y;
319320
register Py_ssize_t len = Py_SIZE(v);
320321
register PyObject **p;
321-
Py_hash_t mult = 1000003L;
322-
x = 0x345678L;
322+
Py_uhash_t mult = 1000003;
323+
x = 0x345678;
323324
p = v->ob_item;
324325
while (--len >= 0) {
325326
y = PyObject_Hash(*p++);
@@ -330,7 +331,7 @@ tuplehash(PyTupleObject *v)
330331
mult += (Py_hash_t)(82520L + len + len);
331332
}
332333
x += 97531L;
333-
if (x == -1)
334+
if (x == (Py_uhash_t)-1)
334335
x = -2;
335336
return x;
336337
}

Objects/unicodeobject.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7721,22 +7721,22 @@ unicode_getitem(PyUnicodeObject *self, Py_ssize_t index)
77217721
}
77227722

77237723
/* Believe it or not, this produces the same value for ASCII strings
7724-
as string_hash(). */
7724+
as bytes_hash(). */
77257725
static Py_hash_t
77267726
unicode_hash(PyUnicodeObject *self)
77277727
{
77287728
Py_ssize_t len;
77297729
Py_UNICODE *p;
7730-
Py_hash_t x;
7730+
Py_uhash_t x;
77317731

77327732
if (self->hash != -1)
77337733
return self->hash;
77347734
len = Py_SIZE(self);
77357735
p = self->str;
7736-
x = *p << 7;
7736+
x = (Py_uhash_t)*p << 7;
77377737
while (--len >= 0)
7738-
x = (1000003*x) ^ *p++;
7739-
x ^= Py_SIZE(self);
7738+
x = (1000003U*x) ^ (Py_uhash_t)*p++;
7739+
x ^= (Py_uhash_t)Py_SIZE(self);
77407740
if (x == -1)
77417741
x = -2;
77427742
self->hash = x;

0 commit comments

Comments
 (0)