Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit a35adf5

Browse files
committed
Instead of XORed indices, switch to a hybrid of linear probing and open addressing.
Modern processors tend to make consecutive memory accesses cheaper than random probes into memory. Small sets can fit into L1 cache, so they get less benefit. But they do come out ahead because the consecutive probes don't probe the same key more than once and because the randomization step occurs less frequently (or not at all). For the open addressing step, putting the perturb shift before the index calculation gets the upper bits into play sooner.
1 parent a661f45 commit a35adf5

1 file changed

Lines changed: 68 additions & 91 deletions

File tree

Objects/setobject.c

Lines changed: 68 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ set_key_error(PyObject *arg)
2727

2828
/* This must be >= 1. */
2929
#define PERTURB_SHIFT 5
30+
#define LINEAR_PROBES 9
3031

3132
/* Object used as dummy key to fill deleted entries */
3233
static PyObject _dummy_struct;
@@ -59,17 +60,17 @@ static int numfree = 0;
5960
/*
6061
The basic lookup function used by all operations.
6162
This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
62-
Open addressing is preferred over chaining since the link overhead for
63-
chaining would be substantial (100% with typical malloc overhead).
6463
65-
The initial probe index is computed as hash mod the table size. Subsequent
66-
probe indices are computed as explained in Objects/dictobject.c.
64+
The initial probe index is computed as hash mod the table size.
65+
Subsequent probe indices are computed as explained in Objects/dictobject.c.
6766
68-
To improve cache locality, each probe inspects nearby entries before
69-
moving on to probes elsewhere in memory. Depending on alignment and the
70-
size of a cache line, the nearby entries are cheaper to inspect than
71-
other probes elsewhere in memory. This probe strategy reduces the cost
72-
of hash collisions.
67+
To improve cache locality, each probe inspects a series of consecutive
68+
nearby entries before moving on to probes elsewhere in memory. This leaves
69+
us with a hybrid of linear probing and open addressing. The linear probing
70+
reduces the cost of hash collisions because consecutive memory accesses
71+
tend to be much cheaper than scattered probes. After LINEAR_PROBES steps,
72+
we then use open addressing with the upper bits from the hash value. This
73+
helps break up long chains of collisions.
7374
7475
All arithmetic on hash should ignore overflow.
7576
@@ -83,13 +84,14 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
8384
setentry *table = so->table;
8485
setentry *freeslot = NULL;
8586
setentry *entry;
87+
setentry *limit;
8688
size_t perturb = hash;
8789
size_t mask = so->mask;
88-
size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior. */
89-
size_t j = i;
90+
size_t i = (size_t)hash; /* Unsigned for defined overflow behavior. */
91+
size_t j;
9092
int cmp;
9193

92-
entry = &table[i];
94+
entry = &table[i & mask];
9395
if (entry->key == NULL)
9496
return entry;
9597

@@ -111,54 +113,37 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
111113
if (entry->key == dummy && freeslot == NULL)
112114
freeslot = entry;
113115

114-
entry = &table[j ^ 1];
115-
if (entry->key == NULL)
116-
break;
117-
if (entry->key == key)
118-
return entry;
119-
if (entry->hash == hash && entry->key != dummy) {
120-
PyObject *startkey = entry->key;
121-
Py_INCREF(startkey);
122-
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
123-
Py_DECREF(startkey);
124-
if (cmp < 0)
125-
return NULL;
126-
if (table != so->table || entry->key != startkey)
127-
return set_lookkey(so, key, hash);
128-
if (cmp > 0)
129-
return entry;
130-
}
131-
if (entry->key == dummy && freeslot == NULL)
132-
freeslot = entry;
133-
134-
entry = &table[j ^ 2];
135-
if (entry->key == NULL)
136-
break;
137-
if (entry->key == key)
138-
return entry;
139-
if (entry->hash == hash && entry->key != dummy) {
140-
PyObject *startkey = entry->key;
141-
Py_INCREF(startkey);
142-
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
143-
Py_DECREF(startkey);
144-
if (cmp < 0)
145-
return NULL;
146-
if (table != so->table || entry->key != startkey)
147-
return set_lookkey(so, key, hash);
148-
if (cmp > 0)
116+
limit = &table[mask];
117+
for (j = 0 ; j < LINEAR_PROBES ; j++) {
118+
entry = (entry == limit) ? &table[0] : entry + 1;
119+
if (entry->key == NULL)
120+
goto found_null;
121+
if (entry->key == key)
149122
return entry;
123+
if (entry->hash == hash && entry->key != dummy) {
124+
PyObject *startkey = entry->key;
125+
Py_INCREF(startkey);
126+
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
127+
Py_DECREF(startkey);
128+
if (cmp < 0)
129+
return NULL;
130+
if (table != so->table || entry->key != startkey)
131+
return set_lookkey(so, key, hash);
132+
if (cmp > 0)
133+
return entry;
134+
}
135+
if (entry->key == dummy && freeslot == NULL)
136+
freeslot = entry;
150137
}
151-
if (entry->key == dummy && freeslot == NULL)
152-
freeslot = entry;
153138

154-
i = i * 5 + perturb + 1;
155-
j = i & mask;
156139
perturb >>= PERTURB_SHIFT;
140+
i = i * 5 + perturb + 1;
157141

158-
entry = &table[j];
142+
entry = &table[i & mask];
159143
if (entry->key == NULL)
160144
break;
161145
}
146+
found_null:
162147
return freeslot == NULL ? entry : freeslot;
163148
}
164149

@@ -173,10 +158,11 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash)
173158
setentry *table = so->table;
174159
setentry *freeslot = NULL;
175160
setentry *entry;
161+
setentry *limit;
176162
size_t perturb = hash;
177163
size_t mask = so->mask;
178-
size_t i = (size_t)hash & mask;
179-
size_t j = i;
164+
size_t i = (size_t)hash;
165+
size_t j;
180166

181167
/* Make sure this function doesn't have to handle non-unicode keys,
182168
including subclasses of str; e.g., one reason to subclass
@@ -187,7 +173,7 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash)
187173
return set_lookkey(so, key, hash);
188174
}
189175

190-
entry = &table[i];
176+
entry = &table[i & mask];
191177
if (entry->key == NULL)
192178
return entry;
193179

@@ -200,36 +186,28 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash)
200186
if (entry->key == dummy && freeslot == NULL)
201187
freeslot = entry;
202188

203-
entry = &table[j ^ 1];
204-
if (entry->key == NULL)
205-
break;
206-
if (entry->key == key
207-
|| (entry->hash == hash
208-
&& entry->key != dummy
209-
&& unicode_eq(entry->key, key)))
210-
return entry;
211-
if (entry->key == dummy && freeslot == NULL)
212-
freeslot = entry;
213-
214-
entry = &table[j ^ 2];
215-
if (entry->key == NULL)
216-
break;
217-
if (entry->key == key
218-
|| (entry->hash == hash
219-
&& entry->key != dummy
220-
&& unicode_eq(entry->key, key)))
221-
return entry;
222-
if (entry->key == dummy && freeslot == NULL)
223-
freeslot = entry;
189+
limit = &table[mask];
190+
for (j = 0 ; j < LINEAR_PROBES ; j++) {
191+
entry = (entry == limit) ? &table[0] : entry + 1;
192+
if (entry->key == NULL)
193+
goto found_null;
194+
if (entry->key == key
195+
|| (entry->hash == hash
196+
&& entry->key != dummy
197+
&& unicode_eq(entry->key, key)))
198+
return entry;
199+
if (entry->key == dummy && freeslot == NULL)
200+
freeslot = entry;
201+
}
224202

225-
i = i * 5 + perturb + 1;
226-
j = i & mask;
227203
perturb >>= PERTURB_SHIFT;
204+
i = i * 5 + perturb + 1;
228205

229-
entry = &table[j];
206+
entry = &table[i & mask];
230207
if (entry->key == NULL)
231208
break;
232209
}
210+
found_null:
233211
return freeslot == NULL ? entry : freeslot;
234212
}
235213

@@ -280,23 +258,22 @@ set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash)
280258
setentry *entry;
281259
size_t perturb = hash;
282260
size_t mask = (size_t)so->mask;
283-
size_t i, j;
261+
size_t i = (size_t)hash;
262+
size_t j;
284263

285-
i = j = (size_t)hash & mask;
286264
while (1) {
287-
entry = &table[j];
265+
entry = &table[i & mask];
288266
if (entry->key == NULL)
289-
break;
290-
entry = &table[j ^ 1];
291-
if (entry->key == NULL)
292-
break;
293-
entry = &table[j ^ 2];
294-
if (entry->key == NULL)
295-
break;
296-
i = i * 5 + perturb + 1;
297-
j = i & mask;
267+
goto found_null;
268+
for (j = 1 ; j <= LINEAR_PROBES ; j++) {
269+
entry = &table[(i + j) & mask];
270+
if (entry->key == NULL)
271+
goto found_null;
272+
}
298273
perturb >>= PERTURB_SHIFT;
274+
i = i * 5 + perturb + 1;
299275
}
276+
found_null:
300277
so->fill++;
301278
entry->key = key;
302279
entry->hash = hash;

0 commit comments

Comments
 (0)