Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 0ded5b5

Browse files
committed
Fixed issue #1564: The set implementation should special-case PyUnicode instead of PyString
I moved unicode_eq to stringlib/eq.h to keep the function static and possibly inlined for setobject.c and dictobject.c. I also removed the unused _PyString_Eq function. If it's required in the future, it can be added to eq.h.
1 parent 1101940 commit 0ded5b5

7 files changed

Lines changed: 85 additions & 45 deletions

File tree

Include/stringobject.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ PyAPI_FUNC(PyObject *) PyString_Repr(PyObject *, int);
5858
PyAPI_FUNC(void) PyString_Concat(PyObject **, PyObject *);
5959
PyAPI_FUNC(void) PyString_ConcatAndDel(PyObject **, PyObject *);
6060
PyAPI_FUNC(int) _PyString_Resize(PyObject **, Py_ssize_t);
61-
PyAPI_FUNC(int) _PyString_Eq(PyObject *, PyObject*);
6261
PyAPI_FUNC(PyObject *) PyString_Format(PyObject *, PyObject *);
6362
PyAPI_FUNC(PyObject *) _PyString_FormatLong(PyObject*, int, int,
6463
int, char**, int*);

Lib/test/test_set.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import os
88
from random import randrange, shuffle
99
import sys
10+
import warnings
1011

1112
class PassThru(Exception):
1213
pass
@@ -817,6 +818,44 @@ def setUp(self):
817818
self.length = 3
818819
self.repr = None
819820

821+
#------------------------------------------------------------------------------
822+
823+
class TestBasicOpsString(TestBasicOps):
824+
def setUp(self):
825+
self.case = "string set"
826+
self.values = ["a", "b", "c"]
827+
self.set = set(self.values)
828+
self.dup = set(self.values)
829+
self.length = 3
830+
self.repr = "{'a', 'c', 'b'}"
831+
832+
#------------------------------------------------------------------------------
833+
834+
class TestBasicOpsBytes(TestBasicOps):
835+
def setUp(self):
836+
self.case = "string set"
837+
self.values = [b"a", b"b", b"c"]
838+
self.set = set(self.values)
839+
self.dup = set(self.values)
840+
self.length = 3
841+
self.repr = "{b'a', b'c', b'b'}"
842+
843+
#------------------------------------------------------------------------------
844+
845+
class TestBasicOpsMixedStringBytes(TestBasicOps):
846+
def setUp(self):
847+
self.warning_filters = warnings.filters[:]
848+
warnings.simplefilter('ignore', BytesWarning)
849+
self.case = "string and bytes set"
850+
self.values = ["a", "b", b"a", b"b"]
851+
self.set = set(self.values)
852+
self.dup = set(self.values)
853+
self.length = 4
854+
self.repr = "{'a', b'a', 'b', b'b'}"
855+
856+
def tearDown(self):
857+
warnings.filters = self.warning_filters
858+
820859
#==============================================================================
821860

822861
def baditer():
@@ -1581,6 +1620,9 @@ def test_main(verbose=None):
15811620
TestBasicOpsSingleton,
15821621
TestBasicOpsTuple,
15831622
TestBasicOpsTriple,
1623+
TestBasicOpsString,
1624+
TestBasicOpsBytes,
1625+
TestBasicOpsMixedStringBytes,
15841626
TestBinaryOps,
15851627
TestUpdateOps,
15861628
TestMutate,

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ Core and Builtins
1414

1515
- Issue #1573: Improper use of the keyword-only syntax makes the parser crash
1616

17+
- Issue #1564: The set implementation should special-case PyUnicode instead
18+
of PyString
19+
1720

1821
Extension Modules
1922
-----------------

Objects/dictobject.c

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
*/
99

1010
#include "Python.h"
11+
#include "stringlib/eq.h"
1112

1213

1314
/* Set a key error with the specified argument, wrapping it in a
@@ -327,25 +328,6 @@ lookdict(PyDictObject *mp, PyObject *key, register long hash)
327328
return 0;
328329
}
329330

330-
/* Return 1 if two unicode objects are equal, 0 if not. */
331-
static int
332-
unicode_eq(PyObject *aa, PyObject *bb)
333-
{
334-
PyUnicodeObject *a = (PyUnicodeObject *)aa;
335-
PyUnicodeObject *b = (PyUnicodeObject *)bb;
336-
337-
if (a->length != b->length)
338-
return 0;
339-
if (a->length == 0)
340-
return 1;
341-
if (a->str[0] != b->str[0])
342-
return 0;
343-
if (a->length == 1)
344-
return 1;
345-
return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0;
346-
}
347-
348-
349331
/*
350332
* Hacked up version of lookdict which can assume keys are always
351333
* unicodes; this assumption allows testing for errors during

Objects/setobject.c

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
#include "Python.h"
1111
#include "structmember.h"
12+
#include "stringlib/eq.h"
1213

1314
/* Set a key error with the specified argument, wrapping it in a
1415
* tuple automatically so that tuple keys are not unpacked as the
@@ -55,6 +56,7 @@ _PySet_Dummy(void)
5556
static PySetObject *free_sets[MAXFREESETS];
5657
static int num_free_sets = 0;
5758

59+
5860
/*
5961
The basic lookup function used by all operations.
6062
This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
@@ -144,12 +146,12 @@ set_lookkey(PySetObject *so, PyObject *key, register long hash)
144146
}
145147

146148
/*
147-
* Hacked up version of set_lookkey which can assume keys are always strings;
148-
* This means we can always use _PyString_Eq directly and not have to check to
149+
* Hacked up version of set_lookkey which can assume keys are always unicode;
150+
* This means we can always use unicode_eq directly and not have to check to
149151
* see if the comparison altered the table.
150152
*/
151153
static setentry *
152-
set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
154+
set_lookkey_unicode(PySetObject *so, PyObject *key, register long hash)
153155
{
154156
register Py_ssize_t i;
155157
register size_t perturb;
@@ -158,11 +160,11 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
158160
setentry *table = so->table;
159161
register setentry *entry;
160162

161-
/* Make sure this function doesn't have to handle non-string keys,
163+
/* Make sure this function doesn't have to handle non-unicode keys,
162164
including subclasses of str; e.g., one reason to subclass
163165
strings is to override __eq__, and for speed we don't cater to
164166
that here. */
165-
if (!PyString_CheckExact(key)) {
167+
if (!PyUnicode_CheckExact(key)) {
166168
so->lookup = set_lookkey;
167169
return set_lookkey(so, key, hash);
168170
}
@@ -173,7 +175,7 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
173175
if (entry->key == dummy)
174176
freeslot = entry;
175177
else {
176-
if (entry->hash == hash && _PyString_Eq(entry->key, key))
178+
if (entry->hash == hash && unicode_eq(entry->key, key))
177179
return entry;
178180
freeslot = NULL;
179181
}
@@ -188,7 +190,7 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
188190
if (entry->key == key
189191
|| (entry->hash == hash
190192
&& entry->key != dummy
191-
&& _PyString_Eq(entry->key, key)))
193+
&& unicode_eq(entry->key, key)))
192194
return entry;
193195
if (entry->key == dummy && freeslot == NULL)
194196
freeslot = entry;
@@ -375,8 +377,8 @@ set_add_key(register PySetObject *so, PyObject *key)
375377
register long hash;
376378
register Py_ssize_t n_used;
377379

378-
if (!PyString_CheckExact(key) ||
379-
(hash = ((PyStringObject *) key)->ob_shash) == -1) {
380+
if (!PyUnicode_CheckExact(key) ||
381+
(hash = ((PyUnicodeObject *) key)->hash) == -1) {
380382
hash = PyObject_Hash(key);
381383
if (hash == -1)
382384
return -1;
@@ -422,8 +424,9 @@ set_discard_key(PySetObject *so, PyObject *key)
422424
PyObject *old_key;
423425

424426
assert (PyAnySet_Check(so));
425-
if (!PyString_CheckExact(key) ||
426-
(hash = ((PyStringObject *) key)->ob_shash) == -1) {
427+
428+
if (!PyUnicode_CheckExact(key) ||
429+
(hash = ((PyUnicodeObject *) key)->hash) == -1) {
427430
hash = PyObject_Hash(key);
428431
if (hash == -1)
429432
return -1;
@@ -668,8 +671,8 @@ set_contains_key(PySetObject *so, PyObject *key)
668671
long hash;
669672
setentry *entry;
670673

671-
if (!PyString_CheckExact(key) ||
672-
(hash = ((PyStringObject *) key)->ob_shash) == -1) {
674+
if (!PyUnicode_CheckExact(key) ||
675+
(hash = ((PyUnicodeObject *) key)->hash) == -1) {
673676
hash = PyObject_Hash(key);
674677
if (hash == -1)
675678
return -1;
@@ -989,7 +992,7 @@ make_new_set(PyTypeObject *type, PyObject *iterable)
989992
INIT_NONZERO_SET_SLOTS(so);
990993
}
991994

992-
so->lookup = set_lookkey_string;
995+
so->lookup = set_lookkey_unicode;
993996
so->weakreflist = NULL;
994997

995998
if (iterable != NULL) {
@@ -1352,7 +1355,7 @@ set_isdisjoint(PySetObject *so, PyObject *other)
13521355
while ((key = PyIter_Next(it)) != NULL) {
13531356
int rv;
13541357
setentry entry;
1355-
long hash = PyObject_Hash(key);
1358+
long hash = PyObject_Hash(key);;
13561359

13571360
if (hash == -1) {
13581361
Py_DECREF(key);

Objects/stringlib/eq.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/* Fast unicode equal function optimized for dictobject.c and setobject.c */
2+
3+
/* Return 1 if two unicode objects are equal, 0 if not.
4+
* unicode_eq() is called when the hash of two unicode objects is equal.
5+
*/
6+
Py_LOCAL_INLINE(int)
7+
unicode_eq(PyObject *aa, PyObject *bb)
8+
{
9+
register PyUnicodeObject *a = (PyUnicodeObject *)aa;
10+
register PyUnicodeObject *b = (PyUnicodeObject *)bb;
11+
12+
if (a->length != b->length)
13+
return 0;
14+
if (a->length == 0)
15+
return 1;
16+
if (a->str[0] != b->str[0])
17+
return 0;
18+
if (a->length == 1)
19+
return 1;
20+
return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0;
21+
}

Objects/stringobject.c

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -877,16 +877,6 @@ string_richcompare(PyStringObject *a, PyStringObject *b, int op)
877877
return result;
878878
}
879879

880-
int
881-
_PyString_Eq(PyObject *o1, PyObject *o2)
882-
{
883-
PyStringObject *a = (PyStringObject*) o1;
884-
PyStringObject *b = (PyStringObject*) o2;
885-
return Py_Size(a) == Py_Size(b)
886-
&& *a->ob_sval == *b->ob_sval
887-
&& memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
888-
}
889-
890880
static long
891881
string_hash(PyStringObject *a)
892882
{

0 commit comments

Comments
 (0)