Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit f4934ea

Browse files
Issue #28701: Replace PyUnicode_CompareWithASCIIString with _PyUnicode_EqualToASCIIString.
The latter function is more readable, faster and doesn't raise exceptions.
1 parent 5ebff7b commit f4934ea

21 files changed

+120
-75
lines changed

Doc/c-api/unicode.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1638,6 +1638,9 @@ They all return *NULL* or ``-1`` if an exception occurs.
16381638
Compare two strings and return ``-1``, ``0``, ``1`` for less than, equal, and greater than,
16391639
respectively.
16401640
1641+
This function returns ``-1`` upon failure, so one should call
1642+
:c:func:`PyErr_Occurred` to check for errors.
1643+
16411644
16421645
.. c:function:: int PyUnicode_CompareWithASCIIString(PyObject *uni, const char *string)
16431646
@@ -1646,6 +1649,9 @@ They all return *NULL* or ``-1`` if an exception occurs.
16461649
ASCII-encoded strings, but the function interprets the input string as
16471650
ISO-8859-1 if it contains non-ASCII characters.
16481651
1652+
This function returns ``-1`` upon failure, so one should call
1653+
:c:func:`PyErr_Occurred` to check for errors.
1654+
16491655
16501656
.. c:function:: PyObject* PyUnicode_RichCompare(PyObject *left, PyObject *right, int op)
16511657

Include/unicodeobject.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2011,6 +2011,17 @@ PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
20112011
const char *right /* ASCII-encoded string */
20122012
);
20132013

2014+
#ifndef Py_LIMITED_API
2015+
/* Test whether a unicode object is equal to an ASCII string.  Return 1 if true,
2016+
0 otherwise. Return 0 if any argument contains non-ASCII characters.
2017+
Any error that occurs inside will be cleared before return. */
2018+
2019+
PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
2020+
PyObject *left,
2021+
const char *right /* ASCII-encoded string */
2022+
);
2023+
#endif
2024+
20142025
/* Rich compare two strings and return one of the following:
20152026
20162027
- NULL in case an exception was raised

Modules/_decimal/_decimal.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,12 +1119,12 @@ context_getattr(PyObject *self, PyObject *name)
11191119
PyObject *retval;
11201120

11211121
if (PyUnicode_Check(name)) {
1122-
if (PyUnicode_CompareWithASCIIString(name, "traps") == 0) {
1122+
if (_PyUnicode_EqualToASCIIString(name, "traps")) {
11231123
retval = ((PyDecContextObject *)self)->traps;
11241124
Py_INCREF(retval);
11251125
return retval;
11261126
}
1127-
if (PyUnicode_CompareWithASCIIString(name, "flags") == 0) {
1127+
if (_PyUnicode_EqualToASCIIString(name, "flags")) {
11281128
retval = ((PyDecContextObject *)self)->flags;
11291129
Py_INCREF(retval);
11301130
return retval;
@@ -1144,10 +1144,10 @@ context_setattr(PyObject *self, PyObject *name, PyObject *value)
11441144
}
11451145

11461146
if (PyUnicode_Check(name)) {
1147-
if (PyUnicode_CompareWithASCIIString(name, "traps") == 0) {
1147+
if (_PyUnicode_EqualToASCIIString(name, "traps")) {
11481148
return context_settraps_dict(self, value);
11491149
}
1150-
if (PyUnicode_CompareWithASCIIString(name, "flags") == 0) {
1150+
if (_PyUnicode_EqualToASCIIString(name, "flags")) {
11511151
return context_setstatus_dict(self, value);
11521152
}
11531153
}
@@ -2446,14 +2446,14 @@ dectuple_as_str(PyObject *dectuple)
24462446
tmp = PyTuple_GET_ITEM(dectuple, 2);
24472447
if (PyUnicode_Check(tmp)) {
24482448
/* special */
2449-
if (PyUnicode_CompareWithASCIIString(tmp, "F") == 0) {
2449+
if (_PyUnicode_EqualToASCIIString(tmp, "F")) {
24502450
strcat(sign_special, "Inf");
24512451
is_infinite = 1;
24522452
}
2453-
else if (PyUnicode_CompareWithASCIIString(tmp, "n") == 0) {
2453+
else if (_PyUnicode_EqualToASCIIString(tmp, "n")) {
24542454
strcat(sign_special, "NaN");
24552455
}
2456-
else if (PyUnicode_CompareWithASCIIString(tmp, "N") == 0) {
2456+
else if (_PyUnicode_EqualToASCIIString(tmp, "N")) {
24572457
strcat(sign_special, "sNaN");
24582458
}
24592459
else {

Modules/_elementtree.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3686,11 +3686,11 @@ xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
36863686
{
36873687
if (PyUnicode_Check(nameobj)) {
36883688
PyObject* res;
3689-
if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0)
3689+
if (_PyUnicode_EqualToASCIIString(nameobj, "entity"))
36903690
res = self->entity;
3691-
else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0)
3691+
else if (_PyUnicode_EqualToASCIIString(nameobj, "target"))
36923692
res = self->target;
3693-
else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) {
3693+
else if (_PyUnicode_EqualToASCIIString(nameobj, "version")) {
36943694
return PyUnicode_FromFormat(
36953695
"Expat %d.%d.%d", XML_MAJOR_VERSION,
36963696
XML_MINOR_VERSION, XML_MICRO_VERSION);

Modules/_io/textio.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1023,7 +1023,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
10231023
else if (PyUnicode_Check(res)) {
10241024
encodefuncentry *e = encodefuncs;
10251025
while (e->name != NULL) {
1026-
if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1026+
if (_PyUnicode_EqualToASCIIString(res, e->name)) {
10271027
self->encodefunc = e->encodefunc;
10281028
break;
10291029
}

Modules/_lsprof.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ normalizeUserObj(PyObject *obj)
185185
}
186186
}
187187
if (modname != NULL) {
188-
if (PyUnicode_CompareWithASCIIString(modname, "builtins") != 0) {
188+
if (!_PyUnicode_EqualToASCIIString(modname, "builtins")) {
189189
PyObject *result;
190190
result = PyUnicode_FromFormat("<%U.%s>", modname,
191191
fn->m_ml->ml_name);

Modules/_pickle.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1675,7 +1675,7 @@ whichmodule(PyObject *global, PyObject *dotted_path)
16751675
while (PyDict_Next(modules_dict, &i, &module_name, &module)) {
16761676
PyObject *candidate;
16771677
if (PyUnicode_Check(module_name) &&
1678-
!PyUnicode_CompareWithASCIIString(module_name, "__main__"))
1678+
_PyUnicode_EqualToASCIIString(module_name, "__main__"))
16791679
continue;
16801680
if (module == Py_None)
16811681
continue;

Modules/_sqlite/connection.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1216,7 +1216,7 @@ static int pysqlite_connection_set_isolation_level(pysqlite_Connection* self, Py
12161216
return -1;
12171217
}
12181218
for (candidate = begin_statements; *candidate; candidate++) {
1219-
if (!PyUnicode_CompareWithASCIIString(uppercase_level, *candidate + 6))
1219+
if (_PyUnicode_EqualToASCIIString(uppercase_level, *candidate + 6))
12201220
break;
12211221
}
12221222
Py_DECREF(uppercase_level);

Modules/_testcapimodule.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2310,7 +2310,7 @@ test_string_from_format(PyObject *self, PyObject *args)
23102310
result = PyUnicode_FromFormat(FORMAT, (TYPE)1); \
23112311
if (result == NULL) \
23122312
return NULL; \
2313-
if (PyUnicode_CompareWithASCIIString(result, "1")) { \
2313+
if (!_PyUnicode_EqualToASCIIString(result, "1")) { \
23142314
msg = FORMAT " failed at 1"; \
23152315
goto Fail; \
23162316
} \

Modules/pyexpat.c

Lines changed: 21 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1247,8 +1247,7 @@ handlername2int(PyObject *name)
12471247
{
12481248
int i;
12491249
for (i = 0; handler_info[i].name != NULL; i++) {
1250-
if (PyUnicode_CompareWithASCIIString(
1251-
name, handler_info[i].name) == 0) {
1250+
if (_PyUnicode_EqualToASCIIString(name, handler_info[i].name)) {
12521251
return i;
12531252
}
12541253
}
@@ -1286,45 +1285,45 @@ xmlparse_getattro(xmlparseobject *self, PyObject *nameobj)
12861285

12871286
first_char = PyUnicode_READ_CHAR(nameobj, 0);
12881287
if (first_char == 'E') {
1289-
if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorCode") == 0)
1288+
if (_PyUnicode_EqualToASCIIString(nameobj, "ErrorCode"))
12901289
return PyLong_FromLong((long)
12911290
XML_GetErrorCode(self->itself));
1292-
if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorLineNumber") == 0)
1291+
if (_PyUnicode_EqualToASCIIString(nameobj, "ErrorLineNumber"))
12931292
return PyLong_FromLong((long)
12941293
XML_GetErrorLineNumber(self->itself));
1295-
if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorColumnNumber") == 0)
1294+
if (_PyUnicode_EqualToASCIIString(nameobj, "ErrorColumnNumber"))
12961295
return PyLong_FromLong((long)
12971296
XML_GetErrorColumnNumber(self->itself));
1298-
if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorByteIndex") == 0)
1297+
if (_PyUnicode_EqualToASCIIString(nameobj, "ErrorByteIndex"))
12991298
return PyLong_FromLong((long)
13001299
XML_GetErrorByteIndex(self->itself));
13011300
}
13021301
if (first_char == 'C') {
1303-
if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentLineNumber") == 0)
1302+
if (_PyUnicode_EqualToASCIIString(nameobj, "CurrentLineNumber"))
13041303
return PyLong_FromLong((long)
13051304
XML_GetCurrentLineNumber(self->itself));
1306-
if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentColumnNumber") == 0)
1305+
if (_PyUnicode_EqualToASCIIString(nameobj, "CurrentColumnNumber"))
13071306
return PyLong_FromLong((long)
13081307
XML_GetCurrentColumnNumber(self->itself));
1309-
if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentByteIndex") == 0)
1308+
if (_PyUnicode_EqualToASCIIString(nameobj, "CurrentByteIndex"))
13101309
return PyLong_FromLong((long)
13111310
XML_GetCurrentByteIndex(self->itself));
13121311
}
13131312
if (first_char == 'b') {
1314-
if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_size") == 0)
1313+
if (_PyUnicode_EqualToASCIIString(nameobj, "buffer_size"))
13151314
return PyLong_FromLong((long) self->buffer_size);
1316-
if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_text") == 0)
1315+
if (_PyUnicode_EqualToASCIIString(nameobj, "buffer_text"))
13171316
return get_pybool(self->buffer != NULL);
1318-
if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_used") == 0)
1317+
if (_PyUnicode_EqualToASCIIString(nameobj, "buffer_used"))
13191318
return PyLong_FromLong((long) self->buffer_used);
13201319
}
1321-
if (PyUnicode_CompareWithASCIIString(nameobj, "namespace_prefixes") == 0)
1320+
if (_PyUnicode_EqualToASCIIString(nameobj, "namespace_prefixes"))
13221321
return get_pybool(self->ns_prefixes);
1323-
if (PyUnicode_CompareWithASCIIString(nameobj, "ordered_attributes") == 0)
1322+
if (_PyUnicode_EqualToASCIIString(nameobj, "ordered_attributes"))
13241323
return get_pybool(self->ordered_attributes);
1325-
if (PyUnicode_CompareWithASCIIString(nameobj, "specified_attributes") == 0)
1324+
if (_PyUnicode_EqualToASCIIString(nameobj, "specified_attributes"))
13261325
return get_pybool((long) self->specified_attributes);
1327-
if (PyUnicode_CompareWithASCIIString(nameobj, "intern") == 0) {
1326+
if (_PyUnicode_EqualToASCIIString(nameobj, "intern")) {
13281327
if (self->intern == NULL) {
13291328
Py_INCREF(Py_None);
13301329
return Py_None;
@@ -1388,7 +1387,7 @@ xmlparse_setattro(xmlparseobject *self, PyObject *name, PyObject *v)
13881387
PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
13891388
return -1;
13901389
}
1391-
if (PyUnicode_CompareWithASCIIString(name, "buffer_text") == 0) {
1390+
if (_PyUnicode_EqualToASCIIString(name, "buffer_text")) {
13921391
int b = PyObject_IsTrue(v);
13931392
if (b < 0)
13941393
return -1;
@@ -1410,30 +1409,30 @@ xmlparse_setattro(xmlparseobject *self, PyObject *name, PyObject *v)
14101409
}
14111410
return 0;
14121411
}
1413-
if (PyUnicode_CompareWithASCIIString(name, "namespace_prefixes") == 0) {
1412+
if (_PyUnicode_EqualToASCIIString(name, "namespace_prefixes")) {
14141413
int b = PyObject_IsTrue(v);
14151414
if (b < 0)
14161415
return -1;
14171416
self->ns_prefixes = b;
14181417
XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
14191418
return 0;
14201419
}
1421-
if (PyUnicode_CompareWithASCIIString(name, "ordered_attributes") == 0) {
1420+
if (_PyUnicode_EqualToASCIIString(name, "ordered_attributes")) {
14221421
int b = PyObject_IsTrue(v);
14231422
if (b < 0)
14241423
return -1;
14251424
self->ordered_attributes = b;
14261425
return 0;
14271426
}
1428-
if (PyUnicode_CompareWithASCIIString(name, "specified_attributes") == 0) {
1427+
if (_PyUnicode_EqualToASCIIString(name, "specified_attributes")) {
14291428
int b = PyObject_IsTrue(v);
14301429
if (b < 0)
14311430
return -1;
14321431
self->specified_attributes = b;
14331432
return 0;
14341433
}
14351434

1436-
if (PyUnicode_CompareWithASCIIString(name, "buffer_size") == 0) {
1435+
if (_PyUnicode_EqualToASCIIString(name, "buffer_size")) {
14371436
long new_buffer_size;
14381437
if (!PyLong_Check(v)) {
14391438
PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
@@ -1479,7 +1478,7 @@ xmlparse_setattro(xmlparseobject *self, PyObject *name, PyObject *v)
14791478
return 0;
14801479
}
14811480

1482-
if (PyUnicode_CompareWithASCIIString(name, "CharacterDataHandler") == 0) {
1481+
if (_PyUnicode_EqualToASCIIString(name, "CharacterDataHandler")) {
14831482
/* If we're changing the character data handler, flush all
14841483
* cached data with the old handler. Not sure there's a
14851484
* "right" thing to do, though, but this probably won't

Objects/longobject.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4936,9 +4936,9 @@ long_to_bytes(PyLongObject *v, PyObject *args, PyObject *kwds)
49364936
return NULL;
49374937
}
49384938

4939-
if (!PyUnicode_CompareWithASCIIString(byteorder_str, "little"))
4939+
if (_PyUnicode_EqualToASCIIString(byteorder_str, "little"))
49404940
little_endian = 1;
4941-
else if (!PyUnicode_CompareWithASCIIString(byteorder_str, "big"))
4941+
else if (_PyUnicode_EqualToASCIIString(byteorder_str, "big"))
49424942
little_endian = 0;
49434943
else {
49444944
PyErr_SetString(PyExc_ValueError,
@@ -5019,9 +5019,9 @@ long_from_bytes(PyTypeObject *type, PyObject *args, PyObject *kwds)
50195019
return NULL;
50205020
}
50215021

5022-
if (!PyUnicode_CompareWithASCIIString(byteorder_str, "little"))
5022+
if (_PyUnicode_EqualToASCIIString(byteorder_str, "little"))
50235023
little_endian = 1;
5024-
else if (!PyUnicode_CompareWithASCIIString(byteorder_str, "big"))
5024+
else if (_PyUnicode_EqualToASCIIString(byteorder_str, "big"))
50255025
little_endian = 0;
50265026
else {
50275027
PyErr_SetString(PyExc_ValueError,

Objects/moduleobject.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -587,7 +587,7 @@ _PyModule_ClearDict(PyObject *d)
587587
while (PyDict_Next(d, &pos, &key, &value)) {
588588
if (value != Py_None && PyUnicode_Check(key)) {
589589
if (PyUnicode_READ_CHAR(key, 0) != '_' ||
590-
PyUnicode_CompareWithASCIIString(key, "__builtins__") != 0)
590+
!_PyUnicode_EqualToASCIIString(key, "__builtins__"))
591591
{
592592
if (Py_VerboseFlag > 1) {
593593
const char *s = _PyUnicode_AsString(key);

Objects/typeobject.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2395,7 +2395,7 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
23952395
}
23962396
add_dict++;
23972397
}
2398-
if (PyUnicode_CompareWithASCIIString(tmp, "__weakref__") == 0) {
2398+
if (_PyUnicode_EqualToASCIIString(tmp, "__weakref__")) {
23992399
if (!may_add_weak || add_weak) {
24002400
PyErr_SetString(PyExc_TypeError,
24012401
"__weakref__ slot disallowed: "
@@ -2419,7 +2419,7 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
24192419
if ((add_dict &&
24202420
_PyUnicode_CompareWithId(tmp, &PyId___dict__) == 0) ||
24212421
(add_weak &&
2422-
PyUnicode_CompareWithASCIIString(tmp, "__weakref__") == 0))
2422+
_PyUnicode_EqualToASCIIString(tmp, "__weakref__")))
24232423
continue;
24242424
tmp =_Py_Mangle(name, tmp);
24252425
if (!tmp) {

Objects/unicodeobject.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10834,6 +10834,41 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
1083410834
}
1083510835
}
1083610836

10837+
static int
10838+
non_ready_unicode_equal_to_ascii_string(PyObject *unicode, const char *str)
10839+
{
10840+
size_t i, len;
10841+
const wchar_t *p;
10842+
len = (size_t)_PyUnicode_WSTR_LENGTH(unicode);
10843+
if (strlen(str) != len)
10844+
return 0;
10845+
p = _PyUnicode_WSTR(unicode);
10846+
assert(p);
10847+
for (i = 0; i < len; i++) {
10848+
unsigned char c = (unsigned char)str[i];
10849+
if (c > 128 || p[i] != (wchar_t)c)
10850+
return 0;
10851+
}
10852+
return 1;
10853+
}
10854+
10855+
int
10856+
_PyUnicode_EqualToASCIIString(PyObject *unicode, const char *str)
10857+
{
10858+
size_t len;
10859+
assert(_PyUnicode_CHECK(unicode));
10860+
if (PyUnicode_READY(unicode) == -1) {
10861+
/* Memory error or bad data */
10862+
PyErr_Clear();
10863+
return non_ready_unicode_equal_to_ascii_string(unicode, str);
10864+
}
10865+
if (!PyUnicode_IS_ASCII(unicode))
10866+
return 0;
10867+
len = (size_t)PyUnicode_GET_LENGTH(unicode);
10868+
return strlen(str) == len &&
10869+
memcmp(PyUnicode_1BYTE_DATA(unicode), str, len) == 0;
10870+
}
10871+
1083710872

1083810873
#define TEST_COND(cond) \
1083910874
((cond) ? Py_True : Py_False)

0 commit comments

Comments
 (0)