Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit b211068

Browse files
committed
Issue #28822: Adjust indices handling of PyUnicode_FindChar().
1 parent 38f225d commit b211068

5 files changed

Lines changed: 55 additions & 8 deletions

File tree

Doc/c-api/unicode.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1625,6 +1625,9 @@ They all return *NULL* or ``-1`` if an exception occurs.
16251625
16261626
.. versionadded:: 3.3
16271627
1628+
.. versionchanged:: 3.7
1629+
*start* and *end* are now adjusted to behave like ``str[start:end]``.
1630+
16281631
16291632
.. c:function:: Py_ssize_t PyUnicode_Count(PyObject *str, PyObject *substr, \
16301633
Py_ssize_t start, Py_ssize_t end)

Lib/test/test_unicode.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2728,6 +2728,29 @@ def test_asucs4(self):
27282728
self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0')
27292729
self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff')
27302730

2731+
# Test PyUnicode_FindChar()
2732+
@support.cpython_only
2733+
def test_findchar(self):
2734+
from _testcapi import unicode_findchar
2735+
2736+
for str in "\xa1", "\u8000\u8080", "\ud800\udc02", "\U0001f100\U0001f1f1":
2737+
for i, ch in enumerate(str):
2738+
self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), 1), i)
2739+
self.assertEqual(unicode_findchar(str, ord(ch), 0, len(str), -1), i)
2740+
2741+
str = "!>_<!"
2742+
self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), 1), -1)
2743+
self.assertEqual(unicode_findchar(str, 0x110000, 0, len(str), -1), -1)
2744+
# start < end
2745+
self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, 1), 4)
2746+
self.assertEqual(unicode_findchar(str, ord('!'), 1, len(str)+1, -1), 4)
2747+
# start >= end
2748+
self.assertEqual(unicode_findchar(str, ord('!'), 0, 0, 1), -1)
2749+
self.assertEqual(unicode_findchar(str, ord('!'), len(str), 0, 1), -1)
2750+
# negative
2751+
self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, 1), 0)
2752+
self.assertEqual(unicode_findchar(str, ord('!'), -len(str), -1, -1), 0)
2753+
27312754
# Test PyUnicode_CopyCharacters()
27322755
@support.cpython_only
27332756
def test_copycharacters(self):

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,9 @@ Windows
545545
C API
546546
-----
547547

548+
- Issue #28822: The indices parameters *start* and *end* of PyUnicode_FindChar()
549+
are now adjusted to behave like ``str[start:end]``.
550+
548551
- Issue #28808: PyUnicode_CompareWithASCIIString() now never raises exceptions.
549552

550553
- Issue #28761: The fields name and doc of structures PyMemberDef, PyGetSetDef,

Modules/_testcapimodule.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1887,6 +1887,27 @@ unicode_asucs4(PyObject *self, PyObject *args)
18871887
return result;
18881888
}
18891889

1890+
static PyObject *
1891+
unicode_findchar(PyObject *self, PyObject *args)
1892+
{
1893+
PyObject *str;
1894+
int direction;
1895+
unsigned int ch;
1896+
Py_ssize_t result;
1897+
Py_ssize_t start, end;
1898+
1899+
if (!PyArg_ParseTuple(args, "UInni:unicode_findchar", &str, &ch,
1900+
&start, &end, &direction)) {
1901+
return NULL;
1902+
}
1903+
1904+
result = PyUnicode_FindChar(str, (Py_UCS4)ch, start, end, direction);
1905+
if (result == -2)
1906+
return NULL;
1907+
else
1908+
return PyLong_FromSsize_t(result);
1909+
}
1910+
18901911
static PyObject *
18911912
unicode_copycharacters(PyObject *self, PyObject *args)
18921913
{
@@ -4121,6 +4142,7 @@ static PyMethodDef TestMethods[] = {
41214142
{"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
41224143
{"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS},
41234144
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
4145+
{"unicode_findchar", unicode_findchar, METH_VARARGS},
41244146
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
41254147
{"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},
41264148
{"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS},

Objects/unicodeobject.c

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9461,16 +9461,12 @@ PyUnicode_FindChar(PyObject *str, Py_UCS4 ch,
94619461
int direction)
94629462
{
94639463
int kind;
9464-
Py_ssize_t result;
9464+
Py_ssize_t len, result;
94659465
if (PyUnicode_READY(str) == -1)
94669466
return -2;
9467-
if (start < 0 || end < 0) {
9468-
PyErr_SetString(PyExc_IndexError, "string index out of range");
9469-
return -2;
9470-
}
9471-
if (end > PyUnicode_GET_LENGTH(str))
9472-
end = PyUnicode_GET_LENGTH(str);
9473-
if (start >= end)
9467+
len = PyUnicode_GET_LENGTH(str);
9468+
ADJUST_INDICES(start, end, len);
9469+
if (end - start < 1)
94749470
return -1;
94759471
kind = PyUnicode_KIND(str);
94769472
result = findchar(PyUnicode_1BYTE_DATA(str) + kind*start,

0 commit comments

Comments
 (0)