Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f0b934b

Browse files
committed
Reuse the stringlib in findchar(), and make its signature more convenient
1 parent c198d05 commit f0b934b

1 file changed

Lines changed: 41 additions & 39 deletions

File tree

Objects/unicodeobject.c

Lines changed: 41 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -519,36 +519,45 @@ make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
519519
#include "stringlib/localeutil.h"
520520
#include "stringlib/undef.h"
521521

522+
#include "stringlib/unicodedefs.h"
523+
#include "stringlib/fastsearch.h"
524+
#include "stringlib/count.h"
525+
#include "stringlib/find.h"
526+
522527
/* --- Unicode Object ----------------------------------------------------- */
523528

524529
static PyObject *
525530
fixup(PyObject *self, Py_UCS4 (*fixfct)(PyObject *s));
526531

527-
Py_LOCAL_INLINE(char *) findchar(void *s, int kind,
528-
Py_ssize_t size, Py_UCS4 ch,
529-
int direction)
532+
Py_LOCAL_INLINE(Py_ssize_t) findchar(void *s, int kind,
533+
Py_ssize_t size, Py_UCS4 ch,
534+
int direction)
530535
{
531-
/* like wcschr, but doesn't stop at NULL characters */
532-
Py_ssize_t i;
533-
if (kind == 1) {
534-
if (direction == 1)
535-
return memchr(s, ch, size);
536-
#ifdef HAVE_MEMRCHR
537-
else
538-
return memrchr(s, ch, size);
539-
#endif
540-
}
541-
if (direction == 1) {
542-
for(i = 0; i < size; i++)
543-
if (PyUnicode_READ(kind, s, i) == ch)
544-
return (char*)s + kind * i;
545-
}
546-
else {
547-
for(i = size-1; i >= 0; i--)
548-
if (PyUnicode_READ(kind, s, i) == ch)
549-
return (char*)s + kind * i;
536+
int mode = (direction == 1) ? FAST_SEARCH : FAST_RSEARCH;
537+
538+
switch (kind) {
539+
case PyUnicode_1BYTE_KIND:
540+
{
541+
Py_UCS1 ch1 = (Py_UCS1) ch;
542+
if (ch1 == ch)
543+
return ucs1lib_fastsearch((Py_UCS1 *) s, size, &ch1, 1, 0, mode);
544+
else
545+
return -1;
546+
}
547+
case PyUnicode_2BYTE_KIND:
548+
{
549+
Py_UCS2 ch2 = (Py_UCS2) ch;
550+
if (ch2 == ch)
551+
return ucs2lib_fastsearch((Py_UCS2 *) s, size, &ch2, 1, 0, mode);
552+
else
553+
return -1;
554+
}
555+
case PyUnicode_4BYTE_KIND:
556+
return ucs4lib_fastsearch((Py_UCS4 *) s, size, &ch, 1, 0, mode);
557+
default:
558+
assert(0);
559+
return -1;
550560
}
551-
return NULL;
552561
}
553562

554563
static PyObject*
@@ -3311,7 +3320,7 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr)
33113320
}
33123321
}
33133322
if (findchar(PyUnicode_DATA(output), PyUnicode_KIND(output),
3314-
PyUnicode_GET_LENGTH(output), 0, 1)) {
3323+
PyUnicode_GET_LENGTH(output), 0, 1) >= 0) {
33153324
PyErr_SetString(PyExc_TypeError, "embedded NUL character");
33163325
Py_DECREF(output);
33173326
return 0;
@@ -8638,12 +8647,6 @@ _PyUnicode_InsertThousandsGrouping(PyObject *unicode, int kind, void *data,
86388647
}
86398648

86408649

8641-
#include "stringlib/unicodedefs.h"
8642-
#include "stringlib/fastsearch.h"
8643-
8644-
#include "stringlib/count.h"
8645-
#include "stringlib/find.h"
8646-
86478650
/* helper macro to fixup start/end slice values */
86488651
#define ADJUST_INDICES(start, end, len) \
86498652
if (end > len) \
@@ -8779,8 +8782,8 @@ PyUnicode_FindChar(PyObject *str, Py_UCS4 ch,
87798782
Py_ssize_t start, Py_ssize_t end,
87808783
int direction)
87818784
{
8782-
char *result;
87838785
int kind;
8786+
Py_ssize_t result;
87848787
if (PyUnicode_READY(str) == -1)
87858788
return -2;
87868789
if (start < 0 || end < 0) {
@@ -8790,13 +8793,12 @@ PyUnicode_FindChar(PyObject *str, Py_UCS4 ch,
87908793
if (end > PyUnicode_GET_LENGTH(str))
87918794
end = PyUnicode_GET_LENGTH(str);
87928795
kind = PyUnicode_KIND(str);
8793-
result = findchar(PyUnicode_1BYTE_DATA(str)
8794-
+ kind*start,
8795-
kind,
8796-
end-start, ch, direction);
8797-
if (!result)
8796+
result = findchar(PyUnicode_1BYTE_DATA(str) + kind*start,
8797+
kind, end-start, ch, direction);
8798+
if (result == -1)
87988799
return -1;
8799-
return (result-(char*)PyUnicode_DATA(str)) >> (kind-1);
8800+
else
8801+
return start + result;
88008802
}
88018803

88028804
static int
@@ -9707,8 +9709,8 @@ replace(PyObject *self, PyObject *str1,
97079709
Py_UCS4 u1, u2;
97089710
int rkind;
97099711
u1 = PyUnicode_READ_CHAR(str1, 0);
9710-
if (!findchar(sbuf, PyUnicode_KIND(self),
9711-
slen, u1, 1))
9712+
if (findchar(sbuf, PyUnicode_KIND(self),
9713+
slen, u1, 1) < 0)
97129714
goto nothing;
97139715
u2 = PyUnicode_READ_CHAR(str2, 0);
97149716
u = PyUnicode_New(slen, maxchar);

0 commit comments

Comments
 (0)