Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 413fdce

Browse files
Issue #24821: Refactor STRINGLIB(fastsearch_memchr_1char) and split it into
STRINGLIB(find_char) and STRINGLIB(rfind_char), which can be used independently without special preconditions.
1 parent 0304729 commit 413fdce

4 files changed

Lines changed: 121 additions & 100 deletions

File tree

Objects/bytearrayobject.c

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1159,16 +1159,15 @@ bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir)
11591159
ADJUST_INDICES(start, end, len);
11601160
if (end - start < sub_len)
11611161
res = -1;
1162-
else if (sub_len == 1
1163-
#ifndef HAVE_MEMRCHR
1164-
&& dir > 0
1165-
#endif
1166-
) {
1167-
unsigned char needle = *sub;
1168-
int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
1169-
res = stringlib_fastsearch_memchr_1char(
1170-
PyByteArray_AS_STRING(self) + start, end - start,
1171-
needle, needle, mode);
1162+
else if (sub_len == 1) {
1163+
if (dir > 0)
1164+
res = stringlib_find_char(
1165+
PyByteArray_AS_STRING(self) + start, end - start,
1166+
*sub);
1167+
else
1168+
res = stringlib_rfind_char(
1169+
PyByteArray_AS_STRING(self) + start, end - start,
1170+
*sub);
11721171
if (res >= 0)
11731172
res += start;
11741173
}

Objects/bytesobject.c

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1937,16 +1937,15 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
19371937
ADJUST_INDICES(start, end, len);
19381938
if (end - start < sub_len)
19391939
res = -1;
1940-
else if (sub_len == 1
1941-
#ifndef HAVE_MEMRCHR
1942-
&& dir > 0
1943-
#endif
1944-
) {
1945-
unsigned char needle = *sub;
1946-
int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
1947-
res = stringlib_fastsearch_memchr_1char(
1948-
PyBytes_AS_STRING(self) + start, end - start,
1949-
needle, needle, mode);
1940+
else if (sub_len == 1) {
1941+
if (dir > 0)
1942+
res = stringlib_find_char(
1943+
PyBytes_AS_STRING(self) + start, end - start,
1944+
*sub);
1945+
else
1946+
res = stringlib_rfind_char(
1947+
PyBytes_AS_STRING(self) + start, end - start,
1948+
*sub);
19501949
if (res >= 0)
19511950
res += start;
19521951
}

Objects/stringlib/fastsearch.h

Lines changed: 87 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -32,52 +32,98 @@
3232
#define STRINGLIB_BLOOM(mask, ch) \
3333
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
3434

35-
3635
Py_LOCAL_INLINE(Py_ssize_t)
37-
STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n,
38-
STRINGLIB_CHAR ch, unsigned char needle,
39-
int mode)
36+
STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
4037
{
41-
if (mode == FAST_SEARCH) {
42-
const STRINGLIB_CHAR *ptr = s;
43-
const STRINGLIB_CHAR *e = s + n;
44-
while (ptr < e) {
45-
void *candidate = memchr((const void *) ptr, needle, (e - ptr) * sizeof(STRINGLIB_CHAR));
46-
if (candidate == NULL)
47-
return -1;
48-
ptr = (const STRINGLIB_CHAR *) _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
49-
if (sizeof(STRINGLIB_CHAR) == 1 || *ptr == ch)
50-
return (ptr - s);
51-
/* False positive */
52-
ptr++;
53-
}
38+
const STRINGLIB_CHAR *p, *e;
39+
40+
p = s;
41+
e = s + n;
42+
if (n > 10) {
43+
#if STRINGLIB_SIZEOF_CHAR == 1
44+
p = memchr(s, ch, n);
45+
if (p != NULL)
46+
return (p - s);
5447
return -1;
48+
#else
49+
/* use memchr if we can choose a needle without too many likely
50+
false positives */
51+
unsigned char needle = ch & 0xff;
52+
/* If looking for a multiple of 256, we'd have too
53+
many false positives looking for the '\0' byte in UCS2
54+
and UCS4 representations. */
55+
if (needle != 0) {
56+
while (p < e) {
57+
void *candidate = memchr(p, needle,
58+
(e - p) * sizeof(STRINGLIB_CHAR));
59+
if (candidate == NULL)
60+
return -1;
61+
p = (const STRINGLIB_CHAR *)
62+
_Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
63+
if (*p == ch)
64+
return (p - s);
65+
/* False positive */
66+
p++;
67+
}
68+
return -1;
69+
}
70+
#endif
5571
}
72+
while (p < e) {
73+
if (*p == ch)
74+
return (p - s);
75+
p++;
76+
}
77+
return -1;
78+
}
79+
80+
Py_LOCAL_INLINE(Py_ssize_t)
81+
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
82+
{
83+
const STRINGLIB_CHAR *p;
5684
#ifdef HAVE_MEMRCHR
5785
/* memrchr() is a GNU extension, available since glibc 2.1.91.
5886
it doesn't seem as optimized as memchr(), but is still quite a bit
59-
faster than our hand-written loop in FASTSEARCH below */
60-
else if (mode == FAST_RSEARCH) {
61-
while (n > 0) {
62-
const STRINGLIB_CHAR *found;
63-
void *candidate = memrchr((const void *) s, needle, n * sizeof(STRINGLIB_CHAR));
64-
if (candidate == NULL)
65-
return -1;
66-
found = (const STRINGLIB_CHAR *) _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
67-
n = found - s;
68-
if (sizeof(STRINGLIB_CHAR) == 1 || *found == ch)
69-
return n;
70-
/* False positive */
71-
}
87+
faster than our hand-written loop below */
88+
89+
if (n > 10) {
90+
#if STRINGLIB_SIZEOF_CHAR == 1
91+
p = memrchr(s, ch, n);
92+
if (p != NULL)
93+
return (p - s);
7294
return -1;
73-
}
95+
#else
96+
/* use memrchr if we can choose a needle without too many likely
97+
false positives */
98+
unsigned char needle = ch & 0xff;
99+
/* If looking for a multiple of 256, we'd have too
100+
many false positives looking for the '\0' byte in UCS2
101+
and UCS4 representations. */
102+
if (needle != 0) {
103+
while (n > 0) {
104+
void *candidate = memrchr(s, needle,
105+
n * sizeof(STRINGLIB_CHAR));
106+
if (candidate == NULL)
107+
return -1;
108+
p = (const STRINGLIB_CHAR *)
109+
_Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
110+
n = p - s;
111+
if (*p == ch)
112+
return n;
113+
/* False positive */
114+
}
115+
return -1;
116+
}
74117
#endif
75-
else {
76-
assert(0); /* Should never get here */
77-
return 0;
78118
}
79-
80-
#undef DO_MEMCHR
119+
#endif /* HAVE_MEMRCHR */
120+
p = s + n;
121+
while (p > s) {
122+
p--;
123+
if (*p == ch)
124+
return (p - s);
125+
}
126+
return -1;
81127
}
82128

83129
Py_LOCAL_INLINE(Py_ssize_t)
@@ -99,40 +145,18 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
99145
if (m <= 0)
100146
return -1;
101147
/* use special case for 1-character strings */
102-
if (n > 10 && (mode == FAST_SEARCH
103-
#ifdef HAVE_MEMRCHR
104-
|| mode == FAST_RSEARCH
105-
#endif
106-
)) {
107-
/* use memchr if we can choose a needle without two many likely
108-
false positives */
109-
unsigned char needle;
110-
needle = p[0] & 0xff;
111-
#if STRINGLIB_SIZEOF_CHAR > 1
112-
/* If looking for a multiple of 256, we'd have too
113-
many false positives looking for the '\0' byte in UCS2
114-
and UCS4 representations. */
115-
if (needle != 0)
116-
#endif
117-
return STRINGLIB(fastsearch_memchr_1char)
118-
(s, n, p[0], needle, mode);
119-
}
120-
if (mode == FAST_COUNT) {
148+
if (mode == FAST_SEARCH)
149+
return STRINGLIB(find_char)(s, n, p[0]);
150+
else if (mode == FAST_RSEARCH)
151+
return STRINGLIB(rfind_char)(s, n, p[0]);
152+
else { /* FAST_COUNT */
121153
for (i = 0; i < n; i++)
122154
if (s[i] == p[0]) {
123155
count++;
124156
if (count == maxcount)
125157
return maxcount;
126158
}
127159
return count;
128-
} else if (mode == FAST_SEARCH) {
129-
for (i = 0; i < n; i++)
130-
if (s[i] == p[0])
131-
return i;
132-
} else { /* FAST_RSEARCH */
133-
for (i = n - 1; i > -1; i--)
134-
if (s[i] == p[0])
135-
return i;
136160
}
137161
return -1;
138162
}

Objects/unicodeobject.c

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -811,27 +811,26 @@ Py_LOCAL_INLINE(Py_ssize_t) findchar(const void *s, int kind,
811811
Py_ssize_t size, Py_UCS4 ch,
812812
int direction)
813813
{
814-
int mode = (direction == 1) ? FAST_SEARCH : FAST_RSEARCH;
815-
816814
switch (kind) {
817815
case PyUnicode_1BYTE_KIND:
818-
{
819-
Py_UCS1 ch1 = (Py_UCS1) ch;
820-
if (ch1 == ch)
821-
return ucs1lib_fastsearch((Py_UCS1 *) s, size, &ch1, 1, 0, mode);
822-
else
823-
return -1;
824-
}
816+
if ((Py_UCS1) ch != ch)
817+
return -1;
818+
if (direction > 0)
819+
return ucs1lib_find_char((Py_UCS1 *) s, size, (Py_UCS1) ch);
820+
else
821+
return ucs1lib_rfind_char((Py_UCS1 *) s, size, (Py_UCS1) ch);
825822
case PyUnicode_2BYTE_KIND:
826-
{
827-
Py_UCS2 ch2 = (Py_UCS2) ch;
828-
if (ch2 == ch)
829-
return ucs2lib_fastsearch((Py_UCS2 *) s, size, &ch2, 1, 0, mode);
830-
else
831-
return -1;
832-
}
823+
if ((Py_UCS2) ch != ch)
824+
return -1;
825+
if (direction > 0)
826+
return ucs2lib_find_char((Py_UCS2 *) s, size, (Py_UCS2) ch);
827+
else
828+
return ucs2lib_rfind_char((Py_UCS2 *) s, size, (Py_UCS2) ch);
833829
case PyUnicode_4BYTE_KIND:
834-
return ucs4lib_fastsearch((Py_UCS4 *) s, size, &ch, 1, 0, mode);
830+
if (direction > 0)
831+
return ucs4lib_find_char((Py_UCS4 *) s, size, ch);
832+
else
833+
return ucs4lib_rfind_char((Py_UCS4 *) s, size, ch);
835834
default:
836835
assert(0);
837836
return -1;

0 commit comments

Comments
 (0)