3232#define STRINGLIB_BLOOM (mask , ch ) \
3333 ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
3434
35-
3635Py_LOCAL_INLINE (Py_ssize_t )
37- STRINGLIB (fastsearch_memchr_1char )(const STRINGLIB_CHAR * s , Py_ssize_t n ,
38- STRINGLIB_CHAR ch , unsigned char needle ,
39- int mode )
36+ STRINGLIB (find_char )(const STRINGLIB_CHAR * s , Py_ssize_t n , STRINGLIB_CHAR ch )
4037{
41- if (mode == FAST_SEARCH ) {
42- const STRINGLIB_CHAR * ptr = s ;
43- const STRINGLIB_CHAR * e = s + n ;
44- while (ptr < e ) {
45- void * candidate = memchr ((const void * ) ptr , needle , (e - ptr ) * sizeof (STRINGLIB_CHAR ));
46- if (candidate == NULL )
47- return -1 ;
48- ptr = (const STRINGLIB_CHAR * ) _Py_ALIGN_DOWN (candidate , sizeof (STRINGLIB_CHAR ));
49- if (sizeof (STRINGLIB_CHAR ) == 1 || * ptr == ch )
50- return (ptr - s );
51- /* False positive */
52- ptr ++ ;
53- }
38+ const STRINGLIB_CHAR * p , * e ;
39+
40+ p = s ;
41+ e = s + n ;
42+ if (n > 10 ) {
43+ #if STRINGLIB_SIZEOF_CHAR == 1
44+ p = memchr (s , ch , n );
45+ if (p != NULL )
46+ return (p - s );
5447 return -1 ;
48+ #else
49+ /* use memchr if we can choose a needle without too many likely
50+ false positives */
51+ unsigned char needle = ch & 0xff ;
52+ /* If looking for a multiple of 256, we'd have too
53+ many false positives looking for the '\0' byte in UCS2
54+ and UCS4 representations. */
55+ if (needle != 0 ) {
56+ while (p < e ) {
57+ void * candidate = memchr (p , needle ,
58+ (e - p ) * sizeof (STRINGLIB_CHAR ));
59+ if (candidate == NULL )
60+ return -1 ;
61+ p = (const STRINGLIB_CHAR * )
62+ _Py_ALIGN_DOWN (candidate , sizeof (STRINGLIB_CHAR ));
63+ if (* p == ch )
64+ return (p - s );
65+ /* False positive */
66+ p ++ ;
67+ }
68+ return -1 ;
69+ }
70+ #endif
5571 }
72+ while (p < e ) {
73+ if (* p == ch )
74+ return (p - s );
75+ p ++ ;
76+ }
77+ return -1 ;
78+ }
79+
80+ Py_LOCAL_INLINE (Py_ssize_t )
81+ STRINGLIB (rfind_char )(const STRINGLIB_CHAR * s , Py_ssize_t n , STRINGLIB_CHAR ch )
82+ {
83+ const STRINGLIB_CHAR * p ;
5684#ifdef HAVE_MEMRCHR
5785 /* memrchr() is a GNU extension, available since glibc 2.1.91.
5886 it doesn't seem as optimized as memchr(), but is still quite a bit
59- faster than our hand-written loop in FASTSEARCH below */
60- else if (mode == FAST_RSEARCH ) {
61- while (n > 0 ) {
62- const STRINGLIB_CHAR * found ;
63- void * candidate = memrchr ((const void * ) s , needle , n * sizeof (STRINGLIB_CHAR ));
64- if (candidate == NULL )
65- return -1 ;
66- found = (const STRINGLIB_CHAR * ) _Py_ALIGN_DOWN (candidate , sizeof (STRINGLIB_CHAR ));
67- n = found - s ;
68- if (sizeof (STRINGLIB_CHAR ) == 1 || * found == ch )
69- return n ;
70- /* False positive */
71- }
87+ faster than our hand-written loop below */
88+
89+ if (n > 10 ) {
90+ #if STRINGLIB_SIZEOF_CHAR == 1
91+ p = memrchr (s , ch , n );
92+ if (p != NULL )
93+ return (p - s );
7294 return -1 ;
73- }
95+ #else
96+ /* use memrchr if we can choose a needle without too many likely
97+ false positives */
98+ unsigned char needle = ch & 0xff ;
99+ /* If looking for a multiple of 256, we'd have too
100+ many false positives looking for the '\0' byte in UCS2
101+ and UCS4 representations. */
102+ if (needle != 0 ) {
103+ while (n > 0 ) {
104+ void * candidate = memrchr (s , needle ,
105+ n * sizeof (STRINGLIB_CHAR ));
106+ if (candidate == NULL )
107+ return -1 ;
108+ p = (const STRINGLIB_CHAR * )
109+ _Py_ALIGN_DOWN (candidate , sizeof (STRINGLIB_CHAR ));
110+ n = p - s ;
111+ if (* p == ch )
112+ return n ;
113+ /* False positive */
114+ }
115+ return -1 ;
116+ }
74117#endif
75- else {
76- assert (0 ); /* Should never get here */
77- return 0 ;
78118 }
79-
80- #undef DO_MEMCHR
119+ #endif /* HAVE_MEMRCHR */
120+ p = s + n ;
121+ while (p > s ) {
122+ p -- ;
123+ if (* p == ch )
124+ return (p - s );
125+ }
126+ return -1 ;
81127}
82128
83129Py_LOCAL_INLINE (Py_ssize_t )
@@ -99,40 +145,18 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
99145 if (m <= 0 )
100146 return -1 ;
101147 /* use special case for 1-character strings */
102- if (n > 10 && (mode == FAST_SEARCH
103- #ifdef HAVE_MEMRCHR
104- || mode == FAST_RSEARCH
105- #endif
106- )) {
107- /* use memchr if we can choose a needle without too many likely
108- false positives */
109- unsigned char needle ;
110- needle = p [0 ] & 0xff ;
111- #if STRINGLIB_SIZEOF_CHAR > 1
112- /* If looking for a multiple of 256, we'd have too
113- many false positives looking for the '\0' byte in UCS2
114- and UCS4 representations. */
115- if (needle != 0 )
116- #endif
117- return STRINGLIB (fastsearch_memchr_1char )
118- (s , n , p [0 ], needle , mode );
119- }
120- if (mode == FAST_COUNT ) {
148+ if (mode == FAST_SEARCH )
149+ return STRINGLIB (find_char )(s , n , p [0 ]);
150+ else if (mode == FAST_RSEARCH )
151+ return STRINGLIB (rfind_char )(s , n , p [0 ]);
152+ else { /* FAST_COUNT */
121153 for (i = 0 ; i < n ; i ++ )
122154 if (s [i ] == p [0 ]) {
123155 count ++ ;
124156 if (count == maxcount )
125157 return maxcount ;
126158 }
127159 return count ;
128- } else if (mode == FAST_SEARCH ) {
129- for (i = 0 ; i < n ; i ++ )
130- if (s [i ] == p [0 ])
131- return i ;
132- } else { /* FAST_RSEARCH */
133- for (i = n - 1 ; i > -1 ; i -- )
134- if (s [i ] == p [0 ])
135- return i ;
136160 }
137161 return -1 ;
138162 }
0 commit comments