Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c28e2e5

Browse files
committed
In text I/O, optimize scanning for newlines in strings with 1-byte Unicode characters
1 parent f364e7b commit c28e2e5

1 file changed

Lines changed: 20 additions & 12 deletions

File tree

Modules/_io/textio.c

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -365,19 +365,23 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
365365
*/
366366
if (seennl == 0 &&
367367
memchr(in_str, '\n', kind * len) != NULL) {
368-
Py_ssize_t i = 0;
369-
for (;;) {
370-
Py_UCS4 c;
371-
/* Fast loop for non-control characters */
372-
while (PyUnicode_READ(kind, in_str, i) > '\n')
373-
i++;
374-
c = PyUnicode_READ(kind, in_str, i++);
375-
if (c == '\n') {
376-
seennl |= SEEN_LF;
377-
break;
368+
if (kind == PyUnicode_1BYTE_KIND)
369+
seennl |= SEEN_LF;
370+
else {
371+
Py_ssize_t i = 0;
372+
for (;;) {
373+
Py_UCS4 c;
374+
/* Fast loop for non-control characters */
375+
while (PyUnicode_READ(kind, in_str, i) > '\n')
376+
i++;
377+
c = PyUnicode_READ(kind, in_str, i++);
378+
if (c == '\n') {
379+
seennl |= SEEN_LF;
380+
break;
381+
}
382+
if (i >= len)
383+
break;
378384
}
379-
if (i >= len)
380-
break;
381385
}
382386
}
383387
/* Finished: we have scanned for newlines, and none of them
@@ -1597,6 +1601,10 @@ textiowrapper_read(textio *self, PyObject *args)
15971601
static char *
15981602
find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
15991603
{
1604+
if (kind == PyUnicode_1BYTE_KIND) {
1605+
assert(ch < 256);
1606+
return (char *) memchr((void *) s, (char) ch, end - s);
1607+
}
16001608
for (;;) {
16011609
while (PyUnicode_READ(kind, s, 0) > ch)
16021610
s += kind;

0 commit comments

Comments
 (0)