Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit d7728ca

Browse files
Issue #15381: Optimized line reading in io.BytesIO.
1 parent 121be0d commit d7728ca

2 files changed

Lines changed: 29 additions & 29 deletions

File tree

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,8 @@ Core and Builtins
115115
Library
116116
-------
117117

118+
- Issue #15381: Optimized line reading in io.BytesIO.
119+
118120
- Issue #20729: Restored the use of lazy iterkeys()/itervalues()/iteritems()
119121
in the mailbox module.
120122

Modules/_io/bytesio.c

Lines changed: 27 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -81,31 +81,28 @@ unshare(bytesio *self, size_t preferred_size, int truncate)
8181
object. Returns the length between the current position to the
8282
next newline character. */
8383
static Py_ssize_t
84-
get_line(bytesio *self, char **output)
84+
scan_eol(bytesio *self, Py_ssize_t len)
8585
{
86-
char *n;
87-
const char *str_end;
88-
Py_ssize_t len;
86+
const char *start, *n;
87+
Py_ssize_t maxlen;
8988

9089
assert(self->buf != NULL);
9190

9291
/* Move to the end of the line, up to the end of the string, s. */
93-
str_end = self->buf + self->string_size;
94-
for (n = self->buf + self->pos;
95-
n < str_end && *n != '\n';
96-
n++);
97-
98-
/* Skip the newline character */
99-
if (n < str_end)
100-
n++;
101-
102-
/* Get the length from the current position to the end of the line. */
103-
len = n - (self->buf + self->pos);
104-
*output = self->buf + self->pos;
105-
92+
start = self->buf + self->pos;
93+
maxlen = self->string_size - self->pos;
94+
if (len < 0 || len > maxlen)
95+
len = maxlen;
96+
97+
if (len) {
98+
n = memchr(start, '\n', len);
99+
if (n)
100+
/* Get the length from the current position to the end of
101+
the line. */
102+
len = n - start + 1;
103+
}
106104
assert(len >= 0);
107105
assert(self->pos < PY_SSIZE_T_MAX - len);
108-
self->pos += len;
109106

110107
return len;
111108
}
@@ -477,14 +474,10 @@ bytesio_readline(bytesio *self, PyObject *args)
477474
return NULL;
478475
}
479476

480-
n = get_line(self, &output);
481-
482-
if (size >= 0 && size < n) {
483-
size = n - size;
484-
n -= size;
485-
self->pos -= size;
486-
}
477+
n = scan_eol(self, size);
487478

479+
output = self->buf + self->pos;
480+
self->pos += n;
488481
return PyBytes_FromStringAndSize(output, n);
489482
}
490483

@@ -528,7 +521,9 @@ bytesio_readlines(bytesio *self, PyObject *args)
528521
if (!result)
529522
return NULL;
530523

531-
while ((n = get_line(self, &output)) != 0) {
524+
output = self->buf + self->pos;
525+
while ((n = scan_eol(self, -1)) != 0) {
526+
self->pos += n;
532527
line = PyBytes_FromStringAndSize(output, n);
533528
if (!line)
534529
goto on_error;
@@ -540,6 +535,7 @@ bytesio_readlines(bytesio *self, PyObject *args)
540535
size += n;
541536
if (maxsize > 0 && size >= maxsize)
542537
break;
538+
output += n;
543539
}
544540
return result;
545541

@@ -636,16 +632,18 @@ bytesio_truncate(bytesio *self, PyObject *args)
636632
static PyObject *
637633
bytesio_iternext(bytesio *self)
638634
{
639-
char *next;
635+
const char *next;
640636
Py_ssize_t n;
641637

642638
CHECK_CLOSED(self, NULL);
643639

644-
n = get_line(self, &next);
640+
n = scan_eol(self, -1);
645641

646-
if (!next || n == 0)
642+
if (n == 0)
647643
return NULL;
648644

645+
next = self->buf + self->pos;
646+
self->pos += n;
649647
return PyBytes_FromStringAndSize(next, n);
650648
}
651649

0 commit comments

Comments
 (0)