Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 6263d54

Browse files
committed
Rewrite readlines() to speed it up -- about a factor of 2 on my
Indigo2, reading a 9Meg file from the local disk.
1 parent 5449b6e commit 6263d54

1 file changed

Lines changed: 89 additions & 25 deletions

File tree

Objects/fileobject.c

Lines changed: 89 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -451,25 +451,29 @@ file_read(f, args)
451451
if (v == NULL)
452452
return NULL;
453453
bytesread = 0;
454-
Py_BEGIN_ALLOW_THREADS
455454
for (;;) {
455+
Py_BEGIN_ALLOW_THREADS
456+
errno = 0;
456457
chunksize = fread(BUF(v) + bytesread, 1,
457458
buffersize - bytesread, f->f_fp);
458-
/* XXX Error check? */
459-
if (chunksize == 0)
460-
break;
459+
Py_END_ALLOW_THREADS
460+
if (chunksize == 0) {
461+
if (!ferror(f->f_fp))
462+
break;
463+
PyErr_SetFromErrno(PyExc_IOError);
464+
clearerr(f->f_fp);
465+
Py_DECREF(v);
466+
return NULL;
467+
}
461468
bytesread += chunksize;
462469
if (bytesread < buffersize)
463470
break;
464471
if (bytesrequested < 0) {
465472
buffersize = new_buffersize(f, buffersize);
466-
Py_BLOCK_THREADS
467473
if (_PyString_Resize(&v, buffersize) < 0)
468474
return NULL;
469-
Py_UNBLOCK_THREADS
470475
}
471476
}
472-
Py_END_ALLOW_THREADS
473477
if (bytesread != buffersize)
474478
_PyString_Resize(&v, bytesread);
475479
return v;
@@ -488,24 +492,21 @@ file_readinto(f, args)
488492
if (!PyArg_Parse(args, "w#", &ptr, &ntodo))
489493
return NULL;
490494
ndone = 0;
491-
/*
492-
** XXXX Is this correct? Other threads may see partially-completed
493-
** reads if they look at the object we're reading into...
494-
*/
495-
Py_BEGIN_ALLOW_THREADS
496-
while(ntodo > 0) {
495+
while (ntodo > 0) {
496+
Py_BEGIN_ALLOW_THREADS
497+
errno = 0;
497498
nnow = fread(ptr+ndone, 1, ntodo, f->f_fp);
498-
if (nnow < 0 ) {
499+
Py_END_ALLOW_THREADS
500+
if (nnow == 0) {
501+
if (!ferror(f->f_fp))
502+
break;
499503
PyErr_SetFromErrno(PyExc_IOError);
500504
clearerr(f->f_fp);
501505
return NULL;
502506
}
503-
if (nnow == 0)
504-
break;
505507
ndone += nnow;
506508
ntodo -= nnow;
507509
}
508-
Py_END_ALLOW_THREADS
509510
return PyInt_FromLong(ndone);
510511
}
511512

@@ -675,6 +676,14 @@ file_readlines(f, args)
675676
{
676677
PyObject *list;
677678
PyObject *line;
679+
char small_buffer[SMALLCHUNK];
680+
char *buffer = small_buffer;
681+
size_t buffersize = SMALLCHUNK;
682+
PyObject *big_buffer = NULL;
683+
size_t nfilled = 0;
684+
size_t nread;
685+
char *p, *q, *end;
686+
int err;
678687

679688
if (f->f_fp == NULL)
680689
return err_closed();
@@ -683,18 +692,73 @@ file_readlines(f, args)
683692
if ((list = PyList_New(0)) == NULL)
684693
return NULL;
685694
for (;;) {
686-
line = getline(f, 0);
687-
if (line != NULL && PyString_Size(line) == 0) {
688-
Py_DECREF(line);
689-
break;
690-
}
691-
if (line == NULL || PyList_Append(list, line) != 0) {
695+
Py_BEGIN_ALLOW_THREADS
696+
errno = 0;
697+
nread = fread(buffer+nfilled, 1, buffersize-nfilled, f->f_fp);
698+
Py_END_ALLOW_THREADS
699+
if (nread == 0) {
700+
if (nread == 0)
701+
break;
702+
PyErr_SetFromErrno(PyExc_IOError);
703+
clearerr(f->f_fp);
704+
error:
692705
Py_DECREF(list);
693-
Py_XDECREF(line);
694-
return NULL;
706+
list = NULL;
707+
goto cleanup;
695708
}
709+
p = memchr(buffer+nfilled, '\n', nread);
710+
if (p == NULL) {
711+
/* Need a larger buffer to fit this line */
712+
nfilled += nread;
713+
buffersize *= 2;
714+
if (big_buffer == NULL) {
715+
/* Create the big buffer */
716+
big_buffer = PyString_FromStringAndSize(
717+
NULL, buffersize);
718+
if (big_buffer == NULL)
719+
goto error;
720+
buffer = PyString_AS_STRING(big_buffer);
721+
memcpy(buffer, small_buffer, nfilled);
722+
}
723+
else {
724+
/* Grow the big buffer */
725+
_PyString_Resize(&big_buffer, buffersize);
726+
buffer = PyString_AS_STRING(big_buffer);
727+
}
728+
continue;
729+
}
730+
end = buffer+nfilled+nread;
731+
q = buffer;
732+
do {
733+
/* Process complete lines */
734+
p++;
735+
line = PyString_FromStringAndSize(q, p-q);
736+
if (line == NULL)
737+
goto error;
738+
err = PyList_Append(list, line);
739+
Py_DECREF(line);
740+
if (err != 0)
741+
goto error;
742+
q = p;
743+
p = memchr(q, '\n', end-q);
744+
} while (p != NULL);
745+
/* Move the remaining incomplete line to the start */
746+
nfilled = end-q;
747+
memmove(buffer, q, nfilled);
748+
}
749+
if (nfilled != 0) {
750+
/* Partial last line */
751+
line = PyString_FromStringAndSize(buffer, nfilled);
752+
if (line == NULL)
753+
goto error;
754+
err = PyList_Append(list, line);
696755
Py_DECREF(line);
756+
if (err != 0)
757+
goto error;
697758
}
759+
cleanup:
760+
if (big_buffer)
761+
Py_DECREF(big_buffer);
698762
return list;
699763
}
700764

0 commit comments

Comments
 (0)