Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit af7260e

Browse files
committed
Issue #15758: Fix FileIO.readall() so it no longer has O(n**2) complexity.
1 parent a29ac45 commit af7260e

2 files changed

Lines changed: 54 additions & 66 deletions

File tree

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ Core and Builtins
9191
Library
9292
-------
9393

94+
- Issue #15758: Fix FileIO.readall() so it no longer has O(n**2) complexity.
95+
9496
- Issue #14596: The struct.Struct() objects now use more compact implementation.
9597

9698
- Issue #17981: Closed socket on error in SysLogHandler.

Modules/_io/fileio.c

Lines changed: 52 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -556,33 +556,27 @@ fileio_readinto(fileio *self, PyObject *args)
556556
return PyLong_FromSsize_t(n);
557557
}
558558

559+
#ifndef HAVE_FSTAT
560+
561+
static PyObject *
562+
fileio_readall(fileio *self)
563+
{
564+
_Py_IDENTIFIER(readall);
565+
return _PyObject_CallMethodId((PyObject*)&PyRawIOBase_Type,
566+
&PyId_readall, "O", self);
567+
}
568+
569+
#else
570+
559571
static size_t
560-
new_buffersize(fileio *self, size_t currentsize
561-
#ifdef HAVE_FSTAT
562-
, Py_off_t pos, Py_off_t end
563-
#endif
564-
)
572+
new_buffersize(fileio *self, size_t currentsize)
565573
{
566574
size_t addend;
567-
#ifdef HAVE_FSTAT
568-
if (end != (Py_off_t)-1) {
569-
/* Files claiming a size smaller than SMALLCHUNK may
570-
actually be streaming pseudo-files. In this case, we
571-
apply the more aggressive algorithm below.
572-
*/
573-
if (end >= SMALLCHUNK && end >= pos && pos >= 0) {
574-
/* Add 1 so if the file were to grow we'd notice. */
575-
Py_off_t bufsize = currentsize + end - pos + 1;
576-
if (bufsize < PY_SSIZE_T_MAX)
577-
return (size_t)bufsize;
578-
else
579-
return PY_SSIZE_T_MAX;
580-
}
581-
}
582-
#endif
575+
583576
/* Expand the buffer by an amount proportional to the current size,
584577
giving us amortized linear-time behavior. For bigger sizes, use a
585578
less-than-double growth factor to avoid excessive allocation. */
579+
assert(currentsize <= PY_SSIZE_T_MAX);
586580
if (currentsize > 65536)
587581
addend = currentsize >> 3;
588582
else
@@ -596,25 +590,18 @@ new_buffersize(fileio *self, size_t currentsize
596590
static PyObject *
597591
fileio_readall(fileio *self)
598592
{
599-
#ifdef HAVE_FSTAT
600593
struct stat st;
601594
Py_off_t pos, end;
602-
#endif
603595
PyObject *result;
604-
Py_ssize_t total = 0;
596+
Py_ssize_t bytes_read = 0;
605597
Py_ssize_t n;
606-
size_t newsize;
598+
size_t bufsize;
607599

608600
if (self->fd < 0)
609601
return err_closed();
610602
if (!_PyVerify_fd(self->fd))
611603
return PyErr_SetFromErrno(PyExc_IOError);
612604

613-
result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
614-
if (result == NULL)
615-
return NULL;
616-
617-
#ifdef HAVE_FSTAT
618605
#if defined(MS_WIN64) || defined(MS_WINDOWS)
619606
pos = _lseeki64(self->fd, 0L, SEEK_CUR);
620607
#else
@@ -624,44 +611,46 @@ fileio_readall(fileio *self)
624611
end = st.st_size;
625612
else
626613
end = (Py_off_t)-1;
627-
#endif
614+
615+
if (end > 0 && end >= pos && pos >= 0 && end - pos < PY_SSIZE_T_MAX) {
616+
/* This is probably a real file, so we try to allocate a
617+
buffer one byte larger than the rest of the file. If the
618+
calculation is right then we should get EOF without having
619+
to enlarge the buffer. */
620+
bufsize = (size_t)(end - pos + 1);
621+
} else {
622+
bufsize = SMALLCHUNK;
623+
}
624+
625+
result = PyBytes_FromStringAndSize(NULL, bufsize);
626+
if (result == NULL)
627+
return NULL;
628+
628629
while (1) {
629-
#ifdef HAVE_FSTAT
630-
newsize = new_buffersize(self, total, pos, end);
631-
#else
632-
newsize = new_buffersize(self, total);
633-
#endif
634-
if (newsize > PY_SSIZE_T_MAX || newsize <= 0) {
635-
PyErr_SetString(PyExc_OverflowError,
636-
"unbounded read returned more bytes "
637-
"than a Python string can hold ");
638-
Py_DECREF(result);
639-
return NULL;
640-
}
630+
if (bytes_read >= (Py_ssize_t)bufsize) {
631+
bufsize = new_buffersize(self, bytes_read);
632+
if (bufsize > PY_SSIZE_T_MAX || bufsize <= 0) {
633+
PyErr_SetString(PyExc_OverflowError,
634+
"unbounded read returned more bytes "
635+
"than a Python string can hold ");
636+
Py_DECREF(result);
637+
return NULL;
638+
}
641639

642-
if (PyBytes_GET_SIZE(result) < (Py_ssize_t)newsize) {
643-
if (_PyBytes_Resize(&result, newsize) < 0) {
644-
if (total == 0) {
645-
Py_DECREF(result);
640+
if (PyBytes_GET_SIZE(result) < (Py_ssize_t)bufsize) {
641+
if (_PyBytes_Resize(&result, bufsize) < 0)
646642
return NULL;
647-
}
648-
PyErr_Clear();
649-
break;
650643
}
651644
}
652645
Py_BEGIN_ALLOW_THREADS
653646
errno = 0;
654-
n = newsize - total;
647+
n = bufsize - bytes_read;
655648
#if defined(MS_WIN64) || defined(MS_WINDOWS)
656649
if (n > INT_MAX)
657650
n = INT_MAX;
658-
n = read(self->fd,
659-
PyBytes_AS_STRING(result) + total,
660-
(int)n);
651+
n = read(self->fd, PyBytes_AS_STRING(result) + bytes_read, (int)n);
661652
#else
662-
n = read(self->fd,
663-
PyBytes_AS_STRING(result) + total,
664-
n);
653+
n = read(self->fd, PyBytes_AS_STRING(result) + bytes_read, n);
665654
#endif
666655
Py_END_ALLOW_THREADS
667656
if (n == 0)
@@ -674,7 +663,7 @@ fileio_readall(fileio *self)
674663
}
675664
continue;
676665
}
677-
if (total > 0)
666+
if (bytes_read > 0)
678667
break;
679668
if (errno == EAGAIN) {
680669
Py_DECREF(result);
@@ -684,22 +673,19 @@ fileio_readall(fileio *self)
684673
PyErr_SetFromErrno(PyExc_IOError);
685674
return NULL;
686675
}
687-
total += n;
688-
#ifdef HAVE_FSTAT
676+
bytes_read += n;
689677
pos += n;
690-
#endif
691678
}
692679

693-
if (PyBytes_GET_SIZE(result) > total) {
694-
if (_PyBytes_Resize(&result, total) < 0) {
695-
/* This should never happen, but just in case */
696-
Py_DECREF(result);
680+
if (PyBytes_GET_SIZE(result) > bytes_read) {
681+
if (_PyBytes_Resize(&result, bytes_read) < 0)
697682
return NULL;
698-
}
699683
}
700684
return result;
701685
}
702686

687+
#endif /* HAVE_FSTAT */
688+
703689
static PyObject *
704690
fileio_read(fileio *self, PyObject *args)
705691
{

0 commit comments

Comments
 (0)