Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c66f909

Browse files
committed
#2523: binary buffered reading is quadratic
1 parent 7114193 commit c66f909

2 files changed

Lines changed: 64 additions & 30 deletions

File tree

Lib/io.py

Lines changed: 61 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -893,8 +893,12 @@ def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
893893
"""
894894
raw._checkReadable()
895895
_BufferedIOMixin.__init__(self, raw)
896-
self._read_buf = b""
897896
self.buffer_size = buffer_size
897+
self._reset_read_buf()
898+
899+
def _reset_read_buf(self):
900+
self._read_buf = b""
901+
self._read_pos = 0
898902

899903
def read(self, n=None):
900904
"""Read n bytes.
@@ -904,25 +908,50 @@ def read(self, n=None):
904908
mode. If n is negative, read until EOF or until read() would
905909
block.
906910
"""
907-
if n is None:
908-
n = -1
909911
nodata_val = b""
910-
while n < 0 or len(self._read_buf) < n:
911-
to_read = max(self.buffer_size,
912-
n if n is not None else 2*len(self._read_buf))
913-
current = self.raw.read(to_read)
914-
if current in (b"", None):
915-
nodata_val = current
912+
empty_values = (b"", None)
913+
buf = self._read_buf
914+
pos = self._read_pos
915+
916+
# Special case for when the number of bytes to read is unspecified.
917+
if n is None or n == -1:
918+
self._reset_read_buf()
919+
chunks = [buf[pos:]] # Strip the consumed bytes.
920+
current_size = 0
921+
while True:
922+
# Read until EOF or until read() would block.
923+
chunk = self.raw.read()
924+
if chunk in empty_values:
925+
nodata_val = chunk
926+
break
927+
current_size += len(chunk)
928+
chunks.append(chunk)
929+
return b"".join(chunks) or nodata_val
930+
931+
# The number of bytes to read is specified, return at most n bytes.
932+
avail = len(buf) - pos # Length of the available buffered data.
933+
if n <= avail:
934+
# Fast path: the data to read is fully buffered.
935+
self._read_pos += n
936+
return buf[pos:pos+n]
937+
# Slow path: read from the stream until enough bytes are read,
938+
# or until an EOF occurs or until read() would block.
939+
chunks = [buf[pos:]]
940+
wanted = max(self.buffer_size, n)
941+
while avail < n:
942+
chunk = self.raw.read(wanted)
943+
if chunk in empty_values:
944+
nodata_val = chunk
916945
break
917-
self._read_buf += current
918-
if self._read_buf:
919-
if n < 0:
920-
n = len(self._read_buf)
921-
out = self._read_buf[:n]
922-
self._read_buf = self._read_buf[n:]
923-
else:
924-
out = nodata_val
925-
return out
946+
avail += len(chunk)
947+
chunks.append(chunk)
948+
# n is more than avail only when an EOF occurred or when
949+
# read() would have blocked.
950+
n = min(n, avail)
951+
out = b"".join(chunks)
952+
self._read_buf = out[n:] # Save the extra data in the buffer.
953+
self._read_pos = 0
954+
return out[:n] if out else nodata_val
926955

927956
def peek(self, n=0):
928957
"""Returns buffered bytes without advancing the position.
@@ -932,13 +961,14 @@ def peek(self, n=0):
932961
than self.buffer_size.
933962
"""
934963
want = min(n, self.buffer_size)
935-
have = len(self._read_buf)
964+
have = len(self._read_buf) - self._read_pos
936965
if have < want:
937966
to_read = self.buffer_size - have
938967
current = self.raw.read(to_read)
939968
if current:
940-
self._read_buf += current
941-
return self._read_buf
969+
self._read_buf = self._read_buf[self._read_pos:] + current
970+
self._read_pos = 0
971+
return self._read_buf[self._read_pos:]
942972

943973
def read1(self, n):
944974
"""Reads up to n bytes, with at most one read() system call."""
@@ -947,16 +977,16 @@ def read1(self, n):
947977
if n <= 0:
948978
return b""
949979
self.peek(1)
950-
return self.read(min(n, len(self._read_buf)))
980+
return self.read(min(n, len(self._read_buf) - self._read_pos))
951981

952982
def tell(self):
953-
return self.raw.tell() - len(self._read_buf)
983+
return self.raw.tell() - len(self._read_buf) + self._read_pos
954984

955985
def seek(self, pos, whence=0):
956986
if whence == 1:
957-
pos -= len(self._read_buf)
987+
pos -= len(self._read_buf) - self._read_pos
958988
pos = self.raw.seek(pos, whence)
959-
self._read_buf = b""
989+
self._reset_read_buf()
960990
return pos
961991

962992

@@ -1125,14 +1155,14 @@ def seek(self, pos, whence=0):
11251155
# First do the raw seek, then empty the read buffer, so that
11261156
# if the raw seek fails, we don't lose buffered data forever.
11271157
pos = self.raw.seek(pos, whence)
1128-
self._read_buf = b""
1158+
self._reset_read_buf()
11291159
return pos
11301160

11311161
def tell(self):
1132-
if (self._write_buf):
1162+
if self._write_buf:
11331163
return self.raw.tell() + len(self._write_buf)
11341164
else:
1135-
return self.raw.tell() - len(self._read_buf)
1165+
return BufferedReader.tell(self)
11361166

11371167
def truncate(self, pos=None):
11381168
if pos is None:
@@ -1161,8 +1191,9 @@ def read1(self, n):
11611191

11621192
def write(self, b):
11631193
if self._read_buf:
1164-
self.raw.seek(-len(self._read_buf), 1) # Undo readahead
1165-
self._read_buf = b""
1194+
# Undo readahead
1195+
self.raw.seek(self._read_pos - len(self._read_buf), 1)
1196+
self._reset_read_buf()
11661197
return BufferedWriter.write(self, b)
11671198

11681199

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ Library
2222
file name rather than a ZipInfo instance, so files are extracted with
2323
mode 0600 rather than 000 under Unix.
2424

25+
- Issue #2523: Fix quadratic behaviour when read()ing a binary file without
26+
asking for a specific length.
27+
2528

2629
What's new in Python 3.0b2?
2730
===========================

0 commit comments

Comments
 (0)