Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 4795524

Browse files
committed
Speed it up by using readlines(sizehint). It's still slower than
other ways of reading input. :-( In the process, I added an optional bufsize argument to the input() function and the FileInput class.
1 parent 1187aa4 commit 4795524

1 file changed

Lines changed: 39 additions & 10 deletions

File tree

Lib/fileinput.py

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,17 @@
6262
disabled when standard input is read. XXX The current implementation
6363
does not work for MS-DOS 8+3 filesystems.
6464
65+
Performance: this module is unfortunately one of the slower ways of
66+
processing large numbers of input lines. Nevertheless, a significant
67+
speed-up has been obtained by using readlines(bufsize) instead of
68+
readline(). A new keyword argument, bufsize=N, is present on the
69+
input() function and the FileInput() class to override the default
70+
buffer size.
71+
6572
XXX Possible additions:
6673
6774
- optional getopt argument processing
6875
- specify open mode ('r' or 'rb')
69-
- specify buffer size
7076
- fileno()
7177
- isatty()
7278
- read(), read(size), even readlines()
@@ -77,11 +83,13 @@
7783

7884
_state = None
7985

80-
def input(files=None, inplace=0, backup=""):
86+
DEFAULT_BUFSIZE = 8*1024
87+
88+
def input(files=None, inplace=0, backup="", bufsize=0):
8189
global _state
8290
if _state and _state._file:
8391
raise RuntimeError, "input() already active"
84-
_state = FileInput(files, inplace, backup)
92+
_state = FileInput(files, inplace, backup, bufsize)
8593
return _state
8694

8795
def close():
@@ -123,7 +131,7 @@ def isstdin():
123131

124132
class FileInput:
125133

126-
def __init__(self, files=None, inplace=0, backup=""):
134+
def __init__(self, files=None, inplace=0, backup="", bufsize=0):
127135
if type(files) == type(''):
128136
files = (files,)
129137
else:
@@ -136,6 +144,7 @@ def __init__(self, files=None, inplace=0, backup=""):
136144
self._files = files
137145
self._inplace = inplace
138146
self._backup = backup
147+
self._bufsize = bufsize or DEFAULT_BUFSIZE
139148
self._savestdout = None
140149
self._output = None
141150
self._filename = None
@@ -144,6 +153,8 @@ def __init__(self, files=None, inplace=0, backup=""):
144153
self._file = None
145154
self._isstdin = 0
146155
self._backupfilename = None
156+
self._buffer = []
157+
self._bufindex = 0
147158

148159
def __del__(self):
149160
self.close()
@@ -153,6 +164,15 @@ def close(self):
153164
self._files = ()
154165

155166
def __getitem__(self, i):
167+
try:
168+
line = self._buffer[self._bufindex]
169+
except IndexError:
170+
pass
171+
else:
172+
self._bufindex += 1
173+
self._lineno += 1
174+
self._filelineno += 1
175+
return line
156176
if i != self._lineno:
157177
raise RuntimeError, "accessing lines out of order"
158178
line = self.readline()
@@ -183,8 +203,19 @@ def nextfile(self):
183203
except: pass
184204

185205
self._isstdin = 0
206+
self._buffer = []
207+
self._bufindex = 0
186208

187209
def readline(self):
210+
try:
211+
line = self._buffer[self._bufindex]
212+
except IndexError:
213+
pass
214+
else:
215+
self._bufindex += 1
216+
self._lineno += 1
217+
self._filelineno += 1
218+
return line
188219
if not self._file:
189220
if not self._files:
190221
return ""
@@ -225,12 +256,10 @@ def readline(self):
225256
else:
226257
# This may raise IOError
227258
self._file = open(self._filename, "r")
228-
line = self._file.readline()
229-
if line:
230-
self._lineno = self._lineno + 1
231-
self._filelineno = self._filelineno + 1
232-
return line
233-
self.nextfile()
259+
self._buffer = self._file.readlines(self._bufsize)
260+
self._bufindex = 0
261+
if not self._buffer:
262+
self.nextfile()
234263
# Recursive call
235264
return self.readline()
236265

0 commit comments

Comments
 (0)