Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit cc2dbc5

Browse files
Issue #15068: Got rid of excessive buffering in the fileinput module.
The bufsize parameter is no longer used.
1 parent 55e3218 commit cc2dbc5

4 files changed

Lines changed: 152 additions & 90 deletions

File tree

Doc/library/fileinput.rst

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ The following function is the primary interface of this module:
7171
.. versionchanged:: 3.2
7272
Can be used as a context manager.
7373

74+
.. versionchanged:: 3.5.2
75+
The *bufsize* parameter is no longer used.
76+
7477

7578
The following functions use the global state created by :func:`fileinput.input`;
7679
if there is no active state, :exc:`RuntimeError` is raised.
@@ -161,7 +164,10 @@ available for subclassing as well:
161164
Can be used as a context manager.
162165

163166
.. deprecated:: 3.4
164-
The ``'rU'`` and ``'U'`` modes.
167+
The ``'rU'`` and ``'U'`` modes.
168+
169+
.. versionchanged:: 3.5.2
170+
The *bufsize* parameter is no longer used.
165171

166172

167173
**Optional in-place filtering:** if the keyword argument ``inplace=True`` is

Lib/fileinput.py

Lines changed: 73 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,6 @@
6464
disabled when standard input is read. XXX The current implementation
6565
does not work for MS-DOS 8+3 filesystems.
6666
67-
Performance: this module is unfortunately one of the slower ways of
68-
processing large numbers of input lines. Nevertheless, a significant
69-
speed-up has been obtained by using readlines(bufsize) instead of
70-
readline(). A new keyword argument, bufsize=N, is present on the
71-
input() function and the FileInput() class to override the default
72-
buffer size.
73-
7467
XXX Possible additions:
7568
7669
- optional getopt argument processing
@@ -86,6 +79,7 @@
8679

8780
_state = None
8881

82+
# No longer used
8983
DEFAULT_BUFSIZE = 8*1024
9084

9185
def input(files=None, inplace=False, backup="", bufsize=0,
@@ -207,17 +201,15 @@ def __init__(self, files=None, inplace=False, backup="", bufsize=0,
207201
self._files = files
208202
self._inplace = inplace
209203
self._backup = backup
210-
self._bufsize = bufsize or DEFAULT_BUFSIZE
211204
self._savestdout = None
212205
self._output = None
213206
self._filename = None
214-
self._lineno = 0
207+
self._startlineno = 0
215208
self._filelineno = 0
216209
self._file = None
210+
self._readline = self._start_readline
217211
self._isstdin = False
218212
self._backupfilename = None
219-
self._buffer = []
220-
self._bufindex = 0
221213
# restrict mode argument to reading modes
222214
if mode not in ('r', 'rU', 'U', 'rb'):
223215
raise ValueError("FileInput opening mode must be one of "
@@ -253,22 +245,18 @@ def __iter__(self):
253245
return self
254246

255247
def __next__(self):
256-
try:
257-
line = self._buffer[self._bufindex]
258-
except IndexError:
259-
pass
260-
else:
261-
self._bufindex += 1
262-
self._lineno += 1
248+
line = self._readline()
249+
if line:
263250
self._filelineno += 1
264251
return line
265-
line = self.readline()
266-
if not line:
252+
if not self._file:
267253
raise StopIteration
268-
return line
254+
self.nextfile()
255+
# Recursive call
256+
return self.__next__()
269257

270258
def __getitem__(self, i):
271-
if i != self._lineno:
259+
if i != self.lineno():
272260
raise RuntimeError("accessing lines out of order")
273261
try:
274262
return self.__next__()
@@ -289,6 +277,7 @@ def nextfile(self):
289277
finally:
290278
file = self._file
291279
self._file = None
280+
self._readline = self._start_readline
292281
try:
293282
if file and not self._isstdin:
294283
file.close()
@@ -300,85 +289,81 @@ def nextfile(self):
300289
except OSError: pass
301290

302291
self._isstdin = False
303-
self._buffer = []
304-
self._bufindex = 0
305292

306293
def readline(self):
307-
try:
308-
line = self._buffer[self._bufindex]
309-
except IndexError:
310-
pass
294+
while True:
295+
line = self._readline()
296+
if line:
297+
self._filelineno += 1
298+
return line
299+
if not self._file:
300+
return line
301+
self.nextfile()
302+
# repeat with next file
303+
304+
def _start_readline(self):
305+
if not self._files:
306+
if 'b' in self._mode:
307+
return b''
308+
else:
309+
return ''
310+
self._filename = self._files[0]
311+
self._files = self._files[1:]
312+
self._startlineno = self.lineno()
313+
self._filelineno = 0
314+
self._file = None
315+
self._isstdin = False
316+
self._backupfilename = 0
317+
if self._filename == '-':
318+
self._filename = '<stdin>'
319+
if 'b' in self._mode:
320+
self._file = getattr(sys.stdin, 'buffer', sys.stdin)
321+
else:
322+
self._file = sys.stdin
323+
self._isstdin = True
311324
else:
312-
self._bufindex += 1
313-
self._lineno += 1
314-
self._filelineno += 1
315-
return line
316-
if not self._file:
317-
if not self._files:
318-
if 'b' in self._mode:
319-
return b''
325+
if self._inplace:
326+
self._backupfilename = (
327+
self._filename + (self._backup or ".bak"))
328+
try:
329+
os.unlink(self._backupfilename)
330+
except OSError:
331+
pass
332+
# The next few lines may raise OSError
333+
os.rename(self._filename, self._backupfilename)
334+
self._file = open(self._backupfilename, self._mode)
335+
try:
336+
perm = os.fstat(self._file.fileno()).st_mode
337+
except OSError:
338+
self._output = open(self._filename, "w")
320339
else:
321-
return ''
322-
self._filename = self._files[0]
323-
self._files = self._files[1:]
324-
self._filelineno = 0
325-
self._file = None
326-
self._isstdin = False
327-
self._backupfilename = 0
328-
if self._filename == '-':
329-
self._filename = '<stdin>'
330-
if 'b' in self._mode:
331-
self._file = getattr(sys.stdin, 'buffer', sys.stdin)
332-
else:
333-
self._file = sys.stdin
334-
self._isstdin = True
335-
else:
336-
if self._inplace:
337-
self._backupfilename = (
338-
self._filename + (self._backup or ".bak"))
340+
mode = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
341+
if hasattr(os, 'O_BINARY'):
342+
mode |= os.O_BINARY
343+
344+
fd = os.open(self._filename, mode, perm)
345+
self._output = os.fdopen(fd, "w")
339346
try:
340-
os.unlink(self._backupfilename)
347+
if hasattr(os, 'chmod'):
348+
os.chmod(self._filename, perm)
341349
except OSError:
342350
pass
343-
# The next few lines may raise OSError
344-
os.rename(self._filename, self._backupfilename)
345-
self._file = open(self._backupfilename, self._mode)
346-
try:
347-
perm = os.fstat(self._file.fileno()).st_mode
348-
except OSError:
349-
self._output = open(self._filename, "w")
350-
else:
351-
mode = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
352-
if hasattr(os, 'O_BINARY'):
353-
mode |= os.O_BINARY
354-
355-
fd = os.open(self._filename, mode, perm)
356-
self._output = os.fdopen(fd, "w")
357-
try:
358-
if hasattr(os, 'chmod'):
359-
os.chmod(self._filename, perm)
360-
except OSError:
361-
pass
362-
self._savestdout = sys.stdout
363-
sys.stdout = self._output
351+
self._savestdout = sys.stdout
352+
sys.stdout = self._output
353+
else:
354+
# This may raise OSError
355+
if self._openhook:
356+
self._file = self._openhook(self._filename, self._mode)
364357
else:
365-
# This may raise OSError
366-
if self._openhook:
367-
self._file = self._openhook(self._filename, self._mode)
368-
else:
369-
self._file = open(self._filename, self._mode)
370-
self._buffer = self._file.readlines(self._bufsize)
371-
self._bufindex = 0
372-
if not self._buffer:
373-
self.nextfile()
374-
# Recursive call
375-
return self.readline()
358+
self._file = open(self._filename, self._mode)
359+
self._readline = self._file.readline
360+
return self._readline()
376361

377362
def filename(self):
378363
return self._filename
379364

380365
def lineno(self):
381-
return self._lineno
366+
return self._startlineno + self._filelineno
382367

383368
def filelineno(self):
384369
return self._filelineno

Lib/test/test_fileinput.py

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,42 @@ def remove_tempfiles(*names):
4646
if name:
4747
safe_unlink(name)
4848

49+
class LineReader:
    """Minimal file-like object that records every line it hands out.

    An instance serves as its own ``openhook``: the "filename" given to
    :meth:`openhook` is treated as the file's *content*, split into lines
    and then served through :meth:`readline` / :meth:`readlines`.  Each
    value returned by :meth:`readline` -- including the empty end-of-file
    marker -- is logged, so a test can check exactly how far the consumer
    has read ahead.
    """

    def __init__(self):
        # Values returned by readline() since ``linesread`` was last read.
        self._linesread = []

    @property
    def linesread(self):
        """Return the lines read since the previous access, then reset the log."""
        lines, self._linesread = self._linesread, []
        return lines

    def openhook(self, filename, mode):
        """Start serving the lines of *filename* (used as content).

        *mode* is accepted for openhook compatibility and ignored.
        Returns ``self`` so the reader acts as the opened file object.
        """
        self.it = iter(filename.splitlines(True))
        return self

    def readline(self, size=None):
        """Return the next line, or ``''`` at end of input.

        *size* is accepted for file-API compatibility and ignored.
        The returned value is appended to the read log.
        """
        line = next(self.it, '')
        self._linesread.append(line)
        return line

    def readlines(self, hint=-1):
        """Return a list of the remaining lines, optionally bounded by *hint*.

        Follows the ``io.IOBase.readlines`` convention: when *hint* is
        ``None`` or not positive, every remaining line is returned;
        otherwise reading stops once the total length of the collected
        lines reaches *hint*.
        """
        # A non-positive or missing hint means "no limit"; comparing the
        # running size against it directly would stop after the first line.
        unlimited = hint is None or hint <= 0
        lines = []
        size = 0
        while True:
            line = self.readline()
            if not line:
                return lines
            lines.append(line)
            size += len(line)
            if not unlimited and size >= hint:
                return lines

    def close(self):
        """Do nothing; present only to satisfy the file-object protocol."""
        pass
84+
4985
class BufferSizesTests(unittest.TestCase):
5086
def test_buffer_sizes(self):
5187
# First, run the tests with default and teeny buffer size.
@@ -289,7 +325,7 @@ def test_readline(self):
289325
self.addCleanup(safe_unlink, TESTFN)
290326

291327
with FileInput(files=TESTFN,
292-
openhook=hook_encoded('ascii'), bufsize=8) as fi:
328+
openhook=hook_encoded('ascii')) as fi:
293329
try:
294330
self.assertEqual(fi.readline(), 'A\n')
295331
self.assertEqual(fi.readline(), 'B\n')
@@ -457,6 +493,38 @@ def fileno(self):
457493

458494
self.assertEqual(result, -1, "fileno() should return -1")
459495

496+
def test_readline_buffering(self):
497+
src = LineReader()
498+
with FileInput(files=['line1\nline2', 'line3\n'],
499+
openhook=src.openhook) as fi:
500+
self.assertEqual(src.linesread, [])
501+
self.assertEqual(fi.readline(), 'line1\n')
502+
self.assertEqual(src.linesread, ['line1\n'])
503+
self.assertEqual(fi.readline(), 'line2')
504+
self.assertEqual(src.linesread, ['line2'])
505+
self.assertEqual(fi.readline(), 'line3\n')
506+
self.assertEqual(src.linesread, ['', 'line3\n'])
507+
self.assertEqual(fi.readline(), '')
508+
self.assertEqual(src.linesread, [''])
509+
self.assertEqual(fi.readline(), '')
510+
self.assertEqual(src.linesread, [])
511+
512+
def test_iteration_buffering(self):
513+
src = LineReader()
514+
with FileInput(files=['line1\nline2', 'line3\n'],
515+
openhook=src.openhook) as fi:
516+
self.assertEqual(src.linesread, [])
517+
self.assertEqual(next(fi), 'line1\n')
518+
self.assertEqual(src.linesread, ['line1\n'])
519+
self.assertEqual(next(fi), 'line2')
520+
self.assertEqual(src.linesread, ['line2'])
521+
self.assertEqual(next(fi), 'line3\n')
522+
self.assertEqual(src.linesread, ['', 'line3\n'])
523+
self.assertRaises(StopIteration, next, fi)
524+
self.assertEqual(src.linesread, [''])
525+
self.assertRaises(StopIteration, next, fi)
526+
self.assertEqual(src.linesread, [])
527+
460528
class MockFileInput:
461529
"""A class that mocks out fileinput.FileInput for use during unit tests"""
462530

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,9 @@ Core and Builtins
9191
Library
9292
-------
9393

94+
- Issue #15068: Got rid of excessive buffering in the fileinput module.
95+
The bufsize parameter is no longer used.
96+
9497
- Issue #2202: Fix UnboundLocalError in
9598
AbstractDigestAuthHandler.get_algorithm_impls. Initial patch by Mathieu Dupuy.
9699

0 commit comments

Comments
 (0)