Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit e2b0705

Browse files
committed
Revert r61508: it caused test_mailbox to fail on all platforms.
1 parent ada8c3b commit e2b0705

2 files changed

Lines changed: 69 additions & 115 deletions

File tree

Lib/io.py

Lines changed: 64 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1180,14 +1180,14 @@ def __init__(self, buffer, encoding=None, errors=None, newline=None,
11801180
self._encoder = None
11811181
self._decoder = None
11821182
self._decoded_text = "" # buffer for text produced by decoder
1183-
self._decoded_text_offset = 0 # offset to text returned by read()
11841183
self._snapshot = None # info for reconstructing decoder state
11851184
self._seekable = self._telling = self.buffer.seekable()
11861185

11871186
# A word about _snapshot. This attribute is either None, or a tuple
1188-
# (decoder_state, next_input) where decoder_state is the second
1189-
# (integer) item of the decoder state, and next_input is the chunk
1190-
# of bytes that comes after the snapshot point in the input.
1187+
# (decoder_state, input_chunk, decoded_chars) where decoder_state is
1188+
# the second (integer) item of the decoder state, input_chunk is the
1189+
# chunk of bytes that was read, and decoded_chars is the number of
1190+
# characters rendered by the decoder after feeding it those bytes.
11911191
# We use this to reconstruct intermediate decoder states in tell().
11921192

11931193
# Naming convention:
@@ -1271,10 +1271,10 @@ def _read_chunk(self):
12711271
"""
12721272
Read and decode the next chunk of data from the BufferedReader.
12731273
1274-
The return value is True unless EOF was reached. The decoded string
1275-
is placed in self._decoded_text (replacing its previous value).
1276-
(The entire input chunk is sent to the decoder, though some of it
1277-
may remain buffered in the decoder, yet to be converted.)
1274+
Return a tuple of two elements: all the bytes that were read, and
1275+
the decoded string produced by the decoder. (The entire input
1276+
chunk is sent to the decoder, but some of it may remain buffered
1277+
in the decoder, yet to be converted.)
12781278
"""
12791279

12801280
if self._decoder is None:
@@ -1283,9 +1283,8 @@ def _read_chunk(self):
12831283
# No one should call tell(), so don't bother taking a snapshot.
12841284
input_chunk = self.buffer.read1(self._CHUNK_SIZE)
12851285
eof = not input_chunk
1286-
self._decoded_text = self._decoder.decode(input_chunk, eof)
1287-
self._decoded_text_offset = 0
1288-
return not eof
1286+
decoded = self._decoder.decode(input_chunk, eof)
1287+
return (input_chunk, decoded)
12891288

12901289
# The cookie returned by tell() cannot include the contents of
12911290
# the decoder's buffer, so we need to snapshot a point in the
@@ -1299,15 +1298,16 @@ def _read_chunk(self):
12991298

13001299
input_chunk = self.buffer.read1(self._CHUNK_SIZE)
13011300
eof = not input_chunk
1302-
self._decoded_text = self._decoder.decode(input_chunk, eof)
1303-
self._decoded_text_offset = 0
1301+
decoded = self._decoder.decode(input_chunk, eof)
13041302

1305-
# At the snapshot point, len(dec_buffer) bytes ago, the next input
1306-
# to be passed to the decoder is dec_buffer + input_chunk.
1307-
self._snapshot = (dec_flags, dec_buffer + input_chunk)
1308-
return not eof
1303+
# At the snapshot point len(dec_buffer) bytes ago, the next input
1304+
# to be passed to the decoder is dec_buffer + input_chunk. Save
1305+
# len(decoded) so that later, tell() can figure out how much
1306+
# decoded data has been used up by TextIOWrapper.read().
1307+
self._snapshot = (dec_flags, dec_buffer + input_chunk, len(decoded))
1308+
return (input_chunk, decoded)
13091309

1310-
def _pack_cookie(self, position, dec_flags=0,
1310+
def _encode_tell_cookie(self, position, dec_flags=0,
13111311
feed_bytes=0, need_eof=0, skip_chars=0):
13121312
# The meaning of a tell() cookie is: seek to position, set the
13131313
# decoder flags to dec_flags, read feed_bytes bytes, feed them
@@ -1317,7 +1317,7 @@ def _pack_cookie(self, position, dec_flags=0,
13171317
return (position | (dec_flags<<64) | (feed_bytes<<128) |
13181318
(skip_chars<<192) | bool(need_eof)<<256)
13191319

1320-
def _unpack_cookie(self, bigint):
1320+
def _decode_tell_cookie(self, bigint):
13211321
rest, position = divmod(bigint, 1<<64)
13221322
rest, dec_flags = divmod(rest, 1<<64)
13231323
rest, feed_bytes = divmod(rest, 1<<64)
@@ -1339,14 +1339,14 @@ def tell(self):
13391339
return position
13401340

13411341
# Skip backward to the snapshot point (see _read_chunk).
1342-
dec_flags, next_input = self._snapshot
1342+
dec_flags, next_input, decoded_chars = self._snapshot
13431343
position -= len(next_input)
13441344

1345-
# How many decoded characters have been returned since the snapshot?
1346-
skip_chars = self._decoded_text_offset
1345+
# How many decoded characters have been consumed since the snapshot?
1346+
skip_chars = decoded_chars - len(self._decoded_text)
13471347
if skip_chars == 0:
13481348
# We haven't moved from the snapshot point.
1349-
return self._pack_cookie(position, dec_flags)
1349+
return self._encode_tell_cookie(position, dec_flags)
13501350

13511351
# Walk the decoder forward, one byte at a time, to find the minimum
13521352
# input necessary to give us the decoded characters we need to skip.
@@ -1373,8 +1373,8 @@ def tell(self):
13731373
if decoded_chars >= skip_chars:
13741374
break
13751375
else:
1376-
# We didn't get enough decoded data; signal EOF to get more.
1377-
decoded = decoder.decode(b"", final=True)
1376+
# We didn't get enough decoded data; send EOF to get more.
1377+
decoded = decoder.decode(b"", True)
13781378
decoded_chars += len(decoded)
13791379
need_eof = 1
13801380
if decoded_chars < skip_chars:
@@ -1385,7 +1385,7 @@ def tell(self):
13851385
position += safe_fed_bytes
13861386
fed_bytes -= safe_fed_bytes
13871387
skip_chars -= safe_decoded_chars
1388-
return self._pack_cookie(
1388+
return self._encode_tell_cookie(
13891389
position, dec_flags, fed_bytes, need_eof, skip_chars)
13901390
finally:
13911391
decoder.setstate(saved_state)
@@ -1405,7 +1405,8 @@ def seek(self, cookie, whence=0):
14051405
raise IOError("can't do nonzero end-relative seeks")
14061406
self.flush()
14071407
position = self.buffer.seek(0, 2)
1408-
self._clear_decoded_text()
1408+
self._decoded_text = ""
1409+
self._snapshot = None
14091410
if self._decoder:
14101411
self._decoder.reset()
14111412
return position
@@ -1418,70 +1419,48 @@ def seek(self, cookie, whence=0):
14181419

14191420
# Seek back to the snapshot point.
14201421
position, dec_flags, feed_bytes, need_eof, skip_chars = \
1421-
self._unpack_cookie(cookie)
1422+
self._decode_tell_cookie(cookie)
14221423
self.buffer.seek(position)
1423-
self._clear_decoded_text()
1424+
self._decoded_text = ""
1425+
self._snapshot = None
14241426

14251427
if self._decoder or dec_flags or feed_bytes or need_eof:
14261428
# Restore the decoder flags to their values from the snapshot.
14271429
self._decoder = self._decoder or self._get_decoder()
14281430
self._decoder.setstate((b"", dec_flags))
1429-
self._snapshot = (dec_flags, b'')
14301431

14311432
if feed_bytes or need_eof:
14321433
# Feed feed_bytes bytes to the decoder.
14331434
input_chunk = self.buffer.read(feed_bytes)
1434-
self._decoded_text = self._decoder.decode(input_chunk, need_eof)
1435-
if len(self._decoded_text) < skip_chars:
1435+
decoded = self._decoder.decode(input_chunk, need_eof)
1436+
if len(decoded) < skip_chars:
14361437
raise IOError("can't restore logical file position")
14371438

14381439
# Skip skip_chars of the decoded characters.
1439-
self._decoded_text_offset = skip_chars
1440+
self._decoded_text = decoded[skip_chars:]
14401441

14411442
# Restore the snapshot.
1442-
self._snapshot = (dec_flags, input_chunk)
1443+
self._snapshot = (dec_flags, input_chunk, len(decoded))
14431444
return cookie
14441445

1445-
def _clear_decoded_text(self):
1446-
"""Reset the _decoded_text buffer."""
1447-
self._decoded_text = ''
1448-
self._decoded_text_offset = 0
1449-
self._snapshot = None
1450-
1451-
def _emit_decoded_text(self, n=None):
1452-
"""Advance into the _decoded_text buffer."""
1453-
offset = self._decoded_text_offset
1454-
if n is None:
1455-
text = self._decoded_text[offset:]
1456-
else:
1457-
text = self._decoded_text[offset:offset + n]
1458-
self._decoded_text_offset += len(text)
1459-
return text
1460-
1461-
def _unemit_decoded_text(self, n):
1462-
"""Rewind the _decoded_text buffer."""
1463-
if self._decoded_text_offset < n:
1464-
raise AssertionError("unemit out of bounds")
1465-
self._decoded_text_offset -= n
1466-
14671446
def read(self, n=None):
14681447
if n is None:
14691448
n = -1
14701449
decoder = self._decoder or self._get_decoder()
1450+
result = self._decoded_text
14711451
if n < 0:
1472-
# Read everything.
1473-
result = (self._emit_decoded_text() +
1474-
decoder.decode(self.buffer.read(), final=True))
1475-
self._clear_decoded_text()
1452+
result += decoder.decode(self.buffer.read(), True)
1453+
self._decoded_text = ""
1454+
self._snapshot = None
14761455
return result
14771456
else:
1478-
# Keep reading chunks until we have n characters to return.
1479-
eof = False
1480-
result = self._emit_decoded_text(n)
1481-
while len(result) < n and not eof:
1482-
eof = not self._read_chunk()
1483-
result += self._emit_decoded_text(n - len(result))
1484-
return result
1457+
while len(result) < n:
1458+
input_chunk, decoded = self._read_chunk()
1459+
result += decoded
1460+
if not input_chunk:
1461+
break
1462+
self._decoded_text = result[n:]
1463+
return result[:n]
14851464

14861465
def __next__(self):
14871466
self._telling = False
@@ -1495,20 +1474,21 @@ def __next__(self):
14951474
def readline(self, limit=None):
14961475
if limit is None:
14971476
limit = -1
1477+
if limit >= 0:
1478+
# XXX Hack to support limit argument, for backwards compatibility
1479+
line = self.readline()
1480+
if len(line) <= limit:
1481+
return line
1482+
line, self._decoded_text = \
1483+
line[:limit], line[limit:] + self._decoded_text
1484+
return line
14981485

1499-
# Grab all the decoded text (we will rewind any extra bits later).
1500-
line = self._emit_decoded_text()
1501-
1486+
line = self._decoded_text
15021487
start = 0
15031488
decoder = self._decoder or self._get_decoder()
15041489

15051490
pos = endpos = None
15061491
while True:
1507-
if limit >= 0 and len(line) >= limit:
1508-
# Length limit has been reached.
1509-
endpos = limit
1510-
break
1511-
15121492
if self._readtranslate:
15131493
# Newlines are already translated, only search for \n
15141494
pos = line.find('\n', start)
@@ -1558,18 +1538,20 @@ def readline(self, limit=None):
15581538

15591539
# No line ending seen yet - get more data
15601540
more_line = ''
1561-
while self._read_chunk():
1562-
if self._decoded_text:
1541+
while True:
1542+
readahead, pending = self._read_chunk()
1543+
more_line = pending
1544+
if more_line or not readahead:
15631545
break
1564-
if self._decoded_text:
1565-
line += self._emit_decoded_text()
1546+
if more_line:
1547+
line += more_line
15661548
else:
15671549
# end of file
1568-
self._clear_decoded_text()
1550+
self._decoded_text = ''
1551+
self._snapshot = None
15691552
return line
15701553

1571-
# Rewind _decoded_text to just after the line ending we found.
1572-
self._unemit_decoded_text(len(line) - endpos)
1554+
self._decoded_text = line[endpos:]
15731555
return line[:endpos]
15741556

15751557
@property

Lib/test/test_io.py

Lines changed: 5 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -590,9 +590,7 @@ class StatefulIncrementalDecoderTest(unittest.TestCase):
590590
# I=0, O=3
591591
(b'i.o3.x.xyz.toolong.', False, 'x--.xyz.too.'),
592592
# I=6, O=3
593-
(b'i.o3.i6.abcdefghijklmnop', True, 'abc.ghi.mno.'),
594-
# I=5, O=8 with newlines
595-
(b'i.o8.i5.abc\ndef\nghy\nz', True, 'abc\nd---.ef\ngh---.y\nz-----.')
593+
(b'i.o3.i6.abcdefghijklmnop', True, 'abc.ghi.mno.')
596594
]
597595

598596
def testDecoder(self):
@@ -892,8 +890,8 @@ def lookupTestDecoder(name):
892890
return codecs.CodecInfo(
893891
name='test_decoder', encode=None, decode=None,
894892
incrementalencoder=None,
895-
incrementaldecoder=StatefulIncrementalDecoder,
896-
streamreader=None, streamwriter=None)
893+
streamreader=None, streamwriter=None,
894+
incrementaldecoder=StatefulIncrementalDecoder)
897895

898896
def testSeekAndTellWithData(data, min_pos=0):
899897
"""Tell/seek to various points within a data stream and ensure
@@ -905,42 +903,16 @@ def testSeekAndTellWithData(data, min_pos=0):
905903
decoded = f.read()
906904
f.close()
907905

908-
# Use read() to move to various positions in the input;
909-
# then tell, read some more data, and seek back.
910-
for i in range(min_pos, len(decoded) + 1): # to read before tell
911-
for j in [1, 5, len(decoded)]: # to read after tell
906+
for i in range(min_pos, len(decoded) + 1): # seek positions
907+
for j in [1, 5, len(decoded) - i]: # read lengths
912908
f = io.open(test_support.TESTFN, encoding='test_decoder')
913909
self.assertEquals(f.read(i), decoded[:i])
914910
cookie = f.tell()
915911
self.assertEquals(f.read(j), decoded[i:i + j])
916912
f.seek(cookie)
917-
self.assertEquals(f.tell(), cookie)
918913
self.assertEquals(f.read(), decoded[i:])
919914
f.close()
920915

921-
lines = len(decoded.split('\n'))
922-
923-
# Use readline() to move to various positions in the input;
924-
# then tell, read some more data, and seek back.
925-
for limit in [-1, 4, 128]: # 'limit' argument for readline()
926-
for j in [1, 5, len(decoded)]: # to read after tell()
927-
f = io.open(test_support.TESTFN, encoding='test_decoder')
928-
text = ''
929-
for k in range(lines): # repeatedly call readline()
930-
line = f.readline(limit=limit)
931-
if limit >= 0:
932-
self.assert_(len(line) <= limit)
933-
text += line
934-
i = len(text)
935-
self.assertEquals(text, decoded[:i])
936-
cookie = f.tell()
937-
self.assertEquals(f.read(j), decoded[i:i + j])
938-
f.seek(cookie)
939-
self.assertEquals(f.tell(), cookie)
940-
self.assertEquals(f.read(), decoded[i:])
941-
f.seek(cookie)
942-
f.close()
943-
944916
# Register a special incremental decoder for testing.
945917
codecs.register(lookupTestDecoder)
946918
self.codecEnabled = 1

0 commit comments

Comments
 (0)