Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 56452ee

Browse files
committed
Issue #22982: Improve BOM handling when seeking to multiple positions of a writable text file.
2 parents 682d055 + 85e3ee7 commit 56452ee

4 files changed

Lines changed: 52 additions & 15 deletions

File tree

Lib/_pyio.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2275,6 +2275,19 @@ def detach(self):
22752275
return buffer
22762276

22772277
def seek(self, cookie, whence=0):
2278+
def _reset_encoder(position):
2279+
"""Reset the encoder (merely useful for proper BOM handling)"""
2280+
try:
2281+
encoder = self._encoder or self._get_encoder()
2282+
except LookupError:
2283+
# Sometimes the encoder doesn't exist
2284+
pass
2285+
else:
2286+
if position != 0:
2287+
encoder.setstate(0)
2288+
else:
2289+
encoder.reset()
2290+
22782291
if self.closed:
22792292
raise ValueError("tell on closed file")
22802293
if not self._seekable:
@@ -2295,6 +2308,7 @@ def seek(self, cookie, whence=0):
22952308
self._snapshot = None
22962309
if self._decoder:
22972310
self._decoder.reset()
2311+
_reset_encoder(position)
22982312
return position
22992313
if whence != 0:
23002314
raise ValueError("unsupported whence (%r)" % (whence,))
@@ -2332,17 +2346,7 @@ def seek(self, cookie, whence=0):
23322346
raise OSError("can't restore logical file position")
23332347
self._decoded_chars_used = chars_to_skip
23342348

2335-
# Finally, reset the encoder (merely useful for proper BOM handling)
2336-
try:
2337-
encoder = self._encoder or self._get_encoder()
2338-
except LookupError:
2339-
# Sometimes the encoder doesn't exist
2340-
pass
2341-
else:
2342-
if cookie != 0:
2343-
encoder.setstate(0)
2344-
else:
2345-
encoder.reset()
2349+
_reset_encoder(cookie)
23462350
return cookie
23472351

23482352
def read(self, size=None):

Lib/test/test_io.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2730,6 +2730,19 @@ def test_seek_bom(self):
27302730
with self.open(filename, 'rb') as f:
27312731
self.assertEqual(f.read(), 'bbbzzz'.encode(charset))
27322732

2733+
def test_seek_append_bom(self):
2734+
# Same test, but first seek to the start and then to the end
2735+
filename = support.TESTFN
2736+
for charset in ('utf-8-sig', 'utf-16', 'utf-32'):
2737+
with self.open(filename, 'w', encoding=charset) as f:
2738+
f.write('aaa')
2739+
with self.open(filename, 'a', encoding=charset) as f:
2740+
f.seek(0)
2741+
f.seek(0, self.SEEK_END)
2742+
f.write('xxx')
2743+
with self.open(filename, 'rb') as f:
2744+
self.assertEqual(f.read(), 'aaaxxx'.encode(charset))
2745+
27332746
def test_errors_property(self):
27342747
with self.open(support.TESTFN, "w") as f:
27352748
self.assertEqual(f.errors, "strict")

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ Core and Builtins
2424
Library
2525
-------
2626

27+
- Issue #22982: Improve BOM handling when seeking to multiple positions of
28+
a writable text file.
29+
2730
- Issue #23464: Removed deprecated asyncio JoinableQueue.
2831

2932
- Issue #23529: Limit the size of decompressed data when reading from

Modules/_io/textio.c

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2048,11 +2048,10 @@ _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
20482048
}
20492049

20502050
static int
2051-
_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2051+
_textiowrapper_encoder_reset(textio *self, int start_of_stream)
20522052
{
20532053
PyObject *res;
2054-
/* Same as _textiowrapper_decoder_setstate() above. */
2055-
if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2054+
if (start_of_stream) {
20562055
res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
20572056
self->encoding_start_of_stream = 1;
20582057
}
@@ -2067,6 +2066,14 @@ _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
20672066
return 0;
20682067
}
20692068

2069+
static int
2070+
_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2071+
{
2072+
/* Same as _textiowrapper_decoder_setstate() above. */
2073+
return _textiowrapper_encoder_reset(
2074+
self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2075+
}
2076+
20702077
static PyObject *
20712078
textiowrapper_seek(textio *self, PyObject *args)
20722079
{
@@ -2134,7 +2141,17 @@ textiowrapper_seek(textio *self, PyObject *args)
21342141
}
21352142

21362143
res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
2137-
Py_XDECREF(cookieObj);
2144+
Py_CLEAR(cookieObj);
2145+
if (res == NULL)
2146+
goto fail;
2147+
if (self->encoder) {
2148+
/* If seek() == 0, we are at the start of stream, otherwise not */
2149+
cmp = PyObject_RichCompareBool(res, _PyIO_zero, Py_EQ);
2150+
if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2151+
Py_DECREF(res);
2152+
goto fail;
2153+
}
2154+
}
21382155
return res;
21392156
}
21402157
else if (whence != 0) {

0 commit comments

Comments
 (0)