Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 507434f

Browse files
authored
bpo-15216: io: TextIOWrapper.reconfigure() accepts encoding, errors and newline (GH-2343)
1 parent 31e9908 commit 507434f

6 files changed

Lines changed: 517 additions & 175 deletions

File tree

Doc/library/io.rst

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -904,7 +904,7 @@ Text I/O
904904
locale encoding using :func:`locale.setlocale`, use the current locale
905905
encoding instead of the user preferred encoding.
906906

907-
:class:`TextIOWrapper` provides one attribute in addition to those of
907+
:class:`TextIOWrapper` provides these members in addition to those of
908908
:class:`TextIOBase` and its parents:
909909

910910
.. attribute:: line_buffering
@@ -918,11 +918,19 @@ Text I/O
918918

919919
.. versionadded:: 3.7
920920

921-
.. method:: reconfigure(*, line_buffering=None, write_through=None)
921+
.. method:: reconfigure(*[, encoding][, errors][, newline][, \
922+
line_buffering][, write_through])
922923

923-
Reconfigure this text stream using new settings for *line_buffering*
924-
and *write_through*. Passing ``None`` as an argument will retain
925-
the current setting for that parameter.
924+
Reconfigure this text stream using new settings for *encoding*,
925+
*errors*, *newline*, *line_buffering* and *write_through*.
926+
927+
Parameters not specified keep current settings, except
928+
``errors='strict'`` is used when *encoding* is specified but
929+
*errors* is not specified.
930+
931+
It is not possible to change the encoding or newline if some data
932+
has already been read from the stream. On the other hand, changing
933+
encoding after write is possible.
926934

927935
This method does an implicit stream flush before setting the
928936
new parameters.

Lib/_pyio.py

Lines changed: 56 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1938,10 +1938,7 @@ class TextIOWrapper(TextIOBase):
19381938
# so that the signature can match the signature of the C version.
19391939
def __init__(self, buffer, encoding=None, errors=None, newline=None,
19401940
line_buffering=False, write_through=False):
1941-
if newline is not None and not isinstance(newline, str):
1942-
raise TypeError("illegal newline type: %r" % (type(newline),))
1943-
if newline not in (None, "", "\n", "\r", "\r\n"):
1944-
raise ValueError("illegal newline value: %r" % (newline,))
1941+
self._check_newline(newline)
19451942
if encoding is None:
19461943
try:
19471944
encoding = os.device_encoding(buffer.fileno())
@@ -1971,22 +1968,38 @@ def __init__(self, buffer, encoding=None, errors=None, newline=None,
19711968
raise ValueError("invalid errors: %r" % errors)
19721969

19731970
self._buffer = buffer
1971+
self._decoded_chars = '' # buffer for text returned from decoder
1972+
self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1973+
self._snapshot = None # info for reconstructing decoder state
1974+
self._seekable = self._telling = self.buffer.seekable()
1975+
self._has_read1 = hasattr(self.buffer, 'read1')
1976+
self._configure(encoding, errors, newline,
1977+
line_buffering, write_through)
1978+
1979+
def _check_newline(self, newline):
1980+
if newline is not None and not isinstance(newline, str):
1981+
raise TypeError("illegal newline type: %r" % (type(newline),))
1982+
if newline not in (None, "", "\n", "\r", "\r\n"):
1983+
raise ValueError("illegal newline value: %r" % (newline,))
1984+
1985+
def _configure(self, encoding=None, errors=None, newline=None,
1986+
line_buffering=False, write_through=False):
19741987
self._encoding = encoding
19751988
self._errors = errors
1989+
self._encoder = None
1990+
self._decoder = None
1991+
self._b2cratio = 0.0
1992+
19761993
self._readuniversal = not newline
19771994
self._readtranslate = newline is None
19781995
self._readnl = newline
19791996
self._writetranslate = newline != ''
19801997
self._writenl = newline or os.linesep
1981-
self._encoder = None
1982-
self._decoder = None
1983-
self._decoded_chars = '' # buffer for text returned from decoder
1984-
self._decoded_chars_used = 0 # offset into _decoded_chars for read()
1985-
self._snapshot = None # info for reconstructing decoder state
1986-
self._seekable = self._telling = self.buffer.seekable()
1987-
self._has_read1 = hasattr(self.buffer, 'read1')
1988-
self._b2cratio = 0.0
19891998

1999+
self._line_buffering = line_buffering
2000+
self._write_through = write_through
2001+
2002+
# don't write a BOM in the middle of a file
19902003
if self._seekable and self.writable():
19912004
position = self.buffer.tell()
19922005
if position != 0:
@@ -1996,12 +2009,6 @@ def __init__(self, buffer, encoding=None, errors=None, newline=None,
19962009
# Sometimes the encoder doesn't exist
19972010
pass
19982011

1999-
self._configure(line_buffering, write_through)
2000-
2001-
def _configure(self, line_buffering=False, write_through=False):
2002-
self._line_buffering = line_buffering
2003-
self._write_through = write_through
2004-
20052012
# self._snapshot is either None, or a tuple (dec_flags, next_input)
20062013
# where dec_flags is the second (integer) item of the decoder state
20072014
# and next_input is the chunk of input bytes that comes next after the
@@ -2048,17 +2055,46 @@ def write_through(self):
20482055
def buffer(self):
20492056
return self._buffer
20502057

2051-
def reconfigure(self, *, line_buffering=None, write_through=None):
2058+
def reconfigure(self, *,
2059+
encoding=None, errors=None, newline=Ellipsis,
2060+
line_buffering=None, write_through=None):
20522061
"""Reconfigure the text stream with new parameters.
20532062
20542063
This also flushes the stream.
20552064
"""
2065+
if (self._decoder is not None
2066+
and (encoding is not None or errors is not None
2067+
or newline is not Ellipsis)):
2068+
raise UnsupportedOperation(
2069+
"It is not possible to set the encoding or newline of stream "
2070+
"after the first read")
2071+
2072+
if errors is None:
2073+
if encoding is None:
2074+
errors = self._errors
2075+
else:
2076+
errors = 'strict'
2077+
elif not isinstance(errors, str):
2078+
raise TypeError("invalid errors: %r" % errors)
2079+
2080+
if encoding is None:
2081+
encoding = self._encoding
2082+
else:
2083+
if not isinstance(encoding, str):
2084+
raise TypeError("invalid encoding: %r" % encoding)
2085+
2086+
if newline is Ellipsis:
2087+
newline = self._readnl
2088+
self._check_newline(newline)
2089+
20562090
if line_buffering is None:
20572091
line_buffering = self.line_buffering
20582092
if write_through is None:
20592093
write_through = self.write_through
2094+
20602095
self.flush()
2061-
self._configure(line_buffering, write_through)
2096+
self._configure(encoding, errors, newline,
2097+
line_buffering, write_through)
20622098

20632099
def seekable(self):
20642100
if self.closed:

Lib/test/test_io.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3408,6 +3408,123 @@ def seekable(self): return True
34083408
F.tell = lambda x: 0
34093409
t = self.TextIOWrapper(F(), encoding='utf-8')
34103410

3411+
def test_reconfigure_encoding_read(self):
3412+
# latin1 -> utf8
3413+
# (latin1 can decode a utf-8 encoded string)
3414+
data = 'abc\xe9\n'.encode('latin1') + 'd\xe9f\n'.encode('utf8')
3415+
raw = self.BytesIO(data)
3416+
txt = self.TextIOWrapper(raw, encoding='latin1', newline='\n')
3417+
self.assertEqual(txt.readline(), 'abc\xe9\n')
3418+
with self.assertRaises(self.UnsupportedOperation):
3419+
txt.reconfigure(encoding='utf-8')
3420+
with self.assertRaises(self.UnsupportedOperation):
3421+
txt.reconfigure(newline=None)
3422+
3423+
def test_reconfigure_write_fromascii(self):
3424+
# ascii has a specific encodefunc in the C implementation,
3425+
# but utf-8-sig has not. Make sure that we get rid of the
3426+
# cached encodefunc when we switch encoders.
3427+
raw = self.BytesIO()
3428+
txt = self.TextIOWrapper(raw, encoding='ascii', newline='\n')
3429+
txt.write('foo\n')
3430+
txt.reconfigure(encoding='utf-8-sig')
3431+
txt.write('\xe9\n')
3432+
txt.flush()
3433+
self.assertEqual(raw.getvalue(), b'foo\n\xc3\xa9\n')
3434+
3435+
def test_reconfigure_write(self):
3436+
# latin1 -> utf8
3437+
raw = self.BytesIO()
3438+
txt = self.TextIOWrapper(raw, encoding='latin1', newline='\n')
3439+
txt.write('abc\xe9\n')
3440+
txt.reconfigure(encoding='utf-8')
3441+
self.assertEqual(raw.getvalue(), b'abc\xe9\n')
3442+
txt.write('d\xe9f\n')
3443+
txt.flush()
3444+
self.assertEqual(raw.getvalue(), b'abc\xe9\nd\xc3\xa9f\n')
3445+
3446+
# ascii -> utf-8-sig: ensure that no BOM is written in the middle of
3447+
# the file
3448+
raw = self.BytesIO()
3449+
txt = self.TextIOWrapper(raw, encoding='ascii', newline='\n')
3450+
txt.write('abc\n')
3451+
txt.reconfigure(encoding='utf-8-sig')
3452+
txt.write('d\xe9f\n')
3453+
txt.flush()
3454+
self.assertEqual(raw.getvalue(), b'abc\nd\xc3\xa9f\n')
3455+
3456+
def test_reconfigure_write_non_seekable(self):
3457+
raw = self.BytesIO()
3458+
raw.seekable = lambda: False
3459+
raw.seek = None
3460+
txt = self.TextIOWrapper(raw, encoding='ascii', newline='\n')
3461+
txt.write('abc\n')
3462+
txt.reconfigure(encoding='utf-8-sig')
3463+
txt.write('d\xe9f\n')
3464+
txt.flush()
3465+
3466+
# If the raw stream is not seekable, there'll be a BOM
3467+
self.assertEqual(raw.getvalue(), b'abc\n\xef\xbb\xbfd\xc3\xa9f\n')
3468+
3469+
def test_reconfigure_defaults(self):
3470+
txt = self.TextIOWrapper(self.BytesIO(), 'ascii', 'replace', '\n')
3471+
txt.reconfigure(encoding=None)
3472+
self.assertEqual(txt.encoding, 'ascii')
3473+
self.assertEqual(txt.errors, 'replace')
3474+
txt.write('LF\n')
3475+
3476+
txt.reconfigure(newline='\r\n')
3477+
self.assertEqual(txt.encoding, 'ascii')
3478+
self.assertEqual(txt.errors, 'replace')
3479+
3480+
txt.reconfigure(errors='ignore')
3481+
self.assertEqual(txt.encoding, 'ascii')
3482+
self.assertEqual(txt.errors, 'ignore')
3483+
txt.write('CRLF\n')
3484+
3485+
txt.reconfigure(encoding='utf-8', newline=None)
3486+
self.assertEqual(txt.errors, 'strict')
3487+
txt.seek(0)
3488+
self.assertEqual(txt.read(), 'LF\nCRLF\n')
3489+
3490+
self.assertEqual(txt.detach().getvalue(), b'LF\nCRLF\r\n')
3491+
3492+
def test_reconfigure_newline(self):
3493+
raw = self.BytesIO(b'CR\rEOF')
3494+
txt = self.TextIOWrapper(raw, 'ascii', newline='\n')
3495+
txt.reconfigure(newline=None)
3496+
self.assertEqual(txt.readline(), 'CR\n')
3497+
raw = self.BytesIO(b'CR\rEOF')
3498+
txt = self.TextIOWrapper(raw, 'ascii', newline='\n')
3499+
txt.reconfigure(newline='')
3500+
self.assertEqual(txt.readline(), 'CR\r')
3501+
raw = self.BytesIO(b'CR\rLF\nEOF')
3502+
txt = self.TextIOWrapper(raw, 'ascii', newline='\r')
3503+
txt.reconfigure(newline='\n')
3504+
self.assertEqual(txt.readline(), 'CR\rLF\n')
3505+
raw = self.BytesIO(b'LF\nCR\rEOF')
3506+
txt = self.TextIOWrapper(raw, 'ascii', newline='\n')
3507+
txt.reconfigure(newline='\r')
3508+
self.assertEqual(txt.readline(), 'LF\nCR\r')
3509+
raw = self.BytesIO(b'CR\rCRLF\r\nEOF')
3510+
txt = self.TextIOWrapper(raw, 'ascii', newline='\r')
3511+
txt.reconfigure(newline='\r\n')
3512+
self.assertEqual(txt.readline(), 'CR\rCRLF\r\n')
3513+
3514+
txt = self.TextIOWrapper(self.BytesIO(), 'ascii', newline='\r')
3515+
txt.reconfigure(newline=None)
3516+
txt.write('linesep\n')
3517+
txt.reconfigure(newline='')
3518+
txt.write('LF\n')
3519+
txt.reconfigure(newline='\n')
3520+
txt.write('LF\n')
3521+
txt.reconfigure(newline='\r')
3522+
txt.write('CR\n')
3523+
txt.reconfigure(newline='\r\n')
3524+
txt.write('CRLF\n')
3525+
expected = 'linesep' + os.linesep + 'LF\nLF\nCR\rCRLF\r\n'
3526+
self.assertEqual(txt.detach().getvalue().decode('ascii'), expected)
3527+
34113528

34123529
class MemviewBytesIO(io.BytesIO):
34133530
'''A BytesIO object whose read method returns memoryviews
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
``TextIOWrapper.reconfigure()`` supports changing *encoding*, *errors*, and
2+
*newline*.

Modules/_io/clinic/textio.c.h

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ PyDoc_STRVAR(_io_TextIOWrapper___init____doc__,
149149

150150
static int
151151
_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
152-
const char *encoding, const char *errors,
152+
const char *encoding, PyObject *errors,
153153
const char *newline, int line_buffering,
154154
int write_through);
155155

@@ -158,10 +158,10 @@ _io_TextIOWrapper___init__(PyObject *self, PyObject *args, PyObject *kwargs)
158158
{
159159
int return_value = -1;
160160
static const char * const _keywords[] = {"buffer", "encoding", "errors", "newline", "line_buffering", "write_through", NULL};
161-
static _PyArg_Parser _parser = {"O|zzzii:TextIOWrapper", _keywords, 0};
161+
static _PyArg_Parser _parser = {"O|zOzii:TextIOWrapper", _keywords, 0};
162162
PyObject *buffer;
163163
const char *encoding = NULL;
164-
const char *errors = NULL;
164+
PyObject *errors = Py_None;
165165
const char *newline = NULL;
166166
int line_buffering = 0;
167167
int write_through = 0;
@@ -177,7 +177,8 @@ _io_TextIOWrapper___init__(PyObject *self, PyObject *args, PyObject *kwargs)
177177
}
178178

179179
PyDoc_STRVAR(_io_TextIOWrapper_reconfigure__doc__,
180-
"reconfigure($self, /, *, line_buffering=None, write_through=None)\n"
180+
"reconfigure($self, /, *, encoding=None, errors=None, newline=None,\n"
181+
" line_buffering=None, write_through=None)\n"
181182
"--\n"
182183
"\n"
183184
"Reconfigure the text stream with new parameters.\n"
@@ -188,24 +189,28 @@ PyDoc_STRVAR(_io_TextIOWrapper_reconfigure__doc__,
188189
{"reconfigure", (PyCFunction)_io_TextIOWrapper_reconfigure, METH_FASTCALL|METH_KEYWORDS, _io_TextIOWrapper_reconfigure__doc__},
189190

190191
static PyObject *
191-
_io_TextIOWrapper_reconfigure_impl(textio *self,
192+
_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
193+
PyObject *errors, PyObject *newline_obj,
192194
PyObject *line_buffering_obj,
193195
PyObject *write_through_obj);
194196

195197
static PyObject *
196198
_io_TextIOWrapper_reconfigure(textio *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
197199
{
198200
PyObject *return_value = NULL;
199-
static const char * const _keywords[] = {"line_buffering", "write_through", NULL};
200-
static _PyArg_Parser _parser = {"|$OO:reconfigure", _keywords, 0};
201+
static const char * const _keywords[] = {"encoding", "errors", "newline", "line_buffering", "write_through", NULL};
202+
static _PyArg_Parser _parser = {"|$OOOOO:reconfigure", _keywords, 0};
203+
PyObject *encoding = Py_None;
204+
PyObject *errors = Py_None;
205+
PyObject *newline_obj = NULL;
201206
PyObject *line_buffering_obj = Py_None;
202207
PyObject *write_through_obj = Py_None;
203208

204209
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser,
205-
&line_buffering_obj, &write_through_obj)) {
210+
&encoding, &errors, &newline_obj, &line_buffering_obj, &write_through_obj)) {
206211
goto exit;
207212
}
208-
return_value = _io_TextIOWrapper_reconfigure_impl(self, line_buffering_obj, write_through_obj);
213+
return_value = _io_TextIOWrapper_reconfigure_impl(self, encoding, errors, newline_obj, line_buffering_obj, write_through_obj);
209214

210215
exit:
211216
return return_value;
@@ -499,4 +504,4 @@ _io_TextIOWrapper_close(textio *self, PyObject *Py_UNUSED(ignored))
499504
{
500505
return _io_TextIOWrapper_close_impl(self);
501506
}
502-
/*[clinic end generated code: output=679b3ac5284df4e0 input=a9049054013a1b77]*/
507+
/*[clinic end generated code: output=b5be870b0039d577 input=a9049054013a1b77]*/

0 commit comments

Comments
 (0)