Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 2fc8f77

Browse files
committed
Issue #20404: reject non-text encodings early in TextIOWrapper.
1 parent 2658bad commit 2fc8f77

5 files changed

Lines changed: 134 additions & 39 deletions

File tree

Include/codecs.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,14 @@ PyAPI_FUNC(PyObject *) PyCodec_Decode(
104104
Please note that these APIs are internal and should not
105105
be used in Python C extensions.
106106
107+
XXX (ncoghlan): should we make these, or something like them, public
108+
in Python 3.5+?
109+
107110
*/
111+
PyAPI_FUNC(PyObject *) _PyCodec_LookupTextEncoding(
112+
const char *encoding,
113+
const char *alternate_command
114+
);
108115

109116
PyAPI_FUNC(PyObject *) _PyCodec_EncodeText(
110117
PyObject *object,
@@ -117,6 +124,19 @@ PyAPI_FUNC(PyObject *) _PyCodec_DecodeText(
117124
const char *encoding,
118125
const char *errors
119126
);
127+
128+
/* These two aren't actually text encoding specific, but _io.TextIOWrapper
129+
* is the only current API consumer.
130+
*/
131+
PyAPI_FUNC(PyObject *) _PyCodecInfo_GetIncrementalDecoder(
132+
PyObject *codec_info,
133+
const char *errors
134+
);
135+
136+
PyAPI_FUNC(PyObject *) _PyCodecInfo_GetIncrementalEncoder(
137+
PyObject *codec_info,
138+
const char *errors
139+
);
120140
#endif
121141

122142

Lib/_pyio.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1495,6 +1495,11 @@ def __init__(self, buffer, encoding=None, errors=None, newline=None,
14951495
if not isinstance(encoding, str):
14961496
raise ValueError("invalid encoding: %r" % encoding)
14971497

1498+
if not codecs.lookup(encoding)._is_text_encoding:
1499+
msg = ("%r is not a text encoding; "
1500+
"use codecs.open() to handle arbitrary codecs")
1501+
raise LookupError(msg % encoding)
1502+
14981503
if errors is None:
14991504
errors = "strict"
15001505
else:

Lib/test/test_io.py

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1955,6 +1955,15 @@ def test_constructor(self):
19551955
self.assertRaises(TypeError, t.__init__, b, newline=42)
19561956
self.assertRaises(ValueError, t.__init__, b, newline='xyzzy')
19571957

1958+
def test_non_text_encoding_codecs_are_rejected(self):
1959+
# Ensure the constructor complains if passed a codec that isn't
1960+
# marked as a text encoding
1961+
# http://bugs.python.org/issue20404
1962+
r = self.BytesIO()
1963+
b = self.BufferedWriter(r)
1964+
with self.assertRaisesRegex(LookupError, "is not a text encoding"):
1965+
self.TextIOWrapper(b, encoding="hex_codec")
1966+
19581967
def test_detach(self):
19591968
r = self.BytesIO()
19601969
b = self.BufferedWriter(r)
@@ -2607,15 +2616,22 @@ def test_read_nonbytes(self):
26072616

26082617
def test_illegal_decoder(self):
26092618
# Issue #17106
2619+
# Bypass the early encoding check added in issue 20404
2620+
def _make_illegal_wrapper():
2621+
quopri = codecs.lookup("quopri_codec")
2622+
quopri._is_text_encoding = True
2623+
try:
2624+
t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'),
2625+
newline='\n', encoding="quopri_codec")
2626+
finally:
2627+
quopri._is_text_encoding = False
2628+
return t
26102629
# Crash when decoder returns non-string
2611-
t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
2612-
encoding='quopri_codec')
2630+
t = _make_illegal_wrapper()
26132631
self.assertRaises(TypeError, t.read, 1)
2614-
t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
2615-
encoding='quopri_codec')
2632+
t = _make_illegal_wrapper()
26162633
self.assertRaises(TypeError, t.readline)
2617-
t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
2618-
encoding='quopri_codec')
2634+
t = _make_illegal_wrapper()
26192635
self.assertRaises(TypeError, t.read)
26202636

26212637

@@ -3053,6 +3069,7 @@ def test_open_allargs(self):
30533069

30543070
class CMiscIOTest(MiscIOTest):
30553071
io = io
3072+
shutdown_error = "RuntimeError: could not find io module state"
30563073

30573074
def test_readinto_buffer_overflow(self):
30583075
# Issue #18025
@@ -3065,6 +3082,7 @@ def read(self, n=-1):
30653082

30663083
class PyMiscIOTest(MiscIOTest):
30673084
io = pyio
3085+
shutdown_error = "LookupError: unknown encoding: ascii"
30683086

30693087

30703088
@unittest.skipIf(os.name == 'nt', 'POSIX signals required for this test.')

Modules/_io/textio.c

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -836,7 +836,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
836836
char *kwlist[] = {"buffer", "encoding", "errors",
837837
"newline", "line_buffering", "write_through",
838838
NULL};
839-
PyObject *buffer, *raw;
839+
PyObject *buffer, *raw, *codec_info = NULL;
840840
char *encoding = NULL;
841841
char *errors = NULL;
842842
char *newline = NULL;
@@ -951,6 +951,17 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
951951
"could not determine default encoding");
952952
}
953953

954+
/* Check we have been asked for a real text encoding */
955+
codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
956+
if (codec_info == NULL) {
957+
Py_CLEAR(self->encoding);
958+
goto error;
959+
}
960+
961+
/* XXX: Failures beyond this point have the potential to leak elements
962+
* of the partially constructed object (like self->encoding)
963+
*/
964+
954965
if (errors == NULL)
955966
errors = "strict";
956967
self->errors = PyBytes_FromString(errors);
@@ -965,7 +976,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
965976
if (newline) {
966977
self->readnl = PyUnicode_FromString(newline);
967978
if (self->readnl == NULL)
968-
return -1;
979+
goto error;
969980
}
970981
self->writetranslate = (newline == NULL || newline[0] != '\0');
971982
if (!self->readuniversal && self->readnl) {
@@ -989,8 +1000,8 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
9891000
if (r == -1)
9901001
goto error;
9911002
if (r == 1) {
992-
self->decoder = PyCodec_IncrementalDecoder(
993-
encoding, errors);
1003+
self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info,
1004+
errors);
9941005
if (self->decoder == NULL)
9951006
goto error;
9961007

@@ -1014,17 +1025,12 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
10141025
if (r == -1)
10151026
goto error;
10161027
if (r == 1) {
1017-
PyObject *ci;
1018-
self->encoder = PyCodec_IncrementalEncoder(
1019-
encoding, errors);
1028+
self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info,
1029+
errors);
10201030
if (self->encoder == NULL)
10211031
goto error;
10221032
/* Get the normalized name of the codec */
1023-
ci = _PyCodec_Lookup(encoding);
1024-
if (ci == NULL)
1025-
goto error;
1026-
res = _PyObject_GetAttrId(ci, &PyId_name);
1027-
Py_DECREF(ci);
1033+
res = _PyObject_GetAttrId(codec_info, &PyId_name);
10281034
if (res == NULL) {
10291035
if (PyErr_ExceptionMatches(PyExc_AttributeError))
10301036
PyErr_Clear();
@@ -1044,6 +1050,9 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
10441050
Py_XDECREF(res);
10451051
}
10461052

1053+
/* Finished sorting out the codec details */
1054+
Py_DECREF(codec_info);
1055+
10471056
self->buffer = buffer;
10481057
Py_INCREF(buffer);
10491058

@@ -1106,6 +1115,7 @@ textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
11061115
return 0;
11071116

11081117
error:
1118+
Py_XDECREF(codec_info);
11091119
return -1;
11101120
}
11111121

Python/codecs.c

Lines changed: 63 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -243,20 +243,15 @@ PyObject *codec_getitem(const char *encoding, int index)
243243
return v;
244244
}
245245

246-
/* Helper function to create an incremental codec. */
247-
246+
/* Helper functions to create an incremental codec. */
248247
static
249-
PyObject *codec_getincrementalcodec(const char *encoding,
250-
const char *errors,
251-
const char *attrname)
248+
PyObject *codec_makeincrementalcodec(PyObject *codec_info,
249+
const char *errors,
250+
const char *attrname)
252251
{
253-
PyObject *codecs, *ret, *inccodec;
252+
PyObject *ret, *inccodec;
254253

255-
codecs = _PyCodec_Lookup(encoding);
256-
if (codecs == NULL)
257-
return NULL;
258-
inccodec = PyObject_GetAttrString(codecs, attrname);
259-
Py_DECREF(codecs);
254+
inccodec = PyObject_GetAttrString(codec_info, attrname);
260255
if (inccodec == NULL)
261256
return NULL;
262257
if (errors)
@@ -267,6 +262,21 @@ PyObject *codec_getincrementalcodec(const char *encoding,
267262
return ret;
268263
}
269264

265+
static
266+
PyObject *codec_getincrementalcodec(const char *encoding,
267+
const char *errors,
268+
const char *attrname)
269+
{
270+
PyObject *codec_info, *ret;
271+
272+
codec_info = _PyCodec_Lookup(encoding);
273+
if (codec_info == NULL)
274+
return NULL;
275+
ret = codec_makeincrementalcodec(codec_info, errors, attrname);
276+
Py_DECREF(codec_info);
277+
return ret;
278+
}
279+
270280
/* Helper function to create a stream codec. */
271281

272282
static
@@ -290,6 +300,24 @@ PyObject *codec_getstreamcodec(const char *encoding,
290300
return streamcodec;
291301
}
292302

303+
/* Helpers to work with the result of _PyCodec_Lookup
304+
305+
*/
306+
PyObject *_PyCodecInfo_GetIncrementalDecoder(PyObject *codec_info,
307+
const char *errors)
308+
{
309+
return codec_makeincrementalcodec(codec_info, errors,
310+
"incrementaldecoder");
311+
}
312+
313+
PyObject *_PyCodecInfo_GetIncrementalEncoder(PyObject *codec_info,
314+
const char *errors)
315+
{
316+
return codec_makeincrementalcodec(codec_info, errors,
317+
"incrementalencoder");
318+
}
319+
320+
293321
/* Convenience APIs to query the Codec registry.
294322
295323
All APIs return a codec object with incremented refcount.
@@ -447,15 +475,12 @@ PyObject *PyCodec_Decode(PyObject *object,
447475
}
448476

449477
/* Text encoding/decoding API */
450-
static
451-
PyObject *codec_getitem_checked(const char *encoding,
452-
const char *operation_name,
453-
int index)
478+
PyObject * _PyCodec_LookupTextEncoding(const char *encoding,
479+
const char *alternate_command)
454480
{
455481
_Py_IDENTIFIER(_is_text_encoding);
456482
PyObject *codec;
457483
PyObject *attr;
458-
PyObject *v;
459484
int is_text_codec;
460485

461486
codec = _PyCodec_Lookup(encoding);
@@ -482,27 +507,44 @@ PyObject *codec_getitem_checked(const char *encoding,
482507
Py_DECREF(codec);
483508
PyErr_Format(PyExc_LookupError,
484509
"'%.400s' is not a text encoding; "
485-
"use codecs.%s() to handle arbitrary codecs",
486-
encoding, operation_name);
510+
"use %s to handle arbitrary codecs",
511+
encoding, alternate_command);
487512
return NULL;
488513
}
489514
}
490515
}
491516

517+
/* This appears to be a valid text encoding */
518+
return codec;
519+
}
520+
521+
522+
static
523+
PyObject *codec_getitem_checked(const char *encoding,
524+
const char *alternate_command,
525+
int index)
526+
{
527+
PyObject *codec;
528+
PyObject *v;
529+
530+
codec = _PyCodec_LookupTextEncoding(encoding, alternate_command);
531+
if (codec == NULL)
532+
return NULL;
533+
492534
v = PyTuple_GET_ITEM(codec, index);
493-
Py_DECREF(codec);
494535
Py_INCREF(v);
536+
Py_DECREF(codec);
495537
return v;
496538
}
497539

498540
static PyObject * _PyCodec_TextEncoder(const char *encoding)
499541
{
500-
return codec_getitem_checked(encoding, "encode", 0);
542+
return codec_getitem_checked(encoding, "codecs.encode()", 0);
501543
}
502544

503545
static PyObject * _PyCodec_TextDecoder(const char *encoding)
504546
{
505-
return codec_getitem_checked(encoding, "decode", 1);
547+
return codec_getitem_checked(encoding, "codecs.decode()", 1);
506548
}
507549

508550
PyObject *_PyCodec_EncodeText(PyObject *object,

0 commit comments

Comments
 (0)