Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 94dc673

Browse files
Issue #17106: Fix a segmentation fault in io.TextIOWrapper when an underlying
stream or a decoder produces data of an unexpected type (i.e. when io.TextIOWrapper is initialized with a text stream or uses a bytes-to-bytes codec).
1 parent 028915e commit 94dc673

3 files changed

Lines changed: 72 additions & 18 deletions

File tree

Lib/test/test_io.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2481,6 +2481,30 @@ def test_rawio_write_through(self):
24812481
txt.write('5')
24822482
self.assertEqual(b''.join(raw._write_stack), b'123\n45')
24832483

2484+
def test_read_nonbytes(self):
2485+
# Issue #17106
2486+
# Crash when underlying read() returns non-bytes
2487+
t = self.TextIOWrapper(self.StringIO('a'))
2488+
self.assertRaises(TypeError, t.read, 1)
2489+
t = self.TextIOWrapper(self.StringIO('a'))
2490+
self.assertRaises(TypeError, t.readline)
2491+
t = self.TextIOWrapper(self.StringIO('a'))
2492+
self.assertRaises(TypeError, t.read)
2493+
2494+
def test_illegal_decoder(self):
2495+
# Issue #17106
2496+
# Crash when decoder returns non-string
2497+
t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
2498+
encoding='quopri_codec')
2499+
self.assertRaises(TypeError, t.read, 1)
2500+
t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
2501+
encoding='quopri_codec')
2502+
self.assertRaises(TypeError, t.readline)
2503+
t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
2504+
encoding='quopri_codec')
2505+
self.assertRaises(TypeError, t.read)
2506+
2507+
24842508
class CTextIOWrapperTest(TextIOWrapperTest):
24852509

24862510
def test_initialization(self):

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,10 @@ Core and Builtins
212212
Library
213213
-------
214214

215+
- Issue #17106: Fix a segmentation fault in io.TextIOWrapper when an underlying
216+
stream or a decoder produces data of an unexpected type (i.e. when
217+
io.TextIOWrapper is initialized with a text stream or uses a bytes-to-bytes codec).
218+
215219
- Issue #15633: httplib.HTTPResponse is now marked closed when the server
216220
sends less than the advertised Content-Length.
217221

Modules/_io/textio.c

Lines changed: 44 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,21 @@ incrementalnewlinedecoder_dealloc(nldecoder_object *self)
236236
Py_TYPE(self)->tp_free((PyObject *)self);
237237
}
238238

239+
static int
240+
check_decoded(PyObject *decoded)
241+
{
242+
if (decoded == NULL)
243+
return -1;
244+
if (!PyUnicode_Check(decoded)) {
245+
PyErr_Format(PyExc_TypeError,
246+
"decoder should return a string result, not '%.200s'",
247+
Py_TYPE(decoded)->tp_name);
248+
Py_DECREF(decoded);
249+
return -1;
250+
}
251+
return 0;
252+
}
253+
239254
#define SEEN_CR 1
240255
#define SEEN_LF 2
241256
#define SEEN_CRLF 4
@@ -265,15 +280,9 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
265280
Py_INCREF(output);
266281
}
267282

268-
if (output == NULL)
283+
if (check_decoded(output) < 0)
269284
return NULL;
270285

271-
if (!PyUnicode_Check(output)) {
272-
PyErr_SetString(PyExc_TypeError,
273-
"decoder should return a string result");
274-
goto error;
275-
}
276-
277286
output_len = PyUnicode_GET_SIZE(output);
278287
if (self->pendingcr && (final || output_len > 0)) {
279288
Py_UNICODE *out;
@@ -1454,7 +1463,13 @@ textiowrapper_read_chunk(textio *self)
14541463
Py_DECREF(chunk_size);
14551464
if (input_chunk == NULL)
14561465
goto fail;
1457-
assert(PyBytes_Check(input_chunk));
1466+
if (!PyBytes_Check(input_chunk)) {
1467+
PyErr_Format(PyExc_TypeError,
1468+
"underlying %s() should have returned a bytes object, "
1469+
"not '%.200s'", (self->has_read1 ? "read1": "read"),
1470+
Py_TYPE(input_chunk)->tp_name);
1471+
goto fail;
1472+
}
14581473

14591474
eof = (PyBytes_Size(input_chunk) == 0);
14601475

@@ -1467,8 +1482,7 @@ textiowrapper_read_chunk(textio *self)
14671482
_PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
14681483
}
14691484

1470-
/* TODO sanity check: isinstance(decoded_chars, unicode) */
1471-
if (decoded_chars == NULL)
1485+
if (check_decoded(decoded_chars) < 0)
14721486
goto fail;
14731487
textiowrapper_set_decoded_chars(self, decoded_chars);
14741488
if (PyUnicode_GET_SIZE(decoded_chars) > 0)
@@ -1481,7 +1495,14 @@ textiowrapper_read_chunk(textio *self)
14811495
PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
14821496
if (next_input == NULL)
14831497
goto fail;
1484-
assert (PyBytes_Check(next_input));
1498+
if (!PyBytes_Check(next_input)) {
1499+
PyErr_Format(PyExc_TypeError,
1500+
"decoder getstate() should have returned a bytes "
1501+
"object, not '%.200s'",
1502+
Py_TYPE(next_input)->tp_name);
1503+
Py_DECREF(next_input);
1504+
goto fail;
1505+
}
14851506
Py_DECREF(dec_buffer);
14861507
Py_CLEAR(self->snapshot);
14871508
self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
@@ -1525,7 +1546,7 @@ textiowrapper_read(textio *self, PyObject *args)
15251546
decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
15261547
bytes, Py_True, NULL);
15271548
Py_DECREF(bytes);
1528-
if (decoded == NULL)
1549+
if (check_decoded(decoded) < 0)
15291550
goto fail;
15301551

15311552
result = textiowrapper_get_decoded_chars(self, -1);
@@ -2123,7 +2144,14 @@ textiowrapper_seek(textio *self, PyObject *args)
21232144
if (input_chunk == NULL)
21242145
goto fail;
21252146

2126-
assert (PyBytes_Check(input_chunk));
2147+
if (!PyBytes_Check(input_chunk)) {
2148+
PyErr_Format(PyExc_TypeError,
2149+
"underlying read() should have returned a bytes "
2150+
"object, not '%.200s'",
2151+
Py_TYPE(input_chunk)->tp_name);
2152+
Py_DECREF(input_chunk);
2153+
goto fail;
2154+
}
21272155

21282156
self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
21292157
if (self->snapshot == NULL) {
@@ -2134,7 +2162,7 @@ textiowrapper_seek(textio *self, PyObject *args)
21342162
decoded = PyObject_CallMethod(self->decoder, "decode",
21352163
"Oi", input_chunk, (int)cookie.need_eof);
21362164

2137-
if (decoded == NULL)
2165+
if (check_decoded(decoded) < 0)
21382166
goto fail;
21392167

21402168
textiowrapper_set_decoded_chars(self, decoded);
@@ -2257,9 +2285,8 @@ textiowrapper_tell(textio *self, PyObject *args)
22572285

22582286
PyObject *decoded = PyObject_CallMethod(
22592287
self->decoder, "decode", "y#", input, 1);
2260-
if (decoded == NULL)
2288+
if (check_decoded(decoded) < 0)
22612289
goto fail;
2262-
assert (PyUnicode_Check(decoded));
22632290
chars_decoded += PyUnicode_GET_SIZE(decoded);
22642291
Py_DECREF(decoded);
22652292

@@ -2291,9 +2318,8 @@ textiowrapper_tell(textio *self, PyObject *args)
22912318
/* We didn't get enough decoded data; signal EOF to get more. */
22922319
PyObject *decoded = PyObject_CallMethod(
22932320
self->decoder, "decode", "yi", "", /* final = */ 1);
2294-
if (decoded == NULL)
2321+
if (check_decoded(decoded) < 0)
22952322
goto fail;
2296-
assert (PyUnicode_Check(decoded));
22972323
chars_decoded += PyUnicode_GET_SIZE(decoded);
22982324
Py_DECREF(decoded);
22992325
cookie.need_eof = 1;

0 commit comments

Comments
 (0)