Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit d03ce4a

Browse files
Issue #17106: Fix a segmentation fault in io.TextIOWrapper when an underlying
stream or a decoder produces data of an unexpected type (i.e. when io.TextIOWrapper is initialized with a text stream or uses a bytes-to-bytes codec).
2 parents 7fc972a + 94dc673 commit d03ce4a

3 files changed

Lines changed: 76 additions & 28 deletions

File tree

Lib/test/test_io.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2542,6 +2542,30 @@ def test_rawio_write_through(self):
25422542
txt.write('5')
25432543
self.assertEqual(b''.join(raw._write_stack), b'123\n45')
25442544

2545+
def test_read_nonbytes(self):
2546+
# Issue #17106
2547+
# Crash when underlying read() returns non-bytes
2548+
t = self.TextIOWrapper(self.StringIO('a'))
2549+
self.assertRaises(TypeError, t.read, 1)
2550+
t = self.TextIOWrapper(self.StringIO('a'))
2551+
self.assertRaises(TypeError, t.readline)
2552+
t = self.TextIOWrapper(self.StringIO('a'))
2553+
self.assertRaises(TypeError, t.read)
2554+
2555+
def test_illegal_decoder(self):
2556+
# Issue #17106
2557+
# Crash when decoder returns non-string
2558+
t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
2559+
encoding='quopri_codec')
2560+
self.assertRaises(TypeError, t.read, 1)
2561+
t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
2562+
encoding='quopri_codec')
2563+
self.assertRaises(TypeError, t.readline)
2564+
t = self.TextIOWrapper(self.BytesIO(b'aaaaaa'), newline='\n',
2565+
encoding='quopri_codec')
2566+
self.assertRaises(TypeError, t.read)
2567+
2568+
25452569
class CTextIOWrapperTest(TextIOWrapperTest):
25462570

25472571
def test_initialization(self):

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,10 @@ Core and Builtins
163163
Library
164164
-------
165165

166+
- Issue #17106: Fix a segmentation fault in io.TextIOWrapper when an underlying
167+
stream or a decoder produces data of an unexpected type (i.e. when
168+
io.TextIOWrapper is initialized with a text stream or uses a bytes-to-bytes codec).
169+
166170
- Issue #15633: httplib.HTTPResponse is now marked closed when the server
167171
sends less than the advertised Content-Length.
168172

Modules/_io/textio.c

Lines changed: 48 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,25 @@ incrementalnewlinedecoder_dealloc(nldecoder_object *self)
257257
Py_TYPE(self)->tp_free((PyObject *)self);
258258
}
259259

260+
static int
261+
check_decoded(PyObject *decoded)
262+
{
263+
if (decoded == NULL)
264+
return -1;
265+
if (!PyUnicode_Check(decoded)) {
266+
PyErr_Format(PyExc_TypeError,
267+
"decoder should return a string result, not '%.200s'",
268+
Py_TYPE(decoded)->tp_name);
269+
Py_DECREF(decoded);
270+
return -1;
271+
}
272+
if (PyUnicode_READY(decoded) < 0) {
273+
Py_DECREF(decoded);
274+
return -1;
275+
}
276+
return 0;
277+
}
278+
260279
#define SEEN_CR 1
261280
#define SEEN_LF 2
262281
#define SEEN_CRLF 4
@@ -286,18 +305,9 @@ _PyIncrementalNewlineDecoder_decode(PyObject *_self,
286305
Py_INCREF(output);
287306
}
288307

289-
if (output == NULL)
308+
if (check_decoded(output) < 0)
290309
return NULL;
291310

292-
if (!PyUnicode_Check(output)) {
293-
PyErr_SetString(PyExc_TypeError,
294-
"decoder should return a string result");
295-
goto error;
296-
}
297-
298-
if (PyUnicode_READY(output) == -1)
299-
goto error;
300-
301311
output_len = PyUnicode_GET_LENGTH(output);
302312
if (self->pendingcr && (final || output_len > 0)) {
303313
/* Prefix output with CR */
@@ -1458,7 +1468,13 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
14581468
Py_DECREF(chunk_size);
14591469
if (input_chunk == NULL)
14601470
goto fail;
1461-
assert(PyBytes_Check(input_chunk));
1471+
if (!PyBytes_Check(input_chunk)) {
1472+
PyErr_Format(PyExc_TypeError,
1473+
"underlying %s() should have returned a bytes object, "
1474+
"not '%.200s'", (self->has_read1 ? "read1": "read"),
1475+
Py_TYPE(input_chunk)->tp_name);
1476+
goto fail;
1477+
}
14621478

14631479
nbytes = PyBytes_Size(input_chunk);
14641480
eof = (nbytes == 0);
@@ -1472,10 +1488,7 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
14721488
_PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
14731489
}
14741490

1475-
/* TODO sanity check: isinstance(decoded_chars, unicode) */
1476-
if (decoded_chars == NULL)
1477-
goto fail;
1478-
if (PyUnicode_READY(decoded_chars) == -1)
1491+
if (check_decoded(decoded_chars) < 0)
14791492
goto fail;
14801493
textiowrapper_set_decoded_chars(self, decoded_chars);
14811494
nchars = PyUnicode_GET_LENGTH(decoded_chars);
@@ -1493,7 +1506,14 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
14931506
PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
14941507
if (next_input == NULL)
14951508
goto fail;
1496-
assert (PyBytes_Check(next_input));
1509+
if (!PyBytes_Check(next_input)) {
1510+
PyErr_Format(PyExc_TypeError,
1511+
"decoder getstate() should have returned a bytes "
1512+
"object, not '%.200s'",
1513+
Py_TYPE(next_input)->tp_name);
1514+
Py_DECREF(next_input);
1515+
goto fail;
1516+
}
14971517
Py_DECREF(dec_buffer);
14981518
Py_CLEAR(self->snapshot);
14991519
self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
@@ -1542,7 +1562,7 @@ textiowrapper_read(textio *self, PyObject *args)
15421562
decoded = PyObject_CallMethodObjArgs(
15431563
self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
15441564
Py_DECREF(bytes);
1545-
if (decoded == NULL)
1565+
if (check_decoded(decoded) < 0)
15461566
goto fail;
15471567

15481568
result = textiowrapper_get_decoded_chars(self, -1);
@@ -2151,7 +2171,14 @@ textiowrapper_seek(textio *self, PyObject *args)
21512171
if (input_chunk == NULL)
21522172
goto fail;
21532173

2154-
assert (PyBytes_Check(input_chunk));
2174+
if (!PyBytes_Check(input_chunk)) {
2175+
PyErr_Format(PyExc_TypeError,
2176+
"underlying read() should have returned a bytes "
2177+
"object, not '%.200s'",
2178+
Py_TYPE(input_chunk)->tp_name);
2179+
Py_DECREF(input_chunk);
2180+
goto fail;
2181+
}
21552182

21562183
self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
21572184
if (self->snapshot == NULL) {
@@ -2162,12 +2189,8 @@ textiowrapper_seek(textio *self, PyObject *args)
21622189
decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
21632190
"Oi", input_chunk, (int)cookie.need_eof);
21642191

2165-
if (decoded == NULL)
2192+
if (check_decoded(decoded) < 0)
21662193
goto fail;
2167-
if (PyUnicode_READY(decoded) == -1) {
2168-
Py_DECREF(decoded);
2169-
goto fail;
2170-
}
21712194

21722195
textiowrapper_set_decoded_chars(self, decoded);
21732196

@@ -2283,13 +2306,11 @@ textiowrapper_tell(textio *self, PyObject *args)
22832306
Py_DECREF(_state); \
22842307
} while (0)
22852308

2286-
/* TODO: replace assert with exception */
22872309
#define DECODER_DECODE(start, len, res) do { \
22882310
PyObject *_decoded = _PyObject_CallMethodId( \
22892311
self->decoder, &PyId_decode, "y#", start, len); \
2290-
if (_decoded == NULL) \
2312+
if (check_decoded(_decoded) < 0) \
22912313
goto fail; \
2292-
assert (PyUnicode_Check(_decoded)); \
22932314
res = PyUnicode_GET_LENGTH(_decoded); \
22942315
Py_DECREF(_decoded); \
22952316
} while (0)
@@ -2370,9 +2391,8 @@ textiowrapper_tell(textio *self, PyObject *args)
23702391
/* We didn't get enough decoded data; signal EOF to get more. */
23712392
PyObject *decoded = _PyObject_CallMethodId(
23722393
self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
2373-
if (decoded == NULL)
2394+
if (check_decoded(decoded) < 0)
23742395
goto fail;
2375-
assert (PyUnicode_Check(decoded));
23762396
chars_decoded += PyUnicode_GET_LENGTH(decoded);
23772397
Py_DECREF(decoded);
23782398
cookie.need_eof = 1;

0 commit comments

Comments
 (0)