Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 81fabdb

Browse files
committed
Issue #4874: Most builtin decoders now reject unicode input.
1 parent dd01f8f commit 81fabdb

3 files changed

Lines changed: 47 additions & 14 deletions

File tree

Lib/test/test_codecs.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1475,6 +1475,36 @@ def test_streamreaderwriter(self):
14751475
info.streamwriter, 'strict') as srw:
14761476
self.assertEquals(srw.read(), "\xfc")
14771477

1478+
class TypesTest(unittest.TestCase):
1479+
def test_decode_unicode(self):
1480+
# Most decoders don't accept unicode input
1481+
decoders = [
1482+
codecs.utf_7_decode,
1483+
codecs.utf_8_decode,
1484+
codecs.utf_16_le_decode,
1485+
codecs.utf_16_be_decode,
1486+
codecs.utf_16_ex_decode,
1487+
codecs.utf_32_decode,
1488+
codecs.utf_32_le_decode,
1489+
codecs.utf_32_be_decode,
1490+
codecs.utf_32_ex_decode,
1491+
codecs.latin_1_decode,
1492+
codecs.ascii_decode,
1493+
codecs.charmap_decode,
1494+
]
1495+
if hasattr(codecs, "mbcs_decode"):
1496+
decoders.append(codecs.mbcs_decode)
1497+
for decoder in decoders:
1498+
self.assertRaises(TypeError, decoder, "xxx")
1499+
1500+
def test_unicode_escape(self):
1501+
# Escape-decoding a unicode string is supported and gives the same
1502+
# result as decoding the equivalent ASCII bytes string.
1503+
self.assertEquals(codecs.unicode_escape_decode(r"\u1234"), ("\u1234", 6))
1504+
self.assertEquals(codecs.unicode_escape_decode(br"\u1234"), ("\u1234", 6))
1505+
self.assertEquals(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
1506+
self.assertEquals(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
1507+
14781508

14791509
def test_main():
14801510
support.run_unittest(
@@ -1501,6 +1531,7 @@ def test_main():
15011531
BasicUnicodeTest,
15021532
CharmapTest,
15031533
WithStmtTest,
1534+
TypesTest,
15041535
)
15051536

15061537

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ What's New in Python 3.1 alpha 0
1212
Core and Builtins
1313
-----------------
1414

15+
- Issue #4874: Most builtin decoders now reject unicode input.
16+
1517
- Issue #4842: Don't allow trailing 'L' when constructing an integer
1618
from a string.
1719

Modules/_codecsmodule.c

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ utf_7_decode(PyObject *self,
258258
Py_ssize_t consumed;
259259
PyObject *decoded = NULL;
260260

261-
if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode",
261+
if (!PyArg_ParseTuple(args, "y*|zi:utf_7_decode",
262262
&pbuf, &errors, &final))
263263
return NULL;
264264
consumed = pbuf.len;
@@ -281,7 +281,7 @@ utf_8_decode(PyObject *self,
281281
Py_ssize_t consumed;
282282
PyObject *decoded = NULL;
283283

284-
if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode",
284+
if (!PyArg_ParseTuple(args, "y*|zi:utf_8_decode",
285285
&pbuf, &errors, &final))
286286
return NULL;
287287
consumed = pbuf.len;
@@ -305,7 +305,7 @@ utf_16_decode(PyObject *self,
305305
Py_ssize_t consumed;
306306
PyObject *decoded;
307307

308-
if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode",
308+
if (!PyArg_ParseTuple(args, "y*|zi:utf_16_decode",
309309
&pbuf, &errors, &final))
310310
return NULL;
311311
consumed = pbuf.len; /* This is overwritten unless final is true. */
@@ -328,7 +328,7 @@ utf_16_le_decode(PyObject *self,
328328
Py_ssize_t consumed;
329329
PyObject *decoded = NULL;
330330

331-
if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode",
331+
if (!PyArg_ParseTuple(args, "y*|zi:utf_16_le_decode",
332332
&pbuf, &errors, &final))
333333
return NULL;
334334

@@ -352,7 +352,7 @@ utf_16_be_decode(PyObject *self,
352352
Py_ssize_t consumed;
353353
PyObject *decoded = NULL;
354354

355-
if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode",
355+
if (!PyArg_ParseTuple(args, "y*|zi:utf_16_be_decode",
356356
&pbuf, &errors, &final))
357357
return NULL;
358358

@@ -384,7 +384,7 @@ utf_16_ex_decode(PyObject *self,
384384
int final = 0;
385385
Py_ssize_t consumed;
386386

387-
if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode",
387+
if (!PyArg_ParseTuple(args, "y*|zii:utf_16_ex_decode",
388388
&pbuf, &errors, &byteorder, &final))
389389
return NULL;
390390
consumed = pbuf.len; /* This is overwritten unless final is true. */
@@ -409,7 +409,7 @@ utf_32_decode(PyObject *self,
409409
Py_ssize_t consumed;
410410
PyObject *decoded;
411411

412-
if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode",
412+
if (!PyArg_ParseTuple(args, "y*|zi:utf_32_decode",
413413
&pbuf, &errors, &final))
414414
return NULL;
415415
consumed = pbuf.len; /* This is overwritten unless final is true. */
@@ -432,7 +432,7 @@ utf_32_le_decode(PyObject *self,
432432
Py_ssize_t consumed;
433433
PyObject *decoded;
434434

435-
if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode",
435+
if (!PyArg_ParseTuple(args, "y*|zi:utf_32_le_decode",
436436
&pbuf, &errors, &final))
437437
return NULL;
438438
consumed = pbuf.len; /* This is overwritten unless final is true. */
@@ -455,7 +455,7 @@ utf_32_be_decode(PyObject *self,
455455
Py_ssize_t consumed;
456456
PyObject *decoded;
457457

458-
if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode",
458+
if (!PyArg_ParseTuple(args, "y*|zi:utf_32_be_decode",
459459
&pbuf, &errors, &final))
460460
return NULL;
461461
consumed = pbuf.len; /* This is overwritten unless final is true. */
@@ -486,7 +486,7 @@ utf_32_ex_decode(PyObject *self,
486486
int final = 0;
487487
Py_ssize_t consumed;
488488

489-
if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode",
489+
if (!PyArg_ParseTuple(args, "y*|zii:utf_32_ex_decode",
490490
&pbuf, &errors, &byteorder, &final))
491491
return NULL;
492492
consumed = pbuf.len; /* This is overwritten unless final is true. */
@@ -542,7 +542,7 @@ latin_1_decode(PyObject *self,
542542
PyObject *unicode;
543543
const char *errors = NULL;
544544

545-
if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode",
545+
if (!PyArg_ParseTuple(args, "y*|z:latin_1_decode",
546546
&pbuf, &errors))
547547
return NULL;
548548

@@ -559,7 +559,7 @@ ascii_decode(PyObject *self,
559559
PyObject *unicode;
560560
const char *errors = NULL;
561561

562-
if (!PyArg_ParseTuple(args, "s*|z:ascii_decode",
562+
if (!PyArg_ParseTuple(args, "y*|z:ascii_decode",
563563
&pbuf, &errors))
564564
return NULL;
565565

@@ -577,7 +577,7 @@ charmap_decode(PyObject *self,
577577
const char *errors = NULL;
578578
PyObject *mapping = NULL;
579579

580-
if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode",
580+
if (!PyArg_ParseTuple(args, "y*|zO:charmap_decode",
581581
&pbuf, &errors, &mapping))
582582
return NULL;
583583
if (mapping == Py_None)
@@ -600,7 +600,7 @@ mbcs_decode(PyObject *self,
600600
Py_ssize_t consumed;
601601
PyObject *decoded = NULL;
602602

603-
if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode",
603+
if (!PyArg_ParseTuple(args, "y*|zi:mbcs_decode",
604604
&pbuf, &errors, &final))
605605
return NULL;
606606
consumed = pbuf.len;

0 commit comments

Comments
 (0)