Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit a1f7655

Browse files
committed
Issue #15379: Fix passing of non-BMP characters as integers for the charmap decoder (already working as unicode strings).
Patch by Serhiy Storchaka.
2 parents 5011244 + 6f80f5d commit a1f7655

3 files changed

Lines changed: 123 additions & 3 deletions

File tree

Lib/test/test_codecs.py

Lines changed: 116 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1692,6 +1692,15 @@ def test_decode_with_string_map(self):
16921692
("abc", 3)
16931693
)
16941694

1695+
self.assertEqual(
1696+
codecs.charmap_decode(b"\x00\x01\x02", "strict", "\U0010FFFFbc"),
1697+
("\U0010FFFFbc", 3)
1698+
)
1699+
1700+
self.assertRaises(UnicodeDecodeError,
1701+
codecs.charmap_decode, b"\x00\x01\x02", "strict", "ab"
1702+
)
1703+
16951704
self.assertEqual(
16961705
codecs.charmap_decode(b"\x00\x01\x02", "replace", "ab"),
16971706
("ab\ufffd", 3)
@@ -1718,6 +1727,113 @@ def test_decode_with_string_map(self):
17181727
("", len(allbytes))
17191728
)
17201729

1730+
def test_decode_with_int2str_map(self):
# Checks codecs.charmap_decode() when the mapping is a dict from byte
# values (int keys) to str values. Each call decodes the same three
# bytes b"\x00\x01\x02"; the expected result is (decoded_text, 3).
1731+
# Every byte maps to a one-character string.
self.assertEqual(
1732+
codecs.charmap_decode(b"\x00\x01\x02", "strict",
1733+
{0: 'a', 1: 'b', 2: 'c'}),
1734+
("abc", 3)
1735+
)
1736+
1737+
# A mapped value may be longer than one character.
self.assertEqual(
1738+
codecs.charmap_decode(b"\x00\x01\x02", "strict",
1739+
{0: 'Aa', 1: 'Bb', 2: 'Cc'}),
1740+
("AaBbCc", 3)
1741+
)
1742+
1743+
# A non-BMP character given as a str value.
self.assertEqual(
1744+
codecs.charmap_decode(b"\x00\x01\x02", "strict",
1745+
{0: '\U0010FFFF', 1: 'b', 2: 'c'}),
1746+
("\U0010FFFFbc", 3)
1747+
)
1748+
1749+
# An empty-string value contributes nothing to the output, but the
# reported length is still 3.
self.assertEqual(
1750+
codecs.charmap_decode(b"\x00\x01\x02", "strict",
1751+
{0: 'a', 1: 'b', 2: ''}),
1752+
("ab", 3)
1753+
)
1754+
1755+
# "strict": a byte with no entry in the mapping raises UnicodeDecodeError.
self.assertRaises(UnicodeDecodeError,
1756+
codecs.charmap_decode, b"\x00\x01\x02", "strict",
1757+
{0: 'a', 1: 'b'}
1758+
)
1759+
1760+
# "replace": a byte with no entry decodes to U+FFFD.
self.assertEqual(
1761+
codecs.charmap_decode(b"\x00\x01\x02", "replace",
1762+
{0: 'a', 1: 'b'}),
1763+
("ab\ufffd", 3)
1764+
)
1765+
1766+
# "replace": an explicit None value gives the same result as a missing key.
self.assertEqual(
1767+
codecs.charmap_decode(b"\x00\x01\x02", "replace",
1768+
{0: 'a', 1: 'b', 2: None}),
1769+
("ab\ufffd", 3)
1770+
)
1771+
1772+
# "ignore": a byte with no entry is dropped from the output.
self.assertEqual(
1773+
codecs.charmap_decode(b"\x00\x01\x02", "ignore",
1774+
{0: 'a', 1: 'b'}),
1775+
("ab", 3)
1776+
)
1777+
1778+
# "ignore": an explicit None value is dropped as well.
self.assertEqual(
1779+
codecs.charmap_decode(b"\x00\x01\x02", "ignore",
1780+
{0: 'a', 1: 'b', 2: None}),
1781+
("ab", 3)
1782+
)
1783+
1784+
# With an empty mapping and "ignore", all 256 byte values are dropped;
# the reported length is still the full input length.
allbytes = bytes(range(256))
1785+
self.assertEqual(
1786+
codecs.charmap_decode(allbytes, "ignore", {}),
1787+
("", len(allbytes))
1788+
)
1789+
1790+
def test_decode_with_int2int_map(self):
# Checks codecs.charmap_decode() when the mapping is a dict from byte
# values (int keys) to integer code points. Each call decodes the same
# three bytes b"\x00\x01\x02".
1791+
# Code points of 'a', 'b' and 'c', used as mapping values below.
a = ord('a')
1792+
b = ord('b')
1793+
c = ord('c')
1794+
1795+
# Integer values are decoded to the characters with those code points.
self.assertEqual(
1796+
codecs.charmap_decode(b"\x00\x01\x02", "strict",
1797+
{0: a, 1: b, 2: c}),
1798+
("abc", 3)
1799+
)
1800+
1801+
# Issue #15379
# A non-BMP code point (0x10FFFF) given as an integer value.
1802+
self.assertEqual(
1803+
codecs.charmap_decode(b"\x00\x01\x02", "strict",
1804+
{0: 0x10FFFF, 1: b, 2: c}),
1805+
("\U0010FFFFbc", 3)
1806+
)
1807+
1808+
# sys.maxunicode itself is an accepted value.
self.assertEqual(
1809+
codecs.charmap_decode(b"\x00\x01\x02", "strict",
1810+
{0: sys.maxunicode, 1: b, 2: c}),
1811+
(chr(sys.maxunicode) + "bc", 3)
1812+
)
1813+
1814+
# One past sys.maxunicode is rejected with TypeError.
self.assertRaises(TypeError,
1815+
codecs.charmap_decode, b"\x00\x01\x02", "strict",
1816+
{0: sys.maxunicode + 1, 1: b, 2: c}
1817+
)
1818+
1819+
# "strict": a byte with no entry in the mapping raises UnicodeDecodeError.
self.assertRaises(UnicodeDecodeError,
1820+
codecs.charmap_decode, b"\x00\x01\x02", "strict",
1821+
{0: a, 1: b},
1822+
)
1823+
1824+
# "replace": a byte with no entry decodes to U+FFFD.
self.assertEqual(
1825+
codecs.charmap_decode(b"\x00\x01\x02", "replace",
1826+
{0: a, 1: b}),
1827+
("ab\ufffd", 3)
1828+
)
1829+
1830+
# "ignore": a byte with no entry is dropped from the output.
self.assertEqual(
1831+
codecs.charmap_decode(b"\x00\x01\x02", "ignore",
1832+
{0: a, 1: b}),
1833+
("ab", 3)
1834+
)
1835+
1836+
17211837
class WithStmtTest(unittest.TestCase):
17221838
def test_encodedfile(self):
17231839
f = io.BytesIO(b"\xc3\xbc")

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,9 @@ What's New in Python 3.3.1
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #15379: Fix passing of non-BMP characters as integers for the charmap
14+
decoder (already working as unicode strings). Patch by Serhiy Storchaka.
15+
1316
- Issue #15144: Fix possible integer overflow when handling pointers as
1417
integer values, by using Py_uintptr_t instead of size_t. Patch by
1518
Serhiy Storchaka.

Objects/unicodeobject.c

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7525,9 +7525,10 @@ PyUnicode_DecodeCharmap(const char *s,
75257525
/* Apply mapping */
75267526
if (PyLong_Check(x)) {
75277527
long value = PyLong_AS_LONG(x);
7528-
if (value < 0 || value > 65535) {
7529-
PyErr_SetString(PyExc_TypeError,
7530-
"character mapping must be in range(65536)");
7528+
if (value < 0 || value > MAX_UNICODE) {
7529+
PyErr_Format(PyExc_TypeError,
7530+
"character mapping must be in range(0x%lx)",
7531+
(unsigned long)MAX_UNICODE + 1);
75317532
Py_DECREF(x);
75327533
goto onError;
75337534
}

0 commit comments

Comments
 (0)