Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 9765efc

Browse files
ZackerySpytz authored and vstinner committed
bpo-19865: ctypes.create_unicode_buffer() supports non-BMP strings on Windows (GH-14081)
1 parent 21a92f8 commit 9765efc

3 files changed

Lines changed: 20 additions & 1 deletion

File tree

Lib/ctypes/__init__.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,15 @@ def create_unicode_buffer(init, size=None):
274274
"""
275275
if isinstance(init, str):
276276
if size is None:
277-
size = len(init)+1
277+
if sizeof(c_wchar) == 2:
278+
# UTF-16 requires a surrogate pair (2 wchar_t) for non-BMP
279+
# characters (outside [U+0000; U+FFFF] range). +1 for trailing
280+
# NUL character.
281+
size = sum(2 if ord(c) > 0xFFFF else 1 for c in init) + 1
282+
else:
283+
# 32-bit wchar_t (1 wchar_t per Unicode character). +1 for
284+
# trailing NUL character.
285+
size = len(init) + 1
278286
buftype = c_wchar * size
279287
buf = buftype()
280288
buf.value = init

Lib/ctypes/test/test_buffers.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,5 +60,14 @@ def test_unicode_conversion(self):
6060
self.assertEqual(b[::2], "ac")
6161
self.assertEqual(b[::5], "a")
6262

63+
@need_symbol('c_wchar')
64+
def test_create_unicode_buffer_non_bmp(self):
65+
expected = 5 if sizeof(c_wchar) == 2 else 3
66+
for s in '\U00010000\U00100000', '\U00010000\U0010ffff':
67+
b = create_unicode_buffer(s)
68+
self.assertEqual(len(b), expected)
69+
self.assertEqual(b[-1], '\0')
70+
71+
6372
if __name__ == "__main__":
6473
unittest.main()
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
:func:`ctypes.create_unicode_buffer()` now also supports non-BMP characters
2+
on platforms with 16-bit :c:type:`wchar_t` (for example, Windows and AIX).

0 commit comments

Comments
 (0)