Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ce0b664

Browse files
committed
Added test case for UTF-8 encoding bug #541828.
1 parent a974561 commit ce0b664

2 files changed

Lines changed: 18 additions & 2 deletions

File tree

Lib/test/test_unicode.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -508,6 +508,22 @@ def __str__(self):
508508
verify(u'\udc00'.encode('utf-8') == '\xed\xb0\x80')
509509
verify((u'\ud800\udc02'*1000).encode('utf-8') ==
510510
'\xf0\x90\x80\x82'*1000)
511+
verify(u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
512+
u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
513+
u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
514+
u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
515+
u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
516+
u' Nunstuck git und'.encode('utf-8') ==
517+
'\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
518+
'\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
519+
'\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
520+
'\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
521+
'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
522+
'\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
523+
'\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
524+
'\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
525+
'\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
526+
'\xe3\x80\x8cWenn ist das Nunstuck git und')
511527

512528
# UTF-8 specific decoding tests
513529
verify(unicode('\xf0\xa3\x91\x96', 'utf-8') == u'\U00023456' )

Objects/unicodeobject.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1224,8 +1224,8 @@ PyObject *PyUnicode_EncodeUTF8(const Py_UNICODE *s,
12241224
Py_UCS4 ch2 = s[i];
12251225
/* Check for low surrogate */
12261226
if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
1227-
ch = ((ch - 0xD800)<<10 | (ch2-0xDC00))+0x10000;
1228-
*p++ = (char)((ch >> 18) | 0xf0);
1227+
ch = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x00010000;
1228+
*p++ = (char)(0xf0 | (ch >> 18));
12291229
*p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
12301230
*p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
12311231
*p++ = (char)(0x80 | (ch & 0x3f));

0 commit comments

Comments
 (0)