@@ -1065,7 +1065,19 @@ def _unichr_is_printable(char):
10651065 if char == u" " :
10661066 return True
10671067 import unicodedata
1068- return unicodedata .category (char )[0 ] not in ("C" , "Z" )
1068+ return unicodedata .category (char ) not in ("C" , "Z" )
1069+
if sys.maxunicode < 0x10000:
    # The Python running inside gdb stores unicode in 2-byte units, so a
    # code point above the Basic Multilingual Plane cannot be produced by
    # a single unichr() call; it has to be spelled as a surrogate pair.
    def _unichr(x):
        """Return the unicode string for the integer code point x.

        Code points below 0x10000 yield a single character.  Anything
        higher is returned as a two-character string made of the high
        (0xD800-based) and low (0xDC00-based) UTF-16 surrogates.
        """
        if x >= 0x10000:
            offset = x - 0x10000
            # Top bits select the high surrogate, bottom 10 bits the low.
            high = unichr(0xD800 | (offset >> 10))
            low = unichr(0xDC00 | (offset & 0x3FF))
            return high + low
        return unichr(x)
else:
    # unichr() already covers the whole code point range here, so the
    # builtin can be used directly.
    _unichr = unichr
10691081
10701082
10711083class PyUnicodeObjectPtr (PyObjectPtr ):
@@ -1084,11 +1096,33 @@ def proxyval(self, visited):
10841096
10851097 # Gather a list of ints from the Py_UNICODE array; these are either
10861098 # UCS-2 or UCS-4 code points:
1087- Py_UNICODEs = [int (field_str [i ]) for i in safe_range (field_length )]
1099+ if self .char_width () > 2 :
1100+ Py_UNICODEs = [int (field_str [i ]) for i in safe_range (field_length )]
1101+ else :
1102+ # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
1103+ # inferior process: we must join surrogate pairs.
1104+ Py_UNICODEs = []
1105+ i = 0
1106+ while i < field_length :
1107+ ucs = int (field_str [i ])
1108+ i += 1
1109+ if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length :
1110+ Py_UNICODEs .append (ucs )
1111+ continue
1112+ # This could be a surrogate pair.
1113+ ucs2 = int (field_str [i ])
1114+ if ucs2 < 0xDC00 or ucs2 > 0xDFFF :
1115+ continue
1116+ code = (ucs & 0x03FF ) << 10
1117+ code |= ucs2 & 0x03FF
1118+ code += 0x00010000
1119+ Py_UNICODEs .append (code )
1120+ i += 1
10881121
10891122 # Convert the int code points to unicode characters, and generate a
1090- # local unicode instance:
1091- result = u'' .join ([unichr (ucs ) for ucs in Py_UNICODEs ])
1123+ # local unicode instance.
1124+ # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
1125+ result = u'' .join ([_unichr (ucs ) for ucs in Py_UNICODEs ])
10921126 return result
10931127
10941128 def write_repr (self , out , visited ):
@@ -1137,20 +1171,16 @@ def write_repr(self, out, visited):
11371171 else :
11381172 ucs = ch
11391173 orig_ucs = None
1174+ ch2 = None
11401175 if self .char_width () == 2 :
1141- # Get code point from surrogate pair
1176+ # If sizeof(Py_UNICODE) is 2 here (in gdb), join
1177+ # surrogate pairs before calling _unichr_is_printable.
11421178 if (i < len (proxy )
11431179 and 0xD800 <= ord (ch ) < 0xDC00 \
11441180 and 0xDC00 <= ord (proxy [i ]) <= 0xDFFF ):
11451181 ch2 = proxy [i ]
1146- code = (ord (ch ) & 0x03FF ) << 10
1147- code |= ord (ch2 ) & 0x03FF
1148- code += 0x00010000
1149- orig_ucs = ucs
1150- ucs = unichr (code )
1182+ ucs = ch + ch2
11511183 i += 1
1152- else :
1153- ch2 = None
11541184
11551185 printable = _unichr_is_printable (ucs )
11561186 if printable :
@@ -1195,7 +1225,7 @@ def write_repr(self, out, visited):
11951225 else :
11961226 # Copy characters as-is
11971227 out .write (ch )
1198- if self . char_width () == 2 and ( ch2 is not None ) :
1228+ if ch2 is not None :
11991229 out .write (ch2 )
12001230
12011231 out .write (quote )
0 commit comments