Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit b41e128

Browse files
committed
Issue #9188: The gdb extension now correctly handles narrow (UCS2) as well
as wide (UCS4) unicode builds for both the host interpreter (embedded inside gdb) and the interpreter under test.
1 parent 63b1767 commit b41e128

2 files changed

Lines changed: 50 additions & 13 deletions

File tree

Misc/NEWS

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,13 @@ Library
7676
guaranteed to exist in all Python implementations and the names of hash
7777
algorithms available in the current process.
7878

79+
Tools/Demos
80+
-----------
81+
82+
- Issue #9188: The gdb extension now handles correctly narrow (UCS2) as well
83+
as wide (UCS4) unicode builds for both the host interpreter (embedded
84+
inside gdb) and the interpreter under test.
85+
7986
Build
8087
-----
8188

Tools/gdb/libpython.py

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,7 +1065,19 @@ def _unichr_is_printable(char):
10651065
if char == u" ":
10661066
return True
10671067
import unicodedata
1068-
return unicodedata.category(char)[0] not in ("C", "Z")
1068+
return unicodedata.category(char) not in ("C", "Z")
1069+
1070+
if sys.maxunicode >= 0x10000:
1071+
_unichr = unichr
1072+
else:
1073+
# Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
1074+
def _unichr(x):
1075+
if x < 0x10000:
1076+
return unichr(x)
1077+
x -= 0x10000
1078+
ch1 = 0xD800 | (x >> 10)
1079+
ch2 = 0xDC00 | (x & 0x3FF)
1080+
return unichr(ch1) + unichr(ch2)
10691081

10701082

10711083
class PyUnicodeObjectPtr(PyObjectPtr):
@@ -1084,11 +1096,33 @@ def proxyval(self, visited):
10841096

10851097
# Gather a list of ints from the Py_UNICODE array; these are either
10861098
# UCS-2 or UCS-4 code points:
1087-
Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
1099+
if self.char_width() > 2:
1100+
Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
1101+
else:
1102+
# A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
1103+
# inferior process: we must join surrogate pairs.
1104+
Py_UNICODEs = []
1105+
i = 0
1106+
while i < field_length:
1107+
ucs = int(field_str[i])
1108+
i += 1
1109+
if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
1110+
Py_UNICODEs.append(ucs)
1111+
continue
1112+
# This could be a surrogate pair.
1113+
ucs2 = int(field_str[i])
1114+
if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
1115+
continue
1116+
code = (ucs & 0x03FF) << 10
1117+
code |= ucs2 & 0x03FF
1118+
code += 0x00010000
1119+
Py_UNICODEs.append(code)
1120+
i += 1
10881121

10891122
# Convert the int code points to unicode characters, and generate a
1090-
# local unicode instance:
1091-
result = u''.join([unichr(ucs) for ucs in Py_UNICODEs])
1123+
# local unicode instance.
1124+
# This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
1125+
result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
10921126
return result
10931127

10941128
def write_repr(self, out, visited):
@@ -1137,20 +1171,16 @@ def write_repr(self, out, visited):
11371171
else:
11381172
ucs = ch
11391173
orig_ucs = None
1174+
ch2 = None
11401175
if self.char_width() == 2:
1141-
# Get code point from surrogate pair
1176+
# If sizeof(Py_UNICODE) is 2 here (in gdb), join
1177+
# surrogate pairs before calling _unichr_is_printable.
11421178
if (i < len(proxy)
11431179
and 0xD800 <= ord(ch) < 0xDC00 \
11441180
and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
11451181
ch2 = proxy[i]
1146-
code = (ord(ch) & 0x03FF) << 10
1147-
code |= ord(ch2) & 0x03FF
1148-
code += 0x00010000
1149-
orig_ucs = ucs
1150-
ucs = unichr(code)
1182+
ucs = ch + ch2
11511183
i += 1
1152-
else:
1153-
ch2 = None
11541184

11551185
printable = _unichr_is_printable(ucs)
11561186
if printable:
@@ -1195,7 +1225,7 @@ def write_repr(self, out, visited):
11951225
else:
11961226
# Copy characters as-is
11971227
out.write(ch)
1198-
if self.char_width() == 2 and (ch2 is not None):
1228+
if ch2 is not None:
11991229
out.write(ch2)
12001230

12011231
out.write(quote)

0 commit comments

Comments
 (0)