Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 150016f

Browse files
author
Victor Stinner
committed
Issue #8559: improve unicode support of (gdb) libpython.py
* Escape non-printable characters (use locale.getpreferredencoding())
* Fix support of surrogate pairs
* test_gdb.py: use ascii() instead of repr() in gdb program arguments to avoid encoding issues
* Fix test_strings() of test_gdb.py for encodings other than UTF-8 (e.g. ASCII)
1 parent 06710a8 commit 150016f

2 files changed

Lines changed: 61 additions & 39 deletions

File tree

Lib/test/test_gdb.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import subprocess
99
import sys
1010
import unittest
11+
import locale
1112

1213
from test.support import run_unittest, findfile
1314

@@ -177,7 +178,7 @@ def test_getting_backtrace(self):
177178
def assertGdbRepr(self, val, exp_repr=None, cmds_after_breakpoint=None):
178179
# Ensure that gdb's rendering of the value in a debugged process
179180
# matches repr(value) in this process:
180-
gdb_repr, gdb_output = self.get_gdb_repr('id(' + repr(val) + ')',
181+
gdb_repr, gdb_output = self.get_gdb_repr('id(' + ascii(val) + ')',
181182
cmds_after_breakpoint)
182183
if not exp_repr:
183184
exp_repr = repr(val)
@@ -226,31 +227,35 @@ def test_bytes(self):
226227

227228
def test_strings(self):
228229
'Verify the pretty-printing of unicode strings'
230+
encoding = locale.getpreferredencoding()
231+
def check_repr(text):
232+
try:
233+
text.encode(encoding)
234+
printable = True
235+
except UnicodeEncodeError:
236+
self.assertGdbRepr(text, ascii(text))
237+
else:
238+
self.assertGdbRepr(text)
239+
229240
self.assertGdbRepr('')
230241
self.assertGdbRepr('And now for something hopefully the same')
231242
self.assertGdbRepr('string with embedded NUL here \0 and then some more text')
232243

233244
# Test printing a single character:
234245
# U+2620 SKULL AND CROSSBONES
235-
self.assertGdbRepr('\u2620')
246+
check_repr('\u2620')
236247

237248
# Test printing a Japanese unicode string
238249
# (I believe this reads "mojibake", using 3 characters from the CJK
239250
# Unified Ideographs area, followed by U+3051 HIRAGANA LETTER KE)
240-
self.assertGdbRepr('\u6587\u5b57\u5316\u3051')
251+
check_repr('\u6587\u5b57\u5316\u3051')
241252

242253
# Test a character outside the BMP:
243254
# U+1D121 MUSICAL SYMBOL C CLEF
244255
# This is:
245256
# UTF-8: 0xF0 0x9D 0x84 0xA1
246257
# UTF-16: 0xD834 0xDD21
247-
if sys.maxunicode == 0x10FFFF:
248-
# wide unicode:
249-
self.assertGdbRepr(chr(0x1D121))
250-
else:
251-
# narrow unicode:
252-
self.assertGdbRepr(chr(0x1D121),
253-
"'\\U0000d834\\U0000dd21'")
258+
check_repr(chr(0x1D121))
254259

255260
def test_tuples(self):
256261
'Verify the pretty-printing of tuples'

Tools/gdb/libpython.py

Lines changed: 46 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
'''
4343
from __future__ import with_statement
4444
import gdb
45+
import locale
4546

4647
# Look up the gdb.Type for some standard types:
4748
_type_char_ptr = gdb.lookup_type('char').pointer() # char*
@@ -69,6 +70,7 @@
6970

7071
hexdigits = "0123456789abcdef"
7172

73+
ENCODING = locale.getpreferredencoding()
7274

7375
class NullPyObjectPtr(RuntimeError):
7476
pass
@@ -1128,53 +1130,68 @@ def write_repr(self, out, visited):
11281130

11291131
# Non-ASCII characters
11301132
else:
1131-
ucs = ch;
1132-
1133-
if self.char_width == 2:
1134-
ch2 = 0
1133+
ucs = ch
1134+
orig_ucs = None
1135+
if self.char_width() == 2:
11351136
# Get code point from surrogate pair
1136-
if i < len(proxy):
1137+
if (i < len(proxy)
1138+
and 0xD800 <= ord(ch) < 0xDC00 \
1139+
and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
11371140
ch2 = proxy[i]
1138-
if (ord(ch) >= 0xD800 and ord(ch) < 0xDC00
1139-
and ord(ch2) >= 0xDC00 and ord(ch2) <= 0xDFFF):
1140-
ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
1141-
i += 1
1141+
code = (ord(ch) & 0x03FF) << 10
1142+
code |= ord(ch2) & 0x03FF
1143+
code += 0x00010000
1144+
orig_ucs = ucs
1145+
ucs = unichr(code)
1146+
i += 1
1147+
else:
1148+
ch2 = None
1149+
1150+
printable = _unichr_is_printable(ucs)
1151+
if printable:
1152+
try:
1153+
ucs.encode(ENCODING)
1154+
except UnicodeEncodeError:
1155+
printable = False
1156+
if orig_ucs is not None:
1157+
ucs = orig_ucs
1158+
i -= 1
11421159

11431160
# Map Unicode whitespace and control characters
11441161
# (categories Z* and C* except ASCII space)
1145-
if not _unichr_is_printable(ucs):
1162+
if not printable:
11461163
# Unfortunately, Python 2's unicode type doesn't seem
11471164
# to expose the "isprintable" method
1165+
code = ord(ucs)
11481166

11491167
# Map 8-bit characters to '\\xhh'
1150-
if ucs <= 0xff:
1168+
if code <= 0xff:
11511169
out.write('\\x')
1152-
out.write(hexdigits[(ord(ucs) >> 4) & 0x000F])
1153-
out.write(hexdigits[ord(ucs) & 0x000F])
1170+
out.write(hexdigits[(code >> 4) & 0x000F])
1171+
out.write(hexdigits[code & 0x000F])
11541172
# Map 21-bit characters to '\U00xxxxxx'
1155-
elif ucs >= 0x10000:
1173+
elif code >= 0x10000:
11561174
out.write('\\U')
1157-
out.write(hexdigits[(ord(ucs) >> 28) & 0x0000000F])
1158-
out.write(hexdigits[(ord(ucs) >> 24) & 0x0000000F])
1159-
out.write(hexdigits[(ord(ucs) >> 20) & 0x0000000F])
1160-
out.write(hexdigits[(ord(ucs) >> 16) & 0x0000000F])
1161-
out.write(hexdigits[(ord(ucs) >> 12) & 0x0000000F])
1162-
out.write(hexdigits[(ord(ucs) >> 8) & 0x0000000F])
1163-
out.write(hexdigits[(ord(ucs) >> 4) & 0x0000000F])
1164-
out.write(hexdigits[ord(ucs) & 0x0000000F])
1175+
out.write(hexdigits[(code >> 28) & 0x0000000F])
1176+
out.write(hexdigits[(code >> 24) & 0x0000000F])
1177+
out.write(hexdigits[(code >> 20) & 0x0000000F])
1178+
out.write(hexdigits[(code >> 16) & 0x0000000F])
1179+
out.write(hexdigits[(code >> 12) & 0x0000000F])
1180+
out.write(hexdigits[(code >> 8) & 0x0000000F])
1181+
out.write(hexdigits[(code >> 4) & 0x0000000F])
1182+
out.write(hexdigits[code & 0x0000000F])
11651183
# Map 16-bit characters to '\uxxxx'
11661184
else:
11671185
out.write('\\u')
1168-
out.write(hexdigits[(ord(ucs) >> 12) & 0x000F])
1169-
out.write(hexdigits[(ord(ucs) >> 8) & 0x000F])
1170-
out.write(hexdigits[(ord(ucs) >> 4) & 0x000F])
1171-
out.write(hexdigits[ord(ucs) & 0x000F])
1186+
out.write(hexdigits[(code >> 12) & 0x000F])
1187+
out.write(hexdigits[(code >> 8) & 0x000F])
1188+
out.write(hexdigits[(code >> 4) & 0x000F])
1189+
out.write(hexdigits[code & 0x000F])
11721190
else:
11731191
# Copy characters as-is
11741192
out.write(ch)
1175-
if self.char_width == 2:
1176-
if ord(ucs) >= 0x10000:
1177-
out.write(ch2)
1193+
if self.char_width() == 2 and (ch2 is not None):
1194+
out.write(ch2)
11781195

11791196
out.write(quote)
11801197

0 commit comments

Comments
 (0)