@@ -105,6 +105,11 @@ def dump(*stuff):
105105_tran = string .join (_tran , '' )
106106del ch
107107
# Type object of Unicode strings, or None when this interpreter has no
# `unicode` builtin.  Looked up once at import time so later code can test
# whether a given string is Unicode.
UnicodeType = None
try:
    UnicodeType = type(unicode(""))
except NameError:
    # No `unicode` builtin here; keep the None default.
    pass

108113class Parser :
109114
110115 def __init__ (self , indentwidth , tabwidth ):
@@ -113,6 +118,19 @@ def __init__(self, indentwidth, tabwidth):
113118
def set_str(self, str):
    """Install `str` as the text to analyze and reset the study level.

    `str` must be empty or end with a newline (checked with an assert).
    If `str` is a Unicode string, every character whose code point is
    127 or above is replaced by "x" before the text is stored.

    Stores the (possibly converted) text in `self.str` and sets
    `self.study_level` to 0.
    """
    # Compare against a lone newline: str[-1] is a single character, so
    # it can never equal a longer literal.
    assert len(str) == 0 or str[-1] == '\n'
    # UnicodeType is None when the interpreter has no `unicode` builtin;
    # isinstance() also accepts subclasses of the Unicode type.
    if UnicodeType is not None and isinstance(str, UnicodeType):
        # The parse functions have no idea what to do with Unicode, so
        # replace all Unicode characters with "x".  This is "safe"
        # so long as the only characters germane to parsing the structure
        # of Python are 7-bit ASCII.  It's *necessary* because Unicode
        # strings don't have a .translate() method that supports
        # deletechars.
        pieces = []
        push = pieces.append
        for code in map(ord, str):
            if code < 127:
                push(chr(code))
            else:
                push("x")
        str = "".join(pieces)
    self.str = str
    self.study_level = 0

118136
@@ -385,13 +403,14 @@ def _study2(self, _rfind=string.rfind, _find=string.find,
385403 m = _chew_ordinaryre (str , p , q )
386404 if m :
387405 # we skipped at least one boring char
388- p = m .end ()
406+ newp = m .end ()
389407 # back up over totally boring whitespace
390- i = p - 1 # index of last boring char
391- while i >= 0 and str [i ] in " \t \n " :
408+ i = newp - 1 # index of last boring char
409+ while i >= p and str [i ] in " \t \n " :
392410 i = i - 1
393- if i >= 0 :
411+ if i >= p :
394412 lastch = str [i ]
413+ p = newp
395414 if p >= q :
396415 break
397416
0 commit comments