2222 pass
2323
# Encoding for file names
filesystemencoding = sys.getfilesystemencoding()  ### currently unused


def _detect_locale_encoding():
    """Return a codec name for the user's locale, or 'ascii'.

    A name is returned only after codecs.lookup() has accepted it, so the
    result never names a codec that is unknown to this Python.  Every
    failure (missing locale facilities, empty or unknown code set) ends in
    the 'ascii' fallback.
    """
    if sys.platform == 'win32':
        # On Windows, we could use "mbcs". However, to give the user
        # a portable encoding name, we need to find the code page
        try:
            name = locale.getdefaultlocale()[1]
            if name:
                codecs.lookup(name)
                return name
        except (AttributeError, ValueError, LookupError):
            # AttributeError: getdefaultlocale is deprecated and may be
            # absent; ValueError: known getdefaultlocale parsing bugs.
            pass
        return 'ascii'

    try:
        # Different things can fail here: the locale module may not
        # offer nl_langinfo, or CODESET, or the resulting codeset may be
        # unknown to Python. We ignore all these problems, falling back
        # to the next strategy.
        # An empty/None result occurs on Mac OS X; treat it as ASCII.
        name = locale.nl_langinfo(locale.CODESET) or 'ascii'
        codecs.lookup(name)
        return name
    except (NameError, AttributeError, LookupError):
        pass

    # Try getdefaultlocale: it parses environment variables,
    # which may give a clue. Unfortunately, getdefaultlocale has
    # bugs that can cause ValueError.
    try:
        # An empty/None result occurs on Mac OS X; treat it as ASCII.
        name = locale.getdefaultlocale()[1] or 'ascii'
        codecs.lookup(name)
        return name
    except (AttributeError, ValueError, LookupError):
        return 'ascii'


locale_encoding = _detect_locale_encoding().lower()

encoding = locale_encoding  ### KBK 07Sep07  This is used all over IDLE, check!
                            ### 'encoding' is used below in encode(), check!

# Matches a PEP 263 style "coding: name" / "coding=name" declaration;
# group 1 is the encoding name.
coding_re = re.compile(r"coding[:=]\s*([-\w_.]+)")
6366
@@ -110,26 +113,36 @@ def do_edit(self):
def coding_spec(data):
    """Return the encoding declaration according to PEP 263.

    When checking encoded data, only the first two lines should be passed
    in to avoid a UnicodeDecodeError if the rest of the data is not unicode.
    The first two lines would contain the encoding specification.

    data may be bytes or str.  Bytes are decoded as UTF-8 first; if that
    fails, None is returned.  None is also returned when no declaration
    is found in the first two lines.

    Raise a LookupError if the encoding is declared but unknown.
    """
    if isinstance(data, bytes):
        try:
            text = data.decode('utf-8')
        except UnicodeDecodeError:
            return None
    else:
        text = data
    # Consider only the first two lines; lines may end in '\n' or '\r'.
    if '\n' in text:
        head = text.split('\n')[:2]
    elif '\r' in text:
        head = text.split('\r')[:2]
    else:
        # A single line with no terminator: keep it whole.  (Wrapping it
        # with list() would split it into characters and hide any
        # declaration from the regex.)
        head = [text]
    match = coding_re.search('\n'.join(head))
    if not match:
        return None
    name = match.group(1)
    try:
        codecs.lookup(name)
    except LookupError:
        # The standard encoding error does not indicate the encoding
        raise LookupError("Unknown encoding: " + name)
    return name
134147
135148
@@ -236,12 +249,19 @@ def loadfile(self, filename):
236249 # open the file in binary mode so that we can handle
237250 # end-of-line convention ourselves.
238251 f = open (filename ,'rb' )
252+ two_lines = f .readline () + f .readline ()
253+ f .seek (0 )
239254 bytes = f .read ()
240255 f .close ()
241256 except IOError as msg :
242257 tkMessageBox .showerror ("I/O Error" , str (msg ), master = self .text )
243258 return False
244- chars = self .decode (bytes )
259+ chars = self ._decode (two_lines , bytes )
260+ if chars is None :
261+ tkMessageBox .showerror ("Decoding Error" ,
262+ "File %s\n Failed to Decode" % filename ,
263+ parent = self .text )
264+ return False
245265 # We now convert all end-of-lines to '\n's
246266 firsteol = self .eol_re .search (chars )
247267 if firsteol :
@@ -257,50 +277,61 @@ def loadfile(self, filename):
257277 self .updaterecentfileslist (filename )
258278 return True
259279
def _decode(self, two_lines, bytes):
    """Create a Unicode string from the raw contents of a file.

    two_lines -- the first two lines of the file, as bytes; only these
                 are searched for a PEP 263 coding declaration.
    bytes     -- the complete file contents, as bytes.  (The name shadows
                 the builtin inside this method; it is kept because it is
                 part of the signature.)

    Return the decoded text, or None if the data could not be decoded.
    On success self.fileencoding is set to 'BOM' (UTF-8 signature
    present), to the codec name that worked, or to None for plain ASCII.
    """
    # Check presence of a UTF-8 signature first
    if bytes.startswith(BOM_UTF8):
        try:
            chars = bytes[len(BOM_UTF8):].decode("utf-8")
        except UnicodeDecodeError:
            # has UTF-8 signature, but fails to decode...
            return None
        # Indicates that this file originally had a BOM
        self.fileencoding = 'BOM'
        return chars
    # Next look for coding specification
    try:
        enc = coding_spec(two_lines)
    except LookupError as name:
        tkMessageBox.showerror(
            title="Error loading the file",
            message="The encoding '%s' is not known to this Python "
                    "installation. The file may not display correctly" % name,
            master=self.text)
        enc = None
    except UnicodeDecodeError:
        return None
    # Codecs to try, in order, paired with the value to record in
    # self.fileencoding when that codec succeeds.
    candidates = []
    if enc:
        candidates.append((enc, enc))
    candidates.append(('ascii', None))      # plain ASCII: nothing to record
    candidates.append(('utf-8', 'utf-8'))
    # Finally, try the locale's encoding. This is deprecated;
    # the user should declare a non-ASCII encoding
    candidates.append((locale_encoding, locale_encoding))
    for codec, recorded in candidates:
        try:
            chars = str(bytes, codec)
        except (UnicodeDecodeError, LookupError):
            # LookupError: the name is registered but is not a text
            # encoding, or the locale name is unusable; try the next one.
            continue
        self.fileencoding = recorded
        return chars
    return None  # every candidate failed
304335
305336 def maybesave (self ):
306337 if self .get_saved ():
@@ -383,8 +414,9 @@ def encode(self, chars):
383414 return chars .encode ('ascii' )
384415 except UnicodeError :
385416 pass
386- # If there is an encoding declared, try this first.
417+ # Check if there is an encoding declared
387418 try :
419+ # a string, let coding_spec slice it to the first two lines
388420 enc = coding_spec (chars )
389421 failed = None
390422 except LookupError as msg :
@@ -509,7 +541,6 @@ def askopenfile(self):
509541 self .opendialog = tkFileDialog .Open (master = self .text ,
510542 filetypes = self .filetypes )
511543 filename = self .opendialog .show (initialdir = dir , initialfile = base )
512- assert isinstance (filename , str )
513544 return filename
514545
515546 def defaultfilename (self , mode = "open" ):
0 commit comments