@@ -2378,6 +2378,22 @@ def getUnicode(value, encoding=None, noneToNull=False):
23782378 if isinstance (value , unicode ):
23792379 return value
23802380 elif isinstance (value , basestring ):
2381+ # Heuristics (if encoding not explicitly specified)
2382+ if all (_ in value for _ in ('<' , '>' )):
2383+ candidates = filter (None , (encoding , kb .get ("pageEncoding" ) if kb .get ("originalPage" ) else None , conf .get ("encoding" ), sys .getfilesystemencoding (), UNICODE_ENCODING ))
2384+ elif any (_ in value for _ in (":\\ " , '/' , '.' )) and '\n ' not in value :
2385+ candidates = filter (None , (encoding , sys .getfilesystemencoding (), kb .get ("pageEncoding" ) if kb .get ("originalPage" ) else None , UNICODE_ENCODING , conf .get ("encoding" )))
2386+ elif conf .get ("encoding" ) and '\n ' not in value :
2387+ candidates = filter (None , (encoding , conf .get ("encoding" ), kb .get ("pageEncoding" ) if kb .get ("originalPage" ) else None , sys .getfilesystemencoding (), UNICODE_ENCODING ))
2388+ else :
2389+ candidates = filter (None , (encoding , kb .get ("pageEncoding" ) if kb .get ("originalPage" ) else None , UNICODE_ENCODING , conf .get ("encoding" ), sys .getfilesystemencoding ()))
2390+
2391+ for candidate in candidates :
2392+ try :
2393+ return unicode (value , candidate )
2394+ except UnicodeDecodeError :
2395+ pass
2396+
23812397 while True :
23822398 try :
23832399 return unicode (value , encoding or (kb .get ("pageEncoding" ) if kb .get ("originalPage" ) else None ) or UNICODE_ENCODING )
0 commit comments