1919# Regex used for recognition of hex encoded characters
2020HEX_ENCODED_CHAR_REGEX = r"(?P<result>\\x[0-9A-Fa-f]{2})"
2121
22- # Regex used for recognition of representation for hex encoded invalid unicode characters
23- INVALID_UNICODE_CHAR_REGEX = r"(?P<result>\\\?[0-9A-Fa-f]{2})"
24-
2522# Raw chars that will be safe encoded to their slash (\) representations (e.g. newline to \n)
2623SAFE_ENCODE_SLASH_REPLACEMENTS = "\t \n \r \x0b \x0c "
2724
2825# Characters that don't need to be safe encoded
2926SAFE_CHARS = "" .join (filter (lambda x : x not in SAFE_ENCODE_SLASH_REPLACEMENTS , string .printable .replace ('\\ ' , '' )))
3027
28+ # Prefix used for hex encoded values
29+ HEX_ENCODED_PREFIX = r"\x"
30+
31+ # String used for temporary marking of hex encoded prefixes (to prevent double encoding)
32+ HEX_ENCODED_PREFIX_MARKER = "__HEX_ENCODED_PREFIX__"
33+
3134# String used for temporary marking of slash characters
3235SLASH_MARKER = "__SLASH__"
3336
@@ -45,6 +48,7 @@ def safecharencode(value):
4548
4649 if isinstance (value , basestring ):
4750 if any (_ not in SAFE_CHARS for _ in value ):
51+ retVal = retVal .replace (HEX_ENCODED_PREFIX , HEX_ENCODED_PREFIX_MARKER )
4852 retVal = retVal .replace ('\\ ' , SLASH_MARKER )
4953
5054 for char in SAFE_ENCODE_SLASH_REPLACEMENTS :
@@ -53,6 +57,7 @@ def safecharencode(value):
5357 retVal = reduce (lambda x , y : x + (y if (y in string .printable or isinstance (value , unicode ) and ord (y ) >= 160 ) else '\\ x%02x' % ord (y )), retVal , (unicode if isinstance (value , unicode ) else str )())
5458
5559 retVal = retVal .replace (SLASH_MARKER , "\\ \\ " )
60+ retVal = retVal .replace (HEX_ENCODED_PREFIX_MARKER , HEX_ENCODED_PREFIX )
5661 elif isinstance (value , list ):
5762 for i in xrange (len (value )):
5863 retVal [i ] = safecharencode (value [i ])
@@ -83,12 +88,6 @@ def safechardecode(value, binary=False):
8388 if binary :
8489 if isinstance (retVal , unicode ):
8590 retVal = retVal .encode ("utf8" )
86- while True :
87- match = re .search (INVALID_UNICODE_CHAR_REGEX , retVal )
88- if match :
89- retVal = retVal .replace (match .group ("result" ), chr (ord (binascii .unhexlify (match .group ("result" ).lstrip ("\\ ?" )))))
90- else :
91- break
9291
9392 elif isinstance (value , (list , tuple )):
9493 for i in xrange (len (value )):
0 commit comments