Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 5593bf2

Browse files
committed
Another patch related to #1539 (simplifying unicode bad chars and preventing double encoding of safe chars)
1 parent ca933fc commit 5593bf2

4 files changed

Lines changed: 14 additions & 11 deletions

File tree

extra/safe2bin/safe2bin.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,18 @@
1919
# Regex used for recognition of hex encoded characters
2020
HEX_ENCODED_CHAR_REGEX = r"(?P<result>\\x[0-9A-Fa-f]{2})"
2121

22-
# Regex used for recognition of representation for hex encoded invalid unicode characters
23-
INVALID_UNICODE_CHAR_REGEX = r"(?P<result>\\\?[0-9A-Fa-f]{2})"
24-
2522
# Raw chars that will be safe encoded to their slash (\) representations (e.g. newline to \n)
2623
SAFE_ENCODE_SLASH_REPLACEMENTS = "\t\n\r\x0b\x0c"
2724

2825
# Characters that don't need to be safe encoded
2926
SAFE_CHARS = "".join(filter(lambda x: x not in SAFE_ENCODE_SLASH_REPLACEMENTS, string.printable.replace('\\', '')))
3027

28+
# Prefix used for hex encoded values
29+
HEX_ENCODED_PREFIX = r"\x"
30+
31+
# String used for temporary marking of hex encoded prefixes (to prevent double encoding)
32+
HEX_ENCODED_PREFIX_MARKER = "__HEX_ENCODED_PREFIX__"
33+
3134
# String used for temporary marking of slash characters
3235
SLASH_MARKER = "__SLASH__"
3336

@@ -45,6 +48,7 @@ def safecharencode(value):
4548

4649
if isinstance(value, basestring):
4750
if any(_ not in SAFE_CHARS for _ in value):
51+
retVal = retVal.replace(HEX_ENCODED_PREFIX, HEX_ENCODED_PREFIX_MARKER)
4852
retVal = retVal.replace('\\', SLASH_MARKER)
4953

5054
for char in SAFE_ENCODE_SLASH_REPLACEMENTS:
@@ -53,6 +57,7 @@ def safecharencode(value):
5357
retVal = reduce(lambda x, y: x + (y if (y in string.printable or isinstance(value, unicode) and ord(y) >= 160) else '\\x%02x' % ord(y)), retVal, (unicode if isinstance(value, unicode) else str)())
5458

5559
retVal = retVal.replace(SLASH_MARKER, "\\\\")
60+
retVal = retVal.replace(HEX_ENCODED_PREFIX_MARKER, HEX_ENCODED_PREFIX)
5661
elif isinstance(value, list):
5762
for i in xrange(len(value)):
5863
retVal[i] = safecharencode(value[i])
@@ -83,12 +88,6 @@ def safechardecode(value, binary=False):
8388
if binary:
8489
if isinstance(retVal, unicode):
8590
retVal = retVal.encode("utf8")
86-
while True:
87-
match = re.search(INVALID_UNICODE_CHAR_REGEX, retVal)
88-
if match:
89-
retVal = retVal.replace(match.group("result"), chr(ord(binascii.unhexlify(match.group("result").lstrip("\\?")))))
90-
else:
91-
break
9291

9392
elif isinstance(value, (list, tuple)):
9493
for i in xrange(len(value)):

lib/core/common.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2954,6 +2954,10 @@ def decodeIntToUnicode(value):
29542954
retVal = unichr(value)
29552955
else:
29562956
retVal = getUnicode(raw, conf.charset)
2957+
2958+
if Backend.isDbms(DBMS.MYSQL):
2959+
import pdb
2960+
pdb.set_trace()
29572961
else:
29582962
retVal = getUnicode(chr(value))
29592963
except:

lib/core/settings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -587,7 +587,7 @@
587587
LIMITED_ROWS_TEST_NUMBER = 15
588588

589589
# Format used for representing invalid unicode characters
590-
INVALID_UNICODE_CHAR_FORMAT = r"\?%02x"
590+
INVALID_UNICODE_CHAR_FORMAT = r"\x%02x"
591591

592592
# Regular expression for XML POST data
593593
XML_RECOGNITION_REGEX = r"(?s)\A\s*<[^>]+>(.+>)?\s*\Z"

xml/queries.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<root>
44
<!-- MySQL -->
55
<dbms value="MySQL">
6-
<cast query="CAST(%s AS CHAR)"/>
6+
<cast query="CAST(%s AS CHAR CHARACTER SET latin1)"/>
77
<length query="CHAR_LENGTH(%s)"/>
88
<isnull query="IFNULL(%s,' ')"/>
99
<delimiter query=","/>

0 commit comments

Comments
 (0)