Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 925a322

Browse files
committed
#18155: Regex-escape delimiter, in case it is a regex special char.
Patch by Vajrasky Kok, with slight modification to the tests by me.
1 parent e4e530e commit 925a322

4 files changed

Lines changed: 48 additions & 6 deletions

File tree

Lib/csv.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -264,8 +264,9 @@ def _guess_quote_and_delimiter(self, data, delimiters):
264264

265265
# if we see an extra quote between delimiters, we've got a
266266
# double quoted format
267-
dq_regexp = re.compile(r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
268-
{'delim':delim, 'quote':quotechar}, re.MULTILINE)
267+
dq_regexp = re.compile(
268+
r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
269+
{'delim':re.escape(delim), 'quote':quotechar}, re.MULTILINE)
269270

270271

271272

Lib/test/test_csv.py

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -796,7 +796,7 @@ class TestSniffer(unittest.TestCase):
796796
'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow'
797797
'Stonecutters ''Seafood'' and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
798798
"""
799-
header = '''\
799+
header1 = '''\
800800
"venue","city","state","date","performers"
801801
'''
802802
sample3 = '''\
@@ -815,10 +815,35 @@ class TestSniffer(unittest.TestCase):
815815
sample6 = "a|b|c\r\nd|e|f\r\n"
816816
sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n"
817817

818+
# Issue 18155: Use a delimiter that is a special char to regex:
819+
820+
header2 = '''\
821+
"venue"+"city"+"state"+"date"+"performers"
822+
'''
823+
sample8 = """\
824+
Harry's+ Arlington Heights+ IL+ 2/1/03+ Kimi Hayes
825+
Shark City+ Glendale Heights+ IL+ 12/28/02+ Prezence
826+
Tommy's Place+ Blue Island+ IL+ 12/28/02+ Blue Sunday/White Crow
827+
Stonecutters Seafood and Chop House+ Lemont+ IL+ 12/19/02+ Week Back
828+
"""
829+
sample9 = """\
830+
'Harry''s'+ Arlington Heights'+ 'IL'+ '2/1/03'+ 'Kimi Hayes'
831+
'Shark City'+ Glendale Heights'+' IL'+ '12/28/02'+ 'Prezence'
832+
'Tommy''s Place'+ Blue Island'+ 'IL'+ '12/28/02'+ 'Blue Sunday/White Crow'
833+
'Stonecutters ''Seafood'' and Chop House'+ 'Lemont'+ 'IL'+ '12/19/02'+ 'Week Back'
834+
"""
835+
818836
def test_has_header(self):
819837
sniffer = csv.Sniffer()
820838
self.assertEqual(sniffer.has_header(self.sample1), False)
821-
self.assertEqual(sniffer.has_header(self.header+self.sample1), True)
839+
self.assertEqual(sniffer.has_header(self.header1 + self.sample1),
840+
True)
841+
842+
def test_has_header_regex_special_delimiter(self):
843+
sniffer = csv.Sniffer()
844+
self.assertEqual(sniffer.has_header(self.sample8), False)
845+
self.assertEqual(sniffer.has_header(self.header2 + self.sample8),
846+
True)
822847

823848
def test_sniff(self):
824849
sniffer = csv.Sniffer()
@@ -852,13 +877,24 @@ def test_delimiters(self):
852877
dialect = sniffer.sniff(self.sample7)
853878
self.assertEqual(dialect.delimiter, "|")
854879
self.assertEqual(dialect.quotechar, "'")
880+
dialect = sniffer.sniff(self.sample8)
881+
self.assertEqual(dialect.delimiter, '+')
882+
dialect = sniffer.sniff(self.sample9)
883+
self.assertEqual(dialect.delimiter, '+')
884+
self.assertEqual(dialect.quotechar, "'")
855885

856886
def test_doublequote(self):
857887
sniffer = csv.Sniffer()
858-
dialect = sniffer.sniff(self.header)
888+
dialect = sniffer.sniff(self.header1)
889+
self.assertFalse(dialect.doublequote)
890+
dialect = sniffer.sniff(self.header2)
859891
self.assertFalse(dialect.doublequote)
860892
dialect = sniffer.sniff(self.sample2)
861893
self.assertTrue(dialect.doublequote)
894+
dialect = sniffer.sniff(self.sample8)
895+
self.assertFalse(dialect.doublequote)
896+
dialect = sniffer.sniff(self.sample9)
897+
self.assertTrue(dialect.doublequote)
862898

863899
if not hasattr(sys, "gettotalrefcount"):
864900
if support.verbose: print("*** skipping leakage tests ***")

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -656,6 +656,7 @@ Kubilay Kocak
656656
Greg Kochanski
657657
Damon Kohler
658658
Marko Kohtala
659+
Vajrasky Kok
659660
Guido Kollerie
660661
Jacek Konieczny
661662
Марк Коренберг

Misc/NEWS

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ Core and Builtins
3838
Library
3939
-------
4040

41+
- Issue #18155: The csv module now correctly handles csv files that use
42+
a delimiter character that has a special meaning in regexes, instead of
43+
throwing an exception.
44+
4145
- Issue #14360: encode_quopri can now be successfully used as an encoder
4246
when constructing a MIMEApplication object.
4347

@@ -50,7 +54,7 @@ Library
5054

5155
- Issue #18259: Declare sethostname in socketmodule.c for AIX
5256

53-
- Issue #18167: cgi.FieldStorage no more fails to handle multipart/form-data
57+
- Issue #18167: cgi.FieldStorage no longer fails to handle multipart/form-data
5458
when \r\n appears at end of 65535 bytes without other newlines.
5559

5660
- subprocess: Prevent a possible double close of parent pipe fds when the

0 commit comments

Comments
 (0)