Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c89f284

Browse files
committed
When using QUOTE_NONNUMERIC, we now test for "numericness" with
PyNumber_Check, rather than trying to convert to a float. Reimplemented the writer: it now raises an exception when it sees a quotechar but neither doublequote nor escapechar is set. Doublequote results are now more consistent (e.g., a field consisting of a single quote character should generate """", rather than "", which is ambiguous).
1 parent 31d8896 commit c89f284

3 files changed

Lines changed: 81 additions & 92 deletions

File tree

Lib/test/test_csv.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -152,25 +152,35 @@ def test_write_bigfield(self):
152152
(bigstring, bigstring))
153153

154154
def test_write_quoting(self):
155-
self._write_test(['a','1','p,q'], 'a,1,"p,q"')
155+
self._write_test(['a',1,'p,q'], 'a,1,"p,q"')
156156
self.assertRaises(csv.Error,
157157
self._write_test,
158-
['a','1','p,q'], 'a,1,"p,q"',
158+
['a',1,'p,q'], 'a,1,p,q',
159159
quoting = csv.QUOTE_NONE)
160-
self._write_test(['a','1','p,q'], 'a,1,"p,q"',
160+
self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
161161
quoting = csv.QUOTE_MINIMAL)
162-
self._write_test(['a','1','p,q'], '"a",1,"p,q"',
162+
self._write_test(['a',1,'p,q'], '"a",1,"p,q"',
163163
quoting = csv.QUOTE_NONNUMERIC)
164-
self._write_test(['a','1','p,q'], '"a","1","p,q"',
164+
self._write_test(['a',1,'p,q'], '"a","1","p,q"',
165165
quoting = csv.QUOTE_ALL)
166166

167167
def test_write_escape(self):
168-
self._write_test(['a','1','p,q'], 'a,1,"p,q"',
168+
self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
169169
escapechar='\\')
170-
# FAILED - needs to be fixed [am]:
171-
# self._write_test(['a','1','p,"q"'], 'a,1,"p,\\"q\\"',
172-
# escapechar='\\', doublequote = 0)
173-
self._write_test(['a','1','p,q'], 'a,1,p\\,q',
170+
self.assertRaises(csv.Error,
171+
self._write_test,
172+
['a',1,'p,"q"'], 'a,1,"p,\\"q\\""',
173+
escapechar=None, doublequote=False)
174+
self._write_test(['a',1,'p,"q"'], 'a,1,"p,\\"q\\""',
175+
escapechar='\\', doublequote = False)
176+
self._write_test(['"'], '""""',
177+
escapechar='\\', quoting = csv.QUOTE_MINIMAL)
178+
self._write_test(['"'], '\\"',
179+
escapechar='\\', quoting = csv.QUOTE_MINIMAL,
180+
doublequote = False)
181+
self._write_test(['"'], '\\"',
182+
escapechar='\\', quoting = csv.QUOTE_NONE)
183+
self._write_test(['a',1,'p,q'], 'a,1,p\\,q',
174184
escapechar='\\', quoting = csv.QUOTE_NONE)
175185

176186
def test_writerows(self):

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ Library
4545
+ quotechar=None and quoting=QUOTE_NONE now work the way PEP 305
4646
dictates.
4747
+ the parser now removes the escapechar prefix from escaped characters.
48+
+ QUOTE_NONNUMERIC now tests for numeric objects, rather than attempting
49+
to cast to float.
50+
+ writer doublequote handling improved.
4851
+ Dialect classes passed to the module are no longer instantiated by
4952
the module before being parsed (the former validation scheme required
5053
this, but the mechanism was unreliable).

Modules/_csv.c

Lines changed: 58 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -944,81 +944,65 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
944944
{
945945
DialectObj *dialect = self->dialect;
946946
int i, rec_len;
947+
char *lineterm;
948+
949+
#define ADDCH(c) \
950+
do {\
951+
if (copy_phase) \
952+
self->rec[rec_len] = c;\
953+
rec_len++;\
954+
} while(0)
955+
956+
lineterm = PyString_AsString(dialect->lineterminator);
957+
if (lineterm == NULL)
958+
return -1;
947959

948960
rec_len = self->rec_len;
949961

950-
/* If this is not the first field we need a field separator.
951-
*/
952-
if (self->num_fields > 0) {
953-
if (copy_phase)
954-
self->rec[rec_len] = dialect->delimiter;
955-
rec_len++;
956-
}
957-
/* Handle preceding quote.
958-
*/
959-
switch (dialect->quoting) {
960-
case QUOTE_ALL:
961-
*quoted = 1;
962-
if (copy_phase)
963-
self->rec[rec_len] = dialect->quotechar;
964-
rec_len++;
965-
break;
966-
case QUOTE_MINIMAL:
967-
case QUOTE_NONNUMERIC:
968-
/* We only know about quoted in the copy phase.
969-
*/
970-
if (copy_phase && *quoted) {
971-
self->rec[rec_len] = dialect->quotechar;
972-
rec_len++;
973-
}
974-
break;
975-
case QUOTE_NONE:
976-
break;
977-
}
978-
/* Copy/count field data.
979-
*/
962+
/* If this is not the first field we need a field separator */
963+
if (self->num_fields > 0)
964+
ADDCH(dialect->delimiter);
965+
966+
/* Handle preceding quote */
967+
if (copy_phase && *quoted)
968+
ADDCH(dialect->quotechar);
969+
970+
/* Copy/count field data */
980971
for (i = 0;; i++) {
981972
char c = field[i];
973+
int want_escape = 0;
982974

983975
if (c == '\0')
984976
break;
985-
/* If in doublequote mode we escape quote chars with a
986-
* quote.
987-
*/
988-
if (dialect->quoting != QUOTE_NONE &&
989-
c == dialect->quotechar && dialect->doublequote) {
990-
if (copy_phase)
991-
self->rec[rec_len] = dialect->quotechar;
992-
*quoted = 1;
993-
rec_len++;
994-
}
995977

996-
/* Some special characters need to be escaped. If we have a
997-
* quote character switch to quoted field instead of escaping
998-
* individual characters.
999-
*/
1000-
if (!*quoted
1001-
&& (c == dialect->delimiter ||
1002-
c == dialect->escapechar ||
1003-
c == '\n' || c == '\r')) {
1004-
if (dialect->quoting != QUOTE_NONE)
1005-
*quoted = 1;
1006-
else if (dialect->escapechar) {
1007-
if (copy_phase)
1008-
self->rec[rec_len] = dialect->escapechar;
1009-
rec_len++;
1010-
}
978+
if (c == dialect->delimiter ||
979+
c == dialect->escapechar ||
980+
c == dialect->quotechar ||
981+
strchr(lineterm, c)) {
982+
if (dialect->quoting == QUOTE_NONE)
983+
want_escape = 1;
1011984
else {
1012-
PyErr_Format(error_obj,
1013-
"delimiter must be quoted or escaped");
1014-
return -1;
985+
if (c == dialect->quotechar) {
986+
if (dialect->doublequote)
987+
ADDCH(dialect->quotechar);
988+
else
989+
want_escape = 1;
990+
}
991+
if (!want_escape)
992+
*quoted = 1;
993+
}
994+
if (want_escape) {
995+
if (!dialect->escapechar) {
996+
PyErr_Format(error_obj,
997+
"need to escape, but no escapechar set");
998+
return -1;
999+
}
1000+
ADDCH(dialect->escapechar);
10151001
}
10161002
}
10171003
/* Copy field character into record buffer.
10181004
*/
1019-
if (copy_phase)
1020-
self->rec[rec_len] = c;
1021-
rec_len++;
1005+
ADDCH(c);
10221006
}
10231007

10241008
/* If field is empty check if it needs to be quoted.
@@ -1033,20 +1017,14 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
10331017
*quoted = 1;
10341018
}
10351019

1036-
/* Handle final quote character on field.
1037-
*/
10381020
if (*quoted) {
10391021
if (copy_phase)
1040-
self->rec[rec_len] = dialect->quotechar;
1022+
ADDCH(dialect->quotechar);
10411023
else
1042-
/* Didn't know about leading quote until we found it
1043-
* necessary in field data - compensate for it now.
1044-
*/
1045-
rec_len++;
1046-
rec_len++;
1024+
rec_len += 2;
10471025
}
1048-
10491026
return rec_len;
1027+
#undef ADDCH
10501028
}
10511029

10521030
static int
@@ -1146,18 +1124,16 @@ csv_writerow(WriterObj *self, PyObject *seq)
11461124
if (field == NULL)
11471125
return NULL;
11481126

1149-
quoted = 0;
1150-
if (dialect->quoting == QUOTE_NONNUMERIC) {
1151-
PyObject *num;
1152-
1153-
num = PyNumber_Float(field);
1154-
if (num == NULL) {
1155-
quoted = 1;
1156-
PyErr_Clear();
1157-
}
1158-
else {
1159-
Py_DECREF(num);
1160-
}
1127+
switch (dialect->quoting) {
1128+
case QUOTE_NONNUMERIC:
1129+
quoted = !PyNumber_Check(field);
1130+
break;
1131+
case QUOTE_ALL:
1132+
quoted = 1;
1133+
break;
1134+
default:
1135+
quoted = 0;
1136+
break;
11611137
}
11621138

11631139
if (PyString_Check(field)) {

0 commit comments

Comments
 (0)