Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 77ea640

Browse files
committed
Migrate the _csv module to the new unicode APIs
(except for a Py_UNICODE_strchr() call)
1 parent 0959554 commit 77ea640

1 file changed

Lines changed: 61 additions & 60 deletions

File tree

Modules/_csv.c

Lines changed: 61 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -47,9 +47,9 @@ typedef struct {
4747
PyObject_HEAD
4848

4949
int doublequote; /* is " represented by ""? */
50-
Py_UNICODE delimiter; /* field separator */
51-
Py_UNICODE quotechar; /* quote character */
52-
Py_UNICODE escapechar; /* escape character */
50+
Py_UCS4 delimiter; /* field separator */
51+
Py_UCS4 quotechar; /* quote character */
52+
Py_UCS4 escapechar; /* escape character */
5353
int skipinitialspace; /* ignore spaces following delimiter? */
5454
PyObject *lineterminator; /* string to write between records */
5555
int quoting; /* style of quoting to write */
@@ -68,7 +68,7 @@ typedef struct {
6868

6969
PyObject *fields; /* field list for current record */
7070
ParserState state; /* current CSV parse state */
71-
Py_UNICODE *field; /* build current field in here */
71+
Py_UCS4 *field; /* temporary buffer */
7272
Py_ssize_t field_size; /* size of allocated buffer */
7373
Py_ssize_t field_len; /* length of current field */
7474
int numeric_field; /* treat field as numeric */
@@ -86,7 +86,7 @@ typedef struct {
8686

8787
DialectObj *dialect; /* parsing dialect */
8888

89-
Py_UNICODE *rec; /* buffer for parser.join */
89+
Py_UCS4 *rec; /* buffer for parser.join */
9090
Py_ssize_t rec_size; /* size of allocated record */
9191
Py_ssize_t rec_len; /* length of record */
9292
int num_fields; /* number of fields in record */
@@ -121,7 +121,7 @@ get_string(PyObject *str)
121121
}
122122

123123
static PyObject *
124-
get_nullchar_as_None(Py_UNICODE c)
124+
get_nullchar_as_None(Py_UCS4 c)
125125
{
126126
if (c == '\0') {
127127
Py_INCREF(Py_None);
@@ -199,25 +199,23 @@ _set_int(const char *name, int *target, PyObject *src, int dflt)
199199
}
200200

201201
static int
202-
_set_char(const char *name, Py_UNICODE *target, PyObject *src, Py_UNICODE dflt)
202+
_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
203203
{
204204
if (src == NULL)
205205
*target = dflt;
206206
else {
207207
*target = '\0';
208208
if (src != Py_None) {
209-
Py_UNICODE *buf;
210209
Py_ssize_t len;
211-
buf = PyUnicode_AsUnicode(src);
212210
len = PyUnicode_GetSize(src);
213-
if (buf == NULL || len > 1) {
211+
if (len > 1) {
214212
PyErr_Format(PyExc_TypeError,
215213
"\"%s\" must be an 1-character string",
216214
name);
217215
return -1;
218216
}
219217
if (len > 0)
220-
*target = buf[0];
218+
*target = PyUnicode_READ_CHAR(src, 0);
221219
}
222220
}
223221
return 0;
@@ -498,7 +496,8 @@ parse_save_field(ReaderObj *self)
498496
{
499497
PyObject *field;
500498

501-
field = PyUnicode_FromUnicode(self->field, self->field_len);
499+
field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
500+
(void *) self->field, self->field_len);
502501
if (field == NULL)
503502
return -1;
504503
self->field_len = 0;
@@ -507,11 +506,9 @@ parse_save_field(ReaderObj *self)
507506

508507
self->numeric_field = 0;
509508
tmp = PyNumber_Float(field);
510-
if (tmp == NULL) {
511-
Py_DECREF(field);
512-
return -1;
513-
}
514509
Py_DECREF(field);
510+
if (tmp == NULL)
511+
return -1;
515512
field = tmp;
516513
}
517514
PyList_Append(self->fields, field);
@@ -526,16 +523,16 @@ parse_grow_buff(ReaderObj *self)
526523
self->field_size = 4096;
527524
if (self->field != NULL)
528525
PyMem_Free(self->field);
529-
self->field = PyMem_New(Py_UNICODE, self->field_size);
526+
self->field = PyMem_New(Py_UCS4, self->field_size);
530527
}
531528
else {
532-
Py_UNICODE *field = self->field;
529+
Py_UCS4 *field = self->field;
533530
if (self->field_size > PY_SSIZE_T_MAX / 2) {
534531
PyErr_NoMemory();
535532
return 0;
536533
}
537534
self->field_size *= 2;
538-
self->field = PyMem_Resize(field, Py_UNICODE, self->field_size);
535+
self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
539536
}
540537
if (self->field == NULL) {
541538
PyErr_NoMemory();
@@ -545,7 +542,7 @@ parse_grow_buff(ReaderObj *self)
545542
}
546543

547544
static int
548-
parse_add_char(ReaderObj *self, Py_UNICODE c)
545+
parse_add_char(ReaderObj *self, Py_UCS4 c)
549546
{
550547
if (self->field_len >= field_limit) {
551548
PyErr_Format(error_obj, "field larger than field limit (%ld)",
@@ -559,7 +556,7 @@ parse_add_char(ReaderObj *self, Py_UNICODE c)
559556
}
560557

561558
static int
562-
parse_process_char(ReaderObj *self, Py_UNICODE c)
559+
parse_process_char(ReaderObj *self, Py_UCS4 c)
563560
{
564561
DialectObj *dialect = self->dialect;
565562

@@ -744,10 +741,12 @@ parse_reset(ReaderObj *self)
744741
static PyObject *
745742
Reader_iternext(ReaderObj *self)
746743
{
747-
PyObject *lineobj;
748744
PyObject *fields = NULL;
749-
Py_UNICODE *line, c;
750-
Py_ssize_t linelen;
745+
Py_UCS4 c;
746+
Py_ssize_t pos, linelen;
747+
unsigned int kind;
748+
void *data;
749+
PyObject *lineobj;
751750

752751
if (parse_reset(self) < 0)
753752
return NULL;
@@ -771,14 +770,12 @@ Reader_iternext(ReaderObj *self)
771770
return NULL;
772771
}
773772
++self->line_num;
774-
line = PyUnicode_AsUnicode(lineobj);
775-
linelen = PyUnicode_GetSize(lineobj);
776-
if (line == NULL || linelen < 0) {
777-
Py_DECREF(lineobj);
778-
return NULL;
779-
}
773+
kind = PyUnicode_KIND(lineobj);
774+
data = PyUnicode_DATA(lineobj);
775+
pos = 0;
776+
linelen = PyUnicode_GET_LENGTH(lineobj);
780777
while (linelen--) {
781-
c = *line++;
778+
c = PyUnicode_READ(kind, data, pos);
782779
if (c == '\0') {
783780
Py_DECREF(lineobj);
784781
PyErr_Format(error_obj,
@@ -789,6 +786,7 @@ Reader_iternext(ReaderObj *self)
789786
Py_DECREF(lineobj);
790787
goto err;
791788
}
789+
pos++;
792790
}
793791
Py_DECREF(lineobj);
794792
if (parse_process_char(self, 0) < 0)
@@ -945,8 +943,9 @@ join_reset(WriterObj *self)
945943
* record length.
946944
*/
947945
static Py_ssize_t
948-
join_append_data(WriterObj *self, Py_UNICODE *field, int quote_empty,
949-
int *quoted, int copy_phase)
946+
join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
947+
Py_ssize_t field_len, int quote_empty, int *quoted,
948+
int copy_phase)
950949
{
951950
DialectObj *dialect = self->dialect;
952951
int i;
@@ -976,13 +975,10 @@ join_append_data(WriterObj *self, Py_UNICODE *field, int quote_empty,
976975

977976
/* Copy/count field data */
978977
/* If field is null just pass over */
979-
for (i = 0; field; i++) {
980-
Py_UNICODE c = field[i];
978+
for (i = 0; field_data && (i < field_len); i++) {
979+
Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
981980
int want_escape = 0;
982981

983-
if (c == '\0')
984-
break;
985-
986982
if (c == dialect->delimiter ||
987983
c == dialect->escapechar ||
988984
c == dialect->quotechar ||
@@ -1049,13 +1045,13 @@ join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
10491045
self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
10501046
if (self->rec != NULL)
10511047
PyMem_Free(self->rec);
1052-
self->rec = PyMem_New(Py_UNICODE, self->rec_size);
1048+
self->rec = PyMem_New(Py_UCS4, self->rec_size);
10531049
}
10541050
else {
1055-
Py_UNICODE* old_rec = self->rec;
1051+
Py_UCS4* old_rec = self->rec;
10561052

10571053
self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1058-
self->rec = PyMem_Resize(old_rec, Py_UNICODE, self->rec_size);
1054+
self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
10591055
if (self->rec == NULL)
10601056
PyMem_Free(old_rec);
10611057
}
@@ -1068,19 +1064,29 @@ join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
10681064
}
10691065

10701066
static int
1071-
join_append(WriterObj *self, Py_UNICODE *field, int *quoted, int quote_empty)
1067+
join_append(WriterObj *self, PyObject *field, int *quoted, int quote_empty)
10721068
{
1069+
unsigned int field_kind = -1;
1070+
void *field_data = NULL;
1071+
Py_ssize_t field_len = 0;
10731072
Py_ssize_t rec_len;
10741073

1075-
rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1074+
if (field != NULL) {
1075+
field_kind = PyUnicode_KIND(field);
1076+
field_data = PyUnicode_DATA(field);
1077+
field_len = PyUnicode_GET_LENGTH(field);
1078+
}
1079+
rec_len = join_append_data(self, field_kind, field_data, field_len,
1080+
quote_empty, quoted, 0);
10761081
if (rec_len < 0)
10771082
return 0;
10781083

10791084
/* grow record buffer if necessary */
10801085
if (!join_check_rec_size(self, rec_len))
10811086
return 0;
10821087

1083-
self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1088+
self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1089+
quote_empty, quoted, 1);
10841090
self->num_fields++;
10851091

10861092
return 1;
@@ -1089,22 +1095,22 @@ join_append(WriterObj *self, Py_UNICODE *field, int *quoted, int quote_empty)
10891095
static int
10901096
join_append_lineterminator(WriterObj *self)
10911097
{
1092-
Py_ssize_t terminator_len;
1093-
Py_UNICODE *terminator;
1098+
Py_ssize_t terminator_len, i;
1099+
unsigned int term_kind;
1100+
void *term_data;
10941101

1095-
terminator_len = PyUnicode_GetSize(self->dialect->lineterminator);
1102+
terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
10961103
if (terminator_len == -1)
10971104
return 0;
10981105

10991106
/* grow record buffer if necessary */
11001107
if (!join_check_rec_size(self, self->rec_len + terminator_len))
11011108
return 0;
11021109

1103-
terminator = PyUnicode_AsUnicode(self->dialect->lineterminator);
1104-
if (terminator == NULL)
1105-
return 0;
1106-
memmove(self->rec + self->rec_len, terminator,
1107-
sizeof(Py_UNICODE)*terminator_len);
1110+
term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1111+
term_data = PyUnicode_DATA(self->dialect->lineterminator);
1112+
for (i = 0; i < terminator_len; i++)
1113+
self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
11081114
self->rec_len += terminator_len;
11091115

11101116
return 1;
@@ -1154,14 +1160,11 @@ csv_writerow(WriterObj *self, PyObject *seq)
11541160
}
11551161

11561162
if (PyUnicode_Check(field)) {
1157-
append_ok = join_append(self,
1158-
PyUnicode_AS_UNICODE(field),
1159-
&quoted, len == 1);
1163+
append_ok = join_append(self, field, &quoted, len == 1);
11601164
Py_DECREF(field);
11611165
}
11621166
else if (field == Py_None) {
1163-
append_ok = join_append(self, NULL,
1164-
&quoted, len == 1);
1167+
append_ok = join_append(self, NULL, &quoted, len == 1);
11651168
Py_DECREF(field);
11661169
}
11671170
else {
@@ -1171,9 +1174,7 @@ csv_writerow(WriterObj *self, PyObject *seq)
11711174
Py_DECREF(field);
11721175
if (str == NULL)
11731176
return NULL;
1174-
append_ok = join_append(self,
1175-
PyUnicode_AS_UNICODE(str),
1176-
&quoted, len == 1);
1177+
append_ok = join_append(self, str, &quoted, len == 1);
11771178
Py_DECREF(str);
11781179
}
11791180
if (!append_ok)

0 commit comments

Comments
 (0)