@@ -47,9 +47,9 @@ typedef struct {
4747 PyObject_HEAD
4848
4949 int doublequote ; /* is " represented by ""? */
50- Py_UNICODE delimiter ; /* field separator */
51- Py_UNICODE quotechar ; /* quote character */
52- Py_UNICODE escapechar ; /* escape character */
50+ Py_UCS4 delimiter ; /* field separator */
51+ Py_UCS4 quotechar ; /* quote character */
52+ Py_UCS4 escapechar ; /* escape character */
5353 int skipinitialspace ; /* ignore spaces following delimiter? */
5454 PyObject * lineterminator ; /* string to write between records */
5555 int quoting ; /* style of quoting to write */
@@ -68,7 +68,7 @@ typedef struct {
6868
6969 PyObject * fields ; /* field list for current record */
7070 ParserState state ; /* current CSV parse state */
71- Py_UNICODE * field ; /* build current field in here */
71+ Py_UCS4 * field ; /* temporary buffer */
7272 Py_ssize_t field_size ; /* size of allocated buffer */
7373 Py_ssize_t field_len ; /* length of current field */
7474 int numeric_field ; /* treat field as numeric */
@@ -86,7 +86,7 @@ typedef struct {
8686
8787 DialectObj * dialect ; /* parsing dialect */
8888
89- Py_UNICODE * rec ; /* buffer for parser.join */
89+ Py_UCS4 * rec ; /* buffer for parser.join */
9090 Py_ssize_t rec_size ; /* size of allocated record */
9191 Py_ssize_t rec_len ; /* length of record */
9292 int num_fields ; /* number of fields in record */
@@ -121,7 +121,7 @@ get_string(PyObject *str)
121121}
122122
123123static PyObject *
124- get_nullchar_as_None (Py_UNICODE c )
124+ get_nullchar_as_None (Py_UCS4 c )
125125{
126126 if (c == '\0' ) {
127127 Py_INCREF (Py_None );
@@ -199,25 +199,23 @@ _set_int(const char *name, int *target, PyObject *src, int dflt)
199199}
200200
201201static int
202- _set_char (const char * name , Py_UNICODE * target , PyObject * src , Py_UNICODE dflt )
202+ _set_char (const char * name , Py_UCS4 * target , PyObject * src , Py_UCS4 dflt )
203203{
204204 if (src == NULL )
205205 * target = dflt ;
206206 else {
207207 * target = '\0' ;
208208 if (src != Py_None ) {
209- Py_UNICODE * buf ;
210209 Py_ssize_t len ;
211- buf = PyUnicode_AsUnicode (src );
212210 len = PyUnicode_GetSize (src );
213- if (buf == NULL || len > 1 ) {
211+ if (len > 1 ) {
214212 PyErr_Format (PyExc_TypeError ,
215213 "\"%s\" must be an 1-character string" ,
216214 name );
217215 return -1 ;
218216 }
219217 if (len > 0 )
220- * target = buf [ 0 ] ;
218+ * target = PyUnicode_READ_CHAR ( src , 0 ) ;
221219 }
222220 }
223221 return 0 ;
@@ -498,7 +496,8 @@ parse_save_field(ReaderObj *self)
498496{
499497 PyObject * field ;
500498
501- field = PyUnicode_FromUnicode (self -> field , self -> field_len );
499+ field = PyUnicode_FromKindAndData (PyUnicode_4BYTE_KIND ,
500+ (void * ) self -> field , self -> field_len );
502501 if (field == NULL )
503502 return -1 ;
504503 self -> field_len = 0 ;
@@ -507,11 +506,9 @@ parse_save_field(ReaderObj *self)
507506
508507 self -> numeric_field = 0 ;
509508 tmp = PyNumber_Float (field );
510- if (tmp == NULL ) {
511- Py_DECREF (field );
512- return -1 ;
513- }
514509 Py_DECREF (field );
510+ if (tmp == NULL )
511+ return -1 ;
515512 field = tmp ;
516513 }
517514 PyList_Append (self -> fields , field );
@@ -526,16 +523,16 @@ parse_grow_buff(ReaderObj *self)
526523 self -> field_size = 4096 ;
527524 if (self -> field != NULL )
528525 PyMem_Free (self -> field );
529- self -> field = PyMem_New (Py_UNICODE , self -> field_size );
526+ self -> field = PyMem_New (Py_UCS4 , self -> field_size );
530527 }
531528 else {
532- Py_UNICODE * field = self -> field ;
529+ Py_UCS4 * field = self -> field ;
533530 if (self -> field_size > PY_SSIZE_T_MAX / 2 ) {
534531 PyErr_NoMemory ();
535532 return 0 ;
536533 }
537534 self -> field_size *= 2 ;
538- self -> field = PyMem_Resize (field , Py_UNICODE , self -> field_size );
535+ self -> field = PyMem_Resize (field , Py_UCS4 , self -> field_size );
539536 }
540537 if (self -> field == NULL ) {
541538 PyErr_NoMemory ();
@@ -545,7 +542,7 @@ parse_grow_buff(ReaderObj *self)
545542}
546543
547544static int
548- parse_add_char (ReaderObj * self , Py_UNICODE c )
545+ parse_add_char (ReaderObj * self , Py_UCS4 c )
549546{
550547 if (self -> field_len >= field_limit ) {
551548 PyErr_Format (error_obj , "field larger than field limit (%ld)" ,
@@ -559,7 +556,7 @@ parse_add_char(ReaderObj *self, Py_UNICODE c)
559556}
560557
561558static int
562- parse_process_char (ReaderObj * self , Py_UNICODE c )
559+ parse_process_char (ReaderObj * self , Py_UCS4 c )
563560{
564561 DialectObj * dialect = self -> dialect ;
565562
@@ -744,10 +741,12 @@ parse_reset(ReaderObj *self)
744741static PyObject *
745742Reader_iternext (ReaderObj * self )
746743{
747- PyObject * lineobj ;
748744 PyObject * fields = NULL ;
749- Py_UNICODE * line , c ;
750- Py_ssize_t linelen ;
745+ Py_UCS4 c ;
746+ Py_ssize_t pos , linelen ;
747+ unsigned int kind ;
748+ void * data ;
749+ PyObject * lineobj ;
751750
752751 if (parse_reset (self ) < 0 )
753752 return NULL ;
@@ -771,14 +770,12 @@ Reader_iternext(ReaderObj *self)
771770 return NULL ;
772771 }
773772 ++ self -> line_num ;
774- line = PyUnicode_AsUnicode (lineobj );
775- linelen = PyUnicode_GetSize (lineobj );
776- if (line == NULL || linelen < 0 ) {
777- Py_DECREF (lineobj );
778- return NULL ;
779- }
773+ kind = PyUnicode_KIND (lineobj );
774+ data = PyUnicode_DATA (lineobj );
775+ pos = 0 ;
776+ linelen = PyUnicode_GET_LENGTH (lineobj );
780777 while (linelen -- ) {
781- c = * line ++ ;
778+ c = PyUnicode_READ ( kind , data , pos ) ;
782779 if (c == '\0' ) {
783780 Py_DECREF (lineobj );
784781 PyErr_Format (error_obj ,
@@ -789,6 +786,7 @@ Reader_iternext(ReaderObj *self)
789786 Py_DECREF (lineobj );
790787 goto err ;
791788 }
789+ pos ++ ;
792790 }
793791 Py_DECREF (lineobj );
794792 if (parse_process_char (self , 0 ) < 0 )
@@ -945,8 +943,9 @@ join_reset(WriterObj *self)
945943 * record length.
946944 */
947945static Py_ssize_t
948- join_append_data (WriterObj * self , Py_UNICODE * field , int quote_empty ,
949- int * quoted , int copy_phase )
946+ join_append_data (WriterObj * self , unsigned int field_kind , void * field_data ,
947+ Py_ssize_t field_len , int quote_empty , int * quoted ,
948+ int copy_phase )
950949{
951950 DialectObj * dialect = self -> dialect ;
952951 int i ;
@@ -976,13 +975,10 @@ join_append_data(WriterObj *self, Py_UNICODE *field, int quote_empty,
976975
977976 /* Copy/count field data */
978977 /* If field is null just pass over */
979- for (i = 0 ; field ; i ++ ) {
980- Py_UNICODE c = field [ i ] ;
978+ for (i = 0 ; field_data && ( i < field_len ) ; i ++ ) {
979+ Py_UCS4 c = PyUnicode_READ ( field_kind , field_data , i ) ;
981980 int want_escape = 0 ;
982981
983- if (c == '\0' )
984- break ;
985-
986982 if (c == dialect -> delimiter ||
987983 c == dialect -> escapechar ||
988984 c == dialect -> quotechar ||
@@ -1049,13 +1045,13 @@ join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
10491045 self -> rec_size = (rec_len / MEM_INCR + 1 ) * MEM_INCR ;
10501046 if (self -> rec != NULL )
10511047 PyMem_Free (self -> rec );
1052- self -> rec = PyMem_New (Py_UNICODE , self -> rec_size );
1048+ self -> rec = PyMem_New (Py_UCS4 , self -> rec_size );
10531049 }
10541050 else {
1055- Py_UNICODE * old_rec = self -> rec ;
1051+ Py_UCS4 * old_rec = self -> rec ;
10561052
10571053 self -> rec_size = (rec_len / MEM_INCR + 1 ) * MEM_INCR ;
1058- self -> rec = PyMem_Resize (old_rec , Py_UNICODE , self -> rec_size );
1054+ self -> rec = PyMem_Resize (old_rec , Py_UCS4 , self -> rec_size );
10591055 if (self -> rec == NULL )
10601056 PyMem_Free (old_rec );
10611057 }
@@ -1068,19 +1064,29 @@ join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
10681064}
10691065
10701066static int
1071- join_append (WriterObj * self , Py_UNICODE * field , int * quoted , int quote_empty )
1067+ join_append (WriterObj * self , PyObject * field , int * quoted , int quote_empty )
10721068{
1069+ unsigned int field_kind = -1 ;
1070+ void * field_data = NULL ;
1071+ Py_ssize_t field_len = 0 ;
10731072 Py_ssize_t rec_len ;
10741073
1075- rec_len = join_append_data (self , field , quote_empty , quoted , 0 );
1074+ if (field != NULL ) {
1075+ field_kind = PyUnicode_KIND (field );
1076+ field_data = PyUnicode_DATA (field );
1077+ field_len = PyUnicode_GET_LENGTH (field );
1078+ }
1079+ rec_len = join_append_data (self , field_kind , field_data , field_len ,
1080+ quote_empty , quoted , 0 );
10761081 if (rec_len < 0 )
10771082 return 0 ;
10781083
10791084 /* grow record buffer if necessary */
10801085 if (!join_check_rec_size (self , rec_len ))
10811086 return 0 ;
10821087
1083- self -> rec_len = join_append_data (self , field , quote_empty , quoted , 1 );
1088+ self -> rec_len = join_append_data (self , field_kind , field_data , field_len ,
1089+ quote_empty , quoted , 1 );
10841090 self -> num_fields ++ ;
10851091
10861092 return 1 ;
@@ -1089,22 +1095,22 @@ join_append(WriterObj *self, Py_UNICODE *field, int *quoted, int quote_empty)
10891095static int
10901096join_append_lineterminator (WriterObj * self )
10911097{
1092- Py_ssize_t terminator_len ;
1093- Py_UNICODE * terminator ;
1098+ Py_ssize_t terminator_len , i ;
1099+ unsigned int term_kind ;
1100+ void * term_data ;
10941101
1095- terminator_len = PyUnicode_GetSize (self -> dialect -> lineterminator );
1102+ terminator_len = PyUnicode_GET_LENGTH (self -> dialect -> lineterminator );
10961103 if (terminator_len == -1 )
10971104 return 0 ;
10981105
10991106 /* grow record buffer if necessary */
11001107 if (!join_check_rec_size (self , self -> rec_len + terminator_len ))
11011108 return 0 ;
11021109
1103- terminator = PyUnicode_AsUnicode (self -> dialect -> lineterminator );
1104- if (terminator == NULL )
1105- return 0 ;
1106- memmove (self -> rec + self -> rec_len , terminator ,
1107- sizeof (Py_UNICODE )* terminator_len );
1110+ term_kind = PyUnicode_KIND (self -> dialect -> lineterminator );
1111+ term_data = PyUnicode_DATA (self -> dialect -> lineterminator );
1112+ for (i = 0 ; i < terminator_len ; i ++ )
1113+ self -> rec [self -> rec_len + i ] = PyUnicode_READ (term_kind , term_data , i );
11081114 self -> rec_len += terminator_len ;
11091115
11101116 return 1 ;
@@ -1154,14 +1160,11 @@ csv_writerow(WriterObj *self, PyObject *seq)
11541160 }
11551161
11561162 if (PyUnicode_Check (field )) {
1157- append_ok = join_append (self ,
1158- PyUnicode_AS_UNICODE (field ),
1159- & quoted , len == 1 );
1163+ append_ok = join_append (self , field , & quoted , len == 1 );
11601164 Py_DECREF (field );
11611165 }
11621166 else if (field == Py_None ) {
1163- append_ok = join_append (self , NULL ,
1164- & quoted , len == 1 );
1167+ append_ok = join_append (self , NULL , & quoted , len == 1 );
11651168 Py_DECREF (field );
11661169 }
11671170 else {
@@ -1171,9 +1174,7 @@ csv_writerow(WriterObj *self, PyObject *seq)
11711174 Py_DECREF (field );
11721175 if (str == NULL )
11731176 return NULL ;
1174- append_ok = join_append (self ,
1175- PyUnicode_AS_UNICODE (str ),
1176- & quoted , len == 1 );
1177+ append_ok = join_append (self , str , & quoted , len == 1 );
11771178 Py_DECREF (str );
11781179 }
11791180 if (!append_ok )
0 commit comments