From f80d4c6897798a57b7c4649832931f1af2245103 Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Wed, 3 Jun 2020 23:16:10 +0900 Subject: [PATCH 1/7] bpo-40077: Convert _csv module to use PyType_FromSpec --- Modules/_csv.c | 413 +++++++++++++++++++++++++------------------------ 1 file changed, 210 insertions(+), 203 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 2d4247740eb29e..5a79aa7a550723 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -19,6 +19,9 @@ typedef struct { PyObject *error_obj; /* CSV exception */ PyObject *dialects; /* Dialect registry */ long field_limit; /* max parsed field size */ + PyTypeObject *reader_type; + PyTypeObject *writer_type; + PyTypeObject *dialect_type; } _csvstate; static inline _csvstate* @@ -32,16 +35,25 @@ get_csv_state(PyObject *module) static int _csv_clear(PyObject *m) { - Py_CLEAR(get_csv_state(m)->error_obj); - Py_CLEAR(get_csv_state(m)->dialects); + _csvstate *state = get_csv_state(m); + Py_CLEAR(state->error_obj); + Py_CLEAR(state->dialects); + Py_CLEAR(state->dialect_type); + Py_CLEAR(state->reader_type); + Py_CLEAR(state->writer_type); return 0; } static int _csv_traverse(PyObject *m, visitproc visit, void *arg) { - Py_VISIT(get_csv_state(m)->error_obj); - Py_VISIT(get_csv_state(m)->dialects); + Py_VISIT(Py_TYPE(m)); + _csvstate *state = get_csv_state(m); + Py_VISIT(state->error_obj); + Py_VISIT(state->dialects); + Py_VISIT(state->dialect_type); + Py_VISIT(state->reader_type); + Py_VISIT(state->writer_type); return 0; } @@ -51,9 +63,18 @@ _csv_free(void *m) _csv_clear((PyObject *)m); } -static struct PyModuleDef _csvmodule; +PyDoc_STRVAR(unused_reduce_doc, "__reduce__() -> (cls, state)"); + +static PyObject * +unused_reduce(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyErr_Format(PyExc_TypeError, + "cannot pickle %s object", + Py_TYPE(self)->tp_name); + return NULL; +} -#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule))) +static struct PyModuleDef 
_csvmodule; typedef enum { START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD, @@ -92,8 +113,6 @@ typedef struct { } DialectObj; -static PyTypeObject Dialect_Type; - typedef struct { PyObject_HEAD @@ -110,8 +129,6 @@ typedef struct { unsigned long line_num; /* Source-file line number */ } ReaderObj; -static PyTypeObject Reader_Type; - typedef struct { PyObject_HEAD @@ -125,21 +142,19 @@ typedef struct { int num_fields; /* number of fields in record */ } WriterObj; -static PyTypeObject Writer_Type; - /* * DIALECT class */ static PyObject * -get_dialect_from_registry(PyObject * name_obj) +get_dialect_from_registry(_csvstate *state, PyObject * name_obj) { PyObject *dialect_obj; - dialect_obj = PyDict_GetItemWithError(_csvstate_global->dialects, name_obj); + dialect_obj = PyDict_GetItemWithError(state->dialects, name_obj); if (dialect_obj == NULL) { if (!PyErr_Occurred()) - PyErr_Format(_csvstate_global->error_obj, "unknown dialect"); + PyErr_Format(state->error_obj, "unknown dialect"); } else Py_INCREF(dialect_obj); @@ -310,7 +325,9 @@ static void Dialect_dealloc(DialectObj *self) { Py_XDECREF(self->lineterminator); - Py_TYPE(self)->tp_free((PyObject *)self); + PyTypeObject* tp = Py_TYPE(self); + tp->tp_free((PyObject *)self); + Py_DECREF(tp); } static char *dialect_kws[] = { @@ -354,16 +371,17 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) &strict)) return NULL; + _csvstate *state = PyType_GetModuleState(type); if (dialect != NULL) { if (PyUnicode_Check(dialect)) { - dialect = get_dialect_from_registry(dialect); + dialect = get_dialect_from_registry(state, dialect); if (dialect == NULL) return NULL; } else Py_INCREF(dialect); /* Can we reuse this instance? 
*/ - if (PyObject_TypeCheck(dialect, &Dialect_Type) && + if (PyObject_TypeCheck(dialect, state->dialect_type) && delimiter == NULL && doublequote == NULL && escapechar == NULL && @@ -454,53 +472,32 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) return ret; } +static struct PyMethodDef Dialect_methods[] = { + { "__reduce__", (PyCFunction)unused_reduce, METH_NOARGS, unused_reduce_doc}, + { NULL, NULL } +}; + PyDoc_STRVAR(Dialect_Type_doc, "CSV dialect\n" "\n" "The Dialect type records CSV parsing and generation options.\n"); -static PyTypeObject Dialect_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - "_csv.Dialect", /* tp_name */ - sizeof(DialectObj), /* tp_basicsize */ - 0, /* tp_itemsize */ - /* methods */ - (destructor)Dialect_dealloc, /* tp_dealloc */ - 0, /* tp_vectorcall_offset */ - (getattrfunc)0, /* tp_getattr */ - (setattrfunc)0, /* tp_setattr */ - 0, /* tp_as_async */ - (reprfunc)0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - (hashfunc)0, /* tp_hash */ - (ternaryfunc)0, /* tp_call */ - (reprfunc)0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ - Dialect_Type_doc, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - Dialect_memberlist, /* tp_members */ - Dialect_getsetlist, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - dialect_new, /* tp_new */ - 0, /* tp_free */ +static PyType_Slot Dialect_Type_slots[] = { + {Py_tp_doc, (void *)Dialect_Type_doc}, + {Py_tp_getset, Dialect_getsetlist}, + {Py_tp_members, Dialect_memberlist}, + {Py_tp_methods, Dialect_methods}, + {Py_tp_new, dialect_new}, + {Py_tp_dealloc, Dialect_dealloc}, + {0, 0} +}; + +static PyType_Spec 
Dialect_Type_spec = { + .name = "_csv.Dialect", + .basicsize = sizeof(DialectObj), + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, + .slots = Dialect_Type_slots, }; /* @@ -508,9 +505,9 @@ static PyTypeObject Dialect_Type = { * description of the dialect */ static PyObject * -_call_dialect(PyObject *dialect_inst, PyObject *kwargs) +_call_dialect(PyTypeObject *dialect_type, PyObject *dialect_inst, PyObject *kwargs) { - PyObject *type = (PyObject *)&Dialect_Type; + PyObject *type = (PyObject *)dialect_type; if (dialect_inst) { return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs); } @@ -570,9 +567,14 @@ parse_grow_buff(ReaderObj *self) static int parse_add_char(ReaderObj *self, Py_UCS4 c) { - if (self->field_len >= _csvstate_global->field_limit) { - PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)", - _csvstate_global->field_limit); + PyTypeObject *reader_type = Py_TYPE(self); + _csvstate *state = PyType_GetModuleState(reader_type); + if (state == NULL) { + return -1; + } + if (self->field_len >= state->field_limit) { + PyErr_Format(state->error_obj, "field larger than field limit (%ld)", + state->field_limit); return -1; } if (self->field_len == self->field_size && !parse_grow_buff(self)) @@ -585,7 +587,8 @@ static int parse_process_char(ReaderObj *self, Py_UCS4 c) { DialectObj *dialect = self->dialect; - + PyTypeObject *reader_type = Py_TYPE(self); + _csvstate *state = PyType_GetModuleState(reader_type); switch (self->state) { case START_RECORD: /* start of record */ @@ -740,7 +743,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) } else { /* illegal */ - PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'", + PyErr_Format(state->error_obj, "'%c' expected after '%c'", dialect->delimiter, dialect->quotechar); return -1; @@ -753,7 +756,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) else if (c == '\0') self->state = START_RECORD; else { - PyErr_Format(_csvstate_global->error_obj, "new-line character 
seen in unquoted field - do you need to open the file in universal-newline mode?"); + PyErr_Format(state->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?"); return -1; } break; @@ -783,6 +786,8 @@ Reader_iternext(ReaderObj *self) unsigned int kind; const void *data; PyObject *lineobj; + PyTypeObject* reader_type = Py_TYPE(self); + _csvstate *state = PyType_GetModuleState(reader_type); if (parse_reset(self) < 0) return NULL; @@ -793,7 +798,7 @@ Reader_iternext(ReaderObj *self) if (!PyErr_Occurred() && (self->field_len != 0 || self->state == IN_QUOTED_FIELD)) { if (self->dialect->strict) - PyErr_SetString(_csvstate_global->error_obj, + PyErr_SetString(state->error_obj, "unexpected end of data"); else if (parse_save_field(self) >= 0) break; @@ -801,7 +806,7 @@ Reader_iternext(ReaderObj *self) return NULL; } if (!PyUnicode_Check(lineobj)) { - PyErr_Format(_csvstate_global->error_obj, + PyErr_Format(state->error_obj, "iterator should return strings, " "not %.200s " "(the file should be opened in text mode)", @@ -823,7 +828,7 @@ Reader_iternext(ReaderObj *self) c = PyUnicode_READ(kind, data, pos); if (c == '\0') { Py_DECREF(lineobj); - PyErr_Format(_csvstate_global->error_obj, + PyErr_Format(state->error_obj, "line contains NUL"); goto err; } @@ -851,8 +856,11 @@ Reader_dealloc(ReaderObj *self) Py_XDECREF(self->dialect); Py_XDECREF(self->input_iter); Py_XDECREF(self->fields); - if (self->field != NULL) + if (self->field != NULL) { PyMem_Free(self->field); + } + PyTypeObject *tp = Py_TYPE(self); + Py_DECREF(tp); PyObject_GC_Del(self); } @@ -882,6 +890,7 @@ PyDoc_STRVAR(Reader_Type_doc, ); static struct PyMethodDef Reader_methods[] = { + { "__reduce__", (PyCFunction)unused_reduce, METH_NOARGS, unused_reduce_doc}, { NULL, NULL } }; #define R_OFF(x) offsetof(ReaderObj, x) @@ -892,48 +901,30 @@ static struct PyMemberDef Reader_memberlist[] = { { NULL } }; +static PyType_Slot Reader_Type_slots[] = { + {Py_tp_doc, 
(void *)Reader_Type_doc}, + {Py_tp_traverse, Reader_traverse}, + {Py_tp_clear, Reader_clear}, + {Py_tp_iter, PyObject_SelfIter}, + {Py_tp_iternext, Reader_iternext}, + {Py_tp_methods, Reader_methods}, + {Py_tp_members, Reader_memberlist}, + {Py_tp_dealloc, Reader_dealloc}, + {0, 0} +}; -static PyTypeObject Reader_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - "_csv.reader", /*tp_name*/ - sizeof(ReaderObj), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - /* methods */ - (destructor)Reader_dealloc, /*tp_dealloc*/ - 0, /*tp_vectorcall_offset*/ - (getattrfunc)0, /*tp_getattr*/ - (setattrfunc)0, /*tp_setattr*/ - 0, /*tp_as_async*/ - (reprfunc)0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - (hashfunc)0, /*tp_hash*/ - (ternaryfunc)0, /*tp_call*/ - (reprfunc)0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | - Py_TPFLAGS_HAVE_GC, /*tp_flags*/ - Reader_Type_doc, /*tp_doc*/ - (traverseproc)Reader_traverse, /*tp_traverse*/ - (inquiry)Reader_clear, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - PyObject_SelfIter, /*tp_iter*/ - (getiterfunc)Reader_iternext, /*tp_iternext*/ - Reader_methods, /*tp_methods*/ - Reader_memberlist, /*tp_members*/ - 0, /*tp_getset*/ - +static PyType_Spec Reader_Type_spec = { + .name = "_csv.reader", + .basicsize = sizeof(ReaderObj), + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + .slots = Reader_Type_slots, }; static PyObject * csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args) { PyObject * iterator, * dialect = NULL; - ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type); + ReaderObj * self = PyObject_GC_New(ReaderObj, get_csv_state(module)->reader_type); if (!self) return NULL; @@ -961,7 +952,8 @@ csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args) Py_DECREF(self); return NULL; } - self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args); + _csvstate 
*state = get_csv_state(module); + self->dialect = (DialectObj *)_call_dialect(state->dialect_type, dialect, keyword_args); if (self->dialect == NULL) { Py_DECREF(self); return NULL; @@ -995,6 +987,8 @@ join_append_data(WriterObj *self, unsigned int field_kind, const void *field_dat DialectObj *dialect = self->dialect; int i; Py_ssize_t rec_len; + PyTypeObject *writer_type = Py_TYPE(self); + _csvstate *state = PyType_GetModuleState(writer_type); #define INCLEN \ do {\ @@ -1047,7 +1041,7 @@ join_append_data(WriterObj *self, unsigned int field_kind, const void *field_dat } if (want_escape) { if (!dialect->escapechar) { - PyErr_Format(_csvstate_global->error_obj, + PyErr_Format(state->error_obj, "need to escape, but no escapechar set"); return -1; } @@ -1161,10 +1155,11 @@ csv_writerow(WriterObj *self, PyObject *seq) { DialectObj *dialect = self->dialect; PyObject *iter, *field, *line, *result; - + PyTypeObject *writer_type = Py_TYPE(self); + _csvstate* state = PyType_GetModuleState(writer_type); iter = PyObject_GetIter(seq); if (iter == NULL) - return PyErr_Format(_csvstate_global->error_obj, + return PyErr_Format(state->error_obj, "iterable expected, not %.200s", Py_TYPE(seq)->tp_name); @@ -1218,7 +1213,7 @@ csv_writerow(WriterObj *self, PyObject *seq) if (self->num_fields > 0 && self->rec_len == 0) { if (dialect->quoting == QUOTE_NONE) { - PyErr_Format(_csvstate_global->error_obj, + PyErr_Format(state->error_obj, "single empty field record must be quoted"); return NULL; } @@ -1279,6 +1274,7 @@ csv_writerows(WriterObj *self, PyObject *seqseq) static struct PyMethodDef Writer_methods[] = { { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc}, { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc}, + { "__reduce__", (PyCFunction)unused_reduce, METH_NOARGS, unused_reduce_doc}, { NULL, NULL } }; @@ -1295,8 +1291,11 @@ Writer_dealloc(WriterObj *self) PyObject_GC_UnTrack(self); Py_XDECREF(self->dialect); Py_XDECREF(self->write); - if (self->rec 
!= NULL) + if (self->rec != NULL) { PyMem_Free(self->rec); + } + PyTypeObject *tp = Py_TYPE(self); + Py_DECREF(tp); PyObject_GC_Del(self); } @@ -1323,46 +1322,28 @@ PyDoc_STRVAR(Writer_Type_doc, "in CSV format from sequence input.\n" ); -static PyTypeObject Writer_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - "_csv.writer", /*tp_name*/ - sizeof(WriterObj), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - /* methods */ - (destructor)Writer_dealloc, /*tp_dealloc*/ - 0, /*tp_vectorcall_offset*/ - (getattrfunc)0, /*tp_getattr*/ - (setattrfunc)0, /*tp_setattr*/ - 0, /*tp_as_async*/ - (reprfunc)0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - (hashfunc)0, /*tp_hash*/ - (ternaryfunc)0, /*tp_call*/ - (reprfunc)0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | - Py_TPFLAGS_HAVE_GC, /*tp_flags*/ - Writer_Type_doc, - (traverseproc)Writer_traverse, /*tp_traverse*/ - (inquiry)Writer_clear, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - (getiterfunc)0, /*tp_iter*/ - (getiterfunc)0, /*tp_iternext*/ - Writer_methods, /*tp_methods*/ - Writer_memberlist, /*tp_members*/ - 0, /*tp_getset*/ +static PyType_Slot Writer_Type_slots[] = { + {Py_tp_doc, (void *)Writer_Type_doc}, + {Py_tp_traverse, Writer_traverse}, + {Py_tp_clear, Writer_clear}, + {Py_tp_methods, Writer_methods}, + {Py_tp_members, Writer_memberlist}, + {Py_tp_dealloc, Writer_dealloc}, + {0, 0} +}; + +static PyType_Spec Writer_Type_spec = { + .name = "_csv.writer", + .basicsize = sizeof(WriterObj), + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + .slots = Writer_Type_slots, }; static PyObject * csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) { PyObject * output_file, * dialect = NULL; - WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type); + WriterObj * self = PyObject_GC_New(WriterObj, get_csv_state(module)->writer_type); _Py_IDENTIFIER(write); if (!self) 
@@ -1390,7 +1371,8 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) Py_DECREF(self); return NULL; } - self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args); + _csvstate *state = get_csv_state(module); + self->dialect = (DialectObj *)_call_dialect(state->dialect_type, dialect, keyword_args); if (self->dialect == NULL) { Py_DECREF(self); return NULL; @@ -1405,7 +1387,7 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) static PyObject * csv_list_dialects(PyObject *module, PyObject *args) { - return PyDict_Keys(_csvstate_global->dialects); + return PyDict_Keys(get_csv_state(module)->dialects); } static PyObject * @@ -1423,10 +1405,12 @@ csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs) } if (PyUnicode_READY(name_obj) == -1) return NULL; - dialect = _call_dialect(dialect_obj, kwargs); + + _csvstate *state = get_csv_state(module); + dialect = _call_dialect(state->dialect_type, dialect_obj, kwargs); if (dialect == NULL) return NULL; - if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) { + if (PyDict_SetItem(state->dialects, name_obj, dialect) < 0) { Py_DECREF(dialect); return NULL; } @@ -1437,9 +1421,10 @@ csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs) static PyObject * csv_unregister_dialect(PyObject *module, PyObject *name_obj) { - if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0) { + _csvstate *state = get_csv_state(module); + if (PyDict_DelItem(state->dialects, name_obj) < 0) { if (PyErr_ExceptionMatches(PyExc_KeyError)) { - PyErr_Format(_csvstate_global->error_obj, "unknown dialect"); + PyErr_Format(state->error_obj, "unknown dialect"); } return NULL; } @@ -1449,14 +1434,15 @@ csv_unregister_dialect(PyObject *module, PyObject *name_obj) static PyObject * csv_get_dialect(PyObject *module, PyObject *name_obj) { - return get_dialect_from_registry(name_obj); + return get_dialect_from_registry(get_csv_state(module), name_obj); } static 
PyObject * csv_field_size_limit(PyObject *module, PyObject *args) { PyObject *new_limit = NULL; - long old_limit = _csvstate_global->field_limit; + _csvstate *state = get_csv_state(module); + long old_limit = state->field_limit; if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit)) return NULL; @@ -1466,9 +1452,10 @@ csv_field_size_limit(PyObject *module, PyObject *args) "limit must be an integer"); return NULL; } - _csvstate_global->field_limit = PyLong_AsLong(new_limit); - if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) { - _csvstate_global->field_limit = old_limit; + + state->field_limit = PyLong_AsLong(new_limit); + if (state->field_limit == -1 && PyErr_Occurred()) { + state->field_limit = old_limit; return NULL; } } @@ -1607,68 +1594,88 @@ static struct PyMethodDef csv_methods[] = { { NULL, NULL } }; -static struct PyModuleDef _csvmodule = { - PyModuleDef_HEAD_INIT, - "_csv", - csv_module_doc, - sizeof(_csvstate), - csv_methods, - NULL, - _csv_traverse, - _csv_clear, - _csv_free -}; -PyMODINIT_FUNC -PyInit__csv(void) + +static int +_csv_exec(PyObject *module) { - PyObject *module; + _csvstate *state = get_csv_state(module); const StyleDesc *style; - - if (PyType_Ready(&Reader_Type) < 0) - return NULL; - - if (PyType_Ready(&Writer_Type) < 0) - return NULL; - - /* Create the module and add the functions */ - module = PyModule_Create(&_csvmodule); - if (module == NULL) - return NULL; - + state->reader_type = (PyTypeObject*)PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL); + if (state->reader_type == NULL) { + return -1; + } + state->writer_type = (PyTypeObject*)PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL); + if (state->writer_type == NULL) { + return -1; + } /* Add version to the module. 
*/ if (PyModule_AddStringConstant(module, "__version__", - MODULE_VERSION) == -1) - return NULL; + MODULE_VERSION) == -1) { + return -1; + } /* Set the field limit */ - get_csv_state(module)->field_limit = 128 * 1024; + state->field_limit = 128 * 1024; /* Do I still need to add this var to the Module Dict? */ /* Add _dialects dictionary */ - get_csv_state(module)->dialects = PyDict_New(); - if (get_csv_state(module)->dialects == NULL) - return NULL; - Py_INCREF(get_csv_state(module)->dialects); - if (PyModule_AddObject(module, "_dialects", get_csv_state(module)->dialects)) - return NULL; - + state->dialects = PyDict_New(); + if (state->dialects == NULL) { + return -1; + } + Py_INCREF(state->dialects); + if (PyModule_AddObject(module, "_dialects", state->dialects) < 0) { + return -1; + } /* Add quote styles into dictionary */ for (style = quote_styles; style->name; style++) { if (PyModule_AddIntConstant(module, style->name, - style->style) == -1) - return NULL; + style->style) == -1) { + return -1; + } } - if (PyModule_AddType(module, &Dialect_Type)) { - return NULL; + state->dialect_type = (PyTypeObject*)PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL); + if (state->dialect_type == NULL) { + return -1; + } + if (PyModule_AddType(module, state->dialect_type) < 0) { + return -1; } /* Add the CSV exception object to the module. 
*/ - get_csv_state(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL); - if (get_csv_state(module)->error_obj == NULL) - return NULL; - Py_INCREF(get_csv_state(module)->error_obj); - PyModule_AddObject(module, "Error", get_csv_state(module)->error_obj); - return module; + state->error_obj = PyErr_NewException("_csv.Error", NULL, NULL); + if (state->error_obj == NULL) { + return -1; + } + + if (PyModule_AddType(module, (PyTypeObject *)state->error_obj) < 0) { + return -1; + } + + return 0; +} + +static PyModuleDef_Slot _csv_slots[] = { + {Py_mod_exec, _csv_exec}, + {0, NULL} +}; + +static struct PyModuleDef _csvmodule = { + PyModuleDef_HEAD_INIT, + .m_name = "_csv", + .m_doc = csv_module_doc, + .m_size = sizeof(_csvstate), + .m_methods = csv_methods, + .m_slots = _csv_slots, + .m_traverse = _csv_traverse, + .m_clear = _csv_clear, + .m_free = _csv_free +}; + +PyMODINIT_FUNC +PyInit__csv(void) +{ + return PyModuleDef_Init(&_csvmodule); } From 53df5eb05ee0abe0e335074aee4d2555078eddcf Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Fri, 19 Jun 2020 12:10:25 +0900 Subject: [PATCH 2/7] bpo-40077: Add NEWS.d --- .../Core and Builtins/2020-06-19-12-10-22.bpo-40077.O97CmK.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2020-06-19-12-10-22.bpo-40077.O97CmK.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-06-19-12-10-22.bpo-40077.O97CmK.rst b/Misc/NEWS.d/next/Core and Builtins/2020-06-19-12-10-22.bpo-40077.O97CmK.rst new file mode 100644 index 00000000000000..936cba258f2be7 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-06-19-12-10-22.bpo-40077.O97CmK.rst @@ -0,0 +1 @@ +Convert :mod:`_csv` to use :c:func:`PyType_FromSpec`. 
From dbc4d329128cfc59f713ca7eebbe51c574c1a8be Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Fri, 19 Jun 2020 15:02:35 +0900 Subject: [PATCH 3/7] bpo-40077: Update --- Modules/_csv.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 5a79aa7a550723..36384634d238cd 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -569,9 +569,7 @@ parse_add_char(ReaderObj *self, Py_UCS4 c) { PyTypeObject *reader_type = Py_TYPE(self); _csvstate *state = PyType_GetModuleState(reader_type); - if (state == NULL) { - return -1; - } + assert(state != NULL); if (self->field_len >= state->field_limit) { PyErr_Format(state->error_obj, "field larger than field limit (%ld)", state->field_limit); @@ -589,6 +587,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) DialectObj *dialect = self->dialect; PyTypeObject *reader_type = Py_TYPE(self); _csvstate *state = PyType_GetModuleState(reader_type); + assert(state != NULL); switch (self->state) { case START_RECORD: /* start of record */ @@ -788,7 +787,7 @@ Reader_iternext(ReaderObj *self) PyObject *lineobj; PyTypeObject* reader_type = Py_TYPE(self); _csvstate *state = PyType_GetModuleState(reader_type); - + assert(state != NULL); if (parse_reset(self) < 0) return NULL; do { From 9f1266bd4c61d19824ef7ca8c09c4fe8fe9f6cc3 Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Fri, 19 Jun 2020 15:13:43 +0900 Subject: [PATCH 4/7] bpo-40077: PEP7 --- Modules/_csv.c | 153 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 102 insertions(+), 51 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 36384634d238cd..d5de38570ad5c0 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -153,11 +153,13 @@ get_dialect_from_registry(_csvstate *state, PyObject * name_obj) dialect_obj = PyDict_GetItemWithError(state->dialects, name_obj); if (dialect_obj == NULL) { - if (!PyErr_Occurred()) + if (!PyErr_Occurred()) { PyErr_Format(state->error_obj, "unknown dialect"); + } } - else + else { 
Py_INCREF(dialect_obj); + } return dialect_obj; } @@ -167,8 +169,9 @@ get_nullchar_as_None(Py_UCS4 c) if (c == '\0') { Py_RETURN_NONE; } - else + else { return PyUnicode_FromOrdinal(c); + } } static PyObject * @@ -205,12 +208,14 @@ Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored)) static int _set_bool(const char *name, char *target, PyObject *src, bool dflt) { - if (src == NULL) + if (src == NULL) { *target = dflt; + } else { int b = PyObject_IsTrue(src); - if (b < 0) + if (b < 0) { return -1; + } *target = (char)b; } return 0; @@ -219,8 +224,9 @@ _set_bool(const char *name, char *target, PyObject *src, bool dflt) static int _set_int(const char *name, int *target, PyObject *src, int dflt) { - if (src == NULL) + if (src == NULL) { *target = dflt; + } else { int value; if (!PyLong_CheckExact(src)) { @@ -240,8 +246,9 @@ _set_int(const char *name, int *target, PyObject *src, int dflt) static int _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) { - if (src == NULL) + if (src == NULL) { *target = dflt; + } else { *target = '\0'; if (src != Py_None) { @@ -260,8 +267,9 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) return -1; } /* PyUnicode_READY() is called in PyUnicode_GetLength() */ - if (len > 0) + if (len > 0) { *target = PyUnicode_READ_CHAR(src, 0); + } } } return 0; @@ -270,19 +278,22 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) static int _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt) { - if (src == NULL) + if (src == NULL) { *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL); + } else { - if (src == Py_None) + if (src == Py_None) { *target = NULL; + } else if (!PyUnicode_Check(src)) { PyErr_Format(PyExc_TypeError, "\"%s\" must be a string", name); return -1; } else { - if (PyUnicode_READY(src) == -1) + if (PyUnicode_READY(src) == -1) { return -1; + } Py_INCREF(src); Py_XSETREF(*target, src); } @@ -296,8 +307,9 @@ 
dialect_check_quoting(int quoting) const StyleDesc *qs; for (qs = quote_styles; qs->name; qs++) { - if ((int)qs->style == quoting) + if ((int)qs->style == quoting) { return 0; + } } PyErr_Format(PyExc_TypeError, "bad \"quoting\" value"); return -1; @@ -375,11 +387,13 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) if (dialect != NULL) { if (PyUnicode_Check(dialect)) { dialect = get_dialect_from_registry(state, dialect); - if (dialect == NULL) + if (dialect == NULL) { return NULL; + } } - else + else { Py_INCREF(dialect); + } /* Can we reuse this instance? */ if (PyObject_TypeCheck(dialect, state->dialect_type) && delimiter == NULL && @@ -389,8 +403,9 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) quotechar == NULL && quoting == NULL && skipinitialspace == NULL && - strict == NULL) + strict == NULL) { return dialect; + } } self = (DialectObj *)type->tp_alloc(type, 0); @@ -526,8 +541,9 @@ parse_save_field(ReaderObj *self) field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, (void *) self->field, self->field_len); - if (field == NULL) + if (field == NULL) { return -1; + } self->field_len = 0; if (self->numeric_field) { PyObject *tmp; @@ -535,8 +551,9 @@ parse_save_field(ReaderObj *self) self->numeric_field = 0; tmp = PyNumber_Float(field); Py_DECREF(field); - if (tmp == NULL) + if (tmp == NULL) { return -1; + } field = tmp; } if (PyList_Append(self->fields, field) < 0) { @@ -575,8 +592,9 @@ parse_add_char(ReaderObj *self, Py_UCS4 c) state->field_limit); return -1; } - if (self->field_len == self->field_size && !parse_grow_buff(self)) + if (self->field_len == self->field_size && !parse_grow_buff(self)) { return -1; + } self->field[self->field_len++] = c; return 0; } @@ -591,9 +609,10 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) switch (self->state) { case START_RECORD: /* start of record */ - if (c == '\0') + if (c == '\0') { /* empty line - return [] */ break; + } else if (c == '\n' || c == '\r') { self->state = 
EAT_CRNL; break; @@ -605,8 +624,9 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) /* expecting field */ if (c == '\n' || c == '\r' || c == '\0') { /* save empty field - return [fields] */ - if (parse_save_field(self) < 0) + if (parse_save_field(self) < 0) { return -1; + } self->state = (c == '\0' ? START_RECORD : EAT_CRNL); } else if (c == dialect->quotechar && @@ -623,44 +643,52 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) ; else if (c == dialect->delimiter) { /* save empty field */ - if (parse_save_field(self) < 0) + if (parse_save_field(self) < 0) { return -1; + } } else { /* begin new unquoted field */ - if (dialect->quoting == QUOTE_NONNUMERIC) + if (dialect->quoting == QUOTE_NONNUMERIC) { self->numeric_field = 1; - if (parse_add_char(self, c) < 0) + } + if (parse_add_char(self, c) < 0) { return -1; + } self->state = IN_FIELD; } break; case ESCAPED_CHAR: if (c == '\n' || c=='\r') { - if (parse_add_char(self, c) < 0) + if (parse_add_char(self, c) < 0) { return -1; + } self->state = AFTER_ESCAPED_CRNL; break; } - if (c == '\0') + if (c == '\0') { c = '\n'; - if (parse_add_char(self, c) < 0) + } + if (parse_add_char(self, c) < 0) { return -1; + } self->state = IN_FIELD; break; case AFTER_ESCAPED_CRNL: - if (c == '\0') + if (c == '\0') { break; + } /*fallthru*/ case IN_FIELD: /* in unquoted field */ if (c == '\n' || c == '\r' || c == '\0') { /* end of line - return [fields] */ - if (parse_save_field(self) < 0) + if (parse_save_field(self) < 0) { return -1; + } self->state = (c == '\0' ? 
START_RECORD : EAT_CRNL); } else if (c == dialect->escapechar) { @@ -669,14 +697,16 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) } else if (c == dialect->delimiter) { /* save field - wait for new field */ - if (parse_save_field(self) < 0) + if (parse_save_field(self) < 0) { return -1; + } self->state = START_FIELD; } else { /* normal character - save in field */ - if (parse_add_char(self, c) < 0) + if (parse_add_char(self, c) < 0) { return -1; + } } break; @@ -701,16 +731,19 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) } else { /* normal character - save in field */ - if (parse_add_char(self, c) < 0) + if (parse_add_char(self, c) < 0) { return -1; + } } break; case ESCAPE_IN_QUOTED_FIELD: - if (c == '\0') + if (c == '\0') { c = '\n'; - if (parse_add_char(self, c) < 0) + } + if (parse_add_char(self, c) < 0) { return -1; + } self->state = IN_QUOTED_FIELD; break; @@ -719,25 +752,29 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) if (dialect->quoting != QUOTE_NONE && c == dialect->quotechar) { /* save "" as " */ - if (parse_add_char(self, c) < 0) + if (parse_add_char(self, c) < 0) { return -1; + } self->state = IN_QUOTED_FIELD; } else if (c == dialect->delimiter) { /* save field - wait for new field */ - if (parse_save_field(self) < 0) + if (parse_save_field(self) < 0) { return -1; + } self->state = START_FIELD; } else if (c == '\n' || c == '\r' || c == '\0') { /* end of line - return [fields] */ - if (parse_save_field(self) < 0) + if (parse_save_field(self) < 0) { return -1; + } self->state = (c == '\0' ? 
START_RECORD : EAT_CRNL); } else if (!dialect->strict) { - if (parse_add_char(self, c) < 0) + if (parse_add_char(self, c) < 0) { return -1; + } self->state = IN_FIELD; } else { @@ -752,8 +789,9 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) case EAT_CRNL: if (c == '\n' || c == '\r') ; - else if (c == '\0') + else if (c == '\0') { self->state = START_RECORD; + } else { PyErr_Format(state->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?"); return -1; @@ -768,8 +806,9 @@ static int parse_reset(ReaderObj *self) { Py_XSETREF(self->fields, PyList_New(0)); - if (self->fields == NULL) + if (self->fields == NULL) { return -1; + } self->field_len = 0; self->state = START_RECORD; self->numeric_field = 0; @@ -788,19 +827,22 @@ Reader_iternext(ReaderObj *self) PyTypeObject* reader_type = Py_TYPE(self); _csvstate *state = PyType_GetModuleState(reader_type); assert(state != NULL); - if (parse_reset(self) < 0) + if (parse_reset(self) < 0) { return NULL; + } do { lineobj = PyIter_Next(self->input_iter); if (lineobj == NULL) { /* End of input OR exception */ if (!PyErr_Occurred() && (self->field_len != 0 || self->state == IN_QUOTED_FIELD)) { - if (self->dialect->strict) + if (self->dialect->strict) { PyErr_SetString(state->error_obj, "unexpected end of data"); - else if (parse_save_field(self) >= 0) + } + else if (parse_save_field(self) >= 0) { break; + } } return NULL; } @@ -838,8 +880,9 @@ Reader_iternext(ReaderObj *self) pos++; } Py_DECREF(lineobj); - if (parse_process_char(self, 0) < 0) + if (parse_process_char(self, 0) < 0) { goto err; + } } while (self->state != START_RECORD); fields = self->fields; @@ -1053,8 +1096,9 @@ join_append_data(WriterObj *self, unsigned int field_kind, const void *field_dat } if (*quoted) { - if (copy_phase) + if (copy_phase) { ADDCH(dialect->quotechar); + } else { INCLEN; /* starting quote */ INCLEN; /* ending quote */ @@ -1097,20 +1141,23 @@ join_append(WriterObj *self, PyObject 
*field, int quoted) Py_ssize_t rec_len; if (field != NULL) { - if (PyUnicode_READY(field) == -1) + if (PyUnicode_READY(field) == -1) { return 0; + } field_kind = PyUnicode_KIND(field); field_data = PyUnicode_DATA(field); field_len = PyUnicode_GET_LENGTH(field); } rec_len = join_append_data(self, field_kind, field_data, field_len, &quoted, 0); - if (rec_len < 0) + if (rec_len < 0) { return 0; + } /* grow record buffer if necessary */ - if (!join_check_rec_size(self, rec_len)) + if (!join_check_rec_size(self, rec_len)) { return 0; + } self->rec_len = join_append_data(self, field_kind, field_data, field_len, &quoted, 1); @@ -1127,17 +1174,20 @@ join_append_lineterminator(WriterObj *self) const void *term_data; terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator); - if (terminator_len == -1) + if (terminator_len == -1) { return 0; + } /* grow record buffer if necessary */ - if (!join_check_rec_size(self, self->rec_len + terminator_len)) + if (!join_check_rec_size(self, self->rec_len + terminator_len)) { return 0; + } term_kind = PyUnicode_KIND(self->dialect->lineterminator); term_data = PyUnicode_DATA(self->dialect->lineterminator); - for (i = 0; i < terminator_len; i++) + for (i = 0; i < terminator_len; i++) { self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i); + } self->rec_len += terminator_len; return 1; @@ -1157,10 +1207,11 @@ csv_writerow(WriterObj *self, PyObject *seq) PyTypeObject *writer_type = Py_TYPE(self); _csvstate* state = PyType_GetModuleState(writer_type); iter = PyObject_GetIter(seq); - if (iter == NULL) + if (iter == NULL) { return PyErr_Format(state->error_obj, "iterable expected, not %.200s", Py_TYPE(seq)->tp_name); + } /* Join all fields in internal buffer.
*/ From 85d36e9a066ee22c86cf715a4fb15c1d32e66220 Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Fri, 19 Jun 2020 19:26:18 +0900 Subject: [PATCH 5/7] bpo-40077: Update --- Modules/_csv.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index d5de38570ad5c0..85899d89a442ba 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -47,7 +47,6 @@ _csv_clear(PyObject *m) static int _csv_traverse(PyObject *m, visitproc visit, void *arg) { - Py_VISIT(Py_TYPE(m)); _csvstate *state = get_csv_state(m); Py_VISIT(state->error_obj); Py_VISIT(state->dialects); @@ -63,10 +62,10 @@ _csv_free(void *m) _csv_clear((PyObject *)m); } -PyDoc_STRVAR(unused_reduce_doc, "__reduce__() -> (cls, state)"); +PyDoc_STRVAR(reduce_always_fail_doc, "__reduce__() -> (cls, state)"); static PyObject * -unused_reduce(PyObject *self, PyObject *Py_UNUSED(ignored)) +reduce_always_fail(PyObject *self, PyObject *Py_UNUSED(ignored)) { PyErr_Format(PyExc_TypeError, "cannot pickle %s object", @@ -488,7 +487,7 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) } static struct PyMethodDef Dialect_methods[] = { - { "__reduce__", (PyCFunction)unused_reduce, METH_NOARGS, unused_reduce_doc}, + { "__reduce__", (PyCFunction)reduce_always_fail, METH_NOARGS, reduce_always_fail_doc}, { NULL, NULL } }; @@ -932,7 +931,7 @@ PyDoc_STRVAR(Reader_Type_doc, ); static struct PyMethodDef Reader_methods[] = { - { "__reduce__", (PyCFunction)unused_reduce, METH_NOARGS, unused_reduce_doc}, + { "__reduce__", (PyCFunction)reduce_always_fail, METH_NOARGS, reduce_always_fail_doc}, { NULL, NULL } }; #define R_OFF(x) offsetof(ReaderObj, x) @@ -1324,7 +1323,7 @@ csv_writerows(WriterObj *self, PyObject *seqseq) static struct PyMethodDef Writer_methods[] = { { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc}, { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc}, - { "__reduce__", (PyCFunction)unused_reduce, METH_NOARGS, 
unused_reduce_doc}, + { "__reduce__", (PyCFunction)reduce_always_fail, METH_NOARGS, reduce_always_fail_doc}, { NULL, NULL } }; From 10114f006edfb9b21ad456573541070b8de1a890 Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Fri, 19 Jun 2020 19:31:20 +0900 Subject: [PATCH 6/7] bpo-40077: Update --- Modules/_csv.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 85899d89a442ba..1ffff12a460184 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -38,9 +38,6 @@ _csv_clear(PyObject *m) _csvstate *state = get_csv_state(m); Py_CLEAR(state->error_obj); Py_CLEAR(state->dialects); - Py_CLEAR(state->dialect_type); - Py_CLEAR(state->reader_type); - Py_CLEAR(state->writer_type); return 0; } @@ -50,9 +47,6 @@ _csv_traverse(PyObject *m, visitproc visit, void *arg) _csvstate *state = get_csv_state(m); Py_VISIT(state->error_obj); Py_VISIT(state->dialects); - Py_VISIT(state->dialect_type); - Py_VISIT(state->reader_type); - Py_VISIT(state->writer_type); return 0; } From 14c6de7a055af655e3504d8427c8a9d6145a7b7d Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Fri, 19 Jun 2020 19:37:18 +0900 Subject: [PATCH 7/7] bpo-40077: Apply Victor's suggestion --- Modules/_csv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 1ffff12a460184..59131e389c2cf1 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -1496,11 +1496,11 @@ csv_field_size_limit(PyObject *module, PyObject *args) return NULL; } - state->field_limit = PyLong_AsLong(new_limit); - if (state->field_limit == -1 && PyErr_Occurred()) { - state->field_limit = old_limit; + long tmp_limit = PyLong_AsLong(new_limit); + if (tmp_limit == -1 && PyErr_Occurred()) { return NULL; } + state->field_limit = tmp_limit; } return PyLong_FromLong(old_limit); }