Thanks to visit codestin.com
Credit goes to github.com

Skip to content

bpo-36121: Override field_size_limit per csv.reader object #12071

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion Doc/library/csv.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ The :mod:`csv` module defines the following functions:
.. index::
single: universal newlines; csv.reader function

.. function:: reader(csvfile, dialect='excel', **fmtparams)
.. function:: reader(csvfile, dialect='excel', field_size_limit=None, **fmtparams)

Return a reader object which will iterate over lines in the given *csvfile*.
*csvfile* can be any object which supports the :term:`iterator` protocol and returns a
Expand Down Expand Up @@ -417,6 +417,10 @@ Reader objects have the following public attributes:
The number of lines read from the source iterator. This is not the same as the
number of records returned, as records can span multiple lines.

.. attribute:: csvreader.field_size_limit

The maximum field size allowed by this reader. If :const:`None`,
the module-wide limit set by :func:`csv.field_size_limit` is used.

DictReader objects have the following public attribute:

Expand Down
30 changes: 30 additions & 0 deletions Lib/test/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,36 @@ def test_read_bigfield(self):
finally:
csv.field_size_limit(limit)

def test_override_size_limit(self):
# Exercises the per-reader field_size_limit override: default value,
# attribute get/set/delete, constructor validation, and enforcement.

# With no argument, or an explicit None, the attribute reads back as None.
line = ',,,'
reader = csv.reader([line])
self.assertEqual(reader.field_size_limit, None)
reader = csv.reader([line], field_size_limit=None)
self.assertEqual(reader.field_size_limit, None)

# An explicit limit is stored and can be reassigned; assigning None or
# deleting the attribute makes it read back as None again.
reader = csv.reader([line], field_size_limit=5)
self.assertEqual(reader.field_size_limit, 5)
reader.field_size_limit = None
self.assertEqual(reader.field_size_limit, None)
reader.field_size_limit = 6
self.assertEqual(reader.field_size_limit, 6)
del reader.field_size_limit
self.assertEqual(reader.field_size_limit, None)

# A negative limit is rejected with ValueError.
with self.assertRaises(ValueError):
csv.reader([line], field_size_limit=-1)

# A non-integer limit is rejected with TypeError.
with self.assertRaises(TypeError):
csv.reader([line], field_size_limit="string")

# 'long_field' is longer than the limit of 4, so reading raises csv.Error.
# After raising the limit on the same reader, the remaining input is
# expected to yield exactly one parsed row.
line = 'long_field,3,4,5'
reader = csv.reader([line, line], field_size_limit=4)
with self.assertRaises(csv.Error):
list(reader)
reader.field_size_limit = 50
self.assertEqual(list(reader), [['long_field', '3', '4', '5']])


def test_read_linenum(self):
r = csv.reader(['line,1', 'line,2', 'line,3'])
self.assertEqual(r.line_num, 0)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
``csv.field_size_limit`` can now be overridden for each ``csv.reader``
instance. Contributed by Rémi Lapeyre.
114 changes: 98 additions & 16 deletions Modules/_csv.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ typedef struct {
Py_ssize_t field_len; /* length of current field */
int numeric_field; /* treat field as numeric */
unsigned long line_num; /* Source-file line number */
long field_limit; /* field limit for current reader */
} ReaderObj;

static PyTypeObject Reader_Type;
Expand Down Expand Up @@ -572,13 +573,18 @@ parse_grow_buff(ReaderObj *self)
static int
parse_add_char(ReaderObj *self, Py_UCS4 c)
{
if (self->field_len >= _csvstate_global->field_limit) {
long limit = self->field_limit;
if (limit == -1) {
limit = _csvstate_global->field_limit;
}
if (self->field_len >= limit) {
PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
_csvstate_global->field_limit);
limit);
return -1;
}
if (self->field_len == self->field_size && !parse_grow_buff(self))
if (self->field_len == self->field_size && !parse_grow_buff(self)) {
return -1;
}
self->field[self->field_len++] = c;
return 0;
}
Expand Down Expand Up @@ -894,6 +900,45 @@ static struct PyMemberDef Reader_memberlist[] = {
{ NULL }
};

/* Getter for the reader's ``field_size_limit`` attribute.
 *
 * Returns None when no per-reader limit is stored (field_limit == -1),
 * otherwise a new int object holding the stored limit.
 */
static PyObject *
Reader_get_field_size_limit(PyObject *self, void *Py_UNUSED(ignored))
{
    const long stored = ((ReaderObj *)self)->field_limit;

    /* -1 is the sentinel for "no per-reader override". */
    if (stored != -1) {
        return PyLong_FromLong(stored);
    }
    Py_RETURN_NONE;
}

/* Setter for the reader's ``field_size_limit`` attribute.
 *
 * A NULL arg (attribute deletion) or None clears the per-reader override,
 * which is stored as the sentinel -1.  Any other value is converted with
 * PyLong_AsLong() and must be strictly positive.
 *
 * Returns 0 on success.  Returns -1 with an exception set on failure:
 * whatever PyLong_AsLong() raised if the conversion failed, or ValueError
 * if the converted value is <= 0.
 */
static int
Reader_set_field_size_limit(PyObject *self, PyObject *arg, void *Py_UNUSED(ignored))
{
    ReaderObj *reader = (ReaderObj *)self;

    if (arg == NULL || arg == Py_None) {
        reader->field_limit = -1;
        return 0;
    }

    long limit = PyLong_AsLong(arg);
    /* -1 is also a legitimate conversion result, so consult the error
       indicator to tell a failed conversion from the value -1. */
    if (limit == -1 && PyErr_Occurred()) {
        return -1;
    }
    /* Rejecting non-positive values also keeps the -1 sentinel from ever
       being stored as a user-supplied limit. */
    if (limit <= 0) {
        PyErr_SetString(PyExc_ValueError,
                        "field_size_limit must be greater than 0");
        return -1;
    }
    reader->field_limit = limit;
    return 0;
}

/* Computed attributes of reader objects: ``field_size_limit`` is served by
   the getter/setter pair named below. */
static PyGetSetDef Reader_getset[] = {
    {
        "field_size_limit",
        Reader_get_field_size_limit,
        Reader_set_field_size_limit,
        PyDoc_STR("field size limit"),
        NULL,                               /* closure (not used) */
    },
    { NULL },                               /* sentinel */
};

static PyTypeObject Reader_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
Expand Down Expand Up @@ -927,18 +972,19 @@ static PyTypeObject Reader_Type = {
(getiterfunc)Reader_iternext, /*tp_iternext*/
Reader_methods, /*tp_methods*/
Reader_memberlist, /*tp_members*/
0, /*tp_getset*/

Reader_getset, /*tp_getset*/
};

static PyObject *
csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
{
PyObject * iterator, * dialect = NULL;
PyObject * _field_size_limit = NULL;
ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);

if (!self)
if (!self) {
return NULL;
}

self->dialect = NULL;
self->fields = NULL;
Expand All @@ -947,30 +993,66 @@ csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
self->field_size = 0;
self->line_num = 0;

_field_size_limit = PyUnicode_FromString("field_size_limit");
if (_field_size_limit == NULL) {
goto fail;
}
PyObject *field_size_limit = NULL;
if (keyword_args != NULL) {
field_size_limit = PyDict_GetItemWithError(keyword_args, _field_size_limit);
if (PyErr_Occurred()) {
goto fail;
}
}
if (field_size_limit == NULL) {
self->field_limit = -1;
} else if (field_size_limit == Py_None) {
self->field_limit = -1;
if (PyDict_DelItem(keyword_args, _field_size_limit) < 0) {
goto fail;
}
}
else {
long limit = PyLong_AsLong(field_size_limit);
if (PyErr_Occurred()) {
goto fail;
}
if (limit <= 0) {
PyErr_Format(PyExc_ValueError, "field_size_limit must greater than 0");
goto fail;
}
if (PyDict_DelItem(keyword_args, _field_size_limit) < 0) {
goto fail;
}
self->field_limit = limit;
}
Py_CLEAR(_field_size_limit);

if (parse_reset(self) < 0) {
Py_DECREF(self);
return NULL;
goto fail;
}

if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
Py_DECREF(self);
return NULL;
goto fail;
}
self->input_iter = PyObject_GetIter(iterator);
if (self->input_iter == NULL) {
PyErr_SetString(PyExc_TypeError,
"argument 1 must be an iterator");
Py_DECREF(self);
return NULL;
goto fail;
}
self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
if (self->dialect == NULL) {
Py_DECREF(self);
return NULL;
goto fail;
}

PyObject_GC_Track(self);
return (PyObject *)self;

fail:
Py_XDECREF(_field_size_limit);
Py_DECREF(self);
return NULL;
}

/*
Expand Down Expand Up @@ -1535,8 +1617,8 @@ PyDoc_STRVAR(csv_module_doc,
" written as two quotes\n");

PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" csv_reader = reader(iterable , dialect='excel',\n"
" field_size_limit=None, **fmtparams)\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
Expand Down