@@ -44,6 +44,7 @@ module instead.
4444
4545static PyObject * error_obj ; /* CSV exception type; passed to PyErr_Format() when parsing fails */
4646static PyObject * dialects ; /* Dialect registry */
47+ static long field_limit = 128 * 1024 ; /* max parsed field size; parse_add_char() refuses to grow a field past this, csv.set_field_limit() changes it */
4748
4849typedef enum {
4950 START_RECORD , START_FIELD , ESCAPED_CHAR , IN_FIELD ,
@@ -527,15 +528,21 @@ parse_grow_buff(ReaderObj *self)
527528 return 1 ;
528529}
529530
530- static void
531+ static int
531532parse_add_char (ReaderObj * self , char c )
532533{
534+ if (self -> field_len >= field_limit ) {
535+ PyErr_Format (error_obj , "field larger than field limit (%ld)" ,
536+ field_limit );
537+ return -1 ;
538+ }
533539 if (self -> field_len == self -> field_size && !parse_grow_buff (self ))
534- return ;
540+ return -1 ;
535541 self -> field [self -> field_len ++ ] = c ;
542+ return 0 ;
536543}
537544
538- static void
545+ static int
539546parse_process_char (ReaderObj * self , char c )
540547{
541548 DialectObj * dialect = self -> dialect ;
@@ -574,13 +581,15 @@ parse_process_char(ReaderObj *self, char c)
574581 }
575582 else {
576583 /* begin new unquoted field */
577- parse_add_char (self , c );
584+ if (parse_add_char (self , c ) < 0 )
585+ return -1 ;
578586 self -> state = IN_FIELD ;
579587 }
580588 break ;
581589
582590 case ESCAPED_CHAR :
583- parse_add_char (self , c );
591+ if (parse_add_char (self , c ) < 0 )
592+ return -1 ;
584593 self -> state = IN_FIELD ;
585594 break ;
586595
@@ -602,15 +611,17 @@ parse_process_char(ReaderObj *self, char c)
602611 }
603612 else {
604613 /* normal character - save in field */
605- parse_add_char (self , c );
614+ if (parse_add_char (self , c ) < 0 )
615+ return -1 ;
606616 }
607617 break ;
608618
609619 case IN_QUOTED_FIELD :
610620 /* in quoted field */
611621 if (c == '\n' ) {
612622 /* end of line - save '\n' in field */
613- parse_add_char (self , '\n' );
623+ if (parse_add_char (self , '\n' ) < 0 )
624+ return -1 ;
614625 }
615626 else if (c == dialect -> escapechar ) {
616627 /* Possible escape character */
@@ -629,12 +640,14 @@ parse_process_char(ReaderObj *self, char c)
629640 }
630641 else {
631642 /* normal character - save in field */
632- parse_add_char (self , c );
643+ if (parse_add_char (self , c ) < 0 )
644+ return -1 ;
633645 }
634646 break ;
635647
636648 case ESCAPE_IN_QUOTED_FIELD :
637- parse_add_char (self , c );
649+ if (parse_add_char (self , c ) < 0 )
650+ return -1 ;
638651 self -> state = IN_QUOTED_FIELD ;
639652 break ;
640653
@@ -643,7 +656,8 @@ parse_process_char(ReaderObj *self, char c)
643656 if (dialect -> quoting != QUOTE_NONE &&
644657 c == dialect -> quotechar ) {
645658 /* save "" as " */
646- parse_add_char (self , c );
659+ if (parse_add_char (self , c ) < 0 )
660+ return -1 ;
647661 self -> state = IN_QUOTED_FIELD ;
648662 }
649663 else if (c == dialect -> delimiter ) {
@@ -657,7 +671,8 @@ parse_process_char(ReaderObj *self, char c)
657671 self -> state = START_RECORD ;
658672 }
659673 else if (!dialect -> strict ) {
660- parse_add_char (self , c );
674+ if (parse_add_char (self , c ) < 0 )
675+ return -1 ;
661676 self -> state = IN_FIELD ;
662677 }
663678 else {
@@ -666,10 +681,12 @@ parse_process_char(ReaderObj *self, char c)
666681 PyErr_Format (error_obj , "%c expected after %c" ,
667682 dialect -> delimiter ,
668683 dialect -> quotechar );
684+ return -1 ;
669685 }
670686 break ;
671687
672688 }
689+ return 0 ;
673690}
674691
675692/*
@@ -754,13 +771,15 @@ Reader_iternext(ReaderObj *self)
754771 return PyErr_Format (error_obj ,
755772 "newline inside string" );
756773 }
757- parse_process_char (self , c );
758- if (PyErr_Occurred ()) {
759- Py_DECREF (lineobj );
760- return NULL ;
761- }
762- }
763- parse_process_char (self , '\n' );
774+ if (parse_process_char (self , c ) < 0 ) {
775+ Py_DECREF (lineobj );
776+ return NULL ;
777+ }
778+ }
779+ if (parse_process_char (self , '\n' ) < 0 ) {
780+ Py_DECREF (lineobj );
781+ return NULL ;
782+ }
764783 Py_DECREF (lineobj );
765784 } while (self -> state != START_RECORD );
766785
@@ -1387,6 +1406,25 @@ csv_get_dialect(PyObject *module, PyObject *name_obj)
13871406 return get_dialect_from_registry (name_obj );
13881407}
13891408
1409+ static PyObject *
1410+ csv_set_field_limit (PyObject * module , PyObject * args )
1411+ {
1412+ PyObject * new_limit = NULL ;
1413+ long old_limit = field_limit ;
1414+
1415+ if (!PyArg_UnpackTuple (args , "set_field_limit" , 0 , 1 , & new_limit ))
1416+ return NULL ;
1417+ if (new_limit != NULL ) {
1418+ if (!PyInt_Check (new_limit )) {
1419+ PyErr_Format (PyExc_TypeError ,
1420+ "limit must be an integer" );
1421+ return NULL ;
1422+ }
1423+ field_limit = PyInt_AsLong (new_limit );
1424+ }
1425+ return PyInt_FromLong (old_limit );
1426+ }
1427+
13901428/*
13911429 * MODULE
13921430 */
@@ -1494,20 +1532,29 @@ PyDoc_STRVAR(csv_unregister_dialect_doc,
14941532"Delete the name/dialect mapping associated with a string name.\n"
14951533" csv.unregister_dialect(name)" );
14961534
/* Docstring for csv.set_field_limit(); referenced from the csv_methods table. */
1535+ PyDoc_STRVAR (csv_set_field_limit_doc ,
1536+ "Sets an upper limit on parsed fields.\n"
1537+ " csv.set_field_limit([limit])\n"
1538+ "\n"
1539+ "Returns old limit. If limit is not given, no new limit is set and\n"
1540+ "the old limit is returned" );
1541+
14971542static struct PyMethodDef csv_methods [] = {
1498- { "reader" , (PyCFunction )csv_reader ,
1499- METH_VARARGS | METH_KEYWORDS , csv_reader_doc },
1500- { "writer" , (PyCFunction )csv_writer ,
1501- METH_VARARGS | METH_KEYWORDS , csv_writer_doc },
1502- { "list_dialects" , (PyCFunction )csv_list_dialects ,
1503- METH_NOARGS , csv_list_dialects_doc },
1504- { "register_dialect" , (PyCFunction )csv_register_dialect ,
1543+ { "reader" , (PyCFunction )csv_reader ,
1544+ METH_VARARGS | METH_KEYWORDS , csv_reader_doc },
1545+ { "writer" , (PyCFunction )csv_writer ,
1546+ METH_VARARGS | METH_KEYWORDS , csv_writer_doc },
1547+ { "list_dialects" , (PyCFunction )csv_list_dialects ,
1548+ METH_NOARGS , csv_list_dialects_doc },
1549+ { "register_dialect" , (PyCFunction )csv_register_dialect ,
15051550 METH_VARARGS | METH_KEYWORDS , csv_register_dialect_doc },
1506- { "unregister_dialect" , (PyCFunction )csv_unregister_dialect ,
1507- METH_O , csv_unregister_dialect_doc },
1508- { "get_dialect" , (PyCFunction )csv_get_dialect ,
1509- METH_O , csv_get_dialect_doc },
1510- { NULL , NULL }
1551+ { "unregister_dialect" , (PyCFunction )csv_unregister_dialect ,
1552+ METH_O , csv_unregister_dialect_doc },
1553+ { "get_dialect" , (PyCFunction )csv_get_dialect ,
1554+ METH_O , csv_get_dialect_doc },
1555+ { "set_field_limit" , (PyCFunction )csv_set_field_limit ,
1556+ METH_VARARGS , csv_set_field_limit_doc },
1557+ { NULL , NULL }
15111558};
15121559
15131560PyMODINIT_FUNC
0 commit comments