@@ -36,6 +36,7 @@ typedef struct _PyScannerObject {
3636 PyObject * parse_float ;
3737 PyObject * parse_int ;
3838 PyObject * parse_constant ;
39+ PyObject * memo ;
3940} PyScannerObject ;
4041
4142static PyMemberDef scanner_members [] = {
@@ -305,6 +306,21 @@ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
305306 return tpl ;
306307}
307308
/*
 * APPEND_OLD_CHUNK -- flush the pending string fragment into the chunk list.
 *
 * Expands in a function that declares `PyObject *chunk` and
 * `PyObject *chunks` (both may be NULL) and provides a `bail` label.
 *
 * If `chunk` is non-NULL:
 *   - `chunks` is created lazily with PyList_New(0) the first time a
 *     fragment has to be stored;
 *   - `chunk` is appended to `chunks`, after which the local reference is
 *     dropped and `chunk` is reset to NULL.
 * If `chunk` is NULL the macro does nothing.
 *
 * On any failure control jumps to `bail`.  The `bail` handler releases
 * `chunk` with Py_XDECREF, so the append-failure branch must leave `chunk`
 * either still owned or NULL.  Py_CLEAR is used there (instead of a bare
 * Py_DECREF) so that the reference is not released a second time at `bail`.
 *
 * This is deliberately a bare `if` block rather than `do { } while (0)`:
 * the existing call sites invoke it without a trailing semicolon.
 */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        if (chunks == NULL) { \
            chunks = PyList_New(0); \
            if (chunks == NULL) { \
                goto bail; \
            } \
        } \
        if (PyList_Append(chunks, chunk)) { \
            Py_CLEAR(chunk); \
            goto bail; \
        } \
        Py_CLEAR(chunk); \
    }
323+
308324static PyObject *
309325scanstring_unicode (PyObject * pystr , Py_ssize_t end , int strict , Py_ssize_t * next_end_ptr )
310326{
@@ -316,23 +332,21 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
316332
317333 Return value is a new PyUnicode
318334 */
319- PyObject * rval ;
335+ PyObject * rval = NULL ;
320336 Py_ssize_t len = PyUnicode_GET_SIZE (pystr );
321337 Py_ssize_t begin = end - 1 ;
322338 Py_ssize_t next = begin ;
323339 const Py_UNICODE * buf = PyUnicode_AS_UNICODE (pystr );
324- PyObject * chunks = PyList_New (0 );
325- if (chunks == NULL ) {
326- goto bail ;
327- }
340+ PyObject * chunks = NULL ;
341+ PyObject * chunk = NULL ;
342+
328343 if (end < 0 || len <= end ) {
329344 PyErr_SetString (PyExc_ValueError , "end is out of bounds" );
330345 goto bail ;
331346 }
332347 while (1 ) {
333348 /* Find the end of the string or the next escape */
334349 Py_UNICODE c = 0 ;
335- PyObject * chunk = NULL ;
336350 for (next = end ; next < len ; next ++ ) {
337351 c = buf [next ];
338352 if (c == '"' || c == '\\' ) {
@@ -349,15 +363,11 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
349363 }
350364 /* Pick up this chunk if it's not zero length */
351365 if (next != end ) {
366+ APPEND_OLD_CHUNK
352367 chunk = PyUnicode_FromUnicode (& buf [end ], next - end );
353368 if (chunk == NULL ) {
354369 goto bail ;
355370 }
356- if (PyList_Append (chunks , chunk )) {
357- Py_DECREF (chunk );
358- goto bail ;
359- }
360- Py_DECREF (chunk );
361371 }
362372 next ++ ;
363373 if (c == '"' ) {
@@ -459,27 +469,34 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
459469 }
460470#endif
461471 }
472+ APPEND_OLD_CHUNK
462473 chunk = PyUnicode_FromUnicode (& c , 1 );
463474 if (chunk == NULL ) {
464475 goto bail ;
465476 }
466- if (PyList_Append (chunks , chunk )) {
467- Py_DECREF (chunk );
477+ }
478+
479+ if (chunks == NULL ) {
480+ if (chunk != NULL )
481+ rval = chunk ;
482+ else
483+ rval = PyUnicode_FromStringAndSize ("" , 0 );
484+ }
485+ else {
486+ APPEND_OLD_CHUNK
487+ rval = join_list_unicode (chunks );
488+ if (rval == NULL ) {
468489 goto bail ;
469490 }
470- Py_DECREF ( chunk );
491+ Py_CLEAR ( chunks );
471492 }
472493
473- rval = join_list_unicode (chunks );
474- if (rval == NULL ) {
475- goto bail ;
476- }
477- Py_DECREF (chunks );
478494 * next_end_ptr = end ;
479495 return rval ;
480496bail :
481497 * next_end_ptr = -1 ;
482498 Py_XDECREF (chunks );
499+ Py_XDECREF (chunk );
483500 return NULL ;
484501}
485502
@@ -578,6 +595,7 @@ scanner_clear(PyObject *self)
578595 Py_CLEAR (s -> parse_float );
579596 Py_CLEAR (s -> parse_int );
580597 Py_CLEAR (s -> parse_constant );
598+ Py_CLEAR (s -> memo );
581599 return 0 ;
582600}
583601
@@ -593,10 +611,16 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
593611 Py_UNICODE * str = PyUnicode_AS_UNICODE (pystr );
594612 Py_ssize_t end_idx = PyUnicode_GET_SIZE (pystr ) - 1 ;
595613 PyObject * val = NULL ;
596- PyObject * rval = PyList_New ( 0 ) ;
614+ PyObject * rval = NULL ;
597615 PyObject * key = NULL ;
598616 int strict = PyObject_IsTrue (s -> strict );
617+ int has_pairs_hook = (s -> object_pairs_hook != Py_None );
599618 Py_ssize_t next_idx ;
619+
620+ if (has_pairs_hook )
621+ rval = PyList_New (0 );
622+ else
623+ rval = PyDict_New ();
600624 if (rval == NULL )
601625 return NULL ;
602626
@@ -606,6 +630,8 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
606630 /* only loop if the object is non-empty */
607631 if (idx <= end_idx && str [idx ] != '}' ) {
608632 while (idx <= end_idx ) {
633+ PyObject * memokey ;
634+
609635 /* read key */
610636 if (str [idx ] != '"' ) {
611637 raise_errmsg ("Expecting property name" , pystr , idx );
@@ -614,6 +640,16 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
614640 key = scanstring_unicode (pystr , idx + 1 , strict , & next_idx );
615641 if (key == NULL )
616642 goto bail ;
643+ memokey = PyDict_GetItem (s -> memo , key );
644+ if (memokey != NULL ) {
645+ Py_INCREF (memokey );
646+ Py_DECREF (key );
647+ key = memokey ;
648+ }
649+ else {
650+ if (PyDict_SetItem (s -> memo , key , key ) < 0 )
651+ goto bail ;
652+ }
617653 idx = next_idx ;
618654
619655 /* skip whitespace between key and : delimiter, read :, skip whitespace */
@@ -630,19 +666,24 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
630666 if (val == NULL )
631667 goto bail ;
632668
633- {
634- PyObject * tuple = PyTuple_Pack (2 , key , val );
635- if (tuple == NULL )
669+ if ( has_pairs_hook ) {
670+ PyObject * item = PyTuple_Pack (2 , key , val );
671+ if (item == NULL )
636672 goto bail ;
637- if (PyList_Append (rval , tuple ) == -1 ) {
638- Py_DECREF (tuple );
673+ Py_CLEAR (key );
674+ Py_CLEAR (val );
675+ if (PyList_Append (rval , item ) == -1 ) {
676+ Py_DECREF (item );
639677 goto bail ;
640678 }
641- Py_DECREF (tuple );
679+ Py_DECREF (item );
680+ }
681+ else {
682+ if (PyDict_SetItem (rval , key , val ) < 0 )
683+ goto bail ;
684+ Py_CLEAR (key );
685+ Py_CLEAR (val );
642686 }
643-
644- Py_CLEAR (key );
645- Py_CLEAR (val );
646687 idx = next_idx ;
647688
648689 /* skip whitespace before } or , */
@@ -672,36 +713,23 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
672713
673714 * next_idx_ptr = idx + 1 ;
674715
675- if (s -> object_pairs_hook != Py_None ) {
716+ if (has_pairs_hook ) {
676717 val = PyObject_CallFunctionObjArgs (s -> object_pairs_hook , rval , NULL );
677- if (val == NULL )
678- goto bail ;
679718 Py_DECREF (rval );
680719 return val ;
681720 }
682721
683- val = PyDict_New ();
684- if (val == NULL )
685- goto bail ;
686- if (PyDict_MergeFromSeq2 (val , rval , 1 ) == -1 )
687- goto bail ;
688- Py_DECREF (rval );
689- rval = val ;
690-
691722 /* if object_hook is not None: rval = object_hook(rval) */
692723 if (s -> object_hook != Py_None ) {
693724 val = PyObject_CallFunctionObjArgs (s -> object_hook , rval , NULL );
694- if (val == NULL )
695- goto bail ;
696725 Py_DECREF (rval );
697- rval = val ;
698- val = NULL ;
726+ return val ;
699727 }
700728 return rval ;
701729bail :
702730 Py_XDECREF (key );
703731 Py_XDECREF (val );
704- Py_DECREF (rval );
732+ Py_XDECREF (rval );
705733 return NULL ;
706734}
707735
@@ -988,6 +1016,9 @@ scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
9881016 Py_TYPE (pystr )-> tp_name );
9891017 return NULL ;
9901018 }
1019+ PyDict_Clear (s -> memo );
1020+ if (rval == NULL )
1021+ return NULL ;
9911022 return _build_rval_index_tuple (rval , next_idx );
9921023}
9931024
@@ -1021,6 +1052,12 @@ scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
10211052 if (!PyArg_ParseTupleAndKeywords (args , kwds , "O:make_scanner" , kwlist , & ctx ))
10221053 return -1 ;
10231054
1055+ if (s -> memo == NULL ) {
1056+ s -> memo = PyDict_New ();
1057+ if (s -> memo == NULL )
1058+ goto bail ;
1059+ }
1060+
10241061 /* All of these will fail "gracefully" so we don't need to verify them */
10251062 s -> strict = PyObject_GetAttrString (ctx , "strict" );
10261063 if (s -> strict == NULL )
0 commit comments