@@ -651,10 +651,11 @@ typedef struct UnpicklerObject {
651651 Pdata * stack ; /* Pickle data stack, store unpickled objects. */
652652
653653 /* The unpickler memo is just an array of PyObject *s. Using a dict
654- is unnecessary, since the keys are contiguous ints. */
654+ is unnecessary, since the keys are usually contiguous ints. */
655655 PyObject * * memo ;
656656 size_t memo_size ; /* Capacity of the memo array */
657657 size_t memo_len ; /* Number of objects in the memo */
658+ PyObject * memo_dict ; /* The backup memo dict for non-continuous keys. */
658659
659660 PyObject * persistent_load ; /* persistent_load() method, can be NULL. */
660661
@@ -1546,32 +1547,64 @@ _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
15461547
15471548/* Returns NULL if idx is out of bounds. */
15481549static PyObject *
1549- _Unpickler_MemoGet (UnpicklerObject * self , size_t idx )
1550+ _Unpickler_MemoGet (PickleState * st , UnpicklerObject * self , size_t idx )
15501551{
1551- if (idx >= self -> memo_size )
1552- return NULL ;
1553-
1554- return self -> memo [idx ];
1552+ PyObject * value ;
1553+ if (idx < self -> memo_size ) {
1554+ value = self -> memo [idx ];
1555+ if (value != NULL ) {
1556+ return value ;
1557+ }
1558+ }
1559+ if (self -> memo_dict != NULL ) {
1560+ PyObject * key = PyLong_FromSsize_t (idx );
1561+ if (key == NULL ) {
1562+ return NULL ;
1563+ }
1564+ if (idx < self -> memo_size ) {
1565+ (void )PyDict_Pop (self -> memo_dict , key , & value );
1566+ self -> memo [idx ] = value ;
1567+ }
1568+ else {
1569+ value = PyDict_GetItemWithError (self -> memo_dict , key );
1570+ }
1571+ Py_DECREF (key );
1572+ if (value != NULL || PyErr_Occurred ()) {
1573+ return value ;
1574+ }
1575+ }
1576+ PyErr_Format (st -> UnpicklingError , "Memo value not found at index %zd" , idx );
1577+ return NULL ;
15551578}
15561579
15571580/* Returns -1 (with an exception set) on failure, 0 on success.
15581581 This takes its own reference to `value`. */
15591582static int
1560- _Unpickler_MemoPut (PickleState * st , UnpicklerObject * self , size_t idx , PyObject * value )
1583+ _Unpickler_MemoPut (UnpicklerObject * self , size_t idx , PyObject * value )
15611584{
15621585 PyObject * old_item ;
15631586
15641587 if (idx >= self -> memo_size ) {
1565- /* MAX_MEMO_INDICES_GAP was introduced mainly for making testing of
1566- * PUT, BINPUT and LONG_BINPUT opcodes simpler. It should be more
1567- * than 1<<16 for LONG_BINPUT.
1568- * The standard pickler never produces data that requires more than 0.
1569- * The Python code does not have such limitation.
1570- */
1571- const int MAX_MEMO_INDICES_GAP = 1 << 17 ;
1572- if (idx > self -> memo_len * 2 + MAX_MEMO_INDICES_GAP ) {
1573- PyErr_SetString (st -> UnpicklingError , "too sparse memo indices" );
1574- return -1 ;
1588+ if (idx > self -> memo_len * 2 ) {
1589+ /* The memo keys are too sparse. Use a dict instead of
1590+ * a contiguous array for the memo. */
1591+ if (self -> memo_dict == NULL ) {
1592+ self -> memo_dict = PyDict_New ();
1593+ if (self -> memo_dict == NULL ) {
1594+ return -1 ;
1595+ }
1596+ }
1597+ PyObject * key = PyLong_FromSize_t (idx );
1598+ if (key == NULL ) {
1599+ return -1 ;
1600+ }
1601+
1602+ if (PyDict_SetItem (self -> memo_dict , key , value ) < 0 ) {
1603+ Py_DECREF (key );
1604+ return -1 ;
1605+ }
1606+ Py_DECREF (key );
1607+ return 0 ;
15751608 }
15761609 if (_Unpickler_ResizeMemoList (self , idx * 2 ) < 0 )
15771610 return -1 ;
@@ -1642,6 +1675,7 @@ _Unpickler_New(PyObject *module)
16421675 self -> memo = memo ;
16431676 self -> memo_size = MEMO_SIZE ;
16441677 self -> memo_len = 0 ;
1678+ self -> memo_dict = NULL ;
16451679 self -> persistent_load = NULL ;
16461680 memset (& self -> buffer , 0 , sizeof (Py_buffer ));
16471681 self -> input_buffer = NULL ;
@@ -6149,20 +6183,15 @@ load_get(PickleState *st, UnpicklerObject *self)
61496183 if (key == NULL )
61506184 return -1 ;
61516185 idx = PyLong_AsSsize_t (key );
6186+ Py_DECREF (key );
61526187 if (idx == -1 && PyErr_Occurred ()) {
6153- Py_DECREF (key );
61546188 return -1 ;
61556189 }
61566190
6157- value = _Unpickler_MemoGet (self , idx );
6191+ value = _Unpickler_MemoGet (st , self , idx );
61586192 if (value == NULL ) {
6159- if (!PyErr_Occurred ()) {
6160- PyErr_Format (st -> UnpicklingError , "Memo value not found at index %ld" , idx );
6161- }
6162- Py_DECREF (key );
61636193 return -1 ;
61646194 }
6165- Py_DECREF (key );
61666195
61676196 PDATA_APPEND (self -> stack , value , -1 );
61686197 return 0 ;
@@ -6180,13 +6209,8 @@ load_binget(PickleState *st, UnpicklerObject *self)
61806209
61816210 idx = Py_CHARMASK (s [0 ]);
61826211
6183- value = _Unpickler_MemoGet (self , idx );
6212+ value = _Unpickler_MemoGet (st , self , idx );
61846213 if (value == NULL ) {
6185- PyObject * key = PyLong_FromSsize_t (idx );
6186- if (key != NULL ) {
6187- PyErr_Format (st -> UnpicklingError , "Memo value not found at index %ld" , idx );
6188- Py_DECREF (key );
6189- }
61906214 return -1 ;
61916215 }
61926216
@@ -6206,13 +6230,8 @@ load_long_binget(PickleState *st, UnpicklerObject *self)
62066230
62076231 idx = calc_binsize (s , 4 );
62086232
6209- value = _Unpickler_MemoGet (self , idx );
6233+ value = _Unpickler_MemoGet (st , self , idx );
62106234 if (value == NULL ) {
6211- PyObject * key = PyLong_FromSsize_t (idx );
6212- if (key != NULL ) {
6213- PyErr_Format (st -> UnpicklingError , "Memo value not found at index %ld" , idx );
6214- Py_DECREF (key );
6215- }
62166235 return -1 ;
62176236 }
62186237
@@ -6337,7 +6356,7 @@ load_put(PickleState *state, UnpicklerObject *self)
63376356 return -1 ;
63386357 }
63396358
6340- return _Unpickler_MemoPut (state , self , idx , value );
6359+ return _Unpickler_MemoPut (self , idx , value );
63416360}
63426361
63436362static int
@@ -6356,7 +6375,7 @@ load_binput(PickleState *state, UnpicklerObject *self)
63566375
63576376 idx = Py_CHARMASK (s [0 ]);
63586377
6359- return _Unpickler_MemoPut (state , self , idx , value );
6378+ return _Unpickler_MemoPut (self , idx , value );
63606379}
63616380
63626381static int
@@ -6380,7 +6399,7 @@ load_long_binput(PickleState *state, UnpicklerObject *self)
63806399 return -1 ;
63816400 }
63826401
6383- return _Unpickler_MemoPut (state , self , idx , value );
6402+ return _Unpickler_MemoPut (self , idx , value );
63846403}
63856404
63866405static int
@@ -6392,7 +6411,7 @@ load_memoize(PickleState *state, UnpicklerObject *self)
63926411 return Pdata_stack_underflow (state , self -> stack );
63936412 value = self -> stack -> data [Py_SIZE (self -> stack ) - 1 ];
63946413
6395- return _Unpickler_MemoPut (state , self , self -> memo_len , value );
6414+ return _Unpickler_MemoPut (self , self -> memo_len , value );
63966415}
63976416
63986417static int
@@ -7141,6 +7160,13 @@ _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
71417160 size_t res = _PyObject_SIZE (Py_TYPE (self ));
71427161 if (self -> memo != NULL )
71437162 res += self -> memo_size * sizeof (PyObject * );
7163+ if (self -> memo_dict != NULL ) {
7164+ size_t s = _PySys_GetSizeOf (self -> memo_dict );
7165+ if (s == (size_t )-1 ) {
7166+ return -1 ;
7167+ }
7168+ res += s ;
7169+ }
71447170 if (self -> marks != NULL )
71457171 res += (size_t )self -> marks_size * sizeof (Py_ssize_t );
71467172 if (self -> input_line != NULL )
@@ -7175,6 +7201,7 @@ Unpickler_clear(UnpicklerObject *self)
71757201 self -> buffer .buf = NULL ;
71767202 }
71777203
7204+ Py_CLEAR (self -> memo_dict );
71787205 _Unpickler_MemoCleanup (self );
71797206 PyMem_Free (self -> marks );
71807207 self -> marks = NULL ;
@@ -7209,6 +7236,7 @@ Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
72097236 Py_VISIT (self -> stack );
72107237 Py_VISIT (self -> persistent_load );
72117238 Py_VISIT (self -> buffers );
7239+ Py_VISIT (self -> memo_dict );
72127240 PyObject * * memo = self -> memo ;
72137241 if (memo ) {
72147242 Py_ssize_t i = self -> memo_size ;
@@ -7514,7 +7542,7 @@ Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored
75147542 "memo key must be positive integers." );
75157543 goto error ;
75167544 }
7517- if (_Unpickler_MemoPut (state , self , idx , value ) < 0 )
7545+ if (_Unpickler_MemoPut (self , idx , value ) < 0 )
75187546 goto error ;
75197547 }
75207548 }
0 commit comments