@@ -369,7 +369,7 @@ typedef struct UnpicklerObject {
369369 char * errors ; /* Name of errors handling scheme to use when
370370 decoding strings. The default value is
371371 "strict". */
372- Py_ssize_t * marks ; /* Mark stack, used for unpickling container
372+ Py_ssize_t * marks ; /* Mark stack, used for unpickling container
373373 objects. */
374374 Py_ssize_t num_marks ; /* Number of marks in the mark stack. */
375375 Py_ssize_t marks_size ; /* Current allocated size of the mark stack. */
@@ -1700,34 +1700,66 @@ save_bytes(PicklerObject *self, PyObject *obj)
17001700 if (self -> proto < 3 ) {
17011701 /* Older pickle protocols do not have an opcode for pickling bytes
17021702 objects. Therefore, we need to fake the copy protocol (i.e.,
1703- the __reduce__ method) to permit bytes object unpickling. */
1703+ the __reduce__ method) to permit bytes object unpickling.
1704+
1705+ Here we use a hack to be compatible with Python 2. Since in Python
1706+ 2 'bytes' is just an alias for 'str' (which has different
1707+ parameters than the actual bytes object), we use codecs.encode
1708+ to create the appropriate 'str' object when unpickled using
1709+ Python 2 *and* the appropriate 'bytes' object when unpickled
1710+ using Python 3. Again this is a hack and we don't need to do this
1711+ with newer protocols. */
1712+ static PyObject * codecs_encode = NULL ;
17041713 PyObject * reduce_value = NULL ;
1705- PyObject * bytelist = NULL ;
17061714 int status ;
17071715
1708- bytelist = PySequence_List (obj );
1709- if (bytelist == NULL )
1710- return -1 ;
1716+ if (codecs_encode == NULL ) {
1717+ PyObject * codecs_module = PyImport_ImportModule ("codecs" );
1718+ if (codecs_module == NULL ) {
1719+ return -1 ;
1720+ }
1721+ codecs_encode = PyObject_GetAttrString (codecs_module , "encode" );
1722+ Py_DECREF (codecs_module );
1723+ if (codecs_encode == NULL ) {
1724+ return -1 ;
1725+ }
1726+ }
17111727
1712- reduce_value = Py_BuildValue ("(O(O))" , (PyObject * )& PyBytes_Type ,
1713- bytelist );
1714- if (reduce_value == NULL ) {
1715- Py_DECREF (bytelist );
1716- return -1 ;
1728+ if (PyBytes_GET_SIZE (obj ) == 0 ) {
1729+ reduce_value = Py_BuildValue ("(O())" , (PyObject * )& PyBytes_Type );
17171730 }
1731+ else {
1732+ static PyObject * latin1 = NULL ;
1733+ PyObject * unicode_str =
1734+ PyUnicode_DecodeLatin1 (PyBytes_AS_STRING (obj ),
1735+ PyBytes_GET_SIZE (obj ),
1736+ "strict" );
1737+ if (unicode_str == NULL )
1738+ return -1 ;
1739+ if (latin1 == NULL ) {
1740+ latin1 = PyUnicode_InternFromString ("latin1" );
1741+ if (latin1 == NULL )
1742+ return -1 ;
1743+ }
1744+ reduce_value = Py_BuildValue ("(O(OO))" ,
1745+ codecs_encode , unicode_str , latin1 );
1746+ Py_DECREF (unicode_str );
1747+ }
1748+
1749+ if (reduce_value == NULL )
1750+ return -1 ;
17181751
17191752 /* save_reduce() will memoize the object automatically. */
17201753 status = save_reduce (self , reduce_value , obj );
17211754 Py_DECREF (reduce_value );
1722- Py_DECREF (bytelist );
17231755 return status ;
17241756 }
17251757 else {
17261758 Py_ssize_t size ;
17271759 char header [5 ];
17281760 Py_ssize_t len ;
17291761
1730- size = PyBytes_Size (obj );
1762+ size = PyBytes_GET_SIZE (obj );
17311763 if (size < 0 )
17321764 return -1 ;
17331765
0 commit comments