Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 3bfc65a

Browse files
committed
Issue #13505: Make pickling of bytes object compatible with Python 2.
Initial patch by sbt.
1 parent 7b7e39a commit 3bfc65a

5 files changed

Lines changed: 93 additions & 56 deletions

File tree

Lib/pickle.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,11 @@ def save_float(self, obj, pack=struct.pack):
487487

488488
def save_bytes(self, obj, pack=struct.pack):
489489
if self.proto < 3:
490-
self.save_reduce(bytes, (list(obj),), obj=obj)
490+
if len(obj) == 0:
491+
self.save_reduce(bytes, (), obj=obj)
492+
else:
493+
self.save_reduce(codecs.encode,
494+
(str(obj, 'latin1'), 'latin1'), obj=obj)
491495
return
492496
n = len(obj)
493497
if n < 256:

Lib/pickletools.py

Lines changed: 31 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -2083,27 +2083,22 @@ def __init__(self, value):
20832083
29: ( MARK
20842084
30: d DICT (MARK at 29)
20852085
31: p PUT 2
2086-
34: c GLOBAL '__builtin__ bytes'
2087-
53: p PUT 3
2088-
56: ( MARK
2089-
57: ( MARK
2090-
58: l LIST (MARK at 57)
2086+
34: c GLOBAL '_codecs encode'
2087+
50: p PUT 3
2088+
53: ( MARK
2089+
54: V UNICODE 'abc'
20912090
59: p PUT 4
2092-
62: L LONG 97
2093-
67: a APPEND
2094-
68: L LONG 98
2095-
73: a APPEND
2096-
74: L LONG 99
2097-
79: a APPEND
2098-
80: t TUPLE (MARK at 56)
2099-
81: p PUT 5
2100-
84: R REDUCE
2101-
85: p PUT 6
2102-
88: V UNICODE 'def'
2103-
93: p PUT 7
2104-
96: s SETITEM
2105-
97: a APPEND
2106-
98: . STOP
2091+
62: V UNICODE 'latin1'
2092+
70: p PUT 5
2093+
73: t TUPLE (MARK at 53)
2094+
74: p PUT 6
2095+
77: R REDUCE
2096+
78: p PUT 7
2097+
81: V UNICODE 'def'
2098+
86: p PUT 8
2099+
89: s SETITEM
2100+
90: a APPEND
2101+
91: . STOP
21072102
highest protocol among opcodes = 0
21082103
21092104
Try again with a "binary" pickle.
@@ -2122,25 +2117,22 @@ def __init__(self, value):
21222117
14: q BINPUT 1
21232118
16: } EMPTY_DICT
21242119
17: q BINPUT 2
2125-
19: c GLOBAL '__builtin__ bytes'
2126-
38: q BINPUT 3
2127-
40: ( MARK
2128-
41: ] EMPTY_LIST
2129-
42: q BINPUT 4
2130-
44: ( MARK
2131-
45: K BININT1 97
2132-
47: K BININT1 98
2133-
49: K BININT1 99
2134-
51: e APPENDS (MARK at 44)
2135-
52: t TUPLE (MARK at 40)
2136-
53: q BINPUT 5
2137-
55: R REDUCE
2138-
56: q BINPUT 6
2139-
58: X BINUNICODE 'def'
2140-
66: q BINPUT 7
2141-
68: s SETITEM
2142-
69: e APPENDS (MARK at 3)
2143-
70: . STOP
2120+
19: c GLOBAL '_codecs encode'
2121+
35: q BINPUT 3
2122+
37: ( MARK
2123+
38: X BINUNICODE 'abc'
2124+
46: q BINPUT 4
2125+
48: X BINUNICODE 'latin1'
2126+
59: q BINPUT 5
2127+
61: t TUPLE (MARK at 37)
2128+
62: q BINPUT 6
2129+
64: R REDUCE
2130+
65: q BINPUT 7
2131+
67: X BINUNICODE 'def'
2132+
75: q BINPUT 8
2133+
77: s SETITEM
2134+
78: e APPENDS (MARK at 3)
2135+
79: . STOP
21442136
highest protocol among opcodes = 1
21452137
21462138
Exercise the INST/OBJ/BUILD family.

Lib/test/pickletester.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -636,9 +636,15 @@ def test_unicode_high_plane(self):
636636

637637
def test_bytes(self):
638638
for proto in protocols:
639-
for u in b'', b'xyz', b'xyz'*100:
640-
p = self.dumps(u)
641-
self.assertEqual(self.loads(p), u)
639+
for s in b'', b'xyz', b'xyz'*100:
640+
p = self.dumps(s)
641+
self.assertEqual(self.loads(p), s)
642+
for s in [bytes([i]) for i in range(256)]:
643+
p = self.dumps(s)
644+
self.assertEqual(self.loads(p), s)
645+
for s in [bytes([i, i]) for i in range(256)]:
646+
p = self.dumps(s)
647+
self.assertEqual(self.loads(p), s)
642648

643649
def test_ints(self):
644650
import sys

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ What's New in Python 3.2.3?
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #13505: Pickle bytes objects in a way that is compatible with
14+
Python 2 when using protocols <= 2.
15+
1316
- Issue #11147: Fix an unused argument in _Py_ANNOTATE_MEMORY_ORDER. (Fix
1417
given by Campbell Barton).
1518

Modules/_pickle.c

Lines changed: 45 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ typedef struct UnpicklerObject {
369369
char *errors; /* Name of errors handling scheme to used when
370370
decoding strings. The default value is
371371
"strict". */
372-
Py_ssize_t *marks; /* Mark stack, used for unpickling container
372+
Py_ssize_t *marks; /* Mark stack, used for unpickling container
373373
objects. */
374374
Py_ssize_t num_marks; /* Number of marks in the mark stack. */
375375
Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
@@ -1700,34 +1700,66 @@ save_bytes(PicklerObject *self, PyObject *obj)
17001700
if (self->proto < 3) {
17011701
/* Older pickle protocols do not have an opcode for pickling bytes
17021702
objects. Therefore, we need to fake the copy protocol (i.e.,
1703-
the __reduce__ method) to permit bytes object unpickling. */
1703+
the __reduce__ method) to permit bytes object unpickling.
1704+
1705+
Here we use a hack to be compatible with Python 2. Since in Python
1706+
2 'bytes' is just an alias for 'str' (which has different
1707+
parameters than the actual bytes object), we use codecs.encode
1708+
to create the appropriate 'str' object when unpickled using
1709+
Python 2 *and* the appropriate 'bytes' object when unpickled
1710+
using Python 3. Again this is a hack and we don't need to do this
1711+
with newer protocols. */
1712+
static PyObject *codecs_encode = NULL;
17041713
PyObject *reduce_value = NULL;
1705-
PyObject *bytelist = NULL;
17061714
int status;
17071715

1708-
bytelist = PySequence_List(obj);
1709-
if (bytelist == NULL)
1710-
return -1;
1716+
if (codecs_encode == NULL) {
1717+
PyObject *codecs_module = PyImport_ImportModule("codecs");
1718+
if (codecs_module == NULL) {
1719+
return -1;
1720+
}
1721+
codecs_encode = PyObject_GetAttrString(codecs_module, "encode");
1722+
Py_DECREF(codecs_module);
1723+
if (codecs_encode == NULL) {
1724+
return -1;
1725+
}
1726+
}
17111727

1712-
reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1713-
bytelist);
1714-
if (reduce_value == NULL) {
1715-
Py_DECREF(bytelist);
1716-
return -1;
1728+
if (PyBytes_GET_SIZE(obj) == 0) {
1729+
reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
17171730
}
1731+
else {
1732+
static PyObject *latin1 = NULL;
1733+
PyObject *unicode_str =
1734+
PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
1735+
PyBytes_GET_SIZE(obj),
1736+
"strict");
1737+
if (unicode_str == NULL)
1738+
return -1;
1739+
if (latin1 == NULL) {
1740+
latin1 = PyUnicode_InternFromString("latin1");
1741+
if (latin1 == NULL)
1742+
return -1;
1743+
}
1744+
reduce_value = Py_BuildValue("(O(OO))",
1745+
codecs_encode, unicode_str, latin1);
1746+
Py_DECREF(unicode_str);
1747+
}
1748+
1749+
if (reduce_value == NULL)
1750+
return -1;
17181751

17191752
/* save_reduce() will memoize the object automatically. */
17201753
status = save_reduce(self, reduce_value, obj);
17211754
Py_DECREF(reduce_value);
1722-
Py_DECREF(bytelist);
17231755
return status;
17241756
}
17251757
else {
17261758
Py_ssize_t size;
17271759
char header[5];
17281760
Py_ssize_t len;
17291761

1730-
size = PyBytes_Size(obj);
1762+
size = PyBytes_GET_SIZE(obj);
17311763
if (size < 0)
17321764
return -1;
17331765

0 commit comments

Comments
 (0)