Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 5a2d8f5

Browse files
committed
Begin documenting protocol 2.
1 parent bb38e30 commit 5a2d8f5

2 files changed

Lines changed: 304 additions & 0 deletions

File tree

Lib/pickle.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,22 @@ def __init__(self, value):
127127
TRUE = 'I01\n' # not an opcode; see INT docs in pickletools.py
128128
FALSE = 'I00\n' # not an opcode; see INT docs in pickletools.py
129129

130+
# Protocol 2 (not yet implemented) (XXX comments will be added later)
# Kept in ascending opcode order, '\x80' through '\x8c'.

# Protocol header and object construction
PROTO = '\x80'
NEWOBJ = '\x81'

# Extension registry codes (1-, 2- and 4-byte arguments)
EXT1 = '\x82'
EXT2 = '\x83'
EXT4 = '\x84'

# Fixed-size tuples built from the top 1, 2 or 3 stack items
TUPLE1 = '\x85'
TUPLE2 = '\x86'
TUPLE3 = '\x87'

# Bool singletons
NEWTRUE = '\x88'
NEWFALSE = '\x89'

# Binary longs (1-, 2- and 4-byte size prefix)
LONG1 = '\x8a'
LONG2 = '\x8b'
LONG4 = '\x8c'
145+
130146

131147
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
132148
del x

Lib/pickletools.py

Lines changed: 288 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,123 @@ def read_float8(f):
578578
(may not survive roundtrip pickling intact).
579579
""")
580580

581+
# Protocol 2 formats
582+
583+
def decode_long(data):
    r"""Convert a little-endian two's complement byte string to a long.

    The first character of data is the least significant byte; the top
    bit of the last character is the sign bit.  An empty string decodes
    to 0L.

    >>> decode_long("\xff\x00")
    255L
    >>> decode_long("\xff\x7f")
    32767L
    >>> decode_long("\x00\xff")
    -256L
    >>> decode_long("\x00\x80")
    -32768L
    """
    total_bits = 8 * len(data)
    value = long(0)
    # Accumulate each byte at its own bit offset (byte k lands at bit 8*k).
    for index, char in enumerate(data):
        value |= long(ord(char)) << (8 * index)
    # A set top bit means the number is negative: subtract 2**total_bits
    # to undo the two's complement encoding.
    if total_bits and value & (long(1) << (total_bits - 1)):
        value -= long(1) << total_bits
    return value
603+
604+
def read_long1(f):
    r"""Read a long preceded by a 1-byte unsigned byte count.

    The count is read with read_uint1(), then that many bytes are read
    from f and handed to decode_long().  ValueError is raised when f
    yields fewer bytes than the count asks for.

    >>> import StringIO
    >>> read_long1(StringIO.StringIO("\x02\xff\x00"))
    255L
    >>> read_long1(StringIO.StringIO("\x02\xff\x7f"))
    32767L
    >>> read_long1(StringIO.StringIO("\x02\x00\xff"))
    -256L
    >>> read_long1(StringIO.StringIO("\x02\x00\x80"))
    -32768L
    """
    size = read_uint1(f)
    payload = f.read(size)
    if len(payload) == size:
        return decode_long(payload)
    raise ValueError("not enough data in stream to read long1")
623+
624+
# Descriptor for a long argument whose payload length is carried in a
# leading 1-byte size; decoding is delegated to read_long1.
long1 = ArgumentDescriptor(
    name="long1",
    reader=read_long1,
    n=TAKEN_FROM_ARGUMENT,
    doc="""A binary long, little-endian, using 1-byte size.

This first reads one byte as an unsigned size, then reads that
many bytes and interprets them as a little-endian long.
""")
633+
634+
def read_long2(f):
    r"""Read a long preceded by a 2-byte unsigned byte count.

    The count is read with read_uint2(), then that many bytes are read
    from f and handed to decode_long().  ValueError is raised when f
    yields fewer bytes than the count asks for.

    >>> import StringIO
    >>> read_long2(StringIO.StringIO("\x02\x00\xff\x00"))
    255L
    >>> read_long2(StringIO.StringIO("\x02\x00\xff\x7f"))
    32767L
    >>> read_long2(StringIO.StringIO("\x02\x00\x00\xff"))
    -256L
    >>> read_long2(StringIO.StringIO("\x02\x00\x00\x80"))
    -32768L
    """
    size = read_uint2(f)
    payload = f.read(size)
    if len(payload) == size:
        return decode_long(payload)
    raise ValueError("not enough data in stream to read long2")
653+
654+
# Descriptor for a long argument whose payload length is carried in a
# leading 2-byte size; decoding is delegated to read_long2.
# Doc text fix: "two byte" -> "two bytes".
long2 = ArgumentDescriptor(
    name="long2",
    n=TAKEN_FROM_ARGUMENT,
    reader=read_long2,
    doc="""A binary long, little-endian, using 2-byte size.

This first reads two bytes as an unsigned size, then reads that
many bytes and interprets them as a little-endian long.
""")
663+
664+
def read_long4(f):
    r"""Read a long preceded by a 4-byte signed byte count.

    The count is read with read_int4() and must be >= 0; that many bytes
    are then read from f and handed to decode_long().

    Raises ValueError if the count is negative, or if f yields fewer
    bytes than the count asks for.

    >>> import StringIO
    >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\xff\x00"))
    255L
    >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\xff\x7f"))
    32767L
    >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\x00\xff"))
    -256L
    >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\x00\x80"))
    -32768L
    """
    n = read_int4(f)
    if n < 0:
        # The size field is signed, so a corrupt stream can yield a
        # negative count; reject it before asking the stream for data.
        raise ValueError("long4 byte count < 0: %d" % n)
    data = f.read(n)
    if len(data) != n:
        raise ValueError("not enough data in stream to read long4")
    return decode_long(data)
685+
686+
# Descriptor for a long argument whose payload length is carried in a
# leading 4-byte signed size; decoding is delegated to read_long4.
long4 = ArgumentDescriptor(
    name="long4",
    reader=read_long4,
    n=TAKEN_FROM_ARGUMENT,
    doc="""A binary representation of a long, little-endian.

This first reads four bytes as a signed size (but requires the
size to be >= 0), then reads that many bytes and interprets them
as a little-endian long.
""")
696+
697+
581698
##############################################################################
582699
# Object descriptors. The stack used by the pickle machine holds objects,
583700
# and in the stack_before and stack_after attributes of OpcodeInfo
@@ -627,6 +744,11 @@ def __init__(self, name, obtype, doc):
627744
doc="A Python integer object (short or long), or "
628745
"a Python bool.")
629746

747+
# Stack object descriptor for values whose type is bool; used as the
# stack_after entry of the NEWTRUE and NEWFALSE opcodes.
pybool = StackObject(
    name='bool',
    doc="A Python bool object.",
    obtype=(bool,))
751+
630752
pyfloat = StackObject(
631753
name='float',
632754
obtype=float,
@@ -1436,6 +1558,172 @@ def __init__(self, name, code, arg,
14361558
ID is passed to self.persistent_load(), and whatever object that
14371559
returns is pushed on the stack. See PERSID for more detail.
14381560
"""),
1561+
1562+
# Protocol 2 opcodes
1563+
1564+
I(name='PROTO',
1565+
code='\x80',
1566+
arg=uint1,
1567+
stack_before=[],
1568+
stack_after=[],
1569+
proto=2,
1570+
doc="""Protocol version indicator.
1571+
1572+
For protocol 2 and above, a pickle must start with this opcode.
1573+
The argument is the protocol version, an int in range(2, 256).
1574+
"""),
1575+
1576+
I(name='NEWOBJ',
1577+
code='\x81',
1578+
arg=None,
1579+
stack_before=[anyobject, anyobject],
1580+
stack_after=[anyobject],
1581+
proto=2,
1582+
doc="""Build an object instance.
1583+
1584+
The stack before should be thought of as containing a class
1585+
object followed by an argument tuple (the tuple being the stack
1586+
top). Call these cls and args. They are popped off the stack,
1587+
and the value returned by cls.__new__(cls, *args) is pushed back
1588+
onto the stack.
1589+
"""),
1590+
1591+
I(name='EXT1',
1592+
code='\x82',
1593+
arg=uint1,
1594+
stack_before=[],
1595+
stack_after=[anyobject],
1596+
proto=2,
1597+
doc="""Extension code.
1598+
1599+
This code and the similar EXT2 and EXT4 allow using a registry
1600+
of popular objects that are pickled by name, typically classes.
1601+
It is envisioned that through a global negotiation and
1602+
registration process, third parties can set up a mapping between
1603+
ints and object names.
1604+
1605+
In order to guarantee pickle interchangeability, the extension
1606+
code registry ought to be global, although a range of codes may
1607+
be reserved for private use.
1608+
"""),
1609+
1610+
I(name='EXT2',
1611+
code='\x83',
1612+
arg=uint2,
1613+
stack_before=[],
1614+
stack_after=[anyobject],
1615+
proto=2,
1616+
doc="""Extension code.
1617+
1618+
See EXT1.
1619+
"""),
1620+
1621+
I(name='EXT4',
1622+
code='\x84',
1623+
arg=int4,
1624+
stack_before=[],
1625+
stack_after=[anyobject],
1626+
proto=2,
1627+
doc="""Extension code.
1628+
1629+
See EXT1.
1630+
"""),
1631+
1632+
I(name='TUPLE1',
1633+
code='\x85',
1634+
arg=None,
1635+
stack_before=[anyobject],
1636+
stack_after=[pytuple],
1637+
proto=2,
1638+
doc="""One-tuple.
1639+
1640+
This code pops one value off the stack and pushes a tuple of
1641+
length 1 whose one item is that value back onto it. IOW:
1642+
1643+
stack[-1] = tuple(stack[-1:])
1644+
"""),
1645+
1646+
# TUPLE2: no argument; replaces the top two stack items with one
# 2-tuple.  The doc summary previously read "One-tuple." (copied from
# TUPLE1).
I(name='TUPLE2',
  code='\x86',
  arg=None,
  stack_before=[anyobject, anyobject],
  stack_after=[pytuple],
  proto=2,
  doc="""Two-tuple.

This code pops two values off the stack and pushes a tuple
of length 2 whose items are those values back onto it. IOW:

stack[-2:] = [tuple(stack[-2:])]
"""),
1659+
1660+
# TUPLE3: no argument; replaces the top three stack items with one
# 3-tuple.  The doc summary previously read "One-tuple." (copied from
# TUPLE1).
I(name='TUPLE3',
  code='\x87',
  arg=None,
  stack_before=[anyobject, anyobject, anyobject],
  stack_after=[pytuple],
  proto=2,
  doc="""Three-tuple.

This code pops three values off the stack and pushes a tuple
of length 3 whose items are those values back onto it. IOW:

stack[-3:] = [tuple(stack[-3:])]
"""),
1673+
1674+
I(name='NEWTRUE',
1675+
code='\x88',
1676+
arg=None,
1677+
stack_before=[],
1678+
stack_after=[pybool],
1679+
proto=2,
1680+
doc="""True.
1681+
1682+
Push True onto the stack."""),
1683+
1684+
# NEWFALSE: no argument; pushes the bool False.  The doc summary
# previously read "True." (copied from NEWTRUE).
I(name='NEWFALSE',
  code='\x89',
  arg=None,
  stack_before=[],
  stack_after=[pybool],
  proto=2,
  doc="""False.

Push False onto the stack."""),
1693+
1694+
I(name="LONG1",
1695+
code='\x8a',
1696+
arg=long1,
1697+
stack_before=[],
1698+
stack_after=[pylong],
1699+
proto=2,
1700+
doc="""Long integer using one-byte length.
1701+
1702+
A more efficient encoding of a Python long; the long1 encoding
1703+
says it all."""),
1704+
1705+
I(name="LONG2",
1706+
code='\x8b',
1707+
arg=long2,
1708+
stack_before=[],
1709+
stack_after=[pylong],
1710+
proto=2,
1711+
doc="""Long integer using two-byte length.
1712+
1713+
A more efficient encoding of a Python long; the long2 encoding
1714+
says it all."""),
1715+
1716+
# LONG4: pushes a long decoded from a long4 argument (4-byte size
# prefix).  Doc typo fixed: "found-byte" -> "four-byte".
I(name="LONG4",
  code='\x8c',
  arg=long4,
  stack_before=[],
  stack_after=[pylong],
  proto=2,
  doc="""Long integer using four-byte length.

A more efficient encoding of a Python long; the long4 encoding
says it all."""),
1726+
14391727
]
14401728
del I
14411729

0 commit comments

Comments
 (0)