@@ -578,6 +578,123 @@ def read_float8(f):
578578 (may not survive roundtrip pickling intact).
579579 """ )
580580
581+ # Protocol 2 formats
582+
583+ def decode_long (data ):
584+ r"""Decode a long from a two's complement little-endian binary string.
585+ >>> decode_long("\xff\x00")
586+ 255L
587+ >>> decode_long("\xff\x7f")
588+ 32767L
589+ >>> decode_long("\x00\xff")
590+ -256L
591+ >>> decode_long("\x00\x80")
592+ -32768L
593+ >>>
594+ """
595+ x = 0L
596+ i = 0L
597+ for c in data :
598+ x |= long (ord (c )) << i
599+ i += 8L
600+ if i and (x & (1L << (i - 1L ))):
601+ x -= 1L << i
602+ return x
603+
def read_long1(f):
    r"""Read a long1 argument from the file-like object f.

    A size is obtained with read_uint1, that many bytes are read from f,
    and the bytes are decoded with decode_long.  ValueError is raised if
    fewer bytes than the size are available.

    >>> import StringIO
    >>> read_long1(StringIO.StringIO("\x02\xff\x00"))
    255L
    >>> read_long1(StringIO.StringIO("\x02\xff\x7f"))
    32767L
    >>> read_long1(StringIO.StringIO("\x02\x00\xff"))
    -256L
    >>> read_long1(StringIO.StringIO("\x02\x00\x80"))
    -32768L
    """
    size = read_uint1(f)
    payload = f.read(size)
    # A short read means the stream was truncated mid-argument.
    if len(payload) == size:
        return decode_long(payload)
    raise ValueError("not enough data in stream to read long1")
623+
# Argument descriptor for a long whose byte count is given by a 1-byte
# prefix; the total length therefore depends on the argument itself.
long1 = ArgumentDescriptor(
    name="long1",
    n=TAKEN_FROM_ARGUMENT,
    reader=read_long1,
    doc="""A binary long, little-endian, using 1-byte size.

    This first reads one byte as an unsigned size, then reads that
    many bytes and interprets them as a little-endian long.
    """)
633+
def read_long2(f):
    r"""Read a long2 argument from the file-like object f.

    A size is obtained with read_uint2, that many bytes are read from f,
    and the bytes are decoded with decode_long.  ValueError is raised if
    fewer bytes than the size are available.

    >>> import StringIO
    >>> read_long2(StringIO.StringIO("\x02\x00\xff\x00"))
    255L
    >>> read_long2(StringIO.StringIO("\x02\x00\xff\x7f"))
    32767L
    >>> read_long2(StringIO.StringIO("\x02\x00\x00\xff"))
    -256L
    >>> read_long2(StringIO.StringIO("\x02\x00\x00\x80"))
    -32768L
    """
    size = read_uint2(f)
    payload = f.read(size)
    # A short read means the stream was truncated mid-argument.
    if len(payload) == size:
        return decode_long(payload)
    raise ValueError("not enough data in stream to read long2")
653+
# Argument descriptor for a long whose byte count is given by a 2-byte
# prefix; the total length therefore depends on the argument itself.
long2 = ArgumentDescriptor(
    name="long2",
    n=TAKEN_FROM_ARGUMENT,
    reader=read_long2,
    doc="""A binary long, little-endian, using 2-byte size.

    This first reads two bytes as an unsigned size, then reads that
    many bytes and interprets them as a little-endian long.
    """)
663+
def read_long4(f):
    r"""Read a long4 argument from the file-like object f.

    A signed size is obtained with read_int4 and must be >= 0.  That many
    bytes are then read from f and decoded with decode_long.

    Raises ValueError if the size is negative or if fewer bytes than the
    size are available.

    >>> import StringIO
    >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\xff\x00"))
    255L
    >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\xff\x7f"))
    32767L
    >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\x00\xff"))
    -256L
    >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\x00\x80"))
    -32768L
    """

    n = read_int4(f)
    # The size is read as a signed quantity, so reject negative counts
    # explicitly rather than passing them to f.read().
    if n < 0:
        raise ValueError("long4 byte count < 0: %d" % n)
    data = f.read(n)
    # A short read means the stream was truncated mid-argument.
    if len(data) != n:
        raise ValueError("not enough data in stream to read long4")
    return decode_long(data)
685+
# Argument descriptor for a long whose byte count is given by a 4-byte
# prefix; the total length therefore depends on the argument itself.
long4 = ArgumentDescriptor(
    name="long4",
    n=TAKEN_FROM_ARGUMENT,
    reader=read_long4,
    doc="""A binary representation of a long, little-endian.

    This first reads four bytes as a signed size (but requires the
    size to be >= 0), then reads that many bytes and interprets them
    as a little-endian long.
    """)
696+
697+
581698##############################################################################
582699# Object descriptors. The stack used by the pickle machine holds objects,
583700# and in the stack_before and stack_after attributes of OpcodeInfo
@@ -627,6 +744,11 @@ def __init__(self, name, obtype, doc):
627744 doc = "A Python integer object (short or long), or "
628745 "a Python bool." )
629746
# Stack-object descriptor for values that are specifically bools; used by
# opcodes that push True or False.
pybool = StackObject(
    name='bool',
    obtype=(bool,),
    doc="A Python bool object.")
751+
630752pyfloat = StackObject (
631753 name = 'float' ,
632754 obtype = float ,
@@ -1436,6 +1558,172 @@ def __init__(self, name, code, arg,
14361558 ID is passed to self.persistent_load(), and whatever object that
14371559 returns is pushed on the stack. See PERSID for more detail.
14381560 """ ),
1561+
1562+ # Protocol 2 opcodes
1563+
# PROTO: opcode byte 0x80, followed by a one-byte unsigned protocol number.
# Leaves the stack untouched.
I(name='PROTO',
  code='\x80',
  arg=uint1,
  stack_before=[],
  stack_after=[],
  proto=2,
  doc="""Protocol version indicator.

      For protocol 2 and above, a pickle must start with this opcode.
      The argument is the protocol version, an int in range(2, 256).
      """),
1575+
# NEWOBJ: opcode byte 0x81, no argument.  Pops a class and an argument
# tuple and pushes the newly created instance.
I(name='NEWOBJ',
  code='\x81',
  arg=None,
  stack_before=[anyobject, anyobject],
  stack_after=[anyobject],
  proto=2,
  doc="""Build an object instance.

      The stack before should be thought of as containing a class
      object followed by an argument tuple (the tuple being the stack
      top).  Call these cls and args.  They are popped off the stack,
      and the value returned by cls.__new__(cls, *args) is pushed back
      onto the stack.
      """),
1590+
# EXT1: opcode byte 0x82, followed by a one-byte unsigned extension code.
# Pushes one object.
I(name='EXT1',
  code='\x82',
  arg=uint1,
  stack_before=[],
  stack_after=[anyobject],
  proto=2,
  doc="""Extension code.

      This code and the similar EXT2 and EXT4 allow using a registry
      of popular objects that are pickled by name, typically classes.
      It is envisioned that through a global negotiation and
      registration process, third parties can set up a mapping between
      ints and object names.

      In order to guarantee pickle interchangeability, the extension
      code registry ought to be global, although a range of codes may
      be reserved for private use.
      """),
1609+
# EXT2: opcode byte 0x83, followed by a two-byte unsigned extension code.
# Pushes one object.
I(name='EXT2',
  code='\x83',
  arg=uint2,
  stack_before=[],
  stack_after=[anyobject],
  proto=2,
  doc="""Extension code.

      See EXT1.
      """),
1620+
# EXT4: opcode byte 0x84, followed by a four-byte (int4) extension code.
# Pushes one object.
I(name='EXT4',
  code='\x84',
  arg=int4,
  stack_before=[],
  stack_after=[anyobject],
  proto=2,
  doc="""Extension code.

      See EXT1.
      """),
1631+
# TUPLE1: opcode byte 0x85, no argument.  Replaces the top stack item with
# a 1-tuple containing it.
I(name='TUPLE1',
  code='\x85',
  arg=None,
  stack_before=[anyobject],
  stack_after=[pytuple],
  proto=2,
  doc="""One-tuple.

      This code pops one value off the stack and pushes a tuple of
      length 1 whose one item is that value back onto it.  IOW:

          stack[-1] = tuple(stack[-1:])
      """),
1645+
# TUPLE2: opcode byte 0x86, no argument.  Replaces the top two stack items
# with a 2-tuple containing them.
I(name='TUPLE2',
  code='\x86',
  arg=None,
  stack_before=[anyobject, anyobject],
  stack_after=[pytuple],
  proto=2,
  doc="""Two-tuple.

      This code pops two values off the stack and pushes a tuple
      of length 2 whose items are those values back onto it.  IOW:

          stack[-2:] = [tuple(stack[-2:])]
      """),
1659+
# TUPLE3: opcode byte 0x87, no argument.  Replaces the top three stack
# items with a 3-tuple containing them.
I(name='TUPLE3',
  code='\x87',
  arg=None,
  stack_before=[anyobject, anyobject, anyobject],
  stack_after=[pytuple],
  proto=2,
  doc="""Three-tuple.

      This code pops three values off the stack and pushes a tuple
      of length 3 whose items are those values back onto it.  IOW:

          stack[-3:] = [tuple(stack[-3:])]
      """),
1673+
# NEWTRUE: opcode byte 0x88, no argument.  Pushes the bool True.
I(name='NEWTRUE',
  code='\x88',
  arg=None,
  stack_before=[],
  stack_after=[pybool],
  proto=2,
  doc="""True.

      Push True onto the stack."""),
1683+
# NEWFALSE: opcode byte 0x89, no argument.  Pushes the bool False.
I(name='NEWFALSE',
  code='\x89',
  arg=None,
  stack_before=[],
  stack_after=[pybool],
  proto=2,
  doc="""False.

      Push False onto the stack."""),
1693+
# LONG1: opcode byte 0x8a, followed by a long1 argument (1-byte size
# prefix).  Pushes the decoded long.
I(name="LONG1",
  code='\x8a',
  arg=long1,
  stack_before=[],
  stack_after=[pylong],
  proto=2,
  doc="""Long integer using one-byte length.

      A more efficient encoding of a Python long; the long1 encoding
      says it all."""),
1704+
# LONG2: opcode byte 0x8b, followed by a long2 argument (2-byte size
# prefix).  Pushes the decoded long.
I(name="LONG2",
  code='\x8b',
  arg=long2,
  stack_before=[],
  stack_after=[pylong],
  proto=2,
  doc="""Long integer using two-byte length.

      A more efficient encoding of a Python long; the long2 encoding
      says it all."""),
1715+
# LONG4: opcode byte 0x8c, followed by a long4 argument (4-byte size
# prefix).  Pushes the decoded long.
I(name="LONG4",
  code='\x8c',
  arg=long4,
  stack_before=[],
  stack_after=[pylong],
  proto=2,
  doc="""Long integer using four-byte length.

      A more efficient encoding of a Python long; the long4 encoding
      says it all."""),
1726+
14391727]
14401728del I
14411729
0 commit comments