1313import codecs
1414import pickle
1515import re
16+ import sys
1617
1718__all__ = ['dis' , 'genops' , 'optimize' ]
1819
165166
166167# Represents the number of bytes consumed by a two-argument opcode where
167168# the first argument gives the number of bytes in the second argument.
168- TAKEN_FROM_ARGUMENT1 = - 2 # num bytes is 1-byte unsigned int
169- TAKEN_FROM_ARGUMENT4 = - 3 # num bytes is 4-byte signed little-endian int
169+ TAKEN_FROM_ARGUMENT1 = - 2 # num bytes is 1-byte unsigned int
170+ TAKEN_FROM_ARGUMENT4 = - 3 # num bytes is 4-byte signed little-endian int
171+ TAKEN_FROM_ARGUMENT4U = - 4 # num bytes is 4-byte unsigned little-endian int
170172
171173class ArgumentDescriptor (object ):
172174 __slots__ = (
@@ -194,7 +196,8 @@ def __init__(self, name, n, reader, doc):
194196 assert isinstance (n , int ) and (n >= 0 or
195197 n in (UP_TO_NEWLINE ,
196198 TAKEN_FROM_ARGUMENT1 ,
197- TAKEN_FROM_ARGUMENT4 ))
199+ TAKEN_FROM_ARGUMENT4 ,
200+ TAKEN_FROM_ARGUMENT4U ))
198201 self .n = n
199202
200203 self .reader = reader
@@ -265,6 +268,27 @@ def read_int4(f):
265268 doc = "Four-byte signed integer, little-endian, 2's complement." )
266269
267270
def read_uint4(f):
    r"""
    Read a four-byte unsigned little-endian integer from the stream f.

    Raises ValueError when fewer than four bytes can be read.

    >>> import io
    >>> read_uint4(io.BytesIO(b'\xff\x00\x00\x00'))
    255
    >>> read_uint4(io.BytesIO(b'\x00\x00\x00\x80')) == 2**31
    True
    """

    raw = f.read(4)
    # Guard clause: a short read means the stream was truncated.
    if len(raw) != 4:
        raise ValueError("not enough data in stream to read uint4")
    (value,) = _unpack("<I", raw)
    return value
284+
# Descriptor for a fixed-size argument: four bytes decoded by read_uint4.
uint4 = ArgumentDescriptor(
    name='uint4',
    n=4,
    reader=read_uint4,
    doc="Four-byte unsigned integer, little-endian.",
)
290+
291+
268292def read_stringnl (f , decode = True , stripquotes = True ):
269293 r"""
270294 >>> import io
@@ -421,6 +445,67 @@ def read_string1(f):
421445 """ )
422446
423447
def read_bytes1(f):
    r"""
    Read a bytes1 argument from the stream f.

    The length comes from read_uint1(f); that many bytes are then read and
    returned. Raises ValueError when the stream holds fewer bytes than the
    length announces.

    >>> import io
    >>> read_bytes1(io.BytesIO(b"\x00"))
    b''
    >>> read_bytes1(io.BytesIO(b"\x03abcdef"))
    b'abc'
    """

    count = read_uint1(f)
    assert count >= 0
    payload = f.read(count)
    # Guard clause: fewer bytes than announced means a truncated stream.
    if len(payload) != count:
        raise ValueError("expected %d bytes in a bytes1, but only %d remain" %
                         (count, len(payload)))
    return payload
464+
# Argument descriptor for a length-prefixed bytes string read by read_bytes1.
# n=TAKEN_FROM_ARGUMENT1 marks the byte count as coming from a leading
# 1-byte unsigned int rather than being a fixed size.
465+ bytes1 = ArgumentDescriptor (
466+ name = "bytes1" ,
467+ n = TAKEN_FROM_ARGUMENT1 ,
468+ reader = read_bytes1 ,
469+ doc = """A counted bytes string.
470+
471+ The first argument is a 1-byte unsigned int giving the number
472+ of bytes, and the second argument is that many bytes.
473+ """ )
474+
475+
def read_bytes4(f):
    r"""
    Read a bytes4 argument from the stream f.

    The length comes from read_uint4(f); that many bytes are then read and
    returned. Raises ValueError when the length exceeds sys.maxsize or when
    the stream holds fewer bytes than the length announces.

    >>> import io
    >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x00abc"))
    b''
    >>> read_bytes4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
    b'abc'
    >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
    Traceback (most recent call last):
      ...
    ValueError: expected 50331648 bytes in a bytes4, but only 6 remain
    """

    count = read_uint4(f)
    # Reject counts above sys.maxsize before asking the stream for them.
    if count > sys.maxsize:
        raise ValueError("bytes4 byte count > sys.maxsize: %d" % count)
    payload = f.read(count)
    if len(payload) != count:
        raise ValueError("expected %d bytes in a bytes4, but only %d remain" %
                         (count, len(payload)))
    return payload
497+
# Argument descriptor for a length-prefixed bytes string read by read_bytes4.
# n=TAKEN_FROM_ARGUMENT4U marks the byte count as coming from a leading
# 4-byte unsigned little-endian int rather than being a fixed size.
498+ bytes4 = ArgumentDescriptor (
499+ name = "bytes4" ,
500+ n = TAKEN_FROM_ARGUMENT4U ,
501+ reader = read_bytes4 ,
502+ doc = """A counted bytes string.
503+
504+ The first argument is a 4-byte little-endian unsigned int giving
505+ the number of bytes, and the second argument is that many bytes.
506+ """ )
507+
508+
424509def read_unicodestringnl (f ):
425510 r"""
426511 >>> import io
@@ -464,9 +549,9 @@ def read_unicodestring4(f):
464549 ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
465550 """
466551
467- n = read_int4 (f )
468- if n < 0 :
469- raise ValueError ("unicodestring4 byte count < 0 : %d" % n )
552+ n = read_uint4 (f )
553+ if n > sys . maxsize :
554+ raise ValueError ("unicodestring4 byte count > sys.maxsize : %d" % n )
470555 data = f .read (n )
471556 if len (data ) == n :
472557 return str (data , 'utf-8' , 'surrogatepass' )
@@ -475,7 +560,7 @@ def read_unicodestring4(f):
475560
476561unicodestring4 = ArgumentDescriptor (
477562 name = "unicodestring4" ,
478- n = TAKEN_FROM_ARGUMENT4 ,
563+ n = TAKEN_FROM_ARGUMENT4U ,
479564 reader = read_unicodestring4 ,
480565 doc = """A counted Unicode string.
481566
@@ -872,7 +957,7 @@ def __init__(self, name, code, arg,
872957 assert isinstance (x , StackObject )
873958 self .stack_after = stack_after
874959
875- assert isinstance (proto , int ) and 0 <= proto <= 3
960+ assert isinstance (proto , int ) and 0 <= proto <= pickle . HIGHEST_PROTOCOL
876961 self .proto = proto
877962
878963 assert isinstance (doc , str )
@@ -1038,28 +1123,28 @@ def __init__(self, name, code, arg,
10381123
10391124 I (name = 'BINBYTES' ,
10401125 code = 'B' ,
1041- arg = string4 ,
1126+ arg = bytes4 ,
10421127 stack_before = [],
10431128 stack_after = [pybytes ],
10441129 proto = 3 ,
10451130 doc = """Push a Python bytes object.
10461131
1047- There are two arguments: the first is a 4-byte little-endian signed int
1048- giving the number of bytes in the string , and the second is that many
1049- bytes, which are taken literally as the bytes content.
1132+ There are two arguments: the first is a 4-byte little-endian unsigned int
1133+ giving the number of bytes, and the second is that many bytes, which are
1134+ taken literally as the bytes content.
10501135 """ ),
10511136
10521137 I (name = 'SHORT_BINBYTES' ,
10531138 code = 'C' ,
1054- arg = string1 ,
1139+ arg = bytes1 ,
10551140 stack_before = [],
10561141 stack_after = [pybytes ],
10571142 proto = 3 ,
1058- doc = """Push a Python string object.
1143+ doc = """Push a Python bytes object.
10591144
10601145 There are two arguments: the first is a 1-byte unsigned int giving
1061- the number of bytes in the string , and the second is that many bytes,
1062- which are taken literally as the string content.
1146+ the number of bytes, and the second is that many bytes, which are taken
1147+ literally as the bytes content.
10631148 """ ),
10641149
10651150 # Ways to spell None.
@@ -1118,7 +1203,7 @@ def __init__(self, name, code, arg,
11181203 proto = 1 ,
11191204 doc = """Push a Python Unicode string object.
11201205
1121- There are two arguments: the first is a 4-byte little-endian signed int
1206+ There are two arguments: the first is a 4-byte little-endian unsigned int
11221207 giving the number of bytes in the string. The second is that many
11231208 bytes, and is the UTF-8 encoding of the Unicode string.
11241209 """ ),
@@ -1422,13 +1507,13 @@ def __init__(self, name, code, arg,
14221507
14231508 I (name = 'LONG_BINGET' ,
14241509 code = 'j' ,
1425- arg = int4 ,
1510+ arg = uint4 ,
14261511 stack_before = [],
14271512 stack_after = [anyobject ],
14281513 proto = 1 ,
14291514 doc = """Read an object from the memo and push it on the stack.
14301515
1431- The index of the memo object to push is given by the 4-byte signed
1516+ The index of the memo object to push is given by the 4-byte unsigned
14321517 little-endian integer following.
14331518 """ ),
14341519
@@ -1459,14 +1544,14 @@ def __init__(self, name, code, arg,
14591544
14601545 I (name = 'LONG_BINPUT' ,
14611546 code = 'r' ,
1462- arg = int4 ,
1547+ arg = uint4 ,
14631548 stack_before = [],
14641549 stack_after = [],
14651550 proto = 1 ,
14661551 doc = """Store the stack top into the memo. The stack is not popped.
14671552
14681553 The index of the memo location to write into is given by the 4-byte
1469- signed little-endian integer following.
1554+ unsigned little-endian integer following.
14701555 """ ),
14711556
14721557 # Access the extension registry (predefined objects). Akin to the GET
0 commit comments