Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 8db89ca

Browse files
committed
Issue #16550: Update the opcode descriptions of pickletools to use unsigned
integers where appropriate. Initial patch by Serhiy Storchaka.
1 parent 5c4874f commit 8db89ca

2 files changed

Lines changed: 109 additions & 21 deletions

File tree

Lib/pickletools.py

Lines changed: 106 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
import codecs
1414
import pickle
1515
import re
16+
import sys
1617

1718
__all__ = ['dis', 'genops', 'optimize']
1819

@@ -165,8 +166,9 @@
165166

166167
# Represents the number of bytes consumed by a two-argument opcode where
167168
# the first argument gives the number of bytes in the second argument.
168-
TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int
169-
TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int
169+
TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int
170+
TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int
171+
TAKEN_FROM_ARGUMENT4U = -4 # num bytes is 4-byte unsigned little-endian int
170172

171173
class ArgumentDescriptor(object):
172174
__slots__ = (
@@ -194,7 +196,8 @@ def __init__(self, name, n, reader, doc):
194196
assert isinstance(n, int) and (n >= 0 or
195197
n in (UP_TO_NEWLINE,
196198
TAKEN_FROM_ARGUMENT1,
197-
TAKEN_FROM_ARGUMENT4))
199+
TAKEN_FROM_ARGUMENT4,
200+
TAKEN_FROM_ARGUMENT4U))
198201
self.n = n
199202

200203
self.reader = reader
@@ -265,6 +268,27 @@ def read_int4(f):
265268
doc="Four-byte signed integer, little-endian, 2's complement.")
266269

267270

271+
def read_uint4(f):
272+
r"""
273+
>>> import io
274+
>>> read_uint4(io.BytesIO(b'\xff\x00\x00\x00'))
275+
255
276+
>>> read_uint4(io.BytesIO(b'\x00\x00\x00\x80')) == 2**31
277+
True
278+
"""
279+
280+
data = f.read(4)
281+
if len(data) == 4:
282+
return _unpack("<I", data)[0]
283+
raise ValueError("not enough data in stream to read uint4")
284+
285+
uint4 = ArgumentDescriptor(
286+
name='uint4',
287+
n=4,
288+
reader=read_uint4,
289+
doc="Four-byte unsigned integer, little-endian.")
290+
291+
268292
def read_stringnl(f, decode=True, stripquotes=True):
269293
r"""
270294
>>> import io
@@ -421,6 +445,67 @@ def read_string1(f):
421445
""")
422446

423447

448+
def read_bytes1(f):
449+
r"""
450+
>>> import io
451+
>>> read_bytes1(io.BytesIO(b"\x00"))
452+
b''
453+
>>> read_bytes1(io.BytesIO(b"\x03abcdef"))
454+
b'abc'
455+
"""
456+
457+
n = read_uint1(f)
458+
assert n >= 0
459+
data = f.read(n)
460+
if len(data) == n:
461+
return data
462+
raise ValueError("expected %d bytes in a bytes1, but only %d remain" %
463+
(n, len(data)))
464+
465+
bytes1 = ArgumentDescriptor(
466+
name="bytes1",
467+
n=TAKEN_FROM_ARGUMENT1,
468+
reader=read_bytes1,
469+
doc="""A counted bytes string.
470+
471+
The first argument is a 1-byte unsigned int giving the number
472+
of bytes, and the second argument is that many bytes.
473+
""")
474+
475+
476+
def read_bytes4(f):
477+
r"""
478+
>>> import io
479+
>>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x00abc"))
480+
b''
481+
>>> read_bytes4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
482+
b'abc'
483+
>>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
484+
Traceback (most recent call last):
485+
...
486+
ValueError: expected 50331648 bytes in a bytes4, but only 6 remain
487+
"""
488+
489+
n = read_uint4(f)
490+
if n > sys.maxsize:
491+
raise ValueError("bytes4 byte count > sys.maxsize: %d" % n)
492+
data = f.read(n)
493+
if len(data) == n:
494+
return data
495+
raise ValueError("expected %d bytes in a bytes4, but only %d remain" %
496+
(n, len(data)))
497+
498+
bytes4 = ArgumentDescriptor(
499+
name="bytes4",
500+
n=TAKEN_FROM_ARGUMENT4U,
501+
reader=read_bytes4,
502+
doc="""A counted bytes string.
503+
504+
The first argument is a 4-byte little-endian unsigned int giving
505+
the number of bytes, and the second argument is that many bytes.
506+
""")
507+
508+
424509
def read_unicodestringnl(f):
425510
r"""
426511
>>> import io
@@ -464,9 +549,9 @@ def read_unicodestring4(f):
464549
ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
465550
"""
466551

467-
n = read_int4(f)
468-
if n < 0:
469-
raise ValueError("unicodestring4 byte count < 0: %d" % n)
552+
n = read_uint4(f)
553+
if n > sys.maxsize:
554+
raise ValueError("unicodestring4 byte count > sys.maxsize: %d" % n)
470555
data = f.read(n)
471556
if len(data) == n:
472557
return str(data, 'utf-8', 'surrogatepass')
@@ -475,7 +560,7 @@ def read_unicodestring4(f):
475560

476561
unicodestring4 = ArgumentDescriptor(
477562
name="unicodestring4",
478-
n=TAKEN_FROM_ARGUMENT4,
563+
n=TAKEN_FROM_ARGUMENT4U,
479564
reader=read_unicodestring4,
480565
doc="""A counted Unicode string.
481566
@@ -872,7 +957,7 @@ def __init__(self, name, code, arg,
872957
assert isinstance(x, StackObject)
873958
self.stack_after = stack_after
874959

875-
assert isinstance(proto, int) and 0 <= proto <= 3
960+
assert isinstance(proto, int) and 0 <= proto <= pickle.HIGHEST_PROTOCOL
876961
self.proto = proto
877962

878963
assert isinstance(doc, str)
@@ -1038,28 +1123,28 @@ def __init__(self, name, code, arg,
10381123

10391124
I(name='BINBYTES',
10401125
code='B',
1041-
arg=string4,
1126+
arg=bytes4,
10421127
stack_before=[],
10431128
stack_after=[pybytes],
10441129
proto=3,
10451130
doc="""Push a Python bytes object.
10461131
1047-
There are two arguments: the first is a 4-byte little-endian signed int
1048-
giving the number of bytes in the string, and the second is that many
1049-
bytes, which are taken literally as the bytes content.
1132+
There are two arguments: the first is a 4-byte little-endian unsigned int
1133+
giving the number of bytes, and the second is that many bytes, which are
1134+
taken literally as the bytes content.
10501135
"""),
10511136

10521137
I(name='SHORT_BINBYTES',
10531138
code='C',
1054-
arg=string1,
1139+
arg=bytes1,
10551140
stack_before=[],
10561141
stack_after=[pybytes],
10571142
proto=3,
1058-
doc="""Push a Python string object.
1143+
doc="""Push a Python bytes object.
10591144
10601145
There are two arguments: the first is a 1-byte unsigned int giving
1061-
the number of bytes in the string, and the second is that many bytes,
1062-
which are taken literally as the string content.
1146+
the number of bytes, and the second is that many bytes, which are taken
1147+
literally as the bytes content.
10631148
"""),
10641149

10651150
# Ways to spell None.
@@ -1118,7 +1203,7 @@ def __init__(self, name, code, arg,
11181203
proto=1,
11191204
doc="""Push a Python Unicode string object.
11201205
1121-
There are two arguments: the first is a 4-byte little-endian signed int
1206+
There are two arguments: the first is a 4-byte little-endian unsigned int
11221207
giving the number of bytes in the string. The second is that many
11231208
bytes, and is the UTF-8 encoding of the Unicode string.
11241209
"""),
@@ -1422,13 +1507,13 @@ def __init__(self, name, code, arg,
14221507

14231508
I(name='LONG_BINGET',
14241509
code='j',
1425-
arg=int4,
1510+
arg=uint4,
14261511
stack_before=[],
14271512
stack_after=[anyobject],
14281513
proto=1,
14291514
doc="""Read an object from the memo and push it on the stack.
14301515
1431-
The index of the memo object to push is given by the 4-byte signed
1516+
The index of the memo object to push is given by the 4-byte unsigned
14321517
little-endian integer following.
14331518
"""),
14341519

@@ -1459,14 +1544,14 @@ def __init__(self, name, code, arg,
14591544

14601545
I(name='LONG_BINPUT',
14611546
code='r',
1462-
arg=int4,
1547+
arg=uint4,
14631548
stack_before=[],
14641549
stack_after=[],
14651550
proto=1,
14661551
doc="""Store the stack top into the memo. The stack is not popped.
14671552
14681553
The index of the memo location to write into is given by the 4-byte
1469-
signed little-endian integer following.
1554+
unsigned little-endian integer following.
14701555
"""),
14711556

14721557
# Access the extension registry (predefined objects). Akin to the GET

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,9 @@ Library
5858
- Issue #17526: fix an IndexError raised while passing code without filename to
5959
inspect.findsource(). Initial patch by Tyler Doyle.
6060

61+
- Issue #16550: Update the opcode descriptions of pickletools to use unsigned
62+
integers where appropriate. Initial patch by Serhiy Storchaka.
63+
6164
IDLE
6265
----
6366

0 commit comments

Comments
 (0)