Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Update pickle from CPython 3.12.3 #5260

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 15 additions & 19 deletions Lib/pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,6 @@ class _Stop(Exception):
def __init__(self, value):
self.value = value

# Jython has PyStringMap; it's a dict subclass with string keys
try:
from org.python.core import PyStringMap
except ImportError:
PyStringMap = None

# Pickle opcodes. See pickletools.py for extensive docs. The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.
Expand Down Expand Up @@ -861,13 +855,13 @@ def save_str(self, obj):
else:
self.write(BINUNICODE + pack("<I", n) + encoded)
else:
obj = obj.replace("\\", "\\u005c")
obj = obj.replace("\0", "\\u0000")
obj = obj.replace("\n", "\\u000a")
obj = obj.replace("\r", "\\u000d")
obj = obj.replace("\x1a", "\\u001a") # EOF on DOS
self.write(UNICODE + obj.encode('raw-unicode-escape') +
b'\n')
# Escape what raw-unicode-escape doesn't, but memoize the original.
tmp = obj.replace("\\", "\\u005c")
tmp = tmp.replace("\0", "\\u0000")
tmp = tmp.replace("\n", "\\u000a")
tmp = tmp.replace("\r", "\\u000d")
tmp = tmp.replace("\x1a", "\\u001a") # EOF on DOS
self.write(UNICODE + tmp.encode('raw-unicode-escape') + b'\n')
self.memoize(obj)
dispatch[str] = save_str

Expand Down Expand Up @@ -972,8 +966,6 @@ def save_dict(self, obj):
self._batch_setitems(obj.items())

dispatch[dict] = save_dict
if PyStringMap is not None:
dispatch[PyStringMap] = save_dict

def _batch_setitems(self, items):
# Helper to batch up SETITEMS sequences; proto >= 1 only
Expand Down Expand Up @@ -1489,7 +1481,7 @@ def _instantiate(self, klass, args):
value = klass(*args)
except TypeError as err:
raise TypeError("in constructor for %s: %s" %
(klass.__name__, str(err)), sys.exc_info()[2])
(klass.__name__, str(err)), err.__traceback__)
else:
value = klass.__new__(klass)
self.append(value)
Expand Down Expand Up @@ -1799,7 +1791,7 @@ def _test():
parser = argparse.ArgumentParser(
description='display contents of the pickle files')
parser.add_argument(
'pickle_file', type=argparse.FileType('br'),
'pickle_file',
nargs='*', help='the pickle file')
parser.add_argument(
'-t', '--test', action='store_true',
Expand All @@ -1815,6 +1807,10 @@ def _test():
parser.print_help()
else:
import pprint
for f in args.pickle_file:
obj = load(f)
for fn in args.pickle_file:
if fn == '-':
obj = load(sys.stdin.buffer)
else:
with open(fn, 'rb') as f:
obj = load(f)
pprint.pprint(obj)
33 changes: 22 additions & 11 deletions Lib/pickletools.py
Original file line number Diff line number Diff line change
Expand Up @@ -1253,7 +1253,7 @@ def __init__(self, name, code, arg,
stack_before=[],
stack_after=[pyint],
proto=2,
doc="""Long integer using found-byte length.
doc="""Long integer using four-byte length.

A more efficient encoding of a Python long; the long4 encoding
says it all."""),
Expand Down Expand Up @@ -2848,10 +2848,10 @@ def _test():
parser = argparse.ArgumentParser(
description='disassemble one or more pickle files')
parser.add_argument(
'pickle_file', type=argparse.FileType('br'),
'pickle_file',
nargs='*', help='the pickle file')
parser.add_argument(
'-o', '--output', default=sys.stdout, type=argparse.FileType('w'),
'-o', '--output',
help='the file where the output should be written')
parser.add_argument(
'-m', '--memo', action='store_true',
Expand All @@ -2876,15 +2876,26 @@ def _test():
if args.test:
_test()
else:
annotate = 30 if args.annotate else 0
if not args.pickle_file:
parser.print_help()
elif len(args.pickle_file) == 1:
dis(args.pickle_file[0], args.output, None,
args.indentlevel, annotate)
else:
annotate = 30 if args.annotate else 0
memo = {} if args.memo else None
for f in args.pickle_file:
preamble = args.preamble.format(name=f.name)
args.output.write(preamble + '\n')
dis(f, args.output, memo, args.indentlevel, annotate)
if args.output is None:
output = sys.stdout
else:
output = open(args.output, 'w')
try:
for arg in args.pickle_file:
if len(args.pickle_file) > 1:
name = '<stdin>' if arg == '-' else arg
preamble = args.preamble.format(name=name)
output.write(preamble + '\n')
if arg == '-':
dis(sys.stdin.buffer, output, memo, args.indentlevel, annotate)
else:
with open(arg, 'rb') as f:
dis(f, output, memo, args.indentlevel, annotate)
finally:
if output is not sys.stdout:
output.close()
194 changes: 193 additions & 1 deletion Lib/test/pickletester.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import builtins
import collections
import copyreg
import dbm
Expand All @@ -11,6 +12,7 @@
import struct
import sys
import threading
import types
import unittest
import weakref
from textwrap import dedent
Expand Down Expand Up @@ -1380,6 +1382,7 @@ def test_truncated_data(self):
self.check_unpickling_error(self.truncated_errors, p)

@threading_helper.reap_threads
@threading_helper.requires_working_threading()
def test_unpickle_module_race(self):
# https://bugs.python.org/issue34572
locker_module = dedent("""
Expand Down Expand Up @@ -1822,6 +1825,14 @@ def test_unicode_high_plane(self):
t2 = self.loads(p)
self.assert_is_copy(t, t2)

def test_unicode_memoization(self):
    # A str that appears twice in one pickle must be restored as a single
    # shared object, including strings that contain a newline, a carriage
    # return, a backslash or a non-ASCII character.
    samples = ('', 'xyz', 'xyz\n', 'x\\yz', 'x\xa1yz\r')
    for proto in protocols:
        for text in samples:
            payload = self.dumps((text, text), proto)
            first, second = self.loads(payload)
            self.assertIs(first, second)

def test_bytes(self):
for proto in protocols:
for s in b'', b'xyz', b'xyz'*100:
Expand Down Expand Up @@ -1853,6 +1864,14 @@ def test_bytearray(self):
self.assertNotIn(b'bytearray', p)
self.assertTrue(opcode_in_pickle(pickle.BYTEARRAY8, p))

def test_bytearray_memoization_bug(self):
    # The same bytearray pickled twice inside one tuple must be restored
    # as one shared object under every protocol.
    samples = (b'', b'xyz', b'xyz'*100)
    for proto in protocols:
        for raw in samples:
            buf = bytearray(raw)
            payload = self.dumps((buf, buf), proto)
            first, second = self.loads(payload)
            self.assertIs(first, second)

def test_ints(self):
for proto in protocols:
n = sys.maxsize
Expand Down Expand Up @@ -1971,6 +1990,35 @@ def test_singleton_types(self):
u = self.loads(s)
self.assertIs(type(singleton), u)

def test_builtin_types(self):
    # Every class exported by builtins, apart from the exception classes,
    # must round-trip to the very same object.
    for obj in builtins.__dict__.values():
        if not isinstance(obj, type) or issubclass(obj, BaseException):
            continue
        for proto in protocols:
            pickled = self.dumps(obj, proto)
            self.assertIs(self.loads(pickled), obj)

def test_builtin_exceptions(self):
    # Exception classes exported by builtins must round-trip by identity,
    # except that with protocols 0-2 OSError subclasses (other than
    # BlockingIOError) come back as OSError and ImportError subclasses
    # come back as ImportError.
    # NOTE(review): presumably a consequence of the Python 2 compatible
    # name mapping used by the old protocols -- confirm.
    for exc in builtins.__dict__.values():
        if not (isinstance(exc, type) and issubclass(exc, BaseException)):
            continue
        for proto in protocols:
            restored = self.loads(self.dumps(exc, proto))
            if proto <= 2 and issubclass(exc, OSError) and exc is not BlockingIOError:
                expected = OSError
            elif proto <= 2 and issubclass(exc, ImportError):
                expected = ImportError
            else:
                expected = exc
            self.assertIs(restored, expected)

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_builtin_functions(self):
    # Every built-in function exported by builtins must round-trip to the
    # very same object under every protocol.
    for obj in builtins.__dict__.values():
        if not isinstance(obj, types.BuiltinFunctionType):
            continue
        for proto in protocols:
            pickled = self.dumps(obj, proto)
            self.assertIs(self.loads(pickled), obj)

# Tests for protocol 2

def test_proto(self):
Expand Down Expand Up @@ -2370,13 +2418,17 @@ def test_reduce_calls_base(self):
y = self.loads(s)
self.assertEqual(y._reduce_called, 1)

# TODO: RUSTPYTHON
@unittest.expectedFailure
@no_tracing
def test_bad_getattr(self):
# Issue #3514: crash when there is an infinite loop in __getattr__
x = BadGetattr()
for proto in protocols:
for proto in range(2):
with support.infinite_recursion():
self.assertRaises(RuntimeError, self.dumps, x, proto)
for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
s = self.dumps(x, proto)

def test_reduce_bad_iterator(self):
# Issue4176: crash when 4th and 5th items of __reduce__()
Expand Down Expand Up @@ -2536,6 +2588,7 @@ def check_frame_opcodes(self, pickled):
self.assertLess(pos - frameless_start, self.FRAME_SIZE_MIN)

@support.skip_if_pgo_task
@support.requires_resource('cpu')
def test_framing_many_objects(self):
obj = list(range(10**5))
for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
Expand Down Expand Up @@ -3024,6 +3077,67 @@ def check_array(arr):
# 2-D, non-contiguous
check_array(arr[::2])

def test_evil_class_mutating_dict(self):
    # https://github.com/python/cpython/issues/92930
    # A key whose __reduce__ occasionally empties the dict that is being
    # pickled must lead either to a clean round-trip or to a RuntimeError
    # that mentions the size change.
    from random import getrandbits

    # NOTE(review): presumably declared global so that pickle can look the
    # class up by its module-level name -- confirm.
    global Bad
    class Bad:
        def __eq__(self, other):
            # False while the dict is built, so that all the keys (which
            # share one hash) are kept; True afterwards.
            return ENABLED
        def __hash__(self):
            return 42
        def __reduce__(self):
            # Clears the dict only when all 6 random bits are zero.
            if getrandbits(6) == 0:
                collection.clear()
            return (Bad, ())

    size_error = "changed size during iteration"
    for proto in protocols:
        for _attempt in range(20):
            ENABLED = False
            collection = {Bad(): Bad() for _ in range(20)}
            for key in collection:
                key.bad = key
                key.collection = collection
            ENABLED = True
            try:
                self.loads(self.dumps(collection, proto))
            except RuntimeError as err:
                self.assertIn(size_error, str(err))

def test_evil_pickler_mutating_collection(self):
# https://github.com/python/cpython/issues/92930
if not hasattr(self, "pickler"):
raise self.skipTest(f"{type(self)} has no associated pickler type")

global Clearer
class Clearer:
pass

def check(collection):
class EvilPickler(self.pickler):
def persistent_id(self, obj):
if isinstance(obj, Clearer):
collection.clear()
return None
pickler = EvilPickler(io.BytesIO(), proto)
try:
pickler.dump(collection)
except RuntimeError as e:
expected = "changed size during iteration"
self.assertIn(expected, str(e))

for proto in protocols:
check([Clearer()])
check([Clearer(), Clearer()])
check({Clearer()})
check({Clearer(), Clearer()})
check({Clearer(): 1})
check({Clearer(): 1, Clearer(): 2})
check({1: Clearer(), 2: Clearer()})


class BigmemPickleTests:

Expand Down Expand Up @@ -3363,6 +3477,84 @@ def __init__(self): pass
self.assertRaises(pickle.PicklingError, BadPickler().dump, 0)
self.assertRaises(pickle.UnpicklingError, BadUnpickler().load)

def test_unpickler_bad_file(self):
# bpo-38384: Crash in _pickle if the read attribute raises an error.
def raises_oserror(self, *args, **kwargs):
raise OSError
@property
def bad_property(self):
1/0

# File without read and readline
class F:
pass
self.assertRaises((AttributeError, TypeError), self.Unpickler, F())

# File without read
class F:
readline = raises_oserror
self.assertRaises((AttributeError, TypeError), self.Unpickler, F())

# File without readline
class F:
read = raises_oserror
self.assertRaises((AttributeError, TypeError), self.Unpickler, F())

# File with bad read
class F:
read = bad_property
readline = raises_oserror
self.assertRaises(ZeroDivisionError, self.Unpickler, F())

# File with bad readline
class F:
readline = bad_property
read = raises_oserror
self.assertRaises(ZeroDivisionError, self.Unpickler, F())

# File with bad readline, no read
class F:
readline = bad_property
self.assertRaises(ZeroDivisionError, self.Unpickler, F())

# File with bad read, no readline
class F:
read = bad_property
self.assertRaises((AttributeError, ZeroDivisionError), self.Unpickler, F())

# File with bad peek
class F:
peek = bad_property
read = raises_oserror
readline = raises_oserror
try:
self.Unpickler(F())
except ZeroDivisionError:
pass

# File with bad readinto
class F:
readinto = bad_property
read = raises_oserror
readline = raises_oserror
try:
self.Unpickler(F())
except ZeroDivisionError:
pass

def test_pickler_bad_file(self):
# File without write
class F:
pass
self.assertRaises(TypeError, self.Pickler, F())

# File with bad write
class F:
@property
def write(self):
1/0
self.assertRaises(ZeroDivisionError, self.Pickler, F())

def check_dumps_loads_oob_buffers(self, dumps, loads):
# No need to do the full gamut of tests here, just enough to
# check that dumps() and loads() redirect their arguments
Expand Down
Loading