diff --git a/Lib/pickle.py b/Lib/pickle.py index f027e04320..6e3c61fd0b 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -98,12 +98,6 @@ class _Stop(Exception): def __init__(self, value): self.value = value -# Jython has PyStringMap; it's a dict subclass with string keys -try: - from org.python.core import PyStringMap -except ImportError: - PyStringMap = None - # Pickle opcodes. See pickletools.py for extensive docs. The listing # here is in kind-of alphabetical order of 1-character pickle code. # pickletools groups them by purpose. @@ -861,13 +855,13 @@ def save_str(self, obj): else: self.write(BINUNICODE + pack("= 1 only @@ -1489,7 +1481,7 @@ def _instantiate(self, klass, args): value = klass(*args) except TypeError as err: raise TypeError("in constructor for %s: %s" % - (klass.__name__, str(err)), sys.exc_info()[2]) + (klass.__name__, str(err)), err.__traceback__) else: value = klass.__new__(klass) self.append(value) @@ -1799,7 +1791,7 @@ def _test(): parser = argparse.ArgumentParser( description='display contents of the pickle files') parser.add_argument( - 'pickle_file', type=argparse.FileType('br'), + 'pickle_file', nargs='*', help='the pickle file') parser.add_argument( '-t', '--test', action='store_true', @@ -1815,6 +1807,10 @@ def _test(): parser.print_help() else: import pprint - for f in args.pickle_file: - obj = load(f) + for fn in args.pickle_file: + if fn == '-': + obj = load(sys.stdin.buffer) + else: + with open(fn, 'rb') as f: + obj = load(f) pprint.pprint(obj) diff --git a/Lib/pickletools.py b/Lib/pickletools.py index 95706e746c..51ee4a7a26 100644 --- a/Lib/pickletools.py +++ b/Lib/pickletools.py @@ -1253,7 +1253,7 @@ def __init__(self, name, code, arg, stack_before=[], stack_after=[pyint], proto=2, - doc="""Long integer using found-byte length. + doc="""Long integer using four-byte length. A more efficient encoding of a Python long; the long4 encoding says it all."""), @@ -2848,10 +2848,10 @@ def _test(): parser = argparse.ArgumentParser( description='disassemble one or more pickle files') parser.add_argument( - 'pickle_file', type=argparse.FileType('br'), + 'pickle_file', nargs='*', help='the pickle file') parser.add_argument( - '-o', '--output', default=sys.stdout, type=argparse.FileType('w'), + '-o', '--output', help='the file where the output should be written') parser.add_argument( '-m', '--memo', action='store_true', @@ -2876,15 +2876,26 @@ def _test(): if args.test: _test() else: - annotate = 30 if args.annotate else 0 if not args.pickle_file: parser.print_help() - elif len(args.pickle_file) == 1: - dis(args.pickle_file[0], args.output, None, - args.indentlevel, annotate) else: + annotate = 30 if args.annotate else 0 memo = {} if args.memo else None - for f in args.pickle_file: - preamble = args.preamble.format(name=f.name) - args.output.write(preamble + '\n') - dis(f, args.output, memo, args.indentlevel, annotate) + if args.output is None: + output = sys.stdout + else: + output = open(args.output, 'w') + try: + for arg in args.pickle_file: + if len(args.pickle_file) > 1: + name = '' if arg == '-' else arg + preamble = args.preamble.format(name=name) + output.write(preamble + '\n') + if arg == '-': + dis(sys.stdin.buffer, output, memo, args.indentlevel, annotate) + else: + with open(arg, 'rb') as f: + dis(f, output, memo, args.indentlevel, annotate) + finally: + if output is not sys.stdout: + output.close() diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index 8a4de7a4fd..177e2ed2ca 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -1,3 +1,4 @@ +import builtins import collections import copyreg import dbm @@ -11,6 +12,7 @@ import struct import sys import threading +import types import unittest import weakref from textwrap import dedent @@ -1380,6 +1382,7 @@ def test_truncated_data(self): self.check_unpickling_error(self.truncated_errors, p) @threading_helper.reap_threads + @threading_helper.requires_working_threading() def test_unpickle_module_race(self): # https://bugs.python.org/issue34572 locker_module = dedent(""" @@ -1822,6 +1825,14 @@ def test_unicode_high_plane(self): t2 = self.loads(p) self.assert_is_copy(t, t2) + def test_unicode_memoization(self): + # Repeated str is re-used (even when escapes added). + for proto in protocols: + for s in '', 'xyz', 'xyz\n', 'x\\yz', 'x\xa1yz\r': + p = self.dumps((s, s), proto) + s1, s2 = self.loads(p) + self.assertIs(s1, s2) + def test_bytes(self): for proto in protocols: for s in b'', b'xyz', b'xyz'*100: @@ -1853,6 +1864,14 @@ def test_bytearray(self): self.assertNotIn(b'bytearray', p) self.assertTrue(opcode_in_pickle(pickle.BYTEARRAY8, p)) + def test_bytearray_memoization_bug(self): + for proto in protocols: + for s in b'', b'xyz', b'xyz'*100: + b = bytearray(s) + p = self.dumps((b, b), proto) + b1, b2 = self.loads(p) + self.assertIs(b1, b2) + def test_ints(self): for proto in protocols: n = sys.maxsize @@ -1971,6 +1990,35 @@ def test_singleton_types(self): u = self.loads(s) self.assertIs(type(singleton), u) + def test_builtin_types(self): + for t in builtins.__dict__.values(): + if isinstance(t, type) and not issubclass(t, BaseException): + for proto in protocols: + s = self.dumps(t, proto) + self.assertIs(self.loads(s), t) + + def test_builtin_exceptions(self): + for t in builtins.__dict__.values(): + if isinstance(t, type) and issubclass(t, BaseException): + for proto in protocols: + s = self.dumps(t, proto) + u = self.loads(s) + if proto <= 2 and issubclass(t, OSError) and t is not BlockingIOError: + self.assertIs(u, OSError) + elif proto <= 2 and issubclass(t, ImportError): + self.assertIs(u, ImportError) + else: + self.assertIs(u, t) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_builtin_functions(self): + for t in builtins.__dict__.values(): + if isinstance(t, types.BuiltinFunctionType): + for proto in protocols: + s = self.dumps(t, proto) + self.assertIs(self.loads(s), t) + # Tests for protocol 2 def test_proto(self): @@ -2370,13 +2418,17 @@ def test_reduce_calls_base(self): y = self.loads(s) self.assertEqual(y._reduce_called, 1) + # TODO: RUSTPYTHON + @unittest.expectedFailure @no_tracing def test_bad_getattr(self): # Issue #3514: crash when there is an infinite loop in __getattr__ x = BadGetattr() - for proto in protocols: + for proto in range(2): with support.infinite_recursion(): self.assertRaises(RuntimeError, self.dumps, x, proto) + for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): + s = self.dumps(x, proto) def test_reduce_bad_iterator(self): # Issue4176: crash when 4th and 5th items of __reduce__() @@ -2536,6 +2588,7 @@ def check_frame_opcodes(self, pickled): self.assertLess(pos - frameless_start, self.FRAME_SIZE_MIN) @support.skip_if_pgo_task + @support.requires_resource('cpu') def test_framing_many_objects(self): obj = list(range(10**5)) for proto in range(4, pickle.HIGHEST_PROTOCOL + 1): @@ -3024,6 +3077,67 @@ def check_array(arr): # 2-D, non-contiguous check_array(arr[::2]) + def test_evil_class_mutating_dict(self): + # https://github.com/python/cpython/issues/92930 + from random import getrandbits + + global Bad + class Bad: + def __eq__(self, other): + return ENABLED + def __hash__(self): + return 42 + def __reduce__(self): + if getrandbits(6) == 0: + collection.clear() + return (Bad, ()) + + for proto in protocols: + for _ in range(20): + ENABLED = False + collection = {Bad(): Bad() for _ in range(20)} + for bad in collection: + bad.bad = bad + bad.collection = collection + ENABLED = True + try: + data = self.dumps(collection, proto) + self.loads(data) + except RuntimeError as e: + expected = "changed size during iteration" + self.assertIn(expected, str(e)) + + def test_evil_pickler_mutating_collection(self): + # https://github.com/python/cpython/issues/92930 + if not hasattr(self, "pickler"): + raise self.skipTest(f"{type(self)} has no associated pickler type") + + global Clearer + class Clearer: + pass + + def check(collection): + class EvilPickler(self.pickler): + def persistent_id(self, obj): + if isinstance(obj, Clearer): + collection.clear() + return None + pickler = EvilPickler(io.BytesIO(), proto) + try: + pickler.dump(collection) + except RuntimeError as e: + expected = "changed size during iteration" + self.assertIn(expected, str(e)) + + for proto in protocols: + check([Clearer()]) + check([Clearer(), Clearer()]) + check({Clearer()}) + check({Clearer(), Clearer()}) + check({Clearer(): 1}) + check({Clearer(): 1, Clearer(): 2}) + check({1: Clearer(), 2: Clearer()}) + class BigmemPickleTests: @@ -3363,6 +3477,84 @@ def __init__(self): pass self.assertRaises(pickle.PicklingError, BadPickler().dump, 0) self.assertRaises(pickle.UnpicklingError, BadUnpickler().load) + def test_unpickler_bad_file(self): + # bpo-38384: Crash in _pickle if the read attribute raises an error. + def raises_oserror(self, *args, **kwargs): + raise OSError + @property + def bad_property(self): + 1/0 + + # File without read and readline + class F: + pass + self.assertRaises((AttributeError, TypeError), self.Unpickler, F()) + + # File without read + class F: + readline = raises_oserror + self.assertRaises((AttributeError, TypeError), self.Unpickler, F()) + + # File without readline + class F: + read = raises_oserror + self.assertRaises((AttributeError, TypeError), self.Unpickler, F()) + + # File with bad read + class F: + read = bad_property + readline = raises_oserror + self.assertRaises(ZeroDivisionError, self.Unpickler, F()) + + # File with bad readline + class F: + readline = bad_property + read = raises_oserror + self.assertRaises(ZeroDivisionError, self.Unpickler, F()) + + # File with bad readline, no read + class F: + readline = bad_property + self.assertRaises(ZeroDivisionError, self.Unpickler, F()) + + # File with bad read, no readline + class F: + read = bad_property + self.assertRaises((AttributeError, ZeroDivisionError), self.Unpickler, F()) + + # File with bad peek + class F: + peek = bad_property + read = raises_oserror + readline = raises_oserror + try: + self.Unpickler(F()) + except ZeroDivisionError: + pass + + # File with bad readinto + class F: + readinto = bad_property + read = raises_oserror + readline = raises_oserror + try: + self.Unpickler(F()) + except ZeroDivisionError: + pass + + def test_pickler_bad_file(self): + # File without write + class F: + pass + self.assertRaises(TypeError, self.Pickler, F()) + + # File with bad write + class F: + @property + def write(self): + 1/0 + self.assertRaises(ZeroDivisionError, self.Pickler, F()) + def check_dumps_loads_oob_buffers(self, dumps, loads): # No need to do the full gamut of tests here, just enough to # check that dumps() and loads() redirect their arguments diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py index 769677f242..d61570ce10 100644 --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -413,6 +413,34 @@ class CustomCPicklerClass(_pickle.Pickler, AbstractCustomPicklerClass): pass pickler_class = CustomCPicklerClass + @support.cpython_only + class HeapTypesTests(unittest.TestCase): + def setUp(self): + pickler = _pickle.Pickler(io.BytesIO()) + unpickler = _pickle.Unpickler(io.BytesIO()) + + self._types = ( + _pickle.Pickler, + _pickle.Unpickler, + type(pickler.memo), + type(unpickler.memo), + + # We cannot test the _pickle.Pdata; + # there's no way to get to it. + ) + + def test_have_gc(self): + import gc + for tp in self._types: + with self.subTest(tp=tp): + self.assertTrue(gc.is_tracked(tp)) + + def test_immutable(self): + for tp in self._types: + with self.subTest(tp=tp): + with self.assertRaisesRegex(TypeError, "immutable"): + tp.foo = "bar" + @support.cpython_only class SizeofTests(unittest.TestCase): check_sizeof = support.check_sizeof @@ -633,8 +661,8 @@ def test_exceptions(self): StopAsyncIteration, RecursionError, EncodingWarning, - ExceptionGroup, - BaseExceptionGroup): + BaseExceptionGroup, + ExceptionGroup): continue if exc is not OSError and issubclass(exc, OSError): self.assertEqual(reverse_mapping('builtins', name), @@ -653,6 +681,8 @@ def test_exceptions(self): def test_multiprocessing_exceptions(self): module = import_helper.import_module('multiprocessing.context') for name, exc in get_exceptions(module): + if issubclass(exc, Warning): + continue with self.subTest(name): self.assertEqual(reverse_mapping('multiprocessing.context', name), ('multiprocessing', name)) diff --git a/Lib/test/test_pickletools.py b/Lib/test/test_pickletools.py index 7dde4b17ed..f3b9dccb0d 100644 --- a/Lib/test/test_pickletools.py +++ b/Lib/test/test_pickletools.py @@ -65,8 +65,6 @@ def loads(self, buf, **kwds): # Test relies on writing by chunks into a file object. test_framed_write_sizes_with_delayed_writer = None - # TODO: RUSTPYTHON - @unittest.expectedFailure def test_optimize_long_binget(self): data = [str(i) for i in range(257)] data.append(data[-1]) diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py index 17e9dae8c0..e3d14d0d9a 100644 --- a/Lib/test/test_socket.py +++ b/Lib/test/test_socket.py @@ -2881,6 +2881,7 @@ def _testSendmsgAncillaryGenerator(self): self.assertEqual(self.sendmsgToServer([MSG], (o for o in [])), len(MSG)) + @unittest.skipIf(sys.platform == "darwin", "flaky on macOS") def testSendmsgArray(self): # Send data from an array instead of the usual bytes object. self.assertEqual(self.serv_sock.recv(len(MSG)), MSG) @@ -2889,6 +2890,7 @@ def _testSendmsgArray(self): self.assertEqual(self.sendmsgToServer([array.array("B", MSG)]), len(MSG)) + @unittest.skipIf(sys.platform == "darwin", "flaky on macOS") def testSendmsgGather(self): # Send message data from more than one buffer (gather write). self.assertEqual(self.serv_sock.recv(len(MSG)), MSG) @@ -2951,6 +2953,7 @@ def _testSendmsgBadMultiCmsg(self): [MSG], [(0, 0, b""), object()]) self.sendToServer(b"done") + @unittest.skipIf(sys.platform == "darwin", "flaky on macOS") def testSendmsgExcessCmsgReject(self): # Check that sendmsg() rejects excess ancillary data items # when the number that can be sent is limited. diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index 14975ec080..aaf4b62b45 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -392,7 +392,6 @@ def test(i, format_spec, result): test(123456, "1=20", '11111111111111123456') test(123456, "*=20", '**************123456') - # TODO: RUSTPYTHON @unittest.expectedFailure @run_with_locale('LC_NUMERIC', 'en_US.UTF8') def test_float__format__locale(self):