From 89fe85aef4cf6d75a6ddf64e026ce694770a50cb Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 10 Sep 2023 20:06:09 +0300 Subject: [PATCH] gh-50644: Forbid pickling of codecs streams (GH-109180) Attempts to pickle or create a shallow or deep copy of codecs streams now raise a TypeError. Previously, copying failed with a RecursionError, while pickling produced wrong results that eventually caused unpickling to fail with a RecursionError. (cherry picked from commit d6892c2b9263b39ea1c7905667942914b6a24b2c) Co-authored-by: Serhiy Storchaka --- Lib/codecs.py | 12 +++ Lib/test/test_codecs.py | 79 +++++++++++++++++++ ...3-09-09-15-08-37.gh-issue-50644.JUAZOh.rst | 4 + 3 files changed, 95 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-09-09-15-08-37.gh-issue-50644.JUAZOh.rst diff --git a/Lib/codecs.py b/Lib/codecs.py index 3b173b612101e7..c6165fcb142a7f 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -414,6 +414,9 @@ def __enter__(self): def __exit__(self, type, value, tb): self.stream.close() + def __reduce_ex__(self, proto): + raise TypeError("can't serialize %s" % self.__class__.__name__) + ### class StreamReader(Codec): @@ -663,6 +666,9 @@ def __enter__(self): def __exit__(self, type, value, tb): self.stream.close() + def __reduce_ex__(self, proto): + raise TypeError("can't serialize %s" % self.__class__.__name__) + ### class StreamReaderWriter: @@ -750,6 +756,9 @@ def __enter__(self): def __exit__(self, type, value, tb): self.stream.close() + def __reduce_ex__(self, proto): + raise TypeError("can't serialize %s" % self.__class__.__name__) + ### class StreamRecoder: @@ -866,6 +875,9 @@ def __enter__(self): def __exit__(self, type, value, tb): self.stream.close() + def __reduce_ex__(self, proto): + raise TypeError("can't serialize %s" % self.__class__.__name__) + ### Shortcuts def open(filename, mode='r', encoding=None, errors='strict', buffering=-1): diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index e170a30263f8ba..684e6cf7515481 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1,7 +1,9 @@ import codecs import contextlib +import copy import io import locale +import pickle import sys import unittest import encodings @@ -1772,6 +1774,61 @@ def test_readlines(self): f = self.reader(self.stream) self.assertEqual(f.readlines(), ['\ud55c\n', '\uae00']) + def test_copy(self): + f = self.reader(Queue(b'\xed\x95\x9c\n\xea\xb8\x80')) + with self.assertRaisesRegex(TypeError, 'StreamReader'): + copy.copy(f) + with self.assertRaisesRegex(TypeError, 'StreamReader'): + copy.deepcopy(f) + + def test_pickle(self): + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(protocol=proto): + f = self.reader(Queue(b'\xed\x95\x9c\n\xea\xb8\x80')) + with self.assertRaisesRegex(TypeError, 'StreamReader'): + pickle.dumps(f, proto) + + +class StreamWriterTest(unittest.TestCase): + + def setUp(self): + self.writer = codecs.getwriter('utf-8') + + def test_copy(self): + f = self.writer(Queue(b'')) + with self.assertRaisesRegex(TypeError, 'StreamWriter'): + copy.copy(f) + with self.assertRaisesRegex(TypeError, 'StreamWriter'): + copy.deepcopy(f) + + def test_pickle(self): + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(protocol=proto): + f = self.writer(Queue(b'')) + with self.assertRaisesRegex(TypeError, 'StreamWriter'): + pickle.dumps(f, proto) + + +class StreamReaderWriterTest(unittest.TestCase): + + def setUp(self): + self.reader = codecs.getreader('latin1') + self.writer = codecs.getwriter('utf-8') + + def test_copy(self): + f = codecs.StreamReaderWriter(Queue(b''), self.reader, self.writer) + with self.assertRaisesRegex(TypeError, 'StreamReaderWriter'): + copy.copy(f) + with self.assertRaisesRegex(TypeError, 'StreamReaderWriter'): + copy.deepcopy(f) + + def test_pickle(self): + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(protocol=proto): + f = codecs.StreamReaderWriter(Queue(b''), self.reader, self.writer) + with self.assertRaisesRegex(TypeError, 'StreamReaderWriter'): + pickle.dumps(f, proto) + class EncodedFileTest(unittest.TestCase): @@ -3369,6 +3426,28 @@ def test_seeking_write(self): self.assertEqual(sr.readline(), b'abc\n') self.assertEqual(sr.readline(), b'789\n') + def test_copy(self): + bio = io.BytesIO() + codec = codecs.lookup('ascii') + sr = codecs.StreamRecoder(bio, codec.encode, codec.decode, + encodings.ascii.StreamReader, encodings.ascii.StreamWriter) + + with self.assertRaisesRegex(TypeError, 'StreamRecoder'): + copy.copy(sr) + with self.assertRaisesRegex(TypeError, 'StreamRecoder'): + copy.deepcopy(sr) + + def test_pickle(self): + q = Queue(b'') + codec = codecs.lookup('ascii') + sr = codecs.StreamRecoder(q, codec.encode, codec.decode, + encodings.ascii.StreamReader, encodings.ascii.StreamWriter) + + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(protocol=proto): + with self.assertRaisesRegex(TypeError, 'StreamRecoder'): + pickle.dumps(sr, proto) + @unittest.skipIf(_testinternalcapi is None, 'need _testinternalcapi module') class LocaleCodecTest(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2023-09-09-15-08-37.gh-issue-50644.JUAZOh.rst b/Misc/NEWS.d/next/Library/2023-09-09-15-08-37.gh-issue-50644.JUAZOh.rst new file mode 100644 index 00000000000000..a7a442e35289d3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-09-09-15-08-37.gh-issue-50644.JUAZOh.rst @@ -0,0 +1,4 @@ +Attempts to pickle or create a shallow or deep copy of :mod:`codecs` streams +now raise a TypeError. Previously, copying failed with a RecursionError, +while pickling produced wrong results that eventually caused unpickling +to fail with a RecursionError.