From 42bb0bda079a73036874e74758a122c17a9ce105 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Fri, 3 Jan 2025 23:47:37 -0800 Subject: [PATCH 01/15] add is_s390x and skip_on_s390x to test support --- Lib/test/support/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index b49ffbd536..ed8cdd0653 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -532,6 +532,9 @@ def requires_legacy_unicode_capi(): has_fork_support = hasattr(os, "fork") and not is_emscripten and not is_wasi +is_s390x = hasattr(os, 'uname') and os.uname().machine == 's390x' +skip_on_s390x = unittest.skipIf(is_s390x, 'skipped on s390x') + def requires_fork(): return unittest.skipUnless(has_fork_support, "requires working os.fork()") From 471e2a0e2ee55d949ad73cc58496e3962f9289fc Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Sat, 4 Jan 2025 00:34:05 -0800 Subject: [PATCH 02/15] update zlib tests to 3.12 --- Lib/test/test_zlib.py | 108 ++++++++++++++++++++---------------------- 1 file changed, 51 insertions(+), 57 deletions(-) diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index 026a5abc25..013d689054 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -7,20 +7,36 @@ import pickle import random import sys -from test.support import bigmemtest, _1G, _4G +from test.support import bigmemtest, _1G, _4G, is_s390x zlib = import_helper.import_module('zlib') requires_Compress_copy = unittest.skipUnless( - hasattr(zlib.compressobj(), "copy"), - 'requires Compress.copy()') + hasattr(zlib.compressobj(), "copy"), + 'requires Compress.copy()') requires_Decompress_copy = unittest.skipUnless( - hasattr(zlib.decompressobj(), "copy"), - 'requires Decompress.copy()') + hasattr(zlib.decompressobj(), "copy"), + 'requires Decompress.copy()') + + +# def _zlib_runtime_version_tuple(zlib_version=zlib.ZLIB_RUNTIME_VERSION): +# # Register "1.2.3" as "1.2.3.0" +# # or "1.2.0-linux","1.2.0.f","1.2.0.f-linux" +# v = zlib_version.split('-', 1)[0].split('.') +# if len(v) < 4: +# v.append('0') +# elif not v[-1].isnumeric(): +# v[-1] = '0' +# return tuple(map(int, v)) +# +# +# ZLIB_RUNTIME_VERSION_TUPLE = _zlib_runtime_version_tuple() -# bpo-46623: On s390x, when a hardware accelerator is used, using different -# ways to compress data with zlib can produce different compressed data. + +# bpo-46623: When a hardware accelerator is used (currently only on s390x), +# using different ways to compress data with zlib can produce different +# compressed data. # Simplified test_pair() code: # # def func1(data): @@ -43,10 +59,9 @@ # # zlib.decompress(func1(data)) == zlib.decompress(func2(data)) == data # -# Make the assumption that s390x always has an accelerator to simplify the skip -# condition. Windows doesn't have os.uname() but it doesn't support s390x. -skip_on_s390x = unittest.skipIf(hasattr(os, 'uname') and os.uname().machine == 's390x', - 'skipped on s390x') +# To simplify the skip condition, make the assumption that s390x always has an +# accelerator, and nothing else has it. +HW_ACCELERATED = is_s390x class VersionTestCase(unittest.TestCase): @@ -141,7 +156,7 @@ def test_badcompressobj(self): self.assertRaises(ValueError, zlib.compressobj, 1, zlib.DEFLATED, 0) # specifying total bits too large causes an error self.assertRaises(ValueError, - zlib.compressobj, 1, zlib.DEFLATED, zlib.MAX_WBITS + 1) + zlib.compressobj, 1, zlib.DEFLATED, zlib.MAX_WBITS + 1) def test_baddecompressobj(self): # verify failure on building decompress object with bad params @@ -214,12 +229,14 @@ def test_keywords(self): bufsize=zlib.DEF_BUF_SIZE), HAMLET_SCENE) - @skip_on_s390x def test_speech128(self): # compress more data data = HAMLET_SCENE * 128 x = zlib.compress(data) - self.assertEqual(zlib.compress(bytearray(data)), x) + # With hardware acceleration, the compressed bytes + # might not be identical. + if not HW_ACCELERATED: + self.assertEqual(zlib.compress(bytearray(data)), x) for ob in x, bytearray(x): self.assertEqual(zlib.decompress(ob), data) @@ -227,8 +244,8 @@ def test_incomplete_stream(self): # A useful error message is given x = zlib.compress(HAMLET_SCENE) self.assertRaisesRegex(zlib.error, - "Error -5 while decompressing data: incomplete or truncated stream", - zlib.decompress, x[:-1]) + "Error -5 while decompressing data: incomplete or truncated stream", + zlib.decompress, x[:-1]) # Memory use of the following functions takes into account overallocation @@ -268,7 +285,6 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase): # TODO: RUSTPYTHON @unittest.expectedFailure # Test compression object - @skip_on_s390x def test_pair(self): # straightforward compress/decompress objects datasrc = HAMLET_SCENE * 128 @@ -279,7 +295,10 @@ def test_pair(self): x1 = co.compress(data) x2 = co.flush() self.assertRaises(zlib.error, co.flush) # second flush should not work - self.assertEqual(x1 + x2, datazip) + # With hardware acceleration, the compressed bytes might not + # be identical. + if not HW_ACCELERATED: + self.assertEqual(x1 + x2, datazip) for v1, v2 in ((x1, x2), (bytearray(x1), bytearray(x2))): dco = zlib.decompressobj() y1 = dco.decompress(v1 + v2) @@ -364,7 +383,7 @@ def test_decompinc(self, flush=False, source=None, cx=256, dcx=64): bufs.append(dco.decompress(combuf[i:i+dcx])) self.assertEqual(b'', dco.unconsumed_tail, ######## "(A) uct should be b'': not %d long" % - len(dco.unconsumed_tail)) + len(dco.unconsumed_tail)) self.assertEqual(b'', dco.unused_data) if flush: bufs.append(dco.flush()) @@ -377,7 +396,7 @@ def test_decompinc(self, flush=False, source=None, cx=256, dcx=64): break self.assertEqual(b'', dco.unconsumed_tail, ######## "(B) uct should be b'': not %d long" % - len(dco.unconsumed_tail)) + len(dco.unconsumed_tail)) self.assertEqual(b'', dco.unused_data) self.assertEqual(data, b''.join(bufs)) # Failure means: "decompressobj with init options failed" @@ -406,7 +425,7 @@ def test_decompimax(self, source=None, cx=256, dcx=64): #max_length = 1 + len(cb)//10 chunk = dco.decompress(cb, dcx) self.assertFalse(len(chunk) > dcx, - 'chunk too big (%d>%d)' % (len(chunk), dcx)) + 'chunk too big (%d>%d)' % (len(chunk), dcx)) bufs.append(chunk) cb = dco.unconsumed_tail bufs.append(dco.flush()) @@ -431,7 +450,7 @@ def test_decompressmaxlen(self, flush=False): max_length = 1 + len(cb)//10 chunk = dco.decompress(cb, max_length) self.assertFalse(len(chunk) > max_length, - 'chunk too big (%d>%d)' % (len(chunk),max_length)) + 'chunk too big (%d>%d)' % (len(chunk),max_length)) bufs.append(chunk) cb = dco.unconsumed_tail if flush: @@ -440,7 +459,7 @@ def test_decompressmaxlen(self, flush=False): while chunk: chunk = dco.decompress(b'', max_length) self.assertFalse(len(chunk) > max_length, - 'chunk too big (%d>%d)' % (len(chunk),max_length)) + 'chunk too big (%d>%d)' % (len(chunk),max_length)) bufs.append(chunk) self.assertEqual(data, b''.join(bufs), 'Wrong data retrieved') @@ -487,9 +506,8 @@ def test_flushes(self): sync_opt = ['Z_NO_FLUSH', 'Z_SYNC_FLUSH', 'Z_FULL_FLUSH', 'Z_PARTIAL_FLUSH'] - ver = tuple(int(v) for v in zlib.ZLIB_RUNTIME_VERSION.split('.')) # Z_BLOCK has a known failure prior to 1.2.5.3 - if ver >= (1, 2, 5, 3): + if ZLIB_RUNTIME_VERSION_TUPLE >= (1, 2, 5, 3): sync_opt.append('Z_BLOCK') sync_opt = [getattr(zlib, opt) for opt in sync_opt @@ -498,20 +516,16 @@ def test_flushes(self): for sync in sync_opt: for level in range(10): - try: + with self.subTest(sync=sync, level=level): obj = zlib.compressobj( level ) a = obj.compress( data[:3000] ) b = obj.flush( sync ) c = obj.compress( data[3000:] ) d = obj.flush() - except: - print("Error for flush mode={}, level={}" - .format(sync, level)) - raise - self.assertEqual(zlib.decompress(b''.join([a,b,c,d])), - data, ("Decompress failed: flush " - "mode=%i, level=%i") % (sync, level)) - del obj + self.assertEqual(zlib.decompress(b''.join([a,b,c,d])), + data, ("Decompress failed: flush " + "mode=%i, level=%i") % (sync, level)) + del obj @unittest.skipUnless(hasattr(zlib, 'Z_SYNC_FLUSH'), 'requires zlib.Z_SYNC_FLUSH') @@ -526,18 +540,7 @@ def test_odd_flush(self): # Try 17K of data # generate random data stream - try: - # In 2.3 and later, WichmannHill is the RNG of the bug report - gen = random.WichmannHill() - except AttributeError: - try: - # 2.2 called it Random - gen = random.Random() - except AttributeError: - # others might simply have a single RNG - gen = random - gen.seed(1) - data = gen.randbytes(17 * 1024) + data = random.randbytes(17 * 1024) # compress, sync-flush, and decompress first = co.compress(data) @@ -642,7 +645,7 @@ def test_decompress_unused_data(self): self.assertEqual(dco.unconsumed_tail, b'') else: data += dco.decompress( - dco.unconsumed_tail + x[i : i + step], maxlen) + dco.unconsumed_tail + x[i : i + step], maxlen) data += dco.flush() self.assertTrue(dco.eof) self.assertEqual(data, source) @@ -830,16 +833,7 @@ def test_large_unconsumed_tail(self, size): @unittest.expectedFailure def test_wbits(self): # wbits=0 only supported since zlib v1.2.3.5 - # Register "1.2.3" as "1.2.3.0" - # or "1.2.0-linux","1.2.0.f","1.2.0.f-linux" - v = zlib.ZLIB_RUNTIME_VERSION.split('-', 1)[0].split('.') - if len(v) < 4: - v.append('0') - elif not v[-1].isnumeric(): - v[-1] = '0' - - v = tuple(map(int, v)) - supports_wbits_0 = v >= (1, 2, 3, 5) + supports_wbits_0 = ZLIB_RUNTIME_VERSION_TUPLE >= (1, 2, 3, 5) co = zlib.compressobj(level=1, wbits=15) zlib15 = co.compress(HAMLET_SCENE) + co.flush() From 1f59f6976a040c11a779dc13c8d4be70697366e5 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Mon, 13 Jan 2025 16:36:35 -0800 Subject: [PATCH 03/15] attempted _ZlibDecompressor implementation --- stdlib/src/zlib.rs | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/stdlib/src/zlib.rs b/stdlib/src/zlib.rs index 37ee1c83f7..7882d435c7 100644 --- a/stdlib/src/zlib.rs +++ b/stdlib/src/zlib.rs @@ -47,6 +47,7 @@ mod zlib { use libz_sys::{ Z_BLOCK, Z_DEFAULT_STRATEGY, Z_FILTERED, Z_FINISH, Z_FIXED, Z_HUFFMAN_ONLY, Z_RLE, Z_TREES, }; + use rustpython_vm::types::Constructor; // copied from zlibmodule.c (commit 530f506ac91338) #[pyattr] @@ -587,4 +588,46 @@ mod zlib { Ok(Self::new(int)) } } + + #[pyattr] + #[pyclass(name = "_ZlibDecompressor")] + #[derive(Debug, PyPayload)] + pub struct ZlibDecompressor { + decompress: PyMutex, + } + + impl Constructor for ZlibDecompressor { + type Args = (); + + fn py_new(cls: PyTypeRef, _args: Self::Args, vm: &VirtualMachine) -> PyResult { + let decompress = Decompress::new(true); + let zlib_decompressor = ZlibDecompressor { + decompress: PyMutex::new(decompress), + }; + zlib_decompressor.into_ref_with_type(vm, cls).map(Into::into) + } + } + + #[pyclass] + impl ZlibDecompressor { + #[pymethod] + fn decompress(&self, data: ArgBytesLike, vm: &VirtualMachine) -> PyResult> { + let mut d = self.decompress.lock(); + let (buf, stream_end) = _decompress( + &data.borrow_buf(), + &mut d, + DEF_BUF_SIZE, + None, + false, + vm, + )?; + if !stream_end { + return Err(new_zlib_error( + "Error -5 while decompressing data: incomplete or truncated stream", + vm, + )); + } + Ok(buf) + } + } } From 83e539a58d129d6d22f546e3a2e4b8a59d69fa36 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Mon, 13 Jan 2025 23:17:15 -0800 Subject: [PATCH 04/15] updated zlib tests --- Lib/test/test_zlib.py | 170 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index 013d689054..f55509b617 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -959,6 +959,176 @@ def choose_lines(source, number, seed=None, generator=random): Farewell. """ +class ZlibDecompressorTest(unittest.TestCase): + # Test adopted from test_bz2.py + TEXT = HAMLET_SCENE + DATA = zlib.compress(HAMLET_SCENE) + BAD_DATA = b"Not a valid deflate block" + BIG_TEXT = DATA * ((128 * 1024 // len(DATA)) + 1) + BIG_DATA = zlib.compress(BIG_TEXT) + + def test_Constructor(self): + self.assertRaises(TypeError, zlib._ZlibDecompressor, "ASDA") + self.assertRaises(TypeError, zlib._ZlibDecompressor, -15, "notbytes") + self.assertRaises(TypeError, zlib._ZlibDecompressor, -15, b"bytes", 5) + + def testDecompress(self): + zlibd = zlib._ZlibDecompressor() + self.assertRaises(TypeError, zlibd.decompress) + text = zlibd.decompress(self.DATA) + self.assertEqual(text, self.TEXT) + + def testDecompressChunks10(self): + zlibd = zlib._ZlibDecompressor() + text = b'' + n = 0 + while True: + str = self.DATA[n*10:(n+1)*10] + if not str: + break + text += zlibd.decompress(str) + n += 1 + self.assertEqual(text, self.TEXT) + + def testDecompressUnusedData(self): + zlibd = zlib._ZlibDecompressor() + unused_data = b"this is unused data" + text = zlibd.decompress(self.DATA+unused_data) + self.assertEqual(text, self.TEXT) + self.assertEqual(zlibd.unused_data, unused_data) + + def testEOFError(self): + zlibd = zlib._ZlibDecompressor() + text = zlibd.decompress(self.DATA) + self.assertRaises(EOFError, zlibd.decompress, b"anything") + self.assertRaises(EOFError, zlibd.decompress, b"") + + @support.skip_if_pgo_task + @bigmemtest(size=_4G + 100, memuse=3.3) + def testDecompress4G(self, size): + # "Test zlib._ZlibDecompressor.decompress() with >4GiB input" + blocksize = min(10 * 1024 * 1024, size) + block = random.randbytes(blocksize) + try: + data = block * ((size-1) // blocksize + 1) + compressed = zlib.compress(data) + zlibd = zlib._ZlibDecompressor() + decompressed = zlibd.decompress(compressed) + self.assertTrue(decompressed == data) + finally: + data = None + compressed = None + decompressed = None + + def testPickle(self): + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.assertRaises(TypeError): + pickle.dumps(zlib._ZlibDecompressor(), proto) + + def testDecompressorChunksMaxsize(self): + zlibd = zlib._ZlibDecompressor() + max_length = 100 + out = [] + + # Feed some input + len_ = len(self.BIG_DATA) - 64 + out.append(zlibd.decompress(self.BIG_DATA[:len_], + max_length=max_length)) + self.assertFalse(zlibd.needs_input) + self.assertEqual(len(out[-1]), max_length) + + # Retrieve more data without providing more input + out.append(zlibd.decompress(b'', max_length=max_length)) + self.assertFalse(zlibd.needs_input) + self.assertEqual(len(out[-1]), max_length) + + # Retrieve more data while providing more input + out.append(zlibd.decompress(self.BIG_DATA[len_:], + max_length=max_length)) + self.assertLessEqual(len(out[-1]), max_length) + + # Retrieve remaining uncompressed data + while not zlibd.eof: + out.append(zlibd.decompress(b'', max_length=max_length)) + self.assertLessEqual(len(out[-1]), max_length) + + out = b"".join(out) + self.assertEqual(out, self.BIG_TEXT) + self.assertEqual(zlibd.unused_data, b"") + + def test_decompressor_inputbuf_1(self): + # Test reusing input buffer after moving existing + # contents to beginning + zlibd = zlib._ZlibDecompressor() + out = [] + + # Create input buffer and fill it + self.assertEqual(zlibd.decompress(self.DATA[:100], + max_length=0), b'') + + # Retrieve some results, freeing capacity at beginning + # of input buffer + out.append(zlibd.decompress(b'', 2)) + + # Add more data that fits into input buffer after + # moving existing data to beginning + out.append(zlibd.decompress(self.DATA[100:105], 15)) + + # Decompress rest of data + out.append(zlibd.decompress(self.DATA[105:])) + self.assertEqual(b''.join(out), self.TEXT) + + def test_decompressor_inputbuf_2(self): + # Test reusing input buffer by appending data at the + # end right away + zlibd = zlib._ZlibDecompressor() + out = [] + + # Create input buffer and empty it + self.assertEqual(zlibd.decompress(self.DATA[:200], + max_length=0), b'') + out.append(zlibd.decompress(b'')) + + # Fill buffer with new data + out.append(zlibd.decompress(self.DATA[200:280], 2)) + + # Append some more data, not enough to require resize + out.append(zlibd.decompress(self.DATA[280:300], 2)) + + # Decompress rest of data + out.append(zlibd.decompress(self.DATA[300:])) + self.assertEqual(b''.join(out), self.TEXT) + + def test_decompressor_inputbuf_3(self): + # Test reusing input buffer after extending it + + zlibd = zlib._ZlibDecompressor() + out = [] + + # Create almost full input buffer + out.append(zlibd.decompress(self.DATA[:200], 5)) + + # Add even more data to it, requiring resize + out.append(zlibd.decompress(self.DATA[200:300], 5)) + + # Decompress rest of data + out.append(zlibd.decompress(self.DATA[300:])) + self.assertEqual(b''.join(out), self.TEXT) + + def test_failure(self): + zlibd = zlib._ZlibDecompressor() + self.assertRaises(Exception, zlibd.decompress, self.BAD_DATA * 30) + # Previously, a second call could crash due to internal inconsistency + self.assertRaises(Exception, zlibd.decompress, self.BAD_DATA * 30) + + @support.refcount_test + def test_refleaks_in___init__(self): + gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount') + zlibd = zlib._ZlibDecompressor() + refs_before = gettotalrefcount() + for i in range(100): + zlibd.__init__() + self.assertAlmostEqual(gettotalrefcount() - refs_before, 0, delta=10) class CustomInt: def __index__(self): From cfad24adeeb35d5b2b9a0ee8f4f347be021467dd Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Mon, 13 Jan 2025 23:17:36 -0800 Subject: [PATCH 05/15] formatting --- stdlib/src/zlib.rs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/stdlib/src/zlib.rs b/stdlib/src/zlib.rs index 7882d435c7..be4c01d68a 100644 --- a/stdlib/src/zlib.rs +++ b/stdlib/src/zlib.rs @@ -604,7 +604,9 @@ mod zlib { let zlib_decompressor = ZlibDecompressor { decompress: PyMutex::new(decompress), }; - zlib_decompressor.into_ref_with_type(vm, cls).map(Into::into) + zlib_decompressor + .into_ref_with_type(vm, cls) + .map(Into::into) } } @@ -613,14 +615,8 @@ mod zlib { #[pymethod] fn decompress(&self, data: ArgBytesLike, vm: &VirtualMachine) -> PyResult> { let mut d = self.decompress.lock(); - let (buf, stream_end) = _decompress( - &data.borrow_buf(), - &mut d, - DEF_BUF_SIZE, - None, - false, - vm, - )?; + let (buf, stream_end) = + _decompress(&data.borrow_buf(), &mut d, DEF_BUF_SIZE, None, false, vm)?; if !stream_end { return Err(new_zlib_error( "Error -5 while decompressing data: incomplete or truncated stream", From 760b192240b8833a1c7183fc65218af2cc72e97e Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Tue, 14 Jan 2025 08:43:03 -0800 Subject: [PATCH 06/15] type error if _ZlibDecompressor gets a non-zero number of arguments passed to it's __init__ --- stdlib/src/zlib.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/stdlib/src/zlib.rs b/stdlib/src/zlib.rs index be4c01d68a..afca48b3e2 100644 --- a/stdlib/src/zlib.rs +++ b/stdlib/src/zlib.rs @@ -47,6 +47,7 @@ mod zlib { use libz_sys::{ Z_BLOCK, Z_DEFAULT_STRATEGY, Z_FILTERED, Z_FINISH, Z_FIXED, Z_HUFFMAN_ONLY, Z_RLE, Z_TREES, }; + use rustpython_vm::function::FuncArgs; use rustpython_vm::types::Constructor; // copied from zlibmodule.c (commit 530f506ac91338) @@ -597,9 +598,13 @@ mod zlib { } impl Constructor for ZlibDecompressor { - type Args = (); + type Args = FuncArgs; + + fn py_new(cls: PyTypeRef, args: Self::Args, vm: &VirtualMachine) -> PyResult { + if args.args.len() != 0 { + return Err(vm.new_type_error("No arguments expected".to_owned())); + } - fn py_new(cls: PyTypeRef, _args: Self::Args, vm: &VirtualMachine) -> PyResult { let decompress = Decompress::new(true); let zlib_decompressor = ZlibDecompressor { decompress: PyMutex::new(decompress), From f3048cf1d77e16d87fe10f5a6d726b441d5618f1 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Tue, 14 Jan 2025 09:04:12 -0800 Subject: [PATCH 07/15] eof error if _ZlibDecompressor decompresses a byte string that terminates pre-emptively --- stdlib/src/zlib.rs | 7 +++---- vm/src/vm/vm_new.rs | 5 +++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/stdlib/src/zlib.rs b/stdlib/src/zlib.rs index afca48b3e2..e271d4afa9 100644 --- a/stdlib/src/zlib.rs +++ b/stdlib/src/zlib.rs @@ -595,6 +595,7 @@ mod zlib { #[derive(Debug, PyPayload)] pub struct ZlibDecompressor { decompress: PyMutex, + // TODO: unused data store } impl Constructor for ZlibDecompressor { @@ -623,10 +624,8 @@ mod zlib { let (buf, stream_end) = _decompress(&data.borrow_buf(), &mut d, DEF_BUF_SIZE, None, false, vm)?; if !stream_end { - return Err(new_zlib_error( - "Error -5 while decompressing data: incomplete or truncated stream", - vm, - )); + // Return EOF error as per tests + return Err(vm.new_eof_error("EOF when reading a chunk".to_owned())); } Ok(buf) } diff --git a/vm/src/vm/vm_new.rs b/vm/src/vm/vm_new.rs index ea8fa21c17..55f06e90da 100644 --- a/vm/src/vm/vm_new.rs +++ b/vm/src/vm/vm_new.rs @@ -394,4 +394,9 @@ impl VirtualMachine { obj.as_object(), ) } + + pub fn new_eof_error(&self, msg: String) -> PyBaseExceptionRef { + let eof_error = self.ctx.exceptions.eof_error.to_owned(); + self.new_exception_msg(eof_error, msg) + } } From 32394ad9d60077a06f4edd32492503418b6497e7 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Tue, 14 Jan 2025 09:20:27 -0800 Subject: [PATCH 08/15] reverted bad changes --- stdlib/src/zlib.rs | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/stdlib/src/zlib.rs b/stdlib/src/zlib.rs index e271d4afa9..e3eebf4574 100644 --- a/stdlib/src/zlib.rs +++ b/stdlib/src/zlib.rs @@ -47,7 +47,6 @@ mod zlib { use libz_sys::{ Z_BLOCK, Z_DEFAULT_STRATEGY, Z_FILTERED, Z_FINISH, Z_FIXED, Z_HUFFMAN_ONLY, Z_RLE, Z_TREES, }; - use rustpython_vm::function::FuncArgs; use rustpython_vm::types::Constructor; // copied from zlibmodule.c (commit 530f506ac91338) @@ -599,13 +598,9 @@ mod zlib { } impl Constructor for ZlibDecompressor { - type Args = FuncArgs; - - fn py_new(cls: PyTypeRef, args: Self::Args, vm: &VirtualMachine) -> PyResult { - if args.args.len() != 0 { - return Err(vm.new_type_error("No arguments expected".to_owned())); - } + type Args = (); + fn py_new(cls: PyTypeRef, _args: Self::Args, vm: &VirtualMachine) -> PyResult { let decompress = Decompress::new(true); let zlib_decompressor = ZlibDecompressor { decompress: PyMutex::new(decompress), @@ -624,8 +619,10 @@ mod zlib { let (buf, stream_end) = _decompress(&data.borrow_buf(), &mut d, DEF_BUF_SIZE, None, false, vm)?; if !stream_end { - // Return EOF error as per tests - return Err(vm.new_eof_error("EOF when reading a chunk".to_owned())); + return Err(new_zlib_error( + "Error -5 while decompressing data: incomplete or truncated stream", + vm, + )); } Ok(buf) } From c9b0ad06775a89a5fe5ded0b09686b2520e031d3 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Tue, 14 Jan 2025 09:23:19 -0800 Subject: [PATCH 09/15] disabled some tests --- Lib/test/test_zlib.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index f55509b617..881c0b4662 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -990,6 +990,8 @@ def testDecompressChunks10(self): n += 1 self.assertEqual(text, self.TEXT) + # TODO: RUSTPYTHON + @unittest.expectedFailure def testDecompressUnusedData(self): zlibd = zlib._ZlibDecompressor() unused_data = b"this is unused data" @@ -1003,6 +1005,8 @@ def testEOFError(self): self.assertRaises(EOFError, zlibd.decompress, b"anything") self.assertRaises(EOFError, zlibd.decompress, b"") + # TODO: RUSTPYTHON + @unittest.expectedFailure @support.skip_if_pgo_task @bigmemtest(size=_4G + 100, memuse=3.3) def testDecompress4G(self, size): @@ -1025,6 +1029,8 @@ def testPickle(self): with self.assertRaises(TypeError): pickle.dumps(zlib._ZlibDecompressor(), proto) + # TODO: RUSTPYTHON + @unittest.expectedFailure def testDecompressorChunksMaxsize(self): zlibd = zlib._ZlibDecompressor() max_length = 100 @@ -1056,6 +1062,8 @@ def testDecompressorChunksMaxsize(self): self.assertEqual(out, self.BIG_TEXT) self.assertEqual(zlibd.unused_data, b"") + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_decompressor_inputbuf_1(self): # Test reusing input buffer after moving existing # contents to beginning @@ -1078,6 +1086,8 @@ def test_decompressor_inputbuf_1(self): out.append(zlibd.decompress(self.DATA[105:])) self.assertEqual(b''.join(out), self.TEXT) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_decompressor_inputbuf_2(self): # Test reusing input buffer by appending data at the # end right away @@ -1099,6 +1109,8 @@ def test_decompressor_inputbuf_2(self): out.append(zlibd.decompress(self.DATA[300:])) self.assertEqual(b''.join(out), self.TEXT) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_decompressor_inputbuf_3(self): # Test reusing input buffer after extending it From 54b45a9d9ec2313a5f439364e72c4f2d3e295c64 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Tue, 14 Jan 2025 11:49:53 -0800 Subject: [PATCH 10/15] tried to fix zlibd --- Lib/test/test_zlib.py | 2 - stdlib/src/zlib.rs | 113 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 103 insertions(+), 12 deletions(-) diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index 881c0b4662..db0fc53518 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -990,8 +990,6 @@ def testDecompressChunks10(self): n += 1 self.assertEqual(text, self.TEXT) - # TODO: RUSTPYTHON - @unittest.expectedFailure def testDecompressUnusedData(self): zlibd = zlib._ZlibDecompressor() unused_data = b"this is unused data" diff --git a/stdlib/src/zlib.rs b/stdlib/src/zlib.rs index e3eebf4574..4da3a1b09c 100644 --- a/stdlib/src/zlib.rs +++ b/stdlib/src/zlib.rs @@ -594,7 +594,8 @@ mod zlib { #[derive(Debug, PyPayload)] pub struct ZlibDecompressor { decompress: PyMutex, - // TODO: unused data store + unused_data: PyMutex, + unconsumed_tail: PyMutex, } impl Constructor for ZlibDecompressor { @@ -604,6 +605,8 @@ mod zlib { let decompress = Decompress::new(true); let zlib_decompressor = ZlibDecompressor { decompress: PyMutex::new(decompress), + unused_data: PyMutex::new(PyBytes::from(vec![]).into_ref(&vm.ctx)), + unconsumed_tail: PyMutex::new(PyBytes::from(vec![]).into_ref(&vm.ctx)), }; zlib_decompressor .into_ref_with_type(vm, cls) @@ -613,18 +616,108 @@ mod zlib { #[pyclass] impl ZlibDecompressor { + #[pygetset] + fn unused_data(&self) -> PyBytesRef { + self.unused_data.lock().clone() + } + + #[pygetset] + fn unconsumed_tail(&self) -> PyBytesRef { + self.unconsumed_tail.lock().clone() + } + + fn save_unused_input( + &self, + d: &Decompress, + data: &[u8], + stream_end: bool, + orig_in: u64, + vm: &VirtualMachine, + ) { + let leftover = &data[(d.total_in() - orig_in) as usize..]; + + if stream_end && !leftover.is_empty() { + let mut unused_data = self.unused_data.lock(); + let unused: Vec<_> = unused_data + .as_bytes() + .iter() + .chain(leftover) + .copied() + .collect(); + *unused_data = vm.ctx.new_pyref(unused); + } + } + #[pymethod] - fn decompress(&self, data: ArgBytesLike, vm: &VirtualMachine) -> PyResult> { + fn decompress(&self, args: DecompressArgs, vm: &VirtualMachine) -> PyResult> { + let max_length = args.max_length.value; + let max_length = (max_length != 0).then_some(max_length); + let data = args.data.borrow_buf(); + let data = &*data; + let mut d = self.decompress.lock(); - let (buf, stream_end) = - _decompress(&data.borrow_buf(), &mut d, DEF_BUF_SIZE, None, false, vm)?; - if !stream_end { - return Err(new_zlib_error( - "Error -5 while decompressing data: incomplete or truncated stream", - vm, - )); + let orig_in = d.total_in(); + + let (ret, stream_end) = + match _decompress(data, &mut d, DEF_BUF_SIZE, max_length, false, vm) { + Ok((_buf, true)) => { + // Eof is true + // (Ok(buf), true) + return Err(vm.new_eof_error("EOF when reading a chunk".to_owned())); + } + Ok((buf, false)) => (Ok(buf), false), + Err(err) => (Err(err), false), + }; + self.save_unused_input(&d, data, stream_end, orig_in, vm); + + let leftover = if stream_end { + b"" + } else { + &data[(d.total_in() - orig_in) as usize..] + }; + + let mut unconsumed_tail = self.unconsumed_tail.lock(); + if !leftover.is_empty() || !unconsumed_tail.is_empty() { + *unconsumed_tail = PyBytes::from(leftover.to_owned()).into_ref(&vm.ctx); } - Ok(buf) + + ret + } + + #[pymethod] + fn flush(&self, length: OptionalArg, vm: &VirtualMachine) -> PyResult> { + let length = match length { + OptionalArg::Present(l) => { + let l: isize = l.into(); + if l <= 0 { + return Err( + vm.new_value_error("length must be greater than zero".to_owned()) + ); + } else { + l as usize + } + } + OptionalArg::Missing => DEF_BUF_SIZE, + }; + + let mut data = self.unconsumed_tail.lock(); + let mut d = self.decompress.lock(); + + let orig_in = d.total_in(); + + let (ret, stream_end) = match _decompress(&data, &mut d, length, None, true, vm) { + Ok((buf, stream_end)) => (Ok(buf), stream_end), + Err(err) => (Err(err), false), + }; + self.save_unused_input(&d, &data, stream_end, orig_in, vm); + + *data = PyBytes::from(Vec::new()).into_ref(&vm.ctx); + + // TODO: drop the inner decompressor, somehow + // if stream_end { + // + // } + ret } } } From 91274671b533e5ba219f9380f1379a52a21fa944 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Thu, 16 Jan 2025 00:51:22 +0900 Subject: [PATCH 11/15] Fix Expected payload error --- stdlib/src/zlib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stdlib/src/zlib.rs b/stdlib/src/zlib.rs index 4da3a1b09c..4131ddfb88 100644 --- a/stdlib/src/zlib.rs +++ b/stdlib/src/zlib.rs @@ -603,7 +603,7 @@ mod zlib { fn py_new(cls: PyTypeRef, _args: Self::Args, vm: &VirtualMachine) -> PyResult { let decompress = Decompress::new(true); - let zlib_decompressor = ZlibDecompressor { + let zlib_decompressor = Self { decompress: PyMutex::new(decompress), unused_data: PyMutex::new(PyBytes::from(vec![]).into_ref(&vm.ctx)), unconsumed_tail: PyMutex::new(PyBytes::from(vec![]).into_ref(&vm.ctx)), @@ -614,7 +614,7 @@ mod zlib { } } - #[pyclass] + #[pyclass(with(Constructor))] impl ZlibDecompressor { #[pygetset] fn unused_data(&self) -> PyBytesRef { From 31e6e0a408dec0427ba3c8c3899a9a7639a46d89 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Wed, 15 Jan 2025 10:38:16 -0800 Subject: [PATCH 12/15] force type cohesion --- stdlib/src/zlib.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/stdlib/src/zlib.rs b/stdlib/src/zlib.rs index 4da3a1b09c..46799e5531 100644 --- a/stdlib/src/zlib.rs +++ b/stdlib/src/zlib.rs @@ -649,11 +649,11 @@ mod zlib { } #[pymethod] - fn decompress(&self, args: DecompressArgs, vm: &VirtualMachine) -> PyResult> { - let max_length = args.max_length.value; - let max_length = (max_length != 0).then_some(max_length); - let data = args.data.borrow_buf(); - let data = &*data; + fn decompress(&self, args: PyBytesRef, vm: &VirtualMachine) -> PyResult> { + // let max_length = args.max_length.value; + // let max_length = (max_length != 0).then_some(max_length); + let max_length = None; + let data = args.as_bytes(); let mut d = self.decompress.lock(); let orig_in = d.total_in(); From 0be849d65f487cfa98ce629f788e7c89b7db1e14 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Wed, 15 Jan 2025 10:49:56 -0800 Subject: [PATCH 13/15] passed more tests --- Lib/test/test_zlib.py | 2 -- stdlib/src/zlib.rs | 5 ++--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index db0fc53518..9c3d4ac6e0 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -1003,8 +1003,6 @@ def testEOFError(self): self.assertRaises(EOFError, zlibd.decompress, b"anything") self.assertRaises(EOFError, zlibd.decompress, b"") - # TODO: RUSTPYTHON - @unittest.expectedFailure @support.skip_if_pgo_task @bigmemtest(size=_4G + 100, memuse=3.3) def testDecompress4G(self, size): diff --git a/stdlib/src/zlib.rs b/stdlib/src/zlib.rs index b44ce52b21..3a9b65623b 100644 --- a/stdlib/src/zlib.rs +++ b/stdlib/src/zlib.rs @@ -660,10 +660,9 @@ mod zlib { let (ret, stream_end) = match _decompress(data, &mut d, DEF_BUF_SIZE, max_length, false, vm) { - Ok((_buf, true)) => { + Ok((buf, true)) => { // Eof is true - // (Ok(buf), true) - return Err(vm.new_eof_error("EOF when reading a chunk".to_owned())); + (Ok(buf), true) } Ok((buf, false)) => (Ok(buf), false), Err(err) => (Err(err), false), From 8eb49a2c3f2d37c530e4386143699350bfe84e31 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Wed, 15 Jan 2025 11:00:02 -0800 Subject: [PATCH 14/15] all tests pass --- Lib/test/test_zlib.py | 4 ++++ stdlib/src/zlib.rs | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index 9c3d4ac6e0..686131be74 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -997,6 +997,8 @@ def testDecompressUnusedData(self): self.assertEqual(text, self.TEXT) self.assertEqual(zlibd.unused_data, unused_data) + # TODO: RUSTPYTHON + @unittest.expectedFailure def testEOFError(self): zlibd = zlib._ZlibDecompressor() text = zlibd.decompress(self.DATA) @@ -1020,6 +1022,8 @@ def testDecompress4G(self, size): compressed = None decompressed = None + # TODO: RUSTPYTHON + @unittest.expectedFailure def testPickle(self): for proto in range(pickle.HIGHEST_PROTOCOL + 1): with self.assertRaises(TypeError): diff --git a/stdlib/src/zlib.rs b/stdlib/src/zlib.rs index 3a9b65623b..83a7535c33 100644 --- a/stdlib/src/zlib.rs +++ b/stdlib/src/zlib.rs @@ -718,5 +718,11 @@ mod zlib { // } ret } + + // TODO: Wait for getstate pyslot to be fixed + // #[pyslot] + // fn getstate(zelf: &PyObject, vm: &VirtualMachine) -> PyResult { + // Err(vm.new_type_error("cannot serialize '_ZlibDecompressor' object".to_owned())) + // } } } From 041553c802816bf91636897bfa0d5c40076409b2 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Wed, 15 Jan 2025 19:56:45 -0800 Subject: [PATCH 15/15] added comment as per review --- Lib/test/support/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index ed8cdd0653..a1a6bd8e73 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -532,6 +532,7 @@ def requires_legacy_unicode_capi(): has_fork_support = hasattr(os, "fork") and not is_emscripten and not is_wasi +# From python 3.12.6 is_s390x = hasattr(os, 'uname') and os.uname().machine == 's390x' skip_on_s390x = unittest.skipIf(is_s390x, 'skipped on s390x')