diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py
index b49ffbd536..a1a6bd8e73 100644
--- a/Lib/test/support/__init__.py
+++ b/Lib/test/support/__init__.py
@@ -532,6 +532,10 @@ def requires_legacy_unicode_capi():
 
 has_fork_support = hasattr(os, "fork") and not is_emscripten and not is_wasi
 
+# From Python 3.12.6
+is_s390x = hasattr(os, 'uname') and os.uname().machine == 's390x'
+skip_on_s390x = unittest.skipIf(is_s390x, 'skipped on s390x')
+
 
 def requires_fork():
     return unittest.skipUnless(has_fork_support, "requires working os.fork()")
diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py
index 026a5abc25..686131be74 100644
--- a/Lib/test/test_zlib.py
+++ b/Lib/test/test_zlib.py
@@ -7,20 +7,36 @@ import pickle
 import random
 import sys
 
-from test.support import bigmemtest, _1G, _4G
+from test.support import bigmemtest, _1G, _4G, is_s390x
 
 zlib = import_helper.import_module('zlib')
 
 requires_Compress_copy = unittest.skipUnless(
-        hasattr(zlib.compressobj(), "copy"),
-        'requires Compress.copy()')
+    hasattr(zlib.compressobj(), "copy"),
+    'requires Compress.copy()')
 requires_Decompress_copy = unittest.skipUnless(
-        hasattr(zlib.decompressobj(), "copy"),
-        'requires Decompress.copy()')
+    hasattr(zlib.decompressobj(), "copy"),
+    'requires Decompress.copy()')
+
+
+def _zlib_runtime_version_tuple(zlib_version=zlib.ZLIB_RUNTIME_VERSION):
+    # Register "1.2.3" as "1.2.3.0"
+    # or "1.2.0-linux","1.2.0.f","1.2.0.f-linux"
+    v = zlib_version.split('-', 1)[0].split('.')
+    if len(v) < 4:
+        v.append('0')
+    elif not v[-1].isnumeric():
+        v[-1] = '0'
+    return tuple(map(int, v))
+
+
+ZLIB_RUNTIME_VERSION_TUPLE = _zlib_runtime_version_tuple()
+
 
 
-# bpo-46623: On s390x, when a hardware accelerator is used, using different
-# ways to compress data with zlib can produce different compressed data.
+# bpo-46623: When a hardware accelerator is used (currently only on s390x),
+# using different ways to compress data with zlib can produce different
+# compressed data.
 # Simplified test_pair() code:
 #
 #     def func1(data):
@@ -43,10 +59,9 @@
 #
 #     zlib.decompress(func1(data)) == zlib.decompress(func2(data)) == data
 #
-# Make the assumption that s390x always has an accelerator to simplify the skip
-# condition. Windows doesn't have os.uname() but it doesn't support s390x.
-skip_on_s390x = unittest.skipIf(hasattr(os, 'uname') and os.uname().machine == 's390x',
-                                'skipped on s390x')
+# To simplify the skip condition, make the assumption that s390x always has an
+# accelerator, and nothing else has it.
+HW_ACCELERATED = is_s390x
 
 
 class VersionTestCase(unittest.TestCase):
@@ -141,7 +156,7 @@ def test_badcompressobj(self):
         self.assertRaises(ValueError, zlib.compressobj, 1, zlib.DEFLATED, 0)
         # specifying total bits too large causes an error
         self.assertRaises(ValueError,
-                zlib.compressobj, 1, zlib.DEFLATED, zlib.MAX_WBITS + 1)
+                          zlib.compressobj, 1, zlib.DEFLATED, zlib.MAX_WBITS + 1)
 
     def test_baddecompressobj(self):
         # verify failure on building decompress object with bad params
@@ -214,12 +229,14 @@ def test_keywords(self):
                                          bufsize=zlib.DEF_BUF_SIZE),
                          HAMLET_SCENE)
 
-    @skip_on_s390x
     def test_speech128(self):
         # compress more data
         data = HAMLET_SCENE * 128
         x = zlib.compress(data)
-        self.assertEqual(zlib.compress(bytearray(data)), x)
+        # With hardware acceleration, the compressed bytes
+        # might not be identical.
+        if not HW_ACCELERATED:
+            self.assertEqual(zlib.compress(bytearray(data)), x)
         for ob in x, bytearray(x):
             self.assertEqual(zlib.decompress(ob), data)
 
@@ -227,8 +244,8 @@ def test_incomplete_stream(self):
         # A useful error message is given
         x = zlib.compress(HAMLET_SCENE)
         self.assertRaisesRegex(zlib.error,
-            "Error -5 while decompressing data: incomplete or truncated stream",
-            zlib.decompress, x[:-1])
+                               "Error -5 while decompressing data: incomplete or truncated stream",
+                               zlib.decompress, x[:-1])
 
 
 # Memory use of the following functions takes into account overallocation
 
@@ -268,7 +285,6 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
     # TODO: RUSTPYTHON
     @unittest.expectedFailure
     # Test compression object
-    @skip_on_s390x
     def test_pair(self):
         # straightforward compress/decompress objects
         datasrc = HAMLET_SCENE * 128
@@ -279,7 +295,10 @@ def test_pair(self):
         x1 = co.compress(data)
         x2 = co.flush()
         self.assertRaises(zlib.error, co.flush) # second flush should not work
-        self.assertEqual(x1 + x2, datazip)
+        # With hardware acceleration, the compressed bytes might not
+        # be identical.
+        if not HW_ACCELERATED:
+            self.assertEqual(x1 + x2, datazip)
         for v1, v2 in ((x1, x2), (bytearray(x1), bytearray(x2))):
             dco = zlib.decompressobj()
             y1 = dco.decompress(v1 + v2)
@@ -364,7 +383,7 @@ def test_decompinc(self, flush=False, source=None, cx=256, dcx=64):
             bufs.append(dco.decompress(combuf[i:i+dcx]))
         self.assertEqual(b'', dco.unconsumed_tail, ########
                          "(A) uct should be b'': not %d long" %
-                         len(dco.unconsumed_tail))
+                             len(dco.unconsumed_tail))
         self.assertEqual(b'', dco.unused_data)
         if flush:
             bufs.append(dco.flush())
@@ -377,7 +396,7 @@ def test_decompinc(self, flush=False, source=None, cx=256, dcx=64):
                 break
         self.assertEqual(b'', dco.unconsumed_tail, ########
                          "(B) uct should be b'': not %d long" %
-                         len(dco.unconsumed_tail))
+                             len(dco.unconsumed_tail))
         self.assertEqual(b'', dco.unused_data)
         self.assertEqual(data, b''.join(bufs))
         # Failure means: "decompressobj with init options failed"
@@ -406,7 +425,7 @@ def test_decompimax(self, source=None, cx=256, dcx=64):
             #max_length = 1 + len(cb)//10
             chunk = dco.decompress(cb, dcx)
             self.assertFalse(len(chunk) > dcx,
-                    'chunk too big (%d>%d)' % (len(chunk), dcx))
+                             'chunk too big (%d>%d)' % (len(chunk), dcx))
             bufs.append(chunk)
             cb = dco.unconsumed_tail
         bufs.append(dco.flush())
@@ -431,7 +450,7 @@ def test_decompressmaxlen(self, flush=False):
             max_length = 1 + len(cb)//10
             chunk = dco.decompress(cb, max_length)
             self.assertFalse(len(chunk) > max_length,
-                    'chunk too big (%d>%d)' % (len(chunk),max_length))
+                             'chunk too big (%d>%d)' % (len(chunk),max_length))
             bufs.append(chunk)
             cb = dco.unconsumed_tail
         if flush:
@@ -440,7 +459,7 @@ def test_decompressmaxlen(self, flush=False):
             while chunk:
                 chunk = dco.decompress(b'', max_length)
                 self.assertFalse(len(chunk) > max_length,
-                        'chunk too big (%d>%d)' % (len(chunk),max_length))
+                                 'chunk too big (%d>%d)' % (len(chunk),max_length))
                 bufs.append(chunk)
         self.assertEqual(data, b''.join(bufs), 'Wrong data retrieved')
 
@@ -487,9 +506,8 @@ def test_flushes(self):
         sync_opt = ['Z_NO_FLUSH', 'Z_SYNC_FLUSH', 'Z_FULL_FLUSH',
                     'Z_PARTIAL_FLUSH']
 
-        ver = tuple(int(v) for v in zlib.ZLIB_RUNTIME_VERSION.split('.'))
         # Z_BLOCK has a known failure prior to 1.2.5.3
-        if ver >= (1, 2, 5, 3):
+        if ZLIB_RUNTIME_VERSION_TUPLE >= (1, 2, 5, 3):
             sync_opt.append('Z_BLOCK')
 
         sync_opt = [getattr(zlib, opt) for opt in sync_opt
@@ -498,20 +516,16 @@ def test_flushes(self):
 
         for sync in sync_opt:
             for level in range(10):
-                try:
+                with self.subTest(sync=sync, level=level):
                     obj = zlib.compressobj( level )
                     a = obj.compress( data[:3000] )
                     b = obj.flush( sync )
                     c = obj.compress( data[3000:] )
                     d = obj.flush()
-                except:
-                    print("Error for flush mode={}, level={}"
-                          .format(sync, level))
-                    raise
-                self.assertEqual(zlib.decompress(b''.join([a,b,c,d])),
-                                 data, ("Decompress failed: flush "
-                                        "mode=%i, level=%i") % (sync, level))
-                del obj
+                    self.assertEqual(zlib.decompress(b''.join([a,b,c,d])),
+                                     data, ("Decompress failed: flush "
+                                            "mode=%i, level=%i") % (sync, level))
+                    del obj
 
     @unittest.skipUnless(hasattr(zlib, 'Z_SYNC_FLUSH'),
                          'requires zlib.Z_SYNC_FLUSH')
@@ -526,18 +540,7 @@ def test_odd_flush(self):
 
         # Try 17K of data
         # generate random data stream
-        try:
-            # In 2.3 and later, WichmannHill is the RNG of the bug report
-            gen = random.WichmannHill()
-        except AttributeError:
-            try:
-                # 2.2 called it Random
-                gen = random.Random()
-            except AttributeError:
-                # others might simply have a single RNG
-                gen = random
-        gen.seed(1)
-        data = gen.randbytes(17 * 1024)
+        data = random.randbytes(17 * 1024)
 
         # compress, sync-flush, and decompress
         first = co.compress(data)
@@ -642,7 +645,7 @@ def test_decompress_unused_data(self):
                         self.assertEqual(dco.unconsumed_tail, b'')
                     else:
                         data += dco.decompress(
-                                dco.unconsumed_tail + x[i : i + step], maxlen)
+                            dco.unconsumed_tail + x[i : i + step], maxlen)
                 data += dco.flush()
                 self.assertTrue(dco.eof)
                 self.assertEqual(data, source)
@@ -830,16 +833,7 @@ def test_large_unconsumed_tail(self, size):
     @unittest.expectedFailure
     def test_wbits(self):
         # wbits=0 only supported since zlib v1.2.3.5
-        # Register "1.2.3" as "1.2.3.0"
-        # or "1.2.0-linux","1.2.0.f","1.2.0.f-linux"
-        v = zlib.ZLIB_RUNTIME_VERSION.split('-', 1)[0].split('.')
-        if len(v) < 4:
-            v.append('0')
-        elif not v[-1].isnumeric():
-            v[-1] = '0'
-
-        v = tuple(map(int, v))
-        supports_wbits_0 = v >= (1, 2, 3, 5)
+        supports_wbits_0 = ZLIB_RUNTIME_VERSION_TUPLE >= (1, 2, 3, 5)
 
         co = zlib.compressobj(level=1, wbits=15)
         zlib15 = co.compress(HAMLET_SCENE) + co.flush()
@@ -965,6 +959,188 @@ def choose_lines(source, number, seed=None, generator=random):
        Farewell.
""" +class ZlibDecompressorTest(unittest.TestCase): + # Test adopted from test_bz2.py + TEXT = HAMLET_SCENE + DATA = zlib.compress(HAMLET_SCENE) + BAD_DATA = b"Not a valid deflate block" + BIG_TEXT = DATA * ((128 * 1024 // len(DATA)) + 1) + BIG_DATA = zlib.compress(BIG_TEXT) + + def test_Constructor(self): + self.assertRaises(TypeError, zlib._ZlibDecompressor, "ASDA") + self.assertRaises(TypeError, zlib._ZlibDecompressor, -15, "notbytes") + self.assertRaises(TypeError, zlib._ZlibDecompressor, -15, b"bytes", 5) + + def testDecompress(self): + zlibd = zlib._ZlibDecompressor() + self.assertRaises(TypeError, zlibd.decompress) + text = zlibd.decompress(self.DATA) + self.assertEqual(text, self.TEXT) + + def testDecompressChunks10(self): + zlibd = zlib._ZlibDecompressor() + text = b'' + n = 0 + while True: + str = self.DATA[n*10:(n+1)*10] + if not str: + break + text += zlibd.decompress(str) + n += 1 + self.assertEqual(text, self.TEXT) + + def testDecompressUnusedData(self): + zlibd = zlib._ZlibDecompressor() + unused_data = b"this is unused data" + text = zlibd.decompress(self.DATA+unused_data) + self.assertEqual(text, self.TEXT) + self.assertEqual(zlibd.unused_data, unused_data) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def testEOFError(self): + zlibd = zlib._ZlibDecompressor() + text = zlibd.decompress(self.DATA) + self.assertRaises(EOFError, zlibd.decompress, b"anything") + self.assertRaises(EOFError, zlibd.decompress, b"") + + @support.skip_if_pgo_task + @bigmemtest(size=_4G + 100, memuse=3.3) + def testDecompress4G(self, size): + # "Test zlib._ZlibDecompressor.decompress() with >4GiB input" + blocksize = min(10 * 1024 * 1024, size) + block = random.randbytes(blocksize) + try: + data = block * ((size-1) // blocksize + 1) + compressed = zlib.compress(data) + zlibd = zlib._ZlibDecompressor() + decompressed = zlibd.decompress(compressed) + self.assertTrue(decompressed == data) + finally: + data = None + compressed = None + decompressed = None + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def testPickle(self): + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.assertRaises(TypeError): + pickle.dumps(zlib._ZlibDecompressor(), proto) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def testDecompressorChunksMaxsize(self): + zlibd = zlib._ZlibDecompressor() + max_length = 100 + out = [] + + # Feed some input + len_ = len(self.BIG_DATA) - 64 + out.append(zlibd.decompress(self.BIG_DATA[:len_], + max_length=max_length)) + self.assertFalse(zlibd.needs_input) + self.assertEqual(len(out[-1]), max_length) + + # Retrieve more data without providing more input + out.append(zlibd.decompress(b'', max_length=max_length)) + self.assertFalse(zlibd.needs_input) + self.assertEqual(len(out[-1]), max_length) + + # Retrieve more data while providing more input + out.append(zlibd.decompress(self.BIG_DATA[len_:], + max_length=max_length)) + self.assertLessEqual(len(out[-1]), max_length) + + # Retrieve remaining uncompressed data + while not zlibd.eof: + out.append(zlibd.decompress(b'', max_length=max_length)) + self.assertLessEqual(len(out[-1]), max_length) + + out = b"".join(out) + self.assertEqual(out, self.BIG_TEXT) + self.assertEqual(zlibd.unused_data, b"") + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_decompressor_inputbuf_1(self): + # Test reusing input buffer after moving existing + # contents to beginning + zlibd = zlib._ZlibDecompressor() + out = [] + + # Create input buffer and fill it + self.assertEqual(zlibd.decompress(self.DATA[:100], + max_length=0), b'') 
+
+        # Retrieve some results, freeing capacity at beginning
+        # of input buffer
+        out.append(zlibd.decompress(b'', 2))
+
+        # Add more data that fits into input buffer after
+        # moving existing data to beginning
+        out.append(zlibd.decompress(self.DATA[100:105], 15))
+
+        # Decompress rest of data
+        out.append(zlibd.decompress(self.DATA[105:]))
+        self.assertEqual(b''.join(out), self.TEXT)
+
+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    def test_decompressor_inputbuf_2(self):
+        # Test reusing input buffer by appending data at the
+        # end right away
+        zlibd = zlib._ZlibDecompressor()
+        out = []
+
+        # Create input buffer and empty it
+        self.assertEqual(zlibd.decompress(self.DATA[:200],
+                                          max_length=0), b'')
+        out.append(zlibd.decompress(b''))
+
+        # Fill buffer with new data
+        out.append(zlibd.decompress(self.DATA[200:280], 2))
+
+        # Append some more data, not enough to require resize
+        out.append(zlibd.decompress(self.DATA[280:300], 2))
+
+        # Decompress rest of data
+        out.append(zlibd.decompress(self.DATA[300:]))
+        self.assertEqual(b''.join(out), self.TEXT)
+
+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    def test_decompressor_inputbuf_3(self):
+        # Test reusing input buffer after extending it
+
+        zlibd = zlib._ZlibDecompressor()
+        out = []
+
+        # Create almost full input buffer
+        out.append(zlibd.decompress(self.DATA[:200], 5))
+
+        # Add even more data to it, requiring resize
+        out.append(zlibd.decompress(self.DATA[200:300], 5))
+
+        # Decompress rest of data
+        out.append(zlibd.decompress(self.DATA[300:]))
+        self.assertEqual(b''.join(out), self.TEXT)
+
+    def test_failure(self):
+        zlibd = zlib._ZlibDecompressor()
+        self.assertRaises(Exception, zlibd.decompress, self.BAD_DATA * 30)
+        # Previously, a second call could crash due to internal inconsistency
+        self.assertRaises(Exception, zlibd.decompress, self.BAD_DATA * 30)
+
+    @support.refcount_test
+    def test_refleaks_in___init__(self):
+        gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount')
+        zlibd = zlib._ZlibDecompressor()
+        refs_before = gettotalrefcount()
+        for i in range(100):
+            zlibd.__init__()
+        self.assertAlmostEqual(gettotalrefcount() - refs_before, 0, delta=10)
 
 
 class CustomInt:
     def __index__(self):
diff --git a/stdlib/src/zlib.rs b/stdlib/src/zlib.rs
index 37ee1c83f7..83a7535c33 100644
--- a/stdlib/src/zlib.rs
+++ b/stdlib/src/zlib.rs
@@ -47,6 +47,7 @@ mod zlib {
     use libz_sys::{
         Z_BLOCK, Z_DEFAULT_STRATEGY, Z_FILTERED, Z_FINISH, Z_FIXED, Z_HUFFMAN_ONLY, Z_RLE, Z_TREES,
     };
+    use rustpython_vm::types::Constructor;
 
     // copied from zlibmodule.c (commit 530f506ac91338)
     #[pyattr]
@@ -587,4 +588,141 @@ mod zlib {
             Ok(Self::new(int))
         }
     }
+
+    #[pyattr]
+    #[pyclass(name = "_ZlibDecompressor")]
+    #[derive(Debug, PyPayload)]
+    pub struct ZlibDecompressor {
+        decompress: PyMutex<Decompress>,
+        unused_data: PyMutex<PyBytesRef>,
+        unconsumed_tail: PyMutex<PyBytesRef>,
+    }
+
+    impl Constructor for ZlibDecompressor {
+        type Args = ();
+
+        fn py_new(cls: PyTypeRef, _args: Self::Args, vm: &VirtualMachine) -> PyResult {
+            let decompress = Decompress::new(true);
+            let zlib_decompressor = Self {
+                decompress: PyMutex::new(decompress),
+                unused_data: PyMutex::new(PyBytes::from(vec![]).into_ref(&vm.ctx)),
+                unconsumed_tail: PyMutex::new(PyBytes::from(vec![]).into_ref(&vm.ctx)),
+            };
+            zlib_decompressor
+                .into_ref_with_type(vm, cls)
+                .map(Into::into)
+        }
+    }
+
+    #[pyclass(with(Constructor))]
+    impl ZlibDecompressor {
+        #[pygetset]
+        fn unused_data(&self) -> PyBytesRef {
+            self.unused_data.lock().clone()
+        }
+
+        #[pygetset]
+        fn unconsumed_tail(&self) -> PyBytesRef {
+            self.unconsumed_tail.lock().clone()
+        }
+
+        // Append any input left over after a completed stream to unused_data.
+        fn save_unused_input(
+            &self,
+            d: &Decompress,
+            data: &[u8],
+            stream_end: bool,
+            orig_in: u64,
+            vm: &VirtualMachine,
+        ) {
+            let leftover = &data[(d.total_in() - orig_in) as usize..];
+
+            if stream_end && !leftover.is_empty() {
+                let mut unused_data = self.unused_data.lock();
+                let unused: Vec<_> = unused_data
+                    .as_bytes()
+                    .iter()
+                    .chain(leftover)
+                    .copied()
+                    .collect();
+                *unused_data = vm.ctx.new_pyref(unused);
+            }
+        }
+
+        #[pymethod]
+        fn decompress(&self, args: PyBytesRef, vm: &VirtualMachine) -> PyResult<Vec<u8>> {
+            // let max_length = args.max_length.value;
+            // let max_length = (max_length != 0).then_some(max_length);
+            let max_length = None;
+            let data = args.as_bytes();
+
+            let mut d = self.decompress.lock();
+            let orig_in = d.total_in();
+
+            let (ret, stream_end) =
+                match _decompress(data, &mut d, DEF_BUF_SIZE, max_length, false, vm) {
+                    Ok((buf, true)) => {
+                        // Eof is true
+                        (Ok(buf), true)
+                    }
+                    Ok((buf, false)) => (Ok(buf), false),
+                    Err(err) => (Err(err), false),
+                };
+            self.save_unused_input(&d, data, stream_end, orig_in, vm);
+
+            let leftover = if stream_end {
+                b""
+            } else {
+                &data[(d.total_in() - orig_in) as usize..]
+            };
+
+            let mut unconsumed_tail = self.unconsumed_tail.lock();
+            if !leftover.is_empty() || !unconsumed_tail.is_empty() {
+                *unconsumed_tail = PyBytes::from(leftover.to_owned()).into_ref(&vm.ctx);
+            }
+
+            ret
+        }
+
+        #[pymethod]
+        fn flush(&self, length: OptionalArg<ArgSize>, vm: &VirtualMachine) -> PyResult<Vec<u8>> {
+            let length = match length {
+                OptionalArg::Present(l) => {
+                    let l: isize = l.into();
+                    if l <= 0 {
+                        return Err(
+                            vm.new_value_error("length must be greater than zero".to_owned())
+                        );
+                    } else {
+                        l as usize
+                    }
+                }
+                OptionalArg::Missing => DEF_BUF_SIZE,
+            };
+
+            let mut data = self.unconsumed_tail.lock();
+            let mut d = self.decompress.lock();
+
+            let orig_in = d.total_in();
+
+            let (ret, stream_end) = match _decompress(&data, &mut d, length, None, true, vm) {
+                Ok((buf, stream_end)) => (Ok(buf), stream_end),
+                Err(err) => (Err(err), false),
+            };
+            self.save_unused_input(&d, &data, stream_end, orig_in, vm);
+
+            *data = PyBytes::from(Vec::new()).into_ref(&vm.ctx);
+
+            // TODO: drop the inner decompressor, somehow
+            // if stream_end {
+            //
+            // }
+            ret
+        }
+
+        // TODO: Wait for getstate pyslot to be fixed
+        // #[pyslot]
+        // fn getstate(zelf: &PyObject, vm: &VirtualMachine) -> PyResult {
+        //     Err(vm.new_type_error("cannot serialize '_ZlibDecompressor' object".to_owned()))
+        // }
+    }
 }
diff --git a/vm/src/vm/vm_new.rs b/vm/src/vm/vm_new.rs
index ea8fa21c17..55f06e90da 100644
--- a/vm/src/vm/vm_new.rs
+++ b/vm/src/vm/vm_new.rs
@@ -394,4 +394,9 @@ impl VirtualMachine {
             obj.as_object(),
         )
     }
+
+    pub fn new_eof_error(&self, msg: String) -> PyBaseExceptionRef {
+        let eof_error = self.ctx.exceptions.eof_error.to_owned();
+        self.new_exception_msg(eof_error, msg)
+    }
 }
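--
test_flushes and test_wbits above compare against ZLIB_RUNTIME_VERSION_TUPLE,
so _zlib_runtime_version_tuple() has to be live code rather than a comment. A
doctest-style sketch of how it normalizes version strings (illustrative only,
derived from the comments inside the helper):

    >>> _zlib_runtime_version_tuple("1.2.3")
    (1, 2, 3, 0)
    >>> _zlib_runtime_version_tuple("1.2.0.f-linux")
    (1, 2, 0, 0)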
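A minimal usage sketch of the new zlib._ZlibDecompressor type, mirroring
testDecompressUnusedData above. Note that max_length, eof and needs_input are
not yet wired up on the Rust side (hence the expectedFailure markers), so this
sticks to the implemented surface:

    import zlib

    zlibd = zlib._ZlibDecompressor()
    # A complete deflate stream followed by unrelated trailing bytes.
    payload = zlib.compress(b"hello world") + b"trailing garbage"
    assert zlibd.decompress(payload) == b"hello world"
    # Bytes past the end of the stream are preserved in unused_data.
    assert zlibd.unused_data == b"trailing garbage"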