diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ba72877f..9ca603c5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -43,6 +43,8 @@ jobs: if: matrix.build != 'mingw' - run: cargo test --features zlib-ng --no-default-features if: matrix.build != 'mingw' + - run: cargo test --features zlib-rs --no-default-features + if: matrix.build != 'mingw' - run: cargo test --features cloudflare_zlib --no-default-features if: matrix.build != 'mingw' - run: | diff --git a/Cargo.toml b/Cargo.toml index b59d2a6d..66cf59e5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "flate2" authors = ["Alex Crichton ", "Josh Triplett "] -version = "1.0.28" +version = "1.0.29" edition = "2018" license = "MIT OR Apache-2.0" readme = "README.md" @@ -19,6 +19,7 @@ and raw deflate streams. [dependencies] libz-sys = { version = "1.1.8", optional = true, default-features = false } libz-ng-sys = { version = "1.1.8", optional = true } +libz-rs-sys = { version = "0.1.1", optional = true, default-features = false, features = ["std", "rust-allocator"] } cloudflare-zlib-sys = { version = "0.3.0", optional = true } miniz_oxide = { version = "0.7.1", optional = true, default-features = false, features = ["with-alloc"] } crc32fast = "1.2.0" @@ -38,6 +39,7 @@ zlib = ["any_zlib", "libz-sys"] zlib-default = ["any_zlib", "libz-sys/default"] zlib-ng-compat = ["zlib", "libz-sys/zlib-ng"] zlib-ng = ["any_zlib", "libz-ng-sys"] +zlib-rs = ["any_zlib", "libz-rs-sys"] cloudflare_zlib = ["any_zlib", "cloudflare-zlib-sys"] rust_backend = ["miniz_oxide", "any_impl"] miniz-sys = ["rust_backend"] # For backwards compatibility diff --git a/src/deflate/bufread.rs b/src/deflate/bufread.rs index c70a6303..65358374 100644 --- a/src/deflate/bufread.rs +++ b/src/deflate/bufread.rs @@ -243,3 +243,50 @@ impl Write for DeflateDecoder { self.get_mut().flush() } } + +#[cfg(test)] +mod test { + use crate::bufread::DeflateDecoder; + use crate::deflate::write; + 
use crate::Compression; + use std::io::{Read, Write}; + + // DeflateDecoder consumes one deflate archive and then returns 0 for subsequent reads, allowing any + // additional data to be consumed by the caller. + #[test] + fn decode_extra_data() { + let expected = "Hello World"; + + let compressed = { + let mut e = write::DeflateEncoder::new(Vec::new(), Compression::default()); + e.write(expected.as_ref()).unwrap(); + let mut b = e.finish().unwrap(); + b.push(b'x'); + b + }; + + let mut output = Vec::new(); + let mut decoder = DeflateDecoder::new(compressed.as_slice()); + let decoded_bytes = decoder.read_to_end(&mut output).unwrap(); + assert_eq!(decoded_bytes, output.len()); + let actual = std::str::from_utf8(&output).expect("String parsing error"); + assert_eq!( + actual, expected, + "after decompression we obtain the original input" + ); + + output.clear(); + assert_eq!( + decoder.read(&mut output).unwrap(), + 0, + "subsequent read of decoder returns 0, but inner reader can return additional data" + ); + let mut reader = decoder.into_inner(); + assert_eq!( + reader.read_to_end(&mut output).unwrap(), + 1, + "extra data is accessible in underlying buf-read" + ); + assert_eq!(output, b"x"); + } +} diff --git a/src/deflate/write.rs b/src/deflate/write.rs index 2c44556a..0bd8502c 100644 --- a/src/deflate/write.rs +++ b/src/deflate/write.rs @@ -320,3 +320,43 @@ impl Read for DeflateDecoder { self.inner.get_mut().read(buf) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::Compression; + + const STR: &str = "Hello World Hello World Hello World Hello World Hello World \ + Hello World Hello World Hello World Hello World Hello World \ + Hello World Hello World Hello World Hello World Hello World \ + Hello World Hello World Hello World Hello World Hello World \ + Hello World Hello World Hello World Hello World Hello World"; + + // DeflateDecoder consumes one deflate archive and then returns 0 for subsequent writes, allowing any + // additional data to be consumed by 
the caller. + #[test] + fn decode_extra_data() { + let compressed = { + let mut e = DeflateEncoder::new(Vec::new(), Compression::default()); + e.write(STR.as_ref()).unwrap(); + let mut b = e.finish().unwrap(); + b.push(b'x'); + b + }; + + let mut writer = Vec::new(); + let mut decoder = DeflateDecoder::new(writer); + let mut consumed_bytes = 0; + loop { + let n = decoder.write(&compressed[consumed_bytes..]).unwrap(); + if n == 0 { + break; + } + consumed_bytes += n; + } + writer = decoder.finish().unwrap(); + let actual = String::from_utf8(writer).expect("String parsing error"); + assert_eq!(actual, STR); + assert_eq!(&compressed[consumed_bytes..], b"x"); + } +} diff --git a/src/ffi/c.rs b/src/ffi/c.rs index 32864f8f..75d69b14 100644 --- a/src/ffi/c.rs +++ b/src/ffi/c.rs @@ -1,15 +1,12 @@ //! Implementation for C backends. -use std::alloc::{self, Layout}; use std::cmp; -use std::convert::TryFrom; use std::fmt; use std::marker; -use std::ops::{Deref, DerefMut}; -use std::os::raw::{c_int, c_uint, c_void}; +use std::os::raw::{c_int, c_uint}; use std::ptr; use super::*; -use crate::mem::{self, FlushDecompress, Status}; +use crate::mem; #[derive(Default)] pub struct ErrorMessage(Option<&'static str>); @@ -21,7 +18,10 @@ impl ErrorMessage { } pub struct StreamWrapper { - pub inner: Box, + // SAFETY: The field `inner` must always be accessed as a raw pointer, + // since it points to a cyclic structure, and it must never be copied + // by Rust. + pub inner: *mut mz_stream, } impl fmt::Debug for StreamWrapper { @@ -32,8 +32,12 @@ impl fmt::Debug for StreamWrapper { impl Default for StreamWrapper { fn default() -> StreamWrapper { + // SAFETY: The field `state` will be initialized across the FFI to + // point to the opaque type `mz_internal_state`, which will contain a copy + // of `inner`. This cyclic structure breaks the uniqueness invariant of + // &mut mz_stream, so we must use a raw pointer instead of Box. 
StreamWrapper { - inner: Box::new(mz_stream { + inner: Box::into_raw(Box::new(mz_stream { next_in: ptr::null_mut(), avail_in: 0, total_in: 0, @@ -46,81 +50,100 @@ impl Default for StreamWrapper { reserved: 0, opaque: ptr::null_mut(), state: ptr::null_mut(), - #[cfg(all(feature = "any_zlib", not(feature = "cloudflare-zlib-sys")))] - zalloc, - #[cfg(all(feature = "any_zlib", not(feature = "cloudflare-zlib-sys")))] - zfree, - #[cfg(not(all(feature = "any_zlib", not(feature = "cloudflare-zlib-sys"))))] - zalloc: Some(zalloc), - #[cfg(not(all(feature = "any_zlib", not(feature = "cloudflare-zlib-sys"))))] - zfree: Some(zfree), - }), + #[cfg(all( + feature = "any_zlib", + not(any(feature = "cloudflare-zlib-sys", feature = "libz-rs-sys")) + ))] + zalloc: allocator::zalloc, + #[cfg(all( + feature = "any_zlib", + not(any(feature = "cloudflare-zlib-sys", feature = "libz-rs-sys")) + ))] + zfree: allocator::zfree, + + #[cfg(all(feature = "any_zlib", feature = "cloudflare-zlib-sys"))] + zalloc: Some(allocator::zalloc), + #[cfg(all(feature = "any_zlib", feature = "cloudflare-zlib-sys"))] + zfree: Some(allocator::zfree), + + // for zlib-rs, it is most efficient to have it provide the allocator. + // The libz-rs-sys dependency is configured to use the rust system allocator + #[cfg(all(feature = "any_zlib", feature = "libz-rs-sys"))] + zalloc: None, + #[cfg(all(feature = "any_zlib", feature = "libz-rs-sys"))] + zfree: None, + })), } } } -const ALIGN: usize = std::mem::align_of::(); - -fn align_up(size: usize, align: usize) -> usize { - (size + align - 1) & !(align - 1) -} - -extern "C" fn zalloc(_ptr: *mut c_void, items: AllocSize, item_size: AllocSize) -> *mut c_void { - // We need to multiply `items` and `item_size` to get the actual desired - // allocation size. Since `zfree` doesn't receive a size argument we - // also need to allocate space for a `usize` as a header so we can store - // how large the allocation is to deallocate later. 
- let size = match items - .checked_mul(item_size) - .and_then(|i| usize::try_from(i).ok()) - .map(|size| align_up(size, ALIGN)) - .and_then(|i| i.checked_add(std::mem::size_of::())) - { - Some(i) => i, - None => return ptr::null_mut(), - }; - - // Make sure the `size` isn't too big to fail `Layout`'s restrictions - let layout = match Layout::from_size_align(size, ALIGN) { - Ok(layout) => layout, - Err(_) => return ptr::null_mut(), - }; - - unsafe { - // Allocate the data, and if successful store the size we allocated - // at the beginning and then return an offset pointer. - let ptr = alloc::alloc(layout) as *mut usize; - if ptr.is_null() { - return ptr as *mut c_void; - } - *ptr = size; - ptr.add(1) as *mut c_void +impl Drop for StreamWrapper { + fn drop(&mut self) { + // SAFETY: At this point, every other allocation for the struct has been freed by + // `inflateEnd` or `deflateEnd`, and no copies of `inner` are retained by `C`, + // so it is safe to drop the struct as long as the user respects the invariant that + // `inner` must never be copied by Rust. + drop(unsafe { Box::from_raw(self.inner) }); } } -extern "C" fn zfree(_ptr: *mut c_void, address: *mut c_void) { - unsafe { - // Move our address being freed back one pointer, read the size we - // stored in `zalloc`, and then free it using the standard Rust - // allocator. 
- let ptr = (address as *mut usize).offset(-1); - let size = *ptr; - let layout = Layout::from_size_align_unchecked(size, ALIGN); - alloc::dealloc(ptr as *mut u8, layout) +#[cfg(all(feature = "any_zlib", not(feature = "libz-rs-sys")))] +mod allocator { + use super::*; + + use std::alloc::{self, Layout}; + use std::convert::TryFrom; + use std::os::raw::c_void; + + const ALIGN: usize = std::mem::align_of::(); + + fn align_up(size: usize, align: usize) -> usize { + (size + align - 1) & !(align - 1) } -} -impl Deref for StreamWrapper { - type Target = mz_stream; + pub extern "C" fn zalloc(_ptr: *mut c_void, items: uInt, item_size: uInt) -> *mut c_void { + // We need to multiply `items` and `item_size` to get the actual desired + // allocation size. Since `zfree` doesn't receive a size argument we + // also need to allocate space for a `usize` as a header so we can store + // how large the allocation is to deallocate later. + let size = match items + .checked_mul(item_size) + .and_then(|i| usize::try_from(i).ok()) + .map(|size| align_up(size, ALIGN)) + .and_then(|i| i.checked_add(std::mem::size_of::())) + { + Some(i) => i, + None => return ptr::null_mut(), + }; + + // Make sure the `size` isn't too big to fail `Layout`'s restrictions + let layout = match Layout::from_size_align(size, ALIGN) { + Ok(layout) => layout, + Err(_) => return ptr::null_mut(), + }; - fn deref(&self) -> &Self::Target { - &*self.inner + unsafe { + // Allocate the data, and if successful store the size we allocated + // at the beginning and then return an offset pointer. 
+ let ptr = alloc::alloc(layout) as *mut usize; + if ptr.is_null() { + return ptr as *mut c_void; + } + *ptr = size; + ptr.add(1) as *mut c_void + } } -} -impl DerefMut for StreamWrapper { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut *self.inner + pub extern "C" fn zfree(_ptr: *mut c_void, address: *mut c_void) { + unsafe { + // Move our address being freed back one pointer, read the size we + // stored in `zalloc`, and then free it using the standard Rust + // allocator. + let ptr = (address as *mut usize).offset(-1); + let size = *ptr; + let layout = Layout::from_size_align_unchecked(size, ALIGN); + alloc::dealloc(ptr as *mut u8, layout) + } } } @@ -148,7 +171,10 @@ pub struct Stream { impl Stream { pub fn msg(&self) -> ErrorMessage { - let msg = self.stream_wrapper.msg; + // SAFETY: The field `inner` must always be accessed as a raw pointer, + // since it points to a cyclic structure. No copies of `inner` can be + // retained for longer than the lifetime of `self`. + let msg = unsafe { (*self.stream_wrapper.inner).msg }; ErrorMessage(if msg.is_null() { None } else { @@ -161,7 +187,7 @@ impl Stream { impl Drop for Stream { fn drop(&mut self) { unsafe { - let _ = D::destroy(&mut *self.stream_wrapper); + let _ = D::destroy(self.stream_wrapper.inner); } } } @@ -185,9 +211,9 @@ pub struct Inflate { impl InflateBackend for Inflate { fn make(zlib_header: bool, window_bits: u8) -> Self { unsafe { - let mut state = StreamWrapper::default(); + let state = StreamWrapper::default(); let ret = mz_inflateInit2( - &mut *state, + state.inner, if zlib_header { window_bits as c_int } else { @@ -212,33 +238,38 @@ impl InflateBackend for Inflate { output: &mut [u8], flush: FlushDecompress, ) -> Result { - let raw = &mut *self.inner.stream_wrapper; - raw.msg = ptr::null_mut(); - raw.next_in = input.as_ptr() as *mut u8; - raw.avail_in = cmp::min(input.len(), c_uint::MAX as usize) as c_uint; - raw.next_out = output.as_mut_ptr(); - raw.avail_out = cmp::min(output.len(), 
c_uint::MAX as usize) as c_uint; - - let rc = unsafe { mz_inflate(raw, flush as c_int) }; - - // Unfortunately the total counters provided by zlib might be only - // 32 bits wide and overflow while processing large amounts of data. - self.inner.total_in += (raw.next_in as usize - input.as_ptr() as usize) as u64; - self.inner.total_out += (raw.next_out as usize - output.as_ptr() as usize) as u64; - - // reset these pointers so we don't accidentally read them later - raw.next_in = ptr::null_mut(); - raw.avail_in = 0; - raw.next_out = ptr::null_mut(); - raw.avail_out = 0; - - match rc { - MZ_DATA_ERROR | MZ_STREAM_ERROR => mem::decompress_failed(self.inner.msg()), - MZ_OK => Ok(Status::Ok), - MZ_BUF_ERROR => Ok(Status::BufError), - MZ_STREAM_END => Ok(Status::StreamEnd), - MZ_NEED_DICT => mem::decompress_need_dict(raw.adler as u32), - c => panic!("unknown return code: {}", c), + let raw = self.inner.stream_wrapper.inner; + // SAFETY: The field `inner` must always be accessed as a raw pointer, + // since it points to a cyclic structure. No copies of `inner` can be + // retained for longer than the lifetime of `self`. + unsafe { + (*raw).msg = ptr::null_mut(); + (*raw).next_in = input.as_ptr() as *mut u8; + (*raw).avail_in = cmp::min(input.len(), c_uint::MAX as usize) as c_uint; + (*raw).next_out = output.as_mut_ptr(); + (*raw).avail_out = cmp::min(output.len(), c_uint::MAX as usize) as c_uint; + + let rc = mz_inflate(raw, flush as c_int); + + // Unfortunately the total counters provided by zlib might be only + // 32 bits wide and overflow while processing large amounts of data. 
+ self.inner.total_in += ((*raw).next_in as usize - input.as_ptr() as usize) as u64; + self.inner.total_out += ((*raw).next_out as usize - output.as_ptr() as usize) as u64; + + // reset these pointers so we don't accidentally read them later + (*raw).next_in = ptr::null_mut(); + (*raw).avail_in = 0; + (*raw).next_out = ptr::null_mut(); + (*raw).avail_out = 0; + + match rc { + MZ_DATA_ERROR | MZ_STREAM_ERROR => mem::decompress_failed(self.inner.msg()), + MZ_OK => Ok(Status::Ok), + MZ_BUF_ERROR => Ok(Status::BufError), + MZ_STREAM_END => Ok(Status::StreamEnd), + MZ_NEED_DICT => mem::decompress_need_dict((*raw).adler as u32), + c => panic!("unknown return code: {}", c), + } } } @@ -249,7 +280,7 @@ impl InflateBackend for Inflate { -MZ_DEFAULT_WINDOW_BITS }; unsafe { - inflateReset2(&mut *self.inner.stream_wrapper, bits); + inflateReset2(self.inner.stream_wrapper.inner, bits); } self.inner.total_out = 0; self.inner.total_in = 0; @@ -276,9 +307,9 @@ pub struct Deflate { impl DeflateBackend for Deflate { fn make(level: Compression, zlib_header: bool, window_bits: u8) -> Self { unsafe { - let mut state = StreamWrapper::default(); + let state = StreamWrapper::default(); let ret = mz_deflateInit2( - &mut *state, + state.inner, level.0 as c_int, MZ_DEFLATED, if zlib_header { @@ -306,39 +337,44 @@ impl DeflateBackend for Deflate { output: &mut [u8], flush: FlushCompress, ) -> Result { - let raw = &mut *self.inner.stream_wrapper; - raw.msg = ptr::null_mut(); - raw.next_in = input.as_ptr() as *mut _; - raw.avail_in = cmp::min(input.len(), c_uint::MAX as usize) as c_uint; - raw.next_out = output.as_mut_ptr(); - raw.avail_out = cmp::min(output.len(), c_uint::MAX as usize) as c_uint; - - let rc = unsafe { mz_deflate(raw, flush as c_int) }; - - // Unfortunately the total counters provided by zlib might be only - // 32 bits wide and overflow while processing large amounts of data. 
- self.inner.total_in += (raw.next_in as usize - input.as_ptr() as usize) as u64; - self.inner.total_out += (raw.next_out as usize - output.as_ptr() as usize) as u64; - - // reset these pointers so we don't accidentally read them later - raw.next_in = ptr::null_mut(); - raw.avail_in = 0; - raw.next_out = ptr::null_mut(); - raw.avail_out = 0; - - match rc { - MZ_OK => Ok(Status::Ok), - MZ_BUF_ERROR => Ok(Status::BufError), - MZ_STREAM_END => Ok(Status::StreamEnd), - MZ_STREAM_ERROR => mem::compress_failed(self.inner.msg()), - c => panic!("unknown return code: {}", c), + let raw = self.inner.stream_wrapper.inner; + // SAFETY: The field `inner` must always be accessed as a raw pointer, + // since it points to a cyclic structure. No copies of `inner` can be + // retained for longer than the lifetime of `self`. + unsafe { + (*raw).msg = ptr::null_mut(); + (*raw).next_in = input.as_ptr() as *mut _; + (*raw).avail_in = cmp::min(input.len(), c_uint::MAX as usize) as c_uint; + (*raw).next_out = output.as_mut_ptr(); + (*raw).avail_out = cmp::min(output.len(), c_uint::MAX as usize) as c_uint; + + let rc = mz_deflate(raw, flush as c_int); + + // Unfortunately the total counters provided by zlib might be only + // 32 bits wide and overflow while processing large amounts of data. 
+ + self.inner.total_in += ((*raw).next_in as usize - input.as_ptr() as usize) as u64; + self.inner.total_out += ((*raw).next_out as usize - output.as_ptr() as usize) as u64; + // reset these pointers so we don't accidentally read them later + (*raw).next_in = ptr::null_mut(); + (*raw).avail_in = 0; + (*raw).next_out = ptr::null_mut(); + (*raw).avail_out = 0; + + match rc { + MZ_OK => Ok(Status::Ok), + MZ_BUF_ERROR => Ok(Status::BufError), + MZ_STREAM_END => Ok(Status::StreamEnd), + MZ_STREAM_ERROR => mem::compress_failed(self.inner.msg()), + c => panic!("unknown return code: {}", c), + } } } fn reset(&mut self) { self.inner.total_in = 0; self.inner.total_out = 0; - let rc = unsafe { mz_deflateReset(&mut *self.inner.stream_wrapper) }; + let rc = unsafe { mz_deflateReset(self.inner.stream_wrapper.inner) }; assert_eq!(rc, MZ_OK); } } @@ -359,6 +395,7 @@ pub use self::c_backend::*; /// For backwards compatibility, we provide symbols as `mz_` to mimic the miniz API #[allow(bad_style)] +#[allow(unused_imports)] mod c_backend { use std::mem; use std::os::raw::{c_char, c_int}; @@ -366,10 +403,17 @@ mod c_backend { #[cfg(feature = "zlib-ng")] use libz_ng_sys as libz; + #[cfg(feature = "zlib-rs")] + use libz_rs_sys as libz; + #[cfg(all(not(feature = "zlib-ng"), feature = "cloudflare_zlib"))] use cloudflare_zlib_sys as libz; - #[cfg(all(not(feature = "cloudflare_zlib"), not(feature = "zlib-ng")))] + #[cfg(all( + not(feature = "cloudflare_zlib"), + not(feature = "zlib-ng"), + not(feature = "zlib-rs") + ))] use libz_sys as libz; pub use libz::deflate as mz_deflate; @@ -394,13 +438,14 @@ mod c_backend { pub use libz::Z_STREAM_END as MZ_STREAM_END; pub use libz::Z_STREAM_ERROR as MZ_STREAM_ERROR; pub use libz::Z_SYNC_FLUSH as MZ_SYNC_FLUSH; - pub type AllocSize = libz::uInt; pub const MZ_DEFAULT_WINDOW_BITS: c_int = 15; #[cfg(feature = "zlib-ng")] const ZLIB_VERSION: &'static str = "2.1.0.devel\0"; - #[cfg(not(feature = "zlib-ng"))] + #[cfg(feature = "zlib-rs")] + const 
ZLIB_VERSION: &'static str = "0.1.0\0"; + #[cfg(not(any(feature = "zlib-ng", feature = "zlib-rs")))] const ZLIB_VERSION: &'static str = "1.2.8\0"; pub unsafe extern "C" fn mz_deflateInit2( diff --git a/src/mem.rs b/src/mem.rs index d4a50917..86fa8d3b 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -265,16 +265,19 @@ impl Compress { /// Returns the Adler-32 checksum of the dictionary. #[cfg(feature = "any_zlib")] pub fn set_dictionary(&mut self, dictionary: &[u8]) -> Result { - let stream = &mut *self.inner.inner.stream_wrapper; - stream.msg = std::ptr::null_mut(); + // SAFETY: The field `inner` must always be accessed as a raw pointer, + // since it points to a cyclic structure. No copies of `inner` can be + // retained for longer than the lifetime of `self.inner.inner.stream_wrapper`. + let stream = self.inner.inner.stream_wrapper.inner; let rc = unsafe { + (*stream).msg = std::ptr::null_mut(); assert!(dictionary.len() < ffi::uInt::MAX as usize); ffi::deflateSetDictionary(stream, dictionary.as_ptr(), dictionary.len() as ffi::uInt) }; match rc { ffi::MZ_STREAM_ERROR => compress_failed(self.inner.inner.msg()), - ffi::MZ_OK => Ok(stream.adler as u32), + ffi::MZ_OK => Ok(unsafe { (*stream).adler } as u32), c => panic!("unknown return code: {}", c), } } @@ -299,9 +302,13 @@ impl Compress { #[cfg(feature = "any_zlib")] pub fn set_level(&mut self, level: Compression) -> Result<(), CompressError> { use std::os::raw::c_int; - let stream = &mut *self.inner.inner.stream_wrapper; - stream.msg = std::ptr::null_mut(); - + // SAFETY: The field `inner` must always be accessed as a raw pointer, + // since it points to a cyclic structure. No copies of `inner` can be + // retained for longer than the lifetime of `self.inner.inner.stream_wrapper`. 
+ let stream = self.inner.inner.stream_wrapper.inner; + unsafe { + (*stream).msg = std::ptr::null_mut(); + } let rc = unsafe { ffi::deflateParams(stream, level.0 as c_int, ffi::MZ_DEFAULT_STRATEGY) }; match rc { @@ -476,17 +483,20 @@ impl Decompress { /// Specifies the decompression dictionary to use. #[cfg(feature = "any_zlib")] pub fn set_dictionary(&mut self, dictionary: &[u8]) -> Result { - let stream = &mut *self.inner.inner.stream_wrapper; - stream.msg = std::ptr::null_mut(); + // SAFETY: The field `inner` must always be accessed as a raw pointer, + // since it points to a cyclic structure. No copies of `inner` can be + // retained for longer than the lifetime of `self.inner.inner.stream_wrapper`. + let stream = self.inner.inner.stream_wrapper.inner; let rc = unsafe { + (*stream).msg = std::ptr::null_mut(); assert!(dictionary.len() < ffi::uInt::MAX as usize); ffi::inflateSetDictionary(stream, dictionary.as_ptr(), dictionary.len() as ffi::uInt) }; match rc { ffi::MZ_STREAM_ERROR => decompress_failed(self.inner.inner.msg()), - ffi::MZ_DATA_ERROR => decompress_need_dict(stream.adler as u32), - ffi::MZ_OK => Ok(stream.adler as u32), + ffi::MZ_DATA_ERROR => decompress_need_dict(unsafe { (*stream).adler } as u32), + ffi::MZ_OK => Ok(unsafe { (*stream).adler } as u32), c => panic!("unknown return code: {}", c), } } diff --git a/src/zlib/bufread.rs b/src/zlib/bufread.rs index 85bbd38a..da7ed95e 100644 --- a/src/zlib/bufread.rs +++ b/src/zlib/bufread.rs @@ -251,3 +251,50 @@ impl Write for ZlibDecoder { self.get_mut().flush() } } + +#[cfg(test)] +mod test { + use crate::bufread::ZlibDecoder; + use crate::zlib::write; + use crate::Compression; + use std::io::{Read, Write}; + + // ZlibDecoder consumes one zlib archive and then returns 0 for subsequent reads, allowing any + // additional data to be consumed by the caller. 
+ #[test] + fn decode_extra_data() { + let expected = "Hello World"; + + let compressed = { + let mut e = write::ZlibEncoder::new(Vec::new(), Compression::default()); + e.write(expected.as_ref()).unwrap(); + let mut b = e.finish().unwrap(); + b.push(b'x'); + b + }; + + let mut output = Vec::new(); + let mut decoder = ZlibDecoder::new(compressed.as_slice()); + let decoded_bytes = decoder.read_to_end(&mut output).unwrap(); + assert_eq!(decoded_bytes, output.len()); + let actual = std::str::from_utf8(&output).expect("String parsing error"); + assert_eq!( + actual, expected, + "after decompression we obtain the original input" + ); + + output.clear(); + assert_eq!( + decoder.read(&mut output).unwrap(), + 0, + "subsequent read of decoder returns 0, but inner reader can return additional data" + ); + let mut reader = decoder.into_inner(); + assert_eq!( + reader.read_to_end(&mut output).unwrap(), + 1, + "extra data is accessible in underlying buf-read" + ); + assert_eq!(output, b"x"); + } +} diff --git a/src/zlib/write.rs b/src/zlib/write.rs index d8ad2f26..64c2c872 100644 --- a/src/zlib/write.rs +++ b/src/zlib/write.rs @@ -338,3 +338,43 @@ impl Read for ZlibDecoder { self.inner.get_mut().read(buf) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::Compression; + + const STR: &str = "Hello World Hello World Hello World Hello World Hello World \ + Hello World Hello World Hello World Hello World Hello World \ + Hello World Hello World Hello World Hello World Hello World \ + Hello World Hello World Hello World Hello World Hello World \ + Hello World Hello World Hello World Hello World Hello World"; + + // ZlibDecoder consumes one zlib archive and then returns 0 for subsequent writes, allowing any + // additional data to be consumed by the caller. 
+ #[test] + fn decode_extra_data() { + let compressed = { + let mut e = ZlibEncoder::new(Vec::new(), Compression::default()); + e.write(STR.as_ref()).unwrap(); + let mut b = e.finish().unwrap(); + b.push(b'x'); + b + }; + + let mut writer = Vec::new(); + let mut decoder = ZlibDecoder::new(writer); + let mut consumed_bytes = 0; + loop { + let n = decoder.write(&compressed[consumed_bytes..]).unwrap(); + if n == 0 { + break; + } + consumed_bytes += n; + } + writer = decoder.finish().unwrap(); + let actual = String::from_utf8(writer).expect("String parsing error"); + assert_eq!(actual, STR); + assert_eq!(&compressed[consumed_bytes..], b"x"); + } +}