From 396812d66c60b87fee4d8aa50da419e317904ce2 Mon Sep 17 00:00:00 2001 From: Dennis Zhuang Date: Tue, 14 Jun 2022 16:29:21 +0900 Subject: [PATCH 01/10] Refactor byte operations --- vm/src/builtins/bytearray.rs | 6 +++--- vm/src/byte.rs | 27 +++++++++++++++++++++++++++ vm/src/bytesinner.rs | 24 +----------------------- vm/src/lib.rs | 1 + 4 files changed, 32 insertions(+), 26 deletions(-) create mode 100644 vm/src/byte.rs diff --git a/vm/src/builtins/bytearray.rs b/vm/src/builtins/bytearray.rs index 368227eb6b..7104afc77e 100644 --- a/vm/src/builtins/bytearray.rs +++ b/vm/src/builtins/bytearray.rs @@ -5,10 +5,10 @@ use super::{ }; use crate::{ anystr::{self, AnyStr}, + byte::{bytes_from_object, value_from_object}, bytesinner::{ - bytes_decode, bytes_from_object, value_from_object, ByteInnerFindOptions, - ByteInnerNewOptions, ByteInnerPaddingOptions, ByteInnerSplitOptions, - ByteInnerTranslateOptions, DecodeArgs, PyBytesInner, + bytes_decode, ByteInnerFindOptions, ByteInnerNewOptions, ByteInnerPaddingOptions, + ByteInnerSplitOptions, ByteInnerTranslateOptions, DecodeArgs, PyBytesInner, }, class::PyClassImpl, common::{ diff --git a/vm/src/byte.rs b/vm/src/byte.rs new file mode 100644 index 0000000000..4933b2b3a4 --- /dev/null +++ b/vm/src/byte.rs @@ -0,0 +1,27 @@ +//! byte operation APIs +use crate::object::AsObject; +use crate::{PyObject, PyResult, VirtualMachine}; +use num_traits::ToPrimitive; + +pub fn bytes_from_object(vm: &VirtualMachine, obj: &PyObject) -> PyResult> { + if let Ok(elements) = obj.try_bytes_like(vm, |bytes| bytes.to_vec()) { + return Ok(elements); + } + + if !obj.fast_isinstance(vm.ctx.types.str_type) { + if let Ok(elements) = vm.map_iterable_object(obj, |x| value_from_object(vm, &x)) { + return elements; + } + } + + Err(vm.new_type_error( + "can assign only bytes, buffers, or iterables of ints in range(0, 256)".to_owned(), + )) +} + +pub fn value_from_object(vm: &VirtualMachine, obj: &PyObject) -> PyResult { + vm.to_index(obj)? + .as_bigint() + .to_u8() + .ok_or_else(|| vm.new_value_error("byte must be in range(0, 256)".to_owned())) +} diff --git a/vm/src/bytesinner.rs b/vm/src/bytesinner.rs index b3063d3cec..14667a69e8 100644 --- a/vm/src/bytesinner.rs +++ b/vm/src/bytesinner.rs @@ -3,6 +3,7 @@ use crate::{ builtins::{ pystr, PyByteArray, PyBytes, PyBytesRef, PyInt, PyIntRef, PyStr, PyStrRef, PyTypeRef, }, + byte::bytes_from_object, cformat::CFormatBytes, function::{ArgIterable, Either, OptionalArg, OptionalOption, PyComparisonValue}, identifier, @@ -1207,26 +1208,3 @@ pub fn bytes_to_hex( pub const fn is_py_ascii_whitespace(b: u8) -> bool { matches!(b, b'\t' | b'\n' | b'\x0C' | b'\r' | b' ' | b'\x0B') } - -pub fn bytes_from_object(vm: &VirtualMachine, obj: &PyObject) -> PyResult> { - if let Ok(elements) = obj.try_bytes_like(vm, |bytes| bytes.to_vec()) { - return Ok(elements); - } - - if !obj.fast_isinstance(vm.ctx.types.str_type) { - if let Ok(elements) = vm.map_iterable_object(obj, |x| value_from_object(vm, &x)) { - return elements; - } - } - - Err(vm.new_type_error( - "can assign only bytes, buffers, or iterables of ints in range(0, 256)".to_owned(), - )) -} - -pub fn value_from_object(vm: &VirtualMachine, obj: &PyObject) -> PyResult { - vm.to_index(obj)? - .as_bigint() - .to_u8() - .ok_or_else(|| vm.new_value_error("byte must be in range(0, 256)".to_owned())) -} diff --git a/vm/src/lib.rs b/vm/src/lib.rs index a3b86dd626..95ee6e14df 100644 --- a/vm/src/lib.rs +++ b/vm/src/lib.rs @@ -41,6 +41,7 @@ pub(crate) mod macros; mod anystr; pub mod buffer; pub mod builtins; +pub mod byte; mod bytesinner; pub mod cformat; pub mod class; From cc4583ef505ff69fcb9463f901f42b5e11f85de6 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Wed, 9 Mar 2022 01:07:36 +0900 Subject: [PATCH 02/10] mmap skeleton --- Cargo.lock | 21 ++++++ stdlib/Cargo.toml | 2 + stdlib/src/lib.rs | 3 + stdlib/src/mmap.rs | 156 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 182 insertions(+) create mode 100644 stdlib/src/mmap.rs diff --git a/Cargo.lock b/Cargo.lock index be1bdd5402..072d12200d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -996,6 +996,15 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" +[[package]] +name = "memmap2" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "057a3db23999c867821a7a59feb06a578fcb03685e983dff90daf9e7d24ac08f" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.6.5" @@ -1190,6 +1199,16 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "978aa494585d3ca4ad74929863093e87cac9790d81fe7aba2b3dc2890643a0fc" +[[package]] +name = "page_size" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "parking_lot" version = "0.12.0" @@ -1719,6 +1738,7 @@ dependencies = [ "libz-sys", "md-5", "memchr", + "memmap2", "mt19937", "nix", "num-bigint", @@ -1729,6 +1749,7 @@ dependencies = [ "openssl", "openssl-probe", "openssl-sys", + "page_size", "paste", "puruspe", "rand", diff --git a/stdlib/Cargo.toml b/stdlib/Cargo.toml index 5e91278667..bbb22e8624 100644 --- a/stdlib/Cargo.toml +++ b/stdlib/Cargo.toml @@ -63,6 +63,8 @@ ahash = "0.7.6" libz-sys = { version = "1.1.5", optional = true } num_enum = "0.5.7" ascii = "1.0.0" +memmap2 = "0.5.0" +page_size = "0.4.2" [target.'cfg(all(unix, not(target_os = "redox")))'.dependencies] termios = "0.3.3" diff --git a/stdlib/src/lib.rs b/stdlib/src/lib.rs index 44ca0ef941..6159b6a24f 100644 --- a/stdlib/src/lib.rs +++ b/stdlib/src/lib.rs @@ -16,6 +16,8 @@ mod gc; mod hashlib; mod json; mod math; +#[cfg(unix)] +mod mmap; mod platform; mod pyexpat; mod pystruct; @@ -125,6 +127,7 @@ pub fn get_module_inits() -> impl Iterator, StdlibInit { "_posixsubprocess" => posixsubprocess::make_module, "syslog" => syslog::make_module, + "mmap" => mmap::make_module, } #[cfg(target_os = "macos")] { diff --git a/stdlib/src/mmap.rs b/stdlib/src/mmap.rs new file mode 100644 index 0000000000..4cda3395a2 --- /dev/null +++ b/stdlib/src/mmap.rs @@ -0,0 +1,156 @@ +pub(crate) use mmap::make_module; + +#[pymodule] +mod mmap { + use crate::vm::{ + builtins::PyTypeRef, convert::ToPyResult, function::OptionalArg, types::Constructor, + FromArgs, PyObject, PyPayload, PyResult, TryFromBorrowedObject, VirtualMachine, + }; + use memmap2::{MmapMut, MmapOptions}; + + #[repr(C)] + #[derive(PartialEq, Eq, Debug)] + enum AccessMode { + Default = 0, + Read = 1, + Write = 2, + Copy = 3, + } + + impl TryFromBorrowedObject for AccessMode { + fn try_from_borrowed_object(vm: &VirtualMachine, obj: &PyObject) -> PyResult { + let i = u32::try_from_borrowed_object(vm, obj)?; + Ok(match i { + 0 => Self::Default, + 1 => Self::Read, + 2 => Self::Write, + 3 => Self::Copy, + _ => return Err(vm.new_value_error("Not a valid AccessMode value".to_owned())), + }) + } + } + + #[pyattr] + use libc::{MAP_ANON, MAP_ANONYMOUS, MAP_PRIVATE, MAP_SHARED, PROT_READ, PROT_WRITE}; + #[pyattr] + const ACCESS_DEFAULT: u32 = AccessMode::Default as u32; + #[pyattr] + const ACCESS_READ: u32 = AccessMode::Read as u32; + #[pyattr] + const ACCESS_WRITE: u32 = AccessMode::Write as u32; + #[pyattr] + const ACCESS_COPY: u32 = AccessMode::Copy as u32; + + #[pyattr(name = "PAGESIZE")] + fn pagesize(vm: &VirtualMachine) -> usize { + page_size::get() + } + + #[pyattr] + #[pyclass(name = "mmap")] + #[derive(Debug, PyPayload)] + struct PyMmap { + mmap: MmapMut, + exports: usize, + // PyObject *weakreflist; + access: AccessMode, + } + + #[derive(FromArgs)] + struct MmapNewArgs { + #[pyarg(any)] + fileno: std::os::unix::io::RawFd, + #[pyarg(any)] + length: isize, + #[pyarg(any, default = "MAP_SHARED")] + flags: libc::c_int, + #[pyarg(any, default = "PROT_WRITE|PROT_READ")] + prot: libc::c_int, + #[pyarg(any, default = "AccessMode::Default")] + access: AccessMode, + #[pyarg(any, default = "0")] + offset: u64, + } + + impl Constructor for PyMmap { + type Args = MmapNewArgs; + + fn py_new( + cls: PyTypeRef, + MmapNewArgs { + fileno: fd, + length, + flags, + prot, + access, + offset, + }: Self::Args, + vm: &VirtualMachine, + ) -> PyResult { + if length < 0 { + return Err( + vm.new_overflow_error("memory mapped length must be positive".to_owned()) + ); + } + // if offset < 0 { + // return Err(vm.new_overflow_error("memory mapped offset must be positive".to_owned())); + // } + if (access != AccessMode::Default) + && ((flags != MAP_SHARED) || (prot != (PROT_WRITE | PROT_READ))) + { + return Err(vm.new_value_error( + "mmap can't specify both access and flags, prot.".to_owned(), + )); + } + + let (flags, prot, access) = match access { + AccessMode::Read => (MAP_SHARED, PROT_READ, access), + AccessMode::Write => (MAP_SHARED, PROT_READ | PROT_WRITE, access), + AccessMode::Copy => (MAP_PRIVATE, PROT_READ | PROT_WRITE, access), + AccessMode::Default => { + let access = if (prot & PROT_READ) != 0 && (prot & PROT_WRITE) != 0 { + access + } else if (prot & PROT_WRITE) != 0 { + AccessMode::Write + } else { + AccessMode::Read + }; + (flags, prot, access) + } + _ => return Err(vm.new_value_error("mmap invalid access parameter.".to_owned())), + }; + + let mut mmap_opt = MmapOptions::new(); + let mmap_opt = mmap_opt.offset(offset); + // .len(map_size) + let mmap = match access { + AccessMode::Write => unsafe { mmap_opt.map_mut(fd) }, + // AccessMode::Read => mmap_opt.map(fd), + AccessMode::Copy => unsafe { mmap_opt.map_copy(fd) }, + _ => unreachable!("access must be decided before here"), + } + .map_err(|_| vm.new_value_error("FIXME: mmap error".to_owned()))?; + + let m_obj = Self { + mmap, + exports: 0, + access, + }; + + m_obj.to_pyresult(vm) + } + } + + #[pyimpl] + impl PyMmap { + #[pymethod] + fn close(&self) -> PyResult<()> { + if self.exports > 0 { + // PyErr_SetString(PyExc_BufferError, "cannot close "\ + // "exported pointers exist"); + } + // self.mmap = MmapMut::map_anon(0).unwrap(); + Ok(()) + } + } +} From d4f001fa21eb30bd1b5a289b0fc2ce3326c1dd30 Mon Sep 17 00:00:00 2001 From: Dennis Zhuang Date: Sat, 28 May 2022 19:12:45 +0800 Subject: [PATCH 03/10] Basic mmap module --- Cargo.lock | 8 +- Cargo.toml | 2 +- common/Cargo.toml | 2 +- stdlib/Cargo.toml | 9 +- stdlib/src/mmap.rs | 384 +++++++++++++++++++++++++++++++++++++++----- vm/Cargo.toml | 2 +- vm/src/vm/vm_new.rs | 5 + 7 files changed, 366 insertions(+), 46 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 072d12200d..46bfa7d9b3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -890,9 +890,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.123" +version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb691a747a7ab48abc15c5b42066eaafde10dc427e3b6ee2a1cf43db04c763bd" +checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" [[package]] name = "libffi" @@ -998,9 +998,9 @@ checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" [[package]] name = "memmap2" -version = "0.5.3" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "057a3db23999c867821a7a59feb06a578fcb03685e983dff90daf9e7d24ac08f" +checksum = "d5172b50c23043ff43dd53e51392f36519d9b35a8f3a410d30ece5d1aedd58ae" dependencies = [ "libc", ] diff --git a/Cargo.toml b/Cargo.toml index 8b5dd6e842..d584879f0a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,7 +41,7 @@ rustpython-stdlib = {path = "stdlib", optional = true, default-features = false, dirs = { package = "dirs-next", version = "2.0.0" } num-traits = "0.2.14" cfg-if = "1.0.0" -libc = "0.2.123" +libc = "0.2.126" flame = { version = "0.2.2", optional = true } flamescope = { version = "0.1.2", optional = true } diff --git a/common/Cargo.toml b/common/Cargo.toml index 12c70b5413..16c4538ebc 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -21,7 +21,7 @@ siphasher = "0.3" rand = "0.8" volatile = "0.3" radium = "0.7" -libc = "0.2.101" +libc = "0.2.126" ascii = "1.0" unic-ucd-category = "0.9" diff --git a/stdlib/Cargo.toml b/stdlib/Cargo.toml index bbb22e8624..c4a7f0e073 100644 --- a/stdlib/Cargo.toml +++ b/stdlib/Cargo.toml @@ -57,14 +57,17 @@ hex = "0.4.3" puruspe = "0.1.5" nix = "0.23.1" xml-rs = "0.8.4" -libc = "0.2.123" +libc = "0.2.126" cfg-if = "1.0.0" ahash = "0.7.6" libz-sys = { version = "1.1.5", optional = true } num_enum = "0.5.7" ascii = "1.0.0" -memmap2 = "0.5.0" -page_size = "0.4.2" + +# mmap +[target.'cfg(all(unix, not(target_arch = "wasm32")))'.dependencies] +memmap2 = "0.5.4" +page_size = "0.4" [target.'cfg(all(unix, not(target_os = "redox")))'.dependencies] termios = "0.3.3" diff --git a/stdlib/src/mmap.rs b/stdlib/src/mmap.rs index 4cda3395a2..cfb583c82a 100644 --- a/stdlib/src/mmap.rs +++ b/stdlib/src/mmap.rs @@ -1,12 +1,23 @@ +//! mmap module pub(crate) use mmap::make_module; #[pymodule] mod mmap { + use crate::common::lock::{PyMutex, PyMutexGuard}; use crate::vm::{ - builtins::PyTypeRef, convert::ToPyResult, function::OptionalArg, types::Constructor, - FromArgs, PyObject, PyPayload, PyResult, TryFromBorrowedObject, VirtualMachine, + builtins::{PyInt, PyIntRef, PyTypeRef}, + function::FuncArgs, + sliceable::saturate_index, + types::Constructor, + FromArgs, PyObject, PyPayload, PyRef, PyResult, TryFromBorrowedObject, VirtualMachine, }; - use memmap2::{MmapMut, MmapOptions}; + use crossbeam_utils::atomic::AtomicCell; + use memmap2::{Mmap, MmapMut, MmapOptions}; + use nix::unistd; + use std::fs::File; + use std::ops::Deref; + #[cfg(all(unix, not(target_os = "redox")))] + use std::os::unix::io::{FromRawFd, IntoRawFd, RawFd}; #[repr(C)] #[derive(PartialEq, Eq, Debug)] @@ -31,7 +42,27 @@ mod mmap { } #[pyattr] - use libc::{MAP_ANON, MAP_ANONYMOUS, MAP_PRIVATE, MAP_SHARED, PROT_READ, PROT_WRITE}; + use libc::{ + MADV_DONTNEED, MADV_NORMAL, MADV_RANDOM, MADV_SEQUENTIAL, MADV_WILLNEED, MAP_ANON, + MAP_ANONYMOUS, MAP_PRIVATE, MAP_SHARED, PROT_READ, PROT_WRITE, + }; + + #[cfg(target_os = "macos")] + #[pyattr] + use libc::{MADV_FREE_REUSABLE, MADV_FREE_REUSE}; + + #[cfg(target_os = "linux")] + #[pyattr] + use libc::{ + MADV_DODUMP, MADV_DOFORK, MADV_DONTDUMP, MADV_DONTFORK, MADV_FREE, MADV_HUGEPAGE, + MADV_HWPOISON, MADV_MERGEABLE, MADV_NOHUGEPAGE, MADV_REMOVE, MADV_SOFT_OFFLINE, + MADV_UNMERGEABLE, + }; + + #[cfg(all(target_os = "linux", target_arch = "x86_64", target_env = "gnu"))] + #[pyattr] + use libc::{MAP_DENYWRITE, MAP_EXECUTABLE, MAP_POPULATE}; + #[pyattr] const ACCESS_DEFAULT: u32 = AccessMode::Default as u32; #[pyattr] @@ -41,25 +72,47 @@ mod mmap { #[pyattr] const ACCESS_COPY: u32 = AccessMode::Copy as u32; - #[pyattr(name = "PAGESIZE")] - fn pagesize(vm: &VirtualMachine) -> usize { + #[cfg(all(unix, not(target_arch = "wasm32")))] + #[pyattr(name = "PAGESIZE", once)] + fn page_size(_vm: &VirtualMachine) -> usize { page_size::get() } + #[cfg(all(unix, not(target_arch = "wasm32")))] + #[pyattr(name = "ALLOCATIONGRANULARITY", once)] + fn granularity(_vm: &VirtualMachine) -> usize { + page_size::get_granularity() + } + + #[pyattr(name = "error", once)] + fn error_type(vm: &VirtualMachine) -> PyTypeRef { + vm.ctx.exceptions.os_error.to_owned() + } + + #[derive(Debug)] + enum MmapObj { + Write(MmapMut), + Read(Mmap), + } + #[pyattr] #[pyclass(name = "mmap")] #[derive(Debug, PyPayload)] struct PyMmap { - mmap: MmapMut, - exports: usize, - // PyObject *weakreflist; + closed: AtomicCell, + mmap: PyMutex>, + fd: RawFd, + offset: isize, + size: AtomicCell, + pos: AtomicCell, // relative to offset + exports: AtomicCell, access: AccessMode, } #[derive(FromArgs)] struct MmapNewArgs { #[pyarg(any)] - fileno: std::os::unix::io::RawFd, + fileno: RawFd, #[pyarg(any)] length: isize, #[pyarg(any, default = "MAP_SHARED")] @@ -69,16 +122,46 @@ mod mmap { #[pyarg(any, default = "AccessMode::Default")] access: AccessMode, #[pyarg(any, default = "0")] - offset: u64, + offset: isize, + } + + #[derive(FromArgs)] + pub struct FlushOptions { + #[pyarg(positional, default)] + offset: Option, + #[pyarg(positional, default)] + size: Option, + } + + #[derive(FromArgs, Clone)] + pub struct FindOptions { + #[pyarg(positional)] + sub: Vec, + #[pyarg(positional, default)] + start: Option, + #[pyarg(positional, default)] + end: Option, + } + + #[derive(FromArgs)] + pub struct AdviseOptions { + #[pyarg(positional)] + option: libc::c_int, + #[pyarg(positional, default)] + start: Option, + #[pyarg(positional, default)] + length: Option, } impl Constructor for PyMmap { type Args = MmapNewArgs; + // TODO: Windows is not supported right now. + #[cfg(all(unix, not(target_os = "redox")))] fn py_new( cls: PyTypeRef, MmapNewArgs { - fileno: fd, + fileno: mut fd, length, flags, prot, @@ -87,14 +170,19 @@ mod mmap { }: Self::Args, vm: &VirtualMachine, ) -> PyResult { - if length < 0 { + let mut map_size = length; + if map_size < 0 { return Err( vm.new_overflow_error("memory mapped length must be positive".to_owned()) ); } - // if offset < 0 { - // return Err(vm.new_overflow_error("memory mapped offset must be positive".to_owned())); - // } + + if offset < 0 { + return Err( + vm.new_overflow_error("memory mapped offset must be positive".to_owned()) + ); + } + if (access != AccessMode::Default) && ((flags != MAP_SHARED) || (prot != (PROT_WRITE | PROT_READ))) { @@ -103,7 +191,8 @@ mod mmap { )); } - let (flags, prot, access) = match access { + // TODO: memmap2 doesn't support mapping with pro and flags right now + let (_flags, _prot, access) = match access { AccessMode::Read => (MAP_SHARED, PROT_READ, access), AccessMode::Write => (MAP_SHARED, PROT_READ | PROT_WRITE, access), AccessMode::Copy => (MAP_PRIVATE, PROT_READ | PROT_WRITE, access), @@ -117,40 +206,263 @@ mod mmap { }; (flags, prot, access) } - _ => return Err(vm.new_value_error("mmap invalid access parameter.".to_owned())), }; - let mut mmap_opt = MmapOptions::new(); - let mmap_opt = mmap_opt.offset(offset); - // .len(map_size) - let mmap = match access { - AccessMode::Write => unsafe { mmap_opt.map_mut(fd) }, - // AccessMode::Read => mmap_opt.map(fd), - AccessMode::Copy => unsafe { mmap_opt.map_copy(fd) }, - _ => unreachable!("access must be decided before here"), + if fd != -1 { + let file = unsafe { File::from_raw_fd(fd) }; + let file_len = match file.metadata() { + Ok(m) => m.len().try_into().expect("file size overflow"), + Err(e) => return Err(vm.new_os_error(e.to_string())), + }; + // File::from_raw_fd will consume the fd, so we + // have to get it again. + fd = file.into_raw_fd(); + if map_size == 0 { + if file_len == 0 { + return Err(vm.new_value_error("cannot mmap an empty file".to_owned())); + } + + if offset > file_len { + return Err( + vm.new_value_error("mmap offset is greater than file size".to_owned()) + ); + } + + //if file_len - offset > isize::MAX { + // return Err(vm.new_value_error("mmap length is too large".to_owned())); + //} + + map_size = file_len - offset; + } else if offset > file_len || file_len - offset < map_size { + return Err( + vm.new_value_error("mmap length is greater than file size".to_owned()) + ); + } } - .map_err(|_| vm.new_value_error("FIXME: mmap error".to_owned()))?; + + let mut mmap_opt = MmapOptions::new(); + let mmap_opt = mmap_opt + .offset(offset.try_into().unwrap()) + .len(map_size.try_into().unwrap()); + + let (fd, mmap) = if fd == -1 { + ( + fd, + MmapObj::Write( + mmap_opt + .map_anon() + .map_err(|e| vm.new_os_error(e.to_string()))?, + ), + ) + } else { + let new_fd = unistd::dup(fd).map_err(|e| vm.new_os_error(e.to_string()))?; + let mmap = match access { + AccessMode::Default | AccessMode::Write => MmapObj::Write( + unsafe { mmap_opt.map_mut(fd) } + .map_err(|e| vm.new_os_error(e.to_string()))?, + ), + AccessMode::Read => MmapObj::Read( + unsafe { mmap_opt.map(fd) }.map_err(|e| vm.new_os_error(e.to_string()))?, + ), + AccessMode::Copy => MmapObj::Write( + unsafe { mmap_opt.map_copy(fd) } + .map_err(|e| vm.new_os_error(e.to_string()))?, + ), + }; + (new_fd, mmap) + }; let m_obj = Self { - mmap, - exports: 0, + closed: AtomicCell::new(false), + mmap: PyMutex::new(Some(mmap)), + fd, + offset, + size: AtomicCell::new(map_size), + pos: AtomicCell::new(0), + exports: AtomicCell::new(0), access, }; - m_obj.to_pyresult(vm) + m_obj.into_ref_with_type(vm, cls).map(Into::into) } } - #[pyimpl] + #[pyimpl(with(Constructor), flags(BASETYPE))] impl PyMmap { + #[pymethod(magic)] + pub(crate) fn len(&self) -> usize { + self.inner_size() as usize + } + + #[inline] + fn inner_size(&self) -> isize { + self.size.load() + } + + #[inline] + fn inner_pos(&self) -> isize { + self.pos.load() + } + + fn check_valid(&self, vm: &VirtualMachine) -> PyResult>> { + let m = self.mmap.lock(); + + if m.is_none() { + return Err(vm.new_value_error("mmap closed or invalid".to_owned())); + } + + Ok(m) + } + + #[pyproperty] + fn closed(&self) -> bool { + self.closed.load() + } + + #[pymethod(magic)] + fn repr(zelf: PyRef) -> PyResult { + let mmap = zelf.mmap.lock(); + + if mmap.is_none() { + return Ok("".to_owned()); + } + + let access_str = match zelf.access { + AccessMode::Default => "ACCESS_DEFAULT", + AccessMode::Read => "ACCESS_READ", + AccessMode::Write => "ACCESS_WRITE", + AccessMode::Copy => "ACCESS_COPY", + }; + + let repr = format!( + "", + access_str, + zelf.len(), + zelf.inner_pos(), + zelf.offset + ); + + Ok(repr) + } + #[pymethod] - fn close(&self) -> PyResult<()> { - if self.exports > 0 { - // PyErr_SetString(PyExc_BufferError, "cannot close "\ - // "exported pointers exist"); + fn close(&self, vm: &VirtualMachine) -> PyResult<()> { + if self.closed() { + return Ok(()); + } + + if self.exports.load() > 0 { + return Err(vm.new_buffer_error("cannot close exported pointers exist.".to_owned())); } - // self.mmap = MmapMut::map_anon(0).unwrap(); + let mut mmap = self.mmap.lock(); + self.closed.store(true); + *mmap = None; + Ok(()) } + + fn get_find_range(&self, options: FindOptions) -> (usize, usize) { + let pos = self.inner_pos(); + let size = self.inner_size(); + let start = options.start.unwrap_or(pos); + let end = options.end.unwrap_or(size); + + let size = size.try_into().unwrap(); + (saturate_index(start, size), saturate_index(end, size)) + } + + #[pymethod] + fn find(&self, options: FindOptions, vm: &VirtualMachine) -> PyResult { + let (start, end) = self.get_find_range(options.clone()); + + let sub = &options.sub; + + if sub.is_empty() { + return Ok(PyInt::from(0isize)); + } + + let mmap = self.check_valid(vm)?; + let buf = match mmap.as_ref().unwrap() { + MmapObj::Read(mmap) => &mmap[start..end], + MmapObj::Write(mmap) => &mmap[start..end], + }; + let pos = buf.windows(sub.len()).position(|window| window == sub); + + Ok(pos.map_or(PyInt::from(-1isize), |i| PyInt::from(start + i))) + } + + #[pymethod] + fn rfind(&self, options: FindOptions, vm: &VirtualMachine) -> PyResult { + let (start, end) = self.get_find_range(options.clone()); + + let sub = &options.sub; + if sub.is_empty() { + return Ok(PyInt::from(0isize)); + } + + let mmap = self.check_valid(vm)?; + let buf = match mmap.as_ref().unwrap() { + MmapObj::Read(mmap) => &mmap[start..end], + MmapObj::Write(mmap) => &mmap[start..end], + }; + let pos = buf.windows(sub.len()).rposition(|window| window == sub); + + Ok(pos.map_or(PyInt::from(-1isize), |i| PyInt::from(start + i))) + } + + #[pymethod] + fn flush(&self, options: FlushOptions, vm: &VirtualMachine) -> PyResult<()> { + let offset = options.offset.unwrap_or(0); + let size = options.size.unwrap_or_else(|| self.inner_size()); + + if size < 0 || offset < 0 || self.inner_size() - offset < size { + return Err(vm.new_value_error("flush values out of range".to_owned())); + } + + let size = size as usize; + let offset = offset as usize; + + if self.access == AccessMode::Read || self.access == AccessMode::Copy { + return Ok(()); + } + + match self.check_valid(vm)?.deref().as_ref().unwrap() { + MmapObj::Read(_mmap) => {} + MmapObj::Write(mmap) => { + mmap.flush_range(offset, size) + .map_err(|e| vm.new_os_error(e.to_string()))?; + } + } + + Ok(()) + } + + #[pymethod] + fn size(&self, vm: &VirtualMachine) -> PyResult { + let new_fd = unistd::dup(self.fd).map_err(|e| vm.new_os_error(e.to_string()))?; + let file = unsafe { File::from_raw_fd(new_fd) }; + let file_len = match file.metadata() { + Ok(m) => m.len(), + Err(e) => return Err(vm.new_os_error(e.to_string())), + }; + + Ok(PyInt::from(file_len).into_ref(vm)) + } + + #[pymethod] + fn tell(&self) -> PyResult { + Ok(self.inner_pos()) + } + + #[pymethod(magic)] + fn enter(zelf: PyRef, vm: &VirtualMachine) -> PyResult> { + let _m = zelf.check_valid(vm)?; + Ok(zelf.to_owned()) + } + + #[pymethod(magic)] + fn exit(zelf: PyRef, _args: FuncArgs, vm: &VirtualMachine) -> PyResult<()> { + zelf.close(vm) + } } } diff --git a/vm/Cargo.toml b/vm/Cargo.toml index 1f22fe1a1c..3c72b2de0a 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -48,7 +48,7 @@ hexf-parse = "0.2.1" indexmap = "1.8.1" ahash = "0.7.6" bitflags = "1.3.2" -libc = "0.2.123" +libc = "0.2.126" nix = "0.23.1" paste = "1.0.7" is-macro = "0.2.0" diff --git a/vm/src/vm/vm_new.rs b/vm/src/vm/vm_new.rs index b636f68eac..fc85cb855a 100644 --- a/vm/src/vm/vm_new.rs +++ b/vm/src/vm/vm_new.rs @@ -151,6 +151,11 @@ impl VirtualMachine { self.new_exception_msg(os_error, msg) } + pub fn new_system_error(&self, msg: String) -> PyBaseExceptionRef { + let sys_error = self.ctx.exceptions.system_error.to_owned(); + self.new_exception_msg(sys_error, msg) + } + pub fn new_unicode_decode_error(&self, msg: String) -> PyBaseExceptionRef { let unicode_decode_error = self.ctx.exceptions.unicode_decode_error.to_owned(); self.new_exception_msg(unicode_decode_error, msg) From d96b165fad96c44416d6e11a03e9cc9e220193ba Mon Sep 17 00:00:00 2001 From: Dennis Zhuang Date: Tue, 14 Jun 2022 17:00:25 +0900 Subject: [PATCH 04/10] Impl madvise method --- stdlib/src/mmap.rs | 86 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 4 deletions(-) diff --git a/stdlib/src/mmap.rs b/stdlib/src/mmap.rs index cfb583c82a..b81107efab 100644 --- a/stdlib/src/mmap.rs +++ b/stdlib/src/mmap.rs @@ -12,13 +12,46 @@ mod mmap { FromArgs, PyObject, PyPayload, PyRef, PyResult, TryFromBorrowedObject, VirtualMachine, }; use crossbeam_utils::atomic::AtomicCell; - use memmap2::{Mmap, MmapMut, MmapOptions}; + use memmap2::{Advice, Mmap, MmapMut, MmapOptions}; use nix::unistd; use std::fs::File; use std::ops::Deref; #[cfg(all(unix, not(target_os = "redox")))] use std::os::unix::io::{FromRawFd, IntoRawFd, RawFd}; + fn advice_try_from_i32(vm: &VirtualMachine, i: i32) -> PyResult { + Ok(match i { + libc::MADV_NORMAL => Advice::Normal, + libc::MADV_RANDOM => Advice::Random, + libc::MADV_SEQUENTIAL => Advice::Sequential, + libc::MADV_WILLNEED => Advice::WillNeed, + libc::MADV_DONTNEED => Advice::DontNeed, + #[cfg(any(target_os = "linux", target_os = "macos", target_os = "ios"))] + libc::MADV_FREE => Advice::Free, + #[cfg(target_os = "linux")] + libc::MADV_DONTFORK => Advice::DontFork, + #[cfg(target_os = "linux")] + libc::MADV_DOFORK => Advice::DoFork, + #[cfg(target_os = "linux")] + libc::MADV_MERGEABLE => Advice::Mergeable, + #[cfg(target_os = "linux")] + libc::MADV_UNMERGEABLE => Advice::Unmergeable, + #[cfg(target_os = "linux")] + libc::MADV_HUGEPAGE => Advice::HugePage, + #[cfg(target_os = "linux")] + libc::MADV_NOHUGEPAGE => Advice::NoHugePage, + #[cfg(target_os = "linux")] + libc::MADV_REMOVE => Advice::Remove, + #[cfg(target_os = "linux")] + libc::MADV_DONTDUMP => Advice::DontDump, + #[cfg(target_os = "linux")] + libc::MADV_DODUMP => Advice::DoDump, + #[cfg(target_os = "linux")] + libc::MADV_HWPOISON => Advice::HwPoison, + _ => return Err(vm.new_value_error("Not a valid Advice value".to_owned())), + }) + } + #[repr(C)] #[derive(PartialEq, Eq, Debug)] enum AccessMode { @@ -51,12 +84,24 @@ mod mmap { #[pyattr] use libc::{MADV_FREE_REUSABLE, MADV_FREE_REUSE}; + #[cfg(any( + target_os = "android", + target_os = "dragonfly", + target_os = "fuchsia", + target_os = "freebsd", + target_os = "linux", + target_os = "netbsd", + target_os = "openbsd", + target_vendor = "apple" + ))] + #[pyattr] + use libc::MADV_FREE; + #[cfg(target_os = "linux")] #[pyattr] use libc::{ - MADV_DODUMP, MADV_DOFORK, MADV_DONTDUMP, MADV_DONTFORK, MADV_FREE, MADV_HUGEPAGE, - MADV_HWPOISON, MADV_MERGEABLE, MADV_NOHUGEPAGE, MADV_REMOVE, MADV_SOFT_OFFLINE, - MADV_UNMERGEABLE, + MADV_DODUMP, MADV_DOFORK, MADV_DONTDUMP, MADV_DONTFORK, MADV_HUGEPAGE, MADV_HWPOISON, + MADV_MERGEABLE, MADV_NOHUGEPAGE, MADV_REMOVE, MADV_SOFT_OFFLINE, MADV_UNMERGEABLE, }; #[cfg(all(target_os = "linux", target_arch = "x86_64", target_env = "gnu"))] @@ -437,6 +482,39 @@ mod mmap { Ok(()) } + #[allow(unused_assignments)] + #[pymethod] + fn madvise(&self, options: AdviseOptions, vm: &VirtualMachine) -> PyResult<()> { + let start = options.start.unwrap_or(0); + let mut length = options.length.unwrap_or_else(|| self.inner_size()); + + if start < 0 || start >= self.inner_size() { + return Err(vm.new_value_error("madvise start out of bounds".to_owned())); + } + if length < 0 { + return Err(vm.new_value_error("madvise length invalid".to_owned())); + } + + if isize::MAX - start < length { + return Err(vm.new_overflow_error("madvise length too large".to_owned())); + } + + if start + length > self.inner_size() { + length = self.inner_size() - start; + } + + let advice = advice_try_from_i32(vm, options.option)?; + + //TODO: memmap2 doesn't support madvise range right now. + match self.check_valid(vm)?.deref().as_ref().unwrap() { + MmapObj::Read(mmap) => mmap.advise(advice), + MmapObj::Write(mmap) => mmap.advise(advice), + } + .map_err(|e| vm.new_os_error(e.to_string()))?; + + Ok(()) + } + #[pymethod] fn size(&self, vm: &VirtualMachine) -> PyResult { let new_fd = unistd::dup(self.fd).map_err(|e| vm.new_os_error(e.to_string()))?; From ba33bc990de97ef82f1a75e83732530fb9d7ba2f Mon Sep 17 00:00:00 2001 From: Dennis Zhuang Date: Tue, 14 Jun 2022 16:58:47 +0900 Subject: [PATCH 05/10] Impl read, read_byte and readline methods --- stdlib/src/mmap.rs | 110 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 107 insertions(+), 3 deletions(-) diff --git a/stdlib/src/mmap.rs b/stdlib/src/mmap.rs index b81107efab..b297cfc782 100644 --- a/stdlib/src/mmap.rs +++ b/stdlib/src/mmap.rs @@ -5,11 +5,12 @@ pub(crate) use mmap::make_module; mod mmap { use crate::common::lock::{PyMutex, PyMutexGuard}; use crate::vm::{ - builtins::{PyInt, PyIntRef, PyTypeRef}, - function::FuncArgs, + builtins::{PyBytes, PyBytesRef, PyInt, PyIntRef, PyTypeRef}, + function::{FuncArgs, OptionalArg}, sliceable::saturate_index, types::Constructor, - FromArgs, PyObject, PyPayload, PyRef, PyResult, TryFromBorrowedObject, VirtualMachine, + AsObject, FromArgs, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, + TryFromBorrowedObject, VirtualMachine, }; use crossbeam_utils::atomic::AtomicCell; use memmap2::{Advice, Mmap, MmapMut, MmapOptions}; @@ -349,6 +350,11 @@ mod mmap { self.pos.load() } + #[inline] + fn advance_pos(&self, step: isize) { + self.pos.store(self.inner_pos() + step); + } + fn check_valid(&self, vm: &VirtualMachine) -> PyResult>> { let m = self.mmap.lock(); @@ -515,6 +521,104 @@ mod mmap { Ok(()) } + #[pymethod] + fn read(&self, n: OptionalArg, vm: &VirtualMachine) -> PyResult { + let mut num_bytes = n + .map(|obj| { + let name = obj.class().name().to_string(); + obj.try_into_value::>(vm).map_err(|_| { + vm.new_type_error(format!( + "read argument must be int or None, not {}", + name, + )) + }) + }) + .transpose()? + .flatten() + .unwrap_or(isize::MAX); + let mmap = self.check_valid(vm)?; + let pos = self.inner_pos(); + + let remaining = if pos < self.inner_size() { + self.inner_size() - pos + } else { + 0 + }; + + if num_bytes < 0 || num_bytes > remaining { + num_bytes = remaining; + } + + let end_pos = (pos + num_bytes) as usize; + let bytes = match mmap.deref().as_ref().unwrap() { + MmapObj::Read(mmap) => mmap[pos as usize..end_pos].to_vec(), + MmapObj::Write(mmap) => mmap[pos as usize..end_pos].to_vec(), + }; + + let result = PyBytes::from(bytes).into_ref(vm); + + self.advance_pos(num_bytes); + + Ok(result) + } + + #[pymethod] + fn read_byte(&self, vm: &VirtualMachine) -> PyResult { + let pos = self.inner_pos(); + if pos >= self.inner_size() { + return Err(vm.new_value_error("read byte out of range".to_owned())); + } + + let b = match self.check_valid(vm)?.deref().as_ref().unwrap() { + MmapObj::Read(mmap) => mmap[pos as usize], + MmapObj::Write(mmap) => mmap[pos as usize], + }; + + self.advance_pos(1); + + Ok(PyInt::from(b).into_ref(vm)) + } + + #[pymethod] + fn readline(&self, vm: &VirtualMachine) -> PyResult { + let pos = self.inner_pos(); + let mmap = self.check_valid(vm)?; + + let remaining = if pos < self.inner_size() { + self.inner_size() - pos + } else { + 0 + }; + + if remaining == 0 { + return Ok(PyBytes::from(vec![]).into_ref(vm)); + } + + let eof = match mmap.as_ref().unwrap() { + MmapObj::Read(mmap) => &mmap[pos as usize..], + MmapObj::Write(mmap) => &mmap[pos as usize..], + } + .iter() + .position(|&x| x == b'\n'); + + let end_pos = if let Some(i) = eof { + pos as usize + i + 1 + } else { + self.inner_size() as usize + }; + + let bytes = match mmap.deref().as_ref().unwrap() { + MmapObj::Read(mmap) => mmap[pos as usize..end_pos].to_vec(), + MmapObj::Write(mmap) => mmap[pos as usize..end_pos].to_vec(), + }; + + let result = PyBytes::from(bytes).into_ref(vm); + + self.advance_pos(end_pos as isize - pos); + + Ok(result) + } + #[pymethod] fn size(&self, vm: &VirtualMachine) -> PyResult { let new_fd = unistd::dup(self.fd).map_err(|e| vm.new_os_error(e.to_string()))?; From 929ad30c9d604c718810ca6fdcabf05ed7075107 Mon Sep 17 00:00:00 2001 From: Dennis Zhuang Date: Tue, 14 Jun 2022 16:56:45 +0900 Subject: [PATCH 06/10] Impl seek, write, write_byte methods etc. --- stdlib/src/mmap.rs | 157 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 155 insertions(+), 2 deletions(-) diff --git a/stdlib/src/mmap.rs b/stdlib/src/mmap.rs index b297cfc782..542fb7c53d 100644 --- a/stdlib/src/mmap.rs +++ b/stdlib/src/mmap.rs @@ -6,7 +6,8 @@ mod mmap { use crate::common::lock::{PyMutex, PyMutexGuard}; use crate::vm::{ builtins::{PyBytes, PyBytesRef, PyInt, PyIntRef, PyTypeRef}, - function::{FuncArgs, OptionalArg}, + byte::value_from_object, + function::{ArgBytesLike, FuncArgs, OptionalArg}, sliceable::saturate_index, types::Constructor, AsObject, FromArgs, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, @@ -16,7 +17,8 @@ mod mmap { use memmap2::{Advice, Mmap, MmapMut, MmapOptions}; use nix::unistd; use std::fs::File; - use std::ops::Deref; + use std::io::Write; + use std::ops::{Deref, DerefMut}; #[cfg(all(unix, not(target_os = "redox")))] use std::os::unix::io::{FromRawFd, IntoRawFd, RawFd}; @@ -355,6 +357,24 @@ mod mmap { self.pos.store(self.inner_pos() + step); } + #[inline] + fn try_writable( + &self, + vm: &VirtualMachine, + f: impl FnOnce(&mut MmapMut) -> R, + ) -> PyResult { + if matches!(self.access, AccessMode::Read) { + return Err( + vm.new_type_error("mmap can't modify a readonly memory map.".to_owned()) + ); + } + + match self.check_valid(vm)?.deref_mut().as_mut().unwrap() { + MmapObj::Write(mmap) => Ok(f(mmap)), + _ => unreachable!("already check"), + } + } + fn check_valid(&self, vm: &VirtualMachine) -> PyResult>> { let m = self.mmap.lock(); @@ -365,6 +385,24 @@ mod mmap { Ok(m) } + /// TODO: impl resize + #[allow(dead_code)] + fn check_resizeable(&self, vm: &VirtualMachine) -> PyResult<()> { + if self.exports.load() > 0 { + return Err(vm.new_buffer_error( + "mmap can't resize with extant buffers exported.".to_owned(), + )); + } + + if self.access == AccessMode::Write || self.access == AccessMode::Default { + return Ok(()); + } + + Err(vm.new_type_error( + "mmap can't resize a readonly or copy-on-write memory map.".to_owned(), + )) + } + #[pyproperty] fn closed(&self) -> bool { self.closed.load() @@ -521,6 +559,30 @@ mod mmap { Ok(()) } + #[pymethod(name = "move")] + fn move_(&self, dest: isize, src: isize, cnt: isize, vm: &VirtualMachine) -> PyResult<()> { + let size = self.inner_size(); + if dest < 0 || src < 0 || cnt < 0 || size - dest < cnt || size - src < cnt { + return Err( + vm.new_value_error("source, destination, or count out of range".to_owned()) + ); + } + + let dest: usize = dest.try_into().unwrap(); + let cnt: usize = cnt.try_into().unwrap(); + let dest_end = dest + cnt; + let src: usize = src.try_into().unwrap(); + let src_end = src + cnt; + + self.try_writable(vm, |mmap| { + let src_buf = mmap[src..src_end].to_vec(); + (&mut mmap[dest..dest_end]) + .write(&src_buf) + .map_err(|e| vm.new_os_error(e.to_string()))?; + Ok(()) + })? + } + #[pymethod] fn read(&self, n: OptionalArg, vm: &VirtualMachine) -> PyResult { let mut num_bytes = n @@ -619,6 +681,54 @@ mod mmap { Ok(result) } + //TODO: supports resize + #[pymethod] + fn resize(&self, _newsize: PyIntRef, vm: &VirtualMachine) -> PyResult<()> { + self.check_resizeable(vm)?; + Err(vm.new_system_error("mmap: resizing not available--no mremap()".to_owned())) + } + + #[pymethod] + fn seek( + &self, + pos: isize, + whence: OptionalArg, + vm: &VirtualMachine, + ) -> PyResult<()> { + let dist = pos; + + let how = whence.unwrap_or(0); + let size = self.inner_size(); + + let new_pos = match how { + 0 => dist, // relative to start + 1 => { + // relative to current position + let pos = self.inner_pos(); + if isize::MAX - pos < dist { + return Err(vm.new_value_error("seek out of range".to_owned())); + } + pos + dist + } + 2 => { + // relative to end + if isize::MAX - size < dist { + return Err(vm.new_value_error("seek out of range".to_owned())); + } + size + dist + } + _ => return Err(vm.new_value_error("unknown seek type".to_owned())), + }; + + if new_pos > size || new_pos < 0 { + return Err(vm.new_value_error("seek out of range".to_owned())); + } + + self.pos.store(new_pos); + + Ok(()) + } + #[pymethod] fn size(&self, vm: &VirtualMachine) -> PyResult { let new_fd = unistd::dup(self.fd).map_err(|e| vm.new_os_error(e.to_string()))?; @@ -636,6 +746,49 @@ mod mmap { Ok(self.inner_pos()) } + #[pymethod] + fn write(&self, bytes: ArgBytesLike, vm: &VirtualMachine) -> PyResult { + let pos = self.inner_pos(); + let size = self.inner_size(); + + let data = bytes.borrow_buf(); + + if pos > size || size - pos < data.len() as isize { + return Err(vm.new_value_error("data out of range".to_owned())); + } + + let len = self.try_writable(vm, |mmap| { + (&mut mmap[pos as usize..(pos as usize + data.len())]) + .write(&data) + .map_err(|e| vm.new_os_error(e.to_string()))?; + Ok(data.len()) + })??; + + self.advance_pos(len as isize); + + Ok(PyInt::from(len).into_ref(vm)) + } + + #[pymethod] + fn write_byte(&self, byte: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { + let b = value_from_object(vm, &byte)?; + + let pos = self.inner_pos(); + let size = self.inner_size(); + + if pos >= size { + return Err(vm.new_value_error("write byte out of range".to_owned())); + } + + self.try_writable(vm, |mmap| { + mmap[pos as usize] = b; + })?; + + self.advance_pos(1); + + Ok(()) + } + #[pymethod(magic)] fn enter(zelf: PyRef, vm: &VirtualMachine) -> PyResult> { let _m = zelf.check_valid(vm)?; From e5a76797b25af5a7d394625802f3db5e92df009d Mon Sep 17 00:00:00 2001 From: Dennis Zhuang Date: Tue, 14 Jun 2022 16:51:03 +0900 Subject: [PATCH 07/10] Impl AsBuffer, AsMapping and AsSequence protocols for PyMmap --- stdlib/src/mmap.rs | 246 ++++++++++++++++++++++++++++++++++++++++++-- vm/src/sliceable.rs | 2 +- 2 files changed, 241 insertions(+), 7 deletions(-) diff --git a/stdlib/src/mmap.rs b/stdlib/src/mmap.rs index 542fb7c53d..4dfcf1ecc4 100644 --- a/stdlib/src/mmap.rs +++ b/stdlib/src/mmap.rs @@ -3,14 +3,20 @@ pub(crate) use mmap::make_module; #[pymodule] mod mmap { - use crate::common::lock::{PyMutex, PyMutexGuard}; + use crate::common::{ + borrow::{BorrowedValue, BorrowedValueMut}, + lock::{MapImmutable, PyMutex, PyMutexGuard}, + }; use crate::vm::{ builtins::{PyBytes, PyBytesRef, PyInt, PyIntRef, PyTypeRef}, - byte::value_from_object, + byte::{bytes_from_object, value_from_object}, function::{ArgBytesLike, FuncArgs, OptionalArg}, - sliceable::saturate_index, - types::Constructor, - AsObject, FromArgs, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, + protocol::{ + BufferDescriptor, BufferMethods, PyBuffer, PyMappingMethods, PySequenceMethods, + }, + sliceable::{saturate_index, wrap_index, SaturatedSlice, SequenceIndex}, + types::{AsBuffer, AsMapping, AsSequence, Constructor}, + AsObject, FromArgs, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromBorrowedObject, VirtualMachine, }; use crossbeam_utils::atomic::AtomicCell; @@ -335,8 +341,87 @@ mod mmap { } } - #[pyimpl(with(Constructor), flags(BASETYPE))] + static BUFFER_METHODS: BufferMethods = BufferMethods { + obj_bytes: |buffer| buffer.obj_as::().as_bytes(), + obj_bytes_mut: |buffer| buffer.obj_as::().as_bytes_mut(), + release: |buffer| { + buffer.obj_as::().exports.fetch_sub(1); + }, + retain: |buffer| { + buffer.obj_as::().exports.fetch_add(1); + }, + }; + + impl AsBuffer for PyMmap { + fn as_buffer(zelf: &Py, _vm: &VirtualMachine) -> PyResult { + let buf = PyBuffer::new( + zelf.to_owned().into(), + BufferDescriptor::simple(zelf.len(), true), + &BUFFER_METHODS, + ); + + Ok(buf) + } + } + + impl AsMapping for PyMmap { + const AS_MAPPING: PyMappingMethods = PyMappingMethods { + length: Some(|mapping, _vm| Ok(Self::mapping_downcast(mapping).len())), + subscript: Some(|mapping, needle, vm| { + Self::mapping_downcast(mapping)._getitem(needle, vm) + }), + ass_subscript: Some(|mapping, needle, value, vm| { + let zelf = Self::mapping_downcast(mapping); + if let Some(value) = value { + Self::_setitem(zelf.to_owned(), needle, value, vm) + } else { + Err(vm.new_type_error("mmap object doesn't support item deletion".to_owned())) + } + }), + }; + } + + impl AsSequence for PyMmap { + const AS_SEQUENCE: PySequenceMethods = PySequenceMethods { + length: Some(|seq, _vm| Ok(Self::sequence_downcast(seq).len())), + item: Some(|seq, i, vm| { + let zelf = Self::sequence_downcast(seq); + zelf.get_item_by_index(i, vm) + }), + ass_item: Some(|seq, i, value, vm| { + let zelf = Self::sequence_downcast(seq); + if let Some(value) = value { + Self::setitem_by_index(zelf.to_owned(), i, value, vm) + } else { + Err(vm.new_type_error("mmap object doesn't support item deletion".to_owned())) + } + }), + ..PySequenceMethods::NOT_IMPLEMENTED + }; + } + + #[pyimpl(with(Constructor, AsMapping, AsSequence, AsBuffer), flags(BASETYPE))] impl PyMmap { + fn as_bytes_mut(&self) -> BorrowedValueMut<[u8]> { + PyMutexGuard::map(self.mmap.lock(), |m| { + match m.as_mut().expect("mmap closed or invalid") { + MmapObj::Read(_) => panic!("mmap can't modify a readonly memory map."), + MmapObj::Write(mmap) => &mut mmap[..], + } + }) + .into() + } + + fn as_bytes(&self) -> BorrowedValue<[u8]> { + PyMutexGuard::map_immutable(self.mmap.lock(), |m| { + match m.as_ref().expect("mmap closed or invalid") { + MmapObj::Read(ref mmap) => &mmap[..], + MmapObj::Write(ref mmap) => &mmap[..], + } + }) + .into() + } + #[pymethod(magic)] pub(crate) fn len(&self) -> usize { self.inner_size() as usize @@ -789,6 +874,155 @@ mod mmap { Ok(()) } + fn get_item_by_index(&self, i: isize, vm: &VirtualMachine) -> PyResult { + let i = wrap_index(i, self.len()) + .ok_or_else(|| vm.new_index_error("mmap index out of range".to_owned()))?; + + let b = match self.check_valid(vm)?.deref().as_ref().unwrap() { + MmapObj::Read(mmap) => mmap[i as usize], + MmapObj::Write(mmap) => mmap[i as usize], + }; + + Ok(PyInt::from(b).into_ref(vm).into()) + } + + fn getitem_by_slice( + &self, + slice: &SaturatedSlice, + vm: &VirtualMachine, + ) -> PyResult { + let (range, step, slicelen) = slice.adjust_indices(self.len()); + + let mmap = self.check_valid(vm)?; + + if slicelen == 0 { + return Ok(PyBytes::from(vec![]).into_ref(vm).into()); + } else if step == 1 { + let bytes = match mmap.deref().as_ref().unwrap() { + MmapObj::Read(mmap) => &mmap[range], + MmapObj::Write(mmap) => &mmap[range], + }; + return Ok(PyBytes::from(bytes.to_vec()).into_ref(vm).into()); + } + + let mut result_buf = Vec::with_capacity(slicelen); + if step.is_negative() { + for i in range.rev().step_by(step.unsigned_abs()) { + let b = match mmap.deref().as_ref().unwrap() { + MmapObj::Read(mmap) => mmap[i], + MmapObj::Write(mmap) => mmap[i], + }; + result_buf.push(b); + } + } else { + for i in range.step_by(step.unsigned_abs()) { + let b = match mmap.deref().as_ref().unwrap() { + MmapObj::Read(mmap) => mmap[i], + MmapObj::Write(mmap) => mmap[i], + }; + result_buf.push(b); + } + } + Ok(PyBytes::from(result_buf).into_ref(vm).into()) + } + + fn _getitem(&self, needle: &PyObject, vm: &VirtualMachine) -> PyResult { + match SequenceIndex::try_from_borrowed_object(vm, needle, "mmap")? { + SequenceIndex::Int(i) => self.get_item_by_index(i, vm), + SequenceIndex::Slice(slice) => self.getitem_by_slice(&slice, vm), + } + } + + #[pymethod(magic)] + fn getitem(&self, needle: PyObjectRef, vm: &VirtualMachine) -> PyResult { + self._getitem(&needle, vm) + } + + fn _setitem( + zelf: PyRef, + needle: &PyObject, + value: PyObjectRef, + vm: &VirtualMachine, + ) -> PyResult<()> { + match SequenceIndex::try_from_borrowed_object(vm, needle, "mmap")? { + SequenceIndex::Int(i) => Self::setitem_by_index(zelf, i, value, vm), + SequenceIndex::Slice(slice) => Self::setitem_by_slice(zelf, &slice, value, vm), + } + } + + fn setitem_by_index( + zelf: PyRef, + i: isize, + value: PyObjectRef, + vm: &VirtualMachine, + ) -> PyResult<()> { + let i = wrap_index(i, zelf.len()) + .ok_or_else(|| vm.new_index_error("mmap index out of range".to_owned()))?; + + let b = value_from_object(vm, &value)?; + + zelf.try_writable(vm, |mmap| { + mmap[i as usize] = b; + })?; + + Ok(()) + } + + fn setitem_by_slice( + zelf: PyRef, + slice: &SaturatedSlice, + value: PyObjectRef, + vm: &VirtualMachine, + ) -> PyResult<()> { + let (range, step, slicelen) = slice.adjust_indices(zelf.len()); + + let bytes = bytes_from_object(vm, &value)?; + + if bytes.len() != slicelen { + return Err(vm.new_index_error("mmap slice assignment is wrong size".to_owned())); + } + + if slicelen == 0 { + // do nothing + Ok(()) + } else if step == 1 { + zelf.try_writable(vm, |mmap| { + (&mut mmap[range]) + .write(&bytes) + .map_err(|e| vm.new_os_error(e.to_string()))?; + Ok(()) + })? + } else { + let mut bi = 0; // bytes index + if step.is_negative() { + for i in range.rev().step_by(step.unsigned_abs()) { + zelf.try_writable(vm, |mmap| { + mmap[i] = bytes[bi]; + })?; + bi += 1; + } + } else { + for i in range.step_by(step.unsigned_abs()) { + zelf.try_writable(vm, |mmap| { + mmap[i] = bytes[bi]; + })?; + bi += 1; + } + } + Ok(()) + } + } + + #[pymethod(magic)] + fn setitem( + zelf: PyRef, + needle: PyObjectRef, + value: PyObjectRef, + vm: &VirtualMachine, + ) -> PyResult<()> { + Self::_setitem(zelf, &needle, value, vm) + } + #[pymethod(magic)] fn enter(zelf: PyRef, vm: &VirtualMachine) -> PyResult> { let _m = zelf.check_valid(vm)?; diff --git a/vm/src/sliceable.rs b/vm/src/sliceable.rs index d32a8d131e..11f06da241 100644 --- a/vm/src/sliceable.rs +++ b/vm/src/sliceable.rs @@ -292,7 +292,7 @@ impl SequenceIndex { } // Use PySliceableSequence::wrap_index for implementors -pub(crate) fn wrap_index(p: isize, len: usize) -> Option { +pub fn wrap_index(p: isize, len: usize) -> Option { let neg = p.is_negative(); let p = p.wrapping_abs() as usize; if neg { From 3217de7af220d1ea7493c8ef5eaf832550ff75ad Mon Sep 17 00:00:00 2001 From: Dennis Zhuang Date: Tue, 14 Jun 2022 11:09:20 +0800 Subject: [PATCH 08/10] Adds test_mmap.py from CPython 3.10.4 --- Lib/test/test_mmap.py | 862 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 862 insertions(+) create mode 100644 Lib/test/test_mmap.py diff --git a/Lib/test/test_mmap.py b/Lib/test/test_mmap.py new file mode 100644 index 0000000000..8f34c182f8 --- /dev/null +++ b/Lib/test/test_mmap.py @@ -0,0 +1,862 @@ +from test.support import (requires, _2G, _4G, gc_collect, cpython_only) +from test.support.import_helper import import_module +from test.support.os_helper import TESTFN, unlink +import unittest +import os +import re +import itertools +import socket +import sys +import weakref + +# Skip test if we can't import mmap. +mmap = import_module('mmap') + +PAGESIZE = mmap.PAGESIZE + + +class MmapTests(unittest.TestCase): + + def setUp(self): + if os.path.exists(TESTFN): + os.unlink(TESTFN) + + def tearDown(self): + try: + os.unlink(TESTFN) + except OSError: + pass + + def test_basic(self): + # Test mmap module on Unix systems and Windows + + # Create a file to be mmap'ed. + f = open(TESTFN, 'bw+') + try: + # Write 2 pages worth of data to the file + f.write(b'\0'* PAGESIZE) + f.write(b'foo') + f.write(b'\0'* (PAGESIZE-3) ) + f.flush() + m = mmap.mmap(f.fileno(), 2 * PAGESIZE) + finally: + f.close() + + # Simple sanity checks + + tp = str(type(m)) # SF bug 128713: segfaulted on Linux + self.assertEqual(m.find(b'foo'), PAGESIZE) + + self.assertEqual(len(m), 2*PAGESIZE) + + self.assertEqual(m[0], 0) + self.assertEqual(m[0:3], b'\0\0\0') + + # Shouldn't crash on boundary (Issue #5292) + self.assertRaises(IndexError, m.__getitem__, len(m)) + self.assertRaises(IndexError, m.__setitem__, len(m), b'\0') + + # Modify the file's content + m[0] = b'3'[0] + m[PAGESIZE +3: PAGESIZE +3+3] = b'bar' + + # Check that the modification worked + self.assertEqual(m[0], b'3'[0]) + self.assertEqual(m[0:3], b'3\0\0') + self.assertEqual(m[PAGESIZE-1 : PAGESIZE + 7], b'\0foobar\0') + + m.flush() + + # Test doing a regular expression match in an mmap'ed file + match = re.search(b'[A-Za-z]+', m) + if match is None: + self.fail('regex match on mmap failed!') + else: + start, end = match.span(0) + length = end - start + + self.assertEqual(start, PAGESIZE) + self.assertEqual(end, PAGESIZE + 6) + + # test seeking around (try to overflow the seek implementation) + m.seek(0,0) + self.assertEqual(m.tell(), 0) + m.seek(42,1) + self.assertEqual(m.tell(), 42) + m.seek(0,2) + self.assertEqual(m.tell(), len(m)) + + # Try to seek to negative position... + self.assertRaises(ValueError, m.seek, -1) + + # Try to seek beyond end of mmap... + self.assertRaises(ValueError, m.seek, 1, 2) + + # Try to seek to negative position... + self.assertRaises(ValueError, m.seek, -len(m)-1, 2) + + # Try resizing map + try: + m.resize(512) + except SystemError: + # resize() not supported + # No messages are printed, since the output of this test suite + # would then be different across platforms. + pass + else: + # resize() is supported + self.assertEqual(len(m), 512) + # Check that we can no longer seek beyond the new size. + self.assertRaises(ValueError, m.seek, 513, 0) + + # Check that the underlying file is truncated too + # (bug #728515) + f = open(TESTFN, 'rb') + try: + f.seek(0, 2) + self.assertEqual(f.tell(), 512) + finally: + f.close() + self.assertEqual(m.size(), 512) + + m.close() + + def test_access_parameter(self): + # Test for "access" keyword parameter + mapsize = 10 + with open(TESTFN, "wb") as fp: + fp.write(b"a"*mapsize) + with open(TESTFN, "rb") as f: + m = mmap.mmap(f.fileno(), mapsize, access=mmap.ACCESS_READ) + self.assertEqual(m[:], b'a'*mapsize, "Readonly memory map data incorrect.") + + # Ensuring that readonly mmap can't be slice assigned + try: + m[:] = b'b'*mapsize + except TypeError: + pass + else: + self.fail("Able to write to readonly memory map") + + # Ensuring that readonly mmap can't be item assigned + try: + m[0] = b'b' + except TypeError: + pass + else: + self.fail("Able to write to readonly memory map") + + # Ensuring that readonly mmap can't be write() to + try: + m.seek(0,0) + m.write(b'abc') + except TypeError: + pass + else: + self.fail("Able to write to readonly memory map") + + # Ensuring that readonly mmap can't be write_byte() to + try: + m.seek(0,0) + m.write_byte(b'd') + except TypeError: + pass + else: + self.fail("Able to write to readonly memory map") + + # Ensuring that readonly mmap can't be resized + try: + m.resize(2*mapsize) + except SystemError: # resize is not universally supported + pass + except TypeError: + pass + else: + self.fail("Able to resize readonly memory map") + with open(TESTFN, "rb") as fp: + self.assertEqual(fp.read(), b'a'*mapsize, + "Readonly memory map data file was modified") + + # Opening mmap with size too big + with open(TESTFN, "r+b") as f: + try: + m = mmap.mmap(f.fileno(), mapsize+1) + except ValueError: + # we do not expect a ValueError on Windows + # CAUTION: This also changes the size of the file on disk, and + # later tests assume that the length hasn't changed. We need to + # repair that. + if sys.platform.startswith('win'): + self.fail("Opening mmap with size+1 should work on Windows.") + else: + # we expect a ValueError on Unix, but not on Windows + if not sys.platform.startswith('win'): + self.fail("Opening mmap with size+1 should raise ValueError.") + m.close() + if sys.platform.startswith('win'): + # Repair damage from the resizing test. + with open(TESTFN, 'r+b') as f: + f.truncate(mapsize) + + # Opening mmap with access=ACCESS_WRITE + with open(TESTFN, "r+b") as f: + m = mmap.mmap(f.fileno(), mapsize, access=mmap.ACCESS_WRITE) + # Modifying write-through memory map + m[:] = b'c'*mapsize + self.assertEqual(m[:], b'c'*mapsize, + "Write-through memory map memory not updated properly.") + m.flush() + m.close() + with open(TESTFN, 'rb') as f: + stuff = f.read() + self.assertEqual(stuff, b'c'*mapsize, + "Write-through memory map data file not updated properly.") + + # Opening mmap with access=ACCESS_COPY + with open(TESTFN, "r+b") as f: + m = mmap.mmap(f.fileno(), mapsize, access=mmap.ACCESS_COPY) + # Modifying copy-on-write memory map + m[:] = b'd'*mapsize + self.assertEqual(m[:], b'd' * mapsize, + "Copy-on-write memory map data not written correctly.") + m.flush() + with open(TESTFN, "rb") as fp: + self.assertEqual(fp.read(), b'c'*mapsize, + "Copy-on-write test data file should not be modified.") + # Ensuring copy-on-write maps cannot be resized + self.assertRaises(TypeError, m.resize, 2*mapsize) + m.close() + + # Ensuring invalid access parameter raises exception + with open(TESTFN, "r+b") as f: + self.assertRaises(ValueError, mmap.mmap, f.fileno(), mapsize, access=4) + + if os.name == "posix": + # Try incompatible flags, prot and access parameters. + with open(TESTFN, "r+b") as f: + self.assertRaises(ValueError, mmap.mmap, f.fileno(), mapsize, + flags=mmap.MAP_PRIVATE, + prot=mmap.PROT_READ, access=mmap.ACCESS_WRITE) + + # Try writing with PROT_EXEC and without PROT_WRITE + prot = mmap.PROT_READ | getattr(mmap, 'PROT_EXEC', 0) + with open(TESTFN, "r+b") as f: + m = mmap.mmap(f.fileno(), mapsize, prot=prot) + self.assertRaises(TypeError, m.write, b"abcdef") + self.assertRaises(TypeError, m.write_byte, 0) + m.close() + + def test_bad_file_desc(self): + # Try opening a bad file descriptor... + self.assertRaises(OSError, mmap.mmap, -2, 4096) + + def test_tougher_find(self): + # Do a tougher .find() test. SF bug 515943 pointed out that, in 2.2, + # searching for data with embedded \0 bytes didn't work. + with open(TESTFN, 'wb+') as f: + + data = b'aabaac\x00deef\x00\x00aa\x00' + n = len(data) + f.write(data) + f.flush() + m = mmap.mmap(f.fileno(), n) + + for start in range(n+1): + for finish in range(start, n+1): + slice = data[start : finish] + self.assertEqual(m.find(slice), data.find(slice)) + self.assertEqual(m.find(slice + b'x'), -1) + m.close() + + def test_find_end(self): + # test the new 'end' parameter works as expected + with open(TESTFN, 'wb+') as f: + data = b'one two ones' + n = len(data) + f.write(data) + f.flush() + m = mmap.mmap(f.fileno(), n) + + self.assertEqual(m.find(b'one'), 0) + self.assertEqual(m.find(b'ones'), 8) + self.assertEqual(m.find(b'one', 0, -1), 0) + self.assertEqual(m.find(b'one', 1), 8) + self.assertEqual(m.find(b'one', 1, -1), 8) + self.assertEqual(m.find(b'one', 1, -2), -1) + self.assertEqual(m.find(bytearray(b'one')), 0) + + + def test_rfind(self): + # test the new 'end' parameter works as expected + with open(TESTFN, 'wb+') as f: + data = b'one two ones' + n = len(data) + f.write(data) + f.flush() + m = mmap.mmap(f.fileno(), n) + + self.assertEqual(m.rfind(b'one'), 8) + self.assertEqual(m.rfind(b'one '), 0) + self.assertEqual(m.rfind(b'one', 0, -1), 8) + self.assertEqual(m.rfind(b'one', 0, -2), 0) + self.assertEqual(m.rfind(b'one', 1, -1), 8) + self.assertEqual(m.rfind(b'one', 1, -2), -1) + self.assertEqual(m.rfind(bytearray(b'one')), 8) + + + def test_double_close(self): + # make sure a double close doesn't crash on Solaris (Bug# 665913) + with open(TESTFN, 'wb+') as f: + f.write(2**16 * b'a') # Arbitrary character + + with open(TESTFN, 'rb') as f: + mf = mmap.mmap(f.fileno(), 2**16, access=mmap.ACCESS_READ) + mf.close() + mf.close() + + def test_entire_file(self): + # test mapping of entire file by passing 0 for map length + with open(TESTFN, "wb+") as f: + f.write(2**16 * b'm') # Arbitrary character + + with open(TESTFN, "rb+") as f, \ + mmap.mmap(f.fileno(), 0) as mf: + self.assertEqual(len(mf), 2**16, "Map size should equal file size.") + self.assertEqual(mf.read(2**16), 2**16 * b"m") + + def test_length_0_offset(self): + # Issue #10916: test mapping of remainder of file by passing 0 for + # map length with an offset doesn't cause a segfault. + # NOTE: allocation granularity is currently 65536 under Win64, + # and therefore the minimum offset alignment. + with open(TESTFN, "wb") as f: + f.write((65536 * 2) * b'm') # Arbitrary character + + with open(TESTFN, "rb") as f: + with mmap.mmap(f.fileno(), 0, offset=65536, access=mmap.ACCESS_READ) as mf: + self.assertRaises(IndexError, mf.__getitem__, 80000) + + def test_length_0_large_offset(self): + # Issue #10959: test mapping of a file by passing 0 for + # map length with a large offset doesn't cause a segfault. + with open(TESTFN, "wb") as f: + f.write(115699 * b'm') # Arbitrary character + + with open(TESTFN, "w+b") as f: + self.assertRaises(ValueError, mmap.mmap, f.fileno(), 0, + offset=2147418112) + + def test_move(self): + # make move works everywhere (64-bit format problem earlier) + with open(TESTFN, 'wb+') as f: + + f.write(b"ABCDEabcde") # Arbitrary character + f.flush() + + mf = mmap.mmap(f.fileno(), 10) + mf.move(5, 0, 5) + self.assertEqual(mf[:], b"ABCDEABCDE", "Map move should have duplicated front 5") + mf.close() + + # more excessive test + data = b"0123456789" + for dest in range(len(data)): + for src in range(len(data)): + for count in range(len(data) - max(dest, src)): + expected = data[:dest] + data[src:src+count] + data[dest+count:] + m = mmap.mmap(-1, len(data)) + m[:] = data + m.move(dest, src, count) + self.assertEqual(m[:], expected) + m.close() + + # segfault test (Issue 5387) + m = mmap.mmap(-1, 100) + offsets = [-100, -1, 0, 1, 100] + for source, dest, size in itertools.product(offsets, offsets, offsets): + try: + m.move(source, dest, size) + except ValueError: + pass + + offsets = [(-1, -1, -1), (-1, -1, 0), (-1, 0, -1), (0, -1, -1), + (-1, 0, 0), (0, -1, 0), (0, 0, -1)] + for source, dest, size in offsets: + self.assertRaises(ValueError, m.move, source, dest, size) + + m.close() + + m = mmap.mmap(-1, 1) # single byte + self.assertRaises(ValueError, m.move, 0, 0, 2) + self.assertRaises(ValueError, m.move, 1, 0, 1) + self.assertRaises(ValueError, m.move, 0, 1, 1) + m.move(0, 0, 1) + m.move(0, 0, 0) + + + def test_anonymous(self): + # anonymous mmap.mmap(-1, PAGE) + m = mmap.mmap(-1, PAGESIZE) + for x in range(PAGESIZE): + self.assertEqual(m[x], 0, + "anonymously mmap'ed contents should be zero") + + for x in range(PAGESIZE): + b = x & 0xff + m[x] = b + self.assertEqual(m[x], b) + + def test_read_all(self): + m = mmap.mmap(-1, 16) + self.addCleanup(m.close) + + # With no parameters, or None or a negative argument, reads all + m.write(bytes(range(16))) + m.seek(0) + self.assertEqual(m.read(), bytes(range(16))) + m.seek(8) + self.assertEqual(m.read(), bytes(range(8, 16))) + m.seek(16) + self.assertEqual(m.read(), b'') + m.seek(3) + self.assertEqual(m.read(None), bytes(range(3, 16))) + m.seek(4) + self.assertEqual(m.read(-1), bytes(range(4, 16))) + m.seek(5) + self.assertEqual(m.read(-2), bytes(range(5, 16))) + m.seek(9) + self.assertEqual(m.read(-42), bytes(range(9, 16))) + + def test_read_invalid_arg(self): + m = mmap.mmap(-1, 16) + self.addCleanup(m.close) + + self.assertRaises(TypeError, m.read, 'foo') + self.assertRaises(TypeError, m.read, 5.5) + self.assertRaises(TypeError, m.read, [1, 2, 3]) + + def test_extended_getslice(self): + # Test extended slicing by comparing with list slicing. + s = bytes(reversed(range(256))) + m = mmap.mmap(-1, len(s)) + m[:] = s + self.assertEqual(m[:], s) + indices = (0, None, 1, 3, 19, 300, sys.maxsize, -1, -2, -31, -300) + for start in indices: + for stop in indices: + # Skip step 0 (invalid) + for step in indices[1:]: + self.assertEqual(m[start:stop:step], + s[start:stop:step]) + + def test_extended_set_del_slice(self): + # Test extended slicing by comparing with list slicing. + s = bytes(reversed(range(256))) + m = mmap.mmap(-1, len(s)) + indices = (0, None, 1, 3, 19, 300, sys.maxsize, -1, -2, -31, -300) + for start in indices: + for stop in indices: + # Skip invalid step 0 + for step in indices[1:]: + m[:] = s + self.assertEqual(m[:], s) + L = list(s) + # Make sure we have a slice of exactly the right length, + # but with different data. + data = L[start:stop:step] + data = bytes(reversed(data)) + L[start:stop:step] = data + m[start:stop:step] = data + self.assertEqual(m[:], bytes(L)) + + def make_mmap_file (self, f, halfsize): + # Write 2 pages worth of data to the file + f.write (b'\0' * halfsize) + f.write (b'foo') + f.write (b'\0' * (halfsize - 3)) + f.flush () + return mmap.mmap (f.fileno(), 0) + + def test_empty_file (self): + f = open (TESTFN, 'w+b') + f.close() + with open(TESTFN, "rb") as f : + self.assertRaisesRegex(ValueError, + "cannot mmap an empty file", + mmap.mmap, f.fileno(), 0, + access=mmap.ACCESS_READ) + + def test_offset (self): + f = open (TESTFN, 'w+b') + + try: # unlink TESTFN no matter what + halfsize = mmap.ALLOCATIONGRANULARITY + m = self.make_mmap_file (f, halfsize) + m.close () + f.close () + + mapsize = halfsize * 2 + # Try invalid offset + f = open(TESTFN, "r+b") + for offset in [-2, -1, None]: + try: + m = mmap.mmap(f.fileno(), mapsize, offset=offset) + self.assertEqual(0, 1) + except (ValueError, TypeError, OverflowError): + pass + else: + self.assertEqual(0, 0) + f.close() + + # Try valid offset, hopefully 8192 works on all OSes + f = open(TESTFN, "r+b") + m = mmap.mmap(f.fileno(), mapsize - halfsize, offset=halfsize) + self.assertEqual(m[0:3], b'foo') + f.close() + + # Try resizing map + try: + m.resize(512) + except SystemError: + pass + else: + # resize() is supported + self.assertEqual(len(m), 512) + # Check that we can no longer seek beyond the new size. + self.assertRaises(ValueError, m.seek, 513, 0) + # Check that the content is not changed + self.assertEqual(m[0:3], b'foo') + + # Check that the underlying file is truncated too + f = open(TESTFN, 'rb') + f.seek(0, 2) + self.assertEqual(f.tell(), halfsize + 512) + f.close() + self.assertEqual(m.size(), halfsize + 512) + + m.close() + + finally: + f.close() + try: + os.unlink(TESTFN) + except OSError: + pass + + def test_subclass(self): + class anon_mmap(mmap.mmap): + def __new__(klass, *args, **kwargs): + return mmap.mmap.__new__(klass, -1, *args, **kwargs) + anon_mmap(PAGESIZE) + + @unittest.skipUnless(hasattr(mmap, 'PROT_READ'), "needs mmap.PROT_READ") + def test_prot_readonly(self): + mapsize = 10 + with open(TESTFN, "wb") as fp: + fp.write(b"a"*mapsize) + with open(TESTFN, "rb") as f: + m = mmap.mmap(f.fileno(), mapsize, prot=mmap.PROT_READ) + self.assertRaises(TypeError, m.write, "foo") + + def test_error(self): + self.assertIs(mmap.error, OSError) + + def test_io_methods(self): + data = b"0123456789" + with open(TESTFN, "wb") as fp: + fp.write(b"x"*len(data)) + with open(TESTFN, "r+b") as f: + m = mmap.mmap(f.fileno(), len(data)) + # Test write_byte() + for i in range(len(data)): + self.assertEqual(m.tell(), i) + m.write_byte(data[i]) + self.assertEqual(m.tell(), i+1) + self.assertRaises(ValueError, m.write_byte, b"x"[0]) + self.assertEqual(m[:], data) + # Test read_byte() + m.seek(0) + for i in range(len(data)): + self.assertEqual(m.tell(), i) + self.assertEqual(m.read_byte(), data[i]) + self.assertEqual(m.tell(), i+1) + self.assertRaises(ValueError, m.read_byte) + # Test read() + m.seek(3) + self.assertEqual(m.read(3), b"345") + self.assertEqual(m.tell(), 6) + # Test write() + m.seek(3) + m.write(b"bar") + self.assertEqual(m.tell(), 6) + self.assertEqual(m[:], b"012bar6789") + m.write(bytearray(b"baz")) + self.assertEqual(m.tell(), 9) + self.assertEqual(m[:], b"012barbaz9") + self.assertRaises(ValueError, m.write, b"ba") + + def test_non_ascii_byte(self): + for b in (129, 200, 255): # > 128 + m = mmap.mmap(-1, 1) + m.write_byte(b) + self.assertEqual(m[0], b) + m.seek(0) + self.assertEqual(m.read_byte(), b) + m.close() + + @unittest.skipUnless(os.name == 'nt', 'requires Windows') + def test_tagname(self): + data1 = b"0123456789" + data2 = b"abcdefghij" + assert len(data1) == len(data2) + + # Test same tag + m1 = mmap.mmap(-1, len(data1), tagname="foo") + m1[:] = data1 + m2 = mmap.mmap(-1, len(data2), tagname="foo") + m2[:] = data2 + self.assertEqual(m1[:], data2) + self.assertEqual(m2[:], data2) + m2.close() + m1.close() + + # Test different tag + m1 = mmap.mmap(-1, len(data1), tagname="foo") + m1[:] = data1 + m2 = mmap.mmap(-1, len(data2), tagname="boo") + m2[:] = data2 + self.assertEqual(m1[:], data1) + self.assertEqual(m2[:], data2) + m2.close() + m1.close() + + @cpython_only + @unittest.skipUnless(os.name == 'nt', 'requires Windows') + def test_sizeof(self): + m1 = mmap.mmap(-1, 100) + tagname = "foo" + m2 = mmap.mmap(-1, 100, tagname=tagname) + self.assertEqual(sys.getsizeof(m2), + sys.getsizeof(m1) + len(tagname) + 1) + + @unittest.skipUnless(os.name == 'nt', 'requires Windows') + def test_crasher_on_windows(self): + # Should not crash (Issue 1733986) + m = mmap.mmap(-1, 1000, tagname="foo") + try: + mmap.mmap(-1, 5000, tagname="foo")[:] # same tagname, but larger size + except: + pass + m.close() + + # Should not crash (Issue 5385) + with open(TESTFN, "wb") as fp: + fp.write(b"x"*10) + f = open(TESTFN, "r+b") + m = mmap.mmap(f.fileno(), 0) + f.close() + try: + m.resize(0) # will raise OSError + except: + pass + try: + m[:] + except: + pass + m.close() + + @unittest.skipUnless(os.name == 'nt', 'requires Windows') + def test_invalid_descriptor(self): + # socket file descriptors are valid, but out of range + # for _get_osfhandle, causing a crash when validating the + # parameters to _get_osfhandle. + s = socket.socket() + try: + with self.assertRaises(OSError): + m = mmap.mmap(s.fileno(), 10) + finally: + s.close() + + def test_context_manager(self): + with mmap.mmap(-1, 10) as m: + self.assertFalse(m.closed) + self.assertTrue(m.closed) + + def test_context_manager_exception(self): + # Test that the OSError gets passed through + with self.assertRaises(Exception) as exc: + with mmap.mmap(-1, 10) as m: + raise OSError + self.assertIsInstance(exc.exception, OSError, + "wrong exception raised in context manager") + self.assertTrue(m.closed, "context manager failed") + + def test_weakref(self): + # Check mmap objects are weakrefable + mm = mmap.mmap(-1, 16) + wr = weakref.ref(mm) + self.assertIs(wr(), mm) + del mm + gc_collect() + self.assertIs(wr(), None) + + def test_write_returning_the_number_of_bytes_written(self): + mm = mmap.mmap(-1, 16) + self.assertEqual(mm.write(b""), 0) + self.assertEqual(mm.write(b"x"), 1) + self.assertEqual(mm.write(b"yz"), 2) + self.assertEqual(mm.write(b"python"), 6) + + @unittest.skipIf(os.name == 'nt', 'cannot resize anonymous mmaps on Windows') + def test_resize_past_pos(self): + m = mmap.mmap(-1, 8192) + self.addCleanup(m.close) + m.read(5000) + try: + m.resize(4096) + except SystemError: + self.skipTest("resizing not supported") + self.assertEqual(m.read(14), b'') + self.assertRaises(ValueError, m.read_byte) + self.assertRaises(ValueError, m.write_byte, 42) + self.assertRaises(ValueError, m.write, b'abc') + + def test_concat_repeat_exception(self): + m = mmap.mmap(-1, 16) + with self.assertRaises(TypeError): + m + m + with self.assertRaises(TypeError): + m * 2 + + def test_flush_return_value(self): + # mm.flush() should return None on success, raise an + # exception on error under all platforms. + mm = mmap.mmap(-1, 16) + self.addCleanup(mm.close) + mm.write(b'python') + result = mm.flush() + self.assertIsNone(result) + if sys.platform.startswith('linux'): + # 'offset' must be a multiple of mmap.PAGESIZE on Linux. + # See bpo-34754 for details. + self.assertRaises(OSError, mm.flush, 1, len(b'python')) + + def test_repr(self): + open_mmap_repr_pat = re.compile( + r"\S+), " + r"length=(?P\d+), " + r"pos=(?P\d+), " + r"offset=(?P\d+)>") + closed_mmap_repr_pat = re.compile(r"") + mapsizes = (50, 100, 1_000, 1_000_000, 10_000_000) + offsets = tuple((mapsize // 2 // mmap.ALLOCATIONGRANULARITY) + * mmap.ALLOCATIONGRANULARITY for mapsize in mapsizes) + for offset, mapsize in zip(offsets, mapsizes): + data = b'a' * mapsize + length = mapsize - offset + accesses = ('ACCESS_DEFAULT', 'ACCESS_READ', + 'ACCESS_COPY', 'ACCESS_WRITE') + positions = (0, length//10, length//5, length//4) + with open(TESTFN, "wb+") as fp: + fp.write(data) + fp.flush() + for access, pos in itertools.product(accesses, positions): + accint = getattr(mmap, access) + with mmap.mmap(fp.fileno(), + length, + access=accint, + offset=offset) as mm: + mm.seek(pos) + match = open_mmap_repr_pat.match(repr(mm)) + self.assertIsNotNone(match) + self.assertEqual(match.group('access'), access) + self.assertEqual(match.group('length'), str(length)) + self.assertEqual(match.group('pos'), str(pos)) + self.assertEqual(match.group('offset'), str(offset)) + match = closed_mmap_repr_pat.match(repr(mm)) + self.assertIsNotNone(match) + + @unittest.skipUnless(hasattr(mmap.mmap, 'madvise'), 'needs madvise') + def test_madvise(self): + size = 2 * PAGESIZE + m = mmap.mmap(-1, size) + + with self.assertRaisesRegex(ValueError, "madvise start out of bounds"): + m.madvise(mmap.MADV_NORMAL, size) + with self.assertRaisesRegex(ValueError, "madvise start out of bounds"): + m.madvise(mmap.MADV_NORMAL, -1) + with self.assertRaisesRegex(ValueError, "madvise length invalid"): + m.madvise(mmap.MADV_NORMAL, 0, -1) + with self.assertRaisesRegex(OverflowError, "madvise length too large"): + m.madvise(mmap.MADV_NORMAL, PAGESIZE, sys.maxsize) + self.assertEqual(m.madvise(mmap.MADV_NORMAL), None) + self.assertEqual(m.madvise(mmap.MADV_NORMAL, PAGESIZE), None) + self.assertEqual(m.madvise(mmap.MADV_NORMAL, PAGESIZE, size), None) + self.assertEqual(m.madvise(mmap.MADV_NORMAL, 0, 2), None) + self.assertEqual(m.madvise(mmap.MADV_NORMAL, 0, size), None) + + +class LargeMmapTests(unittest.TestCase): + + def setUp(self): + unlink(TESTFN) + + def tearDown(self): + unlink(TESTFN) + + def _make_test_file(self, num_zeroes, tail): + if sys.platform[:3] == 'win' or sys.platform == 'darwin': + requires('largefile', + 'test requires %s bytes and a long time to run' % str(0x180000000)) + f = open(TESTFN, 'w+b') + try: + f.seek(num_zeroes) + f.write(tail) + f.flush() + except (OSError, OverflowError, ValueError): + try: + f.close() + except (OSError, OverflowError): + pass + raise unittest.SkipTest("filesystem does not have largefile support") + return f + + def test_large_offset(self): + with self._make_test_file(0x14FFFFFFF, b" ") as f: + with mmap.mmap(f.fileno(), 0, offset=0x140000000, access=mmap.ACCESS_READ) as m: + self.assertEqual(m[0xFFFFFFF], 32) + + def test_large_filesize(self): + with self._make_test_file(0x17FFFFFFF, b" ") as f: + if sys.maxsize < 0x180000000: + # On 32 bit platforms the file is larger than sys.maxsize so + # mapping the whole file should fail -- Issue #16743 + with self.assertRaises(OverflowError): + mmap.mmap(f.fileno(), 0x180000000, access=mmap.ACCESS_READ) + with self.assertRaises(ValueError): + mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) + with mmap.mmap(f.fileno(), 0x10000, access=mmap.ACCESS_READ) as m: + self.assertEqual(m.size(), 0x180000000) + + # Issue 11277: mmap() with large (~4 GiB) sparse files crashes on OS X. + + def _test_around_boundary(self, boundary): + tail = b' DEARdear ' + start = boundary - len(tail) // 2 + end = start + len(tail) + with self._make_test_file(start, tail) as f: + with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m: + self.assertEqual(m[start:end], tail) + + @unittest.skipUnless(sys.maxsize > _4G, "test cannot run on 32-bit systems") + def test_around_2GB(self): + self._test_around_boundary(_2G) + + @unittest.skipUnless(sys.maxsize > _4G, "test cannot run on 32-bit systems") + def test_around_4GB(self): + self._test_around_boundary(_4G) + + +if __name__ == '__main__': + unittest.main() From 1ff3e0a5515b92b02f916b792c76354b95848947 Mon Sep 17 00:00:00 2001 From: Dennis Zhuang Date: Mon, 13 Jun 2022 10:37:06 +0800 Subject: [PATCH 09/10] Skip test_flush_return_value on linux platform --- Lib/test/test_mmap.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_mmap.py b/Lib/test/test_mmap.py index 8f34c182f8..fa371a291d 100644 --- a/Lib/test/test_mmap.py +++ b/Lib/test/test_mmap.py @@ -728,6 +728,7 @@ def test_concat_repeat_exception(self): with self.assertRaises(TypeError): m * 2 + @unittest.skipIf(sys.platform.startswith("linux"), "TODO: RUSTPYTHON, memmap2 doesn't throw OSError when offset is not a multiple of mmap.PAGESIZE on Linux") def test_flush_return_value(self): # mm.flush() should return None on success, raise an # exception on error under all platforms. From af2fac980406a2331b8acec1e4afc4b938b7fc79 Mon Sep 17 00:00:00 2001 From: Jeong Yunwon Date: Tue, 14 Jun 2022 18:07:13 +0900 Subject: [PATCH 10/10] use usize for mmap fields --- stdlib/src/mmap.rs | 285 ++++++++++++++++++++++++++------------------- 1 file changed, 164 insertions(+), 121 deletions(-) diff --git a/stdlib/src/mmap.rs b/stdlib/src/mmap.rs index 4dfcf1ecc4..aff3d17c57 100644 --- a/stdlib/src/mmap.rs +++ b/stdlib/src/mmap.rs @@ -22,6 +22,7 @@ mod mmap { use crossbeam_utils::atomic::AtomicCell; use memmap2::{Advice, Mmap, MmapMut, MmapOptions}; use nix::unistd; + use num_traits::Signed; use std::fs::File; use std::io::Write; use std::ops::{Deref, DerefMut}; @@ -156,9 +157,9 @@ mod mmap { closed: AtomicCell, mmap: PyMutex>, fd: RawFd, - offset: isize, - size: AtomicCell, - pos: AtomicCell, // relative to offset + offset: libc::off_t, + size: AtomicCell, + pos: AtomicCell, // relative to offset exports: AtomicCell, access: AccessMode, } @@ -176,7 +177,7 @@ mod mmap { #[pyarg(any, default = "AccessMode::Default")] access: AccessMode, #[pyarg(any, default = "0")] - offset: isize, + offset: libc::off_t, } #[derive(FromArgs)] @@ -187,6 +188,31 @@ mod mmap { size: Option, } + impl FlushOptions { + fn values(self, len: usize) -> Option<(usize, usize)> { + let offset = if let Some(offset) = self.offset { + if offset < 0 { + return None; + } + offset as usize + } else { + 0 + }; + let size = if let Some(size) = self.size { + if size < 0 { + return None; + } + size as usize + } else { + len + }; + if len.checked_sub(offset)? < size { + return None; + } + Some((offset, size)) + } + } + #[derive(FromArgs, Clone)] pub struct FindOptions { #[pyarg(positional)] @@ -202,9 +228,44 @@ mod mmap { #[pyarg(positional)] option: libc::c_int, #[pyarg(positional, default)] - start: Option, + start: Option, #[pyarg(positional, default)] - length: Option, + length: Option, + } + + impl AdviseOptions { + fn values(self, len: usize, vm: &VirtualMachine) -> PyResult<(libc::c_int, usize, usize)> { + let start = self + .start + .map(|s| { + s.try_to_primitive::(vm) + .ok() + .filter(|s| *s < len) + .ok_or_else(|| vm.new_value_error("madvise start out of bounds".to_owned())) + }) + .transpose()? + .unwrap_or(0); + let length = self + .length + .map(|s| { + s.try_to_primitive::(vm) + .map_err(|_| vm.new_value_error("madvise length invalid".to_owned())) + }) + .transpose()? + .unwrap_or(len); + + if isize::MAX as usize - start < length { + return Err(vm.new_overflow_error("madvise length too large".to_owned())); + } + + let length = if start + length > len { + len - start + } else { + length + }; + + Ok((self.option, start, length)) + } } impl Constructor for PyMmap { @@ -224,12 +285,13 @@ mod mmap { }: Self::Args, vm: &VirtualMachine, ) -> PyResult { - let mut map_size = length; + let map_size = length; if map_size < 0 { return Err( vm.new_overflow_error("memory mapped length must be positive".to_owned()) ); } + let mut map_size = map_size as usize; if offset < 0 { return Err( @@ -264,10 +326,10 @@ mod mmap { if fd != -1 { let file = unsafe { File::from_raw_fd(fd) }; - let file_len = match file.metadata() { - Ok(m) => m.len().try_into().expect("file size overflow"), - Err(e) => return Err(vm.new_os_error(e.to_string())), - }; + let metadata = file + .metadata() + .map_err(|e| vm.new_os_error(e.to_string()))?; + let file_len: libc::off_t = metadata.len().try_into().expect("file size overflow"); // File::from_raw_fd will consume the fd, so we // have to get it again. fd = file.into_raw_fd(); @@ -282,12 +344,10 @@ mod mmap { ); } - //if file_len - offset > isize::MAX { - // return Err(vm.new_value_error("mmap length is too large".to_owned())); - //} - - map_size = file_len - offset; - } else if offset > file_len || file_len - offset < map_size { + map_size = (file_len - offset) + .try_into() + .map_err(|_| vm.new_value_error("mmap length is too large".to_owned()))?; + } else if offset > file_len || file_len - offset < map_size as libc::off_t { return Err( vm.new_value_error("mmap length is greater than file size".to_owned()) ); @@ -295,9 +355,7 @@ mod mmap { } let mut mmap_opt = MmapOptions::new(); - let mmap_opt = mmap_opt - .offset(offset.try_into().unwrap()) - .len(map_size.try_into().unwrap()); + let mmap_opt = mmap_opt.offset(offset.try_into().unwrap()).len(map_size); let (fd, mmap) = if fd == -1 { ( @@ -423,23 +481,18 @@ mod mmap { } #[pymethod(magic)] - pub(crate) fn len(&self) -> usize { - self.inner_size() as usize - } - - #[inline] - fn inner_size(&self) -> isize { + fn len(&self) -> usize { self.size.load() } #[inline] - fn inner_pos(&self) -> isize { + fn pos(&self) -> usize { self.pos.load() } #[inline] - fn advance_pos(&self, step: isize) { - self.pos.store(self.inner_pos() + step); + fn advance_pos(&self, step: usize) { + self.pos.store(self.pos() + step); } #[inline] @@ -512,7 +565,7 @@ mod mmap { "", access_str, zelf.len(), - zelf.inner_pos(), + zelf.pos(), zelf.offset ); @@ -536,13 +589,16 @@ mod mmap { } fn get_find_range(&self, options: FindOptions) -> (usize, usize) { - let pos = self.inner_pos(); - let size = self.inner_size(); - let start = options.start.unwrap_or(pos); - let end = options.end.unwrap_or(size); - - let size = size.try_into().unwrap(); - (saturate_index(start, size), saturate_index(end, size)) + let size = self.len(); + let start = options + .start + .map(|start| saturate_index(start, size)) + .unwrap_or_else(|| self.pos()); + let end = options + .end + .map(|end| saturate_index(end, size)) + .unwrap_or(size); + (start, end) } #[pymethod] @@ -586,15 +642,9 @@ mod mmap { #[pymethod] fn flush(&self, options: FlushOptions, vm: &VirtualMachine) -> PyResult<()> { - let offset = options.offset.unwrap_or(0); - let size = options.size.unwrap_or_else(|| self.inner_size()); - - if size < 0 || offset < 0 || self.inner_size() - offset < size { - return Err(vm.new_value_error("flush values out of range".to_owned())); - } - - let size = size as usize; - let offset = offset as usize; + let (offset, size) = options + .values(self.len()) + .ok_or_else(|| vm.new_value_error("flush values out of range".to_owned()))?; if self.access == AccessMode::Read || self.access == AccessMode::Copy { return Ok(()); @@ -614,25 +664,8 @@ mod mmap { #[allow(unused_assignments)] #[pymethod] fn madvise(&self, options: AdviseOptions, vm: &VirtualMachine) -> PyResult<()> { - let start = options.start.unwrap_or(0); - let mut length = options.length.unwrap_or_else(|| self.inner_size()); - - if start < 0 || start >= self.inner_size() { - return Err(vm.new_value_error("madvise start out of bounds".to_owned())); - } - if length < 0 { - return Err(vm.new_value_error("madvise length invalid".to_owned())); - } - - if isize::MAX - start < length { - return Err(vm.new_overflow_error("madvise length too large".to_owned())); - } - - if start + length > self.inner_size() { - length = self.inner_size() - start; - } - - let advice = advice_try_from_i32(vm, options.option)?; + let (option, _start, _length) = options.values(self.len(), vm)?; + let advice = advice_try_from_i32(vm, option)?; //TODO: memmap2 doesn't support madvise range right now. match self.check_valid(vm)?.deref().as_ref().unwrap() { @@ -645,18 +678,41 @@ mod mmap { } #[pymethod(name = "move")] - fn move_(&self, dest: isize, src: isize, cnt: isize, vm: &VirtualMachine) -> PyResult<()> { - let size = self.inner_size(); - if dest < 0 || src < 0 || cnt < 0 || size - dest < cnt || size - src < cnt { - return Err( - vm.new_value_error("source, destination, or count out of range".to_owned()) - ); + fn move_( + &self, + dest: PyIntRef, + src: PyIntRef, + cnt: PyIntRef, + vm: &VirtualMachine, + ) -> PyResult<()> { + fn args( + dest: PyIntRef, + src: PyIntRef, + cnt: PyIntRef, + size: usize, + vm: &VirtualMachine, + ) -> Option<(usize, usize, usize)> { + if dest.as_bigint().is_negative() + || src.as_bigint().is_negative() + || cnt.as_bigint().is_negative() + { + return None; + } + let dest = dest.try_to_primitive(vm).ok()?; + let src = src.try_to_primitive(vm).ok()?; + let cnt = cnt.try_to_primitive(vm).ok()?; + if size - dest < cnt || size - src < cnt { + return None; + } + Some((dest, src, cnt)) } - let dest: usize = dest.try_into().unwrap(); - let cnt: usize = cnt.try_into().unwrap(); + let size = self.len(); + let (dest, src, cnt) = args(dest, src, cnt, size, vm).ok_or_else(|| { + vm.new_value_error("source, destination, or count out of range".to_owned()) + })?; + let dest_end = dest + cnt; - let src: usize = src.try_into().unwrap(); let src_end = src + cnt; self.try_writable(vm, |mmap| { @@ -670,7 +726,7 @@ mod mmap { #[pymethod] fn read(&self, n: OptionalArg, vm: &VirtualMachine) -> PyResult { - let mut num_bytes = n + let num_bytes = n .map(|obj| { let name = obj.class().name().to_string(); obj.try_into_value::>(vm).map_err(|_| { @@ -681,20 +737,14 @@ mod mmap { }) }) .transpose()? - .flatten() - .unwrap_or(isize::MAX); + .flatten(); let mmap = self.check_valid(vm)?; - let pos = self.inner_pos(); - - let remaining = if pos < self.inner_size() { - self.inner_size() - pos - } else { - 0 - }; - - if num_bytes < 0 || num_bytes > remaining { - num_bytes = remaining; - } + let pos = self.pos(); + let remaining = self.len().saturating_sub(pos); + let num_bytes = num_bytes + .filter(|&n| n >= 0 && (n as usize) <= remaining) + .map(|n| n as usize) + .unwrap_or(remaining); let end_pos = (pos + num_bytes) as usize; let bytes = match mmap.deref().as_ref().unwrap() { @@ -711,8 +761,8 @@ mod mmap { #[pymethod] fn read_byte(&self, vm: &VirtualMachine) -> PyResult { - let pos = self.inner_pos(); - if pos >= self.inner_size() { + let pos = self.pos(); + if pos >= self.len() { return Err(vm.new_value_error("read byte out of range".to_owned())); } @@ -728,30 +778,25 @@ mod mmap { #[pymethod] fn readline(&self, vm: &VirtualMachine) -> PyResult { - let pos = self.inner_pos(); + let pos = self.pos(); let mmap = self.check_valid(vm)?; - let remaining = if pos < self.inner_size() { - self.inner_size() - pos - } else { - 0 - }; - + let remaining = self.len().saturating_sub(pos); if remaining == 0 { return Ok(PyBytes::from(vec![]).into_ref(vm)); } let eof = match mmap.as_ref().unwrap() { - MmapObj::Read(mmap) => &mmap[pos as usize..], - MmapObj::Write(mmap) => &mmap[pos as usize..], + MmapObj::Read(mmap) => &mmap[pos..], + MmapObj::Write(mmap) => &mmap[pos..], } .iter() .position(|&x| x == b'\n'); let end_pos = if let Some(i) = eof { - pos as usize + i + 1 + pos + i + 1 } else { - self.inner_size() as usize + self.len() }; let bytes = match mmap.deref().as_ref().unwrap() { @@ -761,7 +806,7 @@ mod mmap { let result = PyBytes::from(bytes).into_ref(vm); - self.advance_pos(end_pos as isize - pos); + self.advance_pos(end_pos - pos); Ok(result) } @@ -776,40 +821,38 @@ mod mmap { #[pymethod] fn seek( &self, - pos: isize, + dist: isize, whence: OptionalArg, vm: &VirtualMachine, ) -> PyResult<()> { - let dist = pos; - let how = whence.unwrap_or(0); - let size = self.inner_size(); + let size = self.len(); let new_pos = match how { 0 => dist, // relative to start 1 => { // relative to current position - let pos = self.inner_pos(); - if isize::MAX - pos < dist { + let pos = self.pos(); + if (((isize::MAX as usize) - pos) as isize) < dist { return Err(vm.new_value_error("seek out of range".to_owned())); } - pos + dist + pos as isize + dist } 2 => { // relative to end - if isize::MAX - size < dist { + if (((isize::MAX as usize) - size) as isize) < dist { return Err(vm.new_value_error("seek out of range".to_owned())); } - size + dist + size as isize + dist } _ => return Err(vm.new_value_error("unknown seek type".to_owned())), }; - if new_pos > size || new_pos < 0 { + if new_pos < 0 || (new_pos as usize) > size { return Err(vm.new_value_error("seek out of range".to_owned())); } - self.pos.store(new_pos); + self.pos.store(new_pos as usize); Ok(()) } @@ -827,18 +870,18 @@ mod mmap { } #[pymethod] - fn tell(&self) -> PyResult { - Ok(self.inner_pos()) + fn tell(&self) -> PyResult { + Ok(self.pos()) } #[pymethod] fn write(&self, bytes: ArgBytesLike, vm: &VirtualMachine) -> PyResult { - let pos = self.inner_pos(); - let size = self.inner_size(); + let pos = self.pos(); + let size = self.len(); let data = bytes.borrow_buf(); - if pos > size || size - pos < data.len() as isize { + if pos > size || size - pos < data.len() { return Err(vm.new_value_error("data out of range".to_owned())); } @@ -849,7 +892,7 @@ mod mmap { Ok(data.len()) })??; - self.advance_pos(len as isize); + self.advance_pos(len); Ok(PyInt::from(len).into_ref(vm)) } @@ -858,8 +901,8 @@ mod mmap { fn write_byte(&self, byte: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { let b = value_from_object(vm, &byte)?; - let pos = self.inner_pos(); - let size = self.inner_size(); + let pos = self.pos(); + let size = self.len(); if pos >= size { return Err(vm.new_value_error("write byte out of range".to_owned()));