diff --git a/Cargo.lock b/Cargo.lock index be1bdd5402..46bfa7d9b3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -890,9 +890,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.123" +version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb691a747a7ab48abc15c5b42066eaafde10dc427e3b6ee2a1cf43db04c763bd" +checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" [[package]] name = "libffi" @@ -996,6 +996,15 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" +[[package]] +name = "memmap2" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5172b50c23043ff43dd53e51392f36519d9b35a8f3a410d30ece5d1aedd58ae" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.6.5" @@ -1190,6 +1199,16 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "978aa494585d3ca4ad74929863093e87cac9790d81fe7aba2b3dc2890643a0fc" +[[package]] +name = "page_size" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "parking_lot" version = "0.12.0" @@ -1719,6 +1738,7 @@ dependencies = [ "libz-sys", "md-5", "memchr", + "memmap2", "mt19937", "nix", "num-bigint", @@ -1729,6 +1749,7 @@ dependencies = [ "openssl", "openssl-probe", "openssl-sys", + "page_size", "paste", "puruspe", "rand", diff --git a/Cargo.toml b/Cargo.toml index 8b5dd6e842..d584879f0a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,7 +41,7 @@ rustpython-stdlib = {path = "stdlib", optional = true, default-features = false, dirs = { package = "dirs-next", version = "2.0.0" } num-traits = "0.2.14" cfg-if = "1.0.0" -libc = "0.2.123" +libc = "0.2.126" flame = { version = "0.2.2", optional = true } flamescope = { version = "0.1.2", optional = true } diff --git a/Lib/test/test_mmap.py b/Lib/test/test_mmap.py new file mode 100644 index 0000000000..fa371a291d --- /dev/null +++ b/Lib/test/test_mmap.py @@ -0,0 +1,863 @@ +from test.support import (requires, _2G, _4G, gc_collect, cpython_only) +from test.support.import_helper import import_module +from test.support.os_helper import TESTFN, unlink +import unittest +import os +import re +import itertools +import socket +import sys +import weakref + +# Skip test if we can't import mmap. +mmap = import_module('mmap') + +PAGESIZE = mmap.PAGESIZE + + +class MmapTests(unittest.TestCase): + + def setUp(self): + if os.path.exists(TESTFN): + os.unlink(TESTFN) + + def tearDown(self): + try: + os.unlink(TESTFN) + except OSError: + pass + + def test_basic(self): + # Test mmap module on Unix systems and Windows + + # Create a file to be mmap'ed. + f = open(TESTFN, 'bw+') + try: + # Write 2 pages worth of data to the file + f.write(b'\0'* PAGESIZE) + f.write(b'foo') + f.write(b'\0'* (PAGESIZE-3) ) + f.flush() + m = mmap.mmap(f.fileno(), 2 * PAGESIZE) + finally: + f.close() + + # Simple sanity checks + + tp = str(type(m)) # SF bug 128713: segfaulted on Linux + self.assertEqual(m.find(b'foo'), PAGESIZE) + + self.assertEqual(len(m), 2*PAGESIZE) + + self.assertEqual(m[0], 0) + self.assertEqual(m[0:3], b'\0\0\0') + + # Shouldn't crash on boundary (Issue #5292) + self.assertRaises(IndexError, m.__getitem__, len(m)) + self.assertRaises(IndexError, m.__setitem__, len(m), b'\0') + + # Modify the file's content + m[0] = b'3'[0] + m[PAGESIZE +3: PAGESIZE +3+3] = b'bar' + + # Check that the modification worked + self.assertEqual(m[0], b'3'[0]) + self.assertEqual(m[0:3], b'3\0\0') + self.assertEqual(m[PAGESIZE-1 : PAGESIZE + 7], b'\0foobar\0') + + m.flush() + + # Test doing a regular expression match in an mmap'ed file + match = re.search(b'[A-Za-z]+', m) + if match is None: + self.fail('regex match on mmap failed!') + else: + start, end = match.span(0) + length = end - start + + self.assertEqual(start, PAGESIZE) + self.assertEqual(end, PAGESIZE + 6) + + # test seeking around (try to overflow the seek implementation) + m.seek(0,0) + self.assertEqual(m.tell(), 0) + m.seek(42,1) + self.assertEqual(m.tell(), 42) + m.seek(0,2) + self.assertEqual(m.tell(), len(m)) + + # Try to seek to negative position... + self.assertRaises(ValueError, m.seek, -1) + + # Try to seek beyond end of mmap... + self.assertRaises(ValueError, m.seek, 1, 2) + + # Try to seek to negative position... + self.assertRaises(ValueError, m.seek, -len(m)-1, 2) + + # Try resizing map + try: + m.resize(512) + except SystemError: + # resize() not supported + # No messages are printed, since the output of this test suite + # would then be different across platforms. + pass + else: + # resize() is supported + self.assertEqual(len(m), 512) + # Check that we can no longer seek beyond the new size. + self.assertRaises(ValueError, m.seek, 513, 0) + + # Check that the underlying file is truncated too + # (bug #728515) + f = open(TESTFN, 'rb') + try: + f.seek(0, 2) + self.assertEqual(f.tell(), 512) + finally: + f.close() + self.assertEqual(m.size(), 512) + + m.close() + + def test_access_parameter(self): + # Test for "access" keyword parameter + mapsize = 10 + with open(TESTFN, "wb") as fp: + fp.write(b"a"*mapsize) + with open(TESTFN, "rb") as f: + m = mmap.mmap(f.fileno(), mapsize, access=mmap.ACCESS_READ) + self.assertEqual(m[:], b'a'*mapsize, "Readonly memory map data incorrect.") + + # Ensuring that readonly mmap can't be slice assigned + try: + m[:] = b'b'*mapsize + except TypeError: + pass + else: + self.fail("Able to write to readonly memory map") + + # Ensuring that readonly mmap can't be item assigned + try: + m[0] = b'b' + except TypeError: + pass + else: + self.fail("Able to write to readonly memory map") + + # Ensuring that readonly mmap can't be write() to + try: + m.seek(0,0) + m.write(b'abc') + except TypeError: + pass + else: + self.fail("Able to write to readonly memory map") + + # Ensuring that readonly mmap can't be write_byte() to + try: + m.seek(0,0) + m.write_byte(b'd') + except TypeError: + pass + else: + self.fail("Able to write to readonly memory map") + + # Ensuring that readonly mmap can't be resized + try: + m.resize(2*mapsize) + except SystemError: # resize is not universally supported + pass + except TypeError: + pass + else: + self.fail("Able to resize readonly memory map") + with open(TESTFN, "rb") as fp: + self.assertEqual(fp.read(), b'a'*mapsize, + "Readonly memory map data file was modified") + + # Opening mmap with size too big + with open(TESTFN, "r+b") as f: + try: + m = mmap.mmap(f.fileno(), mapsize+1) + except ValueError: + # we do not expect a ValueError on Windows + # CAUTION: This also changes the size of the file on disk, and + # later tests assume that the length hasn't changed. We need to + # repair that. + if sys.platform.startswith('win'): + self.fail("Opening mmap with size+1 should work on Windows.") + else: + # we expect a ValueError on Unix, but not on Windows + if not sys.platform.startswith('win'): + self.fail("Opening mmap with size+1 should raise ValueError.") + m.close() + if sys.platform.startswith('win'): + # Repair damage from the resizing test. + with open(TESTFN, 'r+b') as f: + f.truncate(mapsize) + + # Opening mmap with access=ACCESS_WRITE + with open(TESTFN, "r+b") as f: + m = mmap.mmap(f.fileno(), mapsize, access=mmap.ACCESS_WRITE) + # Modifying write-through memory map + m[:] = b'c'*mapsize + self.assertEqual(m[:], b'c'*mapsize, + "Write-through memory map memory not updated properly.") + m.flush() + m.close() + with open(TESTFN, 'rb') as f: + stuff = f.read() + self.assertEqual(stuff, b'c'*mapsize, + "Write-through memory map data file not updated properly.") + + # Opening mmap with access=ACCESS_COPY + with open(TESTFN, "r+b") as f: + m = mmap.mmap(f.fileno(), mapsize, access=mmap.ACCESS_COPY) + # Modifying copy-on-write memory map + m[:] = b'd'*mapsize + self.assertEqual(m[:], b'd' * mapsize, + "Copy-on-write memory map data not written correctly.") + m.flush() + with open(TESTFN, "rb") as fp: + self.assertEqual(fp.read(), b'c'*mapsize, + "Copy-on-write test data file should not be modified.") + # Ensuring copy-on-write maps cannot be resized + self.assertRaises(TypeError, m.resize, 2*mapsize) + m.close() + + # Ensuring invalid access parameter raises exception + with open(TESTFN, "r+b") as f: + self.assertRaises(ValueError, mmap.mmap, f.fileno(), mapsize, access=4) + + if os.name == "posix": + # Try incompatible flags, prot and access parameters. + with open(TESTFN, "r+b") as f: + self.assertRaises(ValueError, mmap.mmap, f.fileno(), mapsize, + flags=mmap.MAP_PRIVATE, + prot=mmap.PROT_READ, access=mmap.ACCESS_WRITE) + + # Try writing with PROT_EXEC and without PROT_WRITE + prot = mmap.PROT_READ | getattr(mmap, 'PROT_EXEC', 0) + with open(TESTFN, "r+b") as f: + m = mmap.mmap(f.fileno(), mapsize, prot=prot) + self.assertRaises(TypeError, m.write, b"abcdef") + self.assertRaises(TypeError, m.write_byte, 0) + m.close() + + def test_bad_file_desc(self): + # Try opening a bad file descriptor... + self.assertRaises(OSError, mmap.mmap, -2, 4096) + + def test_tougher_find(self): + # Do a tougher .find() test. SF bug 515943 pointed out that, in 2.2, + # searching for data with embedded \0 bytes didn't work. + with open(TESTFN, 'wb+') as f: + + data = b'aabaac\x00deef\x00\x00aa\x00' + n = len(data) + f.write(data) + f.flush() + m = mmap.mmap(f.fileno(), n) + + for start in range(n+1): + for finish in range(start, n+1): + slice = data[start : finish] + self.assertEqual(m.find(slice), data.find(slice)) + self.assertEqual(m.find(slice + b'x'), -1) + m.close() + + def test_find_end(self): + # test the new 'end' parameter works as expected + with open(TESTFN, 'wb+') as f: + data = b'one two ones' + n = len(data) + f.write(data) + f.flush() + m = mmap.mmap(f.fileno(), n) + + self.assertEqual(m.find(b'one'), 0) + self.assertEqual(m.find(b'ones'), 8) + self.assertEqual(m.find(b'one', 0, -1), 0) + self.assertEqual(m.find(b'one', 1), 8) + self.assertEqual(m.find(b'one', 1, -1), 8) + self.assertEqual(m.find(b'one', 1, -2), -1) + self.assertEqual(m.find(bytearray(b'one')), 0) + + + def test_rfind(self): + # test the new 'end' parameter works as expected + with open(TESTFN, 'wb+') as f: + data = b'one two ones' + n = len(data) + f.write(data) + f.flush() + m = mmap.mmap(f.fileno(), n) + + self.assertEqual(m.rfind(b'one'), 8) + self.assertEqual(m.rfind(b'one '), 0) + self.assertEqual(m.rfind(b'one', 0, -1), 8) + self.assertEqual(m.rfind(b'one', 0, -2), 0) + self.assertEqual(m.rfind(b'one', 1, -1), 8) + self.assertEqual(m.rfind(b'one', 1, -2), -1) + self.assertEqual(m.rfind(bytearray(b'one')), 8) + + + def test_double_close(self): + # make sure a double close doesn't crash on Solaris (Bug# 665913) + with open(TESTFN, 'wb+') as f: + f.write(2**16 * b'a') # Arbitrary character + + with open(TESTFN, 'rb') as f: + mf = mmap.mmap(f.fileno(), 2**16, access=mmap.ACCESS_READ) + mf.close() + mf.close() + + def test_entire_file(self): + # test mapping of entire file by passing 0 for map length + with open(TESTFN, "wb+") as f: + f.write(2**16 * b'm') # Arbitrary character + + with open(TESTFN, "rb+") as f, \ + mmap.mmap(f.fileno(), 0) as mf: + self.assertEqual(len(mf), 2**16, "Map size should equal file size.") + self.assertEqual(mf.read(2**16), 2**16 * b"m") + + def test_length_0_offset(self): + # Issue #10916: test mapping of remainder of file by passing 0 for + # map length with an offset doesn't cause a segfault. + # NOTE: allocation granularity is currently 65536 under Win64, + # and therefore the minimum offset alignment. + with open(TESTFN, "wb") as f: + f.write((65536 * 2) * b'm') # Arbitrary character + + with open(TESTFN, "rb") as f: + with mmap.mmap(f.fileno(), 0, offset=65536, access=mmap.ACCESS_READ) as mf: + self.assertRaises(IndexError, mf.__getitem__, 80000) + + def test_length_0_large_offset(self): + # Issue #10959: test mapping of a file by passing 0 for + # map length with a large offset doesn't cause a segfault. + with open(TESTFN, "wb") as f: + f.write(115699 * b'm') # Arbitrary character + + with open(TESTFN, "w+b") as f: + self.assertRaises(ValueError, mmap.mmap, f.fileno(), 0, + offset=2147418112) + + def test_move(self): + # make move works everywhere (64-bit format problem earlier) + with open(TESTFN, 'wb+') as f: + + f.write(b"ABCDEabcde") # Arbitrary character + f.flush() + + mf = mmap.mmap(f.fileno(), 10) + mf.move(5, 0, 5) + self.assertEqual(mf[:], b"ABCDEABCDE", "Map move should have duplicated front 5") + mf.close() + + # more excessive test + data = b"0123456789" + for dest in range(len(data)): + for src in range(len(data)): + for count in range(len(data) - max(dest, src)): + expected = data[:dest] + data[src:src+count] + data[dest+count:] + m = mmap.mmap(-1, len(data)) + m[:] = data + m.move(dest, src, count) + self.assertEqual(m[:], expected) + m.close() + + # segfault test (Issue 5387) + m = mmap.mmap(-1, 100) + offsets = [-100, -1, 0, 1, 100] + for source, dest, size in itertools.product(offsets, offsets, offsets): + try: + m.move(source, dest, size) + except ValueError: + pass + + offsets = [(-1, -1, -1), (-1, -1, 0), (-1, 0, -1), (0, -1, -1), + (-1, 0, 0), (0, -1, 0), (0, 0, -1)] + for source, dest, size in offsets: + self.assertRaises(ValueError, m.move, source, dest, size) + + m.close() + + m = mmap.mmap(-1, 1) # single byte + self.assertRaises(ValueError, m.move, 0, 0, 2) + self.assertRaises(ValueError, m.move, 1, 0, 1) + self.assertRaises(ValueError, m.move, 0, 1, 1) + m.move(0, 0, 1) + m.move(0, 0, 0) + + + def test_anonymous(self): + # anonymous mmap.mmap(-1, PAGE) + m = mmap.mmap(-1, PAGESIZE) + for x in range(PAGESIZE): + self.assertEqual(m[x], 0, + "anonymously mmap'ed contents should be zero") + + for x in range(PAGESIZE): + b = x & 0xff + m[x] = b + self.assertEqual(m[x], b) + + def test_read_all(self): + m = mmap.mmap(-1, 16) + self.addCleanup(m.close) + + # With no parameters, or None or a negative argument, reads all + m.write(bytes(range(16))) + m.seek(0) + self.assertEqual(m.read(), bytes(range(16))) + m.seek(8) + self.assertEqual(m.read(), bytes(range(8, 16))) + m.seek(16) + self.assertEqual(m.read(), b'') + m.seek(3) + self.assertEqual(m.read(None), bytes(range(3, 16))) + m.seek(4) + self.assertEqual(m.read(-1), bytes(range(4, 16))) + m.seek(5) + self.assertEqual(m.read(-2), bytes(range(5, 16))) + m.seek(9) + self.assertEqual(m.read(-42), bytes(range(9, 16))) + + def test_read_invalid_arg(self): + m = mmap.mmap(-1, 16) + self.addCleanup(m.close) + + self.assertRaises(TypeError, m.read, 'foo') + self.assertRaises(TypeError, m.read, 5.5) + self.assertRaises(TypeError, m.read, [1, 2, 3]) + + def test_extended_getslice(self): + # Test extended slicing by comparing with list slicing. + s = bytes(reversed(range(256))) + m = mmap.mmap(-1, len(s)) + m[:] = s + self.assertEqual(m[:], s) + indices = (0, None, 1, 3, 19, 300, sys.maxsize, -1, -2, -31, -300) + for start in indices: + for stop in indices: + # Skip step 0 (invalid) + for step in indices[1:]: + self.assertEqual(m[start:stop:step], + s[start:stop:step]) + + def test_extended_set_del_slice(self): + # Test extended slicing by comparing with list slicing. + s = bytes(reversed(range(256))) + m = mmap.mmap(-1, len(s)) + indices = (0, None, 1, 3, 19, 300, sys.maxsize, -1, -2, -31, -300) + for start in indices: + for stop in indices: + # Skip invalid step 0 + for step in indices[1:]: + m[:] = s + self.assertEqual(m[:], s) + L = list(s) + # Make sure we have a slice of exactly the right length, + # but with different data. + data = L[start:stop:step] + data = bytes(reversed(data)) + L[start:stop:step] = data + m[start:stop:step] = data + self.assertEqual(m[:], bytes(L)) + + def make_mmap_file (self, f, halfsize): + # Write 2 pages worth of data to the file + f.write (b'\0' * halfsize) + f.write (b'foo') + f.write (b'\0' * (halfsize - 3)) + f.flush () + return mmap.mmap (f.fileno(), 0) + + def test_empty_file (self): + f = open (TESTFN, 'w+b') + f.close() + with open(TESTFN, "rb") as f : + self.assertRaisesRegex(ValueError, + "cannot mmap an empty file", + mmap.mmap, f.fileno(), 0, + access=mmap.ACCESS_READ) + + def test_offset (self): + f = open (TESTFN, 'w+b') + + try: # unlink TESTFN no matter what + halfsize = mmap.ALLOCATIONGRANULARITY + m = self.make_mmap_file (f, halfsize) + m.close () + f.close () + + mapsize = halfsize * 2 + # Try invalid offset + f = open(TESTFN, "r+b") + for offset in [-2, -1, None]: + try: + m = mmap.mmap(f.fileno(), mapsize, offset=offset) + self.assertEqual(0, 1) + except (ValueError, TypeError, OverflowError): + pass + else: + self.assertEqual(0, 0) + f.close() + + # Try valid offset, hopefully 8192 works on all OSes + f = open(TESTFN, "r+b") + m = mmap.mmap(f.fileno(), mapsize - halfsize, offset=halfsize) + self.assertEqual(m[0:3], b'foo') + f.close() + + # Try resizing map + try: + m.resize(512) + except SystemError: + pass + else: + # resize() is supported + self.assertEqual(len(m), 512) + # Check that we can no longer seek beyond the new size. + self.assertRaises(ValueError, m.seek, 513, 0) + # Check that the content is not changed + self.assertEqual(m[0:3], b'foo') + + # Check that the underlying file is truncated too + f = open(TESTFN, 'rb') + f.seek(0, 2) + self.assertEqual(f.tell(), halfsize + 512) + f.close() + self.assertEqual(m.size(), halfsize + 512) + + m.close() + + finally: + f.close() + try: + os.unlink(TESTFN) + except OSError: + pass + + def test_subclass(self): + class anon_mmap(mmap.mmap): + def __new__(klass, *args, **kwargs): + return mmap.mmap.__new__(klass, -1, *args, **kwargs) + anon_mmap(PAGESIZE) + + @unittest.skipUnless(hasattr(mmap, 'PROT_READ'), "needs mmap.PROT_READ") + def test_prot_readonly(self): + mapsize = 10 + with open(TESTFN, "wb") as fp: + fp.write(b"a"*mapsize) + with open(TESTFN, "rb") as f: + m = mmap.mmap(f.fileno(), mapsize, prot=mmap.PROT_READ) + self.assertRaises(TypeError, m.write, "foo") + + def test_error(self): + self.assertIs(mmap.error, OSError) + + def test_io_methods(self): + data = b"0123456789" + with open(TESTFN, "wb") as fp: + fp.write(b"x"*len(data)) + with open(TESTFN, "r+b") as f: + m = mmap.mmap(f.fileno(), len(data)) + # Test write_byte() + for i in range(len(data)): + self.assertEqual(m.tell(), i) + m.write_byte(data[i]) + self.assertEqual(m.tell(), i+1) + self.assertRaises(ValueError, m.write_byte, b"x"[0]) + self.assertEqual(m[:], data) + # Test read_byte() + m.seek(0) + for i in range(len(data)): + self.assertEqual(m.tell(), i) + self.assertEqual(m.read_byte(), data[i]) + self.assertEqual(m.tell(), i+1) + self.assertRaises(ValueError, m.read_byte) + # Test read() + m.seek(3) + self.assertEqual(m.read(3), b"345") + self.assertEqual(m.tell(), 6) + # Test write() + m.seek(3) + m.write(b"bar") + self.assertEqual(m.tell(), 6) + self.assertEqual(m[:], b"012bar6789") + m.write(bytearray(b"baz")) + self.assertEqual(m.tell(), 9) + self.assertEqual(m[:], b"012barbaz9") + self.assertRaises(ValueError, m.write, b"ba") + + def test_non_ascii_byte(self): + for b in (129, 200, 255): # > 128 + m = mmap.mmap(-1, 1) + m.write_byte(b) + self.assertEqual(m[0], b) + m.seek(0) + self.assertEqual(m.read_byte(), b) + m.close() + + @unittest.skipUnless(os.name == 'nt', 'requires Windows') + def test_tagname(self): + data1 = b"0123456789" + data2 = b"abcdefghij" + assert len(data1) == len(data2) + + # Test same tag + m1 = mmap.mmap(-1, len(data1), tagname="foo") + m1[:] = data1 + m2 = mmap.mmap(-1, len(data2), tagname="foo") + m2[:] = data2 + self.assertEqual(m1[:], data2) + self.assertEqual(m2[:], data2) + m2.close() + m1.close() + + # Test different tag + m1 = mmap.mmap(-1, len(data1), tagname="foo") + m1[:] = data1 + m2 = mmap.mmap(-1, len(data2), tagname="boo") + m2[:] = data2 + self.assertEqual(m1[:], data1) + self.assertEqual(m2[:], data2) + m2.close() + m1.close() + + @cpython_only + @unittest.skipUnless(os.name == 'nt', 'requires Windows') + def test_sizeof(self): + m1 = mmap.mmap(-1, 100) + tagname = "foo" + m2 = mmap.mmap(-1, 100, tagname=tagname) + self.assertEqual(sys.getsizeof(m2), + sys.getsizeof(m1) + len(tagname) + 1) + + @unittest.skipUnless(os.name == 'nt', 'requires Windows') + def test_crasher_on_windows(self): + # Should not crash (Issue 1733986) + m = mmap.mmap(-1, 1000, tagname="foo") + try: + mmap.mmap(-1, 5000, tagname="foo")[:] # same tagname, but larger size + except: + pass + m.close() + + # Should not crash (Issue 5385) + with open(TESTFN, "wb") as fp: + fp.write(b"x"*10) + f = open(TESTFN, "r+b") + m = mmap.mmap(f.fileno(), 0) + f.close() + try: + m.resize(0) # will raise OSError + except: + pass + try: + m[:] + except: + pass + m.close() + + @unittest.skipUnless(os.name == 'nt', 'requires Windows') + def test_invalid_descriptor(self): + # socket file descriptors are valid, but out of range + # for _get_osfhandle, causing a crash when validating the + # parameters to _get_osfhandle. + s = socket.socket() + try: + with self.assertRaises(OSError): + m = mmap.mmap(s.fileno(), 10) + finally: + s.close() + + def test_context_manager(self): + with mmap.mmap(-1, 10) as m: + self.assertFalse(m.closed) + self.assertTrue(m.closed) + + def test_context_manager_exception(self): + # Test that the OSError gets passed through + with self.assertRaises(Exception) as exc: + with mmap.mmap(-1, 10) as m: + raise OSError + self.assertIsInstance(exc.exception, OSError, + "wrong exception raised in context manager") + self.assertTrue(m.closed, "context manager failed") + + def test_weakref(self): + # Check mmap objects are weakrefable + mm = mmap.mmap(-1, 16) + wr = weakref.ref(mm) + self.assertIs(wr(), mm) + del mm + gc_collect() + self.assertIs(wr(), None) + + def test_write_returning_the_number_of_bytes_written(self): + mm = mmap.mmap(-1, 16) + self.assertEqual(mm.write(b""), 0) + self.assertEqual(mm.write(b"x"), 1) + self.assertEqual(mm.write(b"yz"), 2) + self.assertEqual(mm.write(b"python"), 6) + + @unittest.skipIf(os.name == 'nt', 'cannot resize anonymous mmaps on Windows') + def test_resize_past_pos(self): + m = mmap.mmap(-1, 8192) + self.addCleanup(m.close) + m.read(5000) + try: + m.resize(4096) + except SystemError: + self.skipTest("resizing not supported") + self.assertEqual(m.read(14), b'') + self.assertRaises(ValueError, m.read_byte) + self.assertRaises(ValueError, m.write_byte, 42) + self.assertRaises(ValueError, m.write, b'abc') + + def test_concat_repeat_exception(self): + m = mmap.mmap(-1, 16) + with self.assertRaises(TypeError): + m + m + with self.assertRaises(TypeError): + m * 2 + + @unittest.skipIf(sys.platform.startswith("linux"), "TODO: RUSTPYTHON, memmap2 doesn't throw OSError when offset is not a multiple of mmap.PAGESIZE on Linux") + def test_flush_return_value(self): + # mm.flush() should return None on success, raise an + # exception on error under all platforms. + mm = mmap.mmap(-1, 16) + self.addCleanup(mm.close) + mm.write(b'python') + result = mm.flush() + self.assertIsNone(result) + if sys.platform.startswith('linux'): + # 'offset' must be a multiple of mmap.PAGESIZE on Linux. + # See bpo-34754 for details. + self.assertRaises(OSError, mm.flush, 1, len(b'python')) + + def test_repr(self): + open_mmap_repr_pat = re.compile( + r"\S+), " + r"length=(?P\d+), " + r"pos=(?P\d+), " + r"offset=(?P\d+)>") + closed_mmap_repr_pat = re.compile(r"") + mapsizes = (50, 100, 1_000, 1_000_000, 10_000_000) + offsets = tuple((mapsize // 2 // mmap.ALLOCATIONGRANULARITY) + * mmap.ALLOCATIONGRANULARITY for mapsize in mapsizes) + for offset, mapsize in zip(offsets, mapsizes): + data = b'a' * mapsize + length = mapsize - offset + accesses = ('ACCESS_DEFAULT', 'ACCESS_READ', + 'ACCESS_COPY', 'ACCESS_WRITE') + positions = (0, length//10, length//5, length//4) + with open(TESTFN, "wb+") as fp: + fp.write(data) + fp.flush() + for access, pos in itertools.product(accesses, positions): + accint = getattr(mmap, access) + with mmap.mmap(fp.fileno(), + length, + access=accint, + offset=offset) as mm: + mm.seek(pos) + match = open_mmap_repr_pat.match(repr(mm)) + self.assertIsNotNone(match) + self.assertEqual(match.group('access'), access) + self.assertEqual(match.group('length'), str(length)) + self.assertEqual(match.group('pos'), str(pos)) + self.assertEqual(match.group('offset'), str(offset)) + match = closed_mmap_repr_pat.match(repr(mm)) + self.assertIsNotNone(match) + + @unittest.skipUnless(hasattr(mmap.mmap, 'madvise'), 'needs madvise') + def test_madvise(self): + size = 2 * PAGESIZE + m = mmap.mmap(-1, size) + + with self.assertRaisesRegex(ValueError, "madvise start out of bounds"): + m.madvise(mmap.MADV_NORMAL, size) + with self.assertRaisesRegex(ValueError, "madvise start out of bounds"): + m.madvise(mmap.MADV_NORMAL, -1) + with self.assertRaisesRegex(ValueError, "madvise length invalid"): + m.madvise(mmap.MADV_NORMAL, 0, -1) + with self.assertRaisesRegex(OverflowError, "madvise length too large"): + m.madvise(mmap.MADV_NORMAL, PAGESIZE, sys.maxsize) + self.assertEqual(m.madvise(mmap.MADV_NORMAL), None) + self.assertEqual(m.madvise(mmap.MADV_NORMAL, PAGESIZE), None) + self.assertEqual(m.madvise(mmap.MADV_NORMAL, PAGESIZE, size), None) + self.assertEqual(m.madvise(mmap.MADV_NORMAL, 0, 2), None) + self.assertEqual(m.madvise(mmap.MADV_NORMAL, 0, size), None) + + +class LargeMmapTests(unittest.TestCase): + + def setUp(self): + unlink(TESTFN) + + def tearDown(self): + unlink(TESTFN) + + def _make_test_file(self, num_zeroes, tail): + if sys.platform[:3] == 'win' or sys.platform == 'darwin': + requires('largefile', + 'test requires %s bytes and a long time to run' % str(0x180000000)) + f = open(TESTFN, 'w+b') + try: + f.seek(num_zeroes) + f.write(tail) + f.flush() + except (OSError, OverflowError, ValueError): + try: + f.close() + except (OSError, OverflowError): + pass + raise unittest.SkipTest("filesystem does not have largefile support") + return f + + def test_large_offset(self): + with self._make_test_file(0x14FFFFFFF, b" ") as f: + with mmap.mmap(f.fileno(), 0, offset=0x140000000, access=mmap.ACCESS_READ) as m: + self.assertEqual(m[0xFFFFFFF], 32) + + def test_large_filesize(self): + with self._make_test_file(0x17FFFFFFF, b" ") as f: + if sys.maxsize < 0x180000000: + # On 32 bit platforms the file is larger than sys.maxsize so + # mapping the whole file should fail -- Issue #16743 + with self.assertRaises(OverflowError): + mmap.mmap(f.fileno(), 0x180000000, access=mmap.ACCESS_READ) + with self.assertRaises(ValueError): + mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) + with mmap.mmap(f.fileno(), 0x10000, access=mmap.ACCESS_READ) as m: + self.assertEqual(m.size(), 0x180000000) + + # Issue 11277: mmap() with large (~4 GiB) sparse files crashes on OS X. + + def _test_around_boundary(self, boundary): + tail = b' DEARdear ' + start = boundary - len(tail) // 2 + end = start + len(tail) + with self._make_test_file(start, tail) as f: + with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m: + self.assertEqual(m[start:end], tail) + + @unittest.skipUnless(sys.maxsize > _4G, "test cannot run on 32-bit systems") + def test_around_2GB(self): + self._test_around_boundary(_2G) + + @unittest.skipUnless(sys.maxsize > _4G, "test cannot run on 32-bit systems") + def test_around_4GB(self): + self._test_around_boundary(_4G) + + +if __name__ == '__main__': + unittest.main() diff --git a/common/Cargo.toml b/common/Cargo.toml index 12c70b5413..16c4538ebc 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -21,7 +21,7 @@ siphasher = "0.3" rand = "0.8" volatile = "0.3" radium = "0.7" -libc = "0.2.101" +libc = "0.2.126" ascii = "1.0" unic-ucd-category = "0.9" diff --git a/stdlib/Cargo.toml b/stdlib/Cargo.toml index 5e91278667..c4a7f0e073 100644 --- a/stdlib/Cargo.toml +++ b/stdlib/Cargo.toml @@ -57,13 +57,18 @@ hex = "0.4.3" puruspe = "0.1.5" nix = "0.23.1" xml-rs = "0.8.4" -libc = "0.2.123" +libc = "0.2.126" cfg-if = "1.0.0" ahash = "0.7.6" libz-sys = { version = "1.1.5", optional = true } num_enum = "0.5.7" ascii = "1.0.0" +# mmap +[target.'cfg(all(unix, not(target_arch = "wasm32")))'.dependencies] +memmap2 = "0.5.4" +page_size = "0.4" + [target.'cfg(all(unix, not(target_os = "redox")))'.dependencies] termios = "0.3.3" diff --git a/stdlib/src/lib.rs b/stdlib/src/lib.rs index 44ca0ef941..6159b6a24f 100644 --- a/stdlib/src/lib.rs +++ b/stdlib/src/lib.rs @@ -16,6 +16,8 @@ mod gc; mod hashlib; mod json; mod math; +#[cfg(unix)] +mod mmap; mod platform; mod pyexpat; mod pystruct; @@ -125,6 +127,7 @@ pub fn get_module_inits() -> impl Iterator, StdlibInit { "_posixsubprocess" => posixsubprocess::make_module, "syslog" => syslog::make_module, + "mmap" => mmap::make_module, } #[cfg(target_os = "macos")] { diff --git a/stdlib/src/mmap.rs b/stdlib/src/mmap.rs new file mode 100644 index 0000000000..aff3d17c57 --- /dev/null +++ b/stdlib/src/mmap.rs @@ -0,0 +1,1080 @@ +//! mmap module +pub(crate) use mmap::make_module; + +#[pymodule] +mod mmap { + use crate::common::{ + borrow::{BorrowedValue, BorrowedValueMut}, + lock::{MapImmutable, PyMutex, PyMutexGuard}, + }; + use crate::vm::{ + builtins::{PyBytes, PyBytesRef, PyInt, PyIntRef, PyTypeRef}, + byte::{bytes_from_object, value_from_object}, + function::{ArgBytesLike, FuncArgs, OptionalArg}, + protocol::{ + BufferDescriptor, BufferMethods, PyBuffer, PyMappingMethods, PySequenceMethods, + }, + sliceable::{saturate_index, wrap_index, SaturatedSlice, SequenceIndex}, + types::{AsBuffer, AsMapping, AsSequence, Constructor}, + AsObject, FromArgs, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, + TryFromBorrowedObject, VirtualMachine, + }; + use crossbeam_utils::atomic::AtomicCell; + use memmap2::{Advice, Mmap, MmapMut, MmapOptions}; + use nix::unistd; + use num_traits::Signed; + use std::fs::File; + use std::io::Write; + use std::ops::{Deref, DerefMut}; + #[cfg(all(unix, not(target_os = "redox")))] + use std::os::unix::io::{FromRawFd, IntoRawFd, RawFd}; + + fn advice_try_from_i32(vm: &VirtualMachine, i: i32) -> PyResult { + Ok(match i { + libc::MADV_NORMAL => Advice::Normal, + libc::MADV_RANDOM => Advice::Random, + libc::MADV_SEQUENTIAL => Advice::Sequential, + libc::MADV_WILLNEED => Advice::WillNeed, + libc::MADV_DONTNEED => Advice::DontNeed, + #[cfg(any(target_os = "linux", target_os = "macos", target_os = "ios"))] + libc::MADV_FREE => Advice::Free, + #[cfg(target_os = "linux")] + libc::MADV_DONTFORK => Advice::DontFork, + #[cfg(target_os = "linux")] + libc::MADV_DOFORK => Advice::DoFork, + #[cfg(target_os = "linux")] + libc::MADV_MERGEABLE => Advice::Mergeable, + #[cfg(target_os = "linux")] + libc::MADV_UNMERGEABLE => Advice::Unmergeable, + #[cfg(target_os = "linux")] + libc::MADV_HUGEPAGE => Advice::HugePage, + #[cfg(target_os = "linux")] + libc::MADV_NOHUGEPAGE => Advice::NoHugePage, + #[cfg(target_os = "linux")] + libc::MADV_REMOVE => Advice::Remove, + #[cfg(target_os = "linux")] + libc::MADV_DONTDUMP => Advice::DontDump, + #[cfg(target_os = "linux")] + libc::MADV_DODUMP => Advice::DoDump, + #[cfg(target_os = "linux")] + libc::MADV_HWPOISON => Advice::HwPoison, + _ => return Err(vm.new_value_error("Not a valid Advice value".to_owned())), + }) + } + + #[repr(C)] + #[derive(PartialEq, Eq, Debug)] + enum AccessMode { + Default = 0, + Read = 1, + Write = 2, + Copy = 3, + } + + impl TryFromBorrowedObject for AccessMode { + fn try_from_borrowed_object(vm: &VirtualMachine, obj: &PyObject) -> PyResult { + let i = u32::try_from_borrowed_object(vm, obj)?; + Ok(match i { + 0 => Self::Default, + 1 => Self::Read, + 2 => Self::Write, + 3 => Self::Copy, + _ => return Err(vm.new_value_error("Not a valid AccessMode value".to_owned())), + }) + } + } + + #[pyattr] + use libc::{ + MADV_DONTNEED, MADV_NORMAL, MADV_RANDOM, MADV_SEQUENTIAL, MADV_WILLNEED, MAP_ANON, + MAP_ANONYMOUS, MAP_PRIVATE, MAP_SHARED, PROT_READ, PROT_WRITE, + }; + + #[cfg(target_os = "macos")] + #[pyattr] + use libc::{MADV_FREE_REUSABLE, MADV_FREE_REUSE}; + + #[cfg(any( + target_os = "android", + target_os = "dragonfly", + target_os = "fuchsia", + target_os = "freebsd", + target_os = "linux", + target_os = "netbsd", + target_os = "openbsd", + target_vendor = "apple" + ))] + #[pyattr] + use libc::MADV_FREE; + + #[cfg(target_os = "linux")] + #[pyattr] + use libc::{ + MADV_DODUMP, MADV_DOFORK, MADV_DONTDUMP, MADV_DONTFORK, MADV_HUGEPAGE, MADV_HWPOISON, + MADV_MERGEABLE, MADV_NOHUGEPAGE, MADV_REMOVE, MADV_SOFT_OFFLINE, MADV_UNMERGEABLE, + }; + + #[cfg(all(target_os = "linux", target_arch = "x86_64", target_env = "gnu"))] + #[pyattr] + use libc::{MAP_DENYWRITE, MAP_EXECUTABLE, MAP_POPULATE}; + + #[pyattr] + const ACCESS_DEFAULT: u32 = AccessMode::Default as u32; + #[pyattr] + const ACCESS_READ: u32 = AccessMode::Read as u32; + #[pyattr] + const ACCESS_WRITE: u32 = AccessMode::Write as u32; + #[pyattr] + const ACCESS_COPY: u32 = AccessMode::Copy as u32; + + #[cfg(all(unix, not(target_arch = "wasm32")))] + #[pyattr(name = "PAGESIZE", once)] + fn page_size(_vm: &VirtualMachine) -> usize { + page_size::get() + } + + #[cfg(all(unix, not(target_arch = "wasm32")))] + #[pyattr(name = "ALLOCATIONGRANULARITY", once)] + fn granularity(_vm: &VirtualMachine) -> usize { + page_size::get_granularity() + } + + #[pyattr(name = "error", once)] + fn error_type(vm: &VirtualMachine) -> PyTypeRef { + vm.ctx.exceptions.os_error.to_owned() + } + + #[derive(Debug)] + enum MmapObj { + Write(MmapMut), + Read(Mmap), + } + + #[pyattr] + #[pyclass(name = "mmap")] + #[derive(Debug, PyPayload)] + struct PyMmap { + closed: AtomicCell, + mmap: PyMutex>, + fd: RawFd, + offset: libc::off_t, + size: AtomicCell, + pos: AtomicCell, // relative to offset + exports: AtomicCell, + access: AccessMode, + } + + #[derive(FromArgs)] + struct MmapNewArgs { + #[pyarg(any)] + fileno: RawFd, + #[pyarg(any)] + length: isize, + #[pyarg(any, default = "MAP_SHARED")] + flags: libc::c_int, + #[pyarg(any, default = "PROT_WRITE|PROT_READ")] + prot: libc::c_int, + #[pyarg(any, default = "AccessMode::Default")] + access: AccessMode, + #[pyarg(any, default = "0")] + offset: libc::off_t, + } + + #[derive(FromArgs)] + pub struct FlushOptions { + #[pyarg(positional, default)] + offset: Option, + #[pyarg(positional, default)] + size: Option, + } + + impl FlushOptions { + fn values(self, len: usize) -> Option<(usize, usize)> { + let offset = if let Some(offset) = self.offset { + if offset < 0 { + return None; + } + offset as usize + } else { + 0 + }; + let size = if let Some(size) = self.size { + if size < 0 { + return None; + } + size as usize + } else { + len + }; + if len.checked_sub(offset)? < size { + return None; + } + Some((offset, size)) + } + } + + #[derive(FromArgs, Clone)] + pub struct FindOptions { + #[pyarg(positional)] + sub: Vec, + #[pyarg(positional, default)] + start: Option, + #[pyarg(positional, default)] + end: Option, + } + + #[derive(FromArgs)] + pub struct AdviseOptions { + #[pyarg(positional)] + option: libc::c_int, + #[pyarg(positional, default)] + start: Option, + #[pyarg(positional, default)] + length: Option, + } + + impl AdviseOptions { + fn values(self, len: usize, vm: &VirtualMachine) -> PyResult<(libc::c_int, usize, usize)> { + let start = self + .start + .map(|s| { + s.try_to_primitive::(vm) + .ok() + .filter(|s| *s < len) + .ok_or_else(|| vm.new_value_error("madvise start out of bounds".to_owned())) + }) + .transpose()? + .unwrap_or(0); + let length = self + .length + .map(|s| { + s.try_to_primitive::(vm) + .map_err(|_| vm.new_value_error("madvise length invalid".to_owned())) + }) + .transpose()? + .unwrap_or(len); + + if isize::MAX as usize - start < length { + return Err(vm.new_overflow_error("madvise length too large".to_owned())); + } + + let length = if start + length > len { + len - start + } else { + length + }; + + Ok((self.option, start, length)) + } + } + + impl Constructor for PyMmap { + type Args = MmapNewArgs; + + // TODO: Windows is not supported right now. + #[cfg(all(unix, not(target_os = "redox")))] + fn py_new( + cls: PyTypeRef, + MmapNewArgs { + fileno: mut fd, + length, + flags, + prot, + access, + offset, + }: Self::Args, + vm: &VirtualMachine, + ) -> PyResult { + let map_size = length; + if map_size < 0 { + return Err( + vm.new_overflow_error("memory mapped length must be positive".to_owned()) + ); + } + let mut map_size = map_size as usize; + + if offset < 0 { + return Err( + vm.new_overflow_error("memory mapped offset must be positive".to_owned()) + ); + } + + if (access != AccessMode::Default) + && ((flags != MAP_SHARED) || (prot != (PROT_WRITE | PROT_READ))) + { + return Err(vm.new_value_error( + "mmap can't specify both access and flags, prot.".to_owned(), + )); + } + + // TODO: memmap2 doesn't support mapping with pro and flags right now + let (_flags, _prot, access) = match access { + AccessMode::Read => (MAP_SHARED, PROT_READ, access), + AccessMode::Write => (MAP_SHARED, PROT_READ | PROT_WRITE, access), + AccessMode::Copy => (MAP_PRIVATE, PROT_READ | PROT_WRITE, access), + AccessMode::Default => { + let access = if (prot & PROT_READ) != 0 && (prot & PROT_WRITE) != 0 { + access + } else if (prot & PROT_WRITE) != 0 { + AccessMode::Write + } else { + AccessMode::Read + }; + (flags, prot, access) + } + }; + + if fd != -1 { + let file = unsafe { File::from_raw_fd(fd) }; + let metadata = file + .metadata() + .map_err(|e| vm.new_os_error(e.to_string()))?; + let file_len: libc::off_t = metadata.len().try_into().expect("file size overflow"); + // File::from_raw_fd will consume the fd, so we + // have to get it again. + fd = file.into_raw_fd(); + if map_size == 0 { + if file_len == 0 { + return Err(vm.new_value_error("cannot mmap an empty file".to_owned())); + } + + if offset > file_len { + return Err( + vm.new_value_error("mmap offset is greater than file size".to_owned()) + ); + } + + map_size = (file_len - offset) + .try_into() + .map_err(|_| vm.new_value_error("mmap length is too large".to_owned()))?; + } else if offset > file_len || file_len - offset < map_size as libc::off_t { + return Err( + vm.new_value_error("mmap length is greater than file size".to_owned()) + ); + } + } + + let mut mmap_opt = MmapOptions::new(); + let mmap_opt = mmap_opt.offset(offset.try_into().unwrap()).len(map_size); + + let (fd, mmap) = if fd == -1 { + ( + fd, + MmapObj::Write( + mmap_opt + .map_anon() + .map_err(|e| vm.new_os_error(e.to_string()))?, + ), + ) + } else { + let new_fd = unistd::dup(fd).map_err(|e| vm.new_os_error(e.to_string()))?; + let mmap = match access { + AccessMode::Default | AccessMode::Write => MmapObj::Write( + unsafe { mmap_opt.map_mut(fd) } + .map_err(|e| vm.new_os_error(e.to_string()))?, + ), + AccessMode::Read => MmapObj::Read( + unsafe { mmap_opt.map(fd) }.map_err(|e| vm.new_os_error(e.to_string()))?, + ), + AccessMode::Copy => MmapObj::Write( + unsafe { mmap_opt.map_copy(fd) } + .map_err(|e| vm.new_os_error(e.to_string()))?, + ), + }; + (new_fd, mmap) + }; + + let m_obj = Self { + closed: AtomicCell::new(false), + mmap: PyMutex::new(Some(mmap)), + fd, + offset, + size: AtomicCell::new(map_size), + pos: AtomicCell::new(0), + exports: AtomicCell::new(0), + access, + }; + + m_obj.into_ref_with_type(vm, cls).map(Into::into) + } + } + + static BUFFER_METHODS: BufferMethods = BufferMethods { + obj_bytes: |buffer| buffer.obj_as::().as_bytes(), + obj_bytes_mut: |buffer| buffer.obj_as::().as_bytes_mut(), + release: |buffer| { + buffer.obj_as::().exports.fetch_sub(1); + }, + retain: |buffer| { + buffer.obj_as::().exports.fetch_add(1); + }, + }; + + impl AsBuffer for PyMmap { + fn as_buffer(zelf: &Py, _vm: &VirtualMachine) -> PyResult { + let buf = PyBuffer::new( + zelf.to_owned().into(), + BufferDescriptor::simple(zelf.len(), true), + &BUFFER_METHODS, + ); + + Ok(buf) + } + } + + impl AsMapping for PyMmap { + const AS_MAPPING: PyMappingMethods = PyMappingMethods { + length: Some(|mapping, _vm| Ok(Self::mapping_downcast(mapping).len())), + subscript: Some(|mapping, needle, vm| { + Self::mapping_downcast(mapping)._getitem(needle, vm) + }), + ass_subscript: Some(|mapping, needle, value, vm| { + let zelf = Self::mapping_downcast(mapping); + if let Some(value) = value { + Self::_setitem(zelf.to_owned(), needle, value, vm) + } else { + Err(vm.new_type_error("mmap object doesn't support item deletion".to_owned())) + } + }), + }; + } + + impl AsSequence for PyMmap { + const AS_SEQUENCE: PySequenceMethods = PySequenceMethods { + length: Some(|seq, _vm| Ok(Self::sequence_downcast(seq).len())), + item: Some(|seq, i, vm| { + let zelf = Self::sequence_downcast(seq); + zelf.get_item_by_index(i, vm) + }), + ass_item: Some(|seq, i, value, vm| { + let zelf = Self::sequence_downcast(seq); + if let Some(value) = value { + Self::setitem_by_index(zelf.to_owned(), i, value, vm) + } else { + Err(vm.new_type_error("mmap object doesn't support item deletion".to_owned())) + } + }), + ..PySequenceMethods::NOT_IMPLEMENTED + }; + } + + #[pyimpl(with(Constructor, AsMapping, AsSequence, AsBuffer), flags(BASETYPE))] + impl PyMmap { + fn as_bytes_mut(&self) -> BorrowedValueMut<[u8]> { + PyMutexGuard::map(self.mmap.lock(), |m| { + match m.as_mut().expect("mmap closed or invalid") { + MmapObj::Read(_) => panic!("mmap can't modify a readonly memory map."), + MmapObj::Write(mmap) => &mut mmap[..], + } + }) + .into() + } + + fn as_bytes(&self) -> BorrowedValue<[u8]> { + PyMutexGuard::map_immutable(self.mmap.lock(), |m| { + match m.as_ref().expect("mmap closed or invalid") { + MmapObj::Read(ref mmap) => &mmap[..], + MmapObj::Write(ref mmap) => &mmap[..], + } + }) + .into() + } + + #[pymethod(magic)] + fn len(&self) -> usize { + self.size.load() + } + + #[inline] + fn pos(&self) -> usize { + self.pos.load() + } + + #[inline] + fn advance_pos(&self, step: usize) { + self.pos.store(self.pos() + step); + } + + #[inline] + fn try_writable( + &self, + vm: &VirtualMachine, + f: impl FnOnce(&mut MmapMut) -> R, + ) -> PyResult { + if matches!(self.access, AccessMode::Read) { + return Err( + vm.new_type_error("mmap can't modify a readonly memory map.".to_owned()) + ); + } + + match self.check_valid(vm)?.deref_mut().as_mut().unwrap() { + MmapObj::Write(mmap) => Ok(f(mmap)), + _ => unreachable!("already check"), + } + } + + fn check_valid(&self, vm: &VirtualMachine) -> PyResult>> { + let m = self.mmap.lock(); + + if m.is_none() { + return Err(vm.new_value_error("mmap closed or invalid".to_owned())); + } + + Ok(m) + } + + /// TODO: impl resize + #[allow(dead_code)] + fn check_resizeable(&self, vm: &VirtualMachine) -> PyResult<()> { + if self.exports.load() > 0 { + return Err(vm.new_buffer_error( + "mmap can't resize with extant buffers exported.".to_owned(), + )); + } + + if self.access == AccessMode::Write || self.access == AccessMode::Default { + return Ok(()); + } + + Err(vm.new_type_error( + "mmap can't resize a readonly or copy-on-write memory map.".to_owned(), + )) + } + + #[pyproperty] + fn closed(&self) -> bool { + self.closed.load() + } + + #[pymethod(magic)] + fn repr(zelf: PyRef) -> PyResult { + let mmap = zelf.mmap.lock(); + + if mmap.is_none() { + return Ok("".to_owned()); + } + + let access_str = match zelf.access { + AccessMode::Default => "ACCESS_DEFAULT", + AccessMode::Read => "ACCESS_READ", + AccessMode::Write => "ACCESS_WRITE", + AccessMode::Copy => "ACCESS_COPY", + }; + + let repr = format!( + "", + access_str, + zelf.len(), + zelf.pos(), + zelf.offset + ); + + Ok(repr) + } + + #[pymethod] + fn close(&self, vm: &VirtualMachine) -> PyResult<()> { + if self.closed() { + return Ok(()); + } + + if self.exports.load() > 0 { + return Err(vm.new_buffer_error("cannot close exported pointers exist.".to_owned())); + } + let mut mmap = self.mmap.lock(); + self.closed.store(true); + *mmap = None; + + Ok(()) + } + + fn get_find_range(&self, options: FindOptions) -> (usize, usize) { + let size = self.len(); + let start = options + .start + .map(|start| saturate_index(start, size)) + .unwrap_or_else(|| self.pos()); + let end = options + .end + .map(|end| saturate_index(end, size)) + .unwrap_or(size); + (start, end) + } + + #[pymethod] + fn find(&self, options: FindOptions, vm: &VirtualMachine) -> PyResult { + let (start, end) = self.get_find_range(options.clone()); + + let sub = &options.sub; + + if sub.is_empty() { + return Ok(PyInt::from(0isize)); + } + + let mmap = self.check_valid(vm)?; + let buf = match mmap.as_ref().unwrap() { + MmapObj::Read(mmap) => &mmap[start..end], + MmapObj::Write(mmap) => &mmap[start..end], + }; + let pos = buf.windows(sub.len()).position(|window| window == sub); + + Ok(pos.map_or(PyInt::from(-1isize), |i| PyInt::from(start + i))) + } + + #[pymethod] + fn rfind(&self, options: FindOptions, vm: &VirtualMachine) -> PyResult { + let (start, end) = self.get_find_range(options.clone()); + + let sub = &options.sub; + if sub.is_empty() { + return Ok(PyInt::from(0isize)); + } + + let mmap = self.check_valid(vm)?; + let buf = match mmap.as_ref().unwrap() { + MmapObj::Read(mmap) => &mmap[start..end], + MmapObj::Write(mmap) => &mmap[start..end], + }; + let pos = buf.windows(sub.len()).rposition(|window| window == sub); + + Ok(pos.map_or(PyInt::from(-1isize), |i| PyInt::from(start + i))) + } + + #[pymethod] + fn flush(&self, options: FlushOptions, vm: &VirtualMachine) -> PyResult<()> { + let (offset, size) = options + .values(self.len()) + .ok_or_else(|| vm.new_value_error("flush values out of range".to_owned()))?; + + if self.access == AccessMode::Read || self.access == AccessMode::Copy { + return Ok(()); + } + + match self.check_valid(vm)?.deref().as_ref().unwrap() { + MmapObj::Read(_mmap) => {} + MmapObj::Write(mmap) => { + mmap.flush_range(offset, size) + .map_err(|e| vm.new_os_error(e.to_string()))?; + } + } + + Ok(()) + } + + #[allow(unused_assignments)] + #[pymethod] + fn madvise(&self, options: AdviseOptions, vm: &VirtualMachine) -> PyResult<()> { + let (option, _start, _length) = options.values(self.len(), vm)?; + let advice = advice_try_from_i32(vm, option)?; + + //TODO: memmap2 doesn't support madvise range right now. + match self.check_valid(vm)?.deref().as_ref().unwrap() { + MmapObj::Read(mmap) => mmap.advise(advice), + MmapObj::Write(mmap) => mmap.advise(advice), + } + .map_err(|e| vm.new_os_error(e.to_string()))?; + + Ok(()) + } + + #[pymethod(name = "move")] + fn move_( + &self, + dest: PyIntRef, + src: PyIntRef, + cnt: PyIntRef, + vm: &VirtualMachine, + ) -> PyResult<()> { + fn args( + dest: PyIntRef, + src: PyIntRef, + cnt: PyIntRef, + size: usize, + vm: &VirtualMachine, + ) -> Option<(usize, usize, usize)> { + if dest.as_bigint().is_negative() + || src.as_bigint().is_negative() + || cnt.as_bigint().is_negative() + { + return None; + } + let dest = dest.try_to_primitive(vm).ok()?; + let src = src.try_to_primitive(vm).ok()?; + let cnt = cnt.try_to_primitive(vm).ok()?; + if size - dest < cnt || size - src < cnt { + return None; + } + Some((dest, src, cnt)) + } + + let size = self.len(); + let (dest, src, cnt) = args(dest, src, cnt, size, vm).ok_or_else(|| { + vm.new_value_error("source, destination, or count out of range".to_owned()) + })?; + + let dest_end = dest + cnt; + let src_end = src + cnt; + + self.try_writable(vm, |mmap| { + let src_buf = mmap[src..src_end].to_vec(); + (&mut mmap[dest..dest_end]) + .write(&src_buf) + .map_err(|e| vm.new_os_error(e.to_string()))?; + Ok(()) + })? + } + + #[pymethod] + fn read(&self, n: OptionalArg, vm: &VirtualMachine) -> PyResult { + let num_bytes = n + .map(|obj| { + let name = obj.class().name().to_string(); + obj.try_into_value::>(vm).map_err(|_| { + vm.new_type_error(format!( + "read argument must be int or None, not {}", + name, + )) + }) + }) + .transpose()? + .flatten(); + let mmap = self.check_valid(vm)?; + let pos = self.pos(); + let remaining = self.len().saturating_sub(pos); + let num_bytes = num_bytes + .filter(|&n| n >= 0 && (n as usize) <= remaining) + .map(|n| n as usize) + .unwrap_or(remaining); + + let end_pos = (pos + num_bytes) as usize; + let bytes = match mmap.deref().as_ref().unwrap() { + MmapObj::Read(mmap) => mmap[pos as usize..end_pos].to_vec(), + MmapObj::Write(mmap) => mmap[pos as usize..end_pos].to_vec(), + }; + + let result = PyBytes::from(bytes).into_ref(vm); + + self.advance_pos(num_bytes); + + Ok(result) + } + + #[pymethod] + fn read_byte(&self, vm: &VirtualMachine) -> PyResult { + let pos = self.pos(); + if pos >= self.len() { + return Err(vm.new_value_error("read byte out of range".to_owned())); + } + + let b = match self.check_valid(vm)?.deref().as_ref().unwrap() { + MmapObj::Read(mmap) => mmap[pos as usize], + MmapObj::Write(mmap) => mmap[pos as usize], + }; + + self.advance_pos(1); + + Ok(PyInt::from(b).into_ref(vm)) + } + + #[pymethod] + fn readline(&self, vm: &VirtualMachine) -> PyResult { + let pos = self.pos(); + let mmap = self.check_valid(vm)?; + + let remaining = self.len().saturating_sub(pos); + if remaining == 0 { + return Ok(PyBytes::from(vec![]).into_ref(vm)); + } + + let eof = match mmap.as_ref().unwrap() { + MmapObj::Read(mmap) => &mmap[pos..], + MmapObj::Write(mmap) => &mmap[pos..], + } + .iter() + .position(|&x| x == b'\n'); + + let end_pos = if let Some(i) = eof { + pos + i + 1 + } else { + self.len() + }; + + let bytes = match mmap.deref().as_ref().unwrap() { + MmapObj::Read(mmap) => mmap[pos as usize..end_pos].to_vec(), + MmapObj::Write(mmap) => mmap[pos as usize..end_pos].to_vec(), + }; + + let result = PyBytes::from(bytes).into_ref(vm); + + self.advance_pos(end_pos - pos); + + Ok(result) + } + + //TODO: supports resize + #[pymethod] + fn resize(&self, _newsize: PyIntRef, vm: &VirtualMachine) -> PyResult<()> { + self.check_resizeable(vm)?; + Err(vm.new_system_error("mmap: resizing not available--no mremap()".to_owned())) + } + + #[pymethod] + fn seek( + &self, + dist: isize, + whence: OptionalArg, + vm: &VirtualMachine, + ) -> PyResult<()> { + let how = whence.unwrap_or(0); + let size = self.len(); + + let new_pos = match how { + 0 => dist, // relative to start + 1 => { + // relative to current position + let pos = self.pos(); + if (((isize::MAX as usize) - pos) as isize) < dist { + return Err(vm.new_value_error("seek out of range".to_owned())); + } + pos as isize + dist + } + 2 => { + // relative to end + if (((isize::MAX as usize) - size) as isize) < dist { + return Err(vm.new_value_error("seek out of range".to_owned())); + } + size as isize + dist + } + _ => return Err(vm.new_value_error("unknown seek type".to_owned())), + }; + + if new_pos < 0 || (new_pos as usize) > size { + return Err(vm.new_value_error("seek out of range".to_owned())); + } + + self.pos.store(new_pos as usize); + + Ok(()) + } + + #[pymethod] + fn size(&self, vm: &VirtualMachine) -> PyResult { + let new_fd = unistd::dup(self.fd).map_err(|e| vm.new_os_error(e.to_string()))?; + let file = unsafe { File::from_raw_fd(new_fd) }; + let file_len = match file.metadata() { + Ok(m) => m.len(), + Err(e) => return Err(vm.new_os_error(e.to_string())), + }; + + Ok(PyInt::from(file_len).into_ref(vm)) + } + + #[pymethod] + fn tell(&self) -> PyResult { + Ok(self.pos()) + } + + #[pymethod] + fn write(&self, bytes: ArgBytesLike, vm: &VirtualMachine) -> PyResult { + let pos = self.pos(); + let size = self.len(); + + let data = bytes.borrow_buf(); + + if pos > size || size - pos < data.len() { + return Err(vm.new_value_error("data out of range".to_owned())); + } + + let len = self.try_writable(vm, |mmap| { + (&mut mmap[pos as usize..(pos as usize + data.len())]) + .write(&data) + .map_err(|e| vm.new_os_error(e.to_string()))?; + Ok(data.len()) + })??; + + self.advance_pos(len); + + Ok(PyInt::from(len).into_ref(vm)) + } + + #[pymethod] + fn write_byte(&self, byte: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { + let b = value_from_object(vm, &byte)?; + + let pos = self.pos(); + let size = self.len(); + + if pos >= size { + return Err(vm.new_value_error("write byte out of range".to_owned())); + } + + self.try_writable(vm, |mmap| { + mmap[pos as usize] = b; + })?; + + self.advance_pos(1); + + Ok(()) + } + + fn get_item_by_index(&self, i: isize, vm: &VirtualMachine) -> PyResult { + let i = wrap_index(i, self.len()) + .ok_or_else(|| vm.new_index_error("mmap index out of range".to_owned()))?; + + let b = match self.check_valid(vm)?.deref().as_ref().unwrap() { + MmapObj::Read(mmap) => mmap[i as usize], + MmapObj::Write(mmap) => mmap[i as usize], + }; + + Ok(PyInt::from(b).into_ref(vm).into()) + } + + fn getitem_by_slice( + &self, + slice: &SaturatedSlice, + vm: &VirtualMachine, + ) -> PyResult { + let (range, step, slicelen) = slice.adjust_indices(self.len()); + + let mmap = self.check_valid(vm)?; + + if slicelen == 0 { + return Ok(PyBytes::from(vec![]).into_ref(vm).into()); + } else if step == 1 { + let bytes = match mmap.deref().as_ref().unwrap() { + MmapObj::Read(mmap) => &mmap[range], + MmapObj::Write(mmap) => &mmap[range], + }; + return Ok(PyBytes::from(bytes.to_vec()).into_ref(vm).into()); + } + + let mut result_buf = Vec::with_capacity(slicelen); + if step.is_negative() { + for i in range.rev().step_by(step.unsigned_abs()) { + let b = match mmap.deref().as_ref().unwrap() { + MmapObj::Read(mmap) => mmap[i], + MmapObj::Write(mmap) => mmap[i], + }; + result_buf.push(b); + } + } else { + for i in range.step_by(step.unsigned_abs()) { + let b = match mmap.deref().as_ref().unwrap() { + MmapObj::Read(mmap) => mmap[i], + MmapObj::Write(mmap) => mmap[i], + }; + result_buf.push(b); + } + } + Ok(PyBytes::from(result_buf).into_ref(vm).into()) + } + + fn _getitem(&self, needle: &PyObject, vm: &VirtualMachine) -> PyResult { + match SequenceIndex::try_from_borrowed_object(vm, needle, "mmap")? { + SequenceIndex::Int(i) => self.get_item_by_index(i, vm), + SequenceIndex::Slice(slice) => self.getitem_by_slice(&slice, vm), + } + } + + #[pymethod(magic)] + fn getitem(&self, needle: PyObjectRef, vm: &VirtualMachine) -> PyResult { + self._getitem(&needle, vm) + } + + fn _setitem( + zelf: PyRef, + needle: &PyObject, + value: PyObjectRef, + vm: &VirtualMachine, + ) -> PyResult<()> { + match SequenceIndex::try_from_borrowed_object(vm, needle, "mmap")? { + SequenceIndex::Int(i) => Self::setitem_by_index(zelf, i, value, vm), + SequenceIndex::Slice(slice) => Self::setitem_by_slice(zelf, &slice, value, vm), + } + } + + fn setitem_by_index( + zelf: PyRef, + i: isize, + value: PyObjectRef, + vm: &VirtualMachine, + ) -> PyResult<()> { + let i = wrap_index(i, zelf.len()) + .ok_or_else(|| vm.new_index_error("mmap index out of range".to_owned()))?; + + let b = value_from_object(vm, &value)?; + + zelf.try_writable(vm, |mmap| { + mmap[i as usize] = b; + })?; + + Ok(()) + } + + fn setitem_by_slice( + zelf: PyRef, + slice: &SaturatedSlice, + value: PyObjectRef, + vm: &VirtualMachine, + ) -> PyResult<()> { + let (range, step, slicelen) = slice.adjust_indices(zelf.len()); + + let bytes = bytes_from_object(vm, &value)?; + + if bytes.len() != slicelen { + return Err(vm.new_index_error("mmap slice assignment is wrong size".to_owned())); + } + + if slicelen == 0 { + // do nothing + Ok(()) + } else if step == 1 { + zelf.try_writable(vm, |mmap| { + (&mut mmap[range]) + .write(&bytes) + .map_err(|e| vm.new_os_error(e.to_string()))?; + Ok(()) + })? + } else { + let mut bi = 0; // bytes index + if step.is_negative() { + for i in range.rev().step_by(step.unsigned_abs()) { + zelf.try_writable(vm, |mmap| { + mmap[i] = bytes[bi]; + })?; + bi += 1; + } + } else { + for i in range.step_by(step.unsigned_abs()) { + zelf.try_writable(vm, |mmap| { + mmap[i] = bytes[bi]; + })?; + bi += 1; + } + } + Ok(()) + } + } + + #[pymethod(magic)] + fn setitem( + zelf: PyRef, + needle: PyObjectRef, + value: PyObjectRef, + vm: &VirtualMachine, + ) -> PyResult<()> { + Self::_setitem(zelf, &needle, value, vm) + } + + #[pymethod(magic)] + fn enter(zelf: PyRef, vm: &VirtualMachine) -> PyResult> { + let _m = zelf.check_valid(vm)?; + Ok(zelf.to_owned()) + } + + #[pymethod(magic)] + fn exit(zelf: PyRef, _args: FuncArgs, vm: &VirtualMachine) -> PyResult<()> { + zelf.close(vm) + } + } +} diff --git a/vm/Cargo.toml b/vm/Cargo.toml index 1f22fe1a1c..3c72b2de0a 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -48,7 +48,7 @@ hexf-parse = "0.2.1" indexmap = "1.8.1" ahash = "0.7.6" bitflags = "1.3.2" -libc = "0.2.123" +libc = "0.2.126" nix = "0.23.1" paste = "1.0.7" is-macro = "0.2.0" diff --git a/vm/src/builtins/bytearray.rs b/vm/src/builtins/bytearray.rs index 368227eb6b..7104afc77e 100644 --- a/vm/src/builtins/bytearray.rs +++ b/vm/src/builtins/bytearray.rs @@ -5,10 +5,10 @@ use super::{ }; use crate::{ anystr::{self, AnyStr}, + byte::{bytes_from_object, value_from_object}, bytesinner::{ - bytes_decode, bytes_from_object, value_from_object, ByteInnerFindOptions, - ByteInnerNewOptions, ByteInnerPaddingOptions, ByteInnerSplitOptions, - ByteInnerTranslateOptions, DecodeArgs, PyBytesInner, + bytes_decode, ByteInnerFindOptions, ByteInnerNewOptions, ByteInnerPaddingOptions, + ByteInnerSplitOptions, ByteInnerTranslateOptions, DecodeArgs, PyBytesInner, }, class::PyClassImpl, common::{ diff --git a/vm/src/byte.rs b/vm/src/byte.rs new file mode 100644 index 0000000000..4933b2b3a4 --- /dev/null +++ b/vm/src/byte.rs @@ -0,0 +1,27 @@ +//! byte operation APIs +use crate::object::AsObject; +use crate::{PyObject, PyResult, VirtualMachine}; +use num_traits::ToPrimitive; + +pub fn bytes_from_object(vm: &VirtualMachine, obj: &PyObject) -> PyResult> { + if let Ok(elements) = obj.try_bytes_like(vm, |bytes| bytes.to_vec()) { + return Ok(elements); + } + + if !obj.fast_isinstance(vm.ctx.types.str_type) { + if let Ok(elements) = vm.map_iterable_object(obj, |x| value_from_object(vm, &x)) { + return elements; + } + } + + Err(vm.new_type_error( + "can assign only bytes, buffers, or iterables of ints in range(0, 256)".to_owned(), + )) +} + +pub fn value_from_object(vm: &VirtualMachine, obj: &PyObject) -> PyResult { + vm.to_index(obj)? + .as_bigint() + .to_u8() + .ok_or_else(|| vm.new_value_error("byte must be in range(0, 256)".to_owned())) +} diff --git a/vm/src/bytesinner.rs b/vm/src/bytesinner.rs index b3063d3cec..14667a69e8 100644 --- a/vm/src/bytesinner.rs +++ b/vm/src/bytesinner.rs @@ -3,6 +3,7 @@ use crate::{ builtins::{ pystr, PyByteArray, PyBytes, PyBytesRef, PyInt, PyIntRef, PyStr, PyStrRef, PyTypeRef, }, + byte::bytes_from_object, cformat::CFormatBytes, function::{ArgIterable, Either, OptionalArg, OptionalOption, PyComparisonValue}, identifier, @@ -1207,26 +1208,3 @@ pub fn bytes_to_hex( pub const fn is_py_ascii_whitespace(b: u8) -> bool { matches!(b, b'\t' | b'\n' | b'\x0C' | b'\r' | b' ' | b'\x0B') } - -pub fn bytes_from_object(vm: &VirtualMachine, obj: &PyObject) -> PyResult> { - if let Ok(elements) = obj.try_bytes_like(vm, |bytes| bytes.to_vec()) { - return Ok(elements); - } - - if !obj.fast_isinstance(vm.ctx.types.str_type) { - if let Ok(elements) = vm.map_iterable_object(obj, |x| value_from_object(vm, &x)) { - return elements; - } - } - - Err(vm.new_type_error( - "can assign only bytes, buffers, or iterables of ints in range(0, 256)".to_owned(), - )) -} - -pub fn value_from_object(vm: &VirtualMachine, obj: &PyObject) -> PyResult { - vm.to_index(obj)? - .as_bigint() - .to_u8() - .ok_or_else(|| vm.new_value_error("byte must be in range(0, 256)".to_owned())) -} diff --git a/vm/src/lib.rs b/vm/src/lib.rs index a3b86dd626..95ee6e14df 100644 --- a/vm/src/lib.rs +++ b/vm/src/lib.rs @@ -41,6 +41,7 @@ pub(crate) mod macros; mod anystr; pub mod buffer; pub mod builtins; +pub mod byte; mod bytesinner; pub mod cformat; pub mod class; diff --git a/vm/src/sliceable.rs b/vm/src/sliceable.rs index d32a8d131e..11f06da241 100644 --- a/vm/src/sliceable.rs +++ b/vm/src/sliceable.rs @@ -292,7 +292,7 @@ impl SequenceIndex { } // Use PySliceableSequence::wrap_index for implementors -pub(crate) fn wrap_index(p: isize, len: usize) -> Option { +pub fn wrap_index(p: isize, len: usize) -> Option { let neg = p.is_negative(); let p = p.wrapping_abs() as usize; if neg { diff --git a/vm/src/vm/vm_new.rs b/vm/src/vm/vm_new.rs index b636f68eac..fc85cb855a 100644 --- a/vm/src/vm/vm_new.rs +++ b/vm/src/vm/vm_new.rs @@ -151,6 +151,11 @@ impl VirtualMachine { self.new_exception_msg(os_error, msg) } + pub fn new_system_error(&self, msg: String) -> PyBaseExceptionRef { + let sys_error = self.ctx.exceptions.system_error.to_owned(); + self.new_exception_msg(sys_error, msg) + } + pub fn new_unicode_decode_error(&self, msg: String) -> PyBaseExceptionRef { let unicode_decode_error = self.ctx.exceptions.unicode_decode_error.to_owned(); self.new_exception_msg(unicode_decode_error, msg)