diff --git a/Cargo.lock b/Cargo.lock index 1d1fd5fb29..e6398bfbed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1100,6 +1100,15 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" +[[package]] +name = "memmap2" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "057a3db23999c867821a7a59feb06a578fcb03685e983dff90daf9e7d24ac08f" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.6.4" @@ -1321,6 +1330,16 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "978aa494585d3ca4ad74929863093e87cac9790d81fe7aba2b3dc2890643a0fc" +[[package]] +name = "page_size" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "parking_lot" version = "0.11.2" @@ -1894,6 +1913,7 @@ dependencies = [ "libz-sys", "md-5", "memchr", + "memmap2", "mt19937", "nix 0.23.0", "num-bigint", @@ -1904,6 +1924,7 @@ dependencies = [ "openssl", "openssl-probe", "openssl-sys", + "page_size", "paste", "puruspe", "rand", diff --git a/Lib/test/test_mmap.py b/Lib/test/test_mmap.py new file mode 100644 index 0000000000..8f34c182f8 --- /dev/null +++ b/Lib/test/test_mmap.py @@ -0,0 +1,862 @@ +from test.support import (requires, _2G, _4G, gc_collect, cpython_only) +from test.support.import_helper import import_module +from test.support.os_helper import TESTFN, unlink +import unittest +import os +import re +import itertools +import socket +import sys +import weakref + +# Skip test if we can't import mmap. 
+mmap = import_module('mmap') + +PAGESIZE = mmap.PAGESIZE + + +class MmapTests(unittest.TestCase): + + def setUp(self): + if os.path.exists(TESTFN): + os.unlink(TESTFN) + + def tearDown(self): + try: + os.unlink(TESTFN) + except OSError: + pass + + def test_basic(self): + # Test mmap module on Unix systems and Windows + + # Create a file to be mmap'ed. + f = open(TESTFN, 'bw+') + try: + # Write 2 pages worth of data to the file + f.write(b'\0'* PAGESIZE) + f.write(b'foo') + f.write(b'\0'* (PAGESIZE-3) ) + f.flush() + m = mmap.mmap(f.fileno(), 2 * PAGESIZE) + finally: + f.close() + + # Simple sanity checks + + tp = str(type(m)) # SF bug 128713: segfaulted on Linux + self.assertEqual(m.find(b'foo'), PAGESIZE) + + self.assertEqual(len(m), 2*PAGESIZE) + + self.assertEqual(m[0], 0) + self.assertEqual(m[0:3], b'\0\0\0') + + # Shouldn't crash on boundary (Issue #5292) + self.assertRaises(IndexError, m.__getitem__, len(m)) + self.assertRaises(IndexError, m.__setitem__, len(m), b'\0') + + # Modify the file's content + m[0] = b'3'[0] + m[PAGESIZE +3: PAGESIZE +3+3] = b'bar' + + # Check that the modification worked + self.assertEqual(m[0], b'3'[0]) + self.assertEqual(m[0:3], b'3\0\0') + self.assertEqual(m[PAGESIZE-1 : PAGESIZE + 7], b'\0foobar\0') + + m.flush() + + # Test doing a regular expression match in an mmap'ed file + match = re.search(b'[A-Za-z]+', m) + if match is None: + self.fail('regex match on mmap failed!') + else: + start, end = match.span(0) + length = end - start + + self.assertEqual(start, PAGESIZE) + self.assertEqual(end, PAGESIZE + 6) + + # test seeking around (try to overflow the seek implementation) + m.seek(0,0) + self.assertEqual(m.tell(), 0) + m.seek(42,1) + self.assertEqual(m.tell(), 42) + m.seek(0,2) + self.assertEqual(m.tell(), len(m)) + + # Try to seek to negative position... + self.assertRaises(ValueError, m.seek, -1) + + # Try to seek beyond end of mmap... + self.assertRaises(ValueError, m.seek, 1, 2) + + # Try to seek to negative position... 
+ self.assertRaises(ValueError, m.seek, -len(m)-1, 2) + + # Try resizing map + try: + m.resize(512) + except SystemError: + # resize() not supported + # No messages are printed, since the output of this test suite + # would then be different across platforms. + pass + else: + # resize() is supported + self.assertEqual(len(m), 512) + # Check that we can no longer seek beyond the new size. + self.assertRaises(ValueError, m.seek, 513, 0) + + # Check that the underlying file is truncated too + # (bug #728515) + f = open(TESTFN, 'rb') + try: + f.seek(0, 2) + self.assertEqual(f.tell(), 512) + finally: + f.close() + self.assertEqual(m.size(), 512) + + m.close() + + def test_access_parameter(self): + # Test for "access" keyword parameter + mapsize = 10 + with open(TESTFN, "wb") as fp: + fp.write(b"a"*mapsize) + with open(TESTFN, "rb") as f: + m = mmap.mmap(f.fileno(), mapsize, access=mmap.ACCESS_READ) + self.assertEqual(m[:], b'a'*mapsize, "Readonly memory map data incorrect.") + + # Ensuring that readonly mmap can't be slice assigned + try: + m[:] = b'b'*mapsize + except TypeError: + pass + else: + self.fail("Able to write to readonly memory map") + + # Ensuring that readonly mmap can't be item assigned + try: + m[0] = b'b' + except TypeError: + pass + else: + self.fail("Able to write to readonly memory map") + + # Ensuring that readonly mmap can't be write() to + try: + m.seek(0,0) + m.write(b'abc') + except TypeError: + pass + else: + self.fail("Able to write to readonly memory map") + + # Ensuring that readonly mmap can't be write_byte() to + try: + m.seek(0,0) + m.write_byte(b'd') + except TypeError: + pass + else: + self.fail("Able to write to readonly memory map") + + # Ensuring that readonly mmap can't be resized + try: + m.resize(2*mapsize) + except SystemError: # resize is not universally supported + pass + except TypeError: + pass + else: + self.fail("Able to resize readonly memory map") + with open(TESTFN, "rb") as fp: + self.assertEqual(fp.read(), 
b'a'*mapsize, + "Readonly memory map data file was modified") + + # Opening mmap with size too big + with open(TESTFN, "r+b") as f: + try: + m = mmap.mmap(f.fileno(), mapsize+1) + except ValueError: + # we do not expect a ValueError on Windows + # CAUTION: This also changes the size of the file on disk, and + # later tests assume that the length hasn't changed. We need to + # repair that. + if sys.platform.startswith('win'): + self.fail("Opening mmap with size+1 should work on Windows.") + else: + # we expect a ValueError on Unix, but not on Windows + if not sys.platform.startswith('win'): + self.fail("Opening mmap with size+1 should raise ValueError.") + m.close() + if sys.platform.startswith('win'): + # Repair damage from the resizing test. + with open(TESTFN, 'r+b') as f: + f.truncate(mapsize) + + # Opening mmap with access=ACCESS_WRITE + with open(TESTFN, "r+b") as f: + m = mmap.mmap(f.fileno(), mapsize, access=mmap.ACCESS_WRITE) + # Modifying write-through memory map + m[:] = b'c'*mapsize + self.assertEqual(m[:], b'c'*mapsize, + "Write-through memory map memory not updated properly.") + m.flush() + m.close() + with open(TESTFN, 'rb') as f: + stuff = f.read() + self.assertEqual(stuff, b'c'*mapsize, + "Write-through memory map data file not updated properly.") + + # Opening mmap with access=ACCESS_COPY + with open(TESTFN, "r+b") as f: + m = mmap.mmap(f.fileno(), mapsize, access=mmap.ACCESS_COPY) + # Modifying copy-on-write memory map + m[:] = b'd'*mapsize + self.assertEqual(m[:], b'd' * mapsize, + "Copy-on-write memory map data not written correctly.") + m.flush() + with open(TESTFN, "rb") as fp: + self.assertEqual(fp.read(), b'c'*mapsize, + "Copy-on-write test data file should not be modified.") + # Ensuring copy-on-write maps cannot be resized + self.assertRaises(TypeError, m.resize, 2*mapsize) + m.close() + + # Ensuring invalid access parameter raises exception + with open(TESTFN, "r+b") as f: + self.assertRaises(ValueError, mmap.mmap, f.fileno(), mapsize, 
access=4) + + if os.name == "posix": + # Try incompatible flags, prot and access parameters. + with open(TESTFN, "r+b") as f: + self.assertRaises(ValueError, mmap.mmap, f.fileno(), mapsize, + flags=mmap.MAP_PRIVATE, + prot=mmap.PROT_READ, access=mmap.ACCESS_WRITE) + + # Try writing with PROT_EXEC and without PROT_WRITE + prot = mmap.PROT_READ | getattr(mmap, 'PROT_EXEC', 0) + with open(TESTFN, "r+b") as f: + m = mmap.mmap(f.fileno(), mapsize, prot=prot) + self.assertRaises(TypeError, m.write, b"abcdef") + self.assertRaises(TypeError, m.write_byte, 0) + m.close() + + def test_bad_file_desc(self): + # Try opening a bad file descriptor... + self.assertRaises(OSError, mmap.mmap, -2, 4096) + + def test_tougher_find(self): + # Do a tougher .find() test. SF bug 515943 pointed out that, in 2.2, + # searching for data with embedded \0 bytes didn't work. + with open(TESTFN, 'wb+') as f: + + data = b'aabaac\x00deef\x00\x00aa\x00' + n = len(data) + f.write(data) + f.flush() + m = mmap.mmap(f.fileno(), n) + + for start in range(n+1): + for finish in range(start, n+1): + slice = data[start : finish] + self.assertEqual(m.find(slice), data.find(slice)) + self.assertEqual(m.find(slice + b'x'), -1) + m.close() + + def test_find_end(self): + # test the new 'end' parameter works as expected + with open(TESTFN, 'wb+') as f: + data = b'one two ones' + n = len(data) + f.write(data) + f.flush() + m = mmap.mmap(f.fileno(), n) + + self.assertEqual(m.find(b'one'), 0) + self.assertEqual(m.find(b'ones'), 8) + self.assertEqual(m.find(b'one', 0, -1), 0) + self.assertEqual(m.find(b'one', 1), 8) + self.assertEqual(m.find(b'one', 1, -1), 8) + self.assertEqual(m.find(b'one', 1, -2), -1) + self.assertEqual(m.find(bytearray(b'one')), 0) + + + def test_rfind(self): + # test the new 'end' parameter works as expected + with open(TESTFN, 'wb+') as f: + data = b'one two ones' + n = len(data) + f.write(data) + f.flush() + m = mmap.mmap(f.fileno(), n) + + self.assertEqual(m.rfind(b'one'), 8) + 
self.assertEqual(m.rfind(b'one '), 0) + self.assertEqual(m.rfind(b'one', 0, -1), 8) + self.assertEqual(m.rfind(b'one', 0, -2), 0) + self.assertEqual(m.rfind(b'one', 1, -1), 8) + self.assertEqual(m.rfind(b'one', 1, -2), -1) + self.assertEqual(m.rfind(bytearray(b'one')), 8) + + + def test_double_close(self): + # make sure a double close doesn't crash on Solaris (Bug# 665913) + with open(TESTFN, 'wb+') as f: + f.write(2**16 * b'a') # Arbitrary character + + with open(TESTFN, 'rb') as f: + mf = mmap.mmap(f.fileno(), 2**16, access=mmap.ACCESS_READ) + mf.close() + mf.close() + + def test_entire_file(self): + # test mapping of entire file by passing 0 for map length + with open(TESTFN, "wb+") as f: + f.write(2**16 * b'm') # Arbitrary character + + with open(TESTFN, "rb+") as f, \ + mmap.mmap(f.fileno(), 0) as mf: + self.assertEqual(len(mf), 2**16, "Map size should equal file size.") + self.assertEqual(mf.read(2**16), 2**16 * b"m") + + def test_length_0_offset(self): + # Issue #10916: test mapping of remainder of file by passing 0 for + # map length with an offset doesn't cause a segfault. + # NOTE: allocation granularity is currently 65536 under Win64, + # and therefore the minimum offset alignment. + with open(TESTFN, "wb") as f: + f.write((65536 * 2) * b'm') # Arbitrary character + + with open(TESTFN, "rb") as f: + with mmap.mmap(f.fileno(), 0, offset=65536, access=mmap.ACCESS_READ) as mf: + self.assertRaises(IndexError, mf.__getitem__, 80000) + + def test_length_0_large_offset(self): + # Issue #10959: test mapping of a file by passing 0 for + # map length with a large offset doesn't cause a segfault. 
+ with open(TESTFN, "wb") as f: + f.write(115699 * b'm') # Arbitrary character + + with open(TESTFN, "w+b") as f: + self.assertRaises(ValueError, mmap.mmap, f.fileno(), 0, + offset=2147418112) + + def test_move(self): + # make move works everywhere (64-bit format problem earlier) + with open(TESTFN, 'wb+') as f: + + f.write(b"ABCDEabcde") # Arbitrary character + f.flush() + + mf = mmap.mmap(f.fileno(), 10) + mf.move(5, 0, 5) + self.assertEqual(mf[:], b"ABCDEABCDE", "Map move should have duplicated front 5") + mf.close() + + # more excessive test + data = b"0123456789" + for dest in range(len(data)): + for src in range(len(data)): + for count in range(len(data) - max(dest, src)): + expected = data[:dest] + data[src:src+count] + data[dest+count:] + m = mmap.mmap(-1, len(data)) + m[:] = data + m.move(dest, src, count) + self.assertEqual(m[:], expected) + m.close() + + # segfault test (Issue 5387) + m = mmap.mmap(-1, 100) + offsets = [-100, -1, 0, 1, 100] + for source, dest, size in itertools.product(offsets, offsets, offsets): + try: + m.move(source, dest, size) + except ValueError: + pass + + offsets = [(-1, -1, -1), (-1, -1, 0), (-1, 0, -1), (0, -1, -1), + (-1, 0, 0), (0, -1, 0), (0, 0, -1)] + for source, dest, size in offsets: + self.assertRaises(ValueError, m.move, source, dest, size) + + m.close() + + m = mmap.mmap(-1, 1) # single byte + self.assertRaises(ValueError, m.move, 0, 0, 2) + self.assertRaises(ValueError, m.move, 1, 0, 1) + self.assertRaises(ValueError, m.move, 0, 1, 1) + m.move(0, 0, 1) + m.move(0, 0, 0) + + + def test_anonymous(self): + # anonymous mmap.mmap(-1, PAGE) + m = mmap.mmap(-1, PAGESIZE) + for x in range(PAGESIZE): + self.assertEqual(m[x], 0, + "anonymously mmap'ed contents should be zero") + + for x in range(PAGESIZE): + b = x & 0xff + m[x] = b + self.assertEqual(m[x], b) + + def test_read_all(self): + m = mmap.mmap(-1, 16) + self.addCleanup(m.close) + + # With no parameters, or None or a negative argument, reads all + 
m.write(bytes(range(16))) + m.seek(0) + self.assertEqual(m.read(), bytes(range(16))) + m.seek(8) + self.assertEqual(m.read(), bytes(range(8, 16))) + m.seek(16) + self.assertEqual(m.read(), b'') + m.seek(3) + self.assertEqual(m.read(None), bytes(range(3, 16))) + m.seek(4) + self.assertEqual(m.read(-1), bytes(range(4, 16))) + m.seek(5) + self.assertEqual(m.read(-2), bytes(range(5, 16))) + m.seek(9) + self.assertEqual(m.read(-42), bytes(range(9, 16))) + + def test_read_invalid_arg(self): + m = mmap.mmap(-1, 16) + self.addCleanup(m.close) + + self.assertRaises(TypeError, m.read, 'foo') + self.assertRaises(TypeError, m.read, 5.5) + self.assertRaises(TypeError, m.read, [1, 2, 3]) + + def test_extended_getslice(self): + # Test extended slicing by comparing with list slicing. + s = bytes(reversed(range(256))) + m = mmap.mmap(-1, len(s)) + m[:] = s + self.assertEqual(m[:], s) + indices = (0, None, 1, 3, 19, 300, sys.maxsize, -1, -2, -31, -300) + for start in indices: + for stop in indices: + # Skip step 0 (invalid) + for step in indices[1:]: + self.assertEqual(m[start:stop:step], + s[start:stop:step]) + + def test_extended_set_del_slice(self): + # Test extended slicing by comparing with list slicing. + s = bytes(reversed(range(256))) + m = mmap.mmap(-1, len(s)) + indices = (0, None, 1, 3, 19, 300, sys.maxsize, -1, -2, -31, -300) + for start in indices: + for stop in indices: + # Skip invalid step 0 + for step in indices[1:]: + m[:] = s + self.assertEqual(m[:], s) + L = list(s) + # Make sure we have a slice of exactly the right length, + # but with different data. 
+ data = L[start:stop:step] + data = bytes(reversed(data)) + L[start:stop:step] = data + m[start:stop:step] = data + self.assertEqual(m[:], bytes(L)) + + def make_mmap_file (self, f, halfsize): + # Write 2 pages worth of data to the file + f.write (b'\0' * halfsize) + f.write (b'foo') + f.write (b'\0' * (halfsize - 3)) + f.flush () + return mmap.mmap (f.fileno(), 0) + + def test_empty_file (self): + f = open (TESTFN, 'w+b') + f.close() + with open(TESTFN, "rb") as f : + self.assertRaisesRegex(ValueError, + "cannot mmap an empty file", + mmap.mmap, f.fileno(), 0, + access=mmap.ACCESS_READ) + + def test_offset (self): + f = open (TESTFN, 'w+b') + + try: # unlink TESTFN no matter what + halfsize = mmap.ALLOCATIONGRANULARITY + m = self.make_mmap_file (f, halfsize) + m.close () + f.close () + + mapsize = halfsize * 2 + # Try invalid offset + f = open(TESTFN, "r+b") + for offset in [-2, -1, None]: + try: + m = mmap.mmap(f.fileno(), mapsize, offset=offset) + self.assertEqual(0, 1) + except (ValueError, TypeError, OverflowError): + pass + else: + self.assertEqual(0, 0) + f.close() + + # Try valid offset, hopefully 8192 works on all OSes + f = open(TESTFN, "r+b") + m = mmap.mmap(f.fileno(), mapsize - halfsize, offset=halfsize) + self.assertEqual(m[0:3], b'foo') + f.close() + + # Try resizing map + try: + m.resize(512) + except SystemError: + pass + else: + # resize() is supported + self.assertEqual(len(m), 512) + # Check that we can no longer seek beyond the new size. 
+ self.assertRaises(ValueError, m.seek, 513, 0) + # Check that the content is not changed + self.assertEqual(m[0:3], b'foo') + + # Check that the underlying file is truncated too + f = open(TESTFN, 'rb') + f.seek(0, 2) + self.assertEqual(f.tell(), halfsize + 512) + f.close() + self.assertEqual(m.size(), halfsize + 512) + + m.close() + + finally: + f.close() + try: + os.unlink(TESTFN) + except OSError: + pass + + def test_subclass(self): + class anon_mmap(mmap.mmap): + def __new__(klass, *args, **kwargs): + return mmap.mmap.__new__(klass, -1, *args, **kwargs) + anon_mmap(PAGESIZE) + + @unittest.skipUnless(hasattr(mmap, 'PROT_READ'), "needs mmap.PROT_READ") + def test_prot_readonly(self): + mapsize = 10 + with open(TESTFN, "wb") as fp: + fp.write(b"a"*mapsize) + with open(TESTFN, "rb") as f: + m = mmap.mmap(f.fileno(), mapsize, prot=mmap.PROT_READ) + self.assertRaises(TypeError, m.write, "foo") + + def test_error(self): + self.assertIs(mmap.error, OSError) + + def test_io_methods(self): + data = b"0123456789" + with open(TESTFN, "wb") as fp: + fp.write(b"x"*len(data)) + with open(TESTFN, "r+b") as f: + m = mmap.mmap(f.fileno(), len(data)) + # Test write_byte() + for i in range(len(data)): + self.assertEqual(m.tell(), i) + m.write_byte(data[i]) + self.assertEqual(m.tell(), i+1) + self.assertRaises(ValueError, m.write_byte, b"x"[0]) + self.assertEqual(m[:], data) + # Test read_byte() + m.seek(0) + for i in range(len(data)): + self.assertEqual(m.tell(), i) + self.assertEqual(m.read_byte(), data[i]) + self.assertEqual(m.tell(), i+1) + self.assertRaises(ValueError, m.read_byte) + # Test read() + m.seek(3) + self.assertEqual(m.read(3), b"345") + self.assertEqual(m.tell(), 6) + # Test write() + m.seek(3) + m.write(b"bar") + self.assertEqual(m.tell(), 6) + self.assertEqual(m[:], b"012bar6789") + m.write(bytearray(b"baz")) + self.assertEqual(m.tell(), 9) + self.assertEqual(m[:], b"012barbaz9") + self.assertRaises(ValueError, m.write, b"ba") + + def test_non_ascii_byte(self): 
+ for b in (129, 200, 255): # > 128 + m = mmap.mmap(-1, 1) + m.write_byte(b) + self.assertEqual(m[0], b) + m.seek(0) + self.assertEqual(m.read_byte(), b) + m.close() + + @unittest.skipUnless(os.name == 'nt', 'requires Windows') + def test_tagname(self): + data1 = b"0123456789" + data2 = b"abcdefghij" + assert len(data1) == len(data2) + + # Test same tag + m1 = mmap.mmap(-1, len(data1), tagname="foo") + m1[:] = data1 + m2 = mmap.mmap(-1, len(data2), tagname="foo") + m2[:] = data2 + self.assertEqual(m1[:], data2) + self.assertEqual(m2[:], data2) + m2.close() + m1.close() + + # Test different tag + m1 = mmap.mmap(-1, len(data1), tagname="foo") + m1[:] = data1 + m2 = mmap.mmap(-1, len(data2), tagname="boo") + m2[:] = data2 + self.assertEqual(m1[:], data1) + self.assertEqual(m2[:], data2) + m2.close() + m1.close() + + @cpython_only + @unittest.skipUnless(os.name == 'nt', 'requires Windows') + def test_sizeof(self): + m1 = mmap.mmap(-1, 100) + tagname = "foo" + m2 = mmap.mmap(-1, 100, tagname=tagname) + self.assertEqual(sys.getsizeof(m2), + sys.getsizeof(m1) + len(tagname) + 1) + + @unittest.skipUnless(os.name == 'nt', 'requires Windows') + def test_crasher_on_windows(self): + # Should not crash (Issue 1733986) + m = mmap.mmap(-1, 1000, tagname="foo") + try: + mmap.mmap(-1, 5000, tagname="foo")[:] # same tagname, but larger size + except: + pass + m.close() + + # Should not crash (Issue 5385) + with open(TESTFN, "wb") as fp: + fp.write(b"x"*10) + f = open(TESTFN, "r+b") + m = mmap.mmap(f.fileno(), 0) + f.close() + try: + m.resize(0) # will raise OSError + except: + pass + try: + m[:] + except: + pass + m.close() + + @unittest.skipUnless(os.name == 'nt', 'requires Windows') + def test_invalid_descriptor(self): + # socket file descriptors are valid, but out of range + # for _get_osfhandle, causing a crash when validating the + # parameters to _get_osfhandle. 
+ s = socket.socket() + try: + with self.assertRaises(OSError): + m = mmap.mmap(s.fileno(), 10) + finally: + s.close() + + def test_context_manager(self): + with mmap.mmap(-1, 10) as m: + self.assertFalse(m.closed) + self.assertTrue(m.closed) + + def test_context_manager_exception(self): + # Test that the OSError gets passed through + with self.assertRaises(Exception) as exc: + with mmap.mmap(-1, 10) as m: + raise OSError + self.assertIsInstance(exc.exception, OSError, + "wrong exception raised in context manager") + self.assertTrue(m.closed, "context manager failed") + + def test_weakref(self): + # Check mmap objects are weakrefable + mm = mmap.mmap(-1, 16) + wr = weakref.ref(mm) + self.assertIs(wr(), mm) + del mm + gc_collect() + self.assertIs(wr(), None) + + def test_write_returning_the_number_of_bytes_written(self): + mm = mmap.mmap(-1, 16) + self.assertEqual(mm.write(b""), 0) + self.assertEqual(mm.write(b"x"), 1) + self.assertEqual(mm.write(b"yz"), 2) + self.assertEqual(mm.write(b"python"), 6) + + @unittest.skipIf(os.name == 'nt', 'cannot resize anonymous mmaps on Windows') + def test_resize_past_pos(self): + m = mmap.mmap(-1, 8192) + self.addCleanup(m.close) + m.read(5000) + try: + m.resize(4096) + except SystemError: + self.skipTest("resizing not supported") + self.assertEqual(m.read(14), b'') + self.assertRaises(ValueError, m.read_byte) + self.assertRaises(ValueError, m.write_byte, 42) + self.assertRaises(ValueError, m.write, b'abc') + + def test_concat_repeat_exception(self): + m = mmap.mmap(-1, 16) + with self.assertRaises(TypeError): + m + m + with self.assertRaises(TypeError): + m * 2 + + def test_flush_return_value(self): + # mm.flush() should return None on success, raise an + # exception on error under all platforms. + mm = mmap.mmap(-1, 16) + self.addCleanup(mm.close) + mm.write(b'python') + result = mm.flush() + self.assertIsNone(result) + if sys.platform.startswith('linux'): + # 'offset' must be a multiple of mmap.PAGESIZE on Linux. 
+            # See bpo-34754 for details.
+            self.assertRaises(OSError, mm.flush, 1, len(b'python'))
+
+    def test_repr(self):
+        open_mmap_repr_pat = re.compile(
+            r"<mmap.mmap closed=False, "
+            r"access=(?P<access>\S+), "
+            r"length=(?P<length>\d+), "
+            r"pos=(?P<pos>\d+), "
+            r"offset=(?P<offset>\d+)>")
+        closed_mmap_repr_pat = re.compile(r"<mmap.mmap closed=True>")
+        mapsizes = (50, 100, 1_000, 1_000_000, 10_000_000)
+        offsets = tuple((mapsize // 2 // mmap.ALLOCATIONGRANULARITY)
+                        * mmap.ALLOCATIONGRANULARITY for mapsize in mapsizes)
+        for offset, mapsize in zip(offsets, mapsizes):
+            data = b'a' * mapsize
+            length = mapsize - offset
+            accesses = ('ACCESS_DEFAULT', 'ACCESS_READ',
+                        'ACCESS_COPY', 'ACCESS_WRITE')
+            positions = (0, length//10, length//5, length//4)
+            with open(TESTFN, "wb+") as fp:
+                fp.write(data)
+                fp.flush()
+                for access, pos in itertools.product(accesses, positions):
+                    accint = getattr(mmap, access)
+                    with mmap.mmap(fp.fileno(),
+                                   length,
+                                   access=accint,
+                                   offset=offset) as mm:
+                        mm.seek(pos)
+                        match = open_mmap_repr_pat.match(repr(mm))
+                        self.assertIsNotNone(match)
+                        self.assertEqual(match.group('access'), access)
+                        self.assertEqual(match.group('length'), str(length))
+                        self.assertEqual(match.group('pos'), str(pos))
+                        self.assertEqual(match.group('offset'), str(offset))
+                    match = closed_mmap_repr_pat.match(repr(mm))
+                    self.assertIsNotNone(match)
+
+    @unittest.skipUnless(hasattr(mmap.mmap, 'madvise'), 'needs madvise')
+    def test_madvise(self):
+        size = 2 * PAGESIZE
+        m = mmap.mmap(-1, size)
+
+        with self.assertRaisesRegex(ValueError, "madvise start out of bounds"):
+            m.madvise(mmap.MADV_NORMAL, size)
+        with self.assertRaisesRegex(ValueError, "madvise start out of bounds"):
+            m.madvise(mmap.MADV_NORMAL, -1)
+        with self.assertRaisesRegex(ValueError, "madvise length invalid"):
+            m.madvise(mmap.MADV_NORMAL, 0, -1)
+        with self.assertRaisesRegex(OverflowError, "madvise length too large"):
+            m.madvise(mmap.MADV_NORMAL, PAGESIZE, sys.maxsize)
+        self.assertEqual(m.madvise(mmap.MADV_NORMAL), None)
+        self.assertEqual(m.madvise(mmap.MADV_NORMAL, PAGESIZE),
None) + self.assertEqual(m.madvise(mmap.MADV_NORMAL, PAGESIZE, size), None) + self.assertEqual(m.madvise(mmap.MADV_NORMAL, 0, 2), None) + self.assertEqual(m.madvise(mmap.MADV_NORMAL, 0, size), None) + + +class LargeMmapTests(unittest.TestCase): + + def setUp(self): + unlink(TESTFN) + + def tearDown(self): + unlink(TESTFN) + + def _make_test_file(self, num_zeroes, tail): + if sys.platform[:3] == 'win' or sys.platform == 'darwin': + requires('largefile', + 'test requires %s bytes and a long time to run' % str(0x180000000)) + f = open(TESTFN, 'w+b') + try: + f.seek(num_zeroes) + f.write(tail) + f.flush() + except (OSError, OverflowError, ValueError): + try: + f.close() + except (OSError, OverflowError): + pass + raise unittest.SkipTest("filesystem does not have largefile support") + return f + + def test_large_offset(self): + with self._make_test_file(0x14FFFFFFF, b" ") as f: + with mmap.mmap(f.fileno(), 0, offset=0x140000000, access=mmap.ACCESS_READ) as m: + self.assertEqual(m[0xFFFFFFF], 32) + + def test_large_filesize(self): + with self._make_test_file(0x17FFFFFFF, b" ") as f: + if sys.maxsize < 0x180000000: + # On 32 bit platforms the file is larger than sys.maxsize so + # mapping the whole file should fail -- Issue #16743 + with self.assertRaises(OverflowError): + mmap.mmap(f.fileno(), 0x180000000, access=mmap.ACCESS_READ) + with self.assertRaises(ValueError): + mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) + with mmap.mmap(f.fileno(), 0x10000, access=mmap.ACCESS_READ) as m: + self.assertEqual(m.size(), 0x180000000) + + # Issue 11277: mmap() with large (~4 GiB) sparse files crashes on OS X. 
+ + def _test_around_boundary(self, boundary): + tail = b' DEARdear ' + start = boundary - len(tail) // 2 + end = start + len(tail) + with self._make_test_file(start, tail) as f: + with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m: + self.assertEqual(m[start:end], tail) + + @unittest.skipUnless(sys.maxsize > _4G, "test cannot run on 32-bit systems") + def test_around_2GB(self): + self._test_around_boundary(_2G) + + @unittest.skipUnless(sys.maxsize > _4G, "test cannot run on 32-bit systems") + def test_around_4GB(self): + self._test_around_boundary(_4G) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/Cargo.toml b/stdlib/Cargo.toml index 18d2119ecd..e807a33b4a 100644 --- a/stdlib/Cargo.toml +++ b/stdlib/Cargo.toml @@ -63,6 +63,8 @@ ahash = "0.7.4" libz-sys = { version = "1.0", optional = true } num_enum = "0.5.4" ascii = "1.0.0" +memmap2 = "0.5.0" +page_size = "0.4.2" [target.'cfg(all(unix, not(target_os = "redox")))'.dependencies] termios = "0.3" diff --git a/stdlib/src/lib.rs b/stdlib/src/lib.rs index 854009dd90..9405fc2f24 100644 --- a/stdlib/src/lib.rs +++ b/stdlib/src/lib.rs @@ -20,6 +20,8 @@ mod pyexpat; mod random; // TODO: maybe make this an extension module, if we ever get those // mod re; +#[cfg(unix)] +mod mmap; #[cfg(not(target_arch = "wasm32"))] pub mod socket; mod statistics; @@ -120,6 +122,7 @@ pub fn get_module_inits() -> impl Iterator, StdlibInit { "_posixsubprocess" => posixsubprocess::make_module, "syslog" => syslog::make_module, + "mmap" => mmap::make_module, } #[cfg(target_os = "macos")] { diff --git a/stdlib/src/mmap.rs b/stdlib/src/mmap.rs new file mode 100644 index 0000000000..6801f17d82 --- /dev/null +++ b/stdlib/src/mmap.rs @@ -0,0 +1,308 @@ +pub(crate) use mmap::make_module; + +#[pymodule] +mod mmap { + use crate::vm::{ + builtins::PyTypeRef, + function::{IntoPyResult, OptionalArg}, + types::Constructor, + FromArgs, PyObject, PyResult, PyValue, TryFromBorrowedObject, VirtualMachine, + }; + use 
memmap2::{MmapMut, MmapOptions};
+
+    /// Access mode of a mapping. The discriminants are the integers exported to
+    /// Python as the `ACCESS_*` module constants below.
+    #[repr(C)]
+    #[derive(PartialEq, Eq, Debug)]
+    enum AccessMode {
+        Default = 0,
+        Read = 1,
+        Write = 2,
+        Copy = 3,
+    }
+
+    impl TryFromBorrowedObject for AccessMode {
+        /// Converts a Python integer in `0..=3` into the matching variant.
+        ///
+        /// Conversion errors from `u32` are propagated; any other integer
+        /// raises `ValueError`.
+        fn try_from_borrowed_object(vm: &VirtualMachine, obj: &PyObject) -> PyResult<Self> {
+            let i = u32::try_from_borrowed_object(vm, obj)?;
+            Ok(match i {
+                0 => Self::Default,
+                1 => Self::Read,
+                2 => Self::Write,
+                3 => Self::Copy,
+                _ => return Err(vm.new_value_error("Not a valid AccessMode value".to_owned())),
+            })
+        }
+    }
+
+    #[pyattr]
+    use libc::{MAP_ANON, MAP_ANONYMOUS, MAP_PRIVATE, MAP_SHARED, PROT_READ, PROT_WRITE};
+    // These flags are not defined by `libc` on every unix target (e.g. macOS),
+    // so export them only where they exist to keep the module building there.
+    #[cfg(any(target_os = "linux", target_os = "android"))]
+    #[pyattr]
+    use libc::{MAP_DENYWRITE, MAP_EXECUTABLE, MAP_POPULATE};
+    #[pyattr]
+    const ACCESS_DEFAULT: u32 = AccessMode::Default as u32;
+    #[pyattr]
+    const ACCESS_READ: u32 = AccessMode::Read as u32;
+    #[pyattr]
+    const ACCESS_WRITE: u32 = AccessMode::Write as u32;
+    #[pyattr]
+    const ACCESS_COPY: u32 = AccessMode::Copy as u32;
+
+
+    /// Exposes the system page size, as reported by the `page_size` crate, as
+    /// `mmap.PAGESIZE`.
+    #[pyattr(name = "PAGESIZE")]
+    fn pagesize(_vm: &VirtualMachine) -> usize {
+        page_size::get()
+    }
+
+    /// The Python-visible `mmap.mmap` object.
+    #[pyattr]
+    #[pyclass(name = "mmap")]
+    #[derive(Debug, PyValue)]
+    struct PyMmap {
+        // The underlying mapping created by memmap2.
+        mmap: MmapMut,
+        // Checked by `close()`; NOTE(review): presumably the number of
+        // outstanding buffer exports, as in CPython -- confirm.
+        exports: usize,
+        // PyObject *weakreflist;
+        // Access mode resolved by the constructor.
+        access: AccessMode,
+    }
+
+    /// Arguments accepted by the `mmap.mmap(...)` constructor, with their defaults.
+    #[derive(FromArgs)]
+    struct MmapNewArgs {
+        #[pyarg(any)]
+        fileno: std::os::unix::io::RawFd,
+        #[pyarg(any)]
+        length: isize,
+        #[pyarg(any, default = "MAP_SHARED")]
+        flags: libc::c_int,
+        #[pyarg(any, default = "PROT_WRITE|PROT_READ")]
+        prot: libc::c_int,
+        #[pyarg(any, default = "AccessMode::Default")]
+        access: AccessMode,
+        #[pyarg(any, default = "0")]
+        offset: u64,
+    }
+
+    impl Constructor for PyMmap {
+        type Args = MmapNewArgs;
+
+        /// Validates the arguments, resolves the effective access mode and
+        /// creates the file-backed mapping.
+        fn py_new(
+            cls: PyTypeRef,
+            MmapNewArgs {
+                fileno: fd,
+                length,
+                flags,
+                prot,
+                access,
+                offset,
+            }: Self::Args,
+            vm: &VirtualMachine,
+        ) -> PyResult {
+            if length < 0 {
+                return Err(
+                    vm.new_overflow_error("memory mapped length must be positive".to_owned())
+                );
+            }
+            // if offset < 0 {
+            //     return
Err(vm.new_overflow_error("memory mapped offset must be positive".to_owned())); + // } + if (access != AccessMode::Default) + && ((flags != MAP_SHARED) || (prot != (PROT_WRITE | PROT_READ))) + { + return Err(vm.new_value_error( + "mmap can't specify both access and flags, prot.".to_owned(), + )); + } + + let (flags, prot, access) = match access { + AccessMode::Read => (MAP_SHARED, PROT_READ, access), + AccessMode::Write => (MAP_SHARED, PROT_READ | PROT_WRITE, access), + AccessMode::Copy => (MAP_PRIVATE, PROT_READ | PROT_WRITE, access), + AccessMode::Default => { + let access = if (prot & PROT_READ) != 0 && (prot & PROT_WRITE) != 0 { + access + } else if (prot & PROT_WRITE) != 0 { + AccessMode::Write + } else { + AccessMode::Read + }; + (flags, prot, access) + } + _ => return Err(vm.new_value_error("mmap invalid access parameter.".to_owned())), + }; + + // if (PySys_Audit("mmap.__new__", "ini" _Py_PARSE_OFF_T, + // fd, map_size, access, offset) < 0) { + // return NULL; + // } + + // #[cfg(target_vendor = "apple")] + // Issue #11277: fsync(2) is not enough on OS X - a special, OS X specific + // fcntl(2) is necessary to force DISKSYNC and get around mmap(2) bug + // if fd != -1 { + // fcntl(fd, F_FULLFSYNC); + // } + + // if fd != -1 { + // Py_BEGIN_ALLOW_THREADS + // fstat_result = _Py_fstat_noraise(fd, &status); + // Py_END_ALLOW_THREADS + // } + + // if (fd != -1 && fstat_result == 0 && S_ISREG(status.st_mode)) { + // if (map_size == 0) { + // if (status.st_size == 0) { + // PyErr_SetString(PyExc_ValueError, + // "cannot mmap an empty file"); + // return NULL; + // } + // if (offset >= status.st_size) { + // PyErr_SetString(PyExc_ValueError, + // "mmap offset is greater than file size"); + // return NULL; + // } + // if (status.st_size - offset > PY_SSIZE_T_MAX) { + // PyErr_SetString(PyExc_ValueError, + // "mmap length is too large"); + // return NULL; + // } + // map_size = (Py_ssize_t) (status.st_size - offset); + // } else if (offset > status.st_size || 
status.st_size - offset < map_size) {
+            //         PyErr_SetString(PyExc_ValueError,
+            //                         "mmap length is greater than file size");
+            //         return NULL;
+            //     }
+            // }
+            let mut mmap_opt = MmapOptions::new();
+            let mmap_opt = mmap_opt.offset(offset);
+            // .len(map_size)
+            let mmap = match access {
+                // `Default` only survives access resolution when prot is
+                // PROT_READ | PROT_WRITE: a MAP_PRIVATE request is then a
+                // copy-on-write mapping, anything else a shared writable one.
+                AccessMode::Default if (flags & MAP_PRIVATE) != 0 => unsafe {
+                    mmap_opt.map_copy(fd)
+                },
+                AccessMode::Default | AccessMode::Write => unsafe { mmap_opt.map_mut(fd) },
+                AccessMode::Copy => unsafe { mmap_opt.map_copy(fd) },
+                // memmap2's read-only `map` yields `Mmap`, which the `MmapMut`
+                // field cannot hold yet; raise a Python exception instead of
+                // panicking the whole interpreter.
+                AccessMode::Read => {
+                    return Err(vm.new_value_error(
+                        "mmap: ACCESS_READ mappings are not supported yet".to_owned(),
+                    ))
+                }
+            }
+            .map_err(|_| vm.new_value_error("FIXME: mmap error".to_owned()))?;
+
+            let m_obj = Self {
+                mmap,
+                exports: 0,
+                access,
+            };
+
+            // if (m_obj == NULL) {return NULL;}
+            // m_obj->data = NULL;
+            // m_obj->size = map_size;
+            // m_obj->pos = 0;
+            // m_obj->weakreflist = NULL;
+            // m_obj->exports = 0;
+            // m_obj->offset = offset;
+            // if (fd == -1) {
+            //     m_obj->fd = -1;
+            //     /* Assume the caller wants to map anonymous memory.
+            //        This is the same behaviour as Windows.  mmap.mmap(-1, size)
+            //        on both Windows and Unix map anonymous memory.
+ // */ + // #ifdef MAP_ANONYMOUS + // /* BSD way to map anonymous memory */ + // flags |= MAP_ANONYMOUS; + + // /* VxWorks only supports MAP_ANONYMOUS with MAP_PRIVATE flag */ + // #ifdef __VXWORKS__ + // flags &= ~MAP_SHARED; + // flags |= MAP_PRIVATE; + // #endif + + // #else + // /* SVR4 method to map anonymous memory is to open /dev/zero */ + // fd = devzero = _Py_open("/dev/zero", O_RDWR); + // if (devzero == -1) { + // Py_DECREF(m_obj); + // return NULL; + // } + // #endif + // } + // else { + // m_obj->fd = _Py_dup(fd); + // if (m_obj->fd == -1) { + // Py_DECREF(m_obj); + // return NULL; + // } + // } + + // m_obj->data = mmap(NULL, map_size, + // prot, flags, + // fd, offset); + + // if (devzero != -1) { + // close(devzero); + // } + + // if (m_obj->data == (char *)-1) { + // m_obj->data = NULL; + // Py_DECREF(m_obj); + // PyErr_SetFromErrno(PyExc_OSError); + // return NULL; + // } + // m_obj->access = (AccessMode)access; + // return (PyObject *)m_obj; + // } + m_obj.into_pyresult(vm) + } + } + + #[pyimpl] + impl PyMmap { + // {Py_tp_new, new_mmap_object}, + // {Py_tp_dealloc, mmap_object_dealloc}, + // {Py_tp_repr, mmap__repr__method}, + // {Py_tp_doc, (void *)mmap_doc}, + // {Py_tp_methods, mmap_object_methods}, + // {Py_tp_members, mmap_object_members}, + // {Py_tp_getset, mmap_object_getset}, + // {Py_tp_getattro, PyObject_GenericGetAttr}, + // {Py_tp_traverse, mmap_object_traverse}, + + // /* as sequence */ + // {Py_sq_length, mmap_length}, + // {Py_sq_item, mmap_item}, + // {Py_sq_ass_item, mmap_ass_item}, + + // /* as mapping */ + // {Py_mp_length, mmap_length}, + // {Py_mp_subscript, mmap_subscript}, + // {Py_mp_ass_subscript, mmap_ass_subscript}, + + // /* as buffer */ + // {Py_bf_getbuffer, mmap_buffer_getbuf}, + // {Py_bf_releasebuffer, mmap_buffer_releasebuf}, + + // {"close", (PyCFunction) mmap_close_method, METH_NOARGS}, + #[pymethod] + fn close(&self) -> PyResult<()> { + if self.exports > 0 { + // PyErr_SetString(PyExc_BufferError, "cannot 
close "\ + // "exported pointers exist"); + } + // self.mmap = MmapMut::map_anon(0).unwrap(); + Ok(()) + } + + // {"find", (PyCFunction) mmap_find_method, METH_VARARGS}, + // {"rfind", (PyCFunction) mmap_rfind_method, METH_VARARGS}, + // {"flush", (PyCFunction) mmap_flush_method, METH_VARARGS}, + // #ifdef HAVE_MADVISE + // {"madvise", (PyCFunction) mmap_madvise_method, METH_VARARGS}, + // #endif + // {"move", (PyCFunction) mmap_move_method, METH_VARARGS}, + // {"read", (PyCFunction) mmap_read_method, METH_VARARGS}, + // {"read_byte", (PyCFunction) mmap_read_byte_method, METH_NOARGS}, + // {"readline", (PyCFunction) mmap_read_line_method, METH_NOARGS}, + // {"resize", (PyCFunction) mmap_resize_method, METH_VARARGS}, + // {"seek", (PyCFunction) mmap_seek_method, METH_VARARGS}, + // {"size", (PyCFunction) mmap_size_method, METH_NOARGS}, + // {"tell", (PyCFunction) mmap_tell_method, METH_NOARGS}, + // {"write", (PyCFunction) mmap_write_method, METH_VARARGS}, + // {"write_byte", (PyCFunction) mmap_write_byte_method, METH_VARARGS}, + // {"__enter__", (PyCFunction) mmap__enter__method, METH_NOARGS}, + // {"__exit__", (PyCFunction) mmap__exit__method, METH_VARARGS}, + // #ifdef MS_WINDOWS + // {"__sizeof__", (PyCFunction) mmap__sizeof__method, METH_NOARGS}, + // #endif + } +}