From fc519fc64a04d7c6808a6f158734ce099e29cafa Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 21 Apr 2020 00:29:49 +0200 Subject: [PATCH] bpo-40346: Add random.BaseRandom BaseRandom implements random() and randbytes() using getrandbits(). It has no state and its gauss() method is thread safe. It has no VERSION attribute and its seed() method has no version parameter. The implementations of random.Random, random.SystemRandom and random.Random subclasses are not affected by this change. Changes: * random.SystemRandom now inherits from BaseRandom instead of Random. It no longer inherits from _random.Random. An instance now only takes 48 bytes of memory, rather than 2568 bytes (on x86-64). * Move random() method implementation from SystemRandom to BaseRandom. random.Random overrides it with _random.Random.random(). * Move the gauss_next attribute of the gauss() method optimization to random.Random class. * Mark randbytes() parameter as positional-only. --- Doc/library/random.rst | 70 ++++- Doc/whatsnew/3.9.rst | 19 ++ Lib/random.py | 270 ++++++++++-------- Lib/test/test_random.py | 105 +++++++ .../2020-04-21-00-59-44.bpo-40346.fCRiUM.rst | 3 + 5 files changed, 332 insertions(+), 135 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-04-21-00-59-44.bpo-40346.fCRiUM.rst diff --git a/Doc/library/random.rst b/Doc/library/random.rst index 291eca3a3f16a1..e24b5a5902bde4 100644 --- a/Doc/library/random.rst +++ b/Doc/library/random.rst @@ -33,11 +33,8 @@ The functions supplied by this module are actually bound methods of a hidden instance of the :class:`random.Random` class. You can instantiate your own instances of :class:`Random` to get generators that don't share state. -Class :class:`Random` can also be subclassed if you want to use a different -basic generator of your own devising: in that case, override the :meth:`~Random.random`, -:meth:`~Random.seed`, :meth:`~Random.getstate`, and :meth:`~Random.setstate` methods.
-Optionally, a new generator can supply a :meth:`~Random.getrandbits` method --- this -allows :meth:`randrange` to produce selections over an arbitrarily large range. +The base class :class:`BaseRandom` can be subclassed if you want to use a +different basic generator of your own devising. The :mod:`random` module also provides the :class:`SystemRandom` class which uses the system function :func:`os.urandom` to generate random numbers @@ -301,6 +298,7 @@ be found in any statistics text. deviation. This is slightly faster than the :func:`normalvariate` function defined below. + This function is not thread-safe. .. function:: lognormvariate(mu, sigma) @@ -337,25 +335,71 @@ be found in any statistics text. Alternative Generator --------------------- +.. class:: BaseRandom + + Random number generator base class. + + A subclass must only implement a single method: :meth:`getrandbits`. + + Optionally, the following methods can also be implemented if the generator + has a state: + + * :meth:`~BaseRandom.seed`, + * :meth:`~BaseRandom.getstate` + * :meth:`~BaseRandom.setstate` + + .. versionadded:: 3.9 + + .. method:: getrandbits(k) + + Returns a Python integer with *k* random bits. + + .. method:: seed([a]) + + Initialize the random number generator. + + .. method:: getstate() + + Return an object capturing the current internal state of the generator. + This object can be passed to :meth:`setstate` to restore the state. + + .. method:: setstate(state) + + *state* should have been obtained from a previous call to + :meth:`getstate`, and :meth:`setstate` restores the internal state of the + generator to what it was at the time :meth:`getstate` was called. + + .. class:: Random([seed]) - Class that implements the default pseudo-random number generator used by the - :mod:`random` module. + Mersenne Twister pseudo-random number generator. + + It is used by the bound module functions. 
+ + You can instantiate your own instances of :class:`Random` to get generators + that don't share state. + + Inherits from :class:`BaseRandom`. .. deprecated:: 3.9 In the future, the *seed* must be one of the following types: :class:`NoneType`, :class:`int`, :class:`float`, :class:`str`, :class:`bytes`, or :class:`bytearray`. -.. class:: SystemRandom([seed]) - Class that uses the :func:`os.urandom` function for generating random numbers - from sources provided by the operating system. Not available on all systems. - Does not rely on software state, and sequences are not reproducible. Accordingly, - the :meth:`seed` method has no effect and is ignored. +.. class:: SystemRandom + + Random number generator which uses the system function :func:`os.urandom` to + generate random numbers from sources provided by the operating system. + + Does not rely on software state, and sequences are not reproducible. + Accordingly, the :meth:`seed` method has no effect and is ignored. + The :meth:`getstate` and :meth:`setstate` methods raise :exc:`NotImplementedError` if called. + Inherits from :class:`BaseRandom`. + Notes on Reproducibility ------------------------ @@ -370,7 +414,7 @@ change across Python versions, but two aspects are guaranteed not to change: * If a new seeding method is added, then a backward compatible seeder will be offered. -* The generator's :meth:`~Random.random` method will continue to produce the same +* The generator's :meth:`~BaseRandom.random` method will continue to produce the same sequence when the compatible seeder is given the same seed. .. _random-examples: diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index ee851706055a30..02e7b1e34fd105 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -380,6 +380,15 @@ random Add a new :attr:`random.Random.randbytes` method: generate random bytes. (Contributed by Victor Stinner in :issue:`40286`.) +Add a new :class:`random.BaseRandom` class: random number generator base class.
+A :class:`random.BaseRandom` subclass only needs to implement a single method: +:meth:`~random.BaseRandom.getrandbits`, whereas a :class:`random.Random` +subclass must override 6 methods (:meth:`~random.Random.getrandbits`, +:meth:`~random.Random.random`, :meth:`~random.Random.randbytes`, +:meth:`~random.Random.seed`, :meth:`~random.Random.getstate` and +:meth:`~random.Random.setstate`). +(Contributed by Victor Stinner in :issue:`40346`.) + signal ------ @@ -847,6 +856,16 @@ Changes in the Python API ``PyCF_ALLOW_TOP_LEVEL_AWAIT`` was clashing with ``CO_FUTURE_DIVISION``. (Contributed by Batuhan Taskaya in :issue:`39562`) +* Subclasses of :class:`random.Random` should now override the + :meth:`~random.Random.randbytes` method in addition to the 5 methods: + :meth:`~random.Random.getrandbits`, :meth:`~random.Random.random`, + :meth:`~random.Random.seed`, :meth:`~random.Random.getstate` and + :meth:`~random.Random.setstate`. Or the new :class:`random.BaseRandom` base + class can be used instead; a subclass only needs to implement a single method: + :meth:`~random.BaseRandom.getrandbits`. + (Contributed by Victor Stinner in :issue:`40346`.) + + CPython bytecode changes ------------------------ diff --git a/Lib/random.py b/Lib/random.py index f1df18d5c187b8..0f24821776432e 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -45,6 +45,7 @@ from itertools import accumulate as _accumulate, repeat as _repeat from bisect import bisect as _bisect import os as _os +import _random try: # hashlib is pretty heavy to load, try lean internal module first @@ -73,32 +74,8 @@ # Adrian Baddeley. Adapted by Raymond Hettinger for use with # the Mersenne Twister and os.urandom() core generators. -import _random - -class Random(_random.Random): - """Random number generator base class used by bound module functions. - - Used to instantiate instances of Random to get generators that don't - share state.
- - Class Random can also be subclassed if you want to use a different basic - generator of your own devising: in that case, override the following - methods: random(), seed(), getstate(), and setstate(). - Optionally, implement a getrandbits() method so that randrange() - can cover arbitrarily large ranges. - - """ - - VERSION = 3 # used by getstate/setstate - - def __init__(self, x=None): - """Initialize an instance. - - Optional argument x controls seeding, as for Random.seed(). - """ - - self.seed(x) - self.gauss_next = None +class BaseRandom: + """Random number generator base class.""" def __init_subclass__(cls, /, **kwargs): """Control how subclasses generate random integers. @@ -120,74 +97,26 @@ def __init_subclass__(cls, /, **kwargs): cls._randbelow = cls._randbelow_without_getrandbits break - def seed(self, a=None, version=2): - """Initialize internal state from a seed. - - The only supported seed types are None, int, float, - str, bytes, and bytearray. - - None or no argument seeds from current time or from an operating - system specific randomness source if available. - - If *a* is an int, all bits are used. - - For version 2 (the default), all of the bits are used if *a* is a str, - bytes, or bytearray. For version 1 (provided for reproducing random - sequences from older versions of Python), the algorithm for str and - bytes generates a narrower range of seeds. 
- - """ + def getrandbits(self, k): + raise NotImplementedError - if version == 1 and isinstance(a, (str, bytes)): - a = a.decode('latin-1') if isinstance(a, bytes) else a - x = ord(a[0]) << 7 if a else 0 - for c in map(ord, a): - x = ((1000003 * x) ^ c) & 0xFFFFFFFFFFFFFFFF - x ^= len(a) - a = -2 if x == -1 else x + def seed(self, a=None, /): + raise NotImplementedError - elif version == 2 and isinstance(a, (str, bytes, bytearray)): - if isinstance(a, str): - a = a.encode() - a += _sha512(a).digest() - a = int.from_bytes(a, 'big') + def getstate(self): + raise NotImplementedError - elif not isinstance(a, (type(None), int, float, str, bytes, bytearray)): - _warn('Seeding based on hashing is deprecated\n' - 'since Python 3.9 and will be removed in a subsequent ' - 'version. The only \n' - 'supported seed types are: None, ' - 'int, float, str, bytes, and bytearray.', - DeprecationWarning, 2) + def setstate(self, state): + raise NotImplementedError - super().seed(a) - self.gauss_next = None +## -------------------- float and bytes ------------------- - def getstate(self): - """Return internal state; can be passed to setstate() later.""" - return self.VERSION, super().getstate(), self.gauss_next + def random(self): + """Get the next random number in the range [0.0, 1.0).""" + return self.getrandbits(53) * RECIP_BPF - def setstate(self, state): - """Restore internal state from object returned by getstate().""" - version = state[0] - if version == 3: - version, internalstate, self.gauss_next = state - super().setstate(internalstate) - elif version == 2: - version, internalstate, self.gauss_next = state - # In version 2, the state was saved as signed ints, which causes - # inconsistencies between 32/64-bit systems. The state is - # really unsigned 32-bit ints, so we convert negative ints from - # version 2 to positive longs for version 3. 
- try: - internalstate = tuple(x % (2**32) for x in internalstate) - except ValueError as e: - raise TypeError from e - super().setstate(internalstate) - else: - raise ValueError("state with version %s passed to " - "Random.setstate() of version %s" % - (version, self.VERSION)) + def randbytes(self, n, /): + return self.getrandbits(n * 8).to_bytes(n, 'little') ## ---- Methods below this point do not need to be overridden when ## ---- subclassing for the purpose of using a different core generator. @@ -636,9 +565,6 @@ def gauss(self, mu, sigma): mu is the mean, and sigma is the standard deviation. This is slightly faster than the normalvariate() function. - - Not thread-safe without a lock around calls. - """ # When x and y are two variables from [0, 1), uniformly @@ -652,22 +578,10 @@ def gauss(self, mu, sigma): # (Lambert Meertens) # (corrected version; bug discovered by Mike Miller, fixed by LM) - # Multithreading note: When two threads call this function - # simultaneously, it is possible that they will receive the - # same return value. The window is very small though. To - # avoid this, you have to use a lock around all calls. (I - # didn't want to slow this down in the serial case by using a - # lock here.) - random = self.random - z = self.gauss_next - self.gauss_next = None - if z is None: - x2pi = random() * TWOPI - g2rad = _sqrt(-2.0 * _log(1.0 - random())) - z = _cos(x2pi) * g2rad - self.gauss_next = _sin(x2pi) * g2rad - + x2pi = random() * TWOPI + g2rad = _sqrt(-2.0 * _log(1.0 - random())) + z = _cos(x2pi) * g2rad return mu + z*sigma ## -------------------- beta -------------------- @@ -722,19 +636,136 @@ def weibullvariate(self, alpha, beta): u = 1.0 - self.random() return alpha * (-_log(u)) ** (1.0/beta) -## --------------- Operating System Random Source ------------------ -class SystemRandom(Random): - """Alternate random number generator using sources provided - by the operating system (such as /dev/urandom on Unix or - CryptGenRandom on Windows). 
+## --------------- Mersenne Twister Source --------------------------- + +class Random(_random.Random, BaseRandom): + """Mersenne Twister Random number generator. - Not available on all systems (see os.urandom() for details). + It is used by bound module functions. + + Used to instantiate instances of Random to get generators that don't + share state. """ - def random(self): - """Get the next random number in the range [0.0, 1.0).""" - return (int.from_bytes(_urandom(7), 'big') >> 3) * RECIP_BPF + VERSION = 3 # used by getstate/setstate + + def __init__(self, x=None): + """Initialize an instance. + + Optional argument x controls seeding, as for Random.seed(). + """ + + self.seed(x) + + def seed(self, a=None, version=2): + """Initialize internal state from a seed. + + The only supported seed types are None, int, float, + str, bytes, and bytearray. + + None or no argument seeds from current time or from an operating + system specific randomness source if available. + + If *a* is an int, all bits are used. + + For version 2 (the default), all of the bits are used if *a* is a str, + bytes, or bytearray. For version 1 (provided for reproducing random + sequences from older versions of Python), the algorithm for str and + bytes generates a narrower range of seeds. + + """ + + if version == 1 and isinstance(a, (str, bytes)): + a = a.decode('latin-1') if isinstance(a, bytes) else a + x = ord(a[0]) << 7 if a else 0 + for c in map(ord, a): + x = ((1000003 * x) ^ c) & 0xFFFFFFFFFFFFFFFF + x ^= len(a) + a = -2 if x == -1 else x + + elif version == 2 and isinstance(a, (str, bytes, bytearray)): + if isinstance(a, str): + a = a.encode() + a += _sha512(a).digest() + a = int.from_bytes(a, 'big') + + elif not isinstance(a, (type(None), int, float, str, bytes, bytearray)): + _warn('Seeding based on hashing is deprecated\n' + 'since Python 3.9 and will be removed in a subsequent ' + 'version. 
The only \n' + 'supported seed types are: None, ' + 'int, float, str, bytes, and bytearray.', + DeprecationWarning, 2) + + super().seed(a) + self.gauss_next = None + + def getstate(self): + """Return internal state; can be passed to setstate() later.""" + return self.VERSION, super().getstate(), self.gauss_next + + def setstate(self, state): + """Restore internal state from object returned by getstate().""" + version = state[0] + if version == 3: + version, internalstate, self.gauss_next = state + super().setstate(internalstate) + elif version == 2: + version, internalstate, self.gauss_next = state + # In version 2, the state was saved as signed ints, which causes + # inconsistencies between 32/64-bit systems. The state is + # really unsigned 32-bit ints, so we convert negative ints from + # version 2 to positive longs for version 3. + try: + internalstate = tuple(x % (2**32) for x in internalstate) + except ValueError as e: + raise TypeError from e + super().setstate(internalstate) + else: + raise ValueError("state with version %s passed to " + "Random.setstate() of version %s" % + (version, self.VERSION)) + +## -------------------- Gauss (faster alternative) -------------------- + + def gauss(self, mu, sigma): + """Gaussian distribution. + + mu is the mean, and sigma is the standard deviation. This is + slightly faster than the normalvariate() function. + + Not thread-safe without a lock around calls. + + """ + + # Multithreading note: When two threads call this function + # simultaneously, it is possible that they will receive the + # same return value. The window is very small though. To + # avoid this, you have to use a lock around all calls. (I + # didn't want to slow this down in the serial case by using a + # lock here.) + # + # See also BaseRandom.gauss(). 
+ + random = self.random + z = self.gauss_next + self.gauss_next = None + if z is None: + x2pi = random() * TWOPI + g2rad = _sqrt(-2.0 * _log(1.0 - random())) + z = _cos(x2pi) * g2rad + self.gauss_next = _sin(x2pi) * g2rad + + return mu + z*sigma + + +## --------------- Operating System Random Source ------------------ + +class SystemRandom(BaseRandom): + """Random number generator which uses the system function os.urandom() + to generate random numbers from sources provided by the operating system. + """ def getrandbits(self, k): """getrandbits(k) -> x. Generates an int with k random bits.""" @@ -744,7 +775,7 @@ def getrandbits(self, k): x = int.from_bytes(_urandom(numbytes), 'big') return x >> (numbytes * 8 - k) # trim excess bits - def randbytes(self, n): + def randbytes(self, n, /): """Generate n random bytes.""" # os.urandom(n) fails with ValueError for n < 0 # and returns an empty bytes string for n == 0. @@ -752,12 +783,7 @@ def randbytes(self, n): def seed(self, *args, **kwds): "Stub method. Not used for a system random number generator." - return None - - def _notimplemented(self, *args, **kwds): - "Method should not be called for a system random number generator."
- raise NotImplementedError('System entropy source does not have state.') - getstate = setstate = _notimplemented + pass ## -------------------- test program -------------------- diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py index 42c68dd1c24422..1cb4fe3743cc28 100644 --- a/Lib/test/test_random.py +++ b/Lib/test/test_random.py @@ -334,6 +334,7 @@ def test_randbytes(self): self.assertRaises(TypeError, self.gen.randbytes, 1, 2) self.assertRaises(ValueError, self.gen.randbytes, -1) self.assertRaises(TypeError, self.gen.randbytes, 1.0) + self.assertRaises(TypeError, self.gen.randbytes, n=4) try: @@ -1166,5 +1167,109 @@ def test_after_fork(self): support.wait_process(pid, exitcode=0) +class TestBaseRandom(unittest.TestCase): + def test_getrandbits(self): + # Subclass which only implements getrandbits() + class SimpleLCG(random.BaseRandom): + _lcg = 123 + calls = [] + + def _rand_uint32(self): + # Numerical Recipes LCG: generate 32 bits of entropy + self._lcg = (self._lcg * 1664525 + 1013904223) % (2 ** 32) + return self._lcg + + def getrandbits(self, n): + self.calls.append(n) + x = self._rand_uint32() + for _ in range((n + 31) // 32 - 1): + x <<= 32 + x += self._rand_uint32() + if n % 32: + x >>= (32 - (n % 32)) + return x + + rng = SimpleLCG() + self.assertEqual([rng.randint(1, 10_000) for _ in range(5)], + [4649, 7130, 634, 3619, 5889]) + self.assertEqual(rng.calls, [14] * 5) + + def test_full(self): + # Subclass which implements getrandbits(), seed(), getstate() and + # setstate(), but don't override random() or randbytes(). 
+ class FullLCG(random.BaseRandom): + def __init__(self, x=0, /): + self.seed(x) + + def _rand_uint32(self): + # Numerical Recipes LCG: generate 32 bits of entropy + self._lcg = (self._lcg * 1664525 + 1013904223) % (2 ** 32) + return self._lcg + + def getrandbits(self, n): + x = self._rand_uint32() + for _ in range((n + 31) // 32 - 1): + x <<= 32 + x += self._rand_uint32() + if n % 32: + x >>= (32 - (n % 32)) + return x + + def seed(self, a=None, /): + self._lcg = int(a % (2 ** 32)) + + def getstate(self): + return self._lcg + + def setstate(self, state): + self.seed(state) + + rng = FullLCG() + + # getrandbits + rng.seed(123) + self.assertEqual([rng.getrandbits(32) for _ in range(10)], + [1218640798, + 1868869221, + 166005888, + 948671967, + 1543727538, + 2535079273, + 1551689652, + 1403809667, + 342478598, + 2782976685]) + + # randint, random, randbytes + rng.seed(123) + self.assertEqual([rng.randint(1, 10_000) for _ in range(5)], + [4649, 7130, 634, 3619, 5889]) + + rng.seed(123) + self.assertEqual([rng.random() for _ in range(5)], + [0.28373692148251684, + 0.03865125780479972, + 0.35942707643617033, + 0.36128090050230954, + 0.0797395125608803]) + + rng.seed(123) + self.assertEqual([rng.randbytes(10) for _ in range(5)], + [b'\xe5\te\xaedo\x9e\xfb\xa2H', + b'\x1a\x97\xb2i\x03\\\xdf\x95\x8b8', + b'i\x14\x83o\xacS\xb4\xe7|\\', + b'.\xf8(\xfc\xdf\x9a\xad\xda\xe0\xa5', + b'G\x171Jl\xf5\x9a\xff\x8b\xc4']) + + # getstate, setstate + rng.seed(123) + self.assertEqual(rng.getstate(), 123) + + rng.seed(0) + rng.setstate(123) + self.assertEqual([rng.getrandbits(32) for _ in range(3)], + [1218640798, 1868869221, 166005888]) + + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2020-04-21-00-59-44.bpo-40346.fCRiUM.rst b/Misc/NEWS.d/next/Library/2020-04-21-00-59-44.bpo-40346.fCRiUM.rst new file mode 100644 index 00000000000000..795f98d93b60a2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-04-21-00-59-44.bpo-40346.fCRiUM.rst @@ -0,0 +1,3 @@ 
+Add :class:`random.BaseRandom`: Random number generator base class. +A subclass only needs to implement a single method: +:meth:`~random.BaseRandom.getrandbits`.