From e8b84012463927a1fc9c5a236ce7c4ab4a7effaa Mon Sep 17 00:00:00 2001 From: Warren Weckesser Date: Sat, 25 Apr 2020 12:28:57 -0400 Subject: [PATCH 1/4] BUG: random: Generator.integers(2**32) always returned 0. When the input to Generator.integers was 2**32, the value 2**32-1 was being passed as the `rng` argument to the 32-bit Lemire method, but that method requires `rng` be strictly less then 2**32-1. The fix was to handle 2**32-1 by calling next_uint32 directly. This also works for the legacy code without changing the stream of random integers from `randint`. Closes gh-16066. --- .../random/src/distributions/distributions.c | 42 ++++++++++++++----- numpy/random/tests/test_generator_mt19937.py | 15 +++++++ 2 files changed, 46 insertions(+), 11 deletions(-) diff --git a/numpy/random/src/distributions/distributions.c b/numpy/random/src/distributions/distributions.c index 0b46dc6d8e26..d3ea647354ec 100644 --- a/numpy/random/src/distributions/distributions.c +++ b/numpy/random/src/distributions/distributions.c @@ -1337,6 +1337,14 @@ uint64_t random_bounded_uint64(bitgen_t *bitgen_state, uint64_t off, return off; } else if (rng <= 0xFFFFFFFFUL) { /* Call 32-bit generator if range in 32-bit. */ + if (rng == 0xFFFFFFFFUL) { + /* + * The 32-bit Lemire method does not handle rng=0xFFFFFFFF, so we'll + * call next_uint32 directly. This also works when use_masked is True, + * so we handle both cases here. + */ + return off + (uint64_t) next_uint32(bitgen_state); + } if (use_masked) { return off + buffered_bounded_masked_uint32(bitgen_state, rng, mask, NULL, NULL); @@ -1450,22 +1458,34 @@ void random_bounded_uint64_fill(bitgen_t *bitgen_state, uint64_t off, out[i] = off; } } else if (rng <= 0xFFFFFFFFUL) { - uint32_t buf = 0; - int bcnt = 0; - /* Call 32-bit generator if range in 32-bit. */ - if (use_masked) { - /* Smallest bit mask >= max */ - uint64_t mask = gen_mask(rng); + /* + * The 32-bit Lemire method does not handle rng=0xFFFFFFFF, so we'll + * call next_uint32 directly. This also works when use_masked is True, + * so we handle both cases here. + */ + if (rng == 0xFFFFFFFFUL) { for (i = 0; i < cnt; i++) { - out[i] = off + buffered_bounded_masked_uint32(bitgen_state, rng, mask, - &bcnt, &buf); + out[i] = off + (uint64_t) next_uint32(bitgen_state); } } else { - for (i = 0; i < cnt; i++) { - out[i] = off + - buffered_bounded_lemire_uint32(bitgen_state, rng, &bcnt, &buf); + uint32_t buf = 0; + int bcnt = 0; + + if (use_masked) { + /* Smallest bit mask >= max */ + uint64_t mask = gen_mask(rng); + + for (i = 0; i < cnt; i++) { + out[i] = off + buffered_bounded_masked_uint32(bitgen_state, rng, mask, + &bcnt, &buf); + } + } else { + for (i = 0; i < cnt; i++) { + out[i] = off + + buffered_bounded_lemire_uint32(bitgen_state, rng, &bcnt, &buf); + } } } } else if (rng == 0xFFFFFFFFFFFFFFFFULL) { diff --git a/numpy/random/tests/test_generator_mt19937.py b/numpy/random/tests/test_generator_mt19937.py index 3e820d9d7eaa..aa7dd45e9b7d 100644 --- a/numpy/random/tests/test_generator_mt19937.py +++ b/numpy/random/tests/test_generator_mt19937.py @@ -520,6 +520,21 @@ def test_repeatability_broadcasting(self, endpoint): assert_array_equal(val, val_bc) + @pytest.mark.parametrize( + 'bound, expected', + [(2**32 - 1, np.array([517043486, 1364798665, 1733884389, 1353720612, + 3769704066, 1170797179, 4108474671])), + (2**32, np.array([517043487, 1364798666, 1733884390, 1353720613, + 3769704067, 1170797180, 4108474672])), + (2**32 + 1, np.array([517043487, 1733884390, 3769704068, 4108474673, + 1831631863, 1215661561, 3869512430]))] + ) + def test_repeatability_32bit_boundary(self, bound, expected): + for size in [None, len(expected)]: + random = Generator(MT19937(1234)) + x = random.integers(bound, size=size) + assert_equal(x, expected if size is not None else expected[0]) + def test_int64_uint64_broadcast_exceptions(self, endpoint): configs = {np.uint64: ((0, 2**65), (-1, 2**62), (10, 9), (0, 0)), np.int64: ((0, 2**64), (-(2**64), 2**62), (10, 9), (0, 0), From 1ded86d35efcc6516083840497b558247b263cfb Mon Sep 17 00:00:00 2001 From: Warren Weckesser Date: Sun, 26 Apr 2020 16:54:38 -0400 Subject: [PATCH 2/4] MAINT: random: Add assert() statements. Assert that an invalid value (2**n-1 for n = 8, 16, 32, 64) has not been passed to the Lemire function. --- numpy/random/src/distributions/distributions.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/numpy/random/src/distributions/distributions.c b/numpy/random/src/distributions/distributions.c index d3ea647354ec..586e38aa5bcf 100644 --- a/numpy/random/src/distributions/distributions.c +++ b/numpy/random/src/distributions/distributions.c @@ -6,6 +6,8 @@ #include #endif +#include + /* Inline generators for internal use */ static NPY_INLINE uint32_t next_uint32(bitgen_t *bitgen_state) { return bitgen_state->next_uint32(bitgen_state->state); @@ -1149,6 +1151,8 @@ static NPY_INLINE uint64_t bounded_lemire_uint64(bitgen_t *bitgen_state, */ const uint64_t rng_excl = rng + 1; + assert(rng != 0xFFFFFFFFFFFFFFFFULL); + #if __SIZEOF_INT128__ /* 128-bit uint available (e.g. GCC/clang). `m` is the __uint128_t scaled * integer. */ @@ -1239,6 +1243,8 @@ static NPY_INLINE uint32_t buffered_bounded_lemire_uint32( uint64_t m; uint32_t leftover; + assert(rng != 0xFFFFFFFFUL); + /* Generate a scaled random number. */ m = ((uint64_t)next_uint32(bitgen_state)) * rng_excl; @@ -1273,6 +1279,8 @@ static NPY_INLINE uint16_t buffered_bounded_lemire_uint16( uint32_t m; uint16_t leftover; + assert(rng != 0xFFFFU); + /* Generate a scaled random number. */ m = ((uint32_t)buffered_uint16(bitgen_state, bcnt, buf)) * rng_excl; @@ -1308,6 +1316,9 @@ static NPY_INLINE uint8_t buffered_bounded_lemire_uint8(bitgen_t *bitgen_state, uint16_t m; uint8_t leftover; + assert(rng != 0xFFU); + + /* Generate a scaled random number. */ m = ((uint16_t)buffered_uint8(bitgen_state, bcnt, buf)) * rng_excl; From ca5ade84d02801401d4833eec9d8a50ef04e46ce Mon Sep 17 00:00:00 2001 From: Warren Weckesser Date: Mon, 27 Apr 2020 13:06:05 -0400 Subject: [PATCH 3/4] TST: random: Add more repeatability tests for random integers. Add repeatability tests for when the range of the integers is `2**32` (and `2**32 +/- 1` for good measure) with broadcasting. The underlying functions called by Generator.integers and random.randint when the inputs are broadcast are different than when the inputs are scalars. --- numpy/random/tests/test_generator_mt19937.py | 23 ++++++++++++++++++++ numpy/random/tests/test_randomstate.py | 22 +++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/numpy/random/tests/test_generator_mt19937.py b/numpy/random/tests/test_generator_mt19937.py index aa7dd45e9b7d..08b44e4dbba3 100644 --- a/numpy/random/tests/test_generator_mt19937.py +++ b/numpy/random/tests/test_generator_mt19937.py @@ -535,6 +535,29 @@ def test_repeatability_32bit_boundary(self, bound, expected): x = random.integers(bound, size=size) assert_equal(x, expected if size is not None else expected[0]) + def test_repeatability_32bit_boundary_broadcasting(self): + desired = np.array([[[1622936284, 3620788691, 1659384060], + [1417365545, 760222891, 1909653332], + [3788118662, 660249498, 4092002593]], + [[3625610153, 2979601262, 3844162757], + [ 685800658, 120261497, 2694012896], + [1207779440, 1586594375, 3854335050]], + [[3004074748, 2310761796, 3012642217], + [2067714190, 2786677879, 1363865881], + [ 791663441, 1867303284, 2169727960]], + [[1939603804, 1250951100, 298950036], + [1040128489, 3791912209, 3317053765], + [3155528714, 61360675, 2305155588]], + [[ 817688762, 1335621943, 3288952434], + [1770890872, 1102951817, 1957607470], + [3099996017, 798043451, 48334215]]]) + for size in [None, (5, 3, 3)]: + random = Generator(MT19937(12345)) + x = random.integers([[-1], [0], [1]], + [2**32 - 1, 2**32, 2**32 + 1], + size=size) + assert_array_equal(x, desired if size is not None else desired[0]) + def test_int64_uint64_broadcast_exceptions(self, endpoint): configs = {np.uint64: ((0, 2**65), (-1, 2**62), (10, 9), (0, 0)), np.int64: ((0, 2**64), (-(2**64), 2**62), (10, 9), (0, 0), diff --git a/numpy/random/tests/test_randomstate.py b/numpy/random/tests/test_randomstate.py index ebe8558ba16d..9c2128529d06 100644 --- a/numpy/random/tests/test_randomstate.py +++ b/numpy/random/tests/test_randomstate.py @@ -350,6 +350,28 @@ def test_repeatability(self): res = hashlib.md5(val).hexdigest() assert_(tgt[np.dtype(bool).name] == res) + def test_repeatability_32bit_boundary_broadcasting(self): + desired = np.array([[[3992670689, 2438360420, 2557845020], + [4107320065, 4142558326, 3216529513], + [1605979228, 2807061240, 665605495]], + [[3211410639, 4128781000, 457175120], + [1712592594, 1282922662, 3081439808], + [3997822960, 2008322436, 1563495165]], + [[1398375547, 4269260146, 115316740], + [3414372578, 3437564012, 2112038651], + [3572980305, 2260248732, 3908238631]], + [[2561372503, 223155946, 3127879445], + [ 441282060, 3514786552, 2148440361], + [1629275283, 3479737011, 3003195987]], + [[ 412181688, 940383289, 3047321305], + [2978368172, 764731833, 2282559898], + [ 105711276, 720447391, 3596512484]]]) + for size in [None, (5, 3, 3)]: + random.seed(12345) + x = self.rfunc([[-1], [0], [1]], [2**32 - 1, 2**32, 2**32 + 1], + size=size) + assert_array_equal(x, desired if size is not None else desired[0]) + def test_int64_uint64_corner_case(self): # When stored in Numpy arrays, `lbnd` is casted # as np.int64, and `ubnd` is casted as np.uint64. From 621efc7718d3676c390bd673c0f8b53bcb4fc308 Mon Sep 17 00:00:00 2001 From: Warren Weckesser Date: Mon, 27 Apr 2020 13:48:04 -0400 Subject: [PATCH 4/4] TST: random: Skip a test if integers are 32 bit. --- numpy/random/tests/test_randomstate.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/numpy/random/tests/test_randomstate.py b/numpy/random/tests/test_randomstate.py index 9c2128529d06..edd7811bf5c7 100644 --- a/numpy/random/tests/test_randomstate.py +++ b/numpy/random/tests/test_randomstate.py @@ -350,6 +350,8 @@ def test_repeatability(self): res = hashlib.md5(val).hexdigest() assert_(tgt[np.dtype(bool).name] == res) + @pytest.mark.skipif(np.iinfo('l').max < 2**32, + reason='Cannot test with 32-bit C long') def test_repeatability_32bit_boundary_broadcasting(self): desired = np.array([[[3992670689, 2438360420, 2557845020], [4107320065, 4142558326, 3216529513],