@@ -3225,6 +3225,123 @@ math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
32253225}
32263226
32273227
3228+ /* least significant 64 bits of the odd part of factorial(n), for n in range(128).
3229+
3230+ Python code to generate the values:
3231+
3232+ import math
3233+
3234+ for n in range(128):
3235+ fac = math.factorial(n)
3236+ fac_odd_part = fac // (fac & -fac)
3237+ reduced_fac_odd_part = fac_odd_part % (2**64)
3238+ print(f"{reduced_fac_odd_part:#018x}u")
3239+ */
3240+ static const uint64_t reduced_factorial_odd_part [] = {
3241+ 0x0000000000000001u , 0x0000000000000001u , 0x0000000000000001u , 0x0000000000000003u ,
3242+ 0x0000000000000003u , 0x000000000000000fu , 0x000000000000002du , 0x000000000000013bu ,
3243+ 0x000000000000013bu , 0x0000000000000b13u , 0x000000000000375fu , 0x0000000000026115u ,
3244+ 0x000000000007233fu , 0x00000000005cca33u , 0x0000000002898765u , 0x00000000260eeeebu ,
3245+ 0x00000000260eeeebu , 0x0000000286fddd9bu , 0x00000016beecca73u , 0x000001b02b930689u ,
3246+ 0x00000870d9df20adu , 0x0000b141df4dae31u , 0x00079dd498567c1bu , 0x00af2e19afc5266du ,
3247+ 0x020d8a4d0f4f7347u , 0x335281867ec241efu , 0x9b3093d46fdd5923u , 0x5e1f9767cc5866b1u ,
3248+ 0x92dd23d6966aced7u , 0xa30d0f4f0a196e5bu , 0x8dc3e5a1977d7755u , 0x2ab8ce915831734bu ,
3249+ 0x2ab8ce915831734bu , 0x81d2a0bc5e5fdcabu , 0x9efcac82445da75bu , 0xbc8b95cf58cde171u ,
3250+ 0xa0e8444a1f3cecf9u , 0x4191deb683ce3ffdu , 0xddd3878bc84ebfc7u , 0xcb39a64b83ff3751u ,
3251+ 0xf8203f7993fc1495u , 0xbd2a2a78b35f4bddu , 0x84757be6b6d13921u , 0x3fbbcfc0b524988bu ,
3252+ 0xbd11ed47c8928df9u , 0x3c26b59e41c2f4c5u , 0x677a5137e883fdb3u , 0xff74e943b03b93ddu ,
3253+ 0xfe5ebbcb10b2bb97u , 0xb021f1de3235e7e7u , 0x33509eb2e743a58fu , 0x390f9da41279fb7du ,
3254+ 0xe5cb0154f031c559u , 0x93074695ba4ddb6du , 0x81c471caa636247fu , 0xe1347289b5a1d749u ,
3255+ 0x286f21c3f76ce2ffu , 0x00be84a2173e8ac7u , 0x1595065ca215b88bu , 0xf95877595b018809u ,
3256+ 0x9c2efe3c5516f887u , 0x373294604679382bu , 0xaf1ff7a888adcd35u , 0x18ddf279a2c5800bu ,
3257+ 0x18ddf279a2c5800bu , 0x505a90e2542582cbu , 0x5bacad2cd8d5dc2bu , 0xfe3152bcbff89f41u ,
3258+ 0xe1467e88bf829351u , 0xb8001adb9e31b4d5u , 0x2803ac06a0cbb91fu , 0x1904b5d698805799u ,
3259+ 0xe12a648b5c831461u , 0x3516abbd6160cfa9u , 0xac46d25f12fe036du , 0x78bfa1da906b00efu ,
3260+ 0xf6390338b7f111bdu , 0x0f25f80f538255d9u , 0x4ec8ca55b8db140fu , 0x4ff670740b9b30a1u ,
3261+ 0x8fd032443a07f325u , 0x80dfe7965c83eeb5u , 0xa3dc1714d1213afdu , 0x205b7bbfcdc62007u ,
3262+ 0xa78126bbe140a093u , 0x9de1dc61ca7550cfu , 0x84f0046d01b492c5u , 0x2d91810b945de0f3u ,
3263+ 0xf5408b7f6008aa71u , 0x43707f4863034149u , 0xdac65fb9679279d5u , 0xc48406e7d1114eb7u ,
3264+ 0xa7dc9ed3c88e1271u , 0xfb25b2efdb9cb30du , 0x1bebda0951c4df63u , 0x5c85e975580ee5bdu ,
3265+ 0x1591bc60082cb137u , 0x2c38606318ef25d7u , 0x76ca72f7c5c63e27u , 0xf04a75d17baa0915u ,
3266+ 0x77458175139ae30du , 0x0e6c1330bc1b9421u , 0xdf87d2b5797e8293u , 0xefa5c703e1e68925u ,
3267+ 0x2b6b1b3278b4f6e1u , 0xceee27b382394249u , 0xd74e3829f5dab91du , 0xfdb17989c26b5f1fu ,
3268+ 0xc1b7d18781530845u , 0x7b4436b2105a8561u , 0x7ba7c0418372a7d7u , 0x9dbc5c67feb6c639u ,
3269+ 0x502686d7f6ff6b8fu , 0x6101855406be7a1fu , 0x9956afb5806930e7u , 0xe1f0ee88af40f7c5u ,
3270+ 0x984b057bda5c1151u , 0x9a49819acc13ea05u , 0x8ef0dead0896ef27u , 0x71f7826efe292b21u ,
3271+ 0xad80a480e46986efu , 0x01cdc0ebf5e0c6f7u , 0x6e06f839968f68dbu , 0xdd5943ab56e76139u ,
3272+ 0xcdcf31bf8604c5e7u , 0x7e2b4a847054a1cbu , 0x0ca75697a4d3d0f5u , 0x4703f53ac514a98bu ,
3273+ };
3274+
3275+ /* inverses of reduced_factorial_odd_part values modulo 2**64.
3276+
3277+ Python code to generate the values:
3278+
3279+ import math
3280+
3281+ for n in range(128):
3282+ fac = math.factorial(n)
3283+ fac_odd_part = fac // (fac & -fac)
3284+ inverted_fac_odd_part = pow(fac_odd_part, -1, 2**64)
3285+ print(f"{inverted_fac_odd_part:#018x}u")
3286+ */
3287+ static const uint64_t inverted_factorial_odd_part [] = {
3288+ 0x0000000000000001u , 0x0000000000000001u , 0x0000000000000001u , 0xaaaaaaaaaaaaaaabu ,
3289+ 0xaaaaaaaaaaaaaaabu , 0xeeeeeeeeeeeeeeefu , 0x4fa4fa4fa4fa4fa5u , 0x2ff2ff2ff2ff2ff3u ,
3290+ 0x2ff2ff2ff2ff2ff3u , 0x938cc70553e3771bu , 0xb71c27cddd93e49fu , 0xb38e3229fcdee63du ,
3291+ 0xe684bb63544a4cbfu , 0xc2f684917ca340fbu , 0xf747c9cba417526du , 0xbb26eb51d7bd49c3u ,
3292+ 0xbb26eb51d7bd49c3u , 0xb0a7efb985294093u , 0xbe4b8c69f259eabbu , 0x6854d17ed6dc4fb9u ,
3293+ 0xe1aa904c915f4325u , 0x3b8206df131cead1u , 0x79c6009fea76fe13u , 0xd8c5d381633cd365u ,
3294+ 0x4841f12b21144677u , 0x4a91ff68200b0d0fu , 0x8f9513a58c4f9e8bu , 0x2b3e690621a42251u ,
3295+ 0x4f520f00e03c04e7u , 0x2edf84ee600211d3u , 0xadcaa2764aaacdfdu , 0x161f4f9033f4fe63u ,
3296+ 0x161f4f9033f4fe63u , 0xbada2932ea4d3e03u , 0xcec189f3efaa30d3u , 0xf7475bb68330bf91u ,
3297+ 0x37eb7bf7d5b01549u , 0x46b35660a4e91555u , 0xa567c12d81f151f7u , 0x4c724007bb2071b1u ,
3298+ 0x0f4a0cce58a016bdu , 0xfa21068e66106475u , 0x244ab72b5a318ae1u , 0x366ce67e080d0f23u ,
3299+ 0xd666fdae5dd2a449u , 0xd740ddd0acc06a0du , 0xb050bbbb28e6f97bu , 0x70b003fe890a5c75u ,
3300+ 0xd03aabff83037427u , 0x13ec4ca72c783bd7u , 0x90282c06afdbd96fu , 0x4414ddb9db4a95d5u ,
3301+ 0xa2c68735ae6832e9u , 0xbf72d71455676665u , 0xa8469fab6b759b7fu , 0xc1e55b56e606caf9u ,
3302+ 0x40455630fc4a1cffu , 0x0120a7b0046d16f7u , 0xa7c3553b08faef23u , 0x9f0bfd1b08d48639u ,
3303+ 0xa433ffce9a304d37u , 0xa22ad1d53915c683u , 0xcb6cbc723ba5dd1du , 0x547fb1b8ab9d0ba3u ,
3304+ 0x547fb1b8ab9d0ba3u , 0x8f15a826498852e3u , 0x32e1a03f38880283u , 0x3de4cce63283f0c1u ,
3305+ 0x5dfe6667e4da95b1u , 0xfda6eeeef479e47du , 0xf14de991cc7882dfu , 0xe68db79247630ca9u ,
3306+ 0xa7d6db8207ee8fa1u , 0x255e1f0fcf034499u , 0xc9a8990e43dd7e65u , 0x3279b6f289702e0fu ,
3307+ 0xe7b5905d9b71b195u , 0x03025ba41ff0da69u , 0xb7df3d6d3be55aefu , 0xf89b212ebff2b361u ,
3308+ 0xfe856d095996f0adu , 0xd6e533e9fdf20f9du , 0xf8c0e84a63da3255u , 0xa677876cd91b4db7u ,
3309+ 0x07ed4f97780d7d9bu , 0x90a8705f258db62fu , 0xa41bbb2be31b1c0du , 0x6ec28690b038383bu ,
3310+ 0xdb860c3bb2edd691u , 0x0838286838a980f9u , 0x558417a74b36f77du , 0x71779afc3646ef07u ,
3311+ 0x743cda377ccb6e91u , 0x7fdf9f3fe89153c5u , 0xdc97d25df49b9a4bu , 0x76321a778eb37d95u ,
3312+ 0x7cbb5e27da3bd487u , 0x9cff4ade1a009de7u , 0x70eb166d05c15197u , 0xdcf0460b71d5fe3du ,
3313+ 0x5ac1ee5260b6a3c5u , 0xc922dedfdd78efe1u , 0xe5d381dc3b8eeb9bu , 0xd57e5347bafc6aadu ,
3314+ 0x86939040983acd21u , 0x395b9d69740a4ff9u , 0x1467299c8e43d135u , 0x5fe440fcad975cdfu ,
3315+ 0xcaa9a39794a6ca8du , 0xf61dbd640868dea1u , 0xac09d98d74843be7u , 0x2b103b9e1a6b4809u ,
3316+ 0x2ab92d16960f536fu , 0x6653323d5e3681dfu , 0xefd48c1c0624e2d7u , 0xa496fefe04816f0du ,
3317+ 0x1754a7b07bbdd7b1u , 0x23353c829a3852cdu , 0xbf831261abd59097u , 0x57a8e656df0618e1u ,
3318+ 0x16e9206c3100680fu , 0xadad4c6ee921dac7u , 0x635f2b3860265353u , 0xdd6d0059f44b3d09u ,
3319+ 0xac4dd6b894447dd7u , 0x42ea183eeaa87be3u , 0x15612d1550ee5b5du , 0x226fa19d656cb623u ,
3320+ };
3321+
3322+ /* exponent of the largest power of 2 dividing factorial(n), for n in range(68)
3323+
3324+ Python code to generate the values:
3325+
3326+ import math
3327+
3328+ for n in range(128):
3329+ fac = math.factorial(n)
3330+ fac_trailing_zeros = (fac & -fac).bit_length() - 1
3331+ print(fac_trailing_zeros)
3332+ */
3333+
3334+ static const uint8_t factorial_trailing_zeros [] = {
3335+ 0 , 0 , 1 , 1 , 3 , 3 , 4 , 4 , 7 , 7 , 8 , 8 , 10 , 10 , 11 , 11 , // 0-15
3336+ 15 , 15 , 16 , 16 , 18 , 18 , 19 , 19 , 22 , 22 , 23 , 23 , 25 , 25 , 26 , 26 , // 16-31
3337+ 31 , 31 , 32 , 32 , 34 , 34 , 35 , 35 , 38 , 38 , 39 , 39 , 41 , 41 , 42 , 42 , // 32-47
3338+ 46 , 46 , 47 , 47 , 49 , 49 , 50 , 50 , 53 , 53 , 54 , 54 , 56 , 56 , 57 , 57 , // 48-63
3339+ 63 , 63 , 64 , 64 , 66 , 66 , 67 , 67 , 70 , 70 , 71 , 71 , 73 , 73 , 74 , 74 , // 64-79
3340+ 78 , 78 , 79 , 79 , 81 , 81 , 82 , 82 , 85 , 85 , 86 , 86 , 88 , 88 , 89 , 89 , // 80-95
3341+ 94 , 94 , 95 , 95 , 97 , 97 , 98 , 98 , 101 , 101 , 102 , 102 , 104 , 104 , 105 , 105 , // 96-111
3342+ 109 , 109 , 110 , 110 , 112 , 112 , 113 , 113 , 116 , 116 , 117 , 117 , 119 , 119 , 120 , 120 , // 112-127
3343+ };
3344+
32283345/* Number of permutations and combinations.
32293346 * P(n, k) = n! / (n-k)!
32303347 * C(n, k) = P(n, k) / k!
@@ -3234,48 +3351,93 @@ math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
32343351static PyObject *
32353352perm_comb_small (unsigned long long n , unsigned long long k , int iscomb )
32363353{
3237- /* long long is at least 64 bit */
3238- static const unsigned long long fast_comb_limits [] = {
3239- 0 , ULLONG_MAX , 4294967296ULL , 3329022 , 102570 , 13467 , 3612 , 1449 , // 0-7
3240- 746 , 453 , 308 , 227 , 178 , 147 , 125 , 110 , // 8-15
3241- 99 , 90 , 84 , 79 , 75 , 72 , 69 , 68 , // 16-23
3242- 66 , 65 , 64 , 63 , 63 , 62 , 62 , 62 , // 24-31
3243- };
3244- static const unsigned long long fast_perm_limits [] = {
3245- 0 , ULLONG_MAX , 4294967296ULL , 2642246 , 65537 , 7133 , 1627 , 568 , // 0-7
3246- 259 , 142 , 88 , 61 , 45 , 36 , 30 , // 8-14
3247- };
3248-
32493354 if (k == 0 ) {
32503355 return PyLong_FromLong (1 );
32513356 }
32523357
32533358 /* For small enough n and k the result fits in the 64-bit range and can
32543359 * be calculated without allocating intermediate PyLong objects. */
3255- if (iscomb
3256- ? (k < Py_ARRAY_LENGTH (fast_comb_limits )
3257- && n <= fast_comb_limits [k ])
3258- : (k < Py_ARRAY_LENGTH (fast_perm_limits )
3259- && n <= fast_perm_limits [k ]))
3260- {
3261- unsigned long long result = n ;
3262- if (iscomb ) {
3360+ if (iscomb ) {
3361+ /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)
3362+ * fits into a uint64_t. Exclude k = 1, because the second fast
3363+ * path is faster for this case.*/
3364+ static const unsigned char fast_comb_limits1 [] = {
3365+ 0 , 0 , 127 , 127 , 127 , 127 , 127 , 127 , // 0-7
3366+ 127 , 127 , 127 , 127 , 127 , 127 , 127 , 127 , // 8-15
3367+ 116 , 105 , 97 , 91 , 86 , 82 , 78 , 76 , // 16-23
3368+ 74 , 72 , 71 , 70 , 69 , 68 , 68 , 67 , // 24-31
3369+ 67 , 67 , 67 , // 32-34
3370+ };
3371+ if (k < Py_ARRAY_LENGTH (fast_comb_limits1 ) && n <= fast_comb_limits1 [k ]) {
3372+ /*
3373+ comb(n, k) fits into a uint64_t. We compute it as
3374+
3375+ comb_odd_part << shift
3376+
3377+ where 2**shift is the largest power of two dividing comb(n, k)
3378+ and comb_odd_part is comb(n, k) >> shift. comb_odd_part can be
3379+ calculated efficiently via arithmetic modulo 2**64, using three
3380+ lookups and two uint64_t multiplications.
3381+ */
3382+ uint64_t comb_odd_part = reduced_factorial_odd_part [n ]
3383+ * inverted_factorial_odd_part [k ]
3384+ * inverted_factorial_odd_part [n - k ];
3385+ int shift = factorial_trailing_zeros [n ]
3386+ - factorial_trailing_zeros [k ]
3387+ - factorial_trailing_zeros [n - k ];
3388+ return PyLong_FromUnsignedLongLong (comb_odd_part << shift );
3389+ }
3390+
3391+ /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)*k
3392+ * fits into a long long (which is at least 64 bit). Only contains
3393+ * items larger than in fast_comb_limits1. */
3394+ static const unsigned long long fast_comb_limits2 [] = {
3395+ 0 , ULLONG_MAX , 4294967296ULL , 3329022 , 102570 , 13467 , 3612 , 1449 , // 0-7
3396+ 746 , 453 , 308 , 227 , 178 , 147 , // 8-13
3397+ };
3398+ if (k < Py_ARRAY_LENGTH (fast_comb_limits2 ) && n <= fast_comb_limits2 [k ]) {
3399+ /* C(n, k) = C(n, k-1) * (n-k+1) / k */
3400+ unsigned long long result = n ;
32633401 for (unsigned long long i = 1 ; i < k ;) {
32643402 result *= -- n ;
32653403 result /= ++ i ;
32663404 }
3405+ return PyLong_FromUnsignedLongLong (result );
32673406 }
3268- else {
3407+ }
3408+ else {
3409+ /* Maps k to the maximal n so that k <= n and P(n, k)
3410+ * fits into a long long (which is at least 64 bit). */
3411+ static const unsigned long long fast_perm_limits [] = {
3412+ 0 , ULLONG_MAX , 4294967296ULL , 2642246 , 65537 , 7133 , 1627 , 568 , // 0-7
3413+ 259 , 142 , 88 , 61 , 45 , 36 , 30 , 26 , // 8-15
3414+ 24 , 22 , 21 , 20 , 20 , // 16-20
3415+ };
3416+ if (k < Py_ARRAY_LENGTH (fast_perm_limits ) && n <= fast_perm_limits [k ]) {
3417+ if (n <= 127 ) {
3418+ /* P(n, k) fits into a uint64_t. */
3419+ uint64_t perm_odd_part = reduced_factorial_odd_part [n ]
3420+ * inverted_factorial_odd_part [n - k ];
3421+ int shift = factorial_trailing_zeros [n ]
3422+ - factorial_trailing_zeros [n - k ];
3423+ return PyLong_FromUnsignedLongLong (perm_odd_part << shift );
3424+ }
3425+
3426+ /* P(n, k) = P(n, k-1) * (n-k+1) */
3427+ unsigned long long result = n ;
32693428 for (unsigned long long i = 1 ; i < k ;) {
32703429 result *= -- n ;
32713430 ++ i ;
32723431 }
3432+ return PyLong_FromUnsignedLongLong (result );
32733433 }
3274- return PyLong_FromUnsignedLongLong (result );
32753434 }
32763435
3277- /* For larger n use recursive formula. */
3278- /* C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j) */
3436+ /* For larger n use recursive formulas:
3437+ *
3438+ * P(n, k) = P(n, j) * P(n-j, k-j)
3439+ * C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j)
3440+ */
32793441 unsigned long long j = k / 2 ;
32803442 PyObject * a , * b ;
32813443 a = perm_comb_small (n , j , iscomb );
@@ -3450,90 +3612,6 @@ math_perm_impl(PyObject *module, PyObject *n, PyObject *k)
34503612 return NULL ;
34513613}
34523614
3453- /* least significant 64 bits of the odd part of factorial(n), for n in range(68).
3454-
3455- Python code to generate the values:
3456-
3457- import math
3458-
3459- for n in range(68):
3460- fac = math.factorial(n)
3461- fac_odd_part = fac // (fac & -fac)
3462- reduced_fac_odd_part = fac_odd_part % (2**64)
3463- print(f"{reduced_fac_odd_part:#018x}u")
3464- */
3465- static const uint64_t reduced_factorial_odd_part [] = {
3466- 0x0000000000000001u , 0x0000000000000001u , 0x0000000000000001u , 0x0000000000000003u ,
3467- 0x0000000000000003u , 0x000000000000000fu , 0x000000000000002du , 0x000000000000013bu ,
3468- 0x000000000000013bu , 0x0000000000000b13u , 0x000000000000375fu , 0x0000000000026115u ,
3469- 0x000000000007233fu , 0x00000000005cca33u , 0x0000000002898765u , 0x00000000260eeeebu ,
3470- 0x00000000260eeeebu , 0x0000000286fddd9bu , 0x00000016beecca73u , 0x000001b02b930689u ,
3471- 0x00000870d9df20adu , 0x0000b141df4dae31u , 0x00079dd498567c1bu , 0x00af2e19afc5266du ,
3472- 0x020d8a4d0f4f7347u , 0x335281867ec241efu , 0x9b3093d46fdd5923u , 0x5e1f9767cc5866b1u ,
3473- 0x92dd23d6966aced7u , 0xa30d0f4f0a196e5bu , 0x8dc3e5a1977d7755u , 0x2ab8ce915831734bu ,
3474- 0x2ab8ce915831734bu , 0x81d2a0bc5e5fdcabu , 0x9efcac82445da75bu , 0xbc8b95cf58cde171u ,
3475- 0xa0e8444a1f3cecf9u , 0x4191deb683ce3ffdu , 0xddd3878bc84ebfc7u , 0xcb39a64b83ff3751u ,
3476- 0xf8203f7993fc1495u , 0xbd2a2a78b35f4bddu , 0x84757be6b6d13921u , 0x3fbbcfc0b524988bu ,
3477- 0xbd11ed47c8928df9u , 0x3c26b59e41c2f4c5u , 0x677a5137e883fdb3u , 0xff74e943b03b93ddu ,
3478- 0xfe5ebbcb10b2bb97u , 0xb021f1de3235e7e7u , 0x33509eb2e743a58fu , 0x390f9da41279fb7du ,
3479- 0xe5cb0154f031c559u , 0x93074695ba4ddb6du , 0x81c471caa636247fu , 0xe1347289b5a1d749u ,
3480- 0x286f21c3f76ce2ffu , 0x00be84a2173e8ac7u , 0x1595065ca215b88bu , 0xf95877595b018809u ,
3481- 0x9c2efe3c5516f887u , 0x373294604679382bu , 0xaf1ff7a888adcd35u , 0x18ddf279a2c5800bu ,
3482- 0x18ddf279a2c5800bu , 0x505a90e2542582cbu , 0x5bacad2cd8d5dc2bu , 0xfe3152bcbff89f41u ,
3483- };
3484-
3485- /* inverses of reduced_factorial_odd_part values modulo 2**64.
3486-
3487- Python code to generate the values:
3488-
3489- import math
3490-
3491- for n in range(68):
3492- fac = math.factorial(n)
3493- fac_odd_part = fac // (fac & -fac)
3494- inverted_fac_odd_part = pow(fac_odd_part, -1, 2**64)
3495- print(f"{inverted_fac_odd_part:#018x}u")
3496- */
3497- static const uint64_t inverted_factorial_odd_part [] = {
3498- 0x0000000000000001u , 0x0000000000000001u , 0x0000000000000001u , 0xaaaaaaaaaaaaaaabu ,
3499- 0xaaaaaaaaaaaaaaabu , 0xeeeeeeeeeeeeeeefu , 0x4fa4fa4fa4fa4fa5u , 0x2ff2ff2ff2ff2ff3u ,
3500- 0x2ff2ff2ff2ff2ff3u , 0x938cc70553e3771bu , 0xb71c27cddd93e49fu , 0xb38e3229fcdee63du ,
3501- 0xe684bb63544a4cbfu , 0xc2f684917ca340fbu , 0xf747c9cba417526du , 0xbb26eb51d7bd49c3u ,
3502- 0xbb26eb51d7bd49c3u , 0xb0a7efb985294093u , 0xbe4b8c69f259eabbu , 0x6854d17ed6dc4fb9u ,
3503- 0xe1aa904c915f4325u , 0x3b8206df131cead1u , 0x79c6009fea76fe13u , 0xd8c5d381633cd365u ,
3504- 0x4841f12b21144677u , 0x4a91ff68200b0d0fu , 0x8f9513a58c4f9e8bu , 0x2b3e690621a42251u ,
3505- 0x4f520f00e03c04e7u , 0x2edf84ee600211d3u , 0xadcaa2764aaacdfdu , 0x161f4f9033f4fe63u ,
3506- 0x161f4f9033f4fe63u , 0xbada2932ea4d3e03u , 0xcec189f3efaa30d3u , 0xf7475bb68330bf91u ,
3507- 0x37eb7bf7d5b01549u , 0x46b35660a4e91555u , 0xa567c12d81f151f7u , 0x4c724007bb2071b1u ,
3508- 0x0f4a0cce58a016bdu , 0xfa21068e66106475u , 0x244ab72b5a318ae1u , 0x366ce67e080d0f23u ,
3509- 0xd666fdae5dd2a449u , 0xd740ddd0acc06a0du , 0xb050bbbb28e6f97bu , 0x70b003fe890a5c75u ,
3510- 0xd03aabff83037427u , 0x13ec4ca72c783bd7u , 0x90282c06afdbd96fu , 0x4414ddb9db4a95d5u ,
3511- 0xa2c68735ae6832e9u , 0xbf72d71455676665u , 0xa8469fab6b759b7fu , 0xc1e55b56e606caf9u ,
3512- 0x40455630fc4a1cffu , 0x0120a7b0046d16f7u , 0xa7c3553b08faef23u , 0x9f0bfd1b08d48639u ,
3513- 0xa433ffce9a304d37u , 0xa22ad1d53915c683u , 0xcb6cbc723ba5dd1du , 0x547fb1b8ab9d0ba3u ,
3514- 0x547fb1b8ab9d0ba3u , 0x8f15a826498852e3u , 0x32e1a03f38880283u , 0x3de4cce63283f0c1u ,
3515- };
3516-
3517- /* exponent of the largest power of 2 dividing factorial(n), for n in range(68)
3518-
3519- Python code to generate the values:
3520-
3521- import math
3522-
3523- for n in range(68):
3524- fac = math.factorial(n)
3525- fac_trailing_zeros = (fac & -fac).bit_length() - 1
3526- print(fac_trailing_zeros)
3527- */
3528-
3529- static const uint8_t factorial_trailing_zeros [] = {
3530- 0 , 0 , 1 , 1 , 3 , 3 , 4 , 4 , 7 , 7 , 8 , 8 , 10 , 10 , 11 , 11 , // 0-15
3531- 15 , 15 , 16 , 16 , 18 , 18 , 19 , 19 , 22 , 22 , 23 , 23 , 25 , 25 , 26 , 26 , // 16-31
3532- 31 , 31 , 32 , 32 , 34 , 34 , 35 , 35 , 38 , 38 , 39 , 39 , 41 , 41 , 42 , 42 , // 32-47
3533- 46 , 46 , 47 , 47 , 49 , 49 , 50 , 50 , 53 , 53 , 54 , 54 , 56 , 56 , 57 , 57 , // 48-63
3534- 63 , 63 , 64 , 64 , // 64-67
3535- };
3536-
35373615/*[clinic input]
35383616math.comb
35393617
@@ -3597,28 +3675,6 @@ math_comb_impl(PyObject *module, PyObject *n, PyObject *k)
35973675 }
35983676 assert (ki >= 0 );
35993677
3600- if (ni <= 67 ) {
3601- /*
3602- For 0 <= k <= n <= 67, comb(n, k) always fits into a uint64_t.
3603- We compute it as
3604-
3605- comb_odd_part << shift
3606-
3607- where 2**shift is the largest power of two dividing comb(n, k)
3608- and comb_odd_part is comb(n, k) >> shift. comb_odd_part can be
3609- calculated efficiently via arithmetic modulo 2**64, using three
3610- lookups and two uint64_t multiplications.
3611- */
3612- uint64_t comb_odd_part = reduced_factorial_odd_part [ni ]
3613- * inverted_factorial_odd_part [ki ]
3614- * inverted_factorial_odd_part [ni - ki ];
3615- int shift = factorial_trailing_zeros [ni ]
3616- - factorial_trailing_zeros [ki ]
3617- - factorial_trailing_zeros [ni - ki ];
3618- result = PyLong_FromUnsignedLongLong (comb_odd_part << shift );
3619- goto done ;
3620- }
3621-
36223678 ki = Py_MIN (ki , ni - ki );
36233679 if (ki > 1 ) {
36243680 result = perm_comb_small ((unsigned long long )ni ,
0 commit comments