Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit e0b2309

Browse files
committed
Issues #26289 and #26315: Optimize floor/modulo div for single-digit longs
Microbenchmarks show 2-2.5x improvement. Built-in 'divmod' function is now also ~10% faster. -m timeit -s "x=22331" "x//2;x//-3;x//4;x//5;x//-6;x//7;x//8;x//-99;x//100;" with patch: 0.321 without patch: 0.633 -m timeit -s "x=22331" "x%2;x%3;x%-4;x%5;x%6;x%-7;x%8;x%99;x%-100;" with patch: 0.224 without patch: 0.66 Big thanks to Serhiy Storchaka, Mark Dickinson and Victor Stinner for thorough code reviews and algorithmic improvements.
1 parent 2da89d7 commit e0b2309

3 files changed

Lines changed: 116 additions & 0 deletions

File tree

Lib/test/test_long.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -689,6 +689,20 @@ def test_nan_inf(self):
689689
self.assertRaises(OverflowError, int, float('-inf'))
690690
self.assertRaises(ValueError, int, float('nan'))
691691

692+
def test_mod_division(self):
693+
with self.assertRaises(ZeroDivisionError):
694+
_ = 1 % 0
695+
696+
self.assertEqual(13 % 10, 3)
697+
self.assertEqual(-13 % 10, 7)
698+
self.assertEqual(13 % -10, -7)
699+
self.assertEqual(-13 % -10, -3)
700+
701+
self.assertEqual(12 % 4, 0)
702+
self.assertEqual(-12 % 4, 0)
703+
self.assertEqual(12 % -4, 0)
704+
self.assertEqual(-12 % -4, 0)
705+
692706
def test_true_division(self):
693707
huge = 1 << 40000
694708
mhuge = -huge
@@ -723,6 +737,25 @@ def test_true_division(self):
723737
for zero in ["huge / 0", "mhuge / 0"]:
724738
self.assertRaises(ZeroDivisionError, eval, zero, namespace)
725739

740+
def test_floordiv(self):
741+
with self.assertRaises(ZeroDivisionError):
742+
_ = 1 // 0
743+
744+
self.assertEqual(2 // 3, 0)
745+
self.assertEqual(2 // -3, -1)
746+
self.assertEqual(-2 // 3, -1)
747+
self.assertEqual(-2 // -3, 0)
748+
749+
self.assertEqual(-11 // -3, 3)
750+
self.assertEqual(-11 // 3, -4)
751+
self.assertEqual(11 // -3, -4)
752+
self.assertEqual(11 // 3, 3)
753+
754+
self.assertEqual(-12 // -3, 4)
755+
self.assertEqual(-12 // 3, -4)
756+
self.assertEqual(12 // -3, -4)
757+
self.assertEqual(12 // 3, 4)
758+
726759
def check_truediv(self, a, b, skip_small=True):
727760
"""Verify that the result of a/b is correctly rounded, by
728761
comparing it with a pure Python implementation of correctly

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,10 @@ Core and Builtins
176176

177177
- Issue #26288: Optimize PyLong_AsDouble.
178178

179+
- Issues #26289 and #26315: Optimize floor and modulo division for
180+
single-digit longs. Microbenchmarks show 2-2.5x improvement. Built-in
181+
'divmod' function is now also ~10% faster.
182+
179183
Library
180184
-------
181185

Objects/longobject.c

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3502,6 +3502,52 @@ long_mul(PyLongObject *a, PyLongObject *b)
35023502
return (PyObject *)z;
35033503
}
35043504

3505+
/* Fast modulo division for single-digit longs. */
3506+
static PyObject *
3507+
fast_mod(PyLongObject *a, PyLongObject *b)
3508+
{
3509+
sdigit left = a->ob_digit[0];
3510+
sdigit right = b->ob_digit[0];
3511+
sdigit mod;
3512+
3513+
assert(Py_ABS(Py_SIZE(a)) == 1);
3514+
assert(Py_ABS(Py_SIZE(b)) == 1);
3515+
3516+
if (Py_SIZE(a) == Py_SIZE(b)) {
3517+
/* 'a' and 'b' have the same sign. */
3518+
mod = left % right;
3519+
}
3520+
else {
3521+
/* Either 'a' or 'b' is negative. */
3522+
mod = right - 1 - (left - 1) % right;
3523+
}
3524+
3525+
return PyLong_FromLong(mod * Py_SIZE(b));
3526+
}
3527+
3528+
/* Fast floor division for single-digit longs. */
3529+
static PyObject *
3530+
fast_floor_div(PyLongObject *a, PyLongObject *b)
3531+
{
3532+
sdigit left = a->ob_digit[0];
3533+
sdigit right = b->ob_digit[0];
3534+
sdigit div;
3535+
3536+
assert(Py_ABS(Py_SIZE(a)) == 1);
3537+
assert(Py_ABS(Py_SIZE(b)) == 1);
3538+
3539+
if (Py_SIZE(a) == Py_SIZE(b)) {
3540+
/* 'a' and 'b' have the same sign. */
3541+
div = left / right;
3542+
}
3543+
else {
3544+
/* Either 'a' or 'b' is negative. */
3545+
div = -1 - (left - 1) / right;
3546+
}
3547+
3548+
return PyLong_FromLong(div);
3549+
}
3550+
35053551
/* The / and % operators are now defined in terms of divmod().
35063552
The expression a mod b has the value a - b*floor(a/b).
35073553
The long_divrem function gives the remainder after division of
@@ -3529,6 +3575,30 @@ l_divmod(PyLongObject *v, PyLongObject *w,
35293575
{
35303576
PyLongObject *div, *mod;
35313577

3578+
if (Py_ABS(Py_SIZE(v)) == 1 && Py_ABS(Py_SIZE(w)) == 1) {
3579+
/* Fast path for single-digit longs */
3580+
div = NULL;
3581+
if (pdiv != NULL) {
3582+
div = (PyLongObject *)fast_floor_div(v, w);
3583+
if (div == NULL) {
3584+
return -1;
3585+
}
3586+
}
3587+
if (pmod != NULL) {
3588+
mod = (PyLongObject *)fast_mod(v, w);
3589+
if (mod == NULL) {
3590+
Py_XDECREF(div);
3591+
return -1;
3592+
}
3593+
*pmod = mod;
3594+
}
3595+
if (pdiv != NULL) {
3596+
/* We only want to set `*pdiv` when `*pmod` is
3597+
set successfully. */
3598+
*pdiv = div;
3599+
}
3600+
return 0;
3601+
}
35323602
if (long_divrem(v, w, &div, &mod) < 0)
35333603
return -1;
35343604
if ((Py_SIZE(mod) < 0 && Py_SIZE(w) > 0) ||
@@ -3573,6 +3643,11 @@ long_div(PyObject *a, PyObject *b)
35733643
PyLongObject *div;
35743644

35753645
CHECK_BINOP(a, b);
3646+
3647+
if (Py_ABS(Py_SIZE(a)) == 1 && Py_ABS(Py_SIZE(b)) == 1) {
3648+
return fast_floor_div((PyLongObject*)a, (PyLongObject*)b);
3649+
}
3650+
35763651
if (l_divmod((PyLongObject*)a, (PyLongObject*)b, &div, NULL) < 0)
35773652
div = NULL;
35783653
return (PyObject *)div;
@@ -3848,6 +3923,10 @@ long_mod(PyObject *a, PyObject *b)
38483923

38493924
CHECK_BINOP(a, b);
38503925

3926+
if (Py_ABS(Py_SIZE(a)) == 1 && Py_ABS(Py_SIZE(b)) == 1) {
3927+
return fast_mod((PyLongObject*)a, (PyLongObject*)b);
3928+
}
3929+
38513930
if (l_divmod((PyLongObject*)a, (PyLongObject*)b, NULL, &mod) < 0)
38523931
mod = NULL;
38533932
return (PyObject *)mod;

0 commit comments

Comments
 (0)