python
diff --git a/‎Doc/library/sys.rst‎
Lines changed: 17 additions & 0 deletions b/‎Doc/library/sys.rst‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎Doc/whatsnew/3.1.rst‎
Lines changed: 23 additions & 0 deletions b/‎Doc/whatsnew/3.1.rst‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎Include/longintrepr.h‎
Lines changed: 53 additions & 15 deletions b/‎Include/longintrepr.h‎
Lines changed: 53 additions & 15 deletions
diff --git a/‎Include/longobject.h‎
Lines changed: 1 addition & 0 deletions b/‎Include/longobject.h‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎Include/pyport.h‎
Lines changed: 51 additions & 0 deletions b/‎Include/pyport.h‎
Lines changed: 51 additions & 0 deletions
diff --git a/‎Lib/test/test_long.py‎
Lines changed: 30 additions & 1 deletion b/‎Lib/test/test_long.py‎
Lines changed: 30 additions & 1 deletion
diff --git a/‎Lib/test/test_sys.py‎
Lines changed: 14 additions & 9 deletions b/‎Lib/test/test_sys.py‎
Lines changed: 14 additions & 9 deletions
diff --git a/‎Misc/NEWS‎
Lines changed: 7 additions & 0 deletions b/‎Misc/NEWS‎
Lines changed: 7 additions & 0 deletions
@@ -413,6 +413,23 @@ always available.
    same information.
 
 
+.. data:: int_info
+
+   A struct sequence that holds information about Python's
+   internal representation of integers.  The attributes are read-only.
+
+   +-------------------------+----------------------------------------------+
+   | attribute               | explanation                                  |
+   +=========================+==============================================+
+   | :const:`bits_per_digit` | number of bits held in each digit.  Python   |
+   |                         | integers are stored internally in base       |
+   |                         | ``2**int_info.bits_per_digit``               |
+   +-------------------------+----------------------------------------------+
+   | :const:`sizeof_digit`   | size in bytes of the C type used to          |
+   |                         | represent a digit                            |
+   +-------------------------+----------------------------------------------+
+
+
 .. function:: intern(string)
 
    Enter *string* in the table of "interned" strings and return the interned string
 
@@ -87,5 +87,28 @@ Some smaller changes made to the core Python language are:
 
   (Contributed by Fredrik Johansson and Victor Stinner; :issue:`3439`.)
 
+* Integers are now stored internally either in base 2**15 or in base
+  2**30, the base being determined at build time.  Previously, they
+  were always stored in base 2**15.  Using base 2**30 gives
+  significant performance improvements on 64-bit machines, but
+  benchmark results on 32-bit machines have been mixed.  Therefore,
+  the default is to use base 2**30 on 64-bit machines and base 2**15
+  on 32-bit machines; on Unix, there's a new configure option
+  ``--enable-big-digits`` that can be used to override this default.
+
+  Apart from the performance improvements, this change should be
+  invisible to end users, with one exception: for testing and
+  debugging purposes there's a new structseq ``sys.int_info`` that
+  provides information about the internal format, giving the number of
+  bits per digit and the size in bytes of the C type used to store
+  each digit::
+
+     >>> import sys
+     >>> sys.int_info
+     sys.int_info(bits_per_digit=30, sizeof_digit=4)
+
+
+  (Contributed by Mark Dickinson; :issue:`4258`.)
+
 
 .. ======================================================================
@@ -7,24 +7,62 @@ extern "C" {
 
 /* This is published for the benefit of "friend" marshal.c only. */
 
-/* Parameters of the long integer representation.
-   These shouldn't have to be changed as C should guarantee that a short
-   contains at least 16 bits, but it's made changeable anyway.
-   Note: 'digit' should be able to hold 2*MASK+1, and 'twodigits'
-   should be able to hold the intermediate results in 'mul'
-   (at most (BASE-1)*(2*BASE+1) == MASK*(2*MASK+3)).
-   Also, x_sub assumes that 'digit' is an unsigned type, and overflow
-   is handled by taking the result mod 2**N for some N > SHIFT.
-   And, at some places it is assumed that MASK fits in an int, as well.
-   long_pow() requires that SHIFT be divisible by 5. */
+/* Parameters of the long integer representation.  There are two different
+   sets of parameters: one set for 30-bit digits, stored in an unsigned 32-bit
+   integer type, and one set for 15-bit digits with each digit stored in an
+   unsigned short.  The value of PYLONG_BITS_IN_DIGIT, defined either at
+   configure time or in pyport.h, is used to decide which digit size to use.
 
-typedef unsigned short digit;
-typedef short sdigit;                   /* signed variant of digit */
-#define BASE_TWODIGITS_TYPE long
-typedef unsigned BASE_TWODIGITS_TYPE twodigits;
-typedef BASE_TWODIGITS_TYPE stwodigits; /* signed variant of twodigits */
+   Type 'digit' should be able to hold 2*PyLong_BASE-1, and type 'twodigits'
+   should be an unsigned integer type able to hold all integers up to
+   PyLong_BASE*PyLong_BASE-1.  x_sub assumes that 'digit' is an unsigned type,
+   and that overflow is handled by taking the result modulo 2**N for some N >
+   PyLong_SHIFT.  The majority of the code doesn't care about the precise
+   value of PyLong_SHIFT, but there are some notable exceptions:
+
+   - long_pow() requires that PyLong_SHIFT be divisible by 5
+
+   - PyLong_{As,From}ByteArray require that PyLong_SHIFT be at least 8
+
+   - long_hash() requires that PyLong_SHIFT be *strictly* less than the number
+     of bits in an unsigned long, as do the PyLong <-> long (or unsigned long)
+     conversion functions
+
+   - the long <-> size_t/Py_ssize_t conversion functions expect that
+     PyLong_SHIFT is strictly less than the number of bits in a size_t
+
+   - the marshal code currently expects that PyLong_SHIFT is a multiple of 15
+
+   - NSMALLNEGINTS and NSMALLPOSINTS should be small enough to fit in a single
+     digit; with the current values this forces PyLong_SHIFT >= 9
 
+   The values 15 and 30 should satisfy all of the above requirements, on any
+   platform.
+*/
+
+#if HAVE_STDINT_H
+#include <stdint.h>
+#endif
+
+#if PYLONG_BITS_IN_DIGIT == 30
+#if !(defined HAVE_UINT64_T && defined HAVE_UINT32_T &&          \
+      defined HAVE_INT64_T && defined HAVE_INT32_T)
+#error "30-bit long digits requested, but the necessary types are not available on this platform"
+#endif
+typedef PY_UINT32_T digit;
+typedef PY_INT32_T sdigit; /* signed variant of digit */
+typedef PY_UINT64_T twodigits;
+typedef PY_INT64_T stwodigits; /* signed variant of twodigits */
+#define PyLong_SHIFT	30
+#elif PYLONG_BITS_IN_DIGIT == 15
+typedef unsigned short digit;
+typedef short sdigit; /* signed variant of digit */
+typedef unsigned long twodigits;
+typedef long stwodigits; /* signed variant of twodigits */
 #define PyLong_SHIFT	15
+#else
+#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"
+#endif
 #define PyLong_BASE	((digit)1 << PyLong_SHIFT)
 #define PyLong_MASK	((digit)(PyLong_BASE - 1))
 
 
@@ -26,6 +26,7 @@ PyAPI_FUNC(Py_ssize_t) PyLong_AsSsize_t(PyObject *);
 PyAPI_FUNC(size_t) PyLong_AsSize_t(PyObject *);
 PyAPI_FUNC(unsigned long) PyLong_AsUnsignedLong(PyObject *);
 PyAPI_FUNC(unsigned long) PyLong_AsUnsignedLongMask(PyObject *);
+PyAPI_FUNC(PyObject *) PyLong_GetInfo(void);
 
 /* It may be useful in the future. I've added it in the PyInt -> PyLong
    cleanup to keep the extra information. [CH] */
 
@@ -69,6 +69,57 @@ Used in:  PY_LONG_LONG
 #endif
 #endif /* HAVE_LONG_LONG */
 
+/* A build with 30-bit digits for Python long integers needs an exact-width
+ * 32-bit unsigned integer type to store those digits.  (We could just use
+ * type 'unsigned long', but that would be wasteful on a system where longs
+ * are 64 bits.)  On Unix systems, the autoconf macro AC_TYPE_UINT32_T defines
+ * uint32_t to be such a type unless stdint.h or inttypes.h defines uint32_t.
+ * However, it doesn't set HAVE_UINT32_T, so we do that here.
+ */
+#if (defined UINT32_MAX || defined uint32_t)
+#ifndef PY_UINT32_T
+#define HAVE_UINT32_T 1
+#define PY_UINT32_T uint32_t
+#endif
+#endif
+
+/* Macros for a 64-bit unsigned integer type; used for type 'twodigits' in the
+ * long integer implementation, when 30-bit digits are enabled.
+ */
+#if (defined UINT64_MAX || defined uint64_t)
+#ifndef PY_UINT64_T
+#define HAVE_UINT64_T 1
+#define PY_UINT64_T uint64_t
+#endif
+#endif
+
+/* Signed variants of the above */
+#if (defined INT32_MAX || defined int32_t)
+#ifndef PY_INT32_T
+#define HAVE_INT32_T 1
+#define PY_INT32_T int32_t
+#endif
+#endif
+#if (defined INT64_MAX || defined int64_t)
+#ifndef PY_INT64_T
+#define HAVE_INT64_T 1
+#define PY_INT64_T int64_t
+#endif
+#endif
+
+/* If PYLONG_BITS_IN_DIGIT is not defined then we'll use 30-bit digits if all
+   the necessary integer types are available, and we're on a 64-bit platform
+   (as determined by SIZEOF_VOID_P); otherwise we use 15-bit digits. */
+
+#ifndef PYLONG_BITS_IN_DIGIT
+#if (defined HAVE_UINT64_T && defined HAVE_INT64_T && \
+     defined HAVE_UINT32_T && defined HAVE_INT32_T && SIZEOF_VOID_P >= 8)
+#define PYLONG_BITS_IN_DIGIT 30
+#else
+#define PYLONG_BITS_IN_DIGIT 15
+#endif
+#endif
+
 /* uintptr_t is the C9X name for an unsigned integral type such that a
  * legitimate void* can be cast to uintptr_t and then back to void* again
  * without loss of information.  Similarly for intptr_t, wrt a signed
 
@@ -15,7 +15,7 @@ def __str__(self):
         return self.format % self.args
 
 # SHIFT should match the value in longintrepr.h for best testing.
-SHIFT = 15
+SHIFT = sys.int_info.bits_per_digit
 BASE = 2 ** SHIFT
 MASK = BASE - 1
 KARATSUBA_CUTOFF = 70   # from longobject.c
@@ -120,6 +120,35 @@ def test_division(self):
                 y = self.getran(leny) or 1
                 self.check_division(x, y)
 
+        # specific numbers chosen to exercise corner cases of the
+        # current long division implementation
+
+        # 30-bit cases involving a quotient digit estimate of BASE+1
+        self.check_division(1231948412290879395966702881,
+                            1147341367131428698)
+        self.check_division(815427756481275430342312021515587883,
+                       707270836069027745)
+        self.check_division(627976073697012820849443363563599041,
+                       643588798496057020)
+        self.check_division(1115141373653752303710932756325578065,
+                       1038556335171453937726882627)
+        # 30-bit cases that require the post-subtraction correction step
+        self.check_division(922498905405436751940989320930368494,
+                       949985870686786135626943396)
+        self.check_division(768235853328091167204009652174031844,
+                       1091555541180371554426545266)
+
+        # 15-bit cases involving a quotient digit estimate of BASE+1
+        self.check_division(20172188947443, 615611397)
+        self.check_division(1020908530270155025, 950795710)
+        self.check_division(128589565723112408, 736393718)
+        self.check_division(609919780285761575, 18613274546784)
+        # 15-bit cases that require the post-subtraction correction step
+        self.check_division(710031681576388032, 26769404391308)
+        self.check_division(1933622614268221, 30212853348836)
+
+
+
     def test_karatsuba(self):
         digits = list(range(1, 5)) + list(range(KARATSUBA_CUTOFF,
                                                 KARATSUBA_CUTOFF + 10))
 
@@ -333,6 +333,9 @@ def test_attributes(self):
         self.assert_(isinstance(sys.executable, str))
         self.assertEqual(len(sys.float_info), 11)
         self.assertEqual(sys.float_info.radix, 2)
+        self.assertEqual(len(sys.int_info), 2)
+        self.assert_(sys.int_info.bits_per_digit % 5 == 0)
+        self.assert_(sys.int_info.sizeof_digit >= 1)
         self.assert_(isinstance(sys.hexversion, int))
         self.assert_(isinstance(sys.maxsize, int))
         self.assert_(isinstance(sys.maxunicode, int))
@@ -437,6 +440,7 @@ def setUp(self):
         if hasattr(sys, "gettotalrefcount"):
             self.header += '2P'
             self.vheader += '2P'
+        self.longdigit = sys.int_info.sizeof_digit
         import _testcapi
         self.gc_headsize = _testcapi.SIZEOF_PYGC_HEAD
         self.file = open(test.support.TESTFN, 'wb')
@@ -471,16 +475,16 @@ def test_gc_head_size(self):
         size = self.calcsize
         gc_header_size = self.gc_headsize
         # bool objects are not gc tracked
-        self.assertEqual(sys.getsizeof(True), size(vh) + self.H)
+        self.assertEqual(sys.getsizeof(True), size(vh) + self.longdigit)
         # but lists are
         self.assertEqual(sys.getsizeof([]), size(vh + 'PP') + gc_header_size)
 
     def test_default(self):
         h = self.header
         vh = self.vheader
         size = self.calcsize
-        self.assertEqual(sys.getsizeof(True), size(vh) + self.H)
-        self.assertEqual(sys.getsizeof(True, -1), size(vh) + self.H)
+        self.assertEqual(sys.getsizeof(True), size(vh) + self.longdigit)
+        self.assertEqual(sys.getsizeof(True, -1), size(vh) + self.longdigit)
 
     def test_objecttypes(self):
         # check all types defined in Objects/
@@ -489,7 +493,7 @@ def test_objecttypes(self):
         size = self.calcsize
         check = self.check_sizeof
         # bool
-        check(True, size(vh) + self.H)
+        check(True, size(vh) + self.longdigit)
         # buffer
         # XXX
         # builtin_function_or_method
@@ -607,11 +611,12 @@ def get_gen(): yield 1
         check(reversed([]), size(h + 'lP'))
         # long
         check(0, size(vh))
-        check(1, size(vh) + self.H)
-        check(-1, size(vh) + self.H)
-        check(32768, size(vh) + 2*self.H)
-        check(32768*32768-1, size(vh) + 2*self.H)
-        check(32768*32768, size(vh) + 3*self.H)
+        check(1, size(vh) + self.longdigit)
+        check(-1, size(vh) + self.longdigit)
+        PyLong_BASE = 2**sys.int_info.bits_per_digit
+        check(PyLong_BASE, size(vh) + 2*self.longdigit)
+        check(PyLong_BASE**2-1, size(vh) + 2*self.longdigit)
+        check(PyLong_BASE**2, size(vh) + 3*self.longdigit)
         # memory
         check(memoryview(b''), size(h + 'P PP2P2i7P'))
         # module
 
@@ -12,6 +12,13 @@ What's New in Python 3.1 alpha 2?
 Core and Builtins
 -----------------
 
+- Issue #4258: Make it possible to use base 2**30 instead of base
+  2**15 for the internal representation of integers, for performance
+  reasons.  Base 2**30 is enabled by default on 64-bit machines.  Add
+  --enable-big-digits option to configure, which overrides the
+  default.  Add sys.int_info structseq to provide information about
+  the internal format.
+
 - Issue #4474: PyUnicode_FromWideChar now converts characters outside
   the BMP to surrogate pairs, on systems with sizeof(wchar_t) == 4
   and sizeof(Py_UNICODE) == 2.