Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f489caf

Browse files
committed
Issue #5859: Remove use of fixed-length buffers for float formatting
in unicodeobject.c and the fallback version of PyOS_double_to_string. As a result, operations like '%.120e' % 12.34 no longer raise an exception.
1 parent fb526ac commit f489caf

4 files changed

Lines changed: 91 additions & 99 deletions

File tree

Lib/test/string_tests.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,14 +1105,7 @@ def test_floatformatting(self):
11051105
value = 0.01
11061106
for x in range(60):
11071107
value = value * 3.141592655 / 3.0 * 10.0
1108-
# The formatfloat() code in stringobject.c and
1109-
# unicodeobject.c uses a 120 byte buffer and switches from
1110-
# 'f' formatting to 'g' at precision 50, so we expect
1111-
# OverflowErrors for the ranges x < 50 and prec >= 67.
1112-
if x < 50 and prec >= 67:
1113-
self.checkraises(OverflowError, format, "__mod__", value)
1114-
else:
1115-
self.checkcall(format, "__mod__", value)
1108+
self.checkcall(format, "__mod__", value)
11161109

11171110
def test_inplace_rewrites(self):
11181111
# Check that strings don't copy and modify cached single-character strings

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ What's New in Python 3.1 beta 1?
1212
Core and Builtins
1313
-----------------
1414

15+
- Issue #5859: Remove length restrictions for float formatting:
16+
'%.67f' % 12.34 and '%.120e' % 12.34 no longer raise an exception.
17+
1518
- Issue #1588: Add complex.__format__. For example,
1619
format(complex(1, 2./3), '.5') now produces a sensible result.
1720

Objects/unicodeobject.c

Lines changed: 18 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -8792,73 +8792,30 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
87928792
return NULL;
87938793
}
87948794

8795-
static void
8796-
strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len)
8797-
{
8798-
register Py_ssize_t i;
8799-
for (i = len - 1; i >= 0; i--)
8800-
buffer[i] = (Py_UNICODE) charbuffer[i];
8801-
}
8795+
/* Returns a new reference to a PyUnicode object, or NULL on failure. */
88028796

8803-
static int
8804-
formatfloat(Py_UNICODE *buf,
8805-
size_t buflen,
8806-
int flags,
8807-
int prec,
8808-
int type,
8809-
PyObject *v)
8810-
{
8811-
/* eric.smith: To minimize disturbances in PyUnicode_Format (the
8812-
only caller of this routine), I'm going to keep the existing
8813-
API to this function. That means that we'll allocate memory and
8814-
then copy back into the supplied buffer. But that's better than
8815-
all of the changes that would be required in PyUnicode_Format
8816-
because it does lots of memory management tricks. */
8817-
8818-
char* p = NULL;
8819-
int result = -1;
8797+
static PyObject *
8798+
formatfloat(PyObject *v, int flags, int prec, int type)
8799+
{
8800+
char *p;
8801+
PyObject *result;
88208802
double x;
8821-
Py_ssize_t len;
88228803

88238804
x = PyFloat_AsDouble(v);
88248805
if (x == -1.0 && PyErr_Occurred())
8825-
goto done;
8806+
return NULL;
8807+
88268808
if (prec < 0)
88278809
prec = 6;
88288810

8829-
/* make sure that the decimal representation of precision really does
8830-
need at most 10 digits: platforms with sizeof(int) == 8 exist! */
8831-
if (prec > 0x7fffffffL) {
8832-
PyErr_SetString(PyExc_OverflowError,
8833-
"outrageously large precision "
8834-
"for formatted float");
8835-
goto done;
8836-
}
8837-
88388811
if (type == 'f' && fabs(x) >= 1e50)
88398812
type = 'g';
88408813

8841-
if (((type == 'g' || type == 'G') &&
8842-
buflen <= (size_t)10 + (size_t)prec) ||
8843-
((type == 'f' || type == 'F') &&
8844-
buflen <= (size_t)53 + (size_t)prec)) {
8845-
PyErr_SetString(PyExc_OverflowError,
8846-
"formatted float is too long (precision too large?)");
8847-
goto done;
8848-
}
8849-
88508814
p = PyOS_double_to_string(x, type, prec,
88518815
(flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
8852-
len = strlen(p);
8853-
if (len+1 >= buflen) {
8854-
/* Caller supplied buffer is not large enough. */
8855-
PyErr_NoMemory();
8856-
goto done;
8857-
}
8858-
strtounicode(buf, p, len);
8859-
result = Py_SAFE_DOWNCAST(len, Py_ssize_t, int);
8860-
8861-
done:
8816+
if (p == NULL)
8817+
return NULL;
8818+
result = PyUnicode_FromStringAndSize(p, strlen(p));
88628819
PyMem_Free(p);
88638820
return result;
88648821
}
@@ -8940,14 +8897,9 @@ formatchar(Py_UNICODE *buf,
89408897
}
89418898

89428899
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
8943-
8944-
FORMATBUFLEN is the length of the buffer in which the floats, ints, &
8945-
chars are formatted. XXX This is a magic number. Each formatting
8946-
routine does bounds checking to ensure no overflow, but a better
8947-
solution may be to malloc a buffer of appropriate size for each
8948-
format. For now, the current solution is sufficient.
8900+
FORMATBUFLEN is the length of the buffer in which chars are formatted.
89498901
*/
8950-
#define FORMATBUFLEN (size_t)120
8902+
#define FORMATBUFLEN (size_t)10
89518903

89528904
PyObject *PyUnicode_Format(PyObject *format,
89538905
PyObject *args)
@@ -9012,7 +8964,7 @@ PyObject *PyUnicode_Format(PyObject *format,
90128964
Py_UNICODE *pbuf;
90138965
Py_UNICODE sign;
90148966
Py_ssize_t len;
9015-
Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
8967+
Py_UNICODE formatbuf[FORMATBUFLEN]; /* For formatchar() */
90168968

90178969
fmt++;
90188970
if (*fmt == '(') {
@@ -9257,11 +9209,11 @@ PyObject *PyUnicode_Format(PyObject *format,
92579209
case 'F':
92589210
case 'g':
92599211
case 'G':
9260-
pbuf = formatbuf;
9261-
len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
9262-
flags, prec, c, v);
9263-
if (len < 0)
9212+
temp = formatfloat(v, flags, prec, c);
9213+
if (!temp)
92649214
goto onError;
9215+
pbuf = PyUnicode_AS_UNICODE(temp);
9216+
len = PyUnicode_GET_SIZE(temp);
92659217
sign = 1;
92669218
if (flags & F_ZERO)
92679219
fill = '0';

Python/pystrtod.c

Lines changed: 69 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -620,12 +620,10 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
620620
int flags,
621621
int *type)
622622
{
623-
char buf[128];
624623
char format[32];
625-
Py_ssize_t len;
626-
char *result;
627-
char *p;
628-
int t;
624+
Py_ssize_t bufsize;
625+
char *buf;
626+
int t, exp;
629627
int upper = 0;
630628

631629
/* Validate format_code, and map upper and lower case */
@@ -669,6 +667,61 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
669667
return NULL;
670668
}
671669

670+
/* Here's a quick-and-dirty calculation to figure out how big a buffer
671+
we need. In general, for a finite float we need:
672+
673+
1 byte for each digit of the decimal significand, and
674+
675+
1 for a possible sign
676+
1 for a possible decimal point
677+
2 for a possible [eE][+-]
678+
1 for each digit of the exponent; if we allow 19 digits
679+
total then we're safe up to exponents of 2**63.
680+
1 for the trailing nul byte
681+
682+
This gives a total of 24 + the number of digits in the significand,
683+
and the number of digits in the significand is:
684+
685+
for 'g' format: at most precision, except possibly
686+
when precision == 0, when it's 1.
687+
for 'e' format: precision+1
688+
for 'f' format: precision digits after the point, at least 1
689+
before. To figure out how many digits appear before the point
690+
we have to examine the size of the number. If fabs(val) < 1.0
691+
then there will be only one digit before the point. If
692+
fabs(val) >= 1.0, then there are at most
693+
694+
1+floor(log10(ceiling(fabs(val))))
695+
696+
digits before the point (where the 'ceiling' allows for the
697+
possibility that the rounding rounds the integer part of val
698+
up). A safe upper bound for the above quantity is
699+
1+floor(exp/3), where exp is the unique integer such that 0.5
700+
<= fabs(val)/2**exp < 1.0. This exp can be obtained from
701+
frexp.
702+
703+
So we allow room for precision+1 digits for all formats, plus an
704+
extra floor(exp/3) digits for 'f' format.
705+
706+
*/
707+
708+
if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
709+
/* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
710+
bufsize = 5;
711+
else {
712+
bufsize = 25 + precision;
713+
if (format_code == 'f' && fabs(val) >= 1.0) {
714+
frexp(val, &exp);
715+
bufsize += exp/3;
716+
}
717+
}
718+
719+
buf = PyMem_Malloc(bufsize);
720+
if (buf == NULL) {
721+
PyErr_NoMemory();
722+
return NULL;
723+
}
724+
672725
/* Handle nan and inf. */
673726
if (Py_IS_NAN(val)) {
674727
strcpy(buf, "nan");
@@ -687,38 +740,29 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
687740
PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
688741
(flags & Py_DTSF_ALT ? "#" : ""), precision,
689742
format_code);
690-
_PyOS_ascii_formatd(buf, sizeof(buf), format, val, precision);
691-
}
692-
693-
len = strlen(buf);
694-
695-
/* Add 1 for the trailing 0 byte.
696-
Add 1 because we might need to make room for the sign.
697-
*/
698-
result = PyMem_Malloc(len + 2);
699-
if (result == NULL) {
700-
PyErr_NoMemory();
701-
return NULL;
743+
_PyOS_ascii_formatd(buf, bufsize, format, val, precision);
702744
}
703-
p = result;
704745

705746
/* Add sign when requested. It's convenient (esp. when formatting
706747
complex numbers) to include a sign even for inf and nan. */
707-
if (flags & Py_DTSF_SIGN && buf[0] != '-')
708-
*p++ = '+';
709-
710-
strcpy(p, buf);
711-
748+
if (flags & Py_DTSF_SIGN && buf[0] != '-') {
749+
size_t len = strlen(buf);
750+
/* the bufsize calculations above should ensure that we've got
751+
space to add a sign */
752+
assert((size_t)bufsize >= len+2);
753+
memmove(buf+1, buf, len+1);
754+
buf[0] = '+';
755+
}
712756
if (upper) {
713757
/* Convert to upper case. */
714758
char *p1;
715-
for (p1 = p; *p1; p1++)
759+
for (p1 = buf; *p1; p1++)
716760
*p1 = Py_TOUPPER(*p1);
717761
}
718762

719763
if (type)
720764
*type = t;
721-
return result;
765+
return buf;
722766
}
723767

724768
#else

0 commit comments

Comments
 (0)