Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f28dd83

Browse files
committed
Marc-Andre Lemburg <[email protected]>:
New buffer overflow checks for formatting strings. By Trent Mick.
1 parent 587794b commit f28dd83

2 files changed

Lines changed: 129 additions & 49 deletions

File tree

Objects/stringobject.c

Lines changed: 68 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -124,8 +124,13 @@ PyObject *
124124
PyString_FromString(str)
125125
const char *str;
126126
{
127-
register unsigned int size = strlen(str);
127+
register size_t size = strlen(str);
128128
register PyStringObject *op;
129+
if (size > INT_MAX) {
130+
PyErr_SetString(PyExc_OverflowError,
131+
"string is too long for a Python string");
132+
return NULL;
133+
}
129134
#ifndef DONT_SHARE_SHORT_STRINGS
130135
if (size == 0 && (op = nullstring) != NULL) {
131136
#ifdef COUNT_ALLOCS
@@ -237,9 +242,13 @@ static PyObject *
237242
string_repr(op)
238243
register PyStringObject *op;
239244
{
240-
/* XXX overflow? */
241-
int newsize = 2 + 4 * op->ob_size * sizeof(char);
242-
PyObject *v = PyString_FromStringAndSize((char *)NULL, newsize);
245+
size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
246+
PyObject *v;
247+
if (newsize > INT_MAX) {
248+
PyErr_SetString(PyExc_OverflowError,
249+
"string is too large to make repr");
250+
}
251+
v = PyString_FromStringAndSize((char *)NULL, newsize);
243252
if (v == NULL) {
244253
return NULL;
245254
}
@@ -2335,52 +2344,77 @@ getnextarg(args, arglen, p_argidx)
23352344
#define F_ZERO (1<<4)
23362345

23372346
static int
2338-
formatfloat(buf, flags, prec, type, v)
2347+
formatfloat(buf, buflen, flags, prec, type, v)
23392348
char *buf;
2349+
size_t buflen;
23402350
int flags;
23412351
int prec;
23422352
int type;
23432353
PyObject *v;
23442354
{
2355+
/* fmt = '%#.' + `prec` + `type`
2356+
worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
23452357
char fmt[20];
23462358
double x;
23472359
if (!PyArg_Parse(v, "d;float argument required", &x))
23482360
return -1;
23492361
if (prec < 0)
23502362
prec = 6;
2351-
if (prec > 50)
2352-
prec = 50; /* Arbitrary limitation */
23532363
if (type == 'f' && fabs(x)/1e25 >= 1e25)
23542364
type = 'g';
23552365
sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
2366+
/* worst case length calc to ensure no buffer overrun:
2367+
fmt = %#.<prec>g
2368+
buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2369+
for any double rep.)
2370+
len = 1 + prec + 1 + 2 + 5 = 9 + prec
2371+
If prec=0 the effective precision is 1 (the leading digit is
2372+
always given), therefore increase by one to 10+prec. */
2373+
if (buflen <= (size_t)10 + (size_t)prec) {
2374+
PyErr_SetString(PyExc_OverflowError,
2375+
"formatted float is too long (precision too long?)");
2376+
return -1;
2377+
}
23562378
sprintf(buf, fmt, x);
23572379
return strlen(buf);
23582380
}
23592381

23602382
static int
2361-
formatint(buf, flags, prec, type, v)
2383+
formatint(buf, buflen, flags, prec, type, v)
23622384
char *buf;
2385+
size_t buflen;
23632386
int flags;
23642387
int prec;
23652388
int type;
23662389
PyObject *v;
23672390
{
2391+
/* fmt = '%#.' + `prec` + 'l' + `type`
2392+
worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
23682393
char fmt[20];
23692394
long x;
23702395
if (!PyArg_Parse(v, "l;int argument required", &x))
23712396
return -1;
23722397
if (prec < 0)
23732398
prec = 1;
23742399
sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
2400+
/* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
2401+
worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2402+
if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
2403+
PyErr_SetString(PyExc_OverflowError,
2404+
"formatted integer is too long (precision too long?)");
2405+
return -1;
2406+
}
23752407
sprintf(buf, fmt, x);
23762408
return strlen(buf);
23772409
}
23782410

23792411
static int
2380-
formatchar(buf, v)
2412+
formatchar(buf, buflen, v)
23812413
char *buf;
2414+
size_t buflen;
23822415
PyObject *v;
23832416
{
2417+
/* presume that the buffer is at least 2 characters long */
23842418
if (PyString_Check(v)) {
23852419
if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
23862420
return -1;
@@ -2394,7 +2428,15 @@ formatchar(buf, v)
23942428
}
23952429

23962430

2397-
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
2431+
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
2432+
2433+
FORMATBUFLEN is the length of the buffer in which the floats, ints, &
2434+
chars are formatted. XXX This is a magic number. Each formatting
2435+
routine does bounds checking to ensure no overflow, but a better
2436+
solution may be to malloc a buffer of appropriate size for each
2437+
format. For now, the current solution is sufficient.
2438+
*/
2439+
#define FORMATBUFLEN (size_t)120
23982440

23992441
PyObject *
24002442
PyString_Format(format, args)
@@ -2451,10 +2493,10 @@ PyString_Format(format, args)
24512493
int fill;
24522494
PyObject *v = NULL;
24532495
PyObject *temp = NULL;
2454-
char *buf;
2496+
char *pbuf;
24552497
int sign;
24562498
int len;
2457-
char tmpbuf[120]; /* For format{float,int,char}() */
2499+
char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
24582500
char *fmt_start = fmt;
24592501

24602502
fmt++;
@@ -2602,7 +2644,7 @@ PyString_Format(format, args)
26022644
fill = ' ';
26032645
switch (c) {
26042646
case '%':
2605-
buf = "%";
2647+
pbuf = "%";
26062648
len = 1;
26072649
break;
26082650
case 's':
@@ -2622,7 +2664,7 @@ PyString_Format(format, args)
26222664
"%s argument has non-string str()");
26232665
goto error;
26242666
}
2625-
buf = PyString_AsString(temp);
2667+
pbuf = PyString_AsString(temp);
26262668
len = PyString_Size(temp);
26272669
if (prec >= 0 && len > prec)
26282670
len = prec;
@@ -2635,18 +2677,18 @@ PyString_Format(format, args)
26352677
case 'X':
26362678
if (c == 'i')
26372679
c = 'd';
2638-
buf = tmpbuf;
2639-
len = formatint(buf, flags, prec, c, v);
2680+
pbuf = formatbuf;
2681+
len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v);
26402682
if (len < 0)
26412683
goto error;
26422684
sign = (c == 'd');
26432685
if (flags&F_ZERO) {
26442686
fill = '0';
26452687
if ((flags&F_ALT) &&
26462688
(c == 'x' || c == 'X') &&
2647-
buf[0] == '0' && buf[1] == c) {
2648-
*res++ = *buf++;
2649-
*res++ = *buf++;
2689+
pbuf[0] == '0' && pbuf[1] == c) {
2690+
*res++ = *pbuf++;
2691+
*res++ = *pbuf++;
26502692
rescnt -= 2;
26512693
len -= 2;
26522694
width -= 2;
@@ -2660,17 +2702,17 @@ PyString_Format(format, args)
26602702
case 'f':
26612703
case 'g':
26622704
case 'G':
2663-
buf = tmpbuf;
2664-
len = formatfloat(buf, flags, prec, c, v);
2705+
pbuf = formatbuf;
2706+
len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
26652707
if (len < 0)
26662708
goto error;
26672709
sign = 1;
26682710
if (flags&F_ZERO)
26692711
fill = '0';
26702712
break;
26712713
case 'c':
2672-
buf = tmpbuf;
2673-
len = formatchar(buf, v);
2714+
pbuf = formatbuf;
2715+
len = formatchar(pbuf, sizeof(formatbuf), v);
26742716
if (len < 0)
26752717
goto error;
26762718
break;
@@ -2681,8 +2723,8 @@ PyString_Format(format, args)
26812723
goto error;
26822724
}
26832725
if (sign) {
2684-
if (*buf == '-' || *buf == '+') {
2685-
sign = *buf++;
2726+
if (*pbuf == '-' || *pbuf == '+') {
2727+
sign = *pbuf++;
26862728
len--;
26872729
}
26882730
else if (flags & F_SIGN)
@@ -2718,7 +2760,7 @@ PyString_Format(format, args)
27182760
}
27192761
if (sign && fill == ' ')
27202762
*res++ = sign;
2721-
memcpy(res, buf, len);
2763+
memcpy(res, pbuf, len);
27222764
res += len;
27232765
rescnt -= len;
27242766
while (--width >= len) {

0 commit comments

Comments
 (0)