Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f6d0aee

Browse files
Issue #16741: Fix error reporting in int().
1 parent 1f35ae0 commit f6d0aee

5 files changed

Lines changed: 101 additions & 67 deletions

File tree

Include/longobject.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ PyAPI_FUNC(PyObject *) PyLong_FromString(char *, char **, int);
8484
#ifndef Py_LIMITED_API
8585
PyAPI_FUNC(PyObject *) PyLong_FromUnicode(Py_UNICODE*, Py_ssize_t, int);
8686
PyAPI_FUNC(PyObject *) PyLong_FromUnicodeObject(PyObject *u, int base);
87+
PyAPI_FUNC(PyObject *) _PyLong_FromBytes(const char *, Py_ssize_t, int);
8788
#endif
8889

8990
#ifndef Py_LIMITED_API

Lib/test/test_int.py

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,6 @@ def test_basic(self):
7373
x = -1-sys.maxsize
7474
self.assertEqual(x >> 1, x//2)
7575

76-
self.assertRaises(ValueError, int, '123\0')
77-
self.assertRaises(ValueError, int, '53', 40)
78-
79-
# SF bug 1545497: embedded NULs were not detected with
80-
# explicit base
81-
self.assertRaises(ValueError, int, '123\0', 10)
82-
self.assertRaises(ValueError, int, '123\x00 245', 20)
83-
8476
x = int('1' * 600)
8577
self.assertIsInstance(x, int)
8678

@@ -360,14 +352,37 @@ def __trunc__(self):
360352
int(TruncReturnsBadInt())
361353

362354
def test_error_message(self):
363-
testlist = ('\xbd', '123\xbd', ' 123 456 ')
364-
for s in testlist:
365-
try:
366-
int(s)
367-
except ValueError as e:
368-
self.assertIn(s.strip(), e.args[0])
369-
else:
370-
self.fail("Expected int(%r) to raise a ValueError", s)
355+
def check(s, base=None):
356+
with self.assertRaises(ValueError,
357+
msg="int(%r, %r)" % (s, base)) as cm:
358+
if base is None:
359+
int(s)
360+
else:
361+
int(s, base)
362+
self.assertEqual(cm.exception.args[0],
363+
"invalid literal for int() with base %d: %r" %
364+
(10 if base is None else base, s))
365+
366+
check('\xbd')
367+
check('123\xbd')
368+
check(' 123 456 ')
369+
370+
check('123\x00')
371+
# SF bug 1545497: embedded NULs were not detected with explicit base
372+
check('123\x00', 10)
373+
check('123\x00 245', 20)
374+
check('123\x00 245', 16)
375+
check('123\x00245', 20)
376+
check('123\x00245', 16)
377+
# byte string with embedded NUL
378+
check(b'123\x00')
379+
check(b'123\x00', 10)
380+
# non-UTF-8 byte string
381+
check(b'123\xbd')
382+
check(b'123\xbd', 10)
383+
# lone surrogate in Unicode string
384+
check('123\ud800')
385+
check('123\ud800', 10)
371386

372387
def test_main():
373388
support.run_unittest(IntTestCases)

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ What's New in Python 3.3.3 release candidate 1?
1212
Core and Builtins
1313
-----------------
1414

15+
- Issue #16741: Fix error reporting in int().
16+
1517
- Issue #17899: Fix rare file descriptor leak in os.listdir().
1618

1719
- Issue #18552: Check return value of PyArena_AddPyObject() in

Objects/abstract.c

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1240,25 +1240,6 @@ convert_integral_to_int(PyObject *integral, const char *error_format)
12401240
}
12411241

12421242

1243-
/* Add a check for embedded NULL-bytes in the argument. */
1244-
static PyObject *
1245-
long_from_string(const char *s, Py_ssize_t len)
1246-
{
1247-
char *end;
1248-
PyObject *x;
1249-
1250-
x = PyLong_FromString((char*)s, &end, 10);
1251-
if (x == NULL)
1252-
return NULL;
1253-
if (end != s + len) {
1254-
PyErr_SetString(PyExc_ValueError,
1255-
"null byte in argument for int()");
1256-
Py_DECREF(x);
1257-
return NULL;
1258-
}
1259-
return x;
1260-
}
1261-
12621243
PyObject *
12631244
PyNumber_Long(PyObject *o)
12641245
{
@@ -1306,16 +1287,16 @@ PyNumber_Long(PyObject *o)
13061287

13071288
if (PyBytes_Check(o))
13081289
/* need to do extra error checking that PyLong_FromString()
1309-
* doesn't do. In particular int('9.5') must raise an
1310-
* exception, not truncate the float.
1290+
* doesn't do. In particular int('9\x005') must raise an
1291+
* exception, not truncate at the null.
13111292
*/
1312-
return long_from_string(PyBytes_AS_STRING(o),
1313-
PyBytes_GET_SIZE(o));
1293+
return _PyLong_FromBytes(PyBytes_AS_STRING(o),
1294+
PyBytes_GET_SIZE(o), 10);
13141295
if (PyUnicode_Check(o))
13151296
/* The above check is done in PyLong_FromUnicodeObject(). */
13161297
return PyLong_FromUnicodeObject(o, 10);
13171298
if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len))
1318-
return long_from_string(buffer, buffer_len);
1299+
return _PyLong_FromBytes(buffer, buffer_len, 10);
13191300

13201301
return type_error("int() argument must be a string or a "
13211302
"number, not '%.200s'", o);

Objects/longobject.c

Lines changed: 62 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2005,6 +2005,14 @@ long_from_binary_base(char **str, int base)
20052005
return long_normalize(z);
20062006
}
20072007

2008+
/* Parses a long from a bytestring. Leading and trailing whitespace will be
2009+
* ignored.
2010+
*
2011+
* If successful, a PyLong object will be returned and, unless 'pend' is NULL,
2012+
* '*pend' will point to the first unused byte.
2013+
*
2014+
* If unsuccessful, NULL will be returned.
2015+
*/
20082016
PyObject *
20092017
PyLong_FromString(char *str, char **pend, int base)
20102018
{
@@ -2267,12 +2275,17 @@ digit beyond the first.
22672275
str++;
22682276
if (*str != '\0')
22692277
goto onError;
2270-
if (pend)
2271-
*pend = str;
22722278
long_normalize(z);
2273-
return (PyObject *) maybe_small_long(z);
2279+
z = maybe_small_long(z);
2280+
if (z == NULL)
2281+
return NULL;
2282+
if (pend != NULL)
2283+
*pend = str;
2284+
return (PyObject *) z;
22742285

22752286
onError:
2287+
if (pend != NULL)
2288+
*pend = str;
22762289
Py_XDECREF(z);
22772290
slen = strlen(orig_str) < 200 ? strlen(orig_str) : 200;
22782291
strobj = PyUnicode_FromStringAndSize(orig_str, slen);
@@ -2285,6 +2298,31 @@ digit beyond the first.
22852298
return NULL;
22862299
}
22872300

2301+
/* Since PyLong_FromString doesn't have a length parameter,
2302+
* check here for possible NULs in the string.
2303+
*
2304+
* Reports an invalid literal as a bytes object.
2305+
*/
2306+
PyObject *
2307+
_PyLong_FromBytes(const char *s, Py_ssize_t len, int base)
2308+
{
2309+
PyObject *result, *strobj;
2310+
char *end = NULL;
2311+
2312+
result = PyLong_FromString((char*)s, &end, base);
2313+
if (end == NULL || (result != NULL && end == s + len))
2314+
return result;
2315+
Py_XDECREF(result);
2316+
strobj = PyBytes_FromStringAndSize(s, Py_MIN(len, 200));
2317+
if (strobj != NULL) {
2318+
PyErr_Format(PyExc_ValueError,
2319+
"invalid literal for int() with base %d: %R",
2320+
base, strobj);
2321+
Py_DECREF(strobj);
2322+
}
2323+
return NULL;
2324+
}
2325+
22882326
PyObject *
22892327
PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base)
22902328
{
@@ -2299,9 +2337,8 @@ PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base)
22992337
PyObject *
23002338
PyLong_FromUnicodeObject(PyObject *u, int base)
23012339
{
2302-
PyObject *result;
2303-
PyObject *asciidig;
2304-
char *buffer, *end;
2340+
PyObject *result, *asciidig, *strobj;
2341+
char *buffer, *end = NULL;
23052342
Py_ssize_t buflen;
23062343

23072344
asciidig = _PyUnicode_TransformDecimalAndSpaceToASCII(u);
@@ -2310,17 +2347,26 @@ PyLong_FromUnicodeObject(PyObject *u, int base)
23102347
buffer = PyUnicode_AsUTF8AndSize(asciidig, &buflen);
23112348
if (buffer == NULL) {
23122349
Py_DECREF(asciidig);
2313-
return NULL;
2350+
if (!PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2351+
return NULL;
23142352
}
2315-
result = PyLong_FromString(buffer, &end, base);
2316-
if (result != NULL && end != buffer + buflen) {
2317-
PyErr_SetString(PyExc_ValueError,
2318-
"null byte in argument for int()");
2319-
Py_DECREF(result);
2320-
result = NULL;
2353+
else {
2354+
result = PyLong_FromString(buffer, &end, base);
2355+
if (end == NULL || (result != NULL && end == buffer + buflen)) {
2356+
Py_DECREF(asciidig);
2357+
return result;
2358+
}
2359+
Py_DECREF(asciidig);
2360+
Py_XDECREF(result);
23212361
}
2322-
Py_DECREF(asciidig);
2323-
return result;
2362+
strobj = PySequence_GetSlice(u, 0, 200);
2363+
if (strobj != NULL) {
2364+
PyErr_Format(PyExc_ValueError,
2365+
"invalid literal for int() with base %d: %R",
2366+
base, strobj);
2367+
Py_DECREF(strobj);
2368+
}
2369+
return NULL;
23242370
}
23252371

23262372
/* forward */
@@ -4308,23 +4354,12 @@ long_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
43084354
if (PyUnicode_Check(x))
43094355
return PyLong_FromUnicodeObject(x, (int)base);
43104356
else if (PyByteArray_Check(x) || PyBytes_Check(x)) {
4311-
/* Since PyLong_FromString doesn't have a length parameter,
4312-
* check here for possible NULs in the string. */
43134357
char *string;
4314-
Py_ssize_t size = Py_SIZE(x);
43154358
if (PyByteArray_Check(x))
43164359
string = PyByteArray_AS_STRING(x);
43174360
else
43184361
string = PyBytes_AS_STRING(x);
4319-
if (strlen(string) != (size_t)size || !size) {
4320-
/* We only see this if there's a null byte in x or x is empty,
4321-
x is a bytes or buffer, *and* a base is given. */
4322-
PyErr_Format(PyExc_ValueError,
4323-
"invalid literal for int() with base %d: %R",
4324-
(int)base, x);
4325-
return NULL;
4326-
}
4327-
return PyLong_FromString(string, NULL, (int)base);
4362+
return _PyLong_FromBytes(string, Py_SIZE(x), (int)base);
43284363
}
43294364
else {
43304365
PyErr_SetString(PyExc_TypeError,

0 commit comments

Comments
 (0)