Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 1205f27

Browse files
author
Victor Stinner
committed
Issue #9738: PyUnicode_FromFormat() and PyErr_Format() raise an error on
a non-ASCII byte in the format string. Also document the encoding.
1 parent cd419ab commit 1205f27

8 files changed

Lines changed: 53 additions & 6 deletions

File tree

Doc/c-api/exceptions.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ in various ways. There is a separate error indicator for each thread.
146146
.. cfunction:: PyObject* PyErr_Format(PyObject *exception, const char *format, ...)
147147

148148
This function sets the error indicator and returns *NULL*. *exception* should be
149-
a Python exception (class, not an instance). *format* should be a string,
149+
a Python exception (class, not an instance). *format* should be an ASCII-encoded string,
150150
containing format codes, similar to :cfunc:`printf`. The ``width.precision``
151151
before a format code is parsed, but the width part is ignored.
152152

Doc/c-api/unicode.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ APIs:
234234
arguments, calculate the size of the resulting Python unicode string and return
235235
a string with the values formatted into it. The variable arguments must be C
236236
types and must correspond exactly to the format characters in the *format*
237-
string. The following format characters are allowed:
237+
ASCII-encoded string. The following format characters are allowed:
238238

239239
.. % This should be exactly the same as the table in PyErr_Format.
240240
.. % The descriptions for %zd and %zu are wrong, but the truth is complicated

Include/pyerrors.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,11 @@ PyAPI_FUNC(PyObject *) PyErr_SetFromErrnoWithUnicodeFilename(
183183
PyObject *, const Py_UNICODE *);
184184
#endif /* MS_WINDOWS */
185185

186-
PyAPI_FUNC(PyObject *) PyErr_Format(PyObject *, const char *, ...);
186+
PyAPI_FUNC(PyObject *) PyErr_Format(
187+
PyObject *exception,
188+
const char *format, /* ASCII-encoded string */
189+
...
190+
);
187191

188192
#ifdef MS_WINDOWS
189193
PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErrWithFilenameObject(

Include/unicodeobject.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -550,8 +550,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
550550
register PyObject *obj /* Object */
551551
);
552552

553-
PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char*, va_list);
554-
PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char*, ...);
553+
PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
554+
const char *format, /* ASCII-encoded string */
555+
va_list vargs
556+
);
557+
PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
558+
const char *format, /* ASCII-encoded string */
559+
...
560+
);
555561

556562
/* Format the object based on the format_spec, as defined in PEP 3101
557563
(Advanced String Formatting). */

Lib/test/test_unicode.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1385,6 +1385,20 @@ def __str__(self):
13851385
self.assertEquals("%s" % s, '__str__ overridden')
13861386
self.assertEquals("{}".format(s), '__str__ overridden')
13871387

1388+
def test_from_format(self):
1389+
# Ensure that PyUnicode_FromFormat() raises an error for a non-ascii
1390+
# format string.
1391+
from _testcapi import format_unicode
1392+
1393+
# ascii format, non-ascii argument
1394+
text = format_unicode(b'ascii\x7f=%U', 'unicode\xe9')
1395+
self.assertEqual(text, 'ascii\x7f=unicode\xe9')
1396+
1397+
# non-ascii format, ascii argument
1398+
self.assertRaisesRegexp(ValueError,
1399+
'^PyUnicode_FromFormatV\(\) expects an ASCII-encoded format '
1400+
'string, got a non-ascii byte: 0xe9$',
1401+
format_unicode, b'unicode\xe9=%s', 'ascii')
13881402

13891403
def test_main():
13901404
support.run_unittest(__name__)

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ What's New in Python 3.2 Alpha 3?
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #9738: PyUnicode_FromFormat() and PyErr_Format() raise an error on
14+
a non-ASCII byte in the format string.
15+
1316
- Issue #4617: Previously it was illegal to delete a name from the local
1417
namespace if it occurs as a free variable in a nested block. This limitation
1518
of the compiler has been lifted, and a new opcode introduced (DELETE_DEREF).

Modules/_testcapimodule.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2193,6 +2193,17 @@ crash_no_current_thread(PyObject *self)
21932193
return NULL;
21942194
}
21952195

2196+
static PyObject *
2197+
format_unicode(PyObject *self, PyObject *args)
2198+
{
2199+
const char *format;
2200+
PyObject *arg;
2201+
if (!PyArg_ParseTuple(args, "yU", &format, &arg))
2202+
return NULL;
2203+
return PyUnicode_FromFormat(format, arg);
2204+
2205+
}
2206+
21962207
static PyMethodDef TestMethods[] = {
21972208
{"raise_exception", raise_exception, METH_VARARGS},
21982209
{"raise_memoryerror", (PyCFunction)raise_memoryerror, METH_NOARGS},
@@ -2272,6 +2283,7 @@ static PyMethodDef TestMethods[] = {
22722283
{"make_exception_with_doc", (PyCFunction)make_exception_with_doc,
22732284
METH_VARARGS | METH_KEYWORDS},
22742285
{"crash_no_current_thread", (PyCFunction)crash_no_current_thread, METH_NOARGS},
2286+
{"format_unicode", format_unicode, METH_VARARGS},
22752287
{NULL, NULL} /* sentinel */
22762288
};
22772289

Objects/unicodeobject.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1102,7 +1102,15 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
11021102
appendstring(p);
11031103
goto end;
11041104
}
1105-
} else
1105+
}
1106+
else if (128 <= (unsigned char)*f) {
1107+
PyErr_Format(PyExc_ValueError,
1108+
"PyUnicode_FromFormatV() expects an ASCII-encoded format "
1109+
"string, got a non-ascii byte: 0x%02x",
1110+
(unsigned char)*f);
1111+
goto fail;
1112+
}
1113+
else
11061114
*s++ = *f;
11071115
}
11081116

0 commit comments

Comments
 (0)