Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 621ef3d

Browse files
committed
Issue #15609: Optimize str%args for integer argument
- Use _PyLong_FormatWriter() instead of formatlong() when possible, to avoid a temporary buffer
- Enable the fast path when the width is smaller than or equal to the length, and when the precision is greater than or equal to the length
- Add unit tests!
- formatlong() uses PyUnicode_Resize() instead of _PyUnicode_FromASCII() to resize the output string
1 parent fd0d3e5 commit 621ef3d

3 files changed

Lines changed: 119 additions & 68 deletions

File tree

Lib/test/test_format.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,22 @@ def test_locale(self):
307307
finally:
308308
locale.setlocale(locale.LC_ALL, oldloc)
309309

310+
@support.cpython_only
def test_optimisations(self):
    """Check that formatting returns the very same str object when the
    format spec requires neither padding nor truncation.

    Every spec below has a width <= len(sample) and/or a precision
    >= len(sample), so the result is expected to be `sample` itself
    (identity, not just equality).
    """
    sample = "abcde"  # 5 characters

    # printf-style formatting: str % args
    for percent_spec in ("%s", "%.5s", "%.10s", "%1s", "%5s"):
        self.assertIs(percent_spec % sample, sample)

    # str.format()
    for brace_spec in ("{0}", "{0:s}", "{0:.5s}",
                       "{0:.10s}", "{0:1s}", "{0:5s}"):
        self.assertIs(brace_spec.format(sample), sample)
310326

311327

312328
def test_main():

Objects/unicodeobject.c

Lines changed: 101 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -13288,7 +13288,10 @@ formatlong(PyObject *val, int flags, int prec, int type)
1328813288
assert(PyLong_Check(val));
1328913289

1329013290
switch (type) {
13291+
default:
13292+
assert(!"'type' not in [diuoxX]");
1329113293
case 'd':
13294+
case 'i':
1329213295
case 'u':
1329313296
/* Special-case boolean: we want 0/1 */
1329413297
if (PyBool_Check(val))
@@ -13305,8 +13308,6 @@ formatlong(PyObject *val, int flags, int prec, int type)
1330513308
numnondigits = 2;
1330613309
result = PyNumber_ToBase(val, 16);
1330713310
break;
13308-
default:
13309-
assert(!"'type' not in [duoxX]");
1331013311
}
1331113312
if (!result)
1331213313
return NULL;
@@ -13379,15 +13380,94 @@ formatlong(PyObject *val, int flags, int prec, int type)
1337913380
if (buf[i] >= 'a' && buf[i] <= 'x')
1338013381
buf[i] -= 'a'-'A';
1338113382
}
13382-
if (!PyUnicode_Check(result) || len != PyUnicode_GET_LENGTH(result)) {
13383+
if (!PyUnicode_Check(result)
13384+
|| buf != PyUnicode_DATA(result)) {
1338313385
PyObject *unicode;
1338413386
unicode = _PyUnicode_FromASCII(buf, len);
1338513387
Py_DECREF(result);
1338613388
result = unicode;
1338713389
}
13390+
else if (len != PyUnicode_GET_LENGTH(result)) {
13391+
if (PyUnicode_Resize(&result, len) < 0)
13392+
Py_CLEAR(result);
13393+
}
1338813394
return result;
1338913395
}
1339013396

13397+
/* Format an integer.
13398+
* Return 1 if the number has been formatted into the writer,
13399+
* 0 if the number has been formatted into *p_result
13400+
* -1 and raise an exception on error */
13401+
static int
13402+
mainformatlong(_PyUnicodeWriter *writer, PyObject *v,
13403+
int c, Py_ssize_t width, int prec, int flags,
13404+
PyObject **p_result)
13405+
{
13406+
PyObject *iobj, *res;
13407+
13408+
if (!PyNumber_Check(v))
13409+
goto wrongtype;
13410+
13411+
if (!PyLong_Check(v)) {
13412+
iobj = PyNumber_Long(v);
13413+
if (iobj == NULL) {
13414+
if (PyErr_ExceptionMatches(PyExc_TypeError))
13415+
goto wrongtype;
13416+
return -1;
13417+
}
13418+
assert(PyLong_Check(iobj));
13419+
}
13420+
else {
13421+
iobj = v;
13422+
Py_INCREF(iobj);
13423+
}
13424+
13425+
if (PyLong_CheckExact(v)
13426+
&& width == -1 && prec == -1
13427+
&& !(flags & (F_SIGN | F_BLANK))
13428+
&& c != 'X')
13429+
{
13430+
/* Fast path */
13431+
int alternate = flags & F_ALT;
13432+
int base;
13433+
13434+
switch(c)
13435+
{
13436+
default:
13437+
assert(0 && "'type' not in [diuoxX]");
13438+
case 'd':
13439+
case 'i':
13440+
case 'u':
13441+
base = 10;
13442+
break;
13443+
case 'o':
13444+
base = 8;
13445+
break;
13446+
case 'x':
13447+
case 'X':
13448+
base = 16;
13449+
break;
13450+
}
13451+
13452+
if (_PyLong_FormatWriter(writer, v, base, alternate) == -1)
13453+
return -1;
13454+
return 1;
13455+
}
13456+
13457+
res = formatlong(iobj, flags, prec, c);
13458+
Py_DECREF(iobj);
13459+
if (res == NULL)
13460+
return -1;
13461+
*p_result = res;
13462+
return 0;
13463+
13464+
wrongtype:
13465+
PyErr_Format(PyExc_TypeError,
13466+
"%%%c format: a number is required, "
13467+
"not %.200s", (char)c, Py_TYPE(v)->tp_name);
13468+
return -1;
13469+
}
13470+
1339113471
static Py_UCS4
1339213472
formatchar(PyObject *v)
1339313473
{
@@ -13493,7 +13573,6 @@ PyUnicode_Format(PyObject *format, PyObject *args)
1349313573
Py_UCS4 fill;
1349413574
int sign;
1349513575
Py_UCS4 signchar;
13496-
int isnumok;
1349713576
PyObject *v = NULL;
1349813577
void *pbuf = NULL;
1349913578
Py_ssize_t pindex, len;
@@ -13692,64 +13771,18 @@ PyUnicode_Format(PyObject *format, PyObject *args)
1369213771
case 'o':
1369313772
case 'x':
1369413773
case 'X':
13695-
if (PyLong_CheckExact(v)
13696-
&& width == -1 && prec == -1
13697-
&& !(flags & (F_SIGN | F_BLANK)))
13698-
{
13699-
/* Fast path */
13700-
switch(c)
13701-
{
13702-
case 'd':
13703-
case 'i':
13704-
case 'u':
13705-
if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1)
13706-
goto onError;
13707-
goto nextarg;
13708-
case 'x':
13709-
if (_PyLong_FormatWriter(&writer, v, 16, flags & F_ALT) == -1)
13710-
goto onError;
13711-
goto nextarg;
13712-
case 'o':
13713-
if (_PyLong_FormatWriter(&writer, v, 8, flags & F_ALT) == -1)
13714-
goto onError;
13715-
goto nextarg;
13716-
default:
13717-
break;
13718-
}
13719-
}
13720-
13721-
isnumok = 0;
13722-
if (PyNumber_Check(v)) {
13723-
PyObject *iobj=NULL;
13724-
13725-
if (PyLong_Check(v)) {
13726-
iobj = v;
13727-
Py_INCREF(iobj);
13728-
}
13729-
else {
13730-
iobj = PyNumber_Long(v);
13731-
}
13732-
if (iobj!=NULL) {
13733-
if (PyLong_Check(iobj)) {
13734-
isnumok = 1;
13735-
sign = 1;
13736-
temp = formatlong(iobj, flags, prec, (c == 'i'? 'd': c));
13737-
Py_DECREF(iobj);
13738-
}
13739-
else {
13740-
Py_DECREF(iobj);
13741-
}
13742-
}
13743-
}
13744-
if (!isnumok) {
13745-
PyErr_Format(PyExc_TypeError,
13746-
"%%%c format: a number is required, "
13747-
"not %.200s", (char)c, Py_TYPE(v)->tp_name);
13774+
{
13775+
int ret = mainformatlong(&writer, v, c, width, prec,
13776+
flags, &temp);
13777+
if (ret == 1)
13778+
goto nextarg;
13779+
if (ret == -1)
1374813780
goto onError;
13749-
}
13781+
sign = 1;
1375013782
if (flags & F_ZERO)
1375113783
fill = '0';
1375213784
break;
13785+
}
1375313786

1375413787
case 'e':
1375513788
case 'E':
@@ -13803,7 +13836,14 @@ PyUnicode_Format(PyObject *format, PyObject *args)
1380313836
goto onError;
1380413837
assert (PyUnicode_Check(temp));
1380513838

13806-
if (width == -1 && prec == -1
13839+
if (PyUnicode_READY(temp) == -1) {
13840+
Py_CLEAR(temp);
13841+
goto onError;
13842+
}
13843+
13844+
len = PyUnicode_GET_LENGTH(temp);
13845+
if ((width == -1 || width <= len)
13846+
&& (prec == -1 || prec >= len)
1380713847
&& !(flags & (F_SIGN | F_BLANK)))
1380813848
{
1380913849
/* Fast path */
@@ -13812,20 +13852,14 @@ PyUnicode_Format(PyObject *format, PyObject *args)
1381213852
goto nextarg;
1381313853
}
1381413854

13815-
if (PyUnicode_READY(temp) == -1) {
13816-
Py_CLEAR(temp);
13817-
goto onError;
13818-
}
13819-
kind = PyUnicode_KIND(temp);
13820-
pbuf = PyUnicode_DATA(temp);
13821-
len = PyUnicode_GET_LENGTH(temp);
13822-
1382313855
if (c == 's' || c == 'r' || c == 'a') {
1382413856
if (prec >= 0 && len > prec)
1382513857
len = prec;
1382613858
}
1382713859

1382813860
/* pbuf is initialized here. */
13861+
kind = PyUnicode_KIND(temp);
13862+
pbuf = PyUnicode_DATA(temp);
1382913863
pindex = 0;
1383013864
if (sign) {
1383113865
Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);

Python/formatter_unicode.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -757,7 +757,8 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format,
757757
goto done;
758758
}
759759

760-
if (format->width == -1 && format->precision == -1) {
760+
if ((format->width == -1 || format->width <= len)
761+
&& (format->precision == -1 || format->precision >= len)) {
761762
/* Fast path */
762763
return _PyUnicodeWriter_WriteStr(writer, value);
763764
}

0 commit comments

Comments
 (0)