Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c516610

Browse files
committed
Optimize str%arg for number formats: %i, %d, %u, %x, %p
Write a specialized function to write an ASCII/latin1 C char* string into a Python Unicode string.
1 parent 99d7ad0 commit c516610

1 file changed

Lines changed: 52 additions & 4 deletions

File tree

Objects/unicodeobject.c

Lines changed: 52 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1637,6 +1637,51 @@ unicode_putchar(PyObject **p_unicode, Py_ssize_t *pos,
16371637
return 0;
16381638
}
16391639

1640+
/* Copy a ASCII or latin1 char* string into a Python Unicode string.
1641+
Return the length of the input string.
1642+
1643+
WARNING: Don't copy the terminating null character and don't check the
1644+
maximum character (may write a latin1 character in an ASCII string). */
1645+
static Py_ssize_t
1646+
unicode_write_cstr(PyObject *unicode, Py_ssize_t index, const char *str)
1647+
{
1648+
enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
1649+
void *data = PyUnicode_DATA(unicode);
1650+
1651+
switch (kind) {
1652+
case PyUnicode_1BYTE_KIND: {
1653+
Py_ssize_t len = strlen(str);
1654+
assert(index + len <= PyUnicode_GET_LENGTH(unicode));
1655+
memcpy(data + index, str, len);
1656+
return len;
1657+
}
1658+
case PyUnicode_2BYTE_KIND: {
1659+
Py_UCS2 *start = (Py_UCS2 *)data + index;
1660+
Py_UCS2 *ucs2 = start;
1661+
assert(index <= PyUnicode_GET_LENGTH(unicode));
1662+
1663+
for (; *str; ++ucs2, ++str)
1664+
*ucs2 = (Py_UCS2)*str;
1665+
1666+
assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode));
1667+
return ucs2 - start;
1668+
}
1669+
default: {
1670+
Py_UCS4 *start = (Py_UCS4 *)data + index;
1671+
Py_UCS4 *ucs4 = start;
1672+
assert(kind == PyUnicode_4BYTE_KIND);
1673+
assert(index <= PyUnicode_GET_LENGTH(unicode));
1674+
1675+
for (; *str; ++ucs4, ++str)
1676+
*ucs4 = (Py_UCS4)*str;
1677+
1678+
assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode));
1679+
return ucs4 - start;
1680+
}
1681+
}
1682+
}
1683+
1684+
16401685
static PyObject*
16411686
get_latin1_char(unsigned char ch)
16421687
{
@@ -2590,19 +2635,23 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
25902635
case 'u':
25912636
case 'x':
25922637
case 'p':
2638+
{
2639+
Py_ssize_t written;
25932640
/* unused, since we already have the result */
25942641
if (*f == 'p')
25952642
(void) va_arg(vargs, void *);
25962643
else
25972644
(void) va_arg(vargs, int);
25982645
/* extract the result from numberresults and append. */
2599-
for (; *numberresult; ++i, ++numberresult)
2600-
PyUnicode_WRITE(kind, data, i, *numberresult);
2646+
written = unicode_write_cstr(string, i, numberresult);
26012647
/* skip over the separating '\0' */
2648+
i += written;
2649+
numberresult += written;
26022650
assert(*numberresult == '\0');
26032651
numberresult++;
26042652
assert(numberresult <= numberresults + numbersize);
26052653
break;
2654+
}
26062655
case 's':
26072656
{
26082657
/* unused, since we already have the result */
@@ -2669,8 +2718,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
26692718
PyUnicode_WRITE(kind, data, i++, '%');
26702719
break;
26712720
default:
2672-
for (; *p; ++p, ++i)
2673-
PyUnicode_WRITE(kind, data, i, *p);
2721+
i += unicode_write_cstr(string, i, p);
26742722
assert(i == PyUnicode_GET_LENGTH(string));
26752723
goto end;
26762724
}

0 commit comments

Comments
 (0)