Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit da3dc5b

Browse files
committed
Patch #416953: Cache ASCII characters to speed up ASCII decoding.
1 parent f384832 commit da3dc5b

1 file changed

Lines changed: 33 additions & 0 deletions

File tree

Objects/unicodeobject.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ static PyUnicodeObject *unicode_empty;
9090
static PyUnicodeObject *unicode_freelist;
9191
static int unicode_freelist_size;
9292

93+
static PyUnicodeObject *unicode_ascii[128];
94+
9395
/* Default encoding to use and assume when NULL is passed as encoding
9496
parameter; it is initialized by _PyUnicode_Init().
9597
@@ -251,6 +253,19 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
251253
{
252254
PyUnicodeObject *unicode;
253255

256+
if (size == 1 && *u < 128) {
257+
unicode = unicode_ascii[*u];
258+
if (!unicode) {
259+
unicode = _PyUnicode_New(1);
260+
if (!unicode) /* allocation failed: bail out before touching the object */
261+
return NULL;
262+
unicode->str[0] = *u; /* store the character only after the NULL check */
263+
unicode_ascii[*u] = unicode;
264+
}
265+
Py_INCREF(unicode);
266+
return (PyObject*)unicode;
267+
}
268+
254269
unicode = _PyUnicode_New(size);
255270
if (!unicode)
256271
return NULL;
@@ -1655,6 +1670,11 @@ PyObject *PyUnicode_DecodeASCII(const char *s,
16551670
{
16561671
PyUnicodeObject *v;
16571672
Py_UNICODE *p;
1673+
1674+
if (size == 1 && *(unsigned char*)s < 128) {
1675+
Py_UNICODE r = *(unsigned char*)s;
1676+
return PyUnicode_FromUnicode(&r, 1);
1677+
}
16581678

16591679
/* ASCII is equivalent to the first 128 ordinals in Unicode. */
16601680
v = _PyUnicode_New(size);
@@ -5189,6 +5209,8 @@ PyTypeObject PyUnicode_Type = {
51895209

51905210
void _PyUnicode_Init(void)
51915211
{
5212+
int i;
5213+
51925214
/* Doublecheck the configuration... */
51935215
if (sizeof(Py_UNICODE) != 2)
51945216
Py_FatalError("Unicode configuration error: "
@@ -5199,6 +5221,9 @@ void _PyUnicode_Init(void)
51995221
unicode_freelist_size = 0;
52005222
unicode_empty = _PyUnicode_New(0);
52015223
strcpy(unicode_default_encoding, "ascii");
5224+
5225+
for (i = 0; i < 128; i++)
5226+
unicode_ascii[i] = NULL;
52025227
}
52035228

52045229
/* Finalize the Unicode implementation */
@@ -5207,10 +5232,18 @@ void
52075232
_PyUnicode_Fini(void)
52085233
{
52095234
PyUnicodeObject *u;
5235+
int i;
52105236

52115237
Py_XDECREF(unicode_empty);
52125238
unicode_empty = NULL;
52135239

5240+
for (i = 0; i < 128; i++) {
5241+
if (unicode_ascii[i]) {
5242+
Py_DECREF(unicode_ascii[i]);
5243+
unicode_ascii[i] = NULL;
5244+
}
5245+
}
5246+
52145247
for (u = unicode_freelist; u != NULL;) {
52155248
PyUnicodeObject *v = u;
52165249
u = *(PyUnicodeObject **)u;

0 commit comments

Comments
 (0)