Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 985a82a

Browse files
committed
add unicode_char() in unicodeobject.c to factorize code
1 parent 5c86733 commit 985a82a

1 file changed

Lines changed: 31 additions & 55 deletions

File tree

Objects/unicodeobject.c

Lines changed: 31 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1749,7 +1749,6 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
17491749
}
17501750
}
17511751

1752-
17531752
static PyObject*
17541753
get_latin1_char(unsigned char ch)
17551754
{
@@ -1766,6 +1765,31 @@ get_latin1_char(unsigned char ch)
17661765
return unicode;
17671766
}
17681767

1768+
static PyObject*
1769+
unicode_char(Py_UCS4 ch)
1770+
{
1771+
PyObject *unicode;
1772+
1773+
assert(ch <= MAX_UNICODE);
1774+
1775+
unicode = PyUnicode_New(1, ch);
1776+
if (unicode == NULL)
1777+
return NULL;
1778+
switch (PyUnicode_KIND(unicode)) {
1779+
case PyUnicode_1BYTE_KIND:
1780+
PyUnicode_1BYTE_DATA(unicode)[0] = (Py_UCS1)ch;
1781+
break;
1782+
case PyUnicode_2BYTE_KIND:
1783+
PyUnicode_2BYTE_DATA(unicode)[0] = (Py_UCS2)ch;
1784+
break;
1785+
default:
1786+
assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);
1787+
PyUnicode_4BYTE_DATA(unicode)[0] = ch;
1788+
}
1789+
assert(_PyUnicode_CheckConsistency(unicode, 1));
1790+
return unicode;
1791+
}
1792+
17691793
PyObject *
17701794
PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)
17711795
{
@@ -1964,22 +1988,8 @@ _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size)
19641988
if (size == 0)
19651989
_Py_RETURN_UNICODE_EMPTY();
19661990
assert(size > 0);
1967-
if (size == 1) {
1968-
Py_UCS4 ch = u[0];
1969-
int kind;
1970-
void *data;
1971-
if (ch < 256)
1972-
return get_latin1_char((unsigned char)ch);
1973-
1974-
res = PyUnicode_New(1, ch);
1975-
if (res == NULL)
1976-
return NULL;
1977-
kind = PyUnicode_KIND(res);
1978-
data = PyUnicode_DATA(res);
1979-
PyUnicode_WRITE(kind, data, 0, ch);
1980-
assert(_PyUnicode_CheckConsistency(res, 1));
1981-
return res;
1982-
}
1991+
if (size == 1)
1992+
return unicode_char(u[0]);
19831993

19841994
max_char = ucs2lib_find_max_char(u, u + size);
19851995
res = PyUnicode_New(size, max_char);
@@ -2004,22 +2014,8 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
20042014
if (size == 0)
20052015
_Py_RETURN_UNICODE_EMPTY();
20062016
assert(size > 0);
2007-
if (size == 1) {
2008-
Py_UCS4 ch = u[0];
2009-
int kind;
2010-
void *data;
2011-
if (ch < 256)
2012-
return get_latin1_char((unsigned char)ch);
2013-
2014-
res = PyUnicode_New(1, ch);
2015-
if (res == NULL)
2016-
return NULL;
2017-
kind = PyUnicode_KIND(res);
2018-
data = PyUnicode_DATA(res);
2019-
PyUnicode_WRITE(kind, data, 0, ch);
2020-
assert(_PyUnicode_CheckConsistency(res, 1));
2021-
return res;
2022-
}
2017+
if (size == 1)
2018+
return unicode_char(u[0]);
20232019

20242020
max_char = ucs4lib_find_max_char(u, u + size);
20252021
res = PyUnicode_New(size, max_char);
@@ -2887,17 +2883,7 @@ PyUnicode_FromOrdinal(int ordinal)
28872883
return NULL;
28882884
}
28892885

2890-
if ((Py_UCS4)ordinal < 256)
2891-
return get_latin1_char((unsigned char)ordinal);
2892-
2893-
v = PyUnicode_New(1, ordinal);
2894-
if (v == NULL)
2895-
return NULL;
2896-
kind = PyUnicode_KIND(v);
2897-
data = PyUnicode_DATA(v);
2898-
PyUnicode_WRITE(kind, data, 0, ordinal);
2899-
assert(_PyUnicode_CheckConsistency(v, 1));
2900-
return v;
2886+
return unicode_char((Py_UCS4)ordinal);
29012887
}
29022888

29032889
PyObject *
@@ -11354,17 +11340,7 @@ unicode_getitem(PyObject *self, Py_ssize_t index)
1135411340
kind = PyUnicode_KIND(self);
1135511341
data = PyUnicode_DATA(self);
1135611342
ch = PyUnicode_READ(kind, data, index);
11357-
if (ch < 256)
11358-
return get_latin1_char(ch);
11359-
11360-
res = PyUnicode_New(1, ch);
11361-
if (res == NULL)
11362-
return NULL;
11363-
kind = PyUnicode_KIND(res);
11364-
data = PyUnicode_DATA(res);
11365-
PyUnicode_WRITE(kind, data, 0, ch);
11366-
assert(_PyUnicode_CheckConsistency(res, 1));
11367-
return res;
11343+
return unicode_char(ch);
1136811344
}
1136911345

1137011346
/* Believe it or not, this produces the same value for ASCII strings

0 commit comments

Comments
 (0)