@@ -1749,7 +1749,6 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
17491749 }
17501750}
17511751
1752-
17531752static PyObject *
17541753get_latin1_char (unsigned char ch )
17551754{
@@ -1766,6 +1765,31 @@ get_latin1_char(unsigned char ch)
17661765 return unicode ;
17671766}
17681767
1768+ static PyObject *
1769+ unicode_char (Py_UCS4 ch )
1770+ {
1771+ PyObject * unicode ;
1772+
1773+ assert (ch <= MAX_UNICODE );
1774+
1775+ unicode = PyUnicode_New (1 , ch );
1776+ if (unicode == NULL )
1777+ return NULL ;
1778+ switch (PyUnicode_KIND (unicode )) {
1779+ case PyUnicode_1BYTE_KIND :
1780+ PyUnicode_1BYTE_DATA (unicode )[0 ] = (Py_UCS1 )ch ;
1781+ break ;
1782+ case PyUnicode_2BYTE_KIND :
1783+ PyUnicode_2BYTE_DATA (unicode )[0 ] = (Py_UCS2 )ch ;
1784+ break ;
1785+ default :
1786+ assert (PyUnicode_KIND (unicode ) == PyUnicode_4BYTE_KIND );
1787+ PyUnicode_4BYTE_DATA (unicode )[0 ] = ch ;
1788+ }
1789+ assert (_PyUnicode_CheckConsistency (unicode , 1 ));
1790+ return unicode ;
1791+ }
1792+
17691793PyObject *
17701794PyUnicode_FromUnicode (const Py_UNICODE * u , Py_ssize_t size )
17711795{
@@ -1964,22 +1988,8 @@ _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size)
19641988 if (size == 0 )
19651989 _Py_RETURN_UNICODE_EMPTY ();
19661990 assert (size > 0 );
1967- if (size == 1 ) {
1968- Py_UCS4 ch = u [0 ];
1969- int kind ;
1970- void * data ;
1971- if (ch < 256 )
1972- return get_latin1_char ((unsigned char )ch );
1973-
1974- res = PyUnicode_New (1 , ch );
1975- if (res == NULL )
1976- return NULL ;
1977- kind = PyUnicode_KIND (res );
1978- data = PyUnicode_DATA (res );
1979- PyUnicode_WRITE (kind , data , 0 , ch );
1980- assert (_PyUnicode_CheckConsistency (res , 1 ));
1981- return res ;
1982- }
1991+ if (size == 1 )
1992+ return unicode_char (u [0 ]);
19831993
19841994 max_char = ucs2lib_find_max_char (u , u + size );
19851995 res = PyUnicode_New (size , max_char );
@@ -2004,22 +2014,8 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
20042014 if (size == 0 )
20052015 _Py_RETURN_UNICODE_EMPTY ();
20062016 assert (size > 0 );
2007- if (size == 1 ) {
2008- Py_UCS4 ch = u [0 ];
2009- int kind ;
2010- void * data ;
2011- if (ch < 256 )
2012- return get_latin1_char ((unsigned char )ch );
2013-
2014- res = PyUnicode_New (1 , ch );
2015- if (res == NULL )
2016- return NULL ;
2017- kind = PyUnicode_KIND (res );
2018- data = PyUnicode_DATA (res );
2019- PyUnicode_WRITE (kind , data , 0 , ch );
2020- assert (_PyUnicode_CheckConsistency (res , 1 ));
2021- return res ;
2022- }
2017+ if (size == 1 )
2018+ return unicode_char (u [0 ]);
20232019
20242020 max_char = ucs4lib_find_max_char (u , u + size );
20252021 res = PyUnicode_New (size , max_char );
@@ -2887,17 +2883,7 @@ PyUnicode_FromOrdinal(int ordinal)
28872883 return NULL ;
28882884 }
28892885
2890- if ((Py_UCS4 )ordinal < 256 )
2891- return get_latin1_char ((unsigned char )ordinal );
2892-
2893- v = PyUnicode_New (1 , ordinal );
2894- if (v == NULL )
2895- return NULL ;
2896- kind = PyUnicode_KIND (v );
2897- data = PyUnicode_DATA (v );
2898- PyUnicode_WRITE (kind , data , 0 , ordinal );
2899- assert (_PyUnicode_CheckConsistency (v , 1 ));
2900- return v ;
2886+ return unicode_char ((Py_UCS4 )ordinal );
29012887}
29022888
29032889PyObject *
@@ -11354,17 +11340,7 @@ unicode_getitem(PyObject *self, Py_ssize_t index)
1135411340 kind = PyUnicode_KIND (self );
1135511341 data = PyUnicode_DATA (self );
1135611342 ch = PyUnicode_READ (kind , data , index );
11357- if (ch < 256 )
11358- return get_latin1_char (ch );
11359-
11360- res = PyUnicode_New (1 , ch );
11361- if (res == NULL )
11362- return NULL ;
11363- kind = PyUnicode_KIND (res );
11364- data = PyUnicode_DATA (res );
11365- PyUnicode_WRITE (kind , data , 0 , ch );
11366- assert (_PyUnicode_CheckConsistency (res , 1 ));
11367- return res ;
11343+ return unicode_char (ch );
1136811344}
1136911345
1137011346/* Believe it or not, this produces the same value for ASCII strings
0 commit comments