@@ -174,25 +174,24 @@ BB_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
/*
 * u_getitem -- build a Python unicode object from element i of a 'u' array.
 *
 * This is a diff hunk: the '-' line is the outgoing body, the '+' line the
 * incoming one.
 *   outgoing: views ap->ob_item as Py_UCS4[] and passes the single code
 *             point at index i to PyUnicode_FromOrdinal.
 *   incoming: views ap->ob_item as Py_UNICODE[] and passes the address of
 *             element i, with length 1, to PyUnicode_FromUnicode.
 *
 * No bounds check on i is performed here. The result of the PyUnicode_*
 * call is returned unchanged.
 */
174174static PyObject *
175175u_getitem (arrayobject * ap , Py_ssize_t i )
176176{
177- return PyUnicode_FromOrdinal ((( Py_UCS4 * ) ap -> ob_item )[i ]);
177+ return PyUnicode_FromUnicode ( & (( Py_UNICODE * ) ap -> ob_item )[i ], 1 );
178178}
179179
/*
 * u_setitem -- validate v as a single unicode character and store it in
 * slot i of a 'u' array.
 *
 * Returns 0 on success and -1 on failure. Failure cases visible here:
 *   - PyArg_Parse rejects v;
 *   - (outgoing version only) PyUnicode_READY returns non-zero;
 *   - the parsed string is not exactly one character long, in which case
 *     TypeError "array item must be unicode character" is set.
 *
 * This is a diff hunk ('-' outgoing, '+' incoming):
 *   outgoing: parses v with "U" into a PyObject *, checks
 *             PyUnicode_GET_LENGTH, and stores PyUnicode_READ_CHAR(p, 0)
 *             into a Py_UCS4 slot.
 *   incoming: parses v with "u#" into a Py_UNICODE buffer plus length,
 *             checks the length, and stores p[0] into a Py_UNICODE slot.
 */
180180static int
181181u_setitem (arrayobject * ap , Py_ssize_t i , PyObject * v )
182182{
183- PyObject * p ;
183+ Py_UNICODE * p ;
184+ Py_ssize_t len ;
184185
185- if (!PyArg_Parse (v , "U;array item must be unicode character" , & p ))
186- return -1 ;
187- if (PyUnicode_READY (p ))
186+ if (!PyArg_Parse (v , "u#;array item must be unicode character" , & p , & len ))
188187 return -1 ;
/* Exactly one character is required; anything else is a TypeError. */
189- if (PyUnicode_GET_LENGTH ( p ) != 1 ) {
188+ if (len != 1 ) {
190189 PyErr_SetString (PyExc_TypeError ,
191190 "array item must be unicode character" );
192191 return -1 ;
193192 }
/* A negative index only validates v: nothing is stored, 0 is still returned. */
194193 if (i >= 0 )
195- ((Py_UCS4 * )ap -> ob_item )[i ] = PyUnicode_READ_CHAR ( p , 0 ) ;
194+ ((Py_UNICODE * )ap -> ob_item )[i ] = p [ 0 ] ;
196195 return 0 ;
197196}
198197
@@ -444,13 +443,6 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
444443 return 0 ;
445444}
446445
/*
 * STRUCT_LONG_FORMAT (every line below is a '-' line, i.e. deleted by this
 * hunk): a format string selected at compile time -- "I" when SIZEOF_INT is
 * 4, otherwise "L" when SIZEOF_LONG is 4, otherwise the build stops with an
 * #error that mentions Py_UCS4.
 * The only use visible in this diff is the outgoing 'u' entry of the
 * descriptors table; the incoming entry uses the literal "u" instead.
 */
447- #if SIZEOF_INT == 4
448- # define STRUCT_LONG_FORMAT "I"
449- #elif SIZEOF_LONG == 4
450- # define STRUCT_LONG_FORMAT "L"
451- #else
452- # error "Unable to get struct format for Py_UCS4"
453- #endif
454446
455447/* Description of types.
456448 *
@@ -460,7 +452,7 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
460452static struct arraydescr descriptors [] = {
461453 {'b' , 1 , b_getitem , b_setitem , "b" , 1 , 1 },
462454 {'B' , 1 , BB_getitem , BB_setitem , "B" , 1 , 0 },
463- {'u' , sizeof (Py_UCS4 ), u_getitem , u_setitem , STRUCT_LONG_FORMAT , 0 , 0 },
455+ {'u' , sizeof (Py_UNICODE ), u_getitem , u_setitem , "u" , 0 , 0 },
464456 {'h' , sizeof (short ), h_getitem , h_setitem , "h" , 1 , 1 },
465457 {'H' , sizeof (short ), HH_getitem , HH_setitem , "H" , 1 , 0 },
466458 {'i' , sizeof (int ), i_getitem , i_setitem , "i" , 1 , 1 },
@@ -1519,26 +1511,25 @@ This method is deprecated. Use tobytes instead.");
15191511static PyObject *
15201512array_fromunicode (arrayobject * self , PyObject * args )
15211513{
1522- PyObject * ustr ;
1514+ Py_UNICODE * ustr ;
15231515 Py_ssize_t n ;
1516+ char typecode ;
15241517
1525- if (!PyArg_ParseTuple (args , "U :fromunicode" , & ustr ))
1518+ if (!PyArg_ParseTuple (args , "u# :fromunicode" , & ustr , & n ))
15261519 return NULL ;
1527- if (self -> ob_descr -> typecode != 'u' ) {
1520+ typecode = self -> ob_descr -> typecode ;
1521+ if ((typecode != 'u' )) {
15281522 PyErr_SetString (PyExc_ValueError ,
15291523 "fromunicode() may only be called on "
15301524 "unicode type arrays" );
15311525 return NULL ;
15321526 }
1533- if (PyUnicode_READY (ustr ))
1534- return NULL ;
1535- n = PyUnicode_GET_LENGTH (ustr );
15361527 if (n > 0 ) {
15371528 Py_ssize_t old_size = Py_SIZE (self );
15381529 if (array_resize (self , old_size + n ) == -1 )
15391530 return NULL ;
1540- if (! PyUnicode_AsUCS4 ( ustr , ( Py_UCS4 * ) self -> ob_item + old_size , n , 0 ))
1541- return NULL ;
1531+ memcpy ( self -> ob_item + old_size * sizeof ( Py_UNICODE ),
1532+ ustr , n * sizeof ( Py_UNICODE )) ;
15421533 }
15431534
15441535 Py_INCREF (Py_None );
@@ -1557,14 +1548,14 @@ append Unicode data to an array of some other type.");
/*
 * array_tounicode -- convert the whole contents of a 'u' array into one
 * Python unicode object.
 *
 * The second parameter is not used. If the array's typecode is not 'u',
 * ValueError "tounicode() may only be called on unicode type arrays" is set
 * and NULL is returned.
 *
 * This is a diff hunk ('-' outgoing, '+' incoming):
 *   outgoing: tests self->ob_descr->typecode directly and builds the result
 *             with PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, ...) over
 *             ob_item viewed as Py_UCS4[].
 *   incoming: copies the typecode into a local first and builds the result
 *             with PyUnicode_FromUnicode over ob_item viewed as Py_UNICODE[].
 * Both versions pass Py_SIZE(self) as the element count.
 */
15571548static PyObject *
15581549array_tounicode (arrayobject * self , PyObject * unused )
15591550{
1560- if (self -> ob_descr -> typecode != 'u' ) {
1551+ char typecode ;
1552+ typecode = self -> ob_descr -> typecode ;
1553+ if ((typecode != 'u' )) {
15611554 PyErr_SetString (PyExc_ValueError ,
15621555 "tounicode() may only be called on unicode type arrays" );
15631556 return NULL ;
15641557 }
1565- return PyUnicode_FromKindAndData (PyUnicode_4BYTE_KIND ,
1566- (Py_UCS4 * ) self -> ob_item ,
1567- Py_SIZE (self ));
1558+ return PyUnicode_FromUnicode ((Py_UNICODE * ) self -> ob_item , Py_SIZE (self ));
15681559}
15691560
15701561PyDoc_STRVAR (tounicode_doc ,
@@ -1671,7 +1662,13 @@ typecode_to_mformat_code(char typecode)
16711662 return UNSIGNED_INT8 ;
16721663
16731664 case 'u' :
1674- return UTF32_LE + is_big_endian ;
1665+ if (sizeof (Py_UNICODE ) == 2 ) {
1666+ return UTF16_LE + is_big_endian ;
1667+ }
1668+ if (sizeof (Py_UNICODE ) == 4 ) {
1669+ return UTF32_LE + is_big_endian ;
1670+ }
1671+ return UNKNOWN_FORMAT ;
16751672
16761673 case 'f' :
16771674 if (sizeof (float ) == 4 ) {
@@ -2419,8 +2416,14 @@ array_buffer_getbuf(arrayobject *self, Py_buffer *view, int flags)
24192416 view -> strides = & (view -> itemsize );
24202417 view -> format = NULL ;
24212418 view -> internal = NULL ;
2422- if ((flags & PyBUF_FORMAT ) == PyBUF_FORMAT )
2419+ if ((flags & PyBUF_FORMAT ) == PyBUF_FORMAT ) {
24232420 view -> format = self -> ob_descr -> formats ;
2421+ #ifdef Py_UNICODE_WIDE
2422+ if (self -> ob_descr -> typecode == 'u' ) {
2423+ view -> format = "w" ;
2424+ }
2425+ #endif
2426+ }
24242427
24252428 finish :
24262429 self -> ob_exports ++ ;
@@ -2534,25 +2537,29 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
25342537 Py_DECREF (v );
25352538 }
25362539 else if (initial != NULL && PyUnicode_Check (initial )) {
2540+ Py_UNICODE * ustr ;
25372541 Py_ssize_t n ;
2538- if (PyUnicode_READY (initial )) {
2542+
2543+ ustr = PyUnicode_AsUnicode (initial );
2544+ if (ustr == NULL ) {
2545+ PyErr_NoMemory ();
25392546 Py_DECREF (a );
25402547 return NULL ;
25412548 }
2542- n = PyUnicode_GET_LENGTH (initial );
2549+
2550+ n = PyUnicode_GET_DATA_SIZE (initial );
25432551 if (n > 0 ) {
25442552 arrayobject * self = (arrayobject * )a ;
2545- Py_UCS4 * item = ( Py_UCS4 * ) self -> ob_item ;
2546- item = (Py_UCS4 * )PyMem_Realloc (item , n * sizeof ( Py_UCS4 ) );
2553+ char * item = self -> ob_item ;
2554+ item = (char * )PyMem_Realloc (item , n );
25472555 if (item == NULL ) {
25482556 PyErr_NoMemory ();
25492557 Py_DECREF (a );
25502558 return NULL ;
25512559 }
2552- self -> ob_item = (char * )item ;
2553- Py_SIZE (self ) = n ;
2554- if (!PyUnicode_AsUCS4 (initial , item , n , 0 ))
2555- return NULL ;
2560+ self -> ob_item = item ;
2561+ Py_SIZE (self ) = n / sizeof (Py_UNICODE );
2562+ memcpy (item , ustr , n );
25562563 self -> allocated = Py_SIZE (self );
25572564 }
25582565 }
@@ -2593,7 +2600,7 @@ is a single character. The following type codes are defined:\n\
25932600 Type code C Type Minimum size in bytes \n\
25942601 'b' signed integer 1 \n\
25952602 'B' unsigned integer 1 \n\
2596- 'u' Unicode character 4 \n\
2603+ 'u' Unicode character 2 (see note) \n\
25972604 'h' signed integer 2 \n\
25982605 'H' unsigned integer 2 \n\
25992606 'i' signed integer 2 \n\
@@ -2605,6 +2612,9 @@ is a single character. The following type codes are defined:\n\
26052612 'f' floating point 4 \n\
26062613 'd' floating point 8 \n\
26072614\n\
2615+ NOTE: The 'u' typecode corresponds to Python's unicode character. On \n\
2616+ narrow builds this is 2 bytes; on wide builds this is 4 bytes.\n\
2617+ \n\
26082618NOTE: The 'q' and 'Q' type codes are only available if the platform \n\
26092619C compiler used to build Python supports 'long long', or, on Windows, \n\
26102620'__int64'.\n\
0 commit comments