Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 62bb394

Browse files
committed
Close #13072: Restore the pre-PEP 393 code for the array module
The 'u' format of the array module again uses the Py_UNICODE type, for backward compatibility with Python 3.2. The only change from Python 3.2 is that the result of PyUnicode_AsUnicode() is now checked for NULL.
1 parent 3af2617 commit 62bb394

2 files changed

Lines changed: 56 additions & 41 deletions

File tree

Doc/library/array.rst

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ defined:
2121
+-----------+--------------------+-------------------+-----------------------+-------+
2222
| ``'B'`` | unsigned char | int | 1 | |
2323
+-----------+--------------------+-------------------+-----------------------+-------+
24-
| ``'u'`` | Py_UCS4 | Unicode character | 4 | |
24+
| ``'u'`` | Py_UNICODE | Unicode character | 2 | \(1) |
2525
+-----------+--------------------+-------------------+-----------------------+-------+
2626
| ``'h'`` | signed short | int | 2 | |
2727
+-----------+--------------------+-------------------+-----------------------+-------+
@@ -35,9 +35,9 @@ defined:
3535
+-----------+--------------------+-------------------+-----------------------+-------+
3636
| ``'L'`` | unsigned long | int | 4 | |
3737
+-----------+--------------------+-------------------+-----------------------+-------+
38-
| ``'q'`` | signed long long | int | 8 | \(1) |
38+
| ``'q'`` | signed long long | int | 8 | \(2) |
3939
+-----------+--------------------+-------------------+-----------------------+-------+
40-
| ``'Q'`` | unsigned long long | int | 8 | \(1) |
40+
| ``'Q'`` | unsigned long long | int | 8 | \(2) |
4141
+-----------+--------------------+-------------------+-----------------------+-------+
4242
| ``'f'`` | float | float | 4 | |
4343
+-----------+--------------------+-------------------+-----------------------+-------+
@@ -47,6 +47,11 @@ defined:
4747
Notes:
4848

4949
(1)
50+
The ``'u'`` type code corresponds to Python's unicode character
51+
(:c:type:`Py_UNICODE` which is :c:type:`wchar_t`). Depending on the
52+
platform, it can be 16 bits or 32 bits.
53+
54+
(2)
5055
The ``'q'`` and ``'Q'`` type codes are available only if
5156
the platform C compiler used to build Python supports C :c:type:`long long`,
5257
or, on Windows, :c:type:`__int64`.

Modules/arraymodule.c

Lines changed: 48 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -174,25 +174,24 @@ BB_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
174174
static PyObject *
175175
u_getitem(arrayobject *ap, Py_ssize_t i)
176176
{
177-
return PyUnicode_FromOrdinal(((Py_UCS4 *) ap->ob_item)[i]);
177+
return PyUnicode_FromUnicode(&((Py_UNICODE *) ap->ob_item)[i], 1);
178178
}
179179

180180
static int
181181
u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
182182
{
183-
PyObject *p;
183+
Py_UNICODE *p;
184+
Py_ssize_t len;
184185

185-
if (!PyArg_Parse(v, "U;array item must be unicode character", &p))
186-
return -1;
187-
if (PyUnicode_READY(p))
186+
if (!PyArg_Parse(v, "u#;array item must be unicode character", &p, &len))
188187
return -1;
189-
if (PyUnicode_GET_LENGTH(p) != 1) {
188+
if (len != 1) {
190189
PyErr_SetString(PyExc_TypeError,
191190
"array item must be unicode character");
192191
return -1;
193192
}
194193
if (i >= 0)
195-
((Py_UCS4 *)ap->ob_item)[i] = PyUnicode_READ_CHAR(p, 0);
194+
((Py_UNICODE *)ap->ob_item)[i] = p[0];
196195
return 0;
197196
}
198197

@@ -444,13 +443,6 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
444443
return 0;
445444
}
446445

447-
#if SIZEOF_INT == 4
448-
# define STRUCT_LONG_FORMAT "I"
449-
#elif SIZEOF_LONG == 4
450-
# define STRUCT_LONG_FORMAT "L"
451-
#else
452-
# error "Unable to get struct format for Py_UCS4"
453-
#endif
454446

455447
/* Description of types.
456448
*
@@ -460,7 +452,7 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
460452
static struct arraydescr descriptors[] = {
461453
{'b', 1, b_getitem, b_setitem, "b", 1, 1},
462454
{'B', 1, BB_getitem, BB_setitem, "B", 1, 0},
463-
{'u', sizeof(Py_UCS4), u_getitem, u_setitem, STRUCT_LONG_FORMAT, 0, 0},
455+
{'u', sizeof(Py_UNICODE), u_getitem, u_setitem, "u", 0, 0},
464456
{'h', sizeof(short), h_getitem, h_setitem, "h", 1, 1},
465457
{'H', sizeof(short), HH_getitem, HH_setitem, "H", 1, 0},
466458
{'i', sizeof(int), i_getitem, i_setitem, "i", 1, 1},
@@ -1519,26 +1511,25 @@ This method is deprecated. Use tobytes instead.");
15191511
static PyObject *
15201512
array_fromunicode(arrayobject *self, PyObject *args)
15211513
{
1522-
PyObject *ustr;
1514+
Py_UNICODE *ustr;
15231515
Py_ssize_t n;
1516+
char typecode;
15241517

1525-
if (!PyArg_ParseTuple(args, "U:fromunicode", &ustr))
1518+
if (!PyArg_ParseTuple(args, "u#:fromunicode", &ustr, &n))
15261519
return NULL;
1527-
if (self->ob_descr->typecode != 'u') {
1520+
typecode = self->ob_descr->typecode;
1521+
if ((typecode != 'u')) {
15281522
PyErr_SetString(PyExc_ValueError,
15291523
"fromunicode() may only be called on "
15301524
"unicode type arrays");
15311525
return NULL;
15321526
}
1533-
if (PyUnicode_READY(ustr))
1534-
return NULL;
1535-
n = PyUnicode_GET_LENGTH(ustr);
15361527
if (n > 0) {
15371528
Py_ssize_t old_size = Py_SIZE(self);
15381529
if (array_resize(self, old_size + n) == -1)
15391530
return NULL;
1540-
if (!PyUnicode_AsUCS4(ustr, (Py_UCS4 *)self->ob_item + old_size, n, 0))
1541-
return NULL;
1531+
memcpy(self->ob_item + old_size * sizeof(Py_UNICODE),
1532+
ustr, n * sizeof(Py_UNICODE));
15421533
}
15431534

15441535
Py_INCREF(Py_None);
@@ -1557,14 +1548,14 @@ append Unicode data to an array of some other type.");
15571548
static PyObject *
15581549
array_tounicode(arrayobject *self, PyObject *unused)
15591550
{
1560-
if (self->ob_descr->typecode != 'u') {
1551+
char typecode;
1552+
typecode = self->ob_descr->typecode;
1553+
if ((typecode != 'u')) {
15611554
PyErr_SetString(PyExc_ValueError,
15621555
"tounicode() may only be called on unicode type arrays");
15631556
return NULL;
15641557
}
1565-
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1566-
(Py_UCS4 *) self->ob_item,
1567-
Py_SIZE(self));
1558+
return PyUnicode_FromUnicode((Py_UNICODE *) self->ob_item, Py_SIZE(self));
15681559
}
15691560

15701561
PyDoc_STRVAR(tounicode_doc,
@@ -1671,7 +1662,13 @@ typecode_to_mformat_code(char typecode)
16711662
return UNSIGNED_INT8;
16721663

16731664
case 'u':
1674-
return UTF32_LE + is_big_endian;
1665+
if (sizeof(Py_UNICODE) == 2) {
1666+
return UTF16_LE + is_big_endian;
1667+
}
1668+
if (sizeof(Py_UNICODE) == 4) {
1669+
return UTF32_LE + is_big_endian;
1670+
}
1671+
return UNKNOWN_FORMAT;
16751672

16761673
case 'f':
16771674
if (sizeof(float) == 4) {
@@ -2419,8 +2416,14 @@ array_buffer_getbuf(arrayobject *self, Py_buffer *view, int flags)
24192416
view->strides = &(view->itemsize);
24202417
view->format = NULL;
24212418
view->internal = NULL;
2422-
if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT)
2419+
if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) {
24232420
view->format = self->ob_descr->formats;
2421+
#ifdef Py_UNICODE_WIDE
2422+
if (self->ob_descr->typecode == 'u') {
2423+
view->format = "w";
2424+
}
2425+
#endif
2426+
}
24242427

24252428
finish:
24262429
self->ob_exports++;
@@ -2534,25 +2537,29 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
25342537
Py_DECREF(v);
25352538
}
25362539
else if (initial != NULL && PyUnicode_Check(initial)) {
2540+
Py_UNICODE *ustr;
25372541
Py_ssize_t n;
2538-
if (PyUnicode_READY(initial)) {
2542+
2543+
ustr = PyUnicode_AsUnicode(initial);
2544+
if (ustr == NULL) {
2545+
PyErr_NoMemory();
25392546
Py_DECREF(a);
25402547
return NULL;
25412548
}
2542-
n = PyUnicode_GET_LENGTH(initial);
2549+
2550+
n = PyUnicode_GET_DATA_SIZE(initial);
25432551
if (n > 0) {
25442552
arrayobject *self = (arrayobject *)a;
2545-
Py_UCS4 *item = (Py_UCS4 *)self->ob_item;
2546-
item = (Py_UCS4 *)PyMem_Realloc(item, n * sizeof(Py_UCS4));
2553+
char *item = self->ob_item;
2554+
item = (char *)PyMem_Realloc(item, n);
25472555
if (item == NULL) {
25482556
PyErr_NoMemory();
25492557
Py_DECREF(a);
25502558
return NULL;
25512559
}
2552-
self->ob_item = (char*)item;
2553-
Py_SIZE(self) = n;
2554-
if (!PyUnicode_AsUCS4(initial, item, n, 0))
2555-
return NULL;
2560+
self->ob_item = item;
2561+
Py_SIZE(self) = n / sizeof(Py_UNICODE);
2562+
memcpy(item, ustr, n);
25562563
self->allocated = Py_SIZE(self);
25572564
}
25582565
}
@@ -2593,7 +2600,7 @@ is a single character. The following type codes are defined:\n\
25932600
Type code C Type Minimum size in bytes \n\
25942601
'b' signed integer 1 \n\
25952602
'B' unsigned integer 1 \n\
2596-
'u' Unicode character 4 \n\
2603+
'u' Unicode character 2 (see note) \n\
25972604
'h' signed integer 2 \n\
25982605
'H' unsigned integer 2 \n\
25992606
'i' signed integer 2 \n\
@@ -2605,6 +2612,9 @@ is a single character. The following type codes are defined:\n\
26052612
'f' floating point 4 \n\
26062613
'd' floating point 8 \n\
26072614
\n\
2615+
NOTE: The 'u' typecode corresponds to Python's unicode character. On \n\
2616+
narrow builds this is 2-bytes on wide builds this is 4-bytes.\n\
2617+
\n\
26082618
NOTE: The 'q' and 'Q' type codes are only available if the platform \n\
26092619
C compiler used to build Python supports 'long long', or, on Windows, \n\
26102620
'__int64'.\n\

0 commit comments

Comments
 (0)