Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 21d9f10

Browse files
Merge from 3.5.
2 parents c6f9b2b + 9c0e1f8 commit 21d9f10

5 files changed

Lines changed: 94 additions & 7 deletions

File tree

Doc/c-api/unicode.rst

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -572,13 +572,16 @@ APIs:
572572
.. versionadded:: 3.3
573573
574574
575-
.. c:function:: int PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start, \
576-
PyObject *from, Py_ssize_t from_start, Py_ssize_t how_many)
575+
.. c:function:: Py_ssize_t PyUnicode_CopyCharacters(PyObject *to, \
576+
Py_ssize_t to_start, \
577+
PyObject *from, \
578+
Py_ssize_t from_start, \
579+
Py_ssize_t how_many)
577580
578581
Copy characters from one Unicode object into another. This function performs
579582
character conversion when necessary and falls back to :c:func:`memcpy` if
580583
possible. Returns ``-1`` and sets an exception on error, otherwise returns
581-
``0``.
584+
the number of copied characters.
582585
583586
.. versionadded:: 3.3
584587

Lib/test/test_unicode.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
55
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
66
7-
"""#"
7+
"""
88
import _string
99
import codecs
1010
import itertools
@@ -2735,6 +2735,49 @@ def test_asucs4(self):
27352735
self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0')
27362736
self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff')
27372737

2738+
# Test PyUnicode_CopyCharacters()
2739+
@support.cpython_only
2740+
def test_copycharacters(self):
2741+
# unicode_copycharacters(to, to_start, from, from_start, how_many) is the
# _testcapi helper under test; the assertions below compare its result to
# a (string, count) tuple.
from _testcapi import unicode_copycharacters
2742+
2743+
# Four 5-character strings ordered by increasing code point range:
# ASCII, U+00A1..U+00A5, BMP characters above U+00FF, and characters
# above U+FFFF.
strings = [
2744+
'abcde', '\xa1\xa2\xa3\xa4\xa5',
2745+
'\u4f60\u597d\u4e16\u754c\uff01',
2746+
'\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604'
2747+
]
2748+
2749+
for idx, from_ in enumerate(strings):
2750+
# wide -> narrow: exceed maxchar limitation
2751+
# strings[:idx] are the entries listed before from_, i.e. the ones with
# a smaller code point range; using any of them as the destination
# must raise SystemError.
for to in strings[:idx]:
2752+
self.assertRaises(
2753+
SystemError,
2754+
unicode_copycharacters, to, 0, from_, 0, 5
2755+
)
2756+
# same kind
2757+
# Source and destination are the same string. Copy to offset 0 starting
# at from_start while always asking for 5 characters: the expected
# result is the tail of from_ padded on the right with '\0', and the
# expected count is the number of characters left after from_start.
for from_start in range(5):
2758+
self.assertEqual(
2759+
unicode_copycharacters(from_, 0, from_, from_start, 5),
2760+
(from_[from_start:from_start+5].ljust(5, '\0'),
2761+
5-from_start)
2762+
)
2763+
# Same offset on both sides: the copied tail stays at its original
# position, so the expected string is padded on the left with '\0'.
for to_start in range(5):
2764+
self.assertEqual(
2765+
unicode_copycharacters(from_, to_start, from_, to_start, 5),
2766+
(from_[to_start:to_start+5].rjust(5, '\0'),
2767+
5-to_start)
2768+
)
2769+
# narrow -> wide
2770+
# Tests omitted since this creates invalid strings.
2771+
2772+
# Argument validation, exercised with the 5-character ASCII string.
s = strings[0]
2773+
# to_start past the end of the destination / negative to_start.
self.assertRaises(IndexError, unicode_copycharacters, s, 6, s, 0, 5)
2774+
self.assertRaises(IndexError, unicode_copycharacters, s, -1, s, 0, 5)
2775+
# from_start past the end of the source / negative from_start.
self.assertRaises(IndexError, unicode_copycharacters, s, 0, s, 6, 5)
2776+
self.assertRaises(IndexError, unicode_copycharacters, s, 0, s, -1, 5)
2777+
# 5 characters written at offset 1 do not fit in a 5-character destination.
self.assertRaises(SystemError, unicode_copycharacters, s, 1, s, 0, 5)
2778+
# Negative how_many.
self.assertRaises(SystemError, unicode_copycharacters, s, 0, s, 0, -1)
2779+
# Source argument that is not a str (bytes).
self.assertRaises(SystemError, unicode_copycharacters, s, 0, b'', 0, 0)
2780+
27382781
@support.cpython_only
27392782
def test_encode_decimal(self):
27402783
from _testcapi import unicode_encodedecimal

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 2
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #28379: Added sanity checks and tests for PyUnicode_CopyCharacters().
14+
Patch by Xiang Zhang.
15+
1316
- Issue #28376: The type of long range iterator is now registered as Iterator.
1417
Patch by Oren Milman.
1518

Modules/_testcapimodule.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1858,6 +1858,39 @@ unicode_asucs4(PyObject *self, PyObject *args)
18581858
return result;
18591859
}
18601860

1861+
/* Test helper exposing PyUnicode_CopyCharacters() to Python.
 *
 * Arguments (format "UnOnn"): to, to_start, from, from_start, how_many.
 * `to` must be a str; `from` is parsed with "O", so any object gets through
 * argument parsing and is handed to PyUnicode_CopyCharacters() as-is.
 *
 * `to` itself is never written to: the copy goes into a fresh string that
 * has the same length and maximum character value as `to` and is
 * pre-filled with U+0000.
 *
 * Returns a new 2-tuple (to_copy, copied), where `copied` is the value
 * returned by PyUnicode_CopyCharacters(); returns NULL if any step fails.
 */
static PyObject *
1862+
unicode_copycharacters(PyObject *self, PyObject *args)
1863+
{
1864+
PyObject *from, *to, *to_copy;
1865+
Py_ssize_t from_start, to_start, how_many, copied;
1866+
1867+
if (!PyArg_ParseTuple(args, "UnOnn:unicode_copycharacters", &to, &to_start,
1868+
&from, &from_start, &how_many)) {
1869+
return NULL;
1870+
}
1871+
1872+
/* Make `to` ready before its length and max char value are read below. */
if (PyUnicode_READY(to) < 0) {
1873+
return NULL;
1874+
}
1875+
1876+
/* Scratch destination: only the length and maximum character value of
   `to` are taken over, not its contents. */
if (!(to_copy = PyUnicode_New(PyUnicode_GET_LENGTH(to),
1877+
PyUnicode_MAX_CHAR_VALUE(to)))) {
1878+
return NULL;
1879+
}
1880+
/* Fill every position with character 0 so that positions the copy does
   not overwrite have a known value. */
if (PyUnicode_Fill(to_copy, 0, PyUnicode_GET_LENGTH(to_copy), 0U) < 0) {
1881+
Py_DECREF(to_copy);
1882+
return NULL;
1883+
}
1884+
1885+
/* A negative result signals failure; release the scratch string and
   return NULL. */
if ((copied = PyUnicode_CopyCharacters(to_copy, to_start, from,
1886+
from_start, how_many)) < 0) {
1887+
Py_DECREF(to_copy);
1888+
return NULL;
1889+
}
1890+
1891+
/* "N" passes the reference to to_copy into the tuple without
   incrementing its reference count, so no Py_DECREF is needed here. */
return Py_BuildValue("(Nn)", to_copy, copied);
1892+
}
1893+
18611894
static PyObject *
18621895
unicode_encodedecimal(PyObject *self, PyObject *args)
18631896
{
@@ -4061,6 +4094,7 @@ static PyMethodDef TestMethods[] = {
40614094
{"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
40624095
{"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS},
40634096
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
4097+
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
40644098
{"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},
40654099
{"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS},
40664100
{"unicode_legacy_string", unicode_legacy_string, METH_VARARGS},

Objects/unicodeobject.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1549,15 +1549,19 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
15491549
if (PyUnicode_READY(to) == -1)
15501550
return -1;
15511551

1552-
if (from_start < 0) {
1552+
if ((size_t)from_start > (size_t)PyUnicode_GET_LENGTH(from)) {
15531553
PyErr_SetString(PyExc_IndexError, "string index out of range");
15541554
return -1;
15551555
}
1556-
if (to_start < 0) {
1556+
if ((size_t)to_start > (size_t)PyUnicode_GET_LENGTH(to)) {
15571557
PyErr_SetString(PyExc_IndexError, "string index out of range");
15581558
return -1;
15591559
}
1560-
how_many = Py_MIN(PyUnicode_GET_LENGTH(from), how_many);
1560+
if (how_many < 0) {
1561+
PyErr_SetString(PyExc_SystemError, "how_many cannot be negative");
1562+
return -1;
1563+
}
1564+
how_many = Py_MIN(PyUnicode_GET_LENGTH(from)-from_start, how_many);
15611565
if (to_start + how_many > PyUnicode_GET_LENGTH(to)) {
15621566
PyErr_Format(PyExc_SystemError,
15631567
"Cannot write %zi characters at %zi "

0 commit comments

Comments
 (0)