Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 0030cd5

Browse files
committed
Issue #25227: Cleanup unicode_encode_ucs1() error handler
* Change the type of limit from unsigned int to Py_UCS4, so that it has the same type as the "ch" variable (a Unicode character).
* Reuse the ch variable for _Py_ERROR_XMLCHARREFREPLACE.
* Add some newlines for readability.
1 parent 1e5fcc3 commit 0030cd5

1 file changed

Lines changed: 13 additions & 9 deletions

File tree

Objects/unicodeobject.c

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6415,7 +6415,7 @@ unicode_encode_call_errorhandler(const char *errors,
64156415
static PyObject *
64166416
unicode_encode_ucs1(PyObject *unicode,
64176417
const char *errors,
6418-
unsigned int limit)
6418+
const Py_UCS4 limit)
64196419
{
64206420
/* input state */
64216421
Py_ssize_t pos=0, size;
@@ -6449,12 +6449,12 @@ unicode_encode_ucs1(PyObject *unicode,
64496449
ressize = size;
64506450

64516451
while (pos < size) {
6452-
Py_UCS4 c = PyUnicode_READ(kind, data, pos);
6452+
Py_UCS4 ch = PyUnicode_READ(kind, data, pos);
64536453

64546454
/* can we encode this? */
6455-
if (c<limit) {
6455+
if (ch < limit) {
64566456
/* no overflow check, because we know that the space is enough */
6457-
*str++ = (char)c;
6457+
*str++ = (char)ch;
64586458
++pos;
64596459
}
64606460
else {
@@ -6481,7 +6481,7 @@ unicode_encode_ucs1(PyObject *unicode,
64816481
case _Py_ERROR_REPLACE:
64826482
while (collstart++ < collend)
64836483
*str++ = '?';
6484-
/* fall through */
6484+
/* fall through ignore error handler */
64856485
case _Py_ERROR_IGNORE:
64866486
pos = collend;
64876487
break;
@@ -6491,8 +6491,9 @@ unicode_encode_ucs1(PyObject *unicode,
64916491
requiredsize = respos;
64926492
/* determine replacement size */
64936493
for (i = collstart; i < collend; ++i) {
6494-
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
64956494
Py_ssize_t incr;
6495+
6496+
ch = PyUnicode_READ(kind, data, i);
64966497
if (ch < 10)
64976498
incr = 2+1+1;
64986499
else if (ch < 100)
@@ -6538,6 +6539,7 @@ unicode_encode_ucs1(PyObject *unicode,
65386539
if (repunicode == NULL || (PyUnicode_Check(repunicode) &&
65396540
PyUnicode_READY(repunicode) == -1))
65406541
goto onError;
6542+
65416543
if (PyBytes_Check(repunicode)) {
65426544
/* Directly copy bytes result to output. */
65436545
repsize = PyBytes_Size(repunicode);
@@ -6561,6 +6563,7 @@ unicode_encode_ucs1(PyObject *unicode,
65616563
Py_DECREF(repunicode);
65626564
break;
65636565
}
6566+
65646567
/* need more space? (at least enough for what we
65656568
have+the replacement+the rest of the string, so
65666569
we won't have to check space for encodable characters) */
@@ -6583,17 +6586,18 @@ unicode_encode_ucs1(PyObject *unicode,
65836586
str = PyBytes_AS_STRING(res) + respos;
65846587
ressize = requiredsize;
65856588
}
6589+
65866590
/* check if there is anything unencodable in the replacement
65876591
and copy it to the output */
65886592
for (i = 0; repsize-->0; ++i, ++str) {
6589-
c = PyUnicode_READ_CHAR(repunicode, i);
6590-
if (c >= limit) {
6593+
ch = PyUnicode_READ_CHAR(repunicode, i);
6594+
if (ch >= limit) {
65916595
raise_encode_exception(&exc, encoding, unicode,
65926596
pos, pos+1, reason);
65936597
Py_DECREF(repunicode);
65946598
goto onError;
65956599
}
6596-
*str = (char)c;
6600+
*str = (char)ch;
65976601
}
65986602
pos = newpos;
65996603
Py_DECREF(repunicode);

0 commit comments

Comments
 (0)