Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 7e30373

Browse files
committed
remove MAX_MAXCHAR because it's unsafe for computing the maximum codepoint value (see #18183)
1 parent 0e547b6 commit 7e30373

3 files changed

Lines changed: 32 additions & 31 deletions

File tree

Lib/test/test_unicode.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,9 @@ def test_lower(self):
566566
self.assertEqual('\U0008fffe'.lower(), '\U0008fffe')
567567
self.assertEqual('\u2177'.lower(), '\u2177')
568568

569+
# See issue #18183 for this one.
570+
'\U00010000\U00100000'.lower()
571+
569572
def test_casefold(self):
570573
self.assertEqual('hello'.casefold(), 'hello')
571574
self.assertEqual('hELlo'.casefold(), 'hello')

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ What's New in Python 3.3.3 release candidate 1?
1212
Core and Builtins
1313
-----------------
1414

15+
- Issue #18183: Fix various unicode operations on strings with large unicode
16+
codepoints.
17+
1518
- Issue #18180: Fix ref leak in _PyImport_GetDynLoadWindows().
1619

1720
- Issue #18038: SyntaxError raised during compilation sources with illegal

Objects/unicodeobject.c

Lines changed: 26 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,6 @@ extern "C" {
112112
#define _PyUnicode_DATA_ANY(op) \
113113
(((PyUnicodeObject*)(op))->data.any)
114114

115-
/* Optimized version of Py_MAX() to compute the maximum character:
116-
use it when your are computing the second argument of PyUnicode_New() */
117-
#define MAX_MAXCHAR(maxchar1, maxchar2) \
118-
((maxchar1) | (maxchar2))
119-
120115
#undef PyUnicode_READY
121116
#define PyUnicode_READY(op) \
122117
(assert(_PyUnicode_CHECK(op)), \
@@ -2495,7 +2490,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
24952490
case 'c':
24962491
{
24972492
Py_UCS4 ordinal = va_arg(count, int);
2498-
maxchar = MAX_MAXCHAR(maxchar, ordinal);
2493+
maxchar = Py_MAX(maxchar, ordinal);
24992494
n++;
25002495
break;
25012496
}
@@ -2591,7 +2586,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
25912586
/* since PyUnicode_DecodeUTF8 returns already flexible
25922587
unicode objects, there is no need to call ready on them */
25932588
argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
2594-
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
2589+
maxchar = Py_MAX(maxchar, argmaxchar);
25952590
n += PyUnicode_GET_LENGTH(str);
25962591
/* Remember the str and switch to the next slot */
25972592
*callresult++ = str;
@@ -2604,7 +2599,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
26042599
if (PyUnicode_READY(obj) == -1)
26052600
goto fail;
26062601
argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
2607-
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
2602+
maxchar = Py_MAX(maxchar, argmaxchar);
26082603
n += PyUnicode_GET_LENGTH(obj);
26092604
break;
26102605
}
@@ -2619,7 +2614,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
26192614
if (PyUnicode_READY(obj) == -1)
26202615
goto fail;
26212616
argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
2622-
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
2617+
maxchar = Py_MAX(maxchar, argmaxchar);
26232618
n += PyUnicode_GET_LENGTH(obj);
26242619
*callresult++ = NULL;
26252620
}
@@ -2632,7 +2627,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
26322627
goto fail;
26332628
}
26342629
argmaxchar = PyUnicode_MAX_CHAR_VALUE(str_obj);
2635-
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
2630+
maxchar = Py_MAX(maxchar, argmaxchar);
26362631
n += PyUnicode_GET_LENGTH(str_obj);
26372632
*callresult++ = str_obj;
26382633
}
@@ -2651,7 +2646,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
26512646
goto fail;
26522647
}
26532648
argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
2654-
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
2649+
maxchar = Py_MAX(maxchar, argmaxchar);
26552650
n += PyUnicode_GET_LENGTH(str);
26562651
/* Remember the str and switch to the next slot */
26572652
*callresult++ = str;
@@ -2670,7 +2665,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
26702665
goto fail;
26712666
}
26722667
argmaxchar = PyUnicode_MAX_CHAR_VALUE(repr);
2673-
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
2668+
maxchar = Py_MAX(maxchar, argmaxchar);
26742669
n += PyUnicode_GET_LENGTH(repr);
26752670
/* Remember the repr and switch to the next slot */
26762671
*callresult++ = repr;
@@ -2689,7 +2684,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
26892684
goto fail;
26902685
}
26912686
argmaxchar = PyUnicode_MAX_CHAR_VALUE(ascii);
2692-
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
2687+
maxchar = Py_MAX(maxchar, argmaxchar);
26932688
n += PyUnicode_GET_LENGTH(ascii);
26942689
/* Remember the repr and switch to the next slot */
26952690
*callresult++ = ascii;
@@ -8628,11 +8623,11 @@ fix_decimal_and_space_to_ascii(PyObject *self)
86288623
}
86298624
if (fixed != 0) {
86308625
modified = 1;
8631-
maxchar = MAX_MAXCHAR(maxchar, fixed);
8626+
maxchar = Py_MAX(maxchar, fixed);
86328627
PyUnicode_WRITE(kind, data, i, fixed);
86338628
}
86348629
else
8635-
maxchar = MAX_MAXCHAR(maxchar, ch);
8630+
maxchar = Py_MAX(maxchar, ch);
86368631
}
86378632
}
86388633

@@ -8673,7 +8668,7 @@ PyUnicode_TransformDecimalToASCII(Py_UNICODE *s,
86738668
int decimal = Py_UNICODE_TODECIMAL(ch);
86748669
if (decimal >= 0)
86758670
ch = '0' + decimal;
8676-
maxchar = MAX_MAXCHAR(maxchar, ch);
8671+
maxchar = Py_MAX(maxchar, ch);
86778672
}
86788673
}
86798674

@@ -8914,7 +8909,7 @@ _PyUnicode_InsertThousandsGrouping(
89148909
if (unicode == NULL) {
89158910
*maxchar = 127;
89168911
if (len != n_digits) {
8917-
*maxchar = MAX_MAXCHAR(*maxchar,
8912+
*maxchar = Py_MAX(*maxchar,
89188913
PyUnicode_MAX_CHAR_VALUE(thousands_sep));
89198914
}
89208915
}
@@ -9309,14 +9304,14 @@ do_capitalize(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *ma
93099304
c = PyUnicode_READ(kind, data, 0);
93109305
n_res = _PyUnicode_ToUpperFull(c, mapped);
93119306
for (j = 0; j < n_res; j++) {
9312-
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
9307+
*maxchar = Py_MAX(*maxchar, mapped[j]);
93139308
res[k++] = mapped[j];
93149309
}
93159310
for (i = 1; i < length; i++) {
93169311
c = PyUnicode_READ(kind, data, i);
93179312
n_res = lower_ucs4(kind, data, length, i, c, mapped);
93189313
for (j = 0; j < n_res; j++) {
9319-
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
9314+
*maxchar = Py_MAX(*maxchar, mapped[j]);
93209315
res[k++] = mapped[j];
93219316
}
93229317
}
@@ -9341,7 +9336,7 @@ do_swapcase(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxc
93419336
mapped[0] = c;
93429337
}
93439338
for (j = 0; j < n_res; j++) {
9344-
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
9339+
*maxchar = Py_MAX(*maxchar, mapped[j]);
93459340
res[k++] = mapped[j];
93469341
}
93479342
}
@@ -9362,7 +9357,7 @@ do_upper_or_lower(int kind, void *data, Py_ssize_t length, Py_UCS4 *res,
93629357
else
93639358
n_res = _PyUnicode_ToUpperFull(c, mapped);
93649359
for (j = 0; j < n_res; j++) {
9365-
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
9360+
*maxchar = Py_MAX(*maxchar, mapped[j]);
93669361
res[k++] = mapped[j];
93679362
}
93689363
}
@@ -9391,7 +9386,7 @@ do_casefold(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxc
93919386
Py_UCS4 mapped[3];
93929387
int j, n_res = _PyUnicode_ToFoldedFull(c, mapped);
93939388
for (j = 0; j < n_res; j++) {
9394-
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
9389+
*maxchar = Py_MAX(*maxchar, mapped[j]);
93959390
res[k++] = mapped[j];
93969391
}
93979392
}
@@ -9416,7 +9411,7 @@ do_title(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar
94169411
n_res = _PyUnicode_ToTitleFull(c, mapped);
94179412

94189413
for (j = 0; j < n_res; j++) {
9419-
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
9414+
*maxchar = Py_MAX(*maxchar, mapped[j]);
94209415
res[k++] = mapped[j];
94219416
}
94229417

@@ -9571,7 +9566,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
95719566
goto onError;
95729567
sz += PyUnicode_GET_LENGTH(item);
95739568
item_maxchar = PyUnicode_MAX_CHAR_VALUE(item);
9574-
maxchar = MAX_MAXCHAR(maxchar, item_maxchar);
9569+
maxchar = Py_MAX(maxchar, item_maxchar);
95759570
if (i != 0)
95769571
sz += seplen;
95779572
if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
@@ -9747,7 +9742,7 @@ pad(PyObject *self,
97479742
return NULL;
97489743
}
97499744
maxchar = PyUnicode_MAX_CHAR_VALUE(self);
9750-
maxchar = MAX_MAXCHAR(maxchar, fill);
9745+
maxchar = Py_MAX(maxchar, fill);
97519746
u = PyUnicode_New(left + _PyUnicode_LENGTH(self) + right, maxchar);
97529747
if (!u)
97539748
return NULL;
@@ -10061,7 +10056,7 @@ replace(PyObject *self, PyObject *str1,
1006110056
/* Replacing str1 with str2 may cause a maxchar reduction in the
1006210057
result string. */
1006310058
mayshrink = (maxchar_str2 < maxchar);
10064-
maxchar = MAX_MAXCHAR(maxchar, maxchar_str2);
10059+
maxchar = Py_MAX(maxchar, maxchar_str2);
1006510060

1006610061
if (len1 == len2) {
1006710062
/* same length */
@@ -10647,7 +10642,7 @@ PyUnicode_Concat(PyObject *left, PyObject *right)
1064710642

1064810643
maxchar = PyUnicode_MAX_CHAR_VALUE(u);
1064910644
maxchar2 = PyUnicode_MAX_CHAR_VALUE(v);
10650-
maxchar = MAX_MAXCHAR(maxchar, maxchar2);
10645+
maxchar = Py_MAX(maxchar, maxchar2);
1065110646

1065210647
/* Concat the two Unicode strings */
1065310648
w = PyUnicode_New(new_len, maxchar);
@@ -10734,7 +10729,7 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
1073410729
else {
1073510730
maxchar = PyUnicode_MAX_CHAR_VALUE(left);
1073610731
maxchar2 = PyUnicode_MAX_CHAR_VALUE(right);
10737-
maxchar = MAX_MAXCHAR(maxchar, maxchar2);
10732+
maxchar = Py_MAX(maxchar, maxchar2);
1073810733

1073910734
/* Concat the two Unicode strings */
1074010735
res = PyUnicode_New(new_len, maxchar);
@@ -13846,15 +13841,15 @@ PyUnicode_Format(PyObject *format, PyObject *args)
1384613841
if (!(flags & F_LJUST)) {
1384713842
if (sign) {
1384813843
if ((width-1) > len)
13849-
bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill);
13844+
bufmaxchar = Py_MAX(bufmaxchar, fill);
1385013845
}
1385113846
else {
1385213847
if (width > len)
13853-
bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill);
13848+
bufmaxchar = Py_MAX(bufmaxchar, fill);
1385413849
}
1385513850
}
1385613851
maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len);
13857-
bufmaxchar = MAX_MAXCHAR(bufmaxchar, maxchar);
13852+
bufmaxchar = Py_MAX(bufmaxchar, maxchar);
1385813853

1385913854
buflen = width;
1386013855
if (sign && len == width)

0 commit comments

Comments
 (0)