Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ece58de

Browse files
committed
Close #14648: Compute correctly maxchar in str.format() for substring
1 parent 0b7d7c9 commit ece58de

4 files changed

Lines changed: 50 additions & 6 deletions

File tree

Include/unicodeobject.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -710,6 +710,15 @@ PyAPI_FUNC(PyObject*) PyUnicode_Substring(
710710
Py_ssize_t start,
711711
Py_ssize_t end);
712712

713+
#ifndef Py_LIMITED_API
714+
/* Compute the maximum character of the substring unicode[start:end].
715+
Return 127 for an empty string. */
716+
PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
717+
PyObject *unicode,
718+
Py_ssize_t start,
719+
Py_ssize_t end);
720+
#endif
721+
713722
/* Copy the string into a UCS4 buffer including the null character if copy_null
714723
is set. Return NULL and raise an exception on error. Raise a ValueError if
715724
the buffer is smaller than the string. Return buffer on success.

Lib/test/test_unicode.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -924,6 +924,14 @@ def __format__(self, format_spec):
924924
self.assertRaises(ValueError, format, '', '#')
925925
self.assertRaises(ValueError, format, '', '#20')
926926

927+
# Non-ASCII
928+
self.assertEqual("{0:s}{1:s}".format("ABC", "\u0410\u0411\u0412"),
929+
'ABC\u0410\u0411\u0412')
930+
self.assertEqual("{0:.3s}".format("ABC\u0410\u0411\u0412"),
931+
'ABC')
932+
self.assertEqual("{0:.0s}".format("ABC\u0410\u0411\u0412"),
933+
'')
934+
927935
def test_format_map(self):
928936
self.assertEqual(''.format_map({}), '')
929937
self.assertEqual('a'.format_map({}), 'a')
@@ -1056,8 +1064,6 @@ def __str__(self):
10561064
self.assertEqual('%f' % INF, 'inf')
10571065
self.assertEqual('%F' % INF, 'INF')
10581066

1059-
self.assertEqual(format("\u0410\u0411\u0412", "s"), "АБВ")
1060-
10611067
def test_startswith_endswith_errors(self):
10621068
for meth in ('foo'.startswith, 'foo'.endswith):
10631069
with self.assertRaises(TypeError) as cm:

Objects/unicodeobject.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1957,6 +1957,37 @@ PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size)
19571957
}
19581958
}
19591959

1960+
Py_UCS4
1961+
_PyUnicode_FindMaxChar(PyObject *unicode, Py_ssize_t start, Py_ssize_t end)
1962+
{
1963+
enum PyUnicode_Kind kind;
1964+
void *startptr, *endptr;
1965+
1966+
assert(PyUnicode_IS_READY(unicode));
1967+
assert(0 <= start);
1968+
assert(end <= PyUnicode_GET_LENGTH(unicode));
1969+
assert(start <= end);
1970+
1971+
if (start == 0 && end == PyUnicode_GET_LENGTH(unicode))
1972+
return PyUnicode_MAX_CHAR_VALUE(unicode);
1973+
1974+
if (start == end)
1975+
return 127;
1976+
1977+
kind = PyUnicode_KIND(unicode);
1978+
startptr = PyUnicode_DATA(unicode);
1979+
endptr = (char*)startptr + end * kind;
1980+
if (start)
1981+
startptr = (char*)startptr + start * kind;
1982+
switch(kind)
1983+
{
1984+
case PyUnicode_1BYTE_KIND: return ucs1lib_find_max_char(startptr, endptr);
1985+
case PyUnicode_2BYTE_KIND: return ucs2lib_find_max_char(startptr, endptr);
1986+
default:
1987+
case PyUnicode_4BYTE_KIND: return ucs4lib_find_max_char(startptr, endptr);
1988+
}
1989+
}
1990+
19601991
/* Ensure that a string uses the most efficient storage, if it is not the
19611992
case: create a new string of the right kind. Write NULL into *p_unicode
19621993
on error. */

Python/formatter_unicode.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -716,7 +716,7 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format)
716716
Py_ssize_t pos;
717717
Py_ssize_t len = PyUnicode_GET_LENGTH(value);
718718
PyObject *result = NULL;
719-
Py_UCS4 maxchar = 127;
719+
Py_UCS4 maxchar;
720720

721721
/* sign is not allowed on strings */
722722
if (format->sign != '\0') {
@@ -747,11 +747,9 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format)
747747
len = format->precision;
748748
}
749749

750-
if (len)
751-
maxchar = PyUnicode_MAX_CHAR_VALUE(value);
752-
753750
calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
754751

752+
maxchar = _PyUnicode_FindMaxChar(value, 0, len);
755753
if (lpad != 0 || rpad != 0)
756754
maxchar = Py_MAX(maxchar, format->fill_char);
757755

0 commit comments

Comments
 (0)