Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 8cecc8c

Browse files
committed
Issue #7330: Implement width and precision (ex: "%5.3s") for the format string
of PyUnicode_FromFormat() function, original patch written by Ysj Ray.
1 parent 9b5d4d8 commit 8cecc8c

4 files changed

Lines changed: 298 additions & 100 deletions

File tree

Doc/c-api/unicode.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,12 +526,23 @@ APIs:
526526
The `"%lld"` and `"%llu"` format specifiers are only available
527527
when :const:`HAVE_LONG_LONG` is defined.
528528
529+
.. note::
530+
The width formatter unit is a number of characters rather than bytes.
531+
The precision formatter unit is a number of bytes for ``"%s"`` and
532+
``"%V"`` (if the ``PyObject*`` argument is NULL), and a number of
533+
characters for ``"%A"``, ``"%U"``, ``"%S"``, ``"%R"`` and ``"%V"``
534+
(if the ``PyObject*`` argument is not NULL).
535+
529536
.. versionchanged:: 3.2
530537
Support for ``"%lld"`` and ``"%llu"`` added.
531538
532539
.. versionchanged:: 3.3
533540
Support for ``"%li"``, ``"%lli"`` and ``"%zi"`` added.
534541
542+
.. versionchanged:: 3.4
543+
Support for width and precision formatters for ``"%s"``, ``"%A"``, ``"%U"``,
544+
``"%V"``, ``"%S"``, ``"%R"`` added.
545+
535546
536547
.. c:function:: PyObject* PyUnicode_FromFormatV(const char *format, va_list vargs)
537548

Lib/test/test_unicode.py

Lines changed: 175 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -2007,9 +2007,13 @@ def PyUnicode_FromFormat(format, *args):
20072007
for arg in args)
20082008
return _PyUnicode_FromFormat(format, *cargs)
20092009

2010+
def check_format(expected, format, *args):
2011+
text = PyUnicode_FromFormat(format, *args)
2012+
self.assertEqual(expected, text)
2013+
20102014
# ascii format, non-ascii argument
2011-
text = PyUnicode_FromFormat(b'ascii\x7f=%U', 'unicode\xe9')
2012-
self.assertEqual(text, 'ascii\x7f=unicode\xe9')
2015+
check_format('ascii\x7f=unicode\xe9',
2016+
b'ascii\x7f=%U', 'unicode\xe9')
20132017

20142018
# non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
20152019
# raises an error
@@ -2019,83 +2023,200 @@ def PyUnicode_FromFormat(format, *args):
20192023
PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')
20202024

20212025
# test "%c"
2022-
self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0xabcd)), '\uabcd')
2023-
self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff')
2026+
check_format('\uabcd',
2027+
b'%c', c_int(0xabcd))
2028+
check_format('\U0010ffff',
2029+
b'%c', c_int(0x10ffff))
20242030

20252031
# test "%"
2026-
self.assertEqual(PyUnicode_FromFormat(b'%'), '%')
2027-
self.assertEqual(PyUnicode_FromFormat(b'%%'), '%')
2028-
self.assertEqual(PyUnicode_FromFormat(b'%%s'), '%s')
2029-
self.assertEqual(PyUnicode_FromFormat(b'[%%]'), '[%]')
2030-
self.assertEqual(PyUnicode_FromFormat(b'%%%s', b'abc'), '%abc')
2032+
check_format('%',
2033+
b'%')
2034+
check_format('%',
2035+
b'%%')
2036+
check_format('%s',
2037+
b'%%s')
2038+
check_format('[%]',
2039+
b'[%%]')
2040+
check_format('%abc',
2041+
b'%%%s', b'abc')
2042+
2043+
# truncated string
2044+
check_format('abc',
2045+
b'%.3s', b'abcdef')
2046+
check_format('abc[\ufffd',
2047+
b'%.5s', 'abc[\u20ac]'.encode('utf8'))
2048+
check_format("'\\u20acABC'",
2049+
b'%A', '\u20acABC')
2050+
check_format("'\\u20",
2051+
b'%.5A', '\u20acABCDEF')
2052+
check_format("'\u20acABC'",
2053+
b'%R', '\u20acABC')
2054+
check_format("'\u20acA",
2055+
b'%.3R', '\u20acABCDEF')
2056+
check_format('\u20acAB',
2057+
b'%.3S', '\u20acABCDEF')
2058+
check_format('\u20acAB',
2059+
b'%.3U', '\u20acABCDEF')
2060+
check_format('\u20acAB',
2061+
b'%.3V', '\u20acABCDEF', None)
2062+
check_format('abc[\ufffd',
2063+
b'%.5V', None, 'abc[\u20ac]'.encode('utf8'))
2064+
2065+
# the following tests come from #7330
2066+
# test width modifier and precision modifier with %S
2067+
check_format("repr= abc",
2068+
b'repr=%5S', 'abc')
2069+
check_format("repr=ab",
2070+
b'repr=%.2S', 'abc')
2071+
check_format("repr= ab",
2072+
b'repr=%5.2S', 'abc')
2073+
2074+
# test width modifier and precision modifier with %R
2075+
check_format("repr= 'abc'",
2076+
b'repr=%8R', 'abc')
2077+
check_format("repr='ab",
2078+
b'repr=%.3R', 'abc')
2079+
check_format("repr= 'ab",
2080+
b'repr=%5.3R', 'abc')
2081+
2082+
# test width modifier and precision modifier with %A
2083+
check_format("repr= 'abc'",
2084+
b'repr=%8A', 'abc')
2085+
check_format("repr='ab",
2086+
b'repr=%.3A', 'abc')
2087+
check_format("repr= 'ab",
2088+
b'repr=%5.3A', 'abc')
2089+
2090+
# test width modifier and precision modifier with %s
2091+
check_format("repr= abc",
2092+
b'repr=%5s', b'abc')
2093+
check_format("repr=ab",
2094+
b'repr=%.2s', b'abc')
2095+
check_format("repr= ab",
2096+
b'repr=%5.2s', b'abc')
2097+
2098+
# test width modifier and precision modifier with %U
2099+
check_format("repr= abc",
2100+
b'repr=%5U', 'abc')
2101+
check_format("repr=ab",
2102+
b'repr=%.2U', 'abc')
2103+
check_format("repr= ab",
2104+
b'repr=%5.2U', 'abc')
2105+
2106+
# test width modifier and precision modifier with %V
2107+
check_format("repr= abc",
2108+
b'repr=%5V', 'abc', b'123')
2109+
check_format("repr=ab",
2110+
b'repr=%.2V', 'abc', b'123')
2111+
check_format("repr= ab",
2112+
b'repr=%5.2V', 'abc', b'123')
2113+
check_format("repr= 123",
2114+
b'repr=%5V', None, b'123')
2115+
check_format("repr=12",
2116+
b'repr=%.2V', None, b'123')
2117+
check_format("repr= 12",
2118+
b'repr=%5.2V', None, b'123')
20312119

20322120
# test integer formats (%i, %d, %u)
2033-
self.assertEqual(PyUnicode_FromFormat(b'%03i', c_int(10)), '010')
2034-
self.assertEqual(PyUnicode_FromFormat(b'%0.4i', c_int(10)), '0010')
2035-
self.assertEqual(PyUnicode_FromFormat(b'%i', c_int(-123)), '-123')
2036-
self.assertEqual(PyUnicode_FromFormat(b'%li', c_long(-123)), '-123')
2037-
self.assertEqual(PyUnicode_FromFormat(b'%lli', c_longlong(-123)), '-123')
2038-
self.assertEqual(PyUnicode_FromFormat(b'%zi', c_ssize_t(-123)), '-123')
2039-
2040-
self.assertEqual(PyUnicode_FromFormat(b'%d', c_int(-123)), '-123')
2041-
self.assertEqual(PyUnicode_FromFormat(b'%ld', c_long(-123)), '-123')
2042-
self.assertEqual(PyUnicode_FromFormat(b'%lld', c_longlong(-123)), '-123')
2043-
self.assertEqual(PyUnicode_FromFormat(b'%zd', c_ssize_t(-123)), '-123')
2044-
2045-
self.assertEqual(PyUnicode_FromFormat(b'%u', c_uint(123)), '123')
2046-
self.assertEqual(PyUnicode_FromFormat(b'%lu', c_ulong(123)), '123')
2047-
self.assertEqual(PyUnicode_FromFormat(b'%llu', c_ulonglong(123)), '123')
2048-
self.assertEqual(PyUnicode_FromFormat(b'%zu', c_size_t(123)), '123')
2121+
check_format('010',
2122+
b'%03i', c_int(10))
2123+
check_format('0010',
2124+
b'%0.4i', c_int(10))
2125+
check_format('-123',
2126+
b'%i', c_int(-123))
2127+
check_format('-123',
2128+
b'%li', c_long(-123))
2129+
check_format('-123',
2130+
b'%lli', c_longlong(-123))
2131+
check_format('-123',
2132+
b'%zi', c_ssize_t(-123))
2133+
2134+
check_format('-123',
2135+
b'%d', c_int(-123))
2136+
check_format('-123',
2137+
b'%ld', c_long(-123))
2138+
check_format('-123',
2139+
b'%lld', c_longlong(-123))
2140+
check_format('-123',
2141+
b'%zd', c_ssize_t(-123))
2142+
2143+
check_format('123',
2144+
b'%u', c_uint(123))
2145+
check_format('123',
2146+
b'%lu', c_ulong(123))
2147+
check_format('123',
2148+
b'%llu', c_ulonglong(123))
2149+
check_format('123',
2150+
b'%zu', c_size_t(123))
20492151

20502152
# test long output
20512153
min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1))
20522154
max_longlong = -min_longlong - 1
2053-
self.assertEqual(PyUnicode_FromFormat(b'%lld', c_longlong(min_longlong)), str(min_longlong))
2054-
self.assertEqual(PyUnicode_FromFormat(b'%lld', c_longlong(max_longlong)), str(max_longlong))
2155+
check_format(str(min_longlong),
2156+
b'%lld', c_longlong(min_longlong))
2157+
check_format(str(max_longlong),
2158+
b'%lld', c_longlong(max_longlong))
20552159
max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1
2056-
self.assertEqual(PyUnicode_FromFormat(b'%llu', c_ulonglong(max_ulonglong)), str(max_ulonglong))
2160+
check_format(str(max_ulonglong),
2161+
b'%llu', c_ulonglong(max_ulonglong))
20572162
PyUnicode_FromFormat(b'%p', c_void_p(-1))
20582163

20592164
# test padding (width and/or precision)
2060-
self.assertEqual(PyUnicode_FromFormat(b'%010i', c_int(123)), '123'.rjust(10, '0'))
2061-
self.assertEqual(PyUnicode_FromFormat(b'%100i', c_int(123)), '123'.rjust(100))
2062-
self.assertEqual(PyUnicode_FromFormat(b'%.100i', c_int(123)), '123'.rjust(100, '0'))
2063-
self.assertEqual(PyUnicode_FromFormat(b'%100.80i', c_int(123)), '123'.rjust(80, '0').rjust(100))
2064-
2065-
self.assertEqual(PyUnicode_FromFormat(b'%010u', c_uint(123)), '123'.rjust(10, '0'))
2066-
self.assertEqual(PyUnicode_FromFormat(b'%100u', c_uint(123)), '123'.rjust(100))
2067-
self.assertEqual(PyUnicode_FromFormat(b'%.100u', c_uint(123)), '123'.rjust(100, '0'))
2068-
self.assertEqual(PyUnicode_FromFormat(b'%100.80u', c_uint(123)), '123'.rjust(80, '0').rjust(100))
2069-
2070-
self.assertEqual(PyUnicode_FromFormat(b'%010x', c_int(0x123)), '123'.rjust(10, '0'))
2071-
self.assertEqual(PyUnicode_FromFormat(b'%100x', c_int(0x123)), '123'.rjust(100))
2072-
self.assertEqual(PyUnicode_FromFormat(b'%.100x', c_int(0x123)), '123'.rjust(100, '0'))
2073-
self.assertEqual(PyUnicode_FromFormat(b'%100.80x', c_int(0x123)), '123'.rjust(80, '0').rjust(100))
2165+
check_format('123'.rjust(10, '0'),
2166+
b'%010i', c_int(123))
2167+
check_format('123'.rjust(100),
2168+
b'%100i', c_int(123))
2169+
check_format('123'.rjust(100, '0'),
2170+
b'%.100i', c_int(123))
2171+
check_format('123'.rjust(80, '0').rjust(100),
2172+
b'%100.80i', c_int(123))
2173+
2174+
check_format('123'.rjust(10, '0'),
2175+
b'%010u', c_uint(123))
2176+
check_format('123'.rjust(100),
2177+
b'%100u', c_uint(123))
2178+
check_format('123'.rjust(100, '0'),
2179+
b'%.100u', c_uint(123))
2180+
check_format('123'.rjust(80, '0').rjust(100),
2181+
b'%100.80u', c_uint(123))
2182+
2183+
check_format('123'.rjust(10, '0'),
2184+
b'%010x', c_int(0x123))
2185+
check_format('123'.rjust(100),
2186+
b'%100x', c_int(0x123))
2187+
check_format('123'.rjust(100, '0'),
2188+
b'%.100x', c_int(0x123))
2189+
check_format('123'.rjust(80, '0').rjust(100),
2190+
b'%100.80x', c_int(0x123))
20742191

20752192
# test %A
2076-
text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
2077-
self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'")
2193+
check_format(r"%A:'abc\xe9\uabcd\U0010ffff'",
2194+
b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
20782195

20792196
# test %V
2080-
text = PyUnicode_FromFormat(b'repr=%V', 'abc', b'xyz')
2081-
self.assertEqual(text, 'repr=abc')
2197+
check_format('repr=abc',
2198+
b'repr=%V', 'abc', b'xyz')
20822199

20832200
# Test string decode from parameter of %s using utf-8.
20842201
# b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
20852202
# '\u4eba\u6c11'
2086-
text = PyUnicode_FromFormat(b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
2087-
self.assertEqual(text, 'repr=\u4eba\u6c11')
2203+
check_format('repr=\u4eba\u6c11',
2204+
b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
20882205

20892206
# Test replace error handler.
2090-
text = PyUnicode_FromFormat(b'repr=%V', None, b'abc\xff')
2091-
self.assertEqual(text, 'repr=abc\ufffd')
2207+
check_format('repr=abc\ufffd',
2208+
b'repr=%V', None, b'abc\xff')
20922209

20932210
# not supported: copy the raw format string. these tests are just here
20942211
# to check for crashes and should not be considered as specifications
2095-
self.assertEqual(PyUnicode_FromFormat(b'%1%s', b'abc'), '%s')
2096-
self.assertEqual(PyUnicode_FromFormat(b'%1abc'), '%1abc')
2097-
self.assertEqual(PyUnicode_FromFormat(b'%+i', c_int(10)), '%+i')
2098-
self.assertEqual(PyUnicode_FromFormat(b'%.%s', b'abc'), '%.%s')
2212+
check_format('%s',
2213+
b'%1%s', b'abc')
2214+
check_format('%1abc',
2215+
b'%1abc')
2216+
check_format('%+i',
2217+
b'%+i', c_int(10))
2218+
check_format('%.%s',
2219+
b'%.%s', b'abc')
20992220

21002221
# Test PyUnicode_AsWideChar()
21012222
def test_aswidechar(self):

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ What's New in Python 3.4.0 Alpha 1?
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #7330: Implement width and precision (ex: "%5.3s") for the format
14+
string of PyUnicode_FromFormat() function, original patch written by Ysj Ray.
15+
1316
- Issue #1545463: Global variables caught in reference cycles are now
1417
garbage-collected at shutdown.
1518

0 commit comments

Comments
 (0)