Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 4dbc305

Browse files
Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis
and fix by Guido Vranken.
1 parent 119479f commit 4dbc305

3 files changed

Lines changed: 133 additions & 42 deletions

File tree

Lib/test/test_unicode.py

Lines changed: 119 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -2016,9 +2016,10 @@ def __str__(self):
20162016
# Test PyUnicode_FromFormat()
20172017
def test_from_format(self):
20182018
support.import_module('ctypes')
2019-
from ctypes import (pythonapi, py_object,
2019+
from ctypes import (
2020+
pythonapi, py_object, sizeof,
20202021
c_int, c_long, c_longlong, c_ssize_t,
2021-
c_uint, c_ulong, c_ulonglong, c_size_t)
2022+
c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p)
20222023
name = "PyUnicode_FromFormat"
20232024
_PyUnicode_FromFormat = getattr(pythonapi, name)
20242025
_PyUnicode_FromFormat.restype = py_object
@@ -2029,9 +2030,13 @@ def PyUnicode_FromFormat(format, *args):
20292030
for arg in args)
20302031
return _PyUnicode_FromFormat(format, *cargs)
20312032

2033+
def check_format(expected, format, *args):
2034+
text = PyUnicode_FromFormat(format, *args)
2035+
self.assertEqual(expected, text)
2036+
20322037
# ascii format, non-ascii argument
2033-
text = PyUnicode_FromFormat(b'ascii\x7f=%U', 'unicode\xe9')
2034-
self.assertEqual(text, 'ascii\x7f=unicode\xe9')
2038+
check_format('ascii\x7f=unicode\xe9',
2039+
b'ascii\x7f=%U', 'unicode\xe9')
20352040

20362041
# non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
20372042
# raises an error
@@ -2041,64 +2046,136 @@ def PyUnicode_FromFormat(format, *args):
20412046
PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')
20422047

20432048
# test "%c"
2044-
self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0xabcd)), '\uabcd')
2045-
self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff')
2049+
check_format('\uabcd',
2050+
b'%c', c_int(0xabcd))
2051+
check_format('\U0010ffff',
2052+
b'%c', c_int(0x10ffff))
20462053
with self.assertRaises(OverflowError):
20472054
PyUnicode_FromFormat(b'%c', c_int(0x110000))
20482055
# Issue #18183
2049-
self.assertEqual(
2050-
PyUnicode_FromFormat(b'%c%c', c_int(0x10000), c_int(0x100000)),
2051-
'\U00010000\U00100000')
2056+
check_format('\U00010000\U00100000',
2057+
b'%c%c', c_int(0x10000), c_int(0x100000))
20522058

20532059
# test "%"
2054-
self.assertEqual(PyUnicode_FromFormat(b'%'), '%')
2055-
self.assertEqual(PyUnicode_FromFormat(b'%%'), '%')
2056-
self.assertEqual(PyUnicode_FromFormat(b'%%s'), '%s')
2057-
self.assertEqual(PyUnicode_FromFormat(b'[%%]'), '[%]')
2058-
self.assertEqual(PyUnicode_FromFormat(b'%%%s', b'abc'), '%abc')
2060+
check_format('%',
2061+
b'%')
2062+
check_format('%',
2063+
b'%%')
2064+
check_format('%s',
2065+
b'%%s')
2066+
check_format('[%]',
2067+
b'[%%]')
2068+
check_format('%abc',
2069+
b'%%%s', b'abc')
2070+
2071+
# test %S
2072+
check_format("repr=\u20acABC",
2073+
b'repr=%S', '\u20acABC')
2074+
2075+
# test %R
2076+
check_format("repr='\u20acABC'",
2077+
b'repr=%R', '\u20acABC')
20592078

20602079
# test integer formats (%i, %d, %u)
2061-
self.assertEqual(PyUnicode_FromFormat(b'%03i', c_int(10)), '010')
2062-
self.assertEqual(PyUnicode_FromFormat(b'%0.4i', c_int(10)), '0010')
2063-
self.assertEqual(PyUnicode_FromFormat(b'%i', c_int(-123)), '-123')
2064-
self.assertEqual(PyUnicode_FromFormat(b'%li', c_long(-123)), '-123')
2065-
self.assertEqual(PyUnicode_FromFormat(b'%lli', c_longlong(-123)), '-123')
2066-
self.assertEqual(PyUnicode_FromFormat(b'%zi', c_ssize_t(-123)), '-123')
2067-
2068-
self.assertEqual(PyUnicode_FromFormat(b'%d', c_int(-123)), '-123')
2069-
self.assertEqual(PyUnicode_FromFormat(b'%ld', c_long(-123)), '-123')
2070-
self.assertEqual(PyUnicode_FromFormat(b'%lld', c_longlong(-123)), '-123')
2071-
self.assertEqual(PyUnicode_FromFormat(b'%zd', c_ssize_t(-123)), '-123')
2072-
2073-
self.assertEqual(PyUnicode_FromFormat(b'%u', c_uint(123)), '123')
2074-
self.assertEqual(PyUnicode_FromFormat(b'%lu', c_ulong(123)), '123')
2075-
self.assertEqual(PyUnicode_FromFormat(b'%llu', c_ulonglong(123)), '123')
2076-
self.assertEqual(PyUnicode_FromFormat(b'%zu', c_size_t(123)), '123')
2080+
check_format('010',
2081+
b'%03i', c_int(10))
2082+
check_format('0010',
2083+
b'%0.4i', c_int(10))
2084+
check_format('-123',
2085+
b'%i', c_int(-123))
2086+
check_format('-123',
2087+
b'%li', c_long(-123))
2088+
check_format('-123',
2089+
b'%lli', c_longlong(-123))
2090+
check_format('-123',
2091+
b'%zi', c_ssize_t(-123))
2092+
2093+
check_format('-123',
2094+
b'%d', c_int(-123))
2095+
check_format('-123',
2096+
b'%ld', c_long(-123))
2097+
check_format('-123',
2098+
b'%lld', c_longlong(-123))
2099+
check_format('-123',
2100+
b'%zd', c_ssize_t(-123))
2101+
2102+
check_format('123',
2103+
b'%u', c_uint(123))
2104+
check_format('123',
2105+
b'%lu', c_ulong(123))
2106+
check_format('123',
2107+
b'%llu', c_ulonglong(123))
2108+
check_format('123',
2109+
b'%zu', c_size_t(123))
2110+
2111+
# test long output
2112+
min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1))
2113+
max_longlong = -min_longlong - 1
2114+
check_format(str(min_longlong),
2115+
b'%lld', c_longlong(min_longlong))
2116+
check_format(str(max_longlong),
2117+
b'%lld', c_longlong(max_longlong))
2118+
max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1
2119+
check_format(str(max_ulonglong),
2120+
b'%llu', c_ulonglong(max_ulonglong))
2121+
PyUnicode_FromFormat(b'%p', c_void_p(-1))
2122+
2123+
# test padding (width and/or precision)
2124+
check_format('123'.rjust(10, '0'),
2125+
b'%010i', c_int(123))
2126+
check_format('123'.rjust(100),
2127+
b'%100i', c_int(123))
2128+
check_format('123'.rjust(300, '0'),
2129+
b'%.300i', c_int(123))
2130+
check_format('123'.rjust(80, '0').rjust(100),
2131+
b'%100.80i', c_int(123))
2132+
2133+
check_format('123'.rjust(10, '0'),
2134+
b'%010u', c_uint(123))
2135+
check_format('123'.rjust(100),
2136+
b'%100u', c_uint(123))
2137+
check_format('123'.rjust(300, '0'),
2138+
b'%.300u', c_uint(123))
2139+
check_format('123'.rjust(80, '0').rjust(100),
2140+
b'%100.80u', c_uint(123))
2141+
2142+
check_format('123'.rjust(10, '0'),
2143+
b'%010x', c_int(0x123))
2144+
check_format('123'.rjust(100),
2145+
b'%100x', c_int(0x123))
2146+
check_format('123'.rjust(300, '0'),
2147+
b'%.300x', c_int(0x123))
2148+
check_format('123'.rjust(80, '0').rjust(100),
2149+
b'%100.80x', c_int(0x123))
20772150

20782151
# test %A
2079-
text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
2080-
self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'")
2152+
check_format(r"%A:'abc\xe9\uabcd\U0010ffff'",
2153+
b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
20812154

20822155
# test %V
2083-
text = PyUnicode_FromFormat(b'repr=%V', 'abc', b'xyz')
2084-
self.assertEqual(text, 'repr=abc')
2156+
check_format('repr=abc',
2157+
b'repr=%V', 'abc', b'xyz')
20852158

20862159
# Test string decode from parameter of %s using utf-8.
20872160
# b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
20882161
# '\u4eba\u6c11'
2089-
text = PyUnicode_FromFormat(b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
2090-
self.assertEqual(text, 'repr=\u4eba\u6c11')
2162+
check_format('repr=\u4eba\u6c11',
2163+
b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
20912164

20922165
#Test replace error handler.
2093-
text = PyUnicode_FromFormat(b'repr=%V', None, b'abc\xff')
2094-
self.assertEqual(text, 'repr=abc\ufffd')
2166+
check_format('repr=abc\ufffd',
2167+
b'repr=%V', None, b'abc\xff')
20952168

20962169
# not supported: copy the raw format string. these tests are just here
20972170
# to check for crashes and should not be considered as specifications
2098-
self.assertEqual(PyUnicode_FromFormat(b'%1%s', b'abc'), '%s')
2099-
self.assertEqual(PyUnicode_FromFormat(b'%1abc'), '%1abc')
2100-
self.assertEqual(PyUnicode_FromFormat(b'%+i', c_int(10)), '%+i')
2101-
self.assertEqual(PyUnicode_FromFormat(b'%.%s', b'abc'), '%.%s')
2171+
check_format('%s',
2172+
b'%1%s', b'abc')
2173+
check_format('%1abc',
2174+
b'%1abc')
2175+
check_format('%+i',
2176+
b'%+i', c_int(10))
2177+
check_format('%.%s',
2178+
b'%.%s', b'abc')
21022179

21032180
# Test PyUnicode_AsWideChar()
21042181
@support.cpython_only

Misc/NEWS

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,18 @@
22
Python News
33
+++++++++++
44

5+
What's New in Python 3.3.7?
6+
============================
7+
8+
*Release date: XXXX-XX-XX*
9+
10+
Core and Builtins
11+
-----------------
12+
13+
- Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis
14+
and fix by Guido Vranken.
15+
16+
517
What's New in Python 3.3.6?
618
===========================
719

Objects/unicodeobject.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2335,6 +2335,8 @@ parse_format_flags(const char *f,
23352335
f--;
23362336
}
23372337
}
2338+
if (width < precision)
2339+
width = precision;
23382340
if (*f == '\0') {
23392341
/* bogus format "%.1" => go backward, f points to "1" */
23402342
f--;

0 commit comments

Comments
 (0)