diff --git a/Lib/string.py b/Lib/string.py
index c4f05c7223ce8a..e8cb280b3959c8 100644
--- a/Lib/string.py
+++ b/Lib/string.py
@@ -54,6 +54,15 @@ def capwords(s, sep=None):
 
 _sentinel_dict = {}
 
+
+def _safe_getitem(mapping, key, default):
+    """Return mapping[key], or default if the lookup raises KeyError."""
+    try:
+        return mapping[key]
+    except KeyError:
+        return default
+
+
 class Template:
     """A string class for supporting $-substitutions."""
 
@@ -86,9 +95,90 @@ def __init_subclass__(cls):
 
     def __init__(self, template):
         self.template = template
+        # Precompiled fast paths; None means "fall back to the regex path".
+        self._substitute = self._compile_substitute(template)
+        self._safe_substitute = self._compile_safe_substitute(template)
 
     # Search for $$, $identifier, ${identifier}, and any bare $'s
 
+    def _compile_substitute(self, template):
+        """Compile template into a function implementing substitute().
+
+        Return a callable taking the mapping, or None if a match is neither
+        a placeholder nor an escaped delimiter, or if a name cannot be
+        embedded in the generated code; substitute() then uses the regex.
+        """
+        parts = []
+        prev = 0
+        for mo in self.pattern.finditer(template):
+            literal = template[prev: mo.start()]
+            if literal:
+                parts.append(repr(literal))
+            prev = mo.end()
+            # Check the most common path first.
+            named = mo.group('named') or mo.group('braced')
+            if named is not None:
+                sub = "mapping[%r]" % named
+                # A backslash is not allowed in an f-string replacement
+                # field before Python 3.12, and a triple quote would end
+                # the generated f-string literal early.
+                if '\\' in sub or "'''" in sub:
+                    return None
+                parts.append("f'''{%s!s}'''" % sub)
+            elif mo.group('escaped') is not None:
+                parts.append(repr(self.delimiter))
+            else:
+                return None
+        literal = template[prev:]
+        if literal:
+            parts.append(repr(literal))
+        # Only repr() output of template text is interpolated into the code.
+        # Join with spaces so that adjacent quotes never merge into a triple
+        # quote, and fall back to '' because an empty template would
+        # otherwise produce a lambda without a body (a SyntaxError).
+        return eval('lambda mapping: ' + (' '.join(parts) or "''"))
+
+    def _compile_safe_substitute(self, template):
+        """Compile template into a function implementing safe_substitute().
+
+        Return a callable taking the mapping, or None if a match belongs to
+        no known group or a name cannot be embedded in the generated code;
+        safe_substitute() then uses the regex.
+        """
+        parts = []
+        prev = 0
+        for mo in self.pattern.finditer(template):
+            literal = template[prev: mo.start()]
+            if literal:
+                parts.append(repr(literal))
+            prev = mo.end()
+            # Check the most common path first.
+            named = mo.group('named') or mo.group('braced')
+            if named is not None:
+                # A missing key leaves the original placeholder text intact.
+                sub = "_safe_getitem(mapping, %r, %r)" % (named, mo.group())
+                # A backslash is not allowed in an f-string replacement
+                # field before Python 3.12, and a triple quote would end
+                # the generated f-string literal early.
+                if '\\' in sub or "'''" in sub:
+                    return None
+                parts.append("f'''{%s!s}'''" % sub)
+            elif mo.group('escaped') is not None:
+                parts.append(repr(self.delimiter))
+            elif mo.group('invalid') is not None:
+                parts.append(repr(mo.group()))
+            else:
+                return None
+        literal = template[prev:]
+        if literal:
+            parts.append(repr(literal))
+        # Only repr() output of template text is interpolated into the code.
+        # Join with spaces so that adjacent quotes never merge into a triple
+        # quote, and fall back to '' because an empty template would
+        # otherwise produce a lambda without a body (a SyntaxError).
+        return eval('lambda mapping, _safe_getitem=_safe_getitem: ' +
+                    (' '.join(parts) or "''"))
+
     def _invalid(self, mo):
         i = mo.start('invalid')
         lines = self.template[:i].splitlines(keepends=True)
@@ -106,6 +196,11 @@ def substitute(self, mapping=_sentinel_dict, /, **kws):
             mapping = kws
         elif kws:
             mapping = _ChainMap(kws, mapping)
+
+        # Fast path: use the function precompiled in __init__().
+        if self._substitute is not None:
+            return self._substitute(mapping)
+
         # Helper function for .sub()
         def convert(mo):
             # Check the most common path first.
@@ -125,6 +220,11 @@ def safe_substitute(self, mapping=_sentinel_dict, /, **kws):
             mapping = kws
         elif kws:
             mapping = _ChainMap(kws, mapping)
+
+        # Fast path: use the function precompiled in __init__().
+        if self._safe_substitute is not None:
+            return self._safe_substitute(mapping)
+
         # Helper function for .sub()
         def convert(mo):
             named = mo.group('named') or mo.group('braced')
diff --git a/Lib/test/test_string.py b/Lib/test/test_string.py
index f6d112d8a93ec4..962ab7689d87d3 100644
--- a/Lib/test/test_string.py
+++ b/Lib/test/test_string.py
@@ -316,6 +316,9 @@ class PathPattern(Template):
         m.bag.what = 'ham'
         s = PathPattern('$bag.foo.who likes to eat a bag of $bag.what')
         self.assertEqual(s.substitute(m), 'tim likes to eat a bag of ham')
+        self.assertEqual(s.safe_substitute(m), 'tim likes to eat a bag of ham')
+        del m.bag.foo.who
+        self.assertEqual(s.safe_substitute(m), '$bag.foo.who likes to eat a bag of ham')
 
     def test_flags_override(self):
         class MyPattern(Template):
@@ -364,6 +367,9 @@ class MyPattern(Template):
         m.bag.what = 'ham'
         s = MyPattern('@bag.foo.who likes to eat a bag of @bag.what')
         self.assertEqual(s.substitute(m), 'tim likes to eat a bag of ham')
+        self.assertEqual(s.safe_substitute(m), 'tim likes to eat a bag of ham')
+        del m.bag.foo.who
+        self.assertEqual(s.safe_substitute(m), '@bag.foo.who likes to eat a bag of ham')
 
         class BadPattern(Template):
             pattern = r"""
@@ -411,6 +417,38 @@ class MyTemplate(Template):
         val = t.safe_substitute({'location': 'Cleveland'})
         self.assertEqual(val, 'PyCon in Cleveland')
 
+    def test_special_characters_in_name(self):
+        # Names that are awkward to embed in generated code must still work.
+        class MyTemplate(Template):
+            pattern = r"""
+            @[[](?P<braced>[^]]*)[]] |
+            @(?P<named>[a-z]+) |
+            (?P<escaped>@@) |
+            (?P<invalid>@)
+            """
+        m = {
+            '\\': 'backslash',
+            "\t": 'tab',
+            '"': 'quotation mark',
+            "'": 'apostrophe',
+            '"""': 'triple quotation mark',
+            "'''": 'triple apostrophe',
+            "%": 'percent sign',
+            "$": 'dollar sign',
+            "{": 'left brace',
+            "}": 'right brace',
+        }
+        for k in m:
+            s = MyTemplate('<@[%s]>' % k)
+            self.assertEqual(s.substitute(m), '<%s>' % m[k])
+            self.assertEqual(s.safe_substitute(m), '<%s>' % m[k])
+
+    def test_empty_template(self):
+        # An empty template has no parts to compile.
+        s = Template('')
+        self.assertEqual(s.substitute(), '')
+        self.assertEqual(s.safe_substitute(), '')
+
     def test_invalid_with_no_lines(self):
         # The error formatting for invalid templates
         # has a special case for no data that the default
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2022-11-07-12-51-54.gh-issue-72496.Nj65M_.rst b/Misc/NEWS.d/next/Core_and_Builtins/2022-11-07-12-51-54.gh-issue-72496.Nj65M_.rst
new file mode 100644
index 00000000000000..0325d39022438c
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2022-11-07-12-51-54.gh-issue-72496.Nj65M_.rst
@@ -0,0 +1 @@
+Improved the performance of :class:`string.Template`.