From 4336ac78bd3cb12379b98525e09afe0e8e80356d Mon Sep 17 00:00:00 2001 From: Ali Hamdan Date: Sun, 28 May 2023 12:16:48 +0000 Subject: [PATCH 1/4] Simplify argparse usage formatting Rationale ========= argparse performs a complex formatting of the usage for argument grouping and for line wrapping to fit the terminal width. This formatting has been a constant source of bugs for at least 10 years (see linked issues below) where defensive assertion errors are triggered or brackets and parentheses are not properly handled. Problem ======= The current implementation of argparse usage formatting relies on regular expressions to group arguments usage only to separate them again later with another set of regular expressions. This is a complex and error-prone approach that caused all the issues linked below. Special casing certain argument formats has not solved the problem. The following are some of the most common issues: - empty `metavar` - mutually exclusive groups with `SUPPRESS`ed arguments - metavars with whitespace - metavars with brackets or parentheses Solution ======== The following two comments summarize the solution: - https://github.com/python/cpython/issues/82091#issuecomment-1093832187 - https://github.com/python/cpython/issues/77048#issuecomment-1093776995 Mainly, the solution is to rewrite the usage formatting to avoid the group-then-separate approach. Instead, the usage parts are kept separate and only joined together at the end. This allows for a much simpler implementation that is easier to understand and maintain. It avoids the regular expressions approach and fixes the corresponding issues. 
This closes the following issues: - Closes #62090 - Closes #62549 - Closes #77048 - Closes #82091 - Closes #89743 - Closes #96310 - Closes #98666 These PRs become obsolete: - Closes #15372 - Closes #96311 --- Lib/argparse.py | 96 ++++----------- Lib/test/test_argparse.py | 115 ++++++++++++++++++ ...3-05-28-11-25-18.gh-issue-62090.opAhDn.rst | 3 + 3 files changed, 144 insertions(+), 70 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst diff --git a/Lib/argparse.py b/Lib/argparse.py index 543d9944f9ede3..95113a3bcbfff3 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -329,17 +329,8 @@ def _format_usage(self, usage, actions, groups, prefix): if len(prefix) + len(usage) > text_width: # break usage into wrappable parts - part_regexp = ( - r'\(.*?\)+(?=\s|$)|' - r'\[.*?\]+(?=\s|$)|' - r'\S+' - ) - opt_usage = format(optionals, groups) - pos_usage = format(positionals, groups) - opt_parts = _re.findall(part_regexp, opt_usage) - pos_parts = _re.findall(part_regexp, pos_usage) - assert ' '.join(opt_parts) == opt_usage - assert ' '.join(pos_parts) == pos_usage + opt_parts = self._get_actions_usage_parts(optionals, groups) + pos_parts = self._get_actions_usage_parts(positionals, groups) # helper for wrapping lines def get_lines(parts, indent, prefix=None): @@ -392,6 +383,9 @@ def get_lines(parts, indent, prefix=None): return '%s%s\n\n' % (prefix, usage) def _format_actions_usage(self, actions, groups): + return ' '.join(self._get_actions_usage_parts(actions, groups)) + + def _get_actions_usage_parts(self, actions, groups): # find group indices and identify actions in groups group_actions = set() inserts = {} @@ -399,56 +393,26 @@ def _format_actions_usage(self, actions, groups): if not group._group_actions: raise ValueError(f'empty group {group}') + if all(action.help is SUPPRESS for action in group._group_actions): + continue + try: start = actions.index(group._group_actions[0]) except ValueError: continue else: - 
group_action_count = len(group._group_actions) - end = start + group_action_count + end = start + len(group._group_actions) if actions[start:end] == group._group_actions: - - suppressed_actions_count = 0 - for action in group._group_actions: - group_actions.add(action) - if action.help is SUPPRESS: - suppressed_actions_count += 1 - - exposed_actions_count = group_action_count - suppressed_actions_count - - if not group.required: - if start in inserts: - inserts[start] += ' [' - else: - inserts[start] = '[' - if end in inserts: - inserts[end] += ']' - else: - inserts[end] = ']' - elif exposed_actions_count > 1: - if start in inserts: - inserts[start] += ' (' - else: - inserts[start] = '(' - if end in inserts: - inserts[end] += ')' - else: - inserts[end] = ')' - for i in range(start + 1, end): - inserts[i] = '|' + group_actions.update(group._group_actions) + inserts[start, end] = group # collect all actions format strings parts = [] - for i, action in enumerate(actions): + for action in actions: # suppressed arguments are marked with None - # remove | separators for suppressed arguments if action.help is SUPPRESS: - parts.append(None) - if inserts.get(i) == '|': - inserts.pop(i) - elif inserts.get(i + 1) == '|': - inserts.pop(i + 1) + part = None # produce all arg strings elif not action.option_strings: @@ -460,9 +424,6 @@ def _format_actions_usage(self, actions, groups): if part[0] == '[' and part[-1] == ']': part = part[1:-1] - # add the action string to the list - parts.append(part) - # produce the first way to invoke the option in brackets else: option_string = action.option_strings[0] @@ -483,26 +444,21 @@ def _format_actions_usage(self, actions, groups): if not action.required and action not in group_actions: part = '[%s]' % part - # add the action string to the list - parts.append(part) + # add the action string to the list + parts.append(part) - # insert things at the necessary indices - for i in sorted(inserts, reverse=True): - parts[i:i] = [inserts[i]] - - 
# join all the action items with spaces - text = ' '.join([item for item in parts if item is not None]) - - # clean up separators for mutually exclusive groups - open = r'[\[(]' - close = r'[\])]' - text = _re.sub(r'(%s) ' % open, r'\1', text) - text = _re.sub(r' (%s)' % close, r'\1', text) - text = _re.sub(r'%s *%s' % (open, close), r'', text) - text = text.strip() + # insert group markers at the necessary indices + for start, end in sorted(inserts, reverse=True): + group = inserts[start, end] + group_parts = [item for item in parts[start:end] if item is not None] + if group.required: + open, close = "()" if len(group_parts) > 1 else ("", "") + else: + open, close = "[]" + parts[start:end] = [open + " | ".join(group_parts) + close] - # return the text - return text + # return the usage parts + return [item for item in parts if item is not None] def _format_text(self, text): if '%(prog)' in text: diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 3a62a16cee3179..6f8251add1d800 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -4139,6 +4139,121 @@ class TestHelpUsagePositionalsOnlyWrap(HelpTestCase): version = '' +class TestHelpUsageMetavarsSpacesParentheses(HelpTestCase): + # https://github.com/python/cpython/issues/62549 + # https://github.com/python/cpython/issues/89743 + parser_signature = Sig(prog='PROG') + argument_signatures = [ + Sig('-n1', metavar='()', help='n1'), + Sig('-o1', metavar='(1, 2)', help='o1'), + Sig('-u1', metavar=' (uu) ', help='u1'), + Sig('-v1', metavar='( vv )', help='v1'), + Sig('-w1', metavar='(w)w', help='w1'), + Sig('-x1', metavar='x(x)', help='x1'), + Sig('-y1', metavar='yy)', help='y1'), + Sig('-z1', metavar='(zz', help='z1'), + Sig('-n2', metavar='[]', help='n2'), + Sig('-o2', metavar='[1, 2]', help='o2'), + Sig('-u2', metavar=' [uu] ', help='u2'), + Sig('-v2', metavar='[ vv ]', help='v2'), + Sig('-w2', metavar='[w]w', help='w2'), + Sig('-x2', metavar='x[x]', help='x2'), + Sig('-y2', 
metavar='yy]', help='y2'), + Sig('-z2', metavar='[zz', help='z2'), + ] + + usage = '''\ + usage: PROG [-h] [-n1 ()] [-o1 (1, 2)] [-u1 (uu) ] [-v1 ( vv )] [-w1 (w)w] + [-x1 x(x)] [-y1 yy)] [-z1 (zz] [-n2 []] [-o2 [1, 2]] [-u2 [uu] ] + [-v2 [ vv ]] [-w2 [w]w] [-x2 x[x]] [-y2 yy]] [-z2 [zz] + ''' + help = usage + '''\ + + options: + -h, --help show this help message and exit + -n1 () n1 + -o1 (1, 2) o1 + -u1 (uu) u1 + -v1 ( vv ) v1 + -w1 (w)w w1 + -x1 x(x) x1 + -y1 yy) y1 + -z1 (zz z1 + -n2 [] n2 + -o2 [1, 2] o2 + -u2 [uu] u2 + -v2 [ vv ] v2 + -w2 [w]w w2 + -x2 x[x] x2 + -y2 yy] y2 + -z2 [zz z2 + ''' + version = '' + + +class TestHelpUsageNoWhitespaceCrash(TestCase): + + def test_all_suppressed_mutex_followed_by_long_arg(self): + # https://github.com/python/cpython/issues/62090 + # https://github.com/python/cpython/issues/96310 + parser = argparse.ArgumentParser(prog='PROG') + mutex = parser.add_mutually_exclusive_group() + mutex.add_argument('--spam', help=argparse.SUPPRESS) + parser.add_argument('--eggs-eggs-eggs-eggs-eggs-eggs') + usage = textwrap.dedent('''\ + usage: PROG [-h] + [--eggs-eggs-eggs-eggs-eggs-eggs EGGS_EGGS_EGGS_EGGS_EGGS_EGGS] + ''') + self.assertEqual(parser.format_usage(), usage) + + def test_newline_in_metavar(self): + # https://github.com/python/cpython/issues/77048 + mapping = ['123456', '12345', '12345', '123'] + parser = argparse.ArgumentParser('11111111111111') + parser.add_argument('-v', '--verbose', + help='verbose mode', action='store_true') + parser.add_argument('targets', + help='installation targets', + nargs='+', + metavar='\n'.join(mapping)) + usage = textwrap.dedent('''\ + usage: 11111111111111 [-h] [-v] + 123456 + 12345 + 12345 + 123 [123456 + 12345 + 12345 + 123 ...] 
+ ''') + self.assertEqual(parser.format_usage(), usage) + + def test_empty_metavar_required_arg(self): + # https://github.com/python/cpython/issues/82091 + parser = argparse.ArgumentParser(prog='PROG') + parser.add_argument('--nil', metavar='', required=True) + parser.add_argument('--a', metavar='A' * 70) + usage = ( + 'usage: PROG [-h] --nil \n' + ' [--a AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA' + 'AAAAAAAAAAAAAAAAAAAAAAA]\n' + ) + self.assertEqual(parser.format_usage(), usage) + + def test_all_suppressed_mutex_with_optional_nargs(self): + # https://github.com/python/cpython/issues/98666 + parser = argparse.ArgumentParser(prog='PROG') + mutex = parser.add_mutually_exclusive_group() + mutex.add_argument( + '--param1', + nargs='?', const='default', metavar='NAME', help=argparse.SUPPRESS) + mutex.add_argument( + '--param2', + nargs='?', const='default', metavar='NAME', help=argparse.SUPPRESS) + usage = 'usage: PROG [-h]\n' + self.assertEqual(parser.format_usage(), usage) + + class TestHelpVariableExpansion(HelpTestCase): """Test that variables are expanded properly in help messages""" diff --git a/Misc/NEWS.d/next/Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst b/Misc/NEWS.d/next/Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst new file mode 100644 index 00000000000000..bb88aec85feb46 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst @@ -0,0 +1,3 @@ +Simplify :mod:`argparse` usage formatting to avoid assertion errors caused +by whitespace in metavars or :data:`argparse.SUPPRESS`ed groups. Contributed +by Ali Hamdan. 
From 3f07f533bde1c794a59cea5fd07085d832e51405 Mon Sep 17 00:00:00 2001 From: Ali Hamdan Date: Sun, 28 May 2023 21:42:11 +0000 Subject: [PATCH 2/4] Fix docs build error --- .../Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst b/Misc/NEWS.d/next/Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst index bb88aec85feb46..2fc9d18783d59c 100644 --- a/Misc/NEWS.d/next/Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst +++ b/Misc/NEWS.d/next/Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst @@ -1,3 +1,2 @@ -Simplify :mod:`argparse` usage formatting to avoid assertion errors caused -by whitespace in metavars or :data:`argparse.SUPPRESS`ed groups. Contributed -by Ali Hamdan. +Simplify :mod:`argparse` usage formatting to avoid assertion errors caused by +whitespace in metavars or ``SUPPRESS``-ed groups. Contributed by Ali Hamdan. From c2fb1857bc27a0e7841b0c6bf996096f54303cf4 Mon Sep 17 00:00:00 2001 From: Ali Hamdan Date: Wed, 17 Apr 2024 09:00:51 +0000 Subject: [PATCH 3/4] Correctly handle nested mut exc groups --- Lib/argparse.py | 4 +++- Lib/test/test_argparse.py | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/Lib/argparse.py b/Lib/argparse.py index 4550c493922654..e562d375e6f160 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -454,7 +454,9 @@ def _get_actions_usage_parts(self, actions, groups): open, close = "()" if len(group_parts) > 1 else ("", "") else: open, close = "[]" - parts[start:end] = [open + " | ".join(group_parts) + close] + parts[start] = open + " | ".join(group_parts) + close + for i in range(start + 1, end): + parts[i] = None # return the usage parts return [item for item in parts if item is not None] diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 57661b6e8013a6..27d60388730412 100644 --- a/Lib/test/test_argparse.py +++ 
b/Lib/test/test_argparse.py @@ -4369,6 +4369,25 @@ def test_all_suppressed_mutex_with_optional_nargs(self): usage = 'usage: PROG [-h]\n' self.assertEqual(parser.format_usage(), usage) + def test_nested_mutex_groups(self): + parser = argparse.ArgumentParser() + g = parser.add_mutually_exclusive_group() + g.add_argument("--spam") + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + gg = g.add_mutually_exclusive_group() + gg.add_argument("--hax") + gg.add_argument("--hox", help=argparse.SUPPRESS) + gg.add_argument("--hex") + g.add_argument("--eggs") + parser.add_argument("--num") + + usage = textwrap.dedent('''\ + usage: __main__.py [-h] [--spam SPAM | [--hax HAX | --hex HEX] | --eggs EGGS] + [--num NUM] + ''') + self.assertEqual(parser.format_usage(), usage) + class TestHelpVariableExpansion(HelpTestCase): """Test that variables are expanded properly in help messages""" From c5e2a7c898ac9d9e4b60631bdfad9cecdc5a1cfa Mon Sep 17 00:00:00 2001 From: Ali Hamdan Date: Wed, 17 Apr 2024 09:44:33 +0000 Subject: [PATCH 4/4] Fix test and improve news entry and inline comment --- Lib/argparse.py | 2 +- Lib/test/test_argparse.py | 6 +++--- .../Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/argparse.py b/Lib/argparse.py index e562d375e6f160..55bf8cdd875a8d 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -446,7 +446,7 @@ def _get_actions_usage_parts(self, actions, groups): # add the action string to the list parts.append(part) - # insert group markers at the necessary indices + # group mutually exclusive actions for start, end in sorted(inserts, reverse=True): group = inserts[start, end] group_parts = [item for item in parts[start:end] if item is not None] diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 27d60388730412..02b499145f6c43 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -4370,7 +4370,7 @@ def 
test_all_suppressed_mutex_with_optional_nargs(self): self.assertEqual(parser.format_usage(), usage) def test_nested_mutex_groups(self): - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(prog='PROG') g = parser.add_mutually_exclusive_group() g.add_argument("--spam") with warnings.catch_warnings(): @@ -4383,8 +4383,8 @@ def test_nested_mutex_groups(self): parser.add_argument("--num") usage = textwrap.dedent('''\ - usage: __main__.py [-h] [--spam SPAM | [--hax HAX | --hex HEX] | --eggs EGGS] - [--num NUM] + usage: PROG [-h] [--spam SPAM | [--hax HAX | --hex HEX] | --eggs EGGS] + [--num NUM] ''') self.assertEqual(parser.format_usage(), usage) diff --git a/Misc/NEWS.d/next/Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst b/Misc/NEWS.d/next/Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst index 2fc9d18783d59c..c5abf7178563e8 100644 --- a/Misc/NEWS.d/next/Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst +++ b/Misc/NEWS.d/next/Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst @@ -1,2 +1,2 @@ -Simplify :mod:`argparse` usage formatting to avoid assertion errors caused by -whitespace in metavars or ``SUPPRESS``-ed groups. Contributed by Ali Hamdan. +Fix assertion errors caused by whitespace in metavars or ``SUPPRESS``-ed groups +in :mod:`argparse` by simplifying usage formatting. Patch by Ali Hamdan.