From 6b270fb940d4d36dc8115dba43a926dc312ba70a Mon Sep 17 00:00:00 2001
From: Charles Harris <charlesr.harris@gmail.com>
Date: Sun, 24 Dec 2023 13:54:13 -0700
Subject: [PATCH] MAINT: Update crackfortran.py and f2py2e.py from main

---
 numpy/f2py/crackfortran.py | 230 ++++++++++++++++++++++++++++++++-----
 numpy/f2py/f2py2e.py       | 112 +++++++++---------
 2 files changed, 255 insertions(+), 87 deletions(-)

diff --git a/numpy/f2py/crackfortran.py b/numpy/f2py/crackfortran.py
index ac56d3f2ae6c..8d3fc27608bd 100755
--- a/numpy/f2py/crackfortran.py
+++ b/numpy/f2py/crackfortran.py
@@ -993,6 +993,16 @@ def _resolvenameargspattern(line):
 
 
 def analyzeline(m, case, line):
+    """
+    Reads each line in the input file in sequence and updates global vars.
+
+    Effectively reads and collects information from the input file to the
+    global variable groupcache, a dictionary containing info about each part
+    of the fortran module.
+
+    At the end of analyzeline, information is filtered into the correct dict
+    keys, but parameter values and dimensions are not yet interpreted.
+    """
     global groupcounter, groupname, groupcache, grouplist, filepositiontext
     global currentfilename, f77modulename, neededinterface, neededmodule
     global expectbegin, gotnextfile, previous_context
@@ -1679,10 +1689,18 @@ def markinnerspaces(line):
 
 
 def updatevars(typespec, selector, attrspec, entitydecl):
+    """
+    Returns last_name, the variable name without special chars, parentheses
+        or dimension specifiers.
+
+    Alters groupcache to add the name, typespec, attrspec (and possibly value)
+    of current variable.
+    """
     global groupcache, groupcounter
 
     last_name = None
     kindselect, charselect, typename = cracktypespec(typespec, selector)
+    # Clean up outer commas, whitespace and undesired chars from attrspec
     if attrspec:
         attrspec = [x.strip() for x in markoutercomma(attrspec).split('@,@')]
         l = []
@@ -2396,8 +2414,6 @@ def _calc_depend_dict(vars):
 
 
 def get_sorted_names(vars):
-    """
-    """
     depend_dict = _calc_depend_dict(vars)
     names = []
     for name in list(depend_dict.keys()):
@@ -2450,7 +2466,7 @@ def _selected_real_kind_func(p, r=0, radix=0):
     if p < 16:
         return 8
     machine = platform.machine().lower()
-    if machine.startswith(('aarch64', 'alpha', 'arm64', 'loongarch', 'power', 'ppc', 'riscv', 's390x', 'sparc')):
+    if machine.startswith(('aarch64', 'alpha', 'arm64', 'loongarch', 'mips', 'power', 'ppc', 'riscv', 's390x', 'sparc')):
         if p <= 33:
             return 16
     else:
@@ -2489,6 +2505,7 @@ def get_parameters(vars, global_params={}):
                     # TODO: test .eq., .neq., etc replacements.
                 ]:
                     v = v.replace(*repl)
+
             v = kind_re.sub(r'kind("\1")', v)
             v = selected_int_kind_re.sub(r'selected_int_kind(\1)', v)
 
@@ -2497,14 +2514,17 @@ def get_parameters(vars, global_params={}):
             # then we may easily remove those specifiers.
             # However, it may be that the user uses other specifiers...(!)
             is_replaced = False
+
             if 'kindselector' in vars[n]:
+                # Remove kind specifier (including those defined
+                # by parameters)
                 if 'kind' in vars[n]['kindselector']:
                     orig_v_len = len(v)
                     v = v.replace('_' + vars[n]['kindselector']['kind'], '')
                     # Again, this will be true if even a single specifier
                     # has been replaced, see comment above.
                     is_replaced = len(v) < orig_v_len
-                    
+
             if not is_replaced:
                 if not selected_kind_re.match(v):
                     v_ = v.split('_')
@@ -2531,6 +2551,10 @@ def get_parameters(vars, global_params={}):
                 outmess(f'get_parameters[TODO]: '
                         f'implement evaluation of complex expression {v}\n')
 
+            dimspec = ([s.lstrip('dimension').strip()
+                        for s in vars[n]['attrspec']
+                       if s.startswith('dimension')] or [None])[0]
+
             # Handle _dp for gh-6624
             # Also fixes gh-20460
             if real16pattern.search(v):
@@ -2538,11 +2562,11 @@ def get_parameters(vars, global_params={}):
             elif real8pattern.search(v):
                 v = 4
             try:
-                params[n] = eval(v, g_params, params)
-
+                params[n] = param_eval(v, g_params, params, dimspec=dimspec)
             except Exception as msg:
                 params[n] = v
-                outmess('get_parameters: got "%s" on %s\n' % (msg, repr(v)))
+                outmess(f'get_parameters: got "{msg}" on {n!r}\n')
+
             if isstring(vars[n]) and isinstance(params[n], int):
                 params[n] = chr(params[n])
             nl = n.lower()
@@ -2550,8 +2574,7 @@ def get_parameters(vars, global_params={}):
                 params[nl] = params[n]
         else:
             print(vars[n])
-            outmess(
-                'get_parameters:parameter %s does not have value?!\n' % (repr(n)))
+            outmess(f'get_parameters:parameter {n!r} does not have value?!\n')
     return params
 
 
@@ -2560,6 +2583,7 @@ def _eval_length(length, params):
         return '(*)'
     return _eval_scalar(length, params)
 
+
 _is_kind_number = re.compile(r'\d+_').match
 
 
@@ -2580,6 +2604,10 @@ def _eval_scalar(value, params):
 
 
 def analyzevars(block):
+    """
+    Sets correct dimension information for each variable/parameter
+    """
+
     global f90modulevars
 
     setmesstext(block)
@@ -2608,7 +2636,8 @@ def analyzevars(block):
             svars.append(n)
 
     params = get_parameters(vars, get_useparameters(block))
-
+    # At this point, params are read and interpreted, but
+    # the params used to define vars are not yet parsed
     dep_matches = {}
     name_match = re.compile(r'[A-Za-z][\w$]*').match
     for v in list(vars.keys()):
@@ -2707,27 +2736,30 @@ def analyzevars(block):
                     check = None
             if dim and 'dimension' not in vars[n]:
                 vars[n]['dimension'] = []
-                for d in rmbadname([x.strip() for x in markoutercomma(dim).split('@,@')]):
-                    star = ':' if d == ':' else '*'
+                for d in rmbadname(
+                        [x.strip() for x in markoutercomma(dim).split('@,@')]
+                ):
+                    # d is the expression inside the dimension declaration
                     # Evaluate `d` with respect to params
-                    if d in params:
-                        d = str(params[d])
-                    for p in params:
-                        re_1 = re.compile(r'(?P<before>.*?)\b' + p + r'\b(?P<after>.*)', re.I)
-                        m = re_1.match(d)
-                        while m:
-                            d = m.group('before') + \
-                                str(params[p]) + m.group('after')
-                            m = re_1.match(d)
-
-                    if d == star:
-                        dl = [star]
+                    try:
+                        # the dimension for this variable depends on a
+                        # previously defined parameter
+                        d = param_parse(d, params)
+                    except (ValueError, IndexError, KeyError):
+                        outmess(
+                            ('analyzevars: could not parse dimension for '
+                            f'variable {d!r}\n')
+                        )
+
+                    dim_char = ':' if d == ':' else '*'
+                    if d == dim_char:
+                        dl = [dim_char]
                     else:
                         dl = markoutercomma(d, ':').split('@:@')
                     if len(dl) == 2 and '*' in dl:  # e.g. dimension(5:*)
                         dl = ['*']
                         d = '*'
-                    if len(dl) == 1 and dl[0] != star:
+                    if len(dl) == 1 and dl[0] != dim_char:
                         dl = ['1', dl[0]]
                     if len(dl) == 2:
                         d1, d2 = map(symbolic.Expr.parse, dl)
@@ -2961,9 +2993,152 @@ def compute_deps(v, deps):
                 del vars[n]
     return vars
 
+
 analyzeargs_re_1 = re.compile(r'\A[a-z]+[\w$]*\Z', re.I)
 
 
+def param_eval(v, g_params, params, dimspec=None):
+    """
+    Evaluate a parameter value; an array parameter becomes a dictionary
+    mapping each Fortran index to its (evaluated) element.
+
+    With ``dimspec=None`` the value is a scalar: `v` is evaluated and
+    returned, or returned unchanged (with a message) if evaluation fails.
+    Raises ValueError if `dimspec` is malformed or multidimensional.
+
+    WARNING: It is not possible to initialize multidimensional array
+    parameters e.g. dimension(-3:1, 4, 3:5) at this point. This is because in
+    Fortran initialization through array constructor requires the RESHAPE
+    intrinsic function, and the right-hand side of the declaration is not
+    executed in f2py but later, in the compiled c/fortran extension.
+    To let the user access the array parameter from python, we could either
+    1) use python standard indexing (often incompatible with the original
+       fortran indexing), or
+    2) expose it as a dictionary with fortran indices as keys (chosen for now).
+    """
+    if dimspec is None:
+        try:
+            p = eval(v, g_params, params)
+        except Exception as msg:
+            p = v
+            outmess(f'param_eval: got "{msg}" on {v!r}\n')
+        return p
+
+    # This is an array parameter.
+    # First, we parse the dimension information
+    if len(dimspec) < 2 or dimspec[::len(dimspec)-1] != "()":
+        raise ValueError(f'param_eval: dimension {dimspec} can\'t be parsed')
+    dimrange = dimspec[1:-1].split(',')
+    if len(dimrange) == 1:
+        # e.g. dimension(2) or dimension(-1:1)
+        dimrange = dimrange[0].split(':')
+        # now, dimrange is a list of 1 or 2 elements
+        if len(dimrange) == 1:
+            bound = param_parse(dimrange[0], params)
+            dimrange = range(1, int(bound)+1)
+        else:
+            lbound = param_parse(dimrange[0], params)
+            ubound = param_parse(dimrange[1], params)
+            dimrange = range(int(lbound), int(ubound)+1)
+    else:
+        raise ValueError('param_eval: multidimensional array parameters '
+                         f'{dimspec} not supported')
+
+    # Parse parameter value
+    v = (v[2:-2] if v.startswith('(/') else v).split(',')
+    v_eval = []
+    for item in v:
+        try:
+            item = eval(item, g_params, params)
+        except Exception as msg:
+            outmess(f'param_eval: got "{msg}" on {item!r}\n')
+        v_eval.append(item)
+
+    p = dict(zip(dimrange, v_eval))
+
+    return p
+
+
+def param_parse(d, params):
+    """Recursively parse array dimensions.
+
+    Parses the declaration of an array variable or parameter
+    `dimension` keyword, and is called recursively if the
+    dimension for this array is an element of a previously defined
+    array parameter (found in `params`).
+
+    Parameters
+    ----------
+    d : str
+        Fortran expression describing the dimension of an array.
+    params : dict
+        Previously parsed parameters declared in the Fortran source file.
+
+    Returns
+    -------
+    out : str
+        Dimension expression with the known parameters substituted.
+
+    Examples
+    --------
+
+    * If the line being analyzed is
+
+      `integer, parameter, dimension(2) :: pa = (/ 3, 5 /)`
+
+      then `d = '2'` and we return it unchanged, with
+
+    >>> d, params = '2', {}
+    >>> param_parse(d, params)
+    '2'
+
+    * If the line being analyzed is
+
+      `integer, parameter, dimension(pa) :: pb = (/1, 2, 3/)`
+
+      then `d = 'pa'`; since `pa` is a previously parsed parameter,
+      and `pa = 3`, we look its value up directly, to obtain
+
+    >>> d = 'pa'
+    >>> params = {'pa': 3}
+    >>> param_parse(d, params)
+    '3'
+
+    * If the line being analyzed is
+
+      `integer, parameter, dimension(pa(1)) :: pb = (/1, 2, 3/)`
+
+      then `d = 'pa(1)'`; since `pa` is a previously parsed parameter,
+      and `pa(1) = 3`, we call `param_parse` recursively, to obtain
+
+    >>> d = 'pa(1)'
+    >>> params = dict(pa={1: 3, 2: 5})
+    >>> param_parse(d, params)
+    '3'
+    """
+    # Only treat `name(...)` as an array-parameter element when `name` is a
+    # known parameter; any other parenthesised expression is substituted below
+    dname = d[:d.find("(")].strip() if "(" in d else None
+    if dname in params:
+        # the index may itself be a parameter expression; parse it recursively
+        ddims = d[d.find("(")+1:d.rfind(")")]
+        index = int(param_parse(ddims, params))
+        return str(params[dname][index])
+    elif d in params:
+        return str(params[d])
+    else:
+        # General expression, e.g. `2*(n+1)`: replace each parameter name that
+        # occurs as a whole word (case-insensitively) by its value
+        for p in params:
+            if isinstance(params[p], dict):
+                continue  # array parameter: has no scalar text to insert
+            re_1 = re.compile(r'\b' + re.escape(p) + r'\b', re.I)
+            # A callable replacement inserts the value literally and the result
+            # is not re-scanned for `p`, so a value mentioning `p` cannot loop
+            d = re_1.sub(lambda m, p=p: str(params[p]), d)
+        return d
+
+
 def expr2name(a, block, args=[]):
     orig_a = a
     a_is_expr = not analyzeargs_re_1.match(a)
@@ -3216,11 +3391,6 @@ def true_intent_list(var):
 
 
 def vars2fortran(block, vars, args, tab='', as_interface=False):
-    """
-    TODO:
-    public sub
-    ...
-    """
     setmesstext(block)
     ret = ''
     nout = []
diff --git a/numpy/f2py/f2py2e.py b/numpy/f2py/f2py2e.py
index ad34d575c273..ce22b2d8a9ec 100755
--- a/numpy/f2py/f2py2e.py
+++ b/numpy/f2py/f2py2e.py
@@ -62,12 +62,6 @@
 
 Options:
 
-  --2d-numpy       Use numpy.f2py tool with NumPy support. [DEFAULT]
-  --2d-numeric     Use f2py2e tool with Numeric support.
-  --2d-numarray    Use f2py2e tool with Numarray support.
-  --g3-numpy       Use 3rd generation f2py from the separate f2py package.
-                   [NOT AVAILABLE YET]
-
   -h <filename>    Write signatures of the fortran routines to file <filename>
                    and exit. You can then edit <filename> and use it instead
                    of <fortran files>. If <filename>==stdout then the
@@ -128,20 +122,22 @@
   -v               Print f2py version ID and exit.
 
 
-build backend options (only effective with -c):
+build backend options (only effective with -c)
+[NO_MESON] is used to indicate an option not meant to be used
+with the meson backend or above Python 3.12:
 
-  --fcompiler=         Specify Fortran compiler type by vendor
-  --compiler=          Specify C compiler type (as defined by distutils)
+  --fcompiler=         Specify Fortran compiler type by vendor [NO_MESON]
+  --compiler=          Specify distutils C compiler type [NO_MESON]
 
-  --help-fcompiler     List available Fortran compilers and exit
-  --f77exec=           Specify the path to F77 compiler
-  --f90exec=           Specify the path to F90 compiler
+  --help-fcompiler     List available Fortran compilers and exit [NO_MESON]
+  --f77exec=           Specify the path to F77 compiler [NO_MESON]
+  --f90exec=           Specify the path to F90 compiler [NO_MESON]
   --f77flags=          Specify F77 compiler flags
   --f90flags=          Specify F90 compiler flags
-  --opt=               Specify optimization flags
-  --arch=              Specify architecture specific optimization flags
-  --noopt              Compile without optimization
-  --noarch             Compile without arch-dependent optimization
+  --opt=               Specify optimization flags [NO_MESON]
+  --arch=              Specify architecture specific optimization flags [NO_MESON]
+  --noopt              Compile without optimization [NO_MESON]
+  --noarch             Compile without arch-dependent optimization [NO_MESON]
   --debug              Compile with debugging information
 
   --dep                <dependency>
@@ -166,7 +162,7 @@
                        by numpy.distutils/system_info.py. E.g. to link
                        with optimized LAPACK libraries (vecLib on MacOSX,
                        ATLAS elsewhere), use --link-lapack_opt.
-                       See also --help-link switch.
+                       See also --help-link switch. [NO_MESON]
 
   -L/path/to/lib/ -l<libname>
   -D<define> -U<name>
@@ -196,7 +192,7 @@
 
 def scaninputline(inputline):
     files, skipfuncs, onlyfuncs, debug = [], [], [], []
-    f, f2, f3, f5, f6, f7, f8, f9, f10 = 1, 0, 0, 0, 0, 0, 0, 0, 0
+    f, f2, f3, f5, f6, f8, f9, f10 = 1, 0, 0, 0, 0, 0, 0, 0
     verbose = 1
     emptygen = True
     dolc = -1
@@ -204,7 +200,7 @@ def scaninputline(inputline):
     dorestdoc = 0
     wrapfuncs = 1
     buildpath = '.'
-    include_paths = []
+    include_paths, inputline = get_includes(inputline)
     signsfile, modulename = None, None
     options = {'buildpath': buildpath,
                'coutput': None,
@@ -264,14 +260,6 @@ def scaninputline(inputline):
         elif l[:8] == '-include':
             cfuncs.outneeds['userincludes'].append(l[9:-1])
             cfuncs.userincludes[l[9:-1]] = '#include ' + l[8:]
-        elif l[:15] in '--include_paths':
-            outmess(
-                'f2py option --include_paths is deprecated, use --include-paths instead.\n')
-            f7 = 1
-        elif l[:15] in '--include-paths':
-            # Similar to using -I with -c, however this is
-            # also used during generation of wrappers
-            f7 = 1
         elif l == '--skip-empty-wrappers':
             emptygen = False
         elif l[0] == '-':
@@ -286,9 +274,6 @@ def scaninputline(inputline):
         elif f6:
             f6 = 0
             buildpath = l
-        elif f7:
-            f7 = 0
-            include_paths.extend(l.split(os.pathsep))
         elif f8:
             f8 = 0
             options["coutput"] = l
@@ -456,7 +441,7 @@ def run_main(comline_list):
     fobjhsrc = os.path.join(f2pydir, 'src', 'fortranobject.h')
     fobjcsrc = os.path.join(f2pydir, 'src', 'fortranobject.c')
     # gh-22819 -- begin
-    parser = make_f2py_parser()
+    parser = make_f2py_compile_parser()
     args, comline_list = parser.parse_known_args(comline_list)
     pyf_files, _ = filter_files("", "[.]pyf([.]src|)", comline_list)
     # Checks that no existing modulename is defined in a pyf file
@@ -538,7 +523,35 @@ def get_prefix(module):
     p = os.path.dirname(os.path.dirname(module.__file__))
     return p
 
-def make_f2py_parser():
+
+class CombineIncludePaths(argparse.Action):
+    """Collect every -I/--include-paths value into one ordered, unique list."""
+    def __call__(self, parser, namespace, values, option_string=None):
+        paths = list(getattr(namespace, 'include_paths', None) or [])
+        if option_string == "--include_paths":
+            outmess("Use --include-paths or -I instead of --include_paths which will be removed\n")
+        # The long spellings take an os.pathsep-joined list; -I is one directory
+        multi = option_string in ("--include-paths", "--include_paths")
+        paths.extend(values.split(os.pathsep) if multi else [values])
+        setattr(namespace, 'include_paths', list(dict.fromkeys(paths)))  # dedupe, keep order
+
+def include_parser():
+    # Stand-alone parser that recognises only the include-path spellings
+    incl = argparse.ArgumentParser(add_help=False)
+    for flag in ("-I", "--include-paths", "--include_paths"):
+        incl.add_argument(flag, dest="include_paths", action=CombineIncludePaths)
+    return incl
+
+def get_includes(iline):
+    """Split `iline` into (include paths, remaining arguments)."""
+    # The arguments are joined and re-split on whitespace before the include
+    # options are peeled off; whatever the parser does not know is returned
+    tokens = ' '.join(iline).split()
+    args, remain = include_parser().parse_known_args(tokens)
+    ipaths = [] if args.include_paths is None else args.include_paths
+    return ipaths, remain
+
+def make_f2py_compile_parser():
     parser = argparse.ArgumentParser(add_help=False)
     parser.add_argument("--dep", action="append", dest="dependencies")
     parser.add_argument("--backend", choices=['meson', 'distutils'], default='distutils')
@@ -548,7 +561,7 @@ def make_f2py_parser():
 def preparse_sysargv():
     # To keep backwards bug compatibility, newer flags are handled by argparse,
     # and `sys.argv` is passed to the rest of `f2py` as is.
-    parser = make_f2py_parser()
+    parser = make_f2py_compile_parser()
 
     args, remaining_argv = parser.parse_known_args()
     sys.argv = [sys.argv[0]] + remaining_argv
@@ -665,19 +678,19 @@ def run_compile():
     if '--quiet' in f2py_flags:
         setup_flags.append('--quiet')
 
+    # Ugly filter to remove everything but sources
     sources = sys.argv[1:]
-    for optname in ['--include_paths', '--include-paths', '--f2cmap']:
-        if optname in sys.argv:
-            i = sys.argv.index(optname)
-            f2py_flags.extend(sys.argv[i:i + 2])
-            del sys.argv[i + 1], sys.argv[i]
-            sources = sys.argv[1:]
+    f2cmapopt = '--f2cmap'
+    if f2cmapopt in sys.argv:
+        i = sys.argv.index(f2cmapopt)
+        f2py_flags.extend(sys.argv[i:i + 2])
+        del sys.argv[i + 1], sys.argv[i]
+        sources = sys.argv[1:]
 
     pyf_files, _sources = filter_files("", "[.]pyf([.]src|)", sources)
     sources = pyf_files + _sources
     modulename = validate_modulename(pyf_files, modulename)
     extra_objects, sources = filter_files('', '[.](o|a|so|dylib)', sources)
-    include_dirs, sources = filter_files('-I', '', sources, remove_prefix=1)
     library_dirs, sources = filter_files('-L', '', sources, remove_prefix=1)
     libraries, sources = filter_files('-l', '', sources, remove_prefix=1)
     undef_macros, sources = filter_files('-U', '', sources, remove_prefix=1)
@@ -700,6 +713,8 @@ def run_compile():
         else:
             run_main(f" {' '.join(f2py_flags)} {' '.join(pyf_files)}".split())
 
+    # Order matters here, includes are needed for run_main above
+    include_dirs, sources = get_includes(sources)
     # Now use the builder
     builder = build_backend(
         modulename,
@@ -747,23 +762,6 @@ def main():
             show_all()
         return
 
-    # Probably outdated options that were not working before 1.16
-    if '--g3-numpy' in sys.argv[1:]:
-        sys.stderr.write("G3 f2py support is not implemented, yet.\\n")
-        sys.exit(1)
-    elif '--2e-numeric' in sys.argv[1:]:
-        sys.argv.remove('--2e-numeric')
-    elif '--2e-numarray' in sys.argv[1:]:
-        # Note that this errors becaust the -DNUMARRAY argument is
-        # not recognized. Just here for back compatibility and the
-        # error message.
-        sys.argv.append("-DNUMARRAY")
-        sys.argv.remove('--2e-numarray')
-    elif '--2e-numpy' in sys.argv[1:]:
-        sys.argv.remove('--2e-numpy')
-    else:
-        pass
-
     if '-c' in sys.argv[1:]:
         run_compile()
     else: