diff --git a/environment.yml b/environment.yml
index 24baea8..e222744 100644
--- a/environment.yml
+++ b/environment.yml
@@ -46,6 +46,10 @@ dependencies:
   - pre-commit
   # For testing
   - pytest-cov
+  # For benchmarking
+  - requests
   # For debugging
   - icecream
   - ipython
+  # For type annotations
+  - mypy
diff --git a/scripts/bench.py b/scripts/bench.py
new file mode 100755
index 0000000..4221514
--- /dev/null
+++ b/scripts/bench.py
@@ -0,0 +1,246 @@
+#!/usr/bin/env python
+import argparse
+import json
+import os
+import statistics
+import sys
+import timeit
+
+import download_data
+import graphblas as gb
+import networkx as nx
+import numpy as np
+import scipy.sparse
+
+import graphblas_algorithms as ga
+import scipy_impl
+from graphblas_algorithms.interface import Dispatcher as ga_dispatcher
+
+thisdir = os.path.dirname(__file__)
+datapaths = [
+    os.path.join(thisdir, "..", "data"),
+    os.path.curdir,
+]
+
+
+def find_data(dataname):
+    if os.path.exists(dataname):
+        return os.path.relpath(dataname)
+    for path in datapaths:
+        path = os.path.join(path, dataname) + ".mtx"
+        if os.path.exists(path):
+            return os.path.relpath(path)
+        path = path.removesuffix(".mtx")
+        if os.path.exists(path):
+            return os.path.relpath(path)
+    if dataname in download_data.data_urls:
+        return os.path.relpath(download_data.main([dataname])[0])
+    raise FileNotFoundError(f"Unable to find data file for {dataname}")
+
+
+def get_symmetry(file_or_mminfo):
+    if not isinstance(file_or_mminfo, tuple):
+        mminfo = scipy.io.mminfo(file_or_mminfo)
+    else:
+        mminfo = file_or_mminfo
+    return mminfo[5]
+
+
+def readfile(filename, is_symmetric, backend):
+    name = filename.split(".", 1)[0].rsplit("/", 1)[0]
+    if backend == "graphblas":
+        A = gb.io.mmread(filename, name=name)
+        A.wait()
+        if is_symmetric:
+            return ga.Graph(A)
+        return ga.DiGraph(A)
+    a = scipy.io.mmread(filename)
+    if backend == "networkx":
+        create_using = nx.Graph if is_symmetric else nx.DiGraph
+        return nx.from_scipy_sparse_array(a, create_using=create_using)
+    if backend == "scipy":
+        return scipy.sparse.csr_array(a)
+    raise ValueError(
+        f"Backend {backend!r} not understood; must be 'graphblas', 'networkx', or 'scipy'"
+    )
+
+
+def best_units(num):
+    """Returns scale factor and prefix such that 1 <= num*scale < 1000"""
+    if num < 1e-12:
+        return 1e15, "f"
+    if num < 1e-9:
+        return 1e12, "p"
+    if num < 1e-6:
+        return 1e9, "n"
+    if num < 1e-3:
+        return 1e6, "\N{MICRO SIGN}"
+    if num < 1:
+        return 1e3, "m"
+    if num < 1e3:
+        return 1.0, ""
+    if num < 1e6:
+        return 1e-3, "k"
+    if num < 1e9:
+        return 1e-6, "M"
+    if num < 1e12:
+        return 1e-9, "G"
+    return 1e-12, "T"
+
+
+def stime(time):
+    scale, units = best_units(time)
+    return f"{time * scale:4.3g} {units}s"
+
+
+# Functions that aren't available in the main networkx namespace
+functionpaths = {
+    "inter_community_edges": "community.quality.inter_community_edges",
+    "intra_community_edges": "community.quality.intra_community_edges",
+    "is_tournament": "tournament.is_tournament",
+    "mutual_weight": "structuralholes.mutual_weight",
+    "score_sequence": "tournament.score_sequence",
+    "tournament_matrix": "tournament.tournament_matrix",
+}
+functioncall = {
+    "s_metric": "func(G, normalized=False)",
+}
+poweriteration = {"eigenvector_centrality", "katz_centrality", "pagerank"}
+directed_only = {
+    "in_degree_centrality",
+    "is_tournament",
+    "out_degree_centrality",
+    "score_sequence",
+    "tournament_matrix",
+    "reciprocity",
+    "overall_reciprocity",
+}
+# Is square_clustering undirected only? graphblas-algorithms doesn't implement it for directed
+undirected_only = {"generalized_degree", "k_truss", "triangles", "square_clustering"}
+
+
+def getfunction(functionname, backend):
+    if backend == "graphblas":
+        return getattr(ga_dispatcher, functionname)
+    if backend == "scipy":
+        return getattr(scipy_impl, functionname)
+    if functionname in functionpaths:
+        func = nx
+        for attr in functionpaths[functionname].split("."):
+            func = getattr(func, attr)
+        return func
+    return getattr(nx, functionname)
+
+
+def main(dataname, backend, functionname, time=3.0, n=None, extra=None, display=True):
+    filename = find_data(dataname)
+    is_symmetric = get_symmetry(filename) == "symmetric"
+    if not is_symmetric and functionname in undirected_only:
+        # Should we automatically symmetrize?
+        raise ValueError(
+            f"Data {dataname!r} is not symmetric, but {functionname} only works on undirected"
+        )
+    if is_symmetric and functionname in directed_only:
+        is_symmetric = False  # Make into directed graph
+    G = readfile(filename, is_symmetric, backend)
+    func = getfunction(functionname, backend)
+    benchstring = functioncall.get(functionname, "func(G)")
+    if extra is not None:
+        benchstring = f"{benchstring[:-1]}, {extra})"
+    globals = {"func": func, "G": G}
+    if functionname in poweriteration:
+        benchstring = f"try:\n    {benchstring}\nexcept exc:\n    pass"
+        globals["exc"] = nx.PowerIterationFailedConvergence
+    if backend == "graphblas":
+        benchstring = f"G._cache.clear()\n{benchstring}"
+    timer = timeit.Timer(benchstring, globals=globals)
+    if display:
+        line = f"Backend = {backend}, function = {functionname}, data = {dataname}"
+        if extra is not None:
+            line += f", extra = {extra}"
+        print("=" * len(line))
+        print(line)
+        print("-" * len(line))
+    info = {"backend": backend, "function": functionname, "data": dataname}
+    if extra is not None:
+        info["extra"] = extra
+    try:
+        first_time = timer.timeit(1)
+    except Exception as exc:
+        if display:
+            print(f"EXCEPTION: {exc}")
+            print("=" * len(line))
+            raise
+        info["exception"] = str(exc)
+        return info
+    if time == 0:
+        n = 1
+    elif n is None:
+        n = 2 ** max(0, int(np.ceil(np.log2(time / first_time))))
+    if display:
+        print("Number of runs:", n)
+        print("first: ", stime(first_time))
+    info["n"] = n
+    info["first"] = first_time
+    if n > 1:
+        results = timer.repeat(n - 1, 1)
+        results.append(first_time)
+        if display:
+            print("median:", stime(statistics.median(results)))
+            print("mean:  ", stime(statistics.mean(results)))
+            print("stdev: ", stime(statistics.stdev(results)))
+            print("min:   ", stime(min(results)))
+            print("max:   ", stime(max(results)))
+        info["median"] = statistics.median(results)
+        info["mean"] = statistics.mean(results)
+        info["stdev"] = statistics.stdev(results)
+        info["min"] = min(results)
+        info["max"] = max(results)
+    if display:
+        print("=" * len(line))
+    return info
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description=f"Example usage: python {sys.argv[0]} -b graphblas -f pagerank -d amazon0302"
+    )
+    parser.add_argument(
+        "-b", "--backend", choices=["graphblas", "networkx", "scipy"], default="graphblas"
+    )
+    parser.add_argument(
+        "-t", "--time", type=float, default=3.0, help="Target minimum time to run benchmarks"
+    )
+    parser.add_argument(
+        "-n",
+        type=int,
+        help="The number of times to run the benchmark (the default is to run according to time)",
+    )
+    parser.add_argument(
+        "-d",
+        "--data",
+        required=True,
+        help="The path to a mtx file or one of the following data names: {"
".join(sorted(download_data.data_urls)) + + "}; data will be downloaded if necessary", + ) + parser.add_argument( + "-j", + "--json", + action="store_true", + help="Print results as json instead of human-readable text", + ) + parser.add_argument("-f", "--func", required=True, help="Which function to benchmark") + parser.add_argument("--extra", help="Extra string to add to the function call") + args = parser.parse_args() + info = main( + args.data, + args.backend, + args.func, + time=args.time, + n=args.n, + extra=args.extra, + display=not args.json, + ) + if args.json: + print(json.dumps(info)) diff --git a/scripts/bench_pagerank.py b/scripts/bench_pagerank.py deleted file mode 100644 index 512d829..0000000 --- a/scripts/bench_pagerank.py +++ /dev/null @@ -1,250 +0,0 @@ -import click -import networkx as nx - - -def best_units(num): - """Returns scale factor and prefix such that 1 <= num*scale < 1000""" - if num < 1e-12: - return 1e15, "f" - if num < 1e-9: - return 1e12, "p" - if num < 1e-6: - return 1e9, "n" - if num < 1e-3: - return 1e6, "u" - if num < 1: - return 1e3, "m" - if num < 1e3: - return 1.0, "" - if num < 1e6: - return 1e-3, "k" - if num < 1e9: - return 1e-6, "M" - if num < 1e12: - return 1e-9, "G" - return 1e-12, "T" - - -def stime(time): - scale, units = best_units(time) - return f"{time * scale:4.3g} {units}s" - - -# Copied and modified from networkx -def pagerank_scipy( - A, - alpha=0.85, - personalization=None, - max_iter=100, - tol=1.0e-6, - nstart=None, - weight="weight", - dangling=None, -): - import numpy as np - import scipy as sp - import scipy.sparse # call as sp.sparse - - N = A.shape[0] - if A.nnz == 0: - return {} - - # nodelist = list(G) - S = A.sum(axis=1) - S[S != 0] = 1.0 / S[S != 0] - # TODO: csr_array - Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape)) - A = Q @ A - - # initial vector - if nstart is None: - x = np.repeat(1.0 / N, N) - else: - raise NotImplementedError() - # Personalization vector - if personalization is None: - p = np.repeat(1.0 / N, N) - else: - raise NotImplementedError() - # Dangling nodes - if dangling is None: - dangling_weights = p - else: - raise NotImplementedError() - is_dangling = np.where(S == 0)[0] - - # power iteration: make up to max_iter iterations - for _ in range(max_iter): - xlast = x - x = alpha * (x @ A + sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p - # check convergence, l1 norm - err = np.absolute(x - xlast).sum() - if err < N * tol: - return x - # return dict(zip(nodelist, map(float, x))) - raise nx.PowerIterationFailedConvergence(max_iter) - - -@click.command() -@click.argument("filename") -@click.option( - "-b", - "--backend", - default="graphblas", - type=click.Choice(["graphblas", "networkx", "scipy", "gb", "nx", "sp", "gbnx"]), -) -@click.option( - "-t", - "--time", - default=3, - type=click.FloatRange(min=0, min_open=True), -) -@click.option( - "-n", - default=None, - type=click.IntRange(min=1), -) -@click.option( - "--verify", - is_flag=True, -) -@click.option( - "--alpha", - default=0.85, - type=click.FloatRange(min=0, max=1), -) -@click.option( - "--tol", - default=1e-06, - type=click.FloatRange(min=0, min_open=True), -) -def main(filename, backend, time, n, verify, alpha, tol, _get_result=False): - import statistics - import timeit - import warnings - - import numpy as np - - warnings.simplefilter("ignore") - if verify: - gb_result = main.callback(filename, "gb", None, None, False, alpha, tol, _get_result=True) - sp_result = main.callback(filename, "sp", None, None, False, alpha, tol, 
-        rtol = tol / gb_result.size
-        atol = 1e-16
-        np.testing.assert_allclose(gb_result, sp_result, rtol=rtol, atol=atol)
-        print(" |- graphblas and scipy.sparse match")
-        nx_result = main.callback(filename, "nx", None, None, False, alpha, tol, _get_result=True)
-        np.testing.assert_allclose(gb_result, nx_result, rtol=rtol, atol=atol)
-        print(" |- graphblas and networkx match")
-        np.testing.assert_allclose(sp_result, nx_result, rtol=rtol, atol=atol)
-        print(" |- scipy.sparse and networkx match")
-        gbnx_result = main.callback(
-            filename, "gbnx", None, None, False, alpha, tol, _get_result=True
-        )
-        np.testing.assert_allclose(gbnx_result, gb_result, rtol=rtol, atol=atol)
-        np.testing.assert_allclose(gbnx_result, sp_result, rtol=rtol, atol=atol)
-        np.testing.assert_allclose(gbnx_result, nx_result, rtol=rtol, atol=atol)
-        print("All good!")
-        # Show a grid of total absolute differences between results
-        results = {
-            "gb": gb_result,
-            "sp": sp_result,
-            "nx": nx_result,
-            "gbnx": gbnx_result,
-        }
-        print("     ", end="")
-        for k1 in results:
-            print("%9s" % k1, end="")
-        print()
-        for k1, v1 in results.items():
-            print("%5s" % k1, end="")
-            for v2 in results.values():
-                print("%9.2g" % np.abs(v1 - v2).sum(), end="")
-            print()
-        return
-
-    backend = {
-        "gb": "graphblas",
-        "nx": "networkx",
-        "sp": "scipy",
-    }.get(backend, backend)
-    print(f"Filename: {filename} ; backend: {backend}")
-
-    if backend == "graphblas":
-        import pandas as pd
-        from graphblas import Matrix
-
-        from graphblas_algorithms.link_analysis import pagerank_core as pagerank
-
-        start = timeit.default_timer()
-        df = pd.read_csv(filename, delimiter="\t", names=["row", "col"])
-        G = Matrix.from_coo(df["row"].values, df["col"].values, 1)
-        stop = timeit.default_timer()
-        num_nodes = G.nrows
-        num_edges = G.nvals
-        if _get_result:
-            result = pagerank(G, alpha=alpha, tol=tol)
-            result(~result.S) << 0  # Densify just in case
-            return result.to_coo()[1]
-
-    elif backend == "scipy":
-        import pandas as pd
-        import scipy.sparse
-
-        start = timeit.default_timer()
-        df = pd.read_csv(filename, delimiter="\t", names=["row", "col"])
-        G = scipy.sparse.csr_array((np.repeat(1.0, len(df)), (df["row"].values, df["col"].values)))
-        pagerank = pagerank_scipy
-        stop = timeit.default_timer()
-        num_nodes = G.shape[0]
-        num_edges = G.nnz
-        if _get_result:
-            return pagerank(G, alpha=alpha, tol=tol)
-    else:
-        if backend == "networkx":
-            from networkx import pagerank
-        else:
-            from graphblas_algorithms.link_analysis import pagerank
-
-        start = timeit.default_timer()
-        G = nx.read_edgelist(filename, delimiter="\t", nodetype=int, create_using=nx.DiGraph)
-        N = max(G)
-        for i in range(N):
-            if i not in G:
-                G.add_node(i)
-        stop = timeit.default_timer()
-        num_nodes = len(G.nodes)
-        num_edges = len(G.edges)
-
-        if _get_result:
-            result = pagerank(G, alpha=alpha, tol=tol)
-            return np.array([result.get(key, 0) for key in range(N + 1)])
-
-    print("Num nodes:", num_nodes)
-    print("Num edges:", num_edges)
-    print("Load time:", stime(stop - start))
-    timer = timeit.Timer(
-        "pagerank(G, alpha=alpha, tol=tol)",
-        globals={"pagerank": pagerank, "G": G, "alpha": alpha, "tol": tol},
-    )
-    first_time = timer.timeit(1)
-    if time == 0:
-        n = 1
-    elif n is None:
-        n = 2 ** max(0, int(np.ceil(np.log2(time / first_time))))
-    print("Number of runs:", n)
-    print("first: ", stime(first_time))
-    if n > 1:
-        results = timer.repeat(n - 1, 1)
-        results.append(first_time)
-        print("median:", stime(statistics.median(results)))
-        print("mean:  ", stime(statistics.mean(results)))
-        # print("hmean: ", stime(statistics.harmonic_mean(results)))
-        # print("gmean: ", stime(statistics.geometric_mean(results)))
-        print("stdev: ", stime(statistics.stdev(results)))
-        print("min:   ", stime(min(results)))
-        print("max:   ", stime(max(results)))
-
-
-if __name__ == "__main__":
-    main()
diff --git a/scripts/download_data.py b/scripts/download_data.py
new file mode 100755
index 0000000..9b00ea1
--- /dev/null
+++ b/scripts/download_data.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+import argparse
+import functools
+import gzip
+import io
+import os
+import sys
+import tarfile
+
+import requests
+
+thisdir = os.path.dirname(__file__)
+datapath = functools.partial(os.path.join, thisdir, "..", "data")
+
+data_urls = {
+    "amazon0302": "https://sparse.tamu.edu/MM/SNAP/amazon0302.tar.gz",
+    "web-Google": "https://sparse.tamu.edu/MM/SNAP/web-Google.tar.gz",
+    "soc-Pokec": "https://sparse.tamu.edu/MM/SNAP/soc-Pokec.tar.gz",
+    "email-Enron": "https://sparse.tamu.edu/MM/SNAP/email-Enron.tar.gz",
+    "preferentialAttachment": "https://sparse.tamu.edu/MM/DIMACS10/preferentialAttachment.tar.gz",
+    "caidaRouterLevel": "https://sparse.tamu.edu/MM/DIMACS10/caidaRouterLevel.tar.gz",
+    "dblp-2010": "https://sparse.tamu.edu/MM/LAW/dblp-2010.tar.gz",
+    "citationCiteseer": "https://sparse.tamu.edu/MM/DIMACS10/citationCiteseer.tar.gz",
+    "coAuthorsDBLP": "https://sparse.tamu.edu/MM/DIMACS10/coAuthorsDBLP.tar.gz",
+    "as-Skitter": "https://sparse.tamu.edu/MM/SNAP/as-Skitter.tar.gz",
+    "coPapersCiteseer": "https://sparse.tamu.edu/MM/DIMACS10/coPapersCiteseer.tar.gz",
+    "coPapersDBLP": "https://sparse.tamu.edu/MM/DIMACS10/coPapersDBLP.tar.gz",
+}
+
+
+def download(url, target=None):
+    req = requests.request("GET", url)
+    assert req.ok, req.reason
+    tar = tarfile.open(fileobj=io.BytesIO(gzip.decompress(req.content)))
+    for member in tar.members:
+        dirname, basename = os.path.split(member.name)
+        if not basename.endswith(".mtx"):
+            continue
+        tar.extract(member)
+        if target:
+            os.makedirs(os.path.dirname(target), exist_ok=True)
+            os.replace(member.name, target)
+            os.removedirs(dirname)
+
+
+def main(datanames, overwrite=False):
+    filenames = []
+    for name in datanames:
+        target = datapath(f"{name}.mtx")
+        filenames.append(target)
+        relpath = os.path.relpath(target)
+        if not overwrite and os.path.exists(target):
+            print(f"{relpath} already exists; skipping", file=sys.stderr)
+            continue
+        url = data_urls[name]
+        print(f"Downloading {relpath} from {url}", file=sys.stderr)
+        download(url, target)
+    return filenames
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("datanames", nargs="*", choices=list(data_urls) + [[]])
+    args = parser.parse_args()
+    datanames = args.datanames
+    if not datanames:
+        # None specified, so download all that are missing
+        datanames = data_urls
+        overwrite = False
+    else:
+        overwrite = True
+    main(datanames, overwrite=overwrite)
diff --git a/scripts/scipy_impl.py b/scripts/scipy_impl.py
new file mode 100644
index 0000000..06c9d81
--- /dev/null
+++ b/scripts/scipy_impl.py
@@ -0,0 +1,55 @@
+import networkx as nx
+import numpy as np
+import scipy as sp
+import scipy.sparse  # call as sp.sparse
+
+
+def pagerank(
+    A,
+    alpha=0.85,
+    personalization=None,
+    max_iter=100,
+    tol=1.0e-6,
+    nstart=None,
+    weight="weight",
+    dangling=None,
+):
+
+    N = A.shape[0]
+    if A.nnz == 0:
+        return {}
+
+    # nodelist = list(G)
+    S = A.sum(axis=1)
+    S[S != 0] = 1.0 / S[S != 0]
+    # TODO: csr_array
+    Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape))
+    A = Q @ A
+
+    # initial vector
+    if nstart is None:
+        x = np.repeat(1.0 / N, N)
+    else:
+        raise NotImplementedError()
+    # Personalization vector
+    if personalization is None:
+        p = np.repeat(1.0 / N, N)
+    else:
+        raise NotImplementedError()
+    # Dangling nodes
+    if dangling is None:
+        dangling_weights = p
+    else:
+        raise NotImplementedError()
+    is_dangling = np.where(S == 0)[0]
+
+    # power iteration: make up to max_iter iterations
+    for _ in range(max_iter):
+        xlast = x
+        x = alpha * (x @ A + sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p
+        # check convergence, l1 norm
+        err = np.absolute(x - xlast).sum()
+        if err < N * tol:
+            return x
+    # return dict(zip(nodelist, map(float, x)))
+    raise nx.PowerIterationFailedConvergence(max_iter)