From 486ea225fb5c15d65054841f6bad6b7d0b5aacef Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Wed, 16 Nov 2022 22:25:09 -0600 Subject: [PATCH] Update to use `to_coo/from_coo` instead of `to_values/from_values` --- .github/workflows/test.yml | 2 +- .pre-commit-config.yaml | 9 ++++++--- graphblas_algorithms/algorithms/cluster.py | 6 +++--- graphblas_algorithms/algorithms/core.py | 2 +- graphblas_algorithms/algorithms/dag.py | 4 ++-- .../algorithms/shortest_paths/generic.py | 4 ++-- graphblas_algorithms/algorithms/simple_paths.py | 2 +- graphblas_algorithms/algorithms/tournament.py | 2 +- graphblas_algorithms/classes/_utils.py | 12 ++++++------ graphblas_algorithms/classes/nodemap.py | 6 +++--- graphblas_algorithms/classes/nodeset.py | 2 +- graphblas_algorithms/interface.py | 6 ++++-- graphblas_algorithms/nxapi/boundary.py | 2 +- graphblas_algorithms/nxapi/cluster.py | 2 +- graphblas_algorithms/tests/test_match_nx.py | 17 ++++------------- requirements.txt | 2 +- scripts/bench_pagerank.py | 4 ++-- 17 files changed, 40 insertions(+), 44 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9bb155c..fd5fd15 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,7 +33,7 @@ jobs: conda install -c conda-forge python-graphblas scipy pandas \ pytest-cov pytest-randomly black flake8-comprehensions flake8-bugbear # matplotlib lxml pygraphviz pydot sympy # Extra networkx deps we don't need yet - pip install git+https://github.com/jim22k/networkx.git@nx-sparse --no-deps + pip install git+https://github.com/networkx/networkx.git@main --no-deps pip install -e . --no-deps - name: Style checks run: | diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b16f901..d11c621 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,6 +2,8 @@ # # Before first use: `pre-commit install` # To run: `pre-commit run --all-files` +# To update: `pre-commit autoupdate` +# - &flake8_dependencies below needs updated manually fail_fast: true repos: - repo: https://github.com/pre-commit/pre-commit-hooks @@ -26,7 +28,7 @@ repos: - id: isort language_version: python3 - repo: https://github.com/asottile/pyupgrade - rev: v3.1.0 + rev: v3.2.2 hooks: - id: pyupgrade args: [--py38-plus] @@ -45,9 +47,10 @@ repos: hooks: - id: flake8 additional_dependencies: &flake8_dependencies + # These versions need updated manually - flake8==5.0.4 - - flake8-comprehensions==3.10.0 - - flake8-bugbear==22.9.23 + - flake8-comprehensions==3.10.1 + - flake8-bugbear==22.10.27 - repo: https://github.com/asottile/yesqa rev: v1.4.0 hooks: diff --git a/graphblas_algorithms/algorithms/cluster.py b/graphblas_algorithms/algorithms/cluster.py index 66190f7..9ff89d4 100644 --- a/graphblas_algorithms/algorithms/cluster.py +++ b/graphblas_algorithms/algorithms/cluster.py @@ -241,7 +241,7 @@ def square_clustering(G, node_ids=None): A, degrees = G.get_properties("A degrees+") # TODO" how to handle self-edges? # P2 from https://arxiv.org/pdf/2007.11111.pdf; we'll also use it as scratch if node_ids is not None: - v = Vector.from_values(node_ids, True, size=degrees.size) + v = Vector.from_coo(node_ids, True, size=degrees.size) Asubset = binary.second(v & A).new(name="A_subset") else: Asubset = A @@ -298,10 +298,10 @@ def generalized_degree(G, *, mask=None): else: Tri(A.S) << 0 Tri(Tri.S, binary.second) << plus_pair(Tri @ A.T) - rows, cols, vals = Tri.to_values() + rows, cols, vals = Tri.to_coo() # The column index indicates the number of triangles an edge participates in. # The largest this can be is `A.ncols - 1`. Values is count of edges. - return Matrix.from_values( + return Matrix.from_coo( rows, vals, np.ones(vals.size, dtype=int), diff --git a/graphblas_algorithms/algorithms/core.py b/graphblas_algorithms/algorithms/core.py index e6b500b..9212242 100644 --- a/graphblas_algorithms/algorithms/core.py +++ b/graphblas_algorithms/algorithms/core.py @@ -28,7 +28,7 @@ def k_truss(G: Graph, k) -> Graph: S = C # Remove isolate nodes - indices, _ = C.reduce_rowwise(monoid.any).to_values() + indices, _ = C.reduce_rowwise(monoid.any).to_coo() Ktruss = C[indices, indices].new() # Convert back to networkx graph with correct node ids diff --git a/graphblas_algorithms/algorithms/dag.py b/graphblas_algorithms/algorithms/dag.py index 19a921e..cd1d4c6 100644 --- a/graphblas_algorithms/algorithms/dag.py +++ b/graphblas_algorithms/algorithms/dag.py @@ -10,7 +10,7 @@ def descendants(G, source): raise KeyError(f"The node {source} is not in the graph") index = G._key_to_id[source] A = G._A - q = Vector.from_values(index, True, size=A.nrows, name="q") + q = Vector.from_coo(index, True, size=A.nrows, name="q") rv = q.dup(name="descendants") for _ in range(A.nrows): q(~rv.S, replace) << lor_pair(q @ A) @@ -26,7 +26,7 @@ def ancestors(G, source): raise KeyError(f"The node {source} is not in the graph") index = G._key_to_id[source] A = G._A - q = Vector.from_values(index, True, size=A.nrows, name="q") + q = Vector.from_coo(index, True, size=A.nrows, name="q") rv = q.dup(name="descendants") for _ in range(A.nrows): q(~rv.S, replace) << lor_pair(A @ q) diff --git a/graphblas_algorithms/algorithms/shortest_paths/generic.py b/graphblas_algorithms/algorithms/shortest_paths/generic.py index 0ca8aab..864ac2a 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/generic.py +++ b/graphblas_algorithms/algorithms/shortest_paths/generic.py @@ -11,9 +11,9 @@ def has_path(G, source, target): if src == dst: return True A = G._A - q_src = Vector.from_values(src, True, size=A.nrows, name="q_src") + q_src = Vector.from_coo(src, True, size=A.nrows, name="q_src") seen_src = q_src.dup(name="seen_src") - q_dst = Vector.from_values(dst, True, size=A.nrows, name="q_dst") + q_dst = Vector.from_coo(dst, True, size=A.nrows, name="q_dst") seen_dst = q_dst.dup(name="seen_dst") for _ in range(A.nrows // 2): q_src(~seen_src.S, replace) << lor_pair(q_src @ A) diff --git a/graphblas_algorithms/algorithms/simple_paths.py b/graphblas_algorithms/algorithms/simple_paths.py index b690845..646787c 100644 --- a/graphblas_algorithms/algorithms/simple_paths.py +++ b/graphblas_algorithms/algorithms/simple_paths.py @@ -16,7 +16,7 @@ def is_simple_path(G, nodes): if len(indices) != len(nodes) or len(indices) > len(set(indices)): return False # Check all steps in path at once - P = Matrix.from_values(indices[:-1], indices[1:], True, nrows=A.nrows, ncols=A.ncols) + P = Matrix.from_coo(indices[:-1], indices[1:], True, nrows=A.nrows, ncols=A.ncols) P << binary.second(A & P) return P.nvals == len(indices) - 1 # Alternative diff --git a/graphblas_algorithms/algorithms/tournament.py b/graphblas_algorithms/algorithms/tournament.py index 41a850a..32624ff 100644 --- a/graphblas_algorithms/algorithms/tournament.py +++ b/graphblas_algorithms/algorithms/tournament.py @@ -15,7 +15,7 @@ def is_tournament(G): def score_sequence(G): degrees = G.get_property("row_degrees+") - _, values = degrees.to_values(indices=False, sort=False) + _, values = degrees.to_coo(indices=False, sort=False) values.sort() if degrees.nvals != degrees.size: values = np.pad(values, (degrees.size - degrees.nvals, 0)) diff --git a/graphblas_algorithms/classes/_utils.py b/graphblas_algorithms/classes/_utils.py index ab52d8f..c8f4eba 100644 --- a/graphblas_algorithms/classes/_utils.py +++ b/graphblas_algorithms/classes/_utils.py @@ -78,7 +78,7 @@ def dict_to_vector(self, d, *, size=None, dtype=None, name=None): size = len(self) key_to_id = self._key_to_id indices, values = zip(*((key_to_id[key], val) for key, val in d.items())) - return Vector.from_values(indices, values, size=size, dtype=dtype, name=name) + return Vector.from_coo(indices, values, size=size, dtype=dtype, name=name) def list_to_vector(self, nodes, dtype=bool, *, size=None, name=None): @@ -88,7 +88,7 @@ def list_to_vector(self, nodes, dtype=bool, *, size=None, name=None): size = len(self) key_to_id = self._key_to_id index = [key_to_id[key] for key in nodes] - return Vector.from_values(index, True, size=size, dtype=dtype, name=name) + return Vector.from_coo(index, True, size=size, dtype=dtype, name=name) def list_to_mask(self, nodes, *, size=None, name="mask"): @@ -122,7 +122,7 @@ def set_to_vector(self, nodes, dtype=bool, *, ignore_extra=False, size=None, nam nodes = set(nodes) nodes = nodes & key_to_id.keys() index = [key_to_id[key] for key in nodes] - return Vector.from_values(index, True, size=size, dtype=dtype, name=name) + return Vector.from_coo(index, True, size=size, dtype=dtype, name=name) def vector_to_dict(self, v, *, mask=None, fillvalue=None): @@ -132,7 +132,7 @@ def vector_to_dict(self, v, *, mask=None, fillvalue=None): elif fillvalue is not None and v.nvals < v.size: v(mask=~v.S) << fillvalue id_to_key = self.id_to_key - return {id_to_key[index]: value for index, value in zip(*v.to_values(sort=False))} + return {id_to_key[index]: value for index, value in zip(*v.to_coo(sort=False))} def vector_to_nodemap(self, v, *, mask=None, fillvalue=None): @@ -165,7 +165,7 @@ def vector_to_nodeset(self, v): def vector_to_set(self, v): id_to_key = self.id_to_key - indices, _ = v.to_values(values=False, sort=False) + indices, _ = v.to_coo(values=False, sort=False) return {id_to_key[index] for index in indices} @@ -227,7 +227,7 @@ def to_networkx(self, edge_attribute="weight"): A = self.get_property("L+") G.add_nodes_from(self._key_to_id) id_to_key = self.id_to_key - rows, cols, vals = A.to_values() + rows, cols, vals = A.to_coo() rows = (id_to_key[row] for row in rows.tolist()) cols = (id_to_key[col] for col in cols.tolist()) if edge_attribute is None: diff --git a/graphblas_algorithms/classes/nodemap.py b/graphblas_algorithms/classes/nodemap.py index 950a7d9..bedba8a 100644 --- a/graphblas_algorithms/classes/nodemap.py +++ b/graphblas_algorithms/classes/nodemap.py @@ -52,7 +52,7 @@ def __getitem__(self, key): def __iter__(self): # Slow if we iterate over one; fast if we iterate over all return map( - self.id_to_key.__getitem__, self.vector.to_values(values=False, sort=False)[0].tolist() + self.id_to_key.__getitem__, self.vector.to_coo(values=False, sort=False)[0].tolist() ) def __len__(self): @@ -123,7 +123,7 @@ def __getitem__(self, key): def __iter__(self): # Slow if we iterate over one; fast if we iterate over all - return iter(self.vector.to_values(values=False, sort=False)[0].tolist()) + return iter(self.vector.to_coo(values=False, sort=False)[0].tolist()) def __len__(self): return self.vector.nvals @@ -232,7 +232,7 @@ def __iter__(self): # Slow if we iterate over one; fast if we iterate over all return map( self.id_to_key.__getitem__, - self._get_rows().to_values(values=False, sort=False)[0].tolist(), + self._get_rows().to_coo(values=False, sort=False)[0].tolist(), ) def __len__(self): diff --git a/graphblas_algorithms/classes/nodeset.py b/graphblas_algorithms/classes/nodeset.py index eb3678b..81f4839 100644 --- a/graphblas_algorithms/classes/nodeset.py +++ b/graphblas_algorithms/classes/nodeset.py @@ -46,7 +46,7 @@ def __contains__(self, x): def __iter__(self): # Slow if we iterate over one; fast if we iterate over all return map( - self.id_to_key.__getitem__, self.vector.to_values(values=False, sort=False)[0].tolist() + self.id_to_key.__getitem__, self.vector.to_coo(values=False, sort=False)[0].tolist() ) def __len__(self): diff --git a/graphblas_algorithms/interface.py b/graphblas_algorithms/interface.py index 4102d3b..e4b1b12 100644 --- a/graphblas_algorithms/interface.py +++ b/graphblas_algorithms/interface.py @@ -1,5 +1,3 @@ -import pytest - from . import nxapi @@ -93,6 +91,10 @@ def convert_to_nx(obj, *, name=None): @staticmethod def on_start_tests(items): + try: + import pytest + except ImportError: # pragma: no cover (import) + return skip = [ ("test_attributes", {"TestBoruvka", "test_mst.py"}), ("test_weight_attribute", {"TestBoruvka", "test_mst.py"}), diff --git a/graphblas_algorithms/nxapi/boundary.py b/graphblas_algorithms/nxapi/boundary.py index 155ea61..8907f09 100644 --- a/graphblas_algorithms/nxapi/boundary.py +++ b/graphblas_algorithms/nxapi/boundary.py @@ -21,7 +21,7 @@ def edge_boundary(G, nbunch1, nbunch2=None, data=False, keys=False, default=None v1 = G.set_to_vector(nbunch1, ignore_extra=True) v2 = G.set_to_vector(nbunch2, ignore_extra=True) result = algorithms.edge_boundary(G, v1, v2, is_weighted=is_multigraph or data) - rows, cols, vals = result.to_values(values=is_multigraph or data) + rows, cols, vals = result.to_coo(values=is_multigraph or data) id_to_key = G.id_to_key if data: it = zip( diff --git a/graphblas_algorithms/nxapi/cluster.py b/graphblas_algorithms/nxapi/cluster.py index 0b0b550..6644189 100644 --- a/graphblas_algorithms/nxapi/cluster.py +++ b/graphblas_algorithms/nxapi/cluster.py @@ -95,7 +95,7 @@ def _split(L, k): # TODO: should this move into algorithms? def _square_clustering_split(G, node_ids=None, *, nsplits): if node_ids is None: - node_ids = G._A.reduce_rowwise(monoid.any).to_values()[0] + node_ids = G._A.reduce_rowwise(monoid.any).to_coo()[0] result = None for chunk_ids in _split(node_ids, nsplits): res = algorithms.square_clustering(G, chunk_ids) diff --git a/graphblas_algorithms/tests/test_match_nx.py b/graphblas_algorithms/tests/test_match_nx.py index cbd9ddc..918b6f2 100644 --- a/graphblas_algorithms/tests/test_match_nx.py +++ b/graphblas_algorithms/tests/test_match_nx.py @@ -28,17 +28,9 @@ def isdispatched(func): """Can this NetworkX function dispatch to other backends?""" - # Haha, there should be a better way to know this - registered_algorithms = backends._registered_algorithms - try: - return ( - func.__globals__.get("_registered_algorithms") is registered_algorithms - and func.__module__.startswith("networkx") - and func.__module__ != "networkx.classes.backends" - and set(func.__code__.co_freevars) == {"func", "name"} - ) - except Exception: - return False + return ( + callable(func) and hasattr(func, "dispatchname") and func.__module__.startswith("networkx") + ) def dispatchname(func): @@ -46,8 +38,7 @@ def dispatchname(func): # Haha, there should be a better way to get this if not isdispatched(func): raise ValueError(f"Function is not dispatched in NetworkX: {func.__name__}") - index = func.__code__.co_freevars.index("name") - return func.__closure__[index].cell_contents + return func.dispatchname def fullname(func): diff --git a/requirements.txt b/requirements.txt index b005efe..90acea8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -python-graphblas >=2022.10.1 +python-graphblas >=2022.11.0 diff --git a/scripts/bench_pagerank.py b/scripts/bench_pagerank.py index 5162cf3..512d829 100644 --- a/scripts/bench_pagerank.py +++ b/scripts/bench_pagerank.py @@ -178,14 +178,14 @@ def main(filename, backend, time, n, verify, alpha, tol, _get_result=False): start = timeit.default_timer() df = pd.read_csv(filename, delimiter="\t", names=["row", "col"]) - G = Matrix.from_values(df["row"].values, df["col"].values, 1) + G = Matrix.from_coo(df["row"].values, df["col"].values, 1) stop = timeit.default_timer() num_nodes = G.nrows num_edges = G.nvals if _get_result: result = pagerank(G, alpha=alpha, tol=tol) result(~result.S) << 0 # Densify just in case - return result.to_values()[1] + return result.to_coo()[1] elif backend == "scipy": import pandas as pd