From c9ed0ab1c34c70a7a7480733d2397cc0649d41f7 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Sat, 11 Feb 2023 00:24:50 -0600 Subject: [PATCH 01/13] Add `{all_pairs,single_source}_bellman_ford_path_length` That is, add these to `nxapi`: - `all_pairs_bellman_ford_path_length` - `single_source_bellman_ford_path_length` --- graphblas_algorithms/algorithms/exceptions.py | 4 ++ .../algorithms/shortest_paths/__init__.py | 1 + .../algorithms/shortest_paths/weighted.py | 45 +++++++++++++++++++ graphblas_algorithms/interface.py | 24 ++++++++-- graphblas_algorithms/nxapi/exception.py | 4 ++ .../nxapi/shortest_paths/__init__.py | 1 + .../nxapi/shortest_paths/weighted.py | 36 +++++++++++++++ 7 files changed, 111 insertions(+), 4 deletions(-) create mode 100644 graphblas_algorithms/algorithms/shortest_paths/weighted.py create mode 100644 graphblas_algorithms/nxapi/shortest_paths/weighted.py diff --git a/graphblas_algorithms/algorithms/exceptions.py b/graphblas_algorithms/algorithms/exceptions.py index 883ffeb..f4ef352 100644 --- a/graphblas_algorithms/algorithms/exceptions.py +++ b/graphblas_algorithms/algorithms/exceptions.py @@ -12,3 +12,7 @@ class EmptyGraphError(GraphBlasAlgorithmException): class PointlessConcept(GraphBlasAlgorithmException): pass + + +class Unbounded(GraphBlasAlgorithmException): + pass diff --git a/graphblas_algorithms/algorithms/shortest_paths/__init__.py b/graphblas_algorithms/algorithms/shortest_paths/__init__.py index 60613e6..9fc57fb 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/__init__.py +++ b/graphblas_algorithms/algorithms/shortest_paths/__init__.py @@ -1,2 +1,3 @@ from .dense import * from .generic import * +from .weighted import * diff --git a/graphblas_algorithms/algorithms/shortest_paths/weighted.py b/graphblas_algorithms/algorithms/shortest_paths/weighted.py new file mode 100644 index 0000000..fe3547b --- /dev/null +++ b/graphblas_algorithms/algorithms/shortest_paths/weighted.py @@ -0,0 +1,45 @@ +from graphblas import Vector, binary, monoid, replace +from graphblas.semiring import min_plus + +from ..exceptions import Unbounded + +__all__ = ["single_source_bellman_ford_path_length"] + + +def single_source_bellman_ford_path_length(G, source): + # No need for `is_weighted=` keyword, b/c this is assumed to be weighted (I think) + index = G._key_to_id[source] + A = G._A + if A.dtype == bool: + # Should we upcast e.g. INT8 to INT64 as well? + dtype = int + else: + dtype = A.dtype + n = A.nrows + d = Vector(dtype, n, name="single_source_bellman_ford_path_length") + d[index] = 0 + cur = d.dup(name="cur") + mask = Vector(bool, n, name="mask") + for _i in range(n - 1): + # This is a slightly modified Bellman-Ford algorithm. + # `cur` is the current frontier of values that improved in the previous iteration. + # This means that in this iteration we drop values from `cur` that are not better. 
+ cur << min_plus(cur @ A) + + # Mask is True where cur not in d or cur < d + mask(cur.S, replace) << True # or: `mask << unary.one[bool](cur)` + mask(binary.second) << binary.lt(cur & d) + + # Drop values from `cur` that didn't improve + cur(mask.V, replace) << cur + if cur.nvals == 0: + break + # Update `d` with values that improved + d(cur.S) << cur + else: + # Check for negative cycle when for loop completes without breaking + cur << min_plus(cur @ A) + mask << binary.lt(cur & d) + if mask.reduce(monoid.lor): + raise Unbounded("Negative cycle detected.") + return d diff --git a/graphblas_algorithms/interface.py b/graphblas_algorithms/interface.py index e88cc4f..1a142c3 100644 --- a/graphblas_algorithms/interface.py +++ b/graphblas_algorithms/interface.py @@ -60,6 +60,12 @@ class Dispatcher: nxapi.shortest_paths.dense.floyd_warshall_predecessor_and_distance ) has_path = nxapi.shortest_paths.generic.has_path + all_pairs_bellman_ford_path_length = ( + nxapi.shortest_paths.weighted.all_pairs_bellman_ford_path_length + ) + single_source_bellman_ford_path_length = ( + nxapi.shortest_paths.weighted.single_source_bellman_ford_path_length + ) # Simple Paths is_simple_path = nxapi.simple_paths.is_simple_path # S Metric @@ -103,13 +109,23 @@ def on_start_tests(items): import pytest except ImportError: # pragma: no cover (import) return + + def key(testpath): + filename, path = testpath.split(":") + classname, testname = path.split(".") + return (testname, frozenset({classname, filename})) + + # Reasons to skip tests multi_attributed = "unable to handle multi-attributed graphs" multidigraph = "unable to handle MultiDiGraph" - freeze = frozenset + multigraph = "unable to handle MultiGraph" + + # Which tests to skip skip = { - ("test_attributes", freeze({"TestBoruvka", "test_mst.py"})): multi_attributed, - ("test_weight_attribute", freeze({"TestBoruvka", "test_mst.py"})): multi_attributed, - ("test_zero_weight", freeze({"TestFloyd", "test_dense.py"})): multidigraph, + key("test_mst.py:TestBoruvka.test_attributes"): multi_attributed, + key("test_mst.py:TestBoruvka.test_weight_attribute"): multi_attributed, + key("test_dense.py:TestFloyd.test_zero_weight"): multidigraph, + key("test_weighted.py:TestBellmanFordAndGoldbergRadzik.test_multigraph"): multigraph, } for item in items: kset = set(item.keywords) diff --git a/graphblas_algorithms/nxapi/exception.py b/graphblas_algorithms/nxapi/exception.py index 0c5b666..2630384 100644 --- a/graphblas_algorithms/nxapi/exception.py +++ b/graphblas_algorithms/nxapi/exception.py @@ -8,6 +8,9 @@ class NetworkXError(Exception): class NetworkXPointlessConcept(Exception): pass + class NetworkXUnbounded(Exception): + pass + class NodeNotFound(Exception): pass @@ -18,6 +21,7 @@ class PowerIterationFailedConvergence(Exception): from networkx import ( NetworkXError, NetworkXPointlessConcept, + NetworkXUnbounded, NodeNotFound, PowerIterationFailedConvergence, ) diff --git a/graphblas_algorithms/nxapi/shortest_paths/__init__.py b/graphblas_algorithms/nxapi/shortest_paths/__init__.py index 60613e6..9fc57fb 100644 --- a/graphblas_algorithms/nxapi/shortest_paths/__init__.py +++ b/graphblas_algorithms/nxapi/shortest_paths/__init__.py @@ -1,2 +1,3 @@ from .dense import * from .generic import * +from .weighted import * diff --git a/graphblas_algorithms/nxapi/shortest_paths/weighted.py b/graphblas_algorithms/nxapi/shortest_paths/weighted.py new file mode 100644 index 0000000..aeaed86 --- /dev/null +++ b/graphblas_algorithms/nxapi/shortest_paths/weighted.py @@ -0,0 +1,36 @@ +from 
graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +from ..exception import NetworkXUnbounded, NodeNotFound + +__all__ = [ + "all_pairs_bellman_ford_path_length", + "single_source_bellman_ford_path_length", +] + + +def all_pairs_bellman_ford_path_length(G, weight="weight"): + # TODO: what if weight is a function? + # How should we implement and call `algorithms.all_pairs_bellman_ford_path_length`? + # Should we compute in chunks to expose more parallelism? + G = to_graph(G, weight=weight) + for source in G: + try: + d = algorithms.single_source_bellman_ford_path_length(G, source) + except algorithms.exceptions.Unbounded as e: + raise NetworkXUnbounded(*e.args) from e + except KeyError as e: + raise NodeNotFound(*e.args) from e + yield (source, G.vector_to_nodemap(d)) + + +def single_source_bellman_ford_path_length(G, source, weight="weight"): + # TODO: what if weight is a function? + G = to_graph(G, weight=weight) + try: + d = algorithms.single_source_bellman_ford_path_length(G, source) + except algorithms.exceptions.Unbounded as e: + raise NetworkXUnbounded(*e.args) from e + except KeyError as e: + raise NodeNotFound(*e.args) from e + return G.vector_to_nodemap(d) From caa3e830bf87c938791eb0aa149e4f57e6cd67a7 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Sat, 11 Feb 2023 01:19:26 -0600 Subject: [PATCH 02/13] Faster (and clearer) --- graphblas_algorithms/algorithms/shortest_paths/weighted.py | 5 +++-- scripts/bench.py | 5 +++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/graphblas_algorithms/algorithms/shortest_paths/weighted.py b/graphblas_algorithms/algorithms/shortest_paths/weighted.py index fe3547b..65a0d6c 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/weighted.py +++ b/graphblas_algorithms/algorithms/shortest_paths/weighted.py @@ -1,4 +1,4 @@ -from graphblas import Vector, binary, monoid, replace +from graphblas import Vector, binary, monoid, replace, unary from graphblas.semiring import min_plus from ..exceptions import Unbounded @@ -20,6 +20,7 @@ def single_source_bellman_ford_path_length(G, source): d[index] = 0 cur = d.dup(name="cur") mask = Vector(bool, n, name="mask") + one = unary.one[bool] for _i in range(n - 1): # This is a slightly modified Bellman-Ford algorithm. # `cur` is the current frontier of values that improved in the previous iteration. @@ -27,7 +28,7 @@ def single_source_bellman_ford_path_length(G, source): cur << min_plus(cur @ A) # Mask is True where cur not in d or cur < d - mask(cur.S, replace) << True # or: `mask << unary.one[bool](cur)` + mask << one(cur) mask(binary.second) << binary.lt(cur & d) # Drop values from `cur` that didn't improve diff --git a/scripts/bench.py b/scripts/bench.py index e67640b..f5be06e 100755 --- a/scripts/bench.py +++ b/scripts/bench.py @@ -117,6 +117,9 @@ def stime(time): } # Is square_clustering undirected only? 
graphblas-algorthms doesn't implement it for directed undirected_only = {"generalized_degree", "k_truss", "triangles", "square_clustering"} +returns_iterators = { + "all_pairs_bellman_ford_path_length", +} def getfunction(functionname, backend): @@ -151,6 +154,8 @@ def main(dataname, backend, functionname, time=3.0, n=None, extra=None, display= benchstring = functioncall.get(functionname, "func(G)") if extra is not None: benchstring = f"{benchstring[:-1]}, {extra})" + if functionname in returns_iterators: + benchstring = f"for _ in {benchstring}: pass" globals = {"func": func, "G": G} if functionname in poweriteration: benchstring = f"try:\n {benchstring}\nexcept exc:\n pass" From 343fcb8ab24c4f12cca13b48244e75629fdefc25 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Sat, 11 Feb 2023 22:03:14 -0600 Subject: [PATCH 03/13] Implement `algorithms.bellman_ford_path_lengths` to compute in chunks --- .../algorithms/shortest_paths/weighted.py | 58 ++++++++++++++++++- graphblas_algorithms/classes/_utils.py | 4 +- .../nxapi/shortest_paths/weighted.py | 35 +++++++---- scripts/bench.py | 4 +- scripts/scipy_impl.py | 11 ++++ 5 files changed, 93 insertions(+), 19 deletions(-) diff --git a/graphblas_algorithms/algorithms/shortest_paths/weighted.py b/graphblas_algorithms/algorithms/shortest_paths/weighted.py index 65a0d6c..26983a7 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/weighted.py +++ b/graphblas_algorithms/algorithms/shortest_paths/weighted.py @@ -1,9 +1,13 @@ -from graphblas import Vector, binary, monoid, replace, unary +import numpy as np +from graphblas import Matrix, Vector, binary, monoid, replace, unary from graphblas.semiring import min_plus from ..exceptions import Unbounded -__all__ = ["single_source_bellman_ford_path_length"] +__all__ = [ + "single_source_bellman_ford_path_length", + "bellman_ford_path_lengths", +] def single_source_bellman_ford_path_length(G, source): @@ -44,3 +48,53 @@ def single_source_bellman_ford_path_length(G, source): if mask.reduce(monoid.lor): raise Unbounded("Negative cycle detected.") return d + + +def bellman_ford_path_lengths(G, nodes=None, *, expand_output=False): + """ + + Parameters + ---------- + expand_output : bool, default False + When False, the returned Matrix has one row per node in nodes. + When True, the returned Matrix has the same shape as the input Matrix. + """ + # Same algorithms as in `single_source_bellman_ford_path_length`, but with + # `Cur` as a Matrix with each row corresponding to a source node. 
+ A = G._A + if A.dtype == bool: + dtype = int + else: + dtype = A.dtype + n = A.nrows + if nodes is None: + # TODO: `D = Vector.from_iso_value(0, n, dtype).diag()` + D = Vector(dtype, n, name="bellman_ford_path_lengths_vector") + D << 0 + D = D.diag(name="bellman_ford_path_lengths") + else: + ids = G.list_to_ids(nodes) + D = Matrix.from_coo( + np.arange(len(ids), dtype=np.uint64), ids, 0, dtype, nrows=len(ids), ncols=n + ) + Cur = D.dup(name="Cur") + Mask = Matrix(bool, D.nrows, D.ncols, name="Mask") + one = unary.one[bool] + for _i in range(n - 1): + Cur << min_plus(Cur @ A) + Mask << one(Cur) + Mask(binary.second) << binary.lt(Cur & D) + Cur(Mask.V, replace) << Cur + if Cur.nvals == 0: + break + D(Cur.S) << Cur + else: + Cur << min_plus(Cur @ A) + Mask << binary.lt(Cur & D) + if Mask.reduce_scalar(monoid.lor): + raise Unbounded("Negative cycle detected.") + if nodes is not None and expand_output: + rv = Matrix(D.dtype, n, n, name=D.name) + rv[ids, :] = D + return rv + return D diff --git a/graphblas_algorithms/classes/_utils.py b/graphblas_algorithms/classes/_utils.py index 629316a..8a8e6c9 100644 --- a/graphblas_algorithms/classes/_utils.py +++ b/graphblas_algorithms/classes/_utils.py @@ -65,14 +65,14 @@ def dict_to_vector(self, d, *, size=None, dtype=None, name=None): return Vector.from_coo(indices, values, size=size, dtype=dtype, name=name) -def list_to_vector(self, nodes, dtype=bool, *, size=None, name=None): +def list_to_vector(self, nodes, dtype=None, *, values=True, size=None, name=None): if nodes is None: return None if size is None: size = len(self) key_to_id = self._key_to_id index = [key_to_id[key] for key in nodes] - return Vector.from_coo(index, True, size=size, dtype=dtype, name=name) + return Vector.from_coo(index, values, size=size, dtype=dtype, name=name) def list_to_mask(self, nodes, *, size=None, name="mask"): diff --git a/graphblas_algorithms/nxapi/shortest_paths/weighted.py b/graphblas_algorithms/nxapi/shortest_paths/weighted.py index aeaed86..1ff1d8c 100644 --- a/graphblas_algorithms/nxapi/shortest_paths/weighted.py +++ b/graphblas_algorithms/nxapi/shortest_paths/weighted.py @@ -9,19 +9,30 @@ ] -def all_pairs_bellman_ford_path_length(G, weight="weight"): - # TODO: what if weight is a function? - # How should we implement and call `algorithms.all_pairs_bellman_ford_path_length`? - # Should we compute in chunks to expose more parallelism? +def all_pairs_bellman_ford_path_length(G, weight="weight", *, chunksize=128): + # Larger chunksize offers more parallelism, but uses more memory. 
G = to_graph(G, weight=weight) - for source in G: - try: - d = algorithms.single_source_bellman_ford_path_length(G, source) - except algorithms.exceptions.Unbounded as e: - raise NetworkXUnbounded(*e.args) from e - except KeyError as e: - raise NodeNotFound(*e.args) from e - yield (source, G.vector_to_nodemap(d)) + if chunksize < 2: + for source in G: + try: + d = algorithms.single_source_bellman_ford_path_length(G, source) + except algorithms.exceptions.Unbounded as e: + raise NetworkXUnbounded(*e.args) from e + yield (source, G.vector_to_nodemap(d)) + else: + # We should probably make a utility function for chunking + nodes = list(G) + for start, stop in zip( + range(0, len(nodes), chunksize), range(chunksize, len(nodes) + chunksize, chunksize) + ): + cur_nodes = nodes[start:stop] + try: + D = algorithms.bellman_ford_path_lengths(G, cur_nodes) + except algorithms.exceptions.Unbounded as e: + raise NetworkXUnbounded(*e.args) from e + for i, source in enumerate(cur_nodes): + d = D[i, :].new(name=f"all_pairs_bellman_ford_path_length_{i}") + yield (source, G.vector_to_nodemap(d)) def single_source_bellman_ford_path_length(G, source, weight="weight"): diff --git a/scripts/bench.py b/scripts/bench.py index f5be06e..d743bc0 100755 --- a/scripts/bench.py +++ b/scripts/bench.py @@ -117,9 +117,7 @@ def stime(time): } # Is square_clustering undirected only? graphblas-algorthms doesn't implement it for directed undirected_only = {"generalized_degree", "k_truss", "triangles", "square_clustering"} -returns_iterators = { - "all_pairs_bellman_ford_path_length", -} +returns_iterators = {"all_pairs_bellman_ford_path_length", "isolates"} def getfunction(functionname, backend): diff --git a/scripts/scipy_impl.py b/scripts/scipy_impl.py index adf7ee3..2970be5 100644 --- a/scripts/scipy_impl.py +++ b/scripts/scipy_impl.py @@ -52,3 +52,14 @@ def pagerank( return x # return dict(zip(nodelist, map(float, x))) raise nx.PowerIterationFailedConvergence(max_iter) + + +def all_pairs_bellman_ford_path_length(A, weight="weight"): + for source in range(A.shape[0]): + d = single_source_bellman_ford_path_length(A, source) + yield (source, d) + + +def single_source_bellman_ford_path_length(A, source, weight="weight"): + return scipy.sparse.csgraph.bellman_ford(A, indices=source) # So slow! + # return scipy.sparse.csgraph.dijkstra(A, indices=source) # Faster From 2ca6b0d24cecaaf02a1e7a487013d1fb37d036b8 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Sun, 12 Feb 2023 11:46:53 -0600 Subject: [PATCH 04/13] Allow to compute all at once --- graphblas_algorithms/nxapi/shortest_paths/weighted.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/graphblas_algorithms/nxapi/shortest_paths/weighted.py b/graphblas_algorithms/nxapi/shortest_paths/weighted.py index 1ff1d8c..338e620 100644 --- a/graphblas_algorithms/nxapi/shortest_paths/weighted.py +++ b/graphblas_algorithms/nxapi/shortest_paths/weighted.py @@ -12,7 +12,14 @@ def all_pairs_bellman_ford_path_length(G, weight="weight", *, chunksize=128): # Larger chunksize offers more parallelism, but uses more memory. 
G = to_graph(G, weight=weight) - if chunksize < 2: + if chunksize is None or chunksize <= 0: + # All at once + try: + D = algorithms.bellman_ford_path_lengths(G) + except algorithms.exceptions.Unbounded as e: + raise NetworkXUnbounded(*e.args) from e + yield from G.matrix_to_nodenodemap(D).items() + elif chunksize < 2: for source in G: try: d = algorithms.single_source_bellman_ford_path_length(G, source) From 704937e21b77d9c0301fb200339d6181865c9473 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Sun, 12 Feb 2023 21:50:33 -0600 Subject: [PATCH 05/13] Ignore diagonals during Bellman-Ford --- .../algorithms/shortest_paths/weighted.py | 19 ++++- graphblas_algorithms/classes/_caching.py | 20 ++++- graphblas_algorithms/classes/digraph.py | 83 ++++++++++++++++++- graphblas_algorithms/classes/graph.py | 83 ++++++++++++++++++- 4 files changed, 191 insertions(+), 14 deletions(-) diff --git a/graphblas_algorithms/algorithms/shortest_paths/weighted.py b/graphblas_algorithms/algorithms/shortest_paths/weighted.py index 26983a7..2a5e681 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/weighted.py +++ b/graphblas_algorithms/algorithms/shortest_paths/weighted.py @@ -1,6 +1,6 @@ import numpy as np -from graphblas import Matrix, Vector, binary, monoid, replace, unary -from graphblas.semiring import min_plus +from graphblas import Matrix, Vector, binary, monoid, replace, select, unary +from graphblas.semiring import any_pair, min_plus from ..exceptions import Unbounded @@ -13,7 +13,7 @@ def single_source_bellman_ford_path_length(G, source): # No need for `is_weighted=` keyword, b/c this is assumed to be weighted (I think) index = G._key_to_id[source] - A = G._A + A, has_negative_diagonal = G.get_properties("offdiag has_negative_diagonal") if A.dtype == bool: # Should we upcast e.g. INT8 to INT64 as well? dtype = int @@ -47,6 +47,12 @@ def single_source_bellman_ford_path_length(G, source): mask << binary.lt(cur & d) if mask.reduce(monoid.lor): raise Unbounded("Negative cycle detected.") + if has_negative_diagonal: + # We removed diagonal entries above, so check if we visited one with a negative weight + diag = G.get_property("diag") + cur << select.valuelt(diag, 0) + if any_pair(d @ cur): + raise Unbounded("Negative cycle detected.") return d @@ -61,7 +67,7 @@ def bellman_ford_path_lengths(G, nodes=None, *, expand_output=False): """ # Same algorithms as in `single_source_bellman_ford_path_length`, but with # `Cur` as a Matrix with each row corresponding to a source node. 
- A = G._A + A, has_negative_diagonal = G.get_properties("offdiag has_negative_diagonal") if A.dtype == bool: dtype = int else: @@ -93,6 +99,11 @@ def bellman_ford_path_lengths(G, nodes=None, *, expand_output=False): Mask << binary.lt(Cur & D) if Mask.reduce_scalar(monoid.lor): raise Unbounded("Negative cycle detected.") + if has_negative_diagonal: + diag = G.get_property("diag") + cur = select.valuelt(diag, 0) + if any_pair(D @ cur).nvals > 0: + raise Unbounded("Negative cycle detected.") if nodes is not None and expand_output: rv = Matrix(D.dtype, n, n, name=D.name) rv[ids, :] = D diff --git a/graphblas_algorithms/classes/_caching.py b/graphblas_algorithms/classes/_caching.py index 24de3ec..426ba61 100644 --- a/graphblas_algorithms/classes/_caching.py +++ b/graphblas_algorithms/classes/_caching.py @@ -1,6 +1,8 @@ -from graphblas import op +from graphblas import Scalar, dtypes, op from graphblas.core import operator +NONNEGATIVE_DTYPES = {dtypes.BOOL, dtypes.UINT8, dtypes.UINT16, dtypes.UINT32, dtypes.UINT64} + def get_reduce_to_vector(key, opname, methodname): op_ = op.from_string(opname) @@ -142,7 +144,7 @@ def get_reduction(G, mask=None): cache[f"{keybase}+"] = cache[key] return cache[key] - else: + elif key[-1] == "+": def get_reduction(G, mask=None): A = G._A @@ -170,4 +172,18 @@ def get_reduction(G, mask=None): cache[f"{keybase}-"] = cache[key] return cache[key] + elif key.endswith("_diagonal"): + + def get_reduction(G, mask=None): + A = G._A + cache = G._cache + if key not in cache: + if not G.get_property("has_self_edges"): + cache[key] = Scalar(op_[A.dtype].return_type, name=key) + else: + cache[key] = G.get_property("diag").reduce(op_).new(name=key) + return cache[key] + + else: # pragma: no cover (sanity) + raise RuntimeError() return get_reduction diff --git a/graphblas_algorithms/classes/digraph.py b/graphblas_algorithms/classes/digraph.py index 0984dcd..36c62f1 100644 --- a/graphblas_algorithms/classes/digraph.py +++ b/graphblas_algorithms/classes/digraph.py @@ -6,7 +6,7 @@ import graphblas_algorithms as ga from . 
import _utils -from ._caching import get_reduce_to_scalar, get_reduce_to_vector +from ._caching import NONNEGATIVE_DTYPES, get_reduce_to_scalar, get_reduce_to_vector from .graph import Graph @@ -371,6 +371,65 @@ def get_total_recipm(G, mask=None): return cache["total_recip-"] +def has_negative_diagonal(G, mask=None): + A = G._A + cache = G._cache + if "has_negative_diagonal" not in cache: + if A.dtype in NONNEGATIVE_DTYPES or A.dtype._is_udt or cache.get("has_self_edges") is False: + cache["has_negative_diagonal"] = False + elif ( + cache.get("has_negative_edges+") is True + and cache.get("has_negative_edges-") is False + or cache.get("has_negative_edges+") is True + and cache.get("min_element-", 0) >= 0 + or cache.get("min_element+", 0) < 0 + and cache.get("min_element+", 0) < cache.get("min_element-", 0) + ): + cache["has_negative_diagonal"] = True + else: + cache["has_negative_diagonal"] = G.get_property("min_diagonal").get(0) < 0 + return cache["has_negative_diagonal"] + + +def has_negative_edgesp(G, mask=None): + A = G._A + cache = G._cache + if "has_negative_edges+" not in cache: + if A.dtype in NONNEGATIVE_DTYPES or A.dtype._is_udt: + cache["has_negative_edges+"] = False + elif ( + cache.get("has_negative_edges-") + or cache.get("min_element+", 0) < 0 + or cache.get("min_element-", 0) < 0 + or cache.get("min_diagonal", 0) < 0 + or cache.get("has_negative_diagonal") + ): + cache["has_negative_edges+"] = True + elif cache.get("has_negative_edges-") is False: + cache["has_negative_edges+"] = G.get_property("min_diagonal").get(0) < 0 + else: + cache["has_negative_edges+"] = G.get_property("min_element+").get(0) < 0 + return cache["has_negative_edges+"] + + +def has_negative_edgesm(G, mask=None): + A = G._A + cache = G._cache + if "has_negative_edges-" not in cache: + if A.dtype in NONNEGATIVE_DTYPES or A.dtype._is_udt: + cache["has_negative_edges-"] = False + elif ( + cache.get("has_negative_edges+") + and cache.get("has_self_edges") is False + or cache.get("has_negative_edges+") + and cache.get("has_negative_diagonal") is False + ): + cache["has_negative_edges-"] = True + else: + cache["has_negative_edges-"] = G.get_property("min_element-").get(0) < 0 + return cache["has_negative_edges-"] + + def has_self_edges(G, mask=None): """A.diag().nvals > 0""" A = G._A @@ -388,6 +447,8 @@ def has_self_edges(G, mask=None): cache["has_self_edges"] = cache["U-"].nvals < cache["U+"].nvals else: cache["has_self_edges"] = cache["U-"].nvals + cache["L-"].nvals < A.nvals + elif cache.get("has_negative_diagonal") is True: + cache["has_self_edges"] = True elif "total_recip-" in cache and "total_recip+" in cache: cache["has_self_edges"] = cache["total_recip+"] > cache["total_recip-"] elif "row_degrees-" in cache and "row_degrees+" in cache: @@ -473,9 +534,14 @@ def __missing__(self, key): get_reduction = get_reduce_to_scalar(key, opname) else: get_reduction = get_reduce_to_vector(key, opname, methodname) - self[key] = get_reduction - return get_reduction - raise KeyError(key) + elif key.endswith("_diagonal"): + # e.g., min_diagonal + opname = key[: -len("_diagonal")] + get_reduction = get_reduce_to_scalar(key, opname) + else: + raise KeyError(key) + self[key] = get_reduction + return get_reduction class DiGraph(Graph): @@ -507,6 +573,12 @@ class DiGraph(Graph): "total_degrees-", "total_recip+", # scalar; I don't like this name "total_recip-", # scalar; I don't like this name + "min_diagonal", + "min_element+", + "min_element-", + "has_negative_diagonal", + "has_negative_edges-", + "has_negative_edges+", 
"has_self_edges", ] ) @@ -528,6 +600,9 @@ class DiGraph(Graph): "total_degrees-": get_total_degreesm, "total_recip+": get_total_recipp, "total_recip-": get_total_recipm, + "has_negative_diagonal": has_negative_diagonal, + "has_negative_edges-": has_negative_edgesm, + "has_negative_edges+": has_negative_edgesp, "has_self_edges": has_self_edges, } ) diff --git a/graphblas_algorithms/classes/graph.py b/graphblas_algorithms/classes/graph.py index 65e4cea..25bf1c6 100644 --- a/graphblas_algorithms/classes/graph.py +++ b/graphblas_algorithms/classes/graph.py @@ -6,7 +6,7 @@ import graphblas_algorithms as ga from . import _utils -from ._caching import get_reduce_to_scalar, get_reduce_to_vector +from ._caching import NONNEGATIVE_DTYPES, get_reduce_to_scalar, get_reduce_to_vector def get_A(G, mask=None): @@ -129,6 +129,65 @@ def get_diag(G, mask=None): return cache["diag"] +def has_negative_diagonal(G, mask=None): + A = G._A + cache = G._cache + if "has_negative_diagonal" not in cache: + if A.dtype in NONNEGATIVE_DTYPES or A.dtype._is_udt or cache.get("has_self_edges") is False: + cache["has_negative_diagonal"] = False + elif ( + cache.get("has_negative_edges+") is True + and cache.get("has_negative_edges-") is False + or cache.get("has_negative_edges+") is True + and cache.get("min_element-", 0) >= 0 + or cache.get("min_element+", 0) < 0 + and cache.get("min_element+", 0) < cache.get("min_element-", 0) + ): + cache["has_negative_diagonal"] = True + else: + cache["has_negative_diagonal"] = G.get_property("min_diagonal").get(0) < 0 + return cache["has_negative_diagonal"] + + +def has_negative_edgesp(G, mask=None): + A = G._A + cache = G._cache + if "has_negative_edges+" not in cache: + if A.dtype in NONNEGATIVE_DTYPES or A.dtype._is_udt: + cache["has_negative_edges+"] = False + elif ( + cache.get("has_negative_edges-") + or cache.get("min_element+", 0) < 0 + or cache.get("min_element-", 0) < 0 + or cache.get("min_diagonal", 0) < 0 + or cache.get("has_negative_diagonal") + ): + cache["has_negative_edges+"] = True + elif cache.get("has_negative_edges-") is False: + cache["has_negative_edges+"] = G.get_property("min_diagonal").get(0) < 0 + else: + cache["has_negative_edges+"] = G.get_property("min_element+").get(0) < 0 + return cache["has_negative_edges+"] + + +def has_negative_edgesm(G, mask=None): + A = G._A + cache = G._cache + if "has_negative_edges-" not in cache: + if A.dtype in NONNEGATIVE_DTYPES or A.dtype._is_udt: + cache["has_negative_edges-"] = False + elif ( + cache.get("has_negative_edges+") + and cache.get("has_self_edges") is False + or cache.get("has_negative_edges+") + and cache.get("has_negative_diagonal") is False + ): + cache["has_negative_edges-"] = True + else: + cache["has_negative_edges-"] = G.get_property("min_element-").get(0) < 0 + return cache["has_negative_edges-"] + + def has_self_edges(G, mask=None): """A.diag().nvals > 0""" A = G._A @@ -144,6 +203,8 @@ def has_self_edges(G, mask=None): cache["has_self_edges"] = 2 * cache["U-"].nvals < A.nvals elif "offdiag" in cache: cache["has_self_edges"] = A.nvals > cache["offdiag"].nvals + elif cache.get("has_negative_diagonal") is True: + cache["has_self_edges"] = True else: G.get_property("diag") return cache["has_self_edges"] @@ -190,9 +251,14 @@ def __missing__(self, key): else: get_reduction = get_reduce_to_vector(key, opname, methodname) self[f"{opname}_columnwise{key[-1]}"] = get_reduction - self[key] = get_reduction - return get_reduction - raise KeyError(key) + elif key.endswith("_diagonal"): + # e.g., min_diagonal + opname 
= key[: -len("_diagonal")] + get_reduction = get_reduce_to_scalar(key, opname) + else: + raise KeyError(key) + self[key] = get_reduction + return get_reduction class Graph: @@ -216,6 +282,12 @@ class Graph: "diag", "count_rowwise+", "count_rowwise-", + "min_diagonal", + "min_element+", + "min_element-", + "has_negative_diagonal", + "has_negative_edges-", + "has_negative_edges+", "has_self_edges", ] ) @@ -231,6 +303,9 @@ class Graph: "U-": get_Um, "L-": get_Lm, "diag": get_diag, + "has_negative_diagonal": has_negative_diagonal, + "has_negative_edges-": has_negative_edgesm, + "has_negative_edges+": has_negative_edgesp, "has_self_edges": has_self_edges, } ) From 4cbbfe980ed0ebf18784477a7b1f81752f29e782 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 13 Feb 2023 00:36:07 -0600 Subject: [PATCH 06/13] Add comment, and use `offdiag` more places. --- graphblas_algorithms/algorithms/core.py | 2 +- graphblas_algorithms/algorithms/dag.py | 4 ++-- graphblas_algorithms/algorithms/shortest_paths/generic.py | 2 +- graphblas_algorithms/algorithms/shortest_paths/weighted.py | 3 +++ graphblas_algorithms/algorithms/simple_paths.py | 2 +- graphblas_algorithms/nxapi/cluster.py | 2 +- 6 files changed, 9 insertions(+), 6 deletions(-) diff --git a/graphblas_algorithms/algorithms/core.py b/graphblas_algorithms/algorithms/core.py index 85ab592..d531c61 100644 --- a/graphblas_algorithms/algorithms/core.py +++ b/graphblas_algorithms/algorithms/core.py @@ -28,7 +28,7 @@ def k_truss(G: Graph, k) -> Graph: S = C # Remove isolate nodes - indices, _ = C.reduce_rowwise(monoid.any).to_coo() + indices, _ = C.reduce_rowwise(monoid.any).to_coo(values=False) Ktruss = C[indices, indices].new() # Convert back to networkx graph with correct node ids diff --git a/graphblas_algorithms/algorithms/dag.py b/graphblas_algorithms/algorithms/dag.py index cd1d4c6..3cceeef 100644 --- a/graphblas_algorithms/algorithms/dag.py +++ b/graphblas_algorithms/algorithms/dag.py @@ -9,7 +9,7 @@ def descendants(G, source): if source not in G._key_to_id: raise KeyError(f"The node {source} is not in the graph") index = G._key_to_id[source] - A = G._A + A = G.get_property("offdiag") q = Vector.from_coo(index, True, size=A.nrows, name="q") rv = q.dup(name="descendants") for _ in range(A.nrows): @@ -25,7 +25,7 @@ def ancestors(G, source): if source not in G._key_to_id: raise KeyError(f"The node {source} is not in the graph") index = G._key_to_id[source] - A = G._A + A = G.get_property("offdiag") q = Vector.from_coo(index, True, size=A.nrows, name="q") rv = q.dup(name="descendants") for _ in range(A.nrows): diff --git a/graphblas_algorithms/algorithms/shortest_paths/generic.py b/graphblas_algorithms/algorithms/shortest_paths/generic.py index 864ac2a..f91c9cf 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/generic.py +++ b/graphblas_algorithms/algorithms/shortest_paths/generic.py @@ -10,7 +10,7 @@ def has_path(G, source, target): dst = G._key_to_id[target] if src == dst: return True - A = G._A + A = G.get_property("offdiag") q_src = Vector.from_coo(src, True, size=A.nrows, name="q_src") seen_src = q_src.dup(name="seen_src") q_dst = Vector.from_coo(dst, True, size=A.nrows, name="q_dst") diff --git a/graphblas_algorithms/algorithms/shortest_paths/weighted.py b/graphblas_algorithms/algorithms/shortest_paths/weighted.py index 2a5e681..ae871d0 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/weighted.py +++ b/graphblas_algorithms/algorithms/shortest_paths/weighted.py @@ -13,6 +13,9 @@ def single_source_bellman_ford_path_length(G, 
source): # No need for `is_weighted=` keyword, b/c this is assumed to be weighted (I think) index = G._key_to_id[source] + # Use `offdiag` instead of `A`, b/c self-loops don't contribute to the result, + # and negative self-loops are easy negative cycles to avoid. + # We check if we hit a self-loop negative cycle at the end. A, has_negative_diagonal = G.get_properties("offdiag has_negative_diagonal") if A.dtype == bool: # Should we upcast e.g. INT8 to INT64 as well? diff --git a/graphblas_algorithms/algorithms/simple_paths.py b/graphblas_algorithms/algorithms/simple_paths.py index 646787c..44ba66e 100644 --- a/graphblas_algorithms/algorithms/simple_paths.py +++ b/graphblas_algorithms/algorithms/simple_paths.py @@ -8,7 +8,7 @@ def is_simple_path(G, nodes): return False if len(nodes) == 1: return nodes[0] in G - A = G._A + A = G._A # offdiag instead? if A.nvals < len(nodes) - 1: return False key_to_id = G._key_to_id diff --git a/graphblas_algorithms/nxapi/cluster.py b/graphblas_algorithms/nxapi/cluster.py index e252963..544d79f 100644 --- a/graphblas_algorithms/nxapi/cluster.py +++ b/graphblas_algorithms/nxapi/cluster.py @@ -94,7 +94,7 @@ def _split(L, k): # TODO: should this move into algorithms? def _square_clustering_split(G, node_ids=None, *, nsplits): if node_ids is None: - node_ids = G._A.reduce_rowwise(monoid.any).to_coo()[0] + node_ids, _ = G._A.reduce_rowwise(monoid.any).to_coo(values=False) result = None for chunk_ids in _split(node_ids, nsplits): res = algorithms.square_clustering(G, chunk_ids) From a644bf8965fe3e4e204056742d5bd095c0b12659 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Tue, 14 Feb 2023 00:12:18 -0600 Subject: [PATCH 07/13] Do level BFS for Bellman-Ford when iso-valued (and non-negative) --- .../algorithms/shortest_paths/weighted.py | 86 ++++++++++++- graphblas_algorithms/classes/digraph.py | 119 +++--------------- graphblas_algorithms/classes/graph.py | 40 ++++++ 3 files changed, 140 insertions(+), 105 deletions(-) diff --git a/graphblas_algorithms/algorithms/shortest_paths/weighted.py b/graphblas_algorithms/algorithms/shortest_paths/weighted.py index ae871d0..ae3bc18 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/weighted.py +++ b/graphblas_algorithms/algorithms/shortest_paths/weighted.py @@ -13,6 +13,18 @@ def single_source_bellman_ford_path_length(G, source): # No need for `is_weighted=` keyword, b/c this is assumed to be weighted (I think) index = G._key_to_id[source] + if G.get_property("is_iso"): + # If the edges are iso-valued (and positive), then we can simply do level BFS + is_negative, iso_value = G.get_properties("has_negative_edges+ iso_value") + if not is_negative: + d = _bfs_level(G, source, dtype=iso_value.dtype) + if iso_value != 1: + d *= iso_value + return d + # It's difficult to detect negative cycles with BFS + if G._A[index, index].get() is not None: + raise Unbounded("Negative cycle detected.") + # Use `offdiag` instead of `A`, b/c self-loops don't contribute to the result, # and negative self-loops are easy negative cycles to avoid. # We check if we hit a self-loop negative cycle at the end. @@ -70,6 +82,19 @@ def bellman_ford_path_lengths(G, nodes=None, *, expand_output=False): """ # Same algorithms as in `single_source_bellman_ford_path_length`, but with # `Cur` as a Matrix with each row corresponding to a source node. 
+ if G.get_property("is_iso"): + is_negative, iso_value = G.get_properties("has_negative_edges+ iso_value") + if not is_negative: + D = _bfs_levels(G, nodes, dtype=iso_value.dtype) + if iso_value != 1: + D *= iso_value + if nodes is not None and expand_output and D.ncols != D.nrows: + ids = G.list_to_ids(nodes) + rv = Matrix(D.dtype, D.ncols, D.ncols, name=D.name) + rv[ids, :] = D + return rv + return D + A, has_negative_diagonal = G.get_properties("offdiag has_negative_diagonal") if A.dtype == bool: dtype = int @@ -84,7 +109,13 @@ def bellman_ford_path_lengths(G, nodes=None, *, expand_output=False): else: ids = G.list_to_ids(nodes) D = Matrix.from_coo( - np.arange(len(ids), dtype=np.uint64), ids, 0, dtype, nrows=len(ids), ncols=n + np.arange(len(ids), dtype=np.uint64), + ids, + 0, + dtype, + nrows=len(ids), + ncols=n, + name="bellman_ford_path_lengths", ) Cur = D.dup(name="Cur") Mask = Matrix(bool, D.nrows, D.ncols, name="Mask") @@ -107,8 +138,59 @@ def bellman_ford_path_lengths(G, nodes=None, *, expand_output=False): cur = select.valuelt(diag, 0) if any_pair(D @ cur).nvals > 0: raise Unbounded("Negative cycle detected.") - if nodes is not None and expand_output: + if nodes is not None and expand_output and D.ncols != D.nrows: rv = Matrix(D.dtype, n, n, name=D.name) rv[ids, :] = D return rv return D + + +def _bfs_level(G, source, *, dtype=int): + if dtype == bool: + dtype = int + index = G._key_to_id[source] + A = G.get_property("offdiag") + n = A.nrows + v = Vector(dtype, n, name="bfs_level") + q = Vector(bool, n, name="q") + v[index] = 0 + q[index] = True + any_pair_bool = any_pair[bool] + for i in range(1, n): + q(~v.S, replace) << any_pair_bool(q @ A) + if q.nvals == 0: + break + v(q.S) << i + return v + + +def _bfs_levels(G, nodes=None, *, dtype=int): + if dtype == bool: + dtype = int + A = G.get_property("offdiag") + n = A.nrows + if nodes is None: + # TODO: `D = Vector.from_iso_value(0, n, dtype).diag()` + D = Vector(dtype, n, name="bfs_levels_vector") + D << 0 + D = D.diag(name="bfs_levels") + else: + ids = G.list_to_ids(nodes) + D = Matrix.from_coo( + np.arange(len(ids), dtype=np.uint64), + ids, + 0, + dtype, + nrows=len(ids), + ncols=n, + name="bfs_levels", + ) + Q = Matrix(bool, D.nrows, D.ncols, name="Q") + Q << unary.one[bool](D) + any_pair_bool = any_pair[bool] + for i in range(1, n): + Q(~D.S, replace) << any_pair_bool(Q @ A) + if Q.nvals == 0: + break + D(Q.S) << i + return D diff --git a/graphblas_algorithms/classes/digraph.py b/graphblas_algorithms/classes/digraph.py index 36c62f1..e0b5751 100644 --- a/graphblas_algorithms/classes/digraph.py +++ b/graphblas_algorithms/classes/digraph.py @@ -1,18 +1,23 @@ from collections import defaultdict import graphblas as gb -from graphblas import Matrix, Vector, binary, replace, select, unary +from graphblas import Matrix, binary, replace, select, unary import graphblas_algorithms as ga from . 
import _utils -from ._caching import NONNEGATIVE_DTYPES, get_reduce_to_scalar, get_reduce_to_vector -from .graph import Graph - - -def get_A(G, mask=None): - """A""" - return G._A +from ._caching import get_reduce_to_scalar, get_reduce_to_vector +from .graph import ( + Graph, + get_A, + get_diag, + get_iso_value, + get_offdiag, + has_negative_diagonal, + has_negative_edgesm, + has_negative_edgesp, + is_iso, +) def get_AT(G, mask=None): @@ -24,22 +29,6 @@ def get_AT(G, mask=None): return cache["AT"] -def get_offdiag(G, mask=None): - """select.offdiag(A)""" - A = G._A - cache = G._cache - if "offdiag" not in cache: - if cache.get("has_self_edges") is False: - cache["offdiag"] = A - else: - cache["offdiag"] = select.offdiag(A).new(name="offdiag") - if "has_self_edges" not in cache: - cache["has_self_edges"] = A.nvals > cache["offdiag"].nvals - if not cache["has_self_edges"]: - cache["offdiag"] = A - return cache["offdiag"] - - def get_Up(G, mask=None): """select.triu(A)""" A = G._A @@ -126,26 +115,6 @@ def get_Lm(G, mask=None): return cache["L-"] -def get_diag(G, mask=None): - """select.diag(A)""" - A = G._A - cache = G._cache - if "diag" not in cache: - if cache.get("has_self_edges") is False: - cache["diag"] = Vector(A.dtype, size=A.nrows, name="diag") - elif "U+" in cache: - cache["diag"] = cache["U+"].diag(name="diag") - elif "L+" in cache: - cache["diag"] = cache["L+"].diag(name="diag") - else: - cache["diag"] = A.diag(name="diag") - if "has_self_edges" not in cache: - cache["has_self_edges"] = cache["diag"].nvals > 0 - if mask is not None: - return cache["diag"].dup(mask=mask) - return cache["diag"] - - def get_recip_degreesp(G, mask=None): """pair(A & A.T).reduce_rowwise()""" A = G._A @@ -371,65 +340,6 @@ def get_total_recipm(G, mask=None): return cache["total_recip-"] -def has_negative_diagonal(G, mask=None): - A = G._A - cache = G._cache - if "has_negative_diagonal" not in cache: - if A.dtype in NONNEGATIVE_DTYPES or A.dtype._is_udt or cache.get("has_self_edges") is False: - cache["has_negative_diagonal"] = False - elif ( - cache.get("has_negative_edges+") is True - and cache.get("has_negative_edges-") is False - or cache.get("has_negative_edges+") is True - and cache.get("min_element-", 0) >= 0 - or cache.get("min_element+", 0) < 0 - and cache.get("min_element+", 0) < cache.get("min_element-", 0) - ): - cache["has_negative_diagonal"] = True - else: - cache["has_negative_diagonal"] = G.get_property("min_diagonal").get(0) < 0 - return cache["has_negative_diagonal"] - - -def has_negative_edgesp(G, mask=None): - A = G._A - cache = G._cache - if "has_negative_edges+" not in cache: - if A.dtype in NONNEGATIVE_DTYPES or A.dtype._is_udt: - cache["has_negative_edges+"] = False - elif ( - cache.get("has_negative_edges-") - or cache.get("min_element+", 0) < 0 - or cache.get("min_element-", 0) < 0 - or cache.get("min_diagonal", 0) < 0 - or cache.get("has_negative_diagonal") - ): - cache["has_negative_edges+"] = True - elif cache.get("has_negative_edges-") is False: - cache["has_negative_edges+"] = G.get_property("min_diagonal").get(0) < 0 - else: - cache["has_negative_edges+"] = G.get_property("min_element+").get(0) < 0 - return cache["has_negative_edges+"] - - -def has_negative_edgesm(G, mask=None): - A = G._A - cache = G._cache - if "has_negative_edges-" not in cache: - if A.dtype in NONNEGATIVE_DTYPES or A.dtype._is_udt: - cache["has_negative_edges-"] = False - elif ( - cache.get("has_negative_edges+") - and cache.get("has_self_edges") is False - or cache.get("has_negative_edges+") - and 
cache.get("has_negative_diagonal") is False - ): - cache["has_negative_edges-"] = True - else: - cache["has_negative_edges-"] = G.get_property("min_element-").get(0) < 0 - return cache["has_negative_edges-"] - - def has_self_edges(G, mask=None): """A.diag().nvals > 0""" A = G._A @@ -600,6 +510,8 @@ class DiGraph(Graph): "total_degrees-": get_total_degreesm, "total_recip+": get_total_recipp, "total_recip-": get_total_recipm, + "is_iso": is_iso, + "iso_value": get_iso_value, "has_negative_diagonal": has_negative_diagonal, "has_negative_edges-": has_negative_edgesm, "has_negative_edges+": has_negative_edgesp, @@ -643,6 +555,7 @@ def __init__(self, incoming_graph_data=None, *, key_to_id=None, **attr): list_to_vector = _utils.list_to_vector list_to_mask = _utils.list_to_mask list_to_ids = _utils.list_to_ids + list_to_keys = _utils.list_to_keys matrix_to_dicts = _utils.matrix_to_dicts matrix_to_nodenodemap = _utils.matrix_to_nodenodemap matrix_to_vectornodemap = _utils.matrix_to_vectornodemap diff --git a/graphblas_algorithms/classes/graph.py b/graphblas_algorithms/classes/graph.py index 25bf1c6..c083a76 100644 --- a/graphblas_algorithms/classes/graph.py +++ b/graphblas_algorithms/classes/graph.py @@ -126,6 +126,8 @@ def get_diag(G, mask=None): cache["diag"] = A.diag(name="diag") if "has_self_edges" not in cache: cache["has_self_edges"] = cache["diag"].nvals > 0 + if mask is not None: + return cache["diag"].dup(mask=mask) return cache["diag"] @@ -163,6 +165,8 @@ def has_negative_edgesp(G, mask=None): or cache.get("has_negative_diagonal") ): cache["has_negative_edges+"] = True + elif cache.get("iso_value") is not None: + cache["has_negative_edges+"] = cache["iso_value"].get(0) < 0 elif cache.get("has_negative_edges-") is False: cache["has_negative_edges+"] = G.get_property("min_diagonal").get(0) < 0 else: @@ -210,6 +214,40 @@ def has_self_edges(G, mask=None): return cache["has_self_edges"] +def is_iso(G, mask=None): + A = G._A + cache = G._cache + if "is_iso" not in cache: + if "iso_value" in cache: + cache["is_iso"] = cache["iso_value"] is not None + else: + # SuiteSparse:GraphBLAS + cache["is_iso"] = A.ss.is_iso + return cache["is_iso"] + + +def get_iso_value(G, mask=None): + A = G._A + cache = G._cache + if "iso_value" not in cache: + if "is_iso" in cache: + if cache["is_iso"]: + # SuiteSparse:GraphBLAS + cache["iso_value"] = A.ss.iso_value + else: + cache["iso_value"] + else: + # min_val, max_val = G.get_properties('min_element+ max_element+') + # SuiteSparse:GraphBLAS + if A.ss.is_iso: + cache["iso_value"] = A.ss.iso_value + cache["is_iso"] = True + else: + cache["iso_value"] = None + cache["is_iso"] = False + return cache["iso_value"] + + def to_undirected_graph(G, weight=None, dtype=None): # We should do some sanity checks here to ensure we're returning a valid undirected graph if isinstance(G, Graph): @@ -303,6 +341,8 @@ class Graph: "U-": get_Um, "L-": get_Lm, "diag": get_diag, + "is_iso": is_iso, + "iso_value": get_iso_value, "has_negative_diagonal": has_negative_diagonal, "has_negative_edges-": has_negative_edgesm, "has_negative_edges+": has_negative_edgesp, From aaa19c1523d7edc64cdb9e6413e96afd38326919 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Tue, 14 Feb 2023 10:40:49 -0600 Subject: [PATCH 08/13] Use `"iso_value"` property more places instead of `A.ss.iso_value` --- graphblas_algorithms/algorithms/centrality/katz.py | 4 ++-- .../algorithms/link_analysis/pagerank_alg.py | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git 
a/graphblas_algorithms/algorithms/centrality/katz.py b/graphblas_algorithms/algorithms/centrality/katz.py index 0fe35a5..3d21331 100644 --- a/graphblas_algorithms/algorithms/centrality/katz.py +++ b/graphblas_algorithms/algorithms/centrality/katz.py @@ -35,9 +35,9 @@ def katz_centrality( raise GraphBlasAlgorithmException("beta must have a value for every node") A = G._A - if A.ss.is_iso: + if (iso_value := G.get_property("iso_value")) is not None: # Fold iso-value into alpha - alpha *= A.ss.iso_value.get(1.0) + alpha *= iso_value.get(1.0) semiring = plus_first[float] else: semiring = plus_times[float] diff --git a/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py b/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py index ce81ce1..1623819 100644 --- a/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py +++ b/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py @@ -49,11 +49,10 @@ def pagerank( row_degrees = G.get_property("plus_rowwise+") # XXX: What about self-edges? S = (alpha / row_degrees).new(name="S") - if A.ss.is_iso: + if (iso_value := G.get_property("iso_value")) is not None: # Fold iso-value of A into S # This lets us use the plus_first semiring, which is faster - iso_value = A.ss.iso_value - if iso_value != 1: + if iso_value.get(1) != 1: S *= iso_value semiring = plus_first[float] else: From b82c8afd2cca06de31f977a881d205c26ea7fef2 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Tue, 14 Feb 2023 11:09:20 -0600 Subject: [PATCH 09/13] Fail fast in these unlikely, but easily detected, cases --- .../algorithms/shortest_paths/weighted.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/graphblas_algorithms/algorithms/shortest_paths/weighted.py b/graphblas_algorithms/algorithms/shortest_paths/weighted.py index ae3bc18..88ca175 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/weighted.py +++ b/graphblas_algorithms/algorithms/shortest_paths/weighted.py @@ -24,6 +24,9 @@ def single_source_bellman_ford_path_length(G, source): # It's difficult to detect negative cycles with BFS if G._A[index, index].get() is not None: raise Unbounded("Negative cycle detected.") + if not G.is_directed() and G._A[index, :].nvals > 0: + # For undirected graphs, any negative edge is a cycle + raise Unbounded("Negative cycle detected.") # Use `offdiag` instead of `A`, b/c self-loops don't contribute to the result, # and negative self-loops are easy negative cycles to avoid. 
@@ -94,6 +97,14 @@ def bellman_ford_path_lengths(G, nodes=None, *, expand_output=False): rv[ids, :] = D return rv return D + if not G.is_directed(): + # For undirected graphs, any negative edge is a cycle + if nodes is not None: + ids = G.list_to_ids(nodes) + if G._A[ids, :].nvals > 0: + raise Unbounded("Negative cycle detected.") + elif G._A.nvals > 0: + raise Unbounded("Negative cycle detected.") A, has_negative_diagonal = G.get_properties("offdiag has_negative_diagonal") if A.dtype == bool: From ba337d48ffd175e6fa02f6de8590c99eab9cd08a Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Tue, 14 Feb 2023 14:48:12 -0600 Subject: [PATCH 10/13] Allow garbage collector to be enabled during benchmarks --- scripts/bench.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/scripts/bench.py b/scripts/bench.py index d743bc0..0feca1a 100755 --- a/scripts/bench.py +++ b/scripts/bench.py @@ -1,5 +1,6 @@ #!/usr/bin/env python import argparse +import gc import json import os import statistics @@ -146,7 +147,9 @@ def getgraph(dataname, backend="graphblas", functionname=None): return readfile(filename, is_symmetric, backend) -def main(dataname, backend, functionname, time=3.0, n=None, extra=None, display=True): +def main( + dataname, backend, functionname, time=3.0, n=None, extra=None, display=True, enable_gc=False +): G = getgraph(dataname, backend, functionname) func = getfunction(functionname, backend) benchstring = functioncall.get(functionname, "func(G)") @@ -160,7 +163,12 @@ def main(dataname, backend, functionname, time=3.0, n=None, extra=None, display= globals["exc"] = nx.PowerIterationFailedConvergence if backend == "graphblas": benchstring = f"G._cache.clear()\n{benchstring}" - timer = timeit.Timer(benchstring, globals=globals) + if enable_gc: + setup = "gc.enable()" + globals["gc"] = gc + else: + setup = "pass" + timer = timeit.Timer(benchstring, setup=setup, globals=globals) if display: line = f"Backend = {backend}, function = {functionname}, data = {dataname}" if extra is not None: @@ -237,6 +245,11 @@ def main(dataname, backend, functionname, time=3.0, n=None, extra=None, display= action="store_true", help="Print results as json instead of human-readable text", ) + parser.add_argument( + "--gc", + action="store_true", + help="Enable the garbage collector during timing (may help if running out of memory)", + ) parser.add_argument("-f", "--func", required=True, help="Which function to benchmark") parser.add_argument("--extra", help="Extra string to add to the function call") args = parser.parse_args() @@ -248,6 +261,7 @@ def main(dataname, backend, functionname, time=3.0, n=None, extra=None, display= n=args.n, extra=args.extra, display=not args.json, + enable_gc=args.gc, ) if args.json: print(json.dumps(info)) From 4370b2d733f0f71a17a2e58ffb71fd5f4f2ce9e9 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Wed, 15 Feb 2023 13:07:12 -0600 Subject: [PATCH 11/13] Automatically choose appropriate chunksize --- .pre-commit-config.yaml | 2 +- .../nxapi/shortest_paths/weighted.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 94612c9..f245318 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -56,7 +56,7 @@ repos: # These versions need updated manually - flake8==6.0.0 - flake8-comprehensions==3.10.1 - - flake8-bugbear==23.1.20 + - flake8-bugbear==23.2.13 - flake8-simplify==0.19.3 - repo: https://github.com/asottile/yesqa rev: v1.4.0 diff --git 
a/graphblas_algorithms/nxapi/shortest_paths/weighted.py b/graphblas_algorithms/nxapi/shortest_paths/weighted.py index 338e620..1c5e759 100644 --- a/graphblas_algorithms/nxapi/shortest_paths/weighted.py +++ b/graphblas_algorithms/nxapi/shortest_paths/weighted.py @@ -9,10 +9,18 @@ ] -def all_pairs_bellman_ford_path_length(G, weight="weight", *, chunksize=128): +def all_pairs_bellman_ford_path_length(G, weight="weight", *, chunksize="auto"): # Larger chunksize offers more parallelism, but uses more memory. + # Chunksize indicates for how many source nodes to compute at one time. + # The default is to choose the number of rows so the result, if dense, + # will be about 10MB. G = to_graph(G, weight=weight) - if chunksize is None or chunksize <= 0: + if chunksize == "auto": + # TODO: make a utility function for this that can be reused + targetsize = 10 * 1024 * 1024 # 10 MB + chunksize = max(1, targetsize // (len(G) * G._A.dtype.np_type.itemsize)) + + if chunksize is None or chunksize <= 0 or chunksize >= len(G): # All at once try: D = algorithms.bellman_ford_path_lengths(G) From 7268d80987c12ff0ac8b4661a978b467f7808eeb Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Fri, 17 Feb 2023 05:50:59 +0100 Subject: [PATCH 12/13] Use `nsplits="auto"` in square_clustering (default to 256 MB chunks) --- .../algorithms/shortest_paths/weighted.py | 4 +-- graphblas_algorithms/nxapi/cluster.py | 28 +++++++++++++------ 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/graphblas_algorithms/algorithms/shortest_paths/weighted.py b/graphblas_algorithms/algorithms/shortest_paths/weighted.py index 88ca175..f073180 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/weighted.py +++ b/graphblas_algorithms/algorithms/shortest_paths/weighted.py @@ -113,7 +113,7 @@ def bellman_ford_path_lengths(G, nodes=None, *, expand_output=False): dtype = A.dtype n = A.nrows if nodes is None: - # TODO: `D = Vector.from_iso_value(0, n, dtype).diag()` + # TODO: `D = Vector.from_scalar(0, n, dtype).diag()` D = Vector(dtype, n, name="bellman_ford_path_lengths_vector") D << 0 D = D.diag(name="bellman_ford_path_lengths") @@ -181,7 +181,7 @@ def _bfs_levels(G, nodes=None, *, dtype=int): A = G.get_property("offdiag") n = A.nrows if nodes is None: - # TODO: `D = Vector.from_iso_value(0, n, dtype).diag()` + # TODO: `D = Vector.from_scalar(0, n, dtype).diag()` D = Vector(dtype, n, name="bfs_levels_vector") D << 0 D = D.diag(name="bfs_levels") diff --git a/graphblas_algorithms/nxapi/cluster.py b/graphblas_algorithms/nxapi/cluster.py index 544d79f..457f5e6 100644 --- a/graphblas_algorithms/nxapi/cluster.py +++ b/graphblas_algorithms/nxapi/cluster.py @@ -105,27 +105,37 @@ def _square_clustering_split(G, node_ids=None, *, nsplits): return result -def square_clustering(G, nodes=None, *, nsplits=None): +def square_clustering(G, nodes=None, *, nsplits="auto"): + # `nsplits` is used to split the computation into chunks. + # square_clustering computes `A @ A`, which can get very large, even dense. + # The default `nsplits` is to choose the number so that `Asubset @ A` + # will be about 256 MB if dense. G = to_undirected_graph(G) if len(G) == 0: return {} - elif nodes is None: + if nsplits == "auto": + # TODO: make a utility function for this that can be reused + # Also, should we use `chunksize` instead of `nsplits`? + targetsize = 256 * 1024 * 1024 # 256 MB + nsplits = len(G) ** 2 * G._A.dtype.np_type.itemsize // targetsize + if nsplits <= 1: + nsplits = None + if nodes is None: # Should we use this one for subsets of nodes as well? 
if nsplits is None: result = algorithms.square_clustering(G) else: result = _square_clustering_split(G, nsplits=nsplits) return G.vector_to_nodemap(result, fill_value=0) - elif nodes in G: + if nodes in G: idx = G._key_to_id[nodes] return algorithms.single_square_clustering(G, idx) + ids = G.list_to_ids(nodes) + if nsplits is None: + result = algorithms.square_clustering(G, ids) else: - ids = G.list_to_ids(nodes) - if nsplits is None: - result = algorithms.square_clustering(G, ids) - else: - result = _square_clustering_split(G, ids, nsplits=nsplits) - return G.vector_to_nodemap(result) + result = _square_clustering_split(G, ids, nsplits=nsplits) + return G.vector_to_nodemap(result) @not_implemented_for("directed") From bbcdd07174d8c731a6aa3b3d7d562467edeba4a3 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Fri, 17 Feb 2023 05:56:22 +0100 Subject: [PATCH 13/13] Add note about `A.ss.is_iso` --- graphblas_algorithms/classes/graph.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/graphblas_algorithms/classes/graph.py b/graphblas_algorithms/classes/graph.py index c083a76..0688518 100644 --- a/graphblas_algorithms/classes/graph.py +++ b/graphblas_algorithms/classes/graph.py @@ -221,7 +221,8 @@ def is_iso(G, mask=None): if "iso_value" in cache: cache["is_iso"] = cache["iso_value"] is not None else: - # SuiteSparse:GraphBLAS + # SuiteSparse:GraphBLAS. `A` may still be iso-valued even if `A.ss.is_iso` is False. + # Should we check this or rely on `A.ss.is_iso` b/c it's fast and should usually work? cache["is_iso"] = A.ss.is_iso return cache["is_iso"]
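
A minimal usage sketch of the Bellman-Ford entry points added by this series, under stated assumptions: the example graph and its weights are invented for illustration, `graphblas_algorithms.nxapi` is assumed importable as shown, and the nxapi functions are assumed to accept a NetworkX graph directly (they convert internally via `to_graph`). This is an illustrative sketch, not part of the patches themselves.

    # Usage sketch (illustrative only; assumptions noted above).
    import networkx as nx

    from graphblas_algorithms import nxapi

    # A tiny weighted graph, invented for illustration.
    G = nx.Graph()
    G.add_weighted_edges_from([("a", "b", 2.0), ("b", "c", 1.5), ("a", "c", 5.0)])

    # Single source: a node -> distance mapping, e.g. {"a": 0.0, "b": 2.0, "c": 3.5}.
    lengths = nxapi.single_source_bellman_ford_path_length(G, "a")
    print(dict(lengths))

    # All pairs: a generator of (source, node -> distance mapping) pairs.
    # `chunksize` controls how many source rows are computed per matrix product.
    for source, d in nxapi.all_pairs_bellman_ford_path_length(G, chunksize=2):
        print(source, dict(d))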