From 85193576018d9205421a430538d07f90fa3a3fcc Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Wed, 3 May 2023 18:47:09 -0500 Subject: [PATCH 1/2] Add `bellman_ford_path` --- README.md | 1 + graphblas_algorithms/algorithms/_bfs.py | 47 ++++++- .../algorithms/components/connected.py | 6 +- .../algorithms/components/weakly_connected.py | 4 +- .../algorithms/shortest_paths/weighted.py | 116 +++++++++++++++++- graphblas_algorithms/generators/ego.py | 4 +- graphblas_algorithms/interface.py | 1 + .../nxapi/shortest_paths/weighted.py | 10 ++ 8 files changed, 175 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 821cd95..adc236a 100644 --- a/README.md +++ b/README.md @@ -178,6 +178,7 @@ dispatch pattern shown above. - Shortest Paths - all_pairs_bellman_ford_path_length - all_pairs_shortest_path_length + - bellman_ford_path - floyd_warshall - floyd_warshall_numpy - floyd_warshall_predecessor_and_distance diff --git a/graphblas_algorithms/algorithms/_bfs.py b/graphblas_algorithms/algorithms/_bfs.py index 31674ce..49ba735 100644 --- a/graphblas_algorithms/algorithms/_bfs.py +++ b/graphblas_algorithms/algorithms/_bfs.py @@ -1,7 +1,7 @@ """BFS routines used by other algorithms""" import numpy as np -from graphblas import Matrix, Vector, binary, replace, unary +from graphblas import Matrix, Vector, binary, indexunary, replace, semiring, unary from graphblas.semiring import any_pair @@ -11,8 +11,13 @@ def _get_cutoff(n, cutoff): return cutoff + 1 # Inclusive -def _plain_bfs(G, source, *, cutoff=None): - index = G._key_to_id[source] +def _bfs_plain(G, source=None, target=None, *, index=None, cutoff=None): + if source is not None: + index = G._key_to_id[source] + if target is not None: + dst_id = G._key_to_id[target] + else: + dst_id = None A = G.get_property("offdiag") n = A.nrows v = Vector(bool, n, name="bfs_plain") @@ -25,6 +30,8 @@ def _plain_bfs(G, source, *, cutoff=None): q(~v.S, replace) << any_pair_bool(q @ A) if q.nvals == 0: break + if dst_id is not None and dst_id in q: + break v(q.S) << True return v @@ -83,8 +90,38 @@ def _bfs_levels(G, nodes, cutoff=None, *, dtype=int): return D +def _bfs_parent(G, source, cutoff=None, *, target=None, transpose=False, dtype=int): + if dtype == bool: + dtype = int + index = G._key_to_id[source] + if target is not None: + dst_id = G._key_to_id[target] + else: + dst_id = None + A = G.get_property("offdiag") + if transpose and G.is_directed(): + A = A.T # TODO: should we use "AT" instead? + n = A.nrows + v = Vector(dtype, n, name="bfs_parent") + q = Vector(dtype, n, name="q") + v[index] = index + q[index] = index + min_first = semiring.min_first[v.dtype] + index = indexunary.index[v.dtype] + cutoff = _get_cutoff(n, cutoff) + for _i in range(1, cutoff): + q(~v.S, replace) << min_first(q @ A) + if q.nvals == 0: + break + v(q.S) << q + if dst_id is not None and dst_id in q: + break + q << index(q) + return v + + # TODO: benchmark this and the version commented out below -def _plain_bfs_bidirectional(G, source): +def _bfs_plain_bidirectional(G, source): # Bi-directional BFS w/o symmetrizing the adjacency matrix index = G._key_to_id[source] A = G.get_property("offdiag") @@ -125,7 +162,7 @@ def _plain_bfs_bidirectional(G, source): """ -def _plain_bfs_bidirectional(G, source): +def _bfs_plain_bidirectional(G, source): # Bi-directional BFS w/o symmetrizing the adjacency matrix index = G._key_to_id[source] A = G.get_property("offdiag") diff --git a/graphblas_algorithms/algorithms/components/connected.py b/graphblas_algorithms/algorithms/components/connected.py index fb2f678..3f19b86 100644 --- a/graphblas_algorithms/algorithms/components/connected.py +++ b/graphblas_algorithms/algorithms/components/connected.py @@ -1,12 +1,12 @@ -from .._bfs import _plain_bfs +from .._bfs import _bfs_plain from ..exceptions import PointlessConcept def is_connected(G): if len(G) == 0: raise PointlessConcept("Connectivity is undefined for the null graph.") - return _plain_bfs(G, next(iter(G))).nvals == len(G) + return _bfs_plain(G, next(iter(G))).nvals == len(G) def node_connected_component(G, n): - return _plain_bfs(G, n) + return _bfs_plain(G, n) diff --git a/graphblas_algorithms/algorithms/components/weakly_connected.py b/graphblas_algorithms/algorithms/components/weakly_connected.py index 99eba78..306d96e 100644 --- a/graphblas_algorithms/algorithms/components/weakly_connected.py +++ b/graphblas_algorithms/algorithms/components/weakly_connected.py @@ -1,8 +1,8 @@ -from .._bfs import _plain_bfs_bidirectional +from .._bfs import _bfs_plain_bidirectional from ..exceptions import PointlessConcept def is_weakly_connected(G): if len(G) == 0: raise PointlessConcept("Connectivity is undefined for the null graph.") - return _plain_bfs_bidirectional(G, next(iter(G))).nvals == len(G) + return _bfs_plain_bidirectional(G, next(iter(G))).nvals == len(G) diff --git a/graphblas_algorithms/algorithms/shortest_paths/weighted.py b/graphblas_algorithms/algorithms/shortest_paths/weighted.py index fddf672..d2a9f0e 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/weighted.py +++ b/graphblas_algorithms/algorithms/shortest_paths/weighted.py @@ -1,12 +1,13 @@ import numpy as np -from graphblas import Matrix, Vector, binary, monoid, replace, select, unary +from graphblas import Matrix, Vector, binary, indexunary, monoid, replace, select, unary from graphblas.semiring import any_pair, min_plus -from .._bfs import _bfs_level, _bfs_levels +from .._bfs import _bfs_level, _bfs_levels, _bfs_parent, _bfs_plain from ..exceptions import Unbounded __all__ = [ "single_source_bellman_ford_path_length", + "bellman_ford_path", "bellman_ford_path_lengths", "negative_edge_cycle", ] @@ -164,6 +165,117 @@ def bellman_ford_path_lengths(G, nodes=None, *, expand_output=False): return D +def _reconstruct_path_from_parents(G, parents, src, dst): + indices, values = parents.to_coo(sort=False) + d = dict(zip(indices.tolist(), values.tolist())) + if dst not in d: + return [] + cur = dst + path = [cur] + while cur != src: + cur = d[cur] + path.append(cur) + return G.list_to_keys(reversed(path)) + + +def bellman_ford_path(G, source, target): + src_id = G._key_to_id[source] + dst_id = G._key_to_id[target] + if G.get_property("is_iso"): + # If the edges are iso-valued (and positive), then we can simply do level BFS + is_negative = G.get_property("has_negative_edges+") + if not is_negative: + p = _bfs_parent(G, source, target=target) + return _reconstruct_path_from_parents(G, p, src_id, dst_id) + raise Unbounded("Negative cycle detected.") + A, is_negative, has_negative_diagonal = G.get_properties( + "offdiag has_negative_edges- has_negative_diagonal" + ) + if A.dtype == bool: + # Should we upcast e.g. INT8 to INT64 as well? + dtype = int + else: + dtype = A.dtype + cutoff = None + n = A.nrows + d = Vector(dtype, n, name="bellman_ford_path_length") + d[src_id] = 0 + p = Vector(int, n, name="bellman_ford_path_parent") + p[src_id] = src_id + + prev = d.dup(name="prev") + cur = Vector(dtype, n, name="cur") + indices = Vector(int, n, name="indices") + mask = Vector(bool, n, name="mask") + B = Matrix(dtype, n, n, name="B") + Indices = Matrix(int, n, n, name="Indices") + cols = prev.to_coo(values=False)[0] + one = unary.one[bool] + for _i in range(n - 1): + # This is a slightly modified Bellman-Ford algorithm. + # `cur` is the current frontier of values that improved in the previous iteration. + # This means that in this iteration we drop values from `cur` that are not better. + cur << min_plus(prev @ A) + if cutoff is not None: + cur << select.valuele(cur, cutoff) + + # Mask is True where cur not in d or cur < d + mask << one(cur) + mask(binary.second) << binary.lt(cur & d) + + # Drop values from `cur` that didn't improve + cur(mask.V, replace) << cur + if cur.nvals == 0: + break + # Update `d` with values that improved + d(cur.S) << cur + if not is_negative: + # Limit exploration if we have a target + cutoff = cur.get(dst_id, cutoff) + + # Now try to find the parents! + # This is also not standard. Typically, UDTs and UDFs are used to keep + # track of both the minimum element and the parent id at the same time. + # Only include rows and columns that were used this iteration. + rows = cols + cols = cur.to_coo(values=False)[0] + B.clear() + B[rows, cols] = A[rows, cols] + + # Reverse engineer to determine parent + B << binary.plus(prev & B) + B << binary.iseq(B & cur) + B << select.valuene(B, False) + Indices << indexunary.rowindex(B) + indices << Indices.reduce_columnwise(monoid.min) + p(indices.S) << indices + prev, cur = cur, prev + else: + # Check for negative cycle when for loop completes without breaking + cur << min_plus(prev @ A) + if cutoff is not None: + cur << select.valuele(cur, cutoff) + mask << binary.lt(cur & d) + if mask.get(dst_id): + raise Unbounded("Negative cycle detected.") + path = _reconstruct_path_from_parents(G, p, src_id, dst_id) + if has_negative_diagonal and path: + mask.clear() + mask[G.list_to_ids(path)] = True + diag = G.get_property("diag", mask=mask.S) + if diag.nvals > 0: + raise Unbounded("Negative cycle detected.") + mask << binary.first(mask & cur) # mask(cur.S, replace) << mask + if mask.nvals > 0: + # Is there a path from any visited node with negative self-loop to target? + # We could actually stop as soon as any from `path` is visited + indices, _ = mask.to_coo(values=False)[0] + q = _bfs_plain(G, target=target, index=indices, cutoff=_i) + if dst_id in q: + raise Unbounded("Negative cycle detected.") + return path + + def negative_edge_cycle(G): # TODO: use a heuristic to try to stop early if G.is_directed(): diff --git a/graphblas_algorithms/generators/ego.py b/graphblas_algorithms/generators/ego.py index 26e9cf9..4d95e0f 100644 --- a/graphblas_algorithms/generators/ego.py +++ b/graphblas_algorithms/generators/ego.py @@ -1,4 +1,4 @@ -from ..algorithms.components.connected import _plain_bfs +from ..algorithms.components.connected import _bfs_plain from ..algorithms.shortest_paths.weighted import single_source_bellman_ford_path_length __all__ = ["ego_graph"] @@ -14,7 +14,7 @@ def ego_graph(G, n, radius=1, center=True, undirected=False, is_weighted=False): if is_weighted: v = single_source_bellman_ford_path_length(G2, n, cutoff=radius) else: - v = _plain_bfs(G2, n, cutoff=radius) + v = _bfs_plain(G2, n, cutoff=radius) if not center: del v[G._key_to_id[n]] diff --git a/graphblas_algorithms/interface.py b/graphblas_algorithms/interface.py index 94f02a6..d0a8365 100644 --- a/graphblas_algorithms/interface.py +++ b/graphblas_algorithms/interface.py @@ -83,6 +83,7 @@ class Dispatcher: nxapi.shortest_paths.unweighted.single_target_shortest_path_length ) all_pairs_shortest_path_length = nxapi.shortest_paths.unweighted.all_pairs_shortest_path_length + bellman_ford_path = nxapi.shortest_paths.weighted.bellman_ford_path all_pairs_bellman_ford_path_length = ( nxapi.shortest_paths.weighted.all_pairs_bellman_ford_path_length ) diff --git a/graphblas_algorithms/nxapi/shortest_paths/weighted.py b/graphblas_algorithms/nxapi/shortest_paths/weighted.py index a44a18e..9916e45 100644 --- a/graphblas_algorithms/nxapi/shortest_paths/weighted.py +++ b/graphblas_algorithms/nxapi/shortest_paths/weighted.py @@ -6,6 +6,7 @@ __all__ = [ "all_pairs_bellman_ford_path_length", + "bellman_ford_path", "negative_edge_cycle", "single_source_bellman_ford_path_length", ] @@ -55,6 +56,15 @@ def single_source_bellman_ford_path_length(G, source, weight="weight"): return G.vector_to_nodemap(d) +def bellman_ford_path(G, source, target, weight="weight"): + # TODO: what if weight is a function? + G = to_graph(G, weight=weight) + try: + return algorithms.bellman_ford_path(G, source, target) + except KeyError as e: + raise NodeNotFound(*e.args) from e + + def negative_edge_cycle(G, weight="weight", heuristic=True): # TODO: what if weight is a function? # TODO: use a heuristic to try to stop early From d41e65ed6588a6641bdeae3a4f55c2341616c26d Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Wed, 3 May 2023 19:27:42 -0500 Subject: [PATCH 2/2] Make sure README.md lists all algorithms --- graphblas_algorithms/tests/test_match_nx.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/graphblas_algorithms/tests/test_match_nx.py b/graphblas_algorithms/tests/test_match_nx.py index 490d1d7..6250b1d 100644 --- a/graphblas_algorithms/tests/test_match_nx.py +++ b/graphblas_algorithms/tests/test_match_nx.py @@ -11,6 +11,7 @@ """ import sys from collections import namedtuple +from pathlib import Path import pytest @@ -191,3 +192,21 @@ def test_print_dispatched_implemented(nx_names_to_info, gb_names_to_info): for i, name in enumerate(sorted(fullnames)): print(i, name) print("=============================================================================") + + +def test_algorithms_in_readme(nx_names_to_info, gb_names_to_info): + """Ensure all algorithms are mentioned in README.md.""" + implemented = nx_names_to_info.keys() & gb_names_to_info.keys() + path = Path(__file__).parent.parent.parent / "README.md" + if not path.exists(): + return + with path.open("r") as f: + text = f.read() + missing = set() + for name in sorted(implemented): + if name not in text: + missing.add(name) + if missing: + msg = f"Algorithms missing in README.md: {', '.join(sorted(missing))}" + print(msg) + raise AssertionError(msg)