From f2f2cec06c2233b42af7169b1c76f803101337f2 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 30 Jan 2023 22:56:56 -0600 Subject: [PATCH 1/4] Add `floyd_warshall` --- README.md | 1 + .../algorithms/shortest_paths/__init__.py | 1 + .../algorithms/shortest_paths/dense.py | 37 +++++++++++++++++++ graphblas_algorithms/classes/_utils.py | 1 + graphblas_algorithms/interface.py | 19 ++++++---- .../nxapi/shortest_paths/__init__.py | 1 + .../nxapi/shortest_paths/dense.py | 10 +++++ 7 files changed, 62 insertions(+), 8 deletions(-) create mode 100644 graphblas_algorithms/algorithms/shortest_paths/dense.py create mode 100644 graphblas_algorithms/nxapi/shortest_paths/dense.py diff --git a/README.md b/README.md index 3e21cd7..92c06c3 100644 --- a/README.md +++ b/README.md @@ -147,6 +147,7 @@ dispatch pattern shown above. - is_k_regular - is_regular - Shortest Paths + - floyd_warshall - has_path - Simple Paths - is_simple_path diff --git a/graphblas_algorithms/algorithms/shortest_paths/__init__.py b/graphblas_algorithms/algorithms/shortest_paths/__init__.py index c9840bc..60613e6 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/__init__.py +++ b/graphblas_algorithms/algorithms/shortest_paths/__init__.py @@ -1 +1,2 @@ +from .dense import * from .generic import * diff --git a/graphblas_algorithms/algorithms/shortest_paths/dense.py b/graphblas_algorithms/algorithms/shortest_paths/dense.py new file mode 100644 index 0000000..126a7b4 --- /dev/null +++ b/graphblas_algorithms/algorithms/shortest_paths/dense.py @@ -0,0 +1,37 @@ +from graphblas import Matrix, Vector, binary +from graphblas.semiring import any_plus + +__all__ = ["floyd_warshall"] + + +def floyd_warshall(G, is_weighted=False): + # By using `offdiag` instead of `G._A`, we ensure that D will not become dense. + # Dense D may be better at times, but not including the diagonal will result in less work. + # Typically, Floyd-Warshall algorithms sets the diagonal of D to 0 at the beginning. + # This is unnecessary with sparse matrices, and we set the diagonal to 0 at the end. + A = G.get_property("offdiag") + if A.dtype == bool or not is_weighted: + dtype = int + else: + dtype = A.dtype + n = A.nrows + D = Matrix(dtype, nrows=n, ncols=n, name="floyd_warshall") + if is_weighted: + D << A + else: + D(A.S) << 1 # Like `D << unary.one[int](A)` + del A + + Row = Matrix(dtype, nrows=1, ncols=n, name="Row") + Col = Matrix(dtype, nrows=n, ncols=1, name="Col") + for i in range(n): + Col << D[:, [i]] + Row << D[[i], :] + D(binary.min) << any_plus(Col @ Row) # Like `col.outer(row, binary.plus)` + + # Set diagonal values to 0 (this way seems fast). + # The missing values are implied to be infinity, so we set diagonals explicitly to 0. + v = Vector(bool, size=n) + v << True + D(v.diag().S) << 0 + return D diff --git a/graphblas_algorithms/classes/_utils.py b/graphblas_algorithms/classes/_utils.py index 237dcf3..0bce9a6 100644 --- a/graphblas_algorithms/classes/_utils.py +++ b/graphblas_algorithms/classes/_utils.py @@ -154,6 +154,7 @@ def matrix_to_dicts(self, A, *, use_row_index=False, use_column_index=False): and likewise for `use_column_index=True``. """ + # TODO: use `A.to_dicts()`? if isinstance(A, TransposedMatrix): # Not covered d = A.T.ss.export("hypercsc") diff --git a/graphblas_algorithms/interface.py b/graphblas_algorithms/interface.py index eaee712..ddc1091 100644 --- a/graphblas_algorithms/interface.py +++ b/graphblas_algorithms/interface.py @@ -55,6 +55,7 @@ class Dispatcher: is_k_regular = nxapi.regular.is_k_regular is_regular = nxapi.regular.is_regular # Shortest Paths + floyd_warshall = nxapi.shortest_paths.dense.floyd_warshall has_path = nxapi.shortest_paths.generic.has_path # Simple Paths is_simple_path = nxapi.simple_paths.is_simple_path @@ -99,14 +100,16 @@ def on_start_tests(items): import pytest except ImportError: # pragma: no cover (import) return - skip = [ - ("test_attributes", {"TestBoruvka", "test_mst.py"}), - ("test_weight_attribute", {"TestBoruvka", "test_mst.py"}), - ] + multi_attributed = "unable to handle multi-attributed graphs" + multidigraph = "unable to handle MultiDiGraph" + freeze = frozenset + skip = { + ("test_attributes", freeze({"TestBoruvka", "test_mst.py"})): multi_attributed, + ("test_weight_attribute", freeze({"TestBoruvka", "test_mst.py"})): multi_attributed, + ("test_zero_weight", freeze({"TestFloyd", "test_dense.py"})): multidigraph, + } for item in items: kset = set(item.keywords) - for test_name, keywords in skip: + for (test_name, keywords), reason in skip.items(): if item.name == test_name and keywords.issubset(kset): - item.add_marker( - pytest.mark.xfail(reason="unable to handle multi-attributed graphs") - ) + item.add_marker(pytest.mark.xfail(reason=reason)) diff --git a/graphblas_algorithms/nxapi/shortest_paths/__init__.py b/graphblas_algorithms/nxapi/shortest_paths/__init__.py index c9840bc..60613e6 100644 --- a/graphblas_algorithms/nxapi/shortest_paths/__init__.py +++ b/graphblas_algorithms/nxapi/shortest_paths/__init__.py @@ -1 +1,2 @@ +from .dense import * from .generic import * diff --git a/graphblas_algorithms/nxapi/shortest_paths/dense.py b/graphblas_algorithms/nxapi/shortest_paths/dense.py new file mode 100644 index 0000000..21a32be --- /dev/null +++ b/graphblas_algorithms/nxapi/shortest_paths/dense.py @@ -0,0 +1,10 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = ["floyd_warshall"] + + +def floyd_warshall(G, weight="weight"): + G = to_graph(G, weight=weight) + D = algorithms.floyd_warshall(G, is_weighted=weight is not None) + return G.matrix_to_dicts(D) From 44d27193bcf5b5da4f839be6926e883db40cfc75 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Tue, 31 Jan 2023 13:57:26 -0600 Subject: [PATCH 2/4] Optimization: better handle sparsity such as skip empty nodes --- graphblas_algorithms/algorithms/core.py | 2 +- .../algorithms/shortest_paths/dense.py | 22 ++++++++++++++----- graphblas_algorithms/nxapi/cluster.py | 2 +- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/graphblas_algorithms/algorithms/core.py b/graphblas_algorithms/algorithms/core.py index 85ab592..dc50b69 100644 --- a/graphblas_algorithms/algorithms/core.py +++ b/graphblas_algorithms/algorithms/core.py @@ -28,7 +28,7 @@ def k_truss(G: Graph, k) -> Graph: S = C # Remove isolate nodes - indices, _ = C.reduce_rowwise(monoid.any).to_coo() + indices = C.reduce_rowwise(monoid.any).to_coo(values=False)[0] Ktruss = C[indices, indices].new() # Convert back to networkx graph with correct node ids diff --git a/graphblas_algorithms/algorithms/shortest_paths/dense.py b/graphblas_algorithms/algorithms/shortest_paths/dense.py index 126a7b4..a2d40ce 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/dense.py +++ b/graphblas_algorithms/algorithms/shortest_paths/dense.py @@ -1,4 +1,5 @@ from graphblas import Matrix, Vector, binary +from graphblas.select import offdiag from graphblas.semiring import any_plus __all__ = ["floyd_warshall"] @@ -9,7 +10,13 @@ def floyd_warshall(G, is_weighted=False): # Dense D may be better at times, but not including the diagonal will result in less work. # Typically, Floyd-Warshall algorithms sets the diagonal of D to 0 at the beginning. # This is unnecessary with sparse matrices, and we set the diagonal to 0 at the end. - A = G.get_property("offdiag") + # We also don't iterate over index `i` if either row i or column i are empty. + if G.is_directed(): + A, row_degrees, column_degrees = G.get_properties("offdiag row_degrees- column_degrees-") + nonempty_nodes = binary.pair(row_degrees & column_degrees).new(name="nonempty_nodes") + else: + A, nonempty_nodes = G.get_properties("offdiag degrees-") + if A.dtype == bool or not is_weighted: dtype = int else: @@ -24,14 +31,17 @@ def floyd_warshall(G, is_weighted=False): Row = Matrix(dtype, nrows=1, ncols=n, name="Row") Col = Matrix(dtype, nrows=n, ncols=1, name="Col") - for i in range(n): + Outer = Matrix(dtype, nrows=n, ncols=n, name="temp") + for i in nonempty_nodes: Col << D[:, [i]] Row << D[[i], :] - D(binary.min) << any_plus(Col @ Row) # Like `col.outer(row, binary.plus)` + Outer << any_plus(Col @ Row) + D(binary.min) << offdiag(Outer) # Set diagonal values to 0 (this way seems fast). # The missing values are implied to be infinity, so we set diagonals explicitly to 0. - v = Vector(bool, size=n) - v << True - D(v.diag().S) << 0 + mask = Vector(bool, size=n, name="mask") + mask << True + Mask = mask.diag(name="Mask") + D(Mask.S) << 0 return D diff --git a/graphblas_algorithms/nxapi/cluster.py b/graphblas_algorithms/nxapi/cluster.py index 7ac5618..4ee80e1 100644 --- a/graphblas_algorithms/nxapi/cluster.py +++ b/graphblas_algorithms/nxapi/cluster.py @@ -95,7 +95,7 @@ def _split(L, k): # TODO: should this move into algorithms? def _square_clustering_split(G, node_ids=None, *, nsplits): if node_ids is None: - node_ids = G._A.reduce_rowwise(monoid.any).to_coo()[0] + node_ids = G._A.reduce_rowwise(monoid.any).to_coo(values=False)[0] result = None for chunk_ids in _split(node_ids, nsplits): res = algorithms.square_clustering(G, chunk_ids) From f96a8a26fc9215636c03723bfbe6a4fa4c849406 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Tue, 31 Jan 2023 23:39:31 -0600 Subject: [PATCH 3/4] Simplify (so this PR can be a reference) --- graphblas_algorithms/algorithms/core.py | 2 +- graphblas_algorithms/classes/_utils.py | 1 - graphblas_algorithms/nxapi/cluster.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/graphblas_algorithms/algorithms/core.py b/graphblas_algorithms/algorithms/core.py index dc50b69..85ab592 100644 --- a/graphblas_algorithms/algorithms/core.py +++ b/graphblas_algorithms/algorithms/core.py @@ -28,7 +28,7 @@ def k_truss(G: Graph, k) -> Graph: S = C # Remove isolate nodes - indices = C.reduce_rowwise(monoid.any).to_coo(values=False)[0] + indices, _ = C.reduce_rowwise(monoid.any).to_coo() Ktruss = C[indices, indices].new() # Convert back to networkx graph with correct node ids diff --git a/graphblas_algorithms/classes/_utils.py b/graphblas_algorithms/classes/_utils.py index 0bce9a6..237dcf3 100644 --- a/graphblas_algorithms/classes/_utils.py +++ b/graphblas_algorithms/classes/_utils.py @@ -154,7 +154,6 @@ def matrix_to_dicts(self, A, *, use_row_index=False, use_column_index=False): and likewise for `use_column_index=True``. """ - # TODO: use `A.to_dicts()`? if isinstance(A, TransposedMatrix): # Not covered d = A.T.ss.export("hypercsc") diff --git a/graphblas_algorithms/nxapi/cluster.py b/graphblas_algorithms/nxapi/cluster.py index 4ee80e1..7ac5618 100644 --- a/graphblas_algorithms/nxapi/cluster.py +++ b/graphblas_algorithms/nxapi/cluster.py @@ -95,7 +95,7 @@ def _split(L, k): # TODO: should this move into algorithms? def _square_clustering_split(G, node_ids=None, *, nsplits): if node_ids is None: - node_ids = G._A.reduce_rowwise(monoid.any).to_coo(values=False)[0] + node_ids = G._A.reduce_rowwise(monoid.any).to_coo()[0] result = None for chunk_ids in _split(node_ids, nsplits): res = algorithms.square_clustering(G, chunk_ids) From b02851b810650248c1a79f107ee807f461c670d5 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Wed, 1 Feb 2023 12:01:01 -0600 Subject: [PATCH 4/4] Better name ("Outer", not "temp") --- graphblas_algorithms/algorithms/shortest_paths/dense.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graphblas_algorithms/algorithms/shortest_paths/dense.py b/graphblas_algorithms/algorithms/shortest_paths/dense.py index a2d40ce..f53814f 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/dense.py +++ b/graphblas_algorithms/algorithms/shortest_paths/dense.py @@ -31,11 +31,11 @@ def floyd_warshall(G, is_weighted=False): Row = Matrix(dtype, nrows=1, ncols=n, name="Row") Col = Matrix(dtype, nrows=n, ncols=1, name="Col") - Outer = Matrix(dtype, nrows=n, ncols=n, name="temp") + Outer = Matrix(dtype, nrows=n, ncols=n, name="Outer") for i in nonempty_nodes: Col << D[:, [i]] Row << D[[i], :] - Outer << any_plus(Col @ Row) + Outer << any_plus(Col @ Row) # Like `col.outer(row, binary.plus)` D(binary.min) << offdiag(Outer) # Set diagonal values to 0 (this way seems fast).