diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5eef855..dcb86d6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,16 +30,18 @@ jobs: activate-environment: testing - name: Install dependencies run: | - conda install -c conda-forge python-graphblas networkx scipy \ - pytest coverage black flake8 flake8-comprehensions flake8-bugbear - pip install -e . + conda install -c conda-forge python-graphblas scipy pandas \ + pytest-cov pytest-randomly black flake8-comprehensions flake8-bugbear + # matplotlib lxml pygraphviz pydot sympy # Extra networkx deps we don't need yet + pip install git+https://github.com/networkx/networkx.git@main --no-deps + pip install -e . --no-deps - name: Style checks run: | flake8 black . --check --diff - name: PyTest run: | - coverage run --branch -m pytest + coverage run --branch -m pytest -v coverage xml - name: Coverage uses: codecov/codecov-action@v2 diff --git a/graphblas_algorithms/algorithms/__init__.py b/graphblas_algorithms/algorithms/__init__.py index 11e76f9..2edc8ee 100644 --- a/graphblas_algorithms/algorithms/__init__.py +++ b/graphblas_algorithms/algorithms/__init__.py @@ -1,3 +1,3 @@ -from .cluster import average_clustering, clustering, transitivity, triangles -from .link_analysis import pagerank -from .reciprocity import overall_reciprocity, reciprocity +from .cluster import * +from .link_analysis import * +from .reciprocity import * diff --git a/graphblas_algorithms/algorithms/cluster.py b/graphblas_algorithms/algorithms/cluster.py index d2f6073..4310a54 100644 --- a/graphblas_algorithms/algorithms/cluster.py +++ b/graphblas_algorithms/algorithms/cluster.py @@ -1,31 +1,52 @@ -from graphblas import binary -from graphblas.semiring import plus_pair -from networkx import average_clustering as _nx_average_clustering -from networkx import clustering as _nx_clustering +import numpy as np +from graphblas import Matrix, Vector, binary, monoid, replace, select, unary +from 
graphblas.semiring import plus_pair, plus_times from graphblas_algorithms.classes.digraph import to_graph from graphblas_algorithms.classes.graph import to_undirected_graph -from graphblas_algorithms.utils import not_implemented_for +from graphblas_algorithms.utils import get_all, not_implemented_for -def single_triangle_core(G, node): +def single_triangle_core(G, node, *, weighted=False): index = G._key_to_id[node] r = G._A[index, :].new() # Pretty much all the time is spent here taking TRIL, which is used to ignore self-edges L = G.get_property("L-") if G.get_property("has_self_edges"): del r[index] # Ignore self-edges - return plus_pair(L @ r).new(mask=r.S).reduce(allow_empty=False).value + if weighted: + maxval = G.get_property("max_element-") + L = unary.cbrt(L / maxval) + r = unary.cbrt(r / maxval) + semiring = plus_times + else: + semiring = plus_pair + val = semiring(L @ r).new(mask=r.S) + if weighted: + val *= r + return val.reduce().get(0) -def triangles_core(G, mask=None): +def triangles_core(G, *, weighted=False, mask=None): # Ignores self-edges + # Can we apply the mask earlier in the computation? L, U = G.get_properties("L- U-") - C = plus_pair(L @ L.T).new(mask=L.S) + if weighted: + maxval = G.get_property("max_element-") + L = unary.cbrt(L / maxval) + U = unary.cbrt(U / maxval) + semiring = plus_times + else: + semiring = plus_pair + C = semiring(L @ L.T).new(mask=L.S) + D = semiring(U @ L.T).new(mask=U.S) + if weighted: + C *= L + D *= U return ( C.reduce_rowwise().new(mask=mask) + C.reduce_columnwise().new(mask=mask) - + plus_pair(U @ L.T).new(mask=U.S).reduce_rowwise().new(mask=mask) + + D.reduce_rowwise().new(mask=mask) ).new(name="triangles") @@ -45,7 +66,7 @@ def total_triangles_core(G): # We use SandiaDot method, because it's usually the fastest on large graphs. 
# For smaller graphs, Sandia method is usually faster: plus_pair(L @ L).new(mask=L.S) L, U = G.get_properties("L- U-") - return plus_pair(L @ U.T).new(mask=L.S).reduce_scalar(allow_empty=False).value + return plus_pair(L @ U.T).new(mask=L.S).reduce_scalar().get(0) def transitivity_core(G): @@ -59,11 +80,8 @@ def transitivity_core(G): def transitivity_directed_core(G): # XXX" is transitivity supposed to work on directed graphs like this? - if G.get_property("has_self_edges"): - A = G.get_property("offdiag") - else: - A = G._A - numerator = plus_pair(A @ A.T).new(mask=A.S).reduce_scalar(allow_empty=False).value + A, AT = G.get_properties("offdiag AT") + numerator = plus_pair(A @ A.T).new(mask=A.S).reduce_scalar().get(0) if numerator == 0: return 0 degrees = G.get_property("row_degrees-") @@ -82,64 +100,81 @@ def transitivity(G): return G._cacheit("transitivity", func, G) -def clustering_core(G, mask=None): - tri = triangles_core(G, mask=mask) +def clustering_core(G, *, weighted=False, mask=None): + tri = triangles_core(G, weighted=weighted, mask=mask) degrees = G.get_property("degrees-") denom = degrees * (degrees - 1) return (2 * tri / denom).new(name="clustering") -def clustering_directed_core(G, mask=None): - if G.get_property("has_self_edges"): - A = G.get_property("offdiag") +def clustering_directed_core(G, *, weighted=False, mask=None): + # Can we apply the mask earlier in the computation? 
+ A, AT = G.get_properties("offdiag AT") + if weighted: + maxval = G.get_property("max_element-") + A = unary.cbrt(A / maxval) + AT = unary.cbrt(AT / maxval) + semiring = plus_times else: - A = G._A - AT = G.get_property("AT") - temp = plus_pair(A @ A.T).new(mask=A.S) + semiring = plus_pair + C = semiring(A @ A.T).new(mask=A.S) + D = semiring(AT @ A.T).new(mask=A.S) + E = semiring(AT @ AT.T).new(mask=A.S) + if weighted: + C *= A + D *= A + E *= A tri = ( - temp.reduce_rowwise().new(mask=mask) - + temp.reduce_columnwise().new(mask=mask) - + plus_pair(AT @ A.T).new(mask=A.S).reduce_rowwise().new(mask=mask) - + plus_pair(AT @ AT.T).new(mask=A.S).reduce_columnwise().new(mask=mask) + C.reduce_rowwise().new(mask=mask) + + C.reduce_columnwise().new(mask=mask) + + D.reduce_rowwise().new(mask=mask) + + E.reduce_columnwise().new(mask=mask) ) recip_degrees, total_degrees = G.get_properties("recip_degrees- total_degrees-", mask=mask) - return (tri / (total_degrees * (total_degrees - 1) - 2 * recip_degrees)).new(name="clustering") + denom = total_degrees * (total_degrees - 1) - 2 * recip_degrees + return (tri / denom).new(name="clustering") -def single_clustering_core(G, node): - tri = single_triangle_core(G, node) +def single_clustering_core(G, node, *, weighted=False): + tri = single_triangle_core(G, node, weighted=weighted) if tri == 0: return 0 index = G._key_to_id[node] + # TODO: it would be nice if we could clean this up, but still be fast if "degrees-" in G._cache: - degrees = G.get_property("degrees-")[index].value + degrees = G.get_property("degrees-").get(index) elif "degrees+" in G._cache: - degrees = G.get_property("degrees+")[index].value - if G.get_property("has_self_edges") and G._A[index, index].value is not None: + degrees = G.get_property("degrees+").get(index) + if G.get_property("has_self_edges") and G._A.get(index, index) is not None: degrees -= 1 else: - row = G._A[index, :].new() + row = G._A[index, :] degrees = row.nvals - if 
G.get_property("has_self_edges") and row[index].value is not None: + if G.get_property("has_self_edges") and row.get(index) is not None: degrees -= 1 denom = degrees * (degrees - 1) return 2 * tri / denom -def single_clustering_directed_core(G, node, *, has_self_edges=True): - if G.get_property("has_self_edges"): - A = G.get_property("offdiag") - else: - A = G._A +def single_clustering_directed_core(G, node, *, weighted=False): + A = G.get_property("offdiag") index = G._key_to_id[node] - r = A[index, :].new() - c = A[:, index].new() - tri = ( - plus_pair(A @ c).new(mask=c.S).reduce(allow_empty=False).value - + plus_pair(A @ c).new(mask=r.S).reduce(allow_empty=False).value - + plus_pair(A @ r).new(mask=c.S).reduce(allow_empty=False).value - + plus_pair(A @ r).new(mask=r.S).reduce(allow_empty=False).value - ) + if weighted: + maxval = G.get_property("max_element-") + A = unary.cbrt(A / maxval) + semiring = plus_times + else: + semiring = plus_pair + r = A[index, :] + c = A[:, index] + tris = [] + for x, y in [(c, c), (c, r), (r, c), (r, r)]: + v = semiring(A @ x).new(mask=y.S) + if weighted: + v *= y + tris.append(v.reduce().new()) + # Getting Python scalars is a blocking operation, so we do them last + tri = sum(t.get(0) for t in tris) if tri == 0: return 0 total_degrees = c.nvals + r.nvals @@ -148,28 +183,26 @@ def single_clustering_directed_core(G, node, *, has_self_edges=True): def clustering(G, nodes=None, weight=None): - if weight is not None: - # TODO: Not yet implemented. Clustering implemented only for unweighted. 
- return _nx_clustering(G, nodes=nodes, weight=weight) G = to_graph(G, weight=weight) # to directed or undirected if len(G) == 0: return {} + weighted = weight is not None if nodes in G: if G.is_directed(): - return single_clustering_directed_core(G, nodes) + return single_clustering_directed_core(G, nodes, weighted=weighted) else: - return single_clustering_core(G, nodes) + return single_clustering_core(G, nodes, weighted=weighted) mask = G.list_to_mask(nodes) if G.is_directed(): - result = clustering_directed_core(G, mask=mask) + result = clustering_directed_core(G, weighted=weighted, mask=mask) else: - result = clustering_core(G, mask=mask) + result = clustering_core(G, weighted=weighted, mask=mask) return G.vector_to_dict(result, mask=mask, fillvalue=0.0) -def average_clustering_core(G, mask=None, count_zeros=True): - c = clustering_core(G, mask=mask) - val = c.reduce(allow_empty=False).value +def average_clustering_core(G, *, count_zeros=True, weighted=False, mask=None): + c = clustering_core(G, weighted=weighted, mask=mask) + val = c.reduce().get(0) if not count_zeros: return val / c.nvals elif mask is not None: @@ -178,9 +211,9 @@ def average_clustering_core(G, mask=None, count_zeros=True): return val / c.size -def average_clustering_directed_core(G, mask=None, count_zeros=True): - c = clustering_directed_core(G, mask=mask) - val = c.reduce(allow_empty=False).value +def average_clustering_directed_core(G, *, count_zeros=True, weighted=False, mask=None): + c = clustering_directed_core(G, weighted=weighted, mask=mask) + val = c.reduce().get(0) if not count_zeros: return val / c.nvals elif mask is not None: @@ -190,12 +223,10 @@ def average_clustering_directed_core(G, mask=None, count_zeros=True): def average_clustering(G, nodes=None, weight=None, count_zeros=True): - if weight is not None: - # TODO: Not yet implemented. Clustering implemented only for unweighted. 
- return _nx_average_clustering(G, nodes=nodes, weight=weight, count_zeros=count_zeros) G = to_graph(G, weight=weight) # to directed or undirected if len(G) == 0: - raise ZeroDivisionError() # Not covered + raise ZeroDivisionError() + weighted = weight is not None mask = G.list_to_mask(nodes) if G.is_directed(): func = average_clustering_directed_core @@ -203,7 +234,116 @@ def average_clustering(G, nodes=None, weight=None, count_zeros=True): func = average_clustering_core if mask is None: return G._cacheit( - f"average_clustering(count_zeros={count_zeros})", func, G, count_zeros=count_zeros + f"average_clustering(count_zeros={count_zeros})", + func, + G, + weighted=weighted, + count_zeros=count_zeros, ) else: - return func(G, mask=mask, count_zeros=count_zeros) + return func(G, weighted=weighted, count_zeros=count_zeros, mask=mask) + + +def square_clustering_core(G, node_ids=None): + # node_ids argument is a bit different from what we do elsewhere. + # Normally, we take a mask or vector in a `_core` function. + # By accepting an iterable here, it could be of node ids or node keys. + A, degrees = G.get_properties("A degrees+") # TODO: how to handle self-edges? + if node_ids is None: + # Can we do this better using SuiteSparse:GraphBLAS iteration? + node_ids = A.reduce_rowwise(monoid.any).new(name="node_ids") # all nodes with edges + C = unary.positionj(A).new(name="C") + rv = Vector(float, A.nrows, name="square_clustering") + row = Vector(A.dtype, A.ncols, name="row") + M = Matrix(int, A.nrows, A.ncols, name="M") + Q = Matrix(int, A.nrows, A.ncols, name="Q") + for v in node_ids: + # The mask M indicates the u and w neighbors of v to "iterate" over + row << A[v, :] + M << row.outer(row, binary.pair) + M << select.tril(M, -1) + # To compute q_v(u, w), the number of common neighbors of u and w other than v (squares), + # we first set the v'th column to zero, which lets us ignore v as a common neighbor. 
+ Q << binary.isne(C, v) # `isne` keeps the dtype as int + # Q: count the number of squares for each u-w combination! + Q(M.S, replace) << plus_times(Q @ Q.T) + # Total squares for v + squares = Q.reduce_scalar().get(0) + if squares == 0: + rv[v] = 0 + continue + # Denominator is the total number of squares that could exist. + # First contribution is degrees[u] + degrees[w] for each u-w combo. + Q(M.S, replace) << degrees.outer(degrees, binary.plus) + deg_uw = Q.reduce_scalar().new() + # Then we subtract off # squares, 1 for each u and 1 for each w for all combos, + # and 1 for each edge where u-w or w-u are connected (which would make triangles). + Q << binary.pair(A & M) # Are u-w connected? Can skip if bipartite + denom = deg_uw.get(0) - (squares + 2 * M.nvals + 2 * Q.nvals) + rv[v] = squares / denom + return rv + + +def square_clustering(G, nodes=None): + G = to_undirected_graph(G) + if len(G) == 0: + return {} + if nodes in G: + idx = G._key_to_id[nodes] + result = square_clustering_core(G, [idx]) + return result.get(idx) + ids = G.list_to_ids(nodes) + result = square_clustering_core(G, ids) + return G.vector_to_dict(result) + + +def generalized_degree_core(G, *, mask=None): + # Not benchmarked or optimized + A = G.get_property("offdiag") + Tri = Matrix(int, A.nrows, A.ncols, name="Tri") + if mask is not None: + if mask.structure and not mask.value: + v_mask = mask.parent + else: + v_mask = mask.new() # Not covered + Tri << binary.pair(v_mask & A) # Mask out rows + Tri(Tri.S) << 0 + else: + Tri(A.S) << 0 + Tri(Tri.S, binary.second) << plus_pair(Tri @ A.T) + rows, cols, vals = Tri.to_values() + # The column index indicates the number of triangles an edge participates in. + # The largest this can be is `A.ncols - 1`. Values is count of edges. 
+ return Matrix.from_values( + rows, + vals, + np.ones(vals.size, dtype=int), + dup_op=binary.plus, + nrows=A.nrows, + ncols=A.ncols - 1, + name="generalized_degree", + ) + + +def single_generalized_degree_core(G, node): + # Not benchmarked or optimized + index = G._key_to_id[node] + v = Vector(bool, len(G)) + v[index] = True + return generalized_degree_core(G, mask=v.S)[index, :].new(name=f"generalized_degree_{index}") + + +@not_implemented_for("directed") +def generalized_degree(G, nodes=None): + G = to_undirected_graph(G) + if len(G) == 0: + return {} + if nodes in G: + result = single_generalized_degree_core(G, nodes) + return G.vector_to_dict(result) + mask = G.list_to_mask(nodes) + result = generalized_degree_core(G, mask=mask) + return G.matrix_to_dicts(result) + + +__all__ = get_all(__name__) diff --git a/graphblas_algorithms/algorithms/link_analysis/__init__.py b/graphblas_algorithms/algorithms/link_analysis/__init__.py index 015e68b..7e957e4 100644 --- a/graphblas_algorithms/algorithms/link_analysis/__init__.py +++ b/graphblas_algorithms/algorithms/link_analysis/__init__.py @@ -1 +1 @@ -from .pagerank_alg import pagerank +from .pagerank_alg import * diff --git a/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py b/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py index 8811cbd..c64b2dc 100644 --- a/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py +++ b/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py @@ -5,6 +5,7 @@ from graphblas.semiring import plus_first, plus_times from graphblas_algorithms.classes.digraph import to_graph +from graphblas_algorithms.utils import get_all def pagerank_core( @@ -28,18 +29,18 @@ def pagerank_core( if nstart is None: x[:] = 1.0 / N else: - denom = nstart.reduce(allow_empty=False).value + denom = nstart.reduce().get(0) if denom == 0: - raise ZeroDivisionError() + raise ZeroDivisionError("nstart sums to 0") x << nstart / denom # Personalization vector or scalar if 
personalization is None: p = 1.0 / N else: - denom = personalization.reduce(allow_empty=False).value + denom = personalization.reduce().get(0) if denom == 0: - raise ZeroDivisionError() + raise ZeroDivisionError("personalization sums to 0") p = (personalization / denom).new(name="p") # Inverse of row_degrees @@ -64,7 +65,7 @@ def pagerank_core( dangling_mask(mask=~S.S) << 1.0 # Fold alpha constant into dangling_weights (or dangling_mask) if dangling is not None: - dangling_weights = (alpha / dangling.reduce(allow_empty=False).value * dangling).new( + dangling_weights = (alpha / dangling.reduce().get(0) * dangling).new( name="dangling_weights" ) elif personalization is None: @@ -95,7 +96,7 @@ def pagerank_core( x += semiring(w @ A) # plus_first if A.ss.is_iso else plus_times # Check convergence, l1 norm: err = sum(abs(xprev - x)) - xprev << binary.minus(xprev | x, require_monoid=False) + xprev << binary.minus(xprev | x) xprev << unary.abs(xprev) err = xprev.reduce().value if err < N * tol: @@ -138,3 +139,6 @@ def pagerank( row_degrees=row_degrees, ) return G.vector_to_dict(result, fillvalue=0.0) + + +__all__ = get_all(__name__) diff --git a/graphblas_algorithms/algorithms/reciprocity.py b/graphblas_algorithms/algorithms/reciprocity.py index 29a9773..abf0b7f 100644 --- a/graphblas_algorithms/algorithms/reciprocity.py +++ b/graphblas_algorithms/algorithms/reciprocity.py @@ -1,8 +1,8 @@ from graphblas import binary from networkx import NetworkXError -from networkx.utils import not_implemented_for from graphblas_algorithms.classes.digraph import to_directed_graph +from graphblas_algorithms.utils import get_all, not_implemented_for def reciprocity_core(G, mask=None): @@ -45,3 +45,6 @@ def overall_reciprocity_core(G): def overall_reciprocity(G): G = to_directed_graph(G, dtype=bool) return overall_reciprocity_core(G) + + +__all__ = get_all(__name__) diff --git a/graphblas_algorithms/classes/__init__.py b/graphblas_algorithms/classes/__init__.py index 4585e1d..7fa216d 
100644 --- a/graphblas_algorithms/classes/__init__.py +++ b/graphblas_algorithms/classes/__init__.py @@ -1,2 +1,2 @@ -from .digraph import DiGraph -from .graph import Graph +from .digraph import * +from .graph import * diff --git a/graphblas_algorithms/classes/_caching.py b/graphblas_algorithms/classes/_caching.py index cc78664..525de5d 100644 --- a/graphblas_algorithms/classes/_caching.py +++ b/graphblas_algorithms/classes/_caching.py @@ -1,11 +1,8 @@ -from graphblas import agg, op, operator +from graphblas import op, operator def get_reduce_to_vector(key, opname, methodname): - try: - op_ = op.from_string(opname) - except ValueError: - op_ = agg.from_string(opname) + op_ = op.from_string(opname) op_, opclass = operator.find_opclass(op_) keybase = key[:-1] if key[-1] == "-": @@ -101,10 +98,7 @@ def get_reduction(G, mask=None): def get_reduce_to_scalar(key, opname): - try: - op_ = op.from_string(opname) - except ValueError: - op_ = agg.from_string(opname) + op_ = op.from_string(opname) op_, opclass = operator.find_opclass(op_) keybase = key[:-1] if key[-1] == "-": diff --git a/graphblas_algorithms/classes/_utils.py b/graphblas_algorithms/classes/_utils.py index 155280c..0f4083c 100644 --- a/graphblas_algorithms/classes/_utils.py +++ b/graphblas_algorithms/classes/_utils.py @@ -1,5 +1,12 @@ import graphblas as gb +import networkx as nx +import numpy as np from graphblas import Matrix, Vector, binary +from graphblas.matrix import TransposedMatrix + +################ +# Classmethods # +################ def from_networkx(cls, G, weight=None, dtype=None): @@ -12,16 +19,36 @@ def from_networkx(cls, G, weight=None, dtype=None): return rv -def from_graphblas(cls, A): +def from_graphblas(cls, A, *, key_to_id=None): # Does not copy! 
if A.nrows != A.ncols: raise ValueError(f"Adjacency matrix must be square; got {A.nrows} x {A.ncols}") rv = cls() - rv._key_to_id = {i: i for i in range(A.nrows)} + # If there is no mapping, it may be nice to keep this as None + if key_to_id is None: + rv._key_to_id = {i: i for i in range(A.nrows)} + else: + rv._key_to_id = key_to_id rv._A = A return rv +############## +# Properties # +############## + + +def id_to_key(self): + if self._id_to_key is None: + self._id_to_key = {val: key for key, val in self._key_to_id.items()} + return self._id_to_key + + +########### +# Methods # +########### + + def get_property(self, name, *, mask=None): return self._get_property[self._cache_aliases.get(name, name)](self, mask) @@ -67,15 +94,65 @@ def list_to_mask(self, nodes, *, size=None, name="mask"): return self.list_to_vector(nodes, size=size, name=name).S +def list_to_ids(self, nodes): + if nodes is None: + return None + return [self._key_to_id[key] for key in nodes] + + def vector_to_dict(self, v, *, mask=None, fillvalue=None): - if self._id_to_key is None: - self._id_to_key = {val: key for key, val in self._key_to_id.items()} if mask is not None: if fillvalue is not None and v.nvals < mask.parent.nvals: v(mask, binary.first) << fillvalue elif fillvalue is not None and v.nvals < v.size: v(mask=~v.S) << fillvalue - return {self._id_to_key[index]: value for index, value in zip(*v.to_values(sort=False))} + id_to_key = self.id_to_key + return {id_to_key[index]: value for index, value in zip(*v.to_values(sort=False))} + + +def matrix_to_dicts(self, A): + """{row: {col: val}}""" + if isinstance(A, TransposedMatrix): + # Not covered + d = A.T.ss.export("hypercsc") + rows = d["cols"].tolist() + col_indices = d["row_indices"].tolist() + else: + d = A.ss.export("hypercsr") + rows = d["rows"].tolist() + col_indices = d["col_indices"].tolist() + indptr = d["indptr"] + values = d["values"].tolist() + id_to_key = self.id_to_key + return { + id_to_key[row]: { + id_to_key[col]: val for 
col, val in zip(col_indices[start:stop], values[start:stop]) + } + for row, (start, stop) in zip( + rows, np.lib.stride_tricks.sliding_window_view(indptr, 2).tolist() + ) + } + + +def to_networkx(self, edge_attribute="weight"): + # Not covered yet, but will probably be useful soon + if self.is_directed(): + G = nx.DiGraph() + A = self._A + else: + G = nx.Graph() + A = self.get_property("L+") + G.add_nodes_from(self._key_to_id) + id_to_key = self.id_to_key + rows, cols, vals = A.to_values() + rows = (id_to_key[row] for row in rows.tolist()) + cols = (id_to_key[col] for col in cols.tolist()) + if edge_attribute is None: + G.add_edges_from(zip(rows, cols)) + else: + G.add_weighted_edges_from(zip(rows, cols, vals), weight=edge_attribute) + # What else should we copy over? + return G def _cacheit(self, key, func, *args, **kwargs): diff --git a/graphblas_algorithms/classes/digraph.py b/graphblas_algorithms/classes/digraph.py index ae7f38f..1ad0bcc 100644 --- a/graphblas_algorithms/classes/digraph.py +++ b/graphblas_algorithms/classes/digraph.py @@ -4,12 +4,19 @@ from graphblas import Matrix, Vector, binary, select, unary import graphblas_algorithms as ga +from graphblas_algorithms.utils import get_all from . 
import _utils from ._caching import get_reduce_to_scalar, get_reduce_to_vector +def get_A(G, mask=None): + """A""" + return G._A + + def get_AT(G, mask=None): + """A.T""" A = G._A cache = G._cache if "AT" not in cache: @@ -18,6 +25,7 @@ def get_AT(G, mask=None): def get_offdiag(G, mask=None): + """select.offdiag(A)""" A = G._A cache = G._cache if "offdiag" not in cache: @@ -33,6 +41,7 @@ def get_offdiag(G, mask=None): def get_Up(G, mask=None): + """select.triu(A)""" A = G._A cache = G._cache if "U+" not in cache: @@ -51,6 +60,7 @@ def get_Up(G, mask=None): def get_Lp(G, mask=None): + """select.tril(A)""" A = G._A cache = G._cache if "L+" not in cache: @@ -69,6 +79,7 @@ def get_Lp(G, mask=None): def get_Um(G, mask=None): + """select.triu(A, 1)""" A = G._A cache = G._cache if "U-" not in cache: @@ -92,6 +103,7 @@ def get_Um(G, mask=None): def get_Lm(G, mask=None): + """select.tril(A, -1)""" A = G._A cache = G._cache if "L-" not in cache: @@ -115,6 +127,7 @@ def get_Lm(G, mask=None): def get_diag(G, mask=None): + """select.diag(A)""" A = G._A cache = G._cache if "diag" not in cache: @@ -134,6 +147,7 @@ def get_diag(G, mask=None): def get_recip_degreesp(G, mask=None): + """pair(A & A.T).reduce_rowwise()""" A = G._A cache = G._cache if "AT" in cache: @@ -179,6 +193,7 @@ def get_recip_degreesp(G, mask=None): def get_recip_degreesm(G, mask=None): + """C = select.offdiag(A) ; pair(C & C.T).reduce_rowwise()""" A = G._A cache = G._cache if "AT" in cache: @@ -194,9 +209,9 @@ def get_recip_degreesm(G, mask=None): cache["recip_degrees-"] = cache["recip_degrees+"] return cache["recip_degrees-"].dup(mask=mask) elif "recip_degrees+" in cache and "diag" in cache: - rv = binary.minus( - cache["recip_degrees+"] | unary.one(cache["diag"]), require_monoid=False - ).new(mask=mask, name="recip_degrees-") + rv = binary.minus(cache["recip_degrees+"] | unary.one(cache["diag"])).new( + mask=mask, name="recip_degrees-" + ) rv(rv.V, replace=True) << rv # drop 0s return rv elif not 
G.get_property("has_self_edges"): @@ -215,18 +230,16 @@ def get_recip_degreesm(G, mask=None): else: diag = G.get_property("diag", mask=mask) overlap = binary.pair(A & AT).reduce_rowwise().new(mask=mask) - rv = binary.minus(overlap | unary.one(diag), require_monoid=False).new( - name="recip_degrees-" - ) + rv = binary.minus(overlap | unary.one(diag)).new(name="recip_degrees-") rv(rv.V, replace=True) << rv # drop 0s return rv if "recip_degrees-" not in cache: if cache.get("has_self_edges") is False and "recip_degrees+" in cache: cache["recip_degrees-"] = cache["recip_degrees+"] elif "recip_degrees+" in cache and "diag" in cache: - rv = binary.minus( - cache["recip_degrees+"] | unary.one(cache["diag"]), require_monoid=False - ).new(name="recip_degrees-") + rv = binary.minus(cache["recip_degrees+"] | unary.one(cache["diag"])).new( + name="recip_degrees-" + ) rv(rv.V, replace=True) << rv # drop 0s cache["recip_degrees-"] = rv elif not G.get_property("has_self_edges"): @@ -243,9 +256,7 @@ def get_recip_degreesm(G, mask=None): else: diag = G.get_property("diag") overlap = binary.pair(A & AT).reduce_rowwise().new() - rv = binary.minus(overlap | unary.one(diag), require_monoid=False).new( - name="recip_degrees-" - ) + rv = binary.minus(overlap | unary.one(diag)).new(name="recip_degrees-") rv(rv.V, replace=True) << rv # drop 0s cache["recip_degrees-"] = rv if ( @@ -260,6 +271,7 @@ def get_recip_degreesm(G, mask=None): def get_total_degreesp(G, mask=None): + """A.reduce_rowwise(agg.count) + A.reduce_columnwise(agg.count)""" cache = G._cache if mask is not None: if "total_degrees+" in cache: @@ -291,6 +303,7 @@ def get_total_degreesp(G, mask=None): def get_total_degreesm(G, mask=None): + """C = select.offdiag(A) ; C.reduce_rowwise(agg.count) + C.reduce_columnwise(agg.count)""" cache = G._cache if mask is not None: if "total_degrees-" in cache: @@ -322,19 +335,20 @@ def get_total_degreesm(G, mask=None): def get_total_recipp(G, mask=None): + """pair(A & A.T).reduce_scalar()""" A 
= G._A cache = G._cache if "total_recip+" not in cache: if "total_recip-" in cache and cache.get("has_self_edges") is False: cache["total_recip+"] = cache["total_recip-"] elif "recip_degrees+" in cache: - cache["total_recip+"] = cache["recip_degrees+"].reduce(allow_empty=False).value + cache["total_recip+"] = cache["recip_degrees+"].reduce().get(0) else: if "AT" in cache: AT = cache["AT"] else: AT = A.T - cache["total_recip+"] = binary.pair(A & AT).reduce_scalar(allow_empty=False).value + cache["total_recip+"] = binary.pair(A & AT).reduce_scalar().get(0) if "has_self_edges" not in cache and "total_recip-" in cache: cache["has_self_edges"] = cache["total_recip+"] > cache["total_recip-"] if cache.get("has_self_edges") is False: @@ -343,12 +357,13 @@ def get_total_recipp(G, mask=None): def get_total_recipm(G, mask=None): + """C = select.offdiag(A) ; pair(C & C.T).reduce_scalar()""" cache = G._cache if "total_recip-" not in cache: if "total_recip+" in cache and cache.get("has_self_edges") is False: cache["total_recip-"] = cache["total_recip+"] else: - cache["total_recip-"] = G.get_property("recip_degrees-").reduce(allow_empty=False).value + cache["total_recip-"] = G.get_property("recip_degrees-").reduce().get(0) if "has_self_edges" not in cache and "total_recip+" in cache: cache["has_self_edges"] = cache["total_recip+"] > cache["total_recip-"] if cache.get("has_self_edges") is False: @@ -357,6 +372,7 @@ def get_total_recipm(G, mask=None): def has_self_edges(G, mask=None): + """A.diag().nvals > 0""" A = G._A cache = G._cache if "has_self_edges" not in cache: @@ -383,21 +399,13 @@ def has_self_edges(G, mask=None): elif "recip_degrees-" in cache and "recip_degrees+" in cache: cache["has_self_edges"] = not cache["recip_degrees-"].isequal(cache["recip_degrees+"]) elif "row_degrees-" in cache: - cache["has_self_edges"] = ( - cache["row_degrees-"].reduce(allow_empty=False).value < A.nvals - ) + cache["has_self_edges"] = cache["row_degrees-"].reduce().get(0) < A.nvals elif 
"column_degrees-" in cache: - cache["has_self_edges"] = ( - cache["column_degrees-"].reduce(allow_empty=False).value < A.nvals - ) + cache["has_self_edges"] = cache["column_degrees-"].reduce().get(0) < A.nvals elif "total_degrees-" in cache: - cache["has_self_edges"] = ( - cache["total_degrees-"].reduce(allow_empty=False).value < 2 * A.nvals - ) + cache["has_self_edges"] = cache["total_degrees-"].reduce().get(0) < 2 * A.nvals elif "total_degrees+" in cache: - cache["has_self_edges"] = ( - cache["total_degrees+"].reduce(allow_empty=False).value > 2 * A.nvals - ) + cache["has_self_edges"] = cache["total_degrees+"].reduce().get(0) > 2 * A.nvals else: G.get_property("diag") return cache["has_self_edges"] @@ -463,6 +471,7 @@ class DiGraph: key: i for i, key in enumerate( [ + "A", "AT", "offdiag", "U+", @@ -487,6 +496,7 @@ class DiGraph: ) _get_property = AutoDict( { + "A": get_A, "AT": get_AT, "offdiag": get_offdiag, "U+": get_Up, @@ -527,14 +537,19 @@ def __init__(self, incoming_graph_data=None, **attr): # Graphblas-specific methods from_networkx = classmethod(_utils.from_networkx) from_graphblas = classmethod(_utils.from_graphblas) + id_to_key = property(_utils.id_to_key) get_property = _utils.get_property get_properties = _utils.get_properties dict_to_vector = _utils.dict_to_vector list_to_vector = _utils.list_to_vector list_to_mask = _utils.list_to_mask + list_to_ids = _utils.list_to_ids + matrix_to_dicts = _utils.matrix_to_dicts + to_networkx = _utils.to_networkx vector_to_dict = _utils.vector_to_dict _cacheit = _utils._cacheit + # NetworkX methods def to_directed_class(self): return DiGraph @@ -573,3 +588,6 @@ def is_multigraph(self): def is_directed(self): return True + + +__all__ = get_all(__name__) diff --git a/graphblas_algorithms/classes/graph.py b/graphblas_algorithms/classes/graph.py index 2be2a7f..37e4513 100644 --- a/graphblas_algorithms/classes/graph.py +++ b/graphblas_algorithms/classes/graph.py @@ -4,18 +4,26 @@ from graphblas import Matrix, Vector, 
select import graphblas_algorithms as ga +from graphblas_algorithms.utils import get_all from . import _utils from ._caching import get_reduce_to_scalar, get_reduce_to_vector +def get_A(G, mask=None): + """A""" + return G._A + + def get_AT(G, mask=None): + """A.T""" A = G._A G._cache["AT"] = A return A def get_offdiag(G, mask=None): + """select.offdiag(A)""" A = G._A cache = G._cache if "offdiag" not in cache: @@ -31,6 +39,7 @@ def get_offdiag(G, mask=None): def get_Up(G, mask=None): + """select.triu(A)""" A = G._A cache = G._cache if "U+" not in cache: @@ -46,6 +55,7 @@ def get_Up(G, mask=None): def get_Lp(G, mask=None): + """select.tril(A)""" A = G._A cache = G._cache if "L+" not in cache: @@ -61,6 +71,7 @@ def get_Lp(G, mask=None): def get_Um(G, mask=None): + """select.triu(A, 1)""" A = G._A cache = G._cache if "U-" not in cache: @@ -81,6 +92,7 @@ def get_Um(G, mask=None): def get_Lm(G, mask=None): + """select.tril(A, -1)""" A = G._A cache = G._cache if "L-" not in cache: @@ -101,6 +113,7 @@ def get_Lm(G, mask=None): def get_diag(G, mask=None): + """A.diag()""" A = G._A cache = G._cache if "diag" not in cache: @@ -118,6 +131,7 @@ def get_diag(G, mask=None): def has_self_edges(G, mask=None): + """A.diag().nvals > 0""" A = G._A cache = G._cache if "has_self_edges" not in cache: @@ -183,6 +197,7 @@ class Graph: key: i for i, key in enumerate( [ + "A", "AT", "offdiag", "U+", @@ -199,6 +214,7 @@ class Graph: ) _get_property = AutoDict( { + "A": get_A, "AT": get_AT, "offdiag": get_offdiag, "U+": get_Up, @@ -235,14 +251,19 @@ def __init__(self, incoming_graph_data=None, **attr): # Graphblas-specific methods from_networkx = classmethod(_utils.from_networkx) from_graphblas = classmethod(_utils.from_graphblas) + id_to_key = property(_utils.id_to_key) get_property = _utils.get_property get_properties = _utils.get_properties dict_to_vector = _utils.dict_to_vector list_to_vector = _utils.list_to_vector list_to_mask = _utils.list_to_mask + list_to_ids = _utils.list_to_ids + 
matrix_to_dicts = _utils.matrix_to_dicts + to_networkx = _utils.to_networkx vector_to_dict = _utils.vector_to_dict _cacheit = _utils._cacheit + # NetworkX methods def to_directed_class(self): return ga.DiGraph @@ -281,3 +302,6 @@ def is_multigraph(self): def is_directed(self): return False + + +__all__ = get_all(__name__) diff --git a/graphblas_algorithms/utils/__init__.py b/graphblas_algorithms/utils/__init__.py index f598751..345b7ef 100644 --- a/graphblas_algorithms/utils/__init__.py +++ b/graphblas_algorithms/utils/__init__.py @@ -1 +1,2 @@ -from .decorators import not_implemented_for +from ._misc import * +from .decorators import * diff --git a/graphblas_algorithms/utils/_misc.py b/graphblas_algorithms/utils/_misc.py new file mode 100644 index 0000000..bf05029 --- /dev/null +++ b/graphblas_algorithms/utils/_misc.py @@ -0,0 +1,9 @@ +from importlib import import_module + +__all__ = ["get_all"] + + +def get_all(name): + this = import_module(name) + that = import_module(name.replace("graphblas_algorithms", "networkx", 1)) + return [key for key in that.__all__ if key in this.__dict__] diff --git a/graphblas_algorithms/utils/decorators.py b/graphblas_algorithms/utils/decorators.py index 5aee3a2..90dbd9f 100644 --- a/graphblas_algorithms/utils/decorators.py +++ b/graphblas_algorithms/utils/decorators.py @@ -1,6 +1,8 @@ from graphblas import Matrix from networkx.utils.decorators import not_implemented_for as _not_implemented_for +from ._misc import get_all + def not_implemented_for(*graph_types): rv = _not_implemented_for(*graph_types) @@ -15,3 +17,6 @@ def inner(g): rv._func = inner return rv + + +__all__ = get_all(__name__) diff --git a/requirements.txt b/requirements.txt index d6ac249..f0bd677 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -python-graphblas >=2022.4.2 +python-graphblas >=2022.5.0 networkx