diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 0000000..4fd4800 --- /dev/null +++ b/.codecov.yml @@ -0,0 +1,14 @@ +coverage: + status: + project: + default: + informational: true + patch: + default: + informational: true + changes: false +comment: + layout: "header, diff" + behavior: default +github_checks: + annotations: false diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 5ef2b10..97bb856 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,10 +1,12 @@ +# Rely on pre-commit.ci instead name: Lint via pre-commit on: - pull_request: - push: - branches-ignore: - - main + workflow_dispatch: + # pull_request: + # push: + # branches-ignore: + # - main permissions: contents: read @@ -14,8 +16,8 @@ jobs: name: pre-commit-hooks runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: "3.10" - uses: pre-commit/action@v3.0.0 diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml index d3409c2..f848ad6 100644 --- a/.github/workflows/publish_pypi.yml +++ b/.github/workflows/publish_pypi.yml @@ -14,20 +14,20 @@ jobs: shell: bash -l {0} steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: "3.8" + python-version: "3.10" - name: Install build dependencies run: | python -m pip install --upgrade pip python -m pip install build twine - name: Build wheel and sdist run: python -m build --sdist --wheel - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: releases path: dist @@ -35,7 +35,7 @@ jobs: - name: Check with twine run: python -m twine check --strict dist/* - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@v1.6.4 + uses: pypa/gh-action-pypi-publish@v1.8.11 with: user: __token__ password: ${{ secrets.PYPI_TOKEN }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d22022e..47ca7bc 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -15,31 +15,52 @@ jobs: fail-fast: true matrix: os: ["ubuntu-latest", "macos-latest", "windows-latest"] - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.10", "3.11", "3.12"] steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 0 - - name: Conda - uses: conda-incubator/setup-miniconda@v2 + - name: Setup mamba + uses: conda-incubator/setup-miniconda@v3 + id: setup_mamba + continue-on-error: true + with: + miniforge-variant: Mambaforge + miniforge-version: latest + use-mamba: true + python-version: ${{ matrix.python-version }} + channels: conda-forge,${{ contains(matrix.python-version, 'pypy') && 'defaults' || 'nodefaults' }} + channel-priority: ${{ contains(matrix.python-version, 'pypy') && 'flexible' || 'strict' }} + activate-environment: graphblas + auto-activate-base: false + - name: Setup conda + uses: conda-incubator/setup-miniconda@v3 + id: setup_conda + if: steps.setup_mamba.outcome == 'failure' + continue-on-error: false with: auto-update-conda: true python-version: ${{ matrix.python-version }} - channels: conda-forge - activate-environment: testing + channels: conda-forge,${{ contains(matrix.python-version, 'pypy') && 'defaults' || 'nodefaults' }} + channel-priority: ${{ contains(matrix.python-version, 'pypy') && 'flexible' || 'strict' }} + 
activate-environment: graphblas + auto-activate-base: false - name: Install dependencies run: | - conda install -c conda-forge python-graphblas scipy pandas pytest-cov pytest-randomly + $(command -v mamba || command -v conda) install python-suitesparse-graphblas scipy pandas donfig pyyaml numpy python-graphblas \ + pytest-cov pytest-randomly pytest-mpl networkx # matplotlib lxml pygraphviz pydot sympy # Extra networkx deps we don't need yet - pip install git+https://github.com/networkx/networkx.git@main --no-deps + # Sometimes we prefer to use the latest release of NetworkX or the latest development from github + # pip install git+https://github.com/networkx/networkx.git@main --no-deps pip install -e . --no-deps - name: PyTest run: | python -c 'import sys, graphblas_algorithms; assert "networkx" not in sys.modules' coverage run --branch -m pytest --color=yes -v --check-structure coverage report - NETWORKX_GRAPH_CONVERT=graphblas pytest --color=yes --pyargs networkx --cov --cov-append + # NETWORKX_GRAPH_CONVERT=graphblas pytest --color=yes --pyargs networkx --cov --cov-append + ./run_nx_tests.sh --color=yes --cov --cov-append coverage report coverage xml - name: Coverage diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8bf0101..e4525c5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,77 +4,102 @@ # To run: `pre-commit run --all-files` # To update: `pre-commit autoupdate` # - &flake8_dependencies below needs updated manually +ci: + # See: https://pre-commit.ci/#configuration + autofix_prs: false + autoupdate_schedule: quarterly + autoupdate_commit_msg: "chore: update pre-commit hooks" + autofix_commit_msg: "style: pre-commit fixes" + skip: [no-commit-to-branch] fail_fast: true default_language_version: python: python3 repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: check-added-large-files + - id: check-case-conflict + - id: check-merge-conflict + # - id: check-symlinks - id: check-ast - id: check-toml - id: check-yaml - id: debug-statements - id: end-of-file-fixer + exclude_types: [svg] - id: mixed-line-ending - id: trailing-whitespace + - id: name-tests-test + args: ["--pytest-test-first"] - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.12.1 + rev: v0.15 hooks: - id: validate-pyproject name: Validate pyproject.toml - - repo: https://github.com/myint/autoflake - rev: v2.0.1 + # I don't yet trust ruff to do what autoflake does + - repo: https://github.com/PyCQA/autoflake + rev: v2.2.1 hooks: - id: autoflake args: [--in-place] - repo: https://github.com/pycqa/isort - rev: 5.12.0 + rev: 5.13.2 hooks: - id: isort - repo: https://github.com/asottile/pyupgrade - rev: v3.3.1 + rev: v3.15.0 hooks: - id: pyupgrade - args: [--py38-plus] + args: [--py310-plus] - repo: https://github.com/MarcoGorelli/auto-walrus rev: v0.2.2 hooks: - id: auto-walrus args: [--line-length, "100"] - repo: https://github.com/psf/black - rev: 23.1.0 + rev: 23.12.1 hooks: - id: black # - id: black-jupyter + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.9 + hooks: + - id: ruff + args: [--fix-only, --show-fixes] - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 + rev: 6.1.0 hooks: - id: flake8 additional_dependencies: &flake8_dependencies # These versions need updated manually - - flake8==6.0.0 - - flake8-comprehensions==3.10.1 - - flake8-bugbear==23.2.13 - - flake8-simplify==0.19.3 + - flake8==6.1.0 + - flake8-bugbear==23.12.2 + - flake8-simplify==0.21.0 - repo: 
https://github.com/asottile/yesqa - rev: v1.4.0 + rev: v1.5.0 hooks: - id: yesqa additional_dependencies: *flake8_dependencies - repo: https://github.com/codespell-project/codespell - rev: v2.2.2 + rev: v2.2.6 hooks: - id: codespell types_or: [python, rst, markdown] additional_dependencies: [tomli] files: ^(graphblas_algorithms|docs)/ - - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.253 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.9 hooks: - id: ruff + # `pyroma` may help keep our package standards up to date if best practices change. + # This is probably a "low value" check though and safe to remove if we want faster pre-commit. + - repo: https://github.com/regebro/pyroma + rev: "4.2" + hooks: + - id: pyroma + args: [-n, "10", .] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: no-commit-to-branch # no commit directly to main diff --git a/MANIFEST.in b/MANIFEST.in index b8af874..c69947d 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,5 @@ recursive-include graphblas_algorithms *.py +prune docs include setup.py include README.md include LICENSE diff --git a/README.md b/README.md index a4dfd50..ed66df3 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,29 @@ -# **GraphBLAS Algorithms** - +![GraphBLAS Algorithms](https://raw.githubusercontent.com/python-graphblas/graphblas-algorithms/main/docs/_static/img/logo-name-medium.svg) +
[![conda-forge](https://img.shields.io/conda/vn/conda-forge/graphblas-algorithms.svg)](https://anaconda.org/conda-forge/graphblas-algorithms) [![pypi](https://img.shields.io/pypi/v/graphblas-algorithms.svg)](https://pypi.python.org/pypi/graphblas-algorithms/) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/graphblas-algorithms)](https://pypi.python.org/pypi/graphblas-algorithms/) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/python-graphblas/graphblas-algorithms/blob/main/LICENSE) +
[![Tests](https://github.com/python-graphblas/graphblas-algorithms/workflows/Tests/badge.svg?branch=main)](https://github.com/python-graphblas/graphblas-algorithms/actions) [![Coverage](https://codecov.io/gh/python-graphblas/graphblas-algorithms/branch/main/graph/badge.svg)](https://codecov.io/gh/python-graphblas/graphblas-algorithms) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7329185.svg)](https://doi.org/10.5281/zenodo.7329185) [![Discord](https://img.shields.io/badge/Chat-Discord-blue)](https://discord.com/invite/vur45CbwMz) -GraphBLAS algorithms written in Python with [Python-graphblas](https://python-graphblas.readthedocs.io/en/latest/). We are trying to target the NetworkX API algorithms where possible. +`graphblas-algorithms` is a collection of GraphBLAS algorithms written using +[`python-graphblas`](https://python-graphblas.readthedocs.io/en/latest/). +It may be used directly or as an experimental +[backend to NetworkX](https://networkx.org/documentation/stable/reference/classes/index.html#backends). + +Why use GraphBLAS Algorithms? Because it is *fast*, *flexible*, and *familiar* by using the NetworkX API. + +Are we missing any [algorithms](#Plugin-Algorithms) that you want? +[Please let us know!](https://github.com/python-graphblas/graphblas-algorithms/issues) +
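+For example, here is a minimal sketch of both usage modes (the random graph and
+the choice of `pagerank` are illustrative; any implemented algorithm works the
+same way once the package is installed as shown in the Installation section below):
+
+```python
+import networkx as nx
+import graphblas_algorithms as ga
+
+# Build any NetworkX graph, then convert it once to a GraphBLAS graph
+G = nx.erdos_renyi_graph(1000, 0.02, seed=42)
+G2 = ga.Graph.from_networkx(G)
+
+# 1. Direct use: call the algorithm from graphblas-algorithms
+pr = ga.pagerank(G2)
+
+# 2. Backend use: pass the GraphBLAS graph to the NetworkX API,
+#    which dispatches to this package
+pr_nx = nx.pagerank(G2)
+```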
+![GraphBLAS vs NetworkX](https://raw.githubusercontent.com/python-graphblas/graphblas-algorithms/main/docs/_static/img/graphblas-vs-networkx.png)
+
+GraphBLAS vs igraph ### Installation ``` @@ -98,69 +112,138 @@ The following NetworkX algorithms have been implemented by graphblas-algorithms and can be used following the dispatch pattern shown above. -- Boundary - - edge_boundary - - node_boundary -- Centrality - - degree_centrality - - eigenvector_centrality - - in_degree_centrality - - katz_centrality - - out_degree_centrality -- Cluster - - average_clustering - - clustering - - generalized_degree - - square_clustering - - transitivity - - triangles -- Community - - inter_community_edges - - intra_community_edges -- Core - - k_truss -- Cuts - - boundary_expansion - - conductance - - cut_size - - edge_expansion - - mixing_expansion - - node_expansion - - normalized_cut_size - - volume -- DAG - - ancestors - - descendants -- Dominating - - is_dominating_set -- Isolate - - is_isolate - - isolates - - number_of_isolates -- Link Analysis - - hits - - pagerank -- Reciprocity - - overall_reciprocity - - reciprocity -- Regular - - is_k_regular - - is_regular -- Shortest Paths - - floyd_warshall - - floyd_warshall_predecessor_and_distance - - single_source_bellman_ford_path_length - - all_pairs_bellman_ford_path_length - - has_path -- Simple Paths - - is_simple_path -- S Metric - - s_metric -- Structural Holes - - mutual_weight -- Tournament - - is_tournament - - score_sequence - - tournament_matrix -- Triads - - is_triad +[//]: # (Begin auto-generated code) + +``` +graphblas_algorithms.nxapi +├── boundary +│ ├── edge_boundary +│ └── node_boundary +├── centrality +│ ├── degree_alg +│ │ ├── degree_centrality +│ │ ├── in_degree_centrality +│ │ └── out_degree_centrality +│ ├── eigenvector +│ │ └── eigenvector_centrality +│ └── katz +│ └── katz_centrality +├── cluster +│ ├── average_clustering +│ ├── clustering +│ ├── generalized_degree +│ ├── square_clustering +│ ├── transitivity +│ └── triangles +├── community +│ └── quality +│ ├── inter_community_edges +│ └── intra_community_edges +├── components +│ ├── connected +│ │ ├── is_connected +│ │ └── node_connected_component +│ └── weakly_connected +│ └── is_weakly_connected +├── core +│ └── k_truss +├── cuts +│ ├── boundary_expansion +│ ├── conductance +│ ├── cut_size +│ ├── edge_expansion +│ ├── mixing_expansion +│ ├── node_expansion +│ ├── normalized_cut_size +│ └── volume +├── dag +│ ├── ancestors +│ └── descendants +├── dominating +│ └── is_dominating_set +├── efficiency_measures +│ └── efficiency +├── generators +│ └── ego +│ └── ego_graph +├── isolate +│ ├── is_isolate +│ ├── isolates +│ └── number_of_isolates +├── isomorphism +│ └── isomorph +│ ├── fast_could_be_isomorphic +│ └── faster_could_be_isomorphic +├── linalg +│ ├── bethehessianmatrix +│ │ └── bethe_hessian_matrix +│ ├── graphmatrix +│ │ └── adjacency_matrix +│ ├── laplacianmatrix +│ │ ├── laplacian_matrix +│ │ └── normalized_laplacian_matrix +│ └── modularitymatrix +│ ├── directed_modularity_matrix +│ └── modularity_matrix +├── link_analysis +│ ├── hits_alg +│ │ └── hits +│ └── pagerank_alg +│ ├── google_matrix +│ └── pagerank +├── lowest_common_ancestors +│ └── lowest_common_ancestor +├── operators +│ ├── binary +│ │ ├── compose +│ │ ├── difference +│ │ ├── disjoint_union +│ │ ├── full_join +│ │ ├── intersection +│ │ ├── symmetric_difference +│ │ └── union +│ └── unary +│ ├── complement +│ └── reverse +├── reciprocity +│ ├── overall_reciprocity +│ └── reciprocity +├── regular +│ ├── is_k_regular +│ └── is_regular +├── shortest_paths +│ ├── dense +│ │ ├── floyd_warshall +│ │ ├── floyd_warshall_numpy +│ │ └── 
floyd_warshall_predecessor_and_distance +│ ├── generic +│ │ └── has_path +│ ├── unweighted +│ │ ├── all_pairs_shortest_path_length +│ │ ├── single_source_shortest_path_length +│ │ └── single_target_shortest_path_length +│ └── weighted +│ ├── all_pairs_bellman_ford_path_length +│ ├── bellman_ford_path +│ ├── bellman_ford_path_length +│ ├── negative_edge_cycle +│ └── single_source_bellman_ford_path_length +├── simple_paths +│ └── is_simple_path +├── smetric +│ └── s_metric +├── structuralholes +│ └── mutual_weight +├── tournament +│ ├── is_tournament +│ ├── score_sequence +│ └── tournament_matrix +├── traversal +│ └── breadth_first_search +│ ├── bfs_layers +│ └── descendants_at_distance +└── triads + └── is_triad +``` + +[//]: # (End auto-generated code) diff --git a/_nx_graphblas/__init__.py b/_nx_graphblas/__init__.py new file mode 100644 index 0000000..6ffa061 --- /dev/null +++ b/_nx_graphblas/__init__.py @@ -0,0 +1,107 @@ +def get_info(): + return { + "backend_name": "graphblas", + "project": "graphblas-algorithms", + "package": "graphblas_algorithms", + "url": "https://github.com/python-graphblas/graphblas-algorithms", + "short_summary": "OpenMP-enabled sparse linear algebra backend.", + # "description": "TODO", + "functions": { + "adjacency_matrix": {}, + "all_pairs_bellman_ford_path_length": { + "extra_parameters": { + "chunksize : int or str, optional": "Split the computation into chunks; " + 'may specify size as string or number of rows. Default "10 MiB"', + }, + }, + "all_pairs_shortest_path_length": { + "extra_parameters": { + "chunksize : int or str, optional": "Split the computation into chunks; " + 'may specify size as string or number of rows. Default "10 MiB"', + }, + }, + "ancestors": {}, + "average_clustering": {}, + "bellman_ford_path": {}, + "bellman_ford_path_length": {}, + "bethe_hessian_matrix": {}, + "bfs_layers": {}, + "boundary_expansion": {}, + "clustering": {}, + "complement": {}, + "compose": {}, + "conductance": {}, + "cut_size": {}, + "degree_centrality": {}, + "descendants": {}, + "descendants_at_distance": {}, + "difference": {}, + "directed_modularity_matrix": {}, + "disjoint_union": {}, + "edge_boundary": {}, + "edge_expansion": {}, + "efficiency": {}, + "ego_graph": {}, + "eigenvector_centrality": {}, + "fast_could_be_isomorphic": {}, + "faster_could_be_isomorphic": {}, + "floyd_warshall": {}, + "floyd_warshall_numpy": {}, + "floyd_warshall_predecessor_and_distance": {}, + "full_join": {}, + "generalized_degree": {}, + "google_matrix": {}, + "has_path": {}, + "hits": {}, + "in_degree_centrality": {}, + "inter_community_edges": {}, + "intersection": {}, + "intra_community_edges": {}, + "is_connected": {}, + "is_dominating_set": {}, + "is_isolate": {}, + "is_k_regular": {}, + "isolates": {}, + "is_regular": {}, + "is_simple_path": {}, + "is_tournament": {}, + "is_triad": {}, + "is_weakly_connected": {}, + "katz_centrality": {}, + "k_truss": {}, + "laplacian_matrix": {}, + "lowest_common_ancestor": {}, + "mixing_expansion": {}, + "modularity_matrix": {}, + "mutual_weight": {}, + "negative_edge_cycle": {}, + "node_boundary": {}, + "node_connected_component": {}, + "node_expansion": {}, + "normalized_cut_size": {}, + "normalized_laplacian_matrix": {}, + "number_of_isolates": {}, + "out_degree_centrality": {}, + "overall_reciprocity": {}, + "pagerank": {}, + "reciprocity": {}, + "reverse": {}, + "score_sequence": {}, + "single_source_bellman_ford_path_length": {}, + "single_source_shortest_path_length": {}, + "single_target_shortest_path_length": {}, + 
"s_metric": {}, + "square_clustering": { + "extra_parameters": { + "chunksize : int or str, optional": "Split the computation into chunks; " + 'may specify size as string or number of rows. Default "256 MiB"', + }, + }, + "symmetric_difference": {}, + "tournament_matrix": {}, + "transitivity": {}, + "triangles": {}, + "union": {}, + "volume": {}, + }, + } diff --git a/docs/_static/img/graphblas-vs-igraph.png b/docs/_static/img/graphblas-vs-igraph.png new file mode 100755 index 0000000..4c253d1 Binary files /dev/null and b/docs/_static/img/graphblas-vs-igraph.png differ diff --git a/docs/_static/img/graphblas-vs-networkx.png b/docs/_static/img/graphblas-vs-networkx.png new file mode 100755 index 0000000..bf9cb69 Binary files /dev/null and b/docs/_static/img/graphblas-vs-networkx.png differ diff --git a/docs/_static/img/logo-name-medium.svg b/docs/_static/img/logo-name-medium.svg new file mode 100644 index 0000000..81b7b01 --- /dev/null +++ b/docs/_static/img/logo-name-medium.svg @@ -0,0 +1 @@ + diff --git a/environment.yml b/environment.yml index 9342aa4..06142d1 100644 --- a/environment.yml +++ b/environment.yml @@ -42,6 +42,8 @@ dependencies: - pydot - pygraphviz - sympy + # For updating algorithm list in README + - rich # For linting - pre-commit # For testing diff --git a/graphblas_algorithms/__init__.py b/graphblas_algorithms/__init__.py index f9bbcf4..e86efa9 100644 --- a/graphblas_algorithms/__init__.py +++ b/graphblas_algorithms/__init__.py @@ -1,6 +1,8 @@ import importlib.metadata from .classes import * +from .generators import * +from .linalg import * from .algorithms import * # isort:skip diff --git a/graphblas_algorithms/algorithms/__init__.py b/graphblas_algorithms/algorithms/__init__.py index 0e4c9ee..be06324 100644 --- a/graphblas_algorithms/algorithms/__init__.py +++ b/graphblas_algorithms/algorithms/__init__.py @@ -3,12 +3,17 @@ from .centrality import * from .cluster import * from .community import * +from .components import * from .core import * from .cuts import * from .dag import * from .dominating import * +from .efficiency_measures import * from .isolate import * +from .isomorphism import * from .link_analysis import * +from .lowest_common_ancestors import * +from .operators import * from .reciprocity import * from .regular import * from .shortest_paths import * @@ -16,4 +21,5 @@ from .smetric import * from .structuralholes import * from .tournament import * +from .traversal import * from .triads import * diff --git a/graphblas_algorithms/algorithms/_bfs.py b/graphblas_algorithms/algorithms/_bfs.py new file mode 100644 index 0000000..8189aae --- /dev/null +++ b/graphblas_algorithms/algorithms/_bfs.py @@ -0,0 +1,204 @@ +"""BFS routines used by other algorithms""" + +import numpy as np +from graphblas import Matrix, Vector, binary, indexunary, replace, semiring, unary +from graphblas.semiring import any_pair + + +def _get_cutoff(n, cutoff): + if cutoff is None or cutoff >= n: + return n # Everything + return cutoff + 1 # Inclusive + + +# Push-pull optimization is possible, but annoying to implement +def _bfs_plain( + G, source=None, target=None, *, index=None, cutoff=None, transpose=False, name="bfs_plain" +): + if source is not None: + if source not in G._key_to_id: + raise KeyError(f"The node {source} is not in the graph") + index = G._key_to_id[source] + if target is not None: + if target not in G._key_to_id: + raise KeyError(f"The node {target} is not in the graph") + dst_id = G._key_to_id[target] + else: + dst_id = None + A = G.get_property("offdiag") + if 
transpose and G.is_directed(): + A = A.T # TODO: should we use "AT" instead? + n = A.nrows + v = Vector(bool, n, name=name) + q = Vector(bool, n, name="q") + v[index] = True + q[index] = True + any_pair_bool = any_pair[bool] + cutoff = _get_cutoff(n, cutoff) + for _i in range(1, cutoff): + q(~v.S, replace) << any_pair_bool(q @ A) + if q.nvals == 0: + break + v(q.S) << True + if dst_id is not None and dst_id in q: + break + return v + + +def _bfs_level(G, source, target=None, *, cutoff=None, transpose=False, dtype=int): + if dtype == bool: + dtype = int + index = G._key_to_id[source] + if target is not None: + if target not in G._key_to_id: + raise KeyError(f"The node {target} is not in the graph") + dst_id = G._key_to_id[target] + else: + dst_id = None + A = G.get_property("offdiag") + if transpose and G.is_directed(): + A = A.T # TODO: should we use "AT" instead? + n = A.nrows + v = Vector(dtype, n, name="bfs_level") + q = Vector(bool, n, name="q") + v[index] = 0 + q[index] = True + any_pair_bool = any_pair[bool] + cutoff = _get_cutoff(n, cutoff) + for i in range(1, cutoff): + q(~v.S, replace) << any_pair_bool(q @ A) + if q.nvals == 0: + break + v(q.S) << i + if dst_id is not None and dst_id in q: + break + return v + + +def _bfs_levels(G, nodes, *, cutoff=None, dtype=int): + if dtype == bool: + dtype = int + A = G.get_property("offdiag") + n = A.nrows + if nodes is None: + # TODO: `D = Vector.from_scalar(0, n, dtype).diag()` + D = Vector(dtype, n, name="bfs_levels_vector") + D << 0 + D = D.diag(name="bfs_levels") + else: + ids = G.list_to_ids(nodes) + D = Matrix.from_coo( + np.arange(len(ids), dtype=np.uint64), + ids, + 0, + dtype, + nrows=len(ids), + ncols=n, + name="bfs_levels", + ) + Q = unary.one[bool](D).new(name="Q") + any_pair_bool = any_pair[bool] + cutoff = _get_cutoff(n, cutoff) + for i in range(1, cutoff): + Q(~D.S, replace) << any_pair_bool(Q @ A) + if Q.nvals == 0: + break + D(Q.S) << i + return D + + +def _bfs_parent(G, source, target=None, *, cutoff=None, transpose=False, dtype=int): + if dtype == bool: + dtype = int + index = G._key_to_id[source] + if target is not None: + dst_id = G._key_to_id[target] + else: + dst_id = None + A = G.get_property("offdiag") + if transpose and G.is_directed(): + A = A.T # TODO: should we use "AT" instead? + n = A.nrows + v = Vector(dtype, n, name="bfs_parent") + q = Vector(dtype, n, name="q") + v[index] = index + q[index] = index + min_first = semiring.min_first[v.dtype] + index = indexunary.index[v.dtype] + cutoff = _get_cutoff(n, cutoff) + for _i in range(1, cutoff): + q(~v.S, replace) << min_first(q @ A) + if q.nvals == 0: + break + v(q.S) << q + if dst_id is not None and dst_id in q: + break + q << index(q) + return v + + +# TODO: benchmark this and the version commented out below +def _bfs_plain_bidirectional(G, source): + # Bi-directional BFS w/o symmetrizing the adjacency matrix + index = G._key_to_id[source] + A = G.get_property("offdiag") + # XXX: should we use `AT` if available? 
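+    # Strategy: alternately expand an out-frontier along out-edges (q @ A) and
+    # an in-frontier along in-edges (A @ q), merging the two frontiers when
+    # both are non-empty, and stop once both frontiers are exhausted.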
+ n = A.nrows + v = Vector(bool, n, name="bfs_plain") + q_out = Vector(bool, n, name="q_out") + q_in = Vector(bool, n, name="q_in") + v[index] = True + q_in[index] = True + any_pair_bool = any_pair[bool] + is_out_empty = True + is_in_empty = False + for _i in range(1, n): + # Traverse out-edges from the most recent `q_in` and `q_out` + if is_out_empty: + q_out(~v.S) << any_pair_bool(q_in @ A) + else: + q_out << binary.any(q_out | q_in) + q_out(~v.S, replace) << any_pair_bool(q_out @ A) + is_out_empty = q_out.nvals == 0 + if not is_out_empty: + v(q_out.S) << True + elif is_in_empty: + break + # Traverse in-edges from the most recent `q_in` and `q_out` + if is_in_empty: + q_in(~v.S) << any_pair_bool(A @ q_out) + else: + q_in << binary.any(q_out | q_in) + q_in(~v.S, replace) << any_pair_bool(A @ q_in) + is_in_empty = q_in.nvals == 0 + if not is_in_empty: + v(q_in.S) << True + elif is_out_empty: + break + return v + + +""" +def _bfs_plain_bidirectional(G, source): + # Bi-directional BFS w/o symmetrizing the adjacency matrix + index = G._key_to_id[source] + A = G.get_property("offdiag") + n = A.nrows + v = Vector(bool, n, name="bfs_plain") + q = Vector(bool, n, name="q") + q2 = Vector(bool, n, name="q_2") + v[index] = True + q[index] = True + any_pair_bool = any_pair[bool] + for _i in range(1, n): + q2(~v.S, replace) << any_pair_bool(q @ A) + v(q2.S) << True + q(~v.S, replace) << any_pair_bool(A @ q) + if q.nvals == 0: + if q2.nvals == 0: + break + q, q2 = q2, q + elif q2.nvals != 0: + q << binary.any(q | q2) + return v +""" diff --git a/graphblas_algorithms/algorithms/centrality/eigenvector.py b/graphblas_algorithms/algorithms/centrality/eigenvector.py index 5a2ee78..e9385f3 100644 --- a/graphblas_algorithms/algorithms/centrality/eigenvector.py +++ b/graphblas_algorithms/algorithms/centrality/eigenvector.py @@ -1,11 +1,7 @@ from graphblas import Vector -from graphblas_algorithms.algorithms._helpers import is_converged, normalize -from graphblas_algorithms.algorithms.exceptions import ( - ConvergenceFailure, - GraphBlasAlgorithmException, - PointlessConcept, -) +from .._helpers import is_converged, normalize +from ..exceptions import ConvergenceFailure, GraphBlasAlgorithmException, PointlessConcept __all__ = ["eigenvector_centrality"] @@ -27,7 +23,7 @@ def eigenvector_centrality(G, max_iter=100, tol=1.0e-6, nstart=None, name="eigen # Power iteration: make up to max_iter iterations A = G._A xprev = Vector(float, N, name="x_prev") - for _ in range(max_iter): + for _i in range(max_iter): xprev << x x += x @ A normalize(x, "L2") diff --git a/graphblas_algorithms/algorithms/centrality/katz.py b/graphblas_algorithms/algorithms/centrality/katz.py index 3d21331..8087e85 100644 --- a/graphblas_algorithms/algorithms/centrality/katz.py +++ b/graphblas_algorithms/algorithms/centrality/katz.py @@ -2,11 +2,8 @@ from graphblas.core.utils import output_type from graphblas.semiring import plus_first, plus_times -from graphblas_algorithms.algorithms._helpers import is_converged, normalize -from graphblas_algorithms.algorithms.exceptions import ( - ConvergenceFailure, - GraphBlasAlgorithmException, -) +from .._helpers import is_converged, normalize +from ..exceptions import ConvergenceFailure, GraphBlasAlgorithmException __all__ = ["katz_centrality"] @@ -44,7 +41,7 @@ def katz_centrality( # Power iteration: make up to max_iter iterations xprev = Vector(float, N, name="x_prev") - for _ in range(max_iter): + for _i in range(max_iter): xprev, x = x, xprev # x << alpha * semiring(xprev @ A) + beta x << semiring(xprev 
@ A) diff --git a/graphblas_algorithms/algorithms/components/__init__.py b/graphblas_algorithms/algorithms/components/__init__.py new file mode 100644 index 0000000..bb0aea6 --- /dev/null +++ b/graphblas_algorithms/algorithms/components/__init__.py @@ -0,0 +1,2 @@ +from .connected import * +from .weakly_connected import * diff --git a/graphblas_algorithms/algorithms/components/connected.py b/graphblas_algorithms/algorithms/components/connected.py new file mode 100644 index 0000000..3f19b86 --- /dev/null +++ b/graphblas_algorithms/algorithms/components/connected.py @@ -0,0 +1,12 @@ +from .._bfs import _bfs_plain +from ..exceptions import PointlessConcept + + +def is_connected(G): + if len(G) == 0: + raise PointlessConcept("Connectivity is undefined for the null graph.") + return _bfs_plain(G, next(iter(G))).nvals == len(G) + + +def node_connected_component(G, n): + return _bfs_plain(G, n) diff --git a/graphblas_algorithms/algorithms/components/weakly_connected.py b/graphblas_algorithms/algorithms/components/weakly_connected.py new file mode 100644 index 0000000..306d96e --- /dev/null +++ b/graphblas_algorithms/algorithms/components/weakly_connected.py @@ -0,0 +1,8 @@ +from .._bfs import _bfs_plain_bidirectional +from ..exceptions import PointlessConcept + + +def is_weakly_connected(G): + if len(G) == 0: + raise PointlessConcept("Connectivity is undefined for the null graph.") + return _bfs_plain_bidirectional(G, next(iter(G))).nvals == len(G) diff --git a/graphblas_algorithms/algorithms/core.py b/graphblas_algorithms/algorithms/core.py index 8133c71..a6ff26d 100644 --- a/graphblas_algorithms/algorithms/core.py +++ b/graphblas_algorithms/algorithms/core.py @@ -1,11 +1,12 @@ from graphblas import Matrix, monoid, replace, select, semiring -from graphblas_algorithms.classes.graph import Graph +from graphblas_algorithms import Graph __all__ = ["k_truss"] def k_truss(G: Graph, k) -> Graph: + # TODO: should we have an option to keep the output matrix the same size? 
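+    # A k-truss is the maximal subgraph in which every edge participates in
+    # at least k - 2 triangles.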
# Ignore self-edges S = G.get_property("offdiag") @@ -32,6 +33,5 @@ def k_truss(G: Graph, k) -> Graph: Ktruss = C[indices, indices].new() # Convert back to networkx graph with correct node ids - keys = G.list_to_keys(indices) - key_to_id = dict(zip(keys, range(len(indices)))) + key_to_id = G.renumber_key_to_id(indices.tolist()) return Graph(Ktruss, key_to_id=key_to_id) diff --git a/graphblas_algorithms/algorithms/dag.py b/graphblas_algorithms/algorithms/dag.py index 3cceeef..63eb560 100644 --- a/graphblas_algorithms/algorithms/dag.py +++ b/graphblas_algorithms/algorithms/dag.py @@ -1,37 +1,17 @@ -from graphblas import Vector, replace -from graphblas.semiring import lor_pair +from ._bfs import _bfs_plain __all__ = ["descendants", "ancestors"] -# Push-pull optimization is possible, but annoying to implement def descendants(G, source): - if source not in G._key_to_id: - raise KeyError(f"The node {source} is not in the graph") + rv = _bfs_plain(G, source, name="descendants") index = G._key_to_id[source] - A = G.get_property("offdiag") - q = Vector.from_coo(index, True, size=A.nrows, name="q") - rv = q.dup(name="descendants") - for _ in range(A.nrows): - q(~rv.S, replace) << lor_pair(q @ A) - if q.nvals == 0: - break - rv(q.S) << True del rv[index] return rv def ancestors(G, source): - if source not in G._key_to_id: - raise KeyError(f"The node {source} is not in the graph") + rv = _bfs_plain(G, source, transpose=True, name="ancestors") index = G._key_to_id[source] - A = G.get_property("offdiag") - q = Vector.from_coo(index, True, size=A.nrows, name="q") - rv = q.dup(name="descendants") - for _ in range(A.nrows): - q(~rv.S, replace) << lor_pair(A @ q) - if q.nvals == 0: - break - rv(q.S) << True del rv[index] return rv diff --git a/graphblas_algorithms/algorithms/dominating.py b/graphblas_algorithms/algorithms/dominating.py index 60c3426..2894bd8 100644 --- a/graphblas_algorithms/algorithms/dominating.py +++ b/graphblas_algorithms/algorithms/dominating.py @@ -1,8 +1,8 @@ -from graphblas.semiring import lor_pair +from graphblas.semiring import any_pair __all__ = ["is_dominating_set"] def is_dominating_set(G, nbunch): - nbrs = lor_pair(nbunch @ G._A).new(mask=~nbunch.S) # A or A.T? + nbrs = any_pair[bool](nbunch @ G._A).new(mask=~nbunch.S) # A or A.T? 
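+    # `nbunch` is dominating iff every node is in `nbunch` or adjacent to it,
+    # i.e., the two sets counted below together cover all nodes.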
return nbrs.size - nbunch.nvals - nbrs.nvals == 0 diff --git a/graphblas_algorithms/algorithms/efficiency_measures.py b/graphblas_algorithms/algorithms/efficiency_measures.py new file mode 100644 index 0000000..3d922ee --- /dev/null +++ b/graphblas_algorithms/algorithms/efficiency_measures.py @@ -0,0 +1,12 @@ +from .exceptions import NoPath +from .shortest_paths.unweighted import bidirectional_shortest_path_length + +__all__ = ["efficiency"] + + +def efficiency(G, u, v): + try: + eff = 1 / bidirectional_shortest_path_length(G, u, v) + except NoPath: + eff = 0 + return eff diff --git a/graphblas_algorithms/algorithms/exceptions.py b/graphblas_algorithms/algorithms/exceptions.py index f4ef352..7c911c9 100644 --- a/graphblas_algorithms/algorithms/exceptions.py +++ b/graphblas_algorithms/algorithms/exceptions.py @@ -14,5 +14,9 @@ class PointlessConcept(GraphBlasAlgorithmException): pass +class NoPath(GraphBlasAlgorithmException): + pass + + class Unbounded(GraphBlasAlgorithmException): pass diff --git a/graphblas_algorithms/algorithms/isomorphism/__init__.py b/graphblas_algorithms/algorithms/isomorphism/__init__.py new file mode 100644 index 0000000..e701b70 --- /dev/null +++ b/graphblas_algorithms/algorithms/isomorphism/__init__.py @@ -0,0 +1 @@ +from .isomorph import * diff --git a/graphblas_algorithms/algorithms/isomorphism/isomorph.py b/graphblas_algorithms/algorithms/isomorphism/isomorph.py new file mode 100644 index 0000000..12e5af4 --- /dev/null +++ b/graphblas_algorithms/algorithms/isomorphism/isomorph.py @@ -0,0 +1,56 @@ +import numpy as np +from graphblas import binary + +from ..cluster import triangles + +__all__ = [ + "fast_could_be_isomorphic", + "faster_could_be_isomorphic", +] + + +def fast_could_be_isomorphic(G1, G2): + if len(G1) != len(G2): + return False + d1 = G1.get_property("total_degrees+" if G1.is_directed() else "degrees+") + d2 = G2.get_property("total_degrees+" if G2.is_directed() else "degrees+") + if d1.nvals != d2.nvals: + return False + t1 = triangles(G1) + t2 = triangles(G2) + if t1.nvals != t2.nvals: + return False + # Make ds and ts the same shape as numpy arrays so we can sort them lexicographically. 
+ if t1.nvals != d1.nvals: + # Assign 0 to t1 where present in d1 but not t1 + t1(~t1.S) << binary.second(d1, 0) + if t2.nvals != d2.nvals: + # Assign 0 to t2 where present in d2 but not t2 + t2(~t2.S) << binary.second(d2, 0) + d1 = d1.to_coo(indices=False)[1] + d2 = d2.to_coo(indices=False)[1] + t1 = t1.to_coo(indices=False)[1] + t2 = t2.to_coo(indices=False)[1] + ind1 = np.lexsort((d1, t1)) + ind2 = np.lexsort((d2, t2)) + if not np.array_equal(d1[ind1], d2[ind2]): + return False + if not np.array_equal(t1[ind1], t2[ind2]): + return False + return True + + +def faster_could_be_isomorphic(G1, G2): + if len(G1) != len(G2): + return False + d1 = G1.get_property("total_degrees+" if G1.is_directed() else "degrees+") + d2 = G2.get_property("total_degrees+" if G2.is_directed() else "degrees+") + if d1.nvals != d2.nvals: + return False + d1 = d1.to_coo(indices=False)[1] + d2 = d2.to_coo(indices=False)[1] + d1.sort() + d2.sort() + if not np.array_equal(d1, d2): + return False + return True diff --git a/graphblas_algorithms/algorithms/link_analysis/hits_alg.py b/graphblas_algorithms/algorithms/link_analysis/hits_alg.py index aadd77e..662ac14 100644 --- a/graphblas_algorithms/algorithms/link_analysis/hits_alg.py +++ b/graphblas_algorithms/algorithms/link_analysis/hits_alg.py @@ -1,7 +1,7 @@ from graphblas import Vector -from graphblas_algorithms.algorithms._helpers import is_converged, normalize -from graphblas_algorithms.algorithms.exceptions import ConvergenceFailure +from .._helpers import is_converged, normalize +from ..exceptions import ConvergenceFailure __all__ = ["hits"] @@ -30,7 +30,7 @@ def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True, *, with_auth a, h = h, a ATA = (A.T @ A).new(name="ATA") # Authority matrix aprev = Vector(float, N, name="a_prev") - for _ in range(max_iter): + for _i in range(max_iter): aprev, a = a, aprev a << ATA @ aprev normalize(a, "Linf") @@ -41,7 +41,7 @@ def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True, *, with_auth raise ConvergenceFailure(max_iter) else: hprev = Vector(float, N, name="h_prev") - for _ in range(max_iter): + for _i in range(max_iter): hprev, h = h, hprev a << hprev @ A h << A @ a diff --git a/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py b/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py index 1623819..7391dbe 100644 --- a/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py +++ b/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py @@ -1,11 +1,12 @@ -from graphblas import Vector +from graphblas import Matrix, Vector, binary, monoid from graphblas.semiring import plus_first, plus_times from graphblas_algorithms import Graph -from graphblas_algorithms.algorithms._helpers import is_converged -from graphblas_algorithms.algorithms.exceptions import ConvergenceFailure -__all__ = ["pagerank"] +from .._helpers import is_converged +from ..exceptions import ConvergenceFailure + +__all__ = ["pagerank", "google_matrix"] def pagerank( @@ -79,7 +80,7 @@ def pagerank( # Power iteration: make up to max_iter iterations xprev = Vector(float, N, name="x_prev") w = Vector(float, N, name="w") - for _ in range(max_iter): + for _i in range(max_iter): xprev, x = x, xprev # x << alpha * ((xprev * S) @ A + "dangling_weights") + (1 - alpha) * p @@ -98,3 +99,64 @@ def pagerank( x.name = name return x raise ConvergenceFailure(max_iter) + + +def google_matrix( + G: Graph, + alpha=0.85, + personalization=None, + nodelist=None, + dangling=None, + name="google_matrix", +) -> Matrix: + A = G._A + 
ids = G.list_to_ids(nodelist) + if ids is not None: + A = A[ids, ids].new(float, name=name) + else: + A = A.dup(float, name=name) + N = A.nrows + if N == 0: + return A + + # Personalization vector or scalar + if personalization is None: + p = 1.0 / N + else: + if ids is not None: + personalization = personalization[ids].new(name="personalization") + denom = personalization.reduce().get(0) + if denom == 0: + raise ZeroDivisionError("personalization sums to 0") + p = (personalization / denom).new(mask=personalization.V, name="p") + + if ids is None or len(ids) == len(G): + nonempty_rows = G.get_property("any_rowwise+") # XXX: What about self-edges? + else: + nonempty_rows = A.reduce_rowwise(monoid.any).new(name="nonempty_rows") + + is_dangling = nonempty_rows.nvals < N + if is_dangling: + empty_rows = (~nonempty_rows.S).new(name="empty_rows") + if dangling is not None: + if ids is not None: + dangling = dangling[ids].new(name="dangling") + dangling_weights = (1.0 / dangling.reduce().get(0) * dangling).new( + mask=dangling.V, name="dangling_weights" + ) + A << binary.first(empty_rows.outer(dangling_weights) | A) + elif personalization is None: + A << binary.first((p * empty_rows) | A) + else: + A << binary.first(empty_rows.outer(p) | A) + + scale = A.reduce_rowwise(monoid.plus).new(float) + scale << alpha / scale + A << scale * A + p *= 1 - alpha + if personalization is None: + # Add a scalar everywhere, which makes A dense + A(binary.plus)[:, :] = p + else: + A << A + p + return A diff --git a/graphblas_algorithms/algorithms/lowest_common_ancestors.py b/graphblas_algorithms/algorithms/lowest_common_ancestors.py new file mode 100644 index 0000000..0dfac19 --- /dev/null +++ b/graphblas_algorithms/algorithms/lowest_common_ancestors.py @@ -0,0 +1,21 @@ +from graphblas import binary, replace +from graphblas.semiring import any_pair + +from ._bfs import _bfs_plain + +__all__ = ["lowest_common_ancestor"] + + +def lowest_common_ancestor(G, node1, node2, default=None): + common_ancestors = _bfs_plain(G, node1, name="common_ancestors", transpose=True) + other_ancestors = _bfs_plain(G, node2, name="other_ancestors", transpose=True) + common_ancestors << binary.pair(common_ancestors & other_ancestors) + if common_ancestors.nvals == 0: + return default + # Take one BFS step along predecessors. The lowest common ancestor is one we don't visit. + # An alternative strategy would be to walk along successors until there are no more. + other_ancestors(common_ancestors.S, replace) << any_pair[bool](G._A @ common_ancestors) + common_ancestors(~other_ancestors.S, replace) << common_ancestors + index = common_ancestors.to_coo(values=False)[0][0] + # XXX: should we return index or key? 
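+    # Returning the node key (rather than the internal id) matches NetworkX,
+    # which returns nodes.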
+ return G.id_to_key[index] diff --git a/graphblas_algorithms/algorithms/operators/__init__.py b/graphblas_algorithms/algorithms/operators/__init__.py new file mode 100644 index 0000000..c2742b9 --- /dev/null +++ b/graphblas_algorithms/algorithms/operators/__init__.py @@ -0,0 +1,2 @@ +from .binary import * +from .unary import * diff --git a/graphblas_algorithms/algorithms/operators/binary.py b/graphblas_algorithms/algorithms/operators/binary.py new file mode 100644 index 0000000..4c14a11 --- /dev/null +++ b/graphblas_algorithms/algorithms/operators/binary.py @@ -0,0 +1,155 @@ +from graphblas import Matrix, binary, dtypes, unary + +from ..exceptions import GraphBlasAlgorithmException + +__all__ = [ + "compose", + "difference", + "disjoint_union", + "full_join", + "intersection", + "symmetric_difference", + "union", +] + + +def union(G, H, rename=(), *, name="union"): + if G.is_multigraph() != H.is_multigraph(): + raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.") + if G.is_multigraph(): + raise NotImplementedError("Not yet implemented for multigraphs") + if rename: + prefix = rename[0] + if prefix is not None: + G = type(G)( + G._A, key_to_id={f"{prefix}{key}": val for key, val in G._key_to_id.items()} + ) + if len(rename) > 1: + prefix = rename[1] + if prefix is not None: + H = type(H)( + H._A, key_to_id={f"{prefix}{key}": val for key, val in H._key_to_id.items()} + ) + A = G._A + B = H._A + if not G._key_to_id.keys().isdisjoint(H._key_to_id.keys()): + raise GraphBlasAlgorithmException("The node sets of the graphs are not disjoint.") + C = Matrix(dtypes.unify(A.dtype, B.dtype), A.nrows + B.nrows, A.ncols + B.ncols, name=name) + C[: A.nrows, : A.ncols] = A + C[A.nrows :, A.ncols :] = B + offset = A.nrows + key_to_id = {key: val + offset for key, val in H._key_to_id.items()} + key_to_id.update(G._key_to_id) + return type(G)(C, key_to_id=key_to_id) + + +def disjoint_union(G, H, *, name="disjoint_union"): + if G.is_multigraph() != H.is_multigraph(): + raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.") + if G.is_multigraph(): + raise NotImplementedError("Not yet implemented for multigraphs") + A = G._A + B = H._A + C = Matrix(dtypes.unify(A.dtype, B.dtype), A.nrows + B.nrows, A.ncols + B.ncols, name=name) + C[: A.nrows, : A.ncols] = A + C[A.nrows :, A.ncols :] = B + return type(G)(C) + + +def intersection(G, H, *, name="intersection"): + if G.is_multigraph() != H.is_multigraph(): + raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.") + if G.is_multigraph(): + raise NotImplementedError("Not yet implemented for multigraphs") + keys = sorted(G._key_to_id.keys() & H._key_to_id.keys(), key=G._key_to_id.__getitem__) + ids = G.list_to_ids(keys) + A = G._A[ids, ids].new() + ids = H.list_to_ids(keys) + B = H._A[ids, ids].new(dtypes.unify(A.dtype, H._A.dtype), mask=A.S, name=name) + B << unary.one(B) + return type(G)(B, key_to_id=dict(zip(keys, range(len(keys)), strict=True))) + + +def difference(G, H, *, name="difference"): + if G.is_multigraph() != H.is_multigraph(): + raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.") + if G.is_multigraph(): + raise NotImplementedError("Not yet implemented for multigraphs") + if G._key_to_id.keys() != H._key_to_id.keys(): + raise GraphBlasAlgorithmException("Node sets of graphs not equal") + A = G._A + if G._key_to_id == H._key_to_id: + B = H._A + else: + # Need to perform a permutation + keys = sorted(G._key_to_id, key=G._key_to_id.__getitem__) + ids = 
H.list_to_ids(keys) + B = H._A[ids, ids].new() + C = unary.one(A).new(mask=~B.S, name=name) + return type(G)(C, key_to_id=G._key_to_id) + + +def symmetric_difference(G, H, *, name="symmetric_difference"): + if G.is_multigraph() != H.is_multigraph(): + raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.") + if G.is_multigraph(): + raise NotImplementedError("Not yet implemented for multigraphs") + if G._key_to_id.keys() != H._key_to_id.keys(): + raise GraphBlasAlgorithmException("Node sets of graphs not equal") + A = G._A + if G._key_to_id == H._key_to_id: + B = H._A + else: + # Need to perform a permutation + keys = sorted(G._key_to_id, key=G._key_to_id.__getitem__) + ids = H.list_to_ids(keys) + B = H._A[ids, ids].new() + Mask = binary.pair[bool](A & B).new(name="mask") + C = binary.pair(A | B, left_default=True, right_default=True).new(mask=~Mask.S, name=name) + return type(G)(C, key_to_id=G._key_to_id) + + +def compose(G, H, *, name="compose"): + if G.is_multigraph() != H.is_multigraph(): + raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.") + if G.is_multigraph(): + raise NotImplementedError("Not yet implemented for multigraphs") + A = G._A + B = H._A + if G._key_to_id.keys() == H._key_to_id.keys(): + if G._key_to_id != H._key_to_id: + # Need to perform a permutation + keys = sorted(G._key_to_id, key=G._key_to_id.__getitem__) + ids = H.list_to_ids(keys) + B = B[ids, ids].new() + C = binary.second(A | B).new(name=name) + key_to_id = G._key_to_id + else: + keys = sorted(G._key_to_id.keys() & H._key_to_id.keys(), key=G._key_to_id.__getitem__) + B = H._A + C = Matrix( + dtypes.unify(A.dtype, B.dtype), + A.nrows + B.nrows - len(keys), + A.ncols + B.ncols - len(keys), + name=name, + ) + C[: A.nrows, : A.ncols] = A + ids1 = G.list_to_ids(keys) + ids2 = H.list_to_ids(keys) + C[ids1, ids1] = B[ids2, ids2] + newkeys = sorted(H._key_to_id.keys() - G._key_to_id.keys(), key=H._key_to_id.__getitem__) + ids = H.list_to_ids(newkeys) + C[A.nrows :, A.ncols :] = B[ids, ids] + # Now make new `key_to_id` + ids += A.nrows + key_to_id = dict(zip(newkeys, ids.tolist(), strict=True)) + key_to_id.update(G._key_to_id) + return type(G)(C, key_to_id=key_to_id) + + +def full_join(G, H, rename=(), *, name="full_join"): + rv = union(G, H, rename, name=name) + nrows, ncols = G._A.shape + rv._A[:nrows, ncols:] = True + rv._A[nrows:, :ncols] = True + return rv diff --git a/graphblas_algorithms/algorithms/operators/unary.py b/graphblas_algorithms/algorithms/operators/unary.py new file mode 100644 index 0000000..e7c46d6 --- /dev/null +++ b/graphblas_algorithms/algorithms/operators/unary.py @@ -0,0 +1,18 @@ +from graphblas import select + +from ..exceptions import GraphBlasAlgorithmException + +__all__ = ["complement", "reverse"] + + +def complement(G, *, name="complement"): + A = G._A + R = (~A.S).new(A.dtype, name=name) + R << select.offdiag(R) + return type(G)(R, key_to_id=G._key_to_id) + + +def reverse(G, copy=True): + if not G.is_directed(): + raise GraphBlasAlgorithmException("Cannot reverse an undirected graph.") + return G.reverse(copy=copy) diff --git a/graphblas_algorithms/algorithms/shortest_paths/__init__.py b/graphblas_algorithms/algorithms/shortest_paths/__init__.py index 9fc57fb..781db9d 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/__init__.py +++ b/graphblas_algorithms/algorithms/shortest_paths/__init__.py @@ -1,3 +1,4 @@ from .dense import * from .generic import * +from .unweighted import * from .weighted import * diff --git 
a/graphblas_algorithms/algorithms/shortest_paths/dense.py b/graphblas_algorithms/algorithms/shortest_paths/dense.py index 94282d0..394d1b4 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/dense.py +++ b/graphblas_algorithms/algorithms/shortest_paths/dense.py @@ -1,6 +1,8 @@ from graphblas import Matrix, Vector, binary, indexunary, replace, select from graphblas.semiring import any_plus, any_second +from ..exceptions import GraphBlasAlgorithmException + __all__ = ["floyd_warshall", "floyd_warshall_predecessor_and_distance"] @@ -8,7 +10,9 @@ def floyd_warshall(G, is_weighted=False): return floyd_warshall_predecessor_and_distance(G, is_weighted, compute_predecessors=False)[1] -def floyd_warshall_predecessor_and_distance(G, is_weighted=False, *, compute_predecessors=True): +def floyd_warshall_predecessor_and_distance( + G, is_weighted=False, *, compute_predecessors=True, permutation=None +): # By using `offdiag` instead of `G._A`, we ensure that D will not become dense. # Dense D may be better at times, but not including the diagonal will result in less work. # Typically, Floyd-Warshall algorithms sets the diagonal of D to 0 at the beginning. @@ -19,6 +23,13 @@ def floyd_warshall_predecessor_and_distance(G, is_weighted=False, *, compute_pre nonempty_nodes = binary.pair(row_degrees & column_degrees).new(name="nonempty_nodes") else: A, nonempty_nodes = G.get_properties("U- degrees-") + if permutation is not None: + if len(permutation) != nonempty_nodes.size: + raise GraphBlasAlgorithmException( + "permutation must contain every node in G with no repeats." + ) + A = A[permutation, permutation].new() + nonempty_nodes = nonempty_nodes[permutation].new(name="nonempty_nodes") if A.dtype == bool or not is_weighted: dtype = int diff --git a/graphblas_algorithms/algorithms/shortest_paths/generic.py b/graphblas_algorithms/algorithms/shortest_paths/generic.py index f91c9cf..ef86f89 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/generic.py +++ b/graphblas_algorithms/algorithms/shortest_paths/generic.py @@ -1,33 +1,12 @@ -from graphblas import Vector, replace -from graphblas.semiring import lor_pair +from ..exceptions import NoPath +from .unweighted import bidirectional_shortest_path_length __all__ = ["has_path"] def has_path(G, source, target): - # Perform bidirectional BFS from source to target and target to source - src = G._key_to_id[source] - dst = G._key_to_id[target] - if src == dst: - return True - A = G.get_property("offdiag") - q_src = Vector.from_coo(src, True, size=A.nrows, name="q_src") - seen_src = q_src.dup(name="seen_src") - q_dst = Vector.from_coo(dst, True, size=A.nrows, name="q_dst") - seen_dst = q_dst.dup(name="seen_dst") - for _ in range(A.nrows // 2): - q_src(~seen_src.S, replace) << lor_pair(q_src @ A) - if q_src.nvals == 0: - return False - if lor_pair(q_src @ q_dst): - return True - - q_dst(~seen_dst.S, replace) << lor_pair(A @ q_dst) - if q_dst.nvals == 0: - return False - if lor_pair(q_src @ q_dst): - return True - - seen_src(q_src.S) << True - seen_dst(q_dst.S) << True - return False + try: + bidirectional_shortest_path_length(G, source, target) + except NoPath: + return False + return True diff --git a/graphblas_algorithms/algorithms/shortest_paths/unweighted.py b/graphblas_algorithms/algorithms/shortest_paths/unweighted.py new file mode 100644 index 0000000..ec87b65 --- /dev/null +++ b/graphblas_algorithms/algorithms/shortest_paths/unweighted.py @@ -0,0 +1,64 @@ +from graphblas import Matrix, Vector, replace +from graphblas.semiring import any_pair + 
+from .._bfs import _bfs_level, _bfs_levels +from ..exceptions import NoPath + +__all__ = [ + "single_source_shortest_path_length", + "single_target_shortest_path_length", + "all_pairs_shortest_path_length", +] + + +def single_source_shortest_path_length(G, source, cutoff=None): + return _bfs_level(G, source, cutoff=cutoff) + + +def single_target_shortest_path_length(G, target, cutoff=None): + return _bfs_level(G, target, cutoff=cutoff, transpose=True) + + +def all_pairs_shortest_path_length(G, cutoff=None, *, nodes=None, expand_output=False): + D = _bfs_levels(G, nodes, cutoff=cutoff) + if nodes is not None and expand_output and D.ncols != D.nrows: + ids = G.list_to_ids(nodes) + rv = Matrix(D.dtype, D.ncols, D.ncols, name=D.name) + rv[ids, :] = D + return rv + return D + + +def bidirectional_shortest_path_length(G, source, target): + # Perform bidirectional BFS from source to target and target to source + # TODO: have this raise NodeNotFound? + if source not in G or target not in G: + raise KeyError(f"Either source {source} or target {target} is not in G") # NodeNotFound + src = G._key_to_id[source] + dst = G._key_to_id[target] + if src == dst: + return 0 + A = G.get_property("offdiag") + q_src = Vector(bool, size=A.nrows, name="q_src") + q_src[src] = True + seen_src = q_src.dup(name="seen_src") + q_dst = Vector(bool, size=A.nrows, name="q_dst") + q_dst[dst] = True + seen_dst = q_dst.dup(name="seen_dst", clear=True) + any_pair_bool = any_pair[bool] + for i in range(1, A.nrows + 1, 2): + q_src(~seen_src.S, replace) << any_pair_bool(q_src @ A) + if q_src.nvals == 0: + raise NoPath(f"No path between {source} and {target}.") + if any_pair_bool(q_src @ q_dst): + return i + + seen_dst(q_dst.S) << True + q_dst(~seen_dst.S, replace) << any_pair_bool(A @ q_dst) + if q_dst.nvals == 0: + raise NoPath(f"No path between {source} and {target}.") + if any_pair_bool(q_src @ q_dst): + return i + 1 + + seen_src(q_src.S) << True + raise NoPath(f"No path between {source} and {target}.") diff --git a/graphblas_algorithms/algorithms/shortest_paths/weighted.py b/graphblas_algorithms/algorithms/shortest_paths/weighted.py index a5cec41..a83a060 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/weighted.py +++ b/graphblas_algorithms/algorithms/shortest_paths/weighted.py @@ -1,37 +1,57 @@ import numpy as np -from graphblas import Matrix, Vector, binary, monoid, replace, select, unary +from graphblas import Matrix, Vector, binary, indexunary, monoid, replace, select, unary from graphblas.semiring import any_pair, min_plus -from ..exceptions import Unbounded +from .._bfs import _bfs_level, _bfs_levels, _bfs_parent, _bfs_plain +from ..exceptions import NoPath, Unbounded __all__ = [ "single_source_bellman_ford_path_length", + "bellman_ford_path", + "bellman_ford_path_length", "bellman_ford_path_lengths", + "negative_edge_cycle", ] -def single_source_bellman_ford_path_length(G, source): +def _bellman_ford_path_length(G, source, target=None, *, cutoff=None, name): # No need for `is_weighted=` keyword, b/c this is assumed to be weighted (I think) - index = G._key_to_id[source] + src_id = G._key_to_id[source] + if target is not None: + dst_id = G._key_to_id[target] + else: + dst_id = None + if G.get_property("is_iso"): # If the edges are iso-valued (and positive), then we can simply do level BFS is_negative, iso_value = G.get_properties("has_negative_edges+ iso_value") if not is_negative: - d = _bfs_level(G, source, dtype=iso_value.dtype) + if cutoff is not None: + cutoff = int(cutoff // iso_value.get()) + d = 
_bfs_level(G, source, target, cutoff=cutoff, dtype=iso_value.dtype) + if dst_id is not None: + d = d.get(dst_id) + if d is None: + raise NoPath(f"node {target} not reachable from {source}") if iso_value != 1: d *= iso_value return d # It's difficult to detect negative cycles with BFS - if G._A[index, index].get() is not None: + if G._A[src_id, src_id].get() is not None: raise Unbounded("Negative cycle detected.") - if not G.is_directed() and G._A[index, :].nvals > 0: + if not G.is_directed() and G._A[src_id, :].nvals > 0: # For undirected graphs, any negative edge is a cycle raise Unbounded("Negative cycle detected.") # Use `offdiag` instead of `A`, b/c self-loops don't contribute to the result, # and negative self-loops are easy negative cycles to avoid. # We check if we hit a self-loop negative cycle at the end. - A, has_negative_diagonal = G.get_properties("offdiag has_negative_diagonal") + if dst_id is None: + A, has_negative_diagonal = G.get_properties("offdiag has_negative_diagonal") + else: + A, is_negative, has_negative_diagonal = G.get_properties( + "offdiag has_negative_edges- has_negative_diagonal" + ) if A.dtype == bool: # Should we upcast e.g. INT8 to INT64 as well? dtype = int @@ -39,7 +59,7 @@ def single_source_bellman_ford_path_length(G, source): dtype = A.dtype n = A.nrows d = Vector(dtype, n, name="single_source_bellman_ford_path_length") - d[index] = 0 + d[src_id] = 0 cur = d.dup(name="cur") mask = Vector(bool, n, name="mask") one = unary.one[bool] @@ -48,6 +68,8 @@ def single_source_bellman_ford_path_length(G, source): # `cur` is the current frontier of values that improved in the previous iteration. # This means that in this iteration we drop values from `cur` that are not better. cur << min_plus(cur @ A) + if cutoff is not None: + cur << select.valuele(cur, cutoff) # Mask is True where cur not in d or cur < d mask << one(cur) @@ -59,11 +81,16 @@ def single_source_bellman_ford_path_length(G, source): break # Update `d` with values that improved d(cur.S) << cur + if dst_id is not None and not is_negative: + # Limit exploration if we have a target + cutoff = cur.get(dst_id, cutoff) else: # Check for negative cycle when for loop completes without breaking cur << min_plus(cur @ A) + if cutoff is not None: + cur << select.valuele(cur, cutoff) mask << binary.lt(cur & d) - if mask.reduce(monoid.lor): + if dst_id is None and mask.reduce(monoid.lor) or dst_id is not None and mask.get(dst_id): raise Unbounded("Negative cycle detected.") if has_negative_diagonal: # We removed diagonal entries above, so check if we visited one with a negative weight @@ -71,9 +98,23 @@ def single_source_bellman_ford_path_length(G, source): cur << select.valuelt(diag, 0) if any_pair(d @ cur): raise Unbounded("Negative cycle detected.") + if dst_id is not None: + d = d.get(dst_id) + if d is None: + raise NoPath(f"node {target} not reachable from {source}") return d +def single_source_bellman_ford_path_length( + G, source, *, cutoff=None, name="single_source_bellman_ford_path_length" +): + return _bellman_ford_path_length(G, source, cutoff=cutoff, name=name) + + +def bellman_ford_path_length(G, source, target): + return _bellman_ford_path_length(G, source, target, name="bellman_ford_path_length") + + def bellman_ford_path_lengths(G, nodes=None, *, expand_output=False): """Extra parameter: expand_output @@ -156,52 +197,152 @@ def bellman_ford_path_lengths(G, nodes=None, *, expand_output=False): return D -def _bfs_level(G, source, *, dtype=int): - if dtype == bool: +def _reconstruct_path_from_parents(G, 
parents, src, dst): + indices, values = parents.to_coo(sort=False) + d = dict(zip(indices.tolist(), values.tolist(), strict=True)) + if dst not in d: + return [] + cur = dst + path = [cur] + while cur != src: + cur = d[cur] + path.append(cur) + return G.list_to_keys(reversed(path)) + + +def bellman_ford_path(G, source, target): + src_id = G._key_to_id[source] + dst_id = G._key_to_id[target] + if G.get_property("is_iso"): + # If the edges are iso-valued (and positive), then we can simply do level BFS + is_negative = G.get_property("has_negative_edges+") + if not is_negative: + p = _bfs_parent(G, source, target) + return _reconstruct_path_from_parents(G, p, src_id, dst_id) + raise Unbounded("Negative cycle detected.") + A, is_negative, has_negative_diagonal = G.get_properties( + "offdiag has_negative_edges- has_negative_diagonal" + ) + if A.dtype == bool: + # Should we upcast e.g. INT8 to INT64 as well? dtype = int - index = G._key_to_id[source] - A = G.get_property("offdiag") + else: + dtype = A.dtype + cutoff = None n = A.nrows - v = Vector(dtype, n, name="bfs_level") - q = Vector(bool, n, name="q") - v[index] = 0 - q[index] = True - any_pair_bool = any_pair[bool] - for i in range(1, n): - q(~v.S, replace) << any_pair_bool(q @ A) - if q.nvals == 0: + d = Vector(dtype, n, name="bellman_ford_path_length") + d[src_id] = 0 + p = Vector(int, n, name="bellman_ford_path_parent") + p[src_id] = src_id + + prev = d.dup(name="prev") + cur = Vector(dtype, n, name="cur") + indices = Vector(int, n, name="indices") + mask = Vector(bool, n, name="mask") + B = Matrix(dtype, n, n, name="B") + Indices = Matrix(int, n, n, name="Indices") + cols = prev.to_coo(values=False)[0] + one = unary.one[bool] + for _i in range(n - 1): + # This is a slightly modified Bellman-Ford algorithm. + # `cur` is the current frontier of values that improved in the previous iteration. + # This means that in this iteration we drop values from `cur` that are not better. + cur << min_plus(prev @ A) + if cutoff is not None: + cur << select.valuele(cur, cutoff) + + # Mask is True where cur not in d or cur < d + mask << one(cur) + mask(binary.second) << binary.lt(cur & d) + + # Drop values from `cur` that didn't improve + cur(mask.V, replace) << cur + if cur.nvals == 0: break - v(q.S) << i - return v + # Update `d` with values that improved + d(cur.S) << cur + if not is_negative: + # Limit exploration if we have a target + cutoff = cur.get(dst_id, cutoff) + # Now try to find the parents! + # This is also not standard. Typically, UDTs and UDFs are used to keep + # track of both the minimum element and the parent id at the same time. + # Only include rows and columns that were used this iteration. 
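+        # Concretely: node j improved this iteration exactly when there is a
+        # row i with prev[i] + A[i, j] == cur[j]; such an i is j's parent.
+        # The `plus` and `iseq` steps below mark those (i, j) entries,
+        # `rowindex` records each candidate i, and the columnwise min-reduce
+        # deterministically picks the smallest candidate as the parent of j.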
+        rows = cols
+        cols = cur.to_coo(values=False)[0]
+        B.clear()
+        B[rows, cols] = A[rows, cols]
-def _bfs_levels(G, nodes=None, *, dtype=int):
-    if dtype == bool:
+        # Reverse engineer to determine parent
+        B << binary.plus(prev & B)
+        B << binary.iseq(B & cur)
+        B << select.valuene(B, False)
+        Indices << indexunary.rowindex(B)
+        indices << Indices.reduce_columnwise(monoid.min)
+        p(indices.S) << indices
+        prev, cur = cur, prev
+    else:
+        # Check for negative cycle when for loop completes without breaking
+        cur << min_plus(prev @ A)
+        if cutoff is not None:
+            cur << select.valuele(cur, cutoff)
+        mask << binary.lt(cur & d)
+        if mask.get(dst_id):
+            raise Unbounded("Negative cycle detected.")
+    path = _reconstruct_path_from_parents(G, p, src_id, dst_id)
+    if has_negative_diagonal and path:
+        mask.clear()
+        mask[G.list_to_ids(path)] = True
+        diag = G.get_property("diag", mask=mask.S)
+        if diag.nvals > 0:
+            raise Unbounded("Negative cycle detected.")
+        mask << binary.first(mask & cur)  # mask(cur.S, replace) << mask
+        if mask.nvals > 0:
+            # Is there a path from any visited node with negative self-loop to target?
+            # We could actually stop as soon as any from `path` is visited
+            indices, _ = mask.to_coo(values=False)
+            q = _bfs_plain(G, target=target, index=indices, cutoff=_i)
+            if dst_id in q:
+                raise Unbounded("Negative cycle detected.")
+    return path
+
+
+def negative_edge_cycle(G):
+    # TODO: use a heuristic to try to stop early
+    if G.is_directed():
+        deg = "total_degrees-"
+    else:
+        deg = "degrees-"
+    A, degrees, has_negative_diagonal, has_negative_edges = G.get_properties(
+        f"offdiag {deg} has_negative_diagonal has_negative_edges-"
+    )
+    if has_negative_diagonal:
+        return True
+    if not has_negative_edges:
+        return False
+    if A.dtype == bool:
+        # Should we upcast e.g. INT8 to INT64 as well?
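+        # (A bool adjacency matrix means the graph is unweighted; min-plus
+        # path lengths need numeric values, so bool is upcast to int here.)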
dtype = int - A = G.get_property("offdiag") - n = A.nrows - if nodes is None: - # TODO: `D = Vector.from_scalar(0, n, dtype).diag()` - D = Vector(dtype, n, name="bfs_levels_vector") - D << 0 - D = D.diag(name="bfs_levels") else: - ids = G.list_to_ids(nodes) - D = Matrix.from_coo( - np.arange(len(ids), dtype=np.uint64), - ids, - 0, - dtype, - nrows=len(ids), - ncols=n, - name="bfs_levels", - ) - Q = Matrix(bool, D.nrows, D.ncols, name="Q") - Q << unary.one[bool](D) - any_pair_bool = any_pair[bool] - for i in range(1, n): - Q(~D.S, replace) << any_pair_bool(Q @ A) - if Q.nvals == 0: - break - D(Q.S) << i - return D + dtype = A.dtype + n = A.nrows + # Begin from every node that has edges + d = Vector(dtype, n, name="negative_edge_cycle") + d(degrees.S) << 0 + cur = d.dup(name="cur") + mask = Vector(bool, n, name="mask") + one = unary.one[bool] + for _i in range(n - 1): + cur << min_plus(cur @ A) + mask << one(cur) + mask(binary.second) << binary.lt(cur & d) + cur(mask.V, replace) << cur + if cur.nvals == 0: + return False + d(cur.S) << cur + cur << min_plus(cur @ A) + mask << binary.lt(cur & d) + if mask.reduce(monoid.lor): + return True + return False diff --git a/graphblas_algorithms/algorithms/traversal/__init__.py b/graphblas_algorithms/algorithms/traversal/__init__.py new file mode 100644 index 0000000..7811162 --- /dev/null +++ b/graphblas_algorithms/algorithms/traversal/__init__.py @@ -0,0 +1 @@ +from .breadth_first_search import * diff --git a/graphblas_algorithms/algorithms/traversal/breadth_first_search.py b/graphblas_algorithms/algorithms/traversal/breadth_first_search.py new file mode 100644 index 0000000..a761134 --- /dev/null +++ b/graphblas_algorithms/algorithms/traversal/breadth_first_search.py @@ -0,0 +1,45 @@ +from graphblas import Vector, replace +from graphblas.semiring import any_pair + +__all__ = [ + "bfs_layers", + "descendants_at_distance", +] + + +def bfs_layers(G, sources): + if sources in G: + sources = [sources] + ids = G.list_to_ids(sources) + if ids is None or len(ids) == 0: + return + A = G.get_property("offdiag") + n = A.nrows + v = Vector(bool, size=n, name="bfs_layers") + q = Vector.from_coo(ids, True, size=n, name="q") + any_pair_bool = any_pair[bool] + yield q.dup(name="bfs_layer_0") + for i in range(1, n): + v(q.S) << True + q(~v.S, replace) << any_pair_bool(q @ A) + if q.nvals == 0: + return + yield q.dup(name=f"bfs_layer_{i}") + + +def descendants_at_distance(G, source, distance): + index = G._key_to_id[source] + A = G.get_property("offdiag") + n = A.nrows + q = Vector(bool, size=n, name=f"descendants_at_distance_{distance}") + q[index] = True + if distance == 0: + return q + v = Vector(bool, size=n, name="bfs_seen") + any_pair_bool = any_pair[bool] + for _i in range(1, distance + 1): + v(q.S) << True + q(~v.S, replace) << any_pair_bool(q @ A) + if q.nvals == 0: + break + return q diff --git a/graphblas_algorithms/algorithms/triads.py b/graphblas_algorithms/algorithms/triads.py index 54702c7..e6ec2be 100644 --- a/graphblas_algorithms/algorithms/triads.py +++ b/graphblas_algorithms/algorithms/triads.py @@ -1,5 +1,4 @@ -from graphblas_algorithms.classes.digraph import DiGraph -from graphblas_algorithms.classes.graph import Graph +from graphblas_algorithms import DiGraph, Graph __all__ = ["is_triad"] diff --git a/graphblas_algorithms/classes/_utils.py b/graphblas_algorithms/classes/_utils.py index c52b2be..ecf66d9 100644 --- a/graphblas_algorithms/classes/_utils.py +++ b/graphblas_algorithms/classes/_utils.py @@ -61,7 +61,7 @@ def dict_to_vector(self, d, *, 
size=None, dtype=None, name=None): if size is None: size = len(self) key_to_id = self._key_to_id - indices, values = zip(*((key_to_id[key], val) for key, val in d.items())) + indices, values = zip(*((key_to_id[key], val) for key, val in d.items()), strict=True) return Vector.from_coo(indices, values, size=size, dtype=dtype, name=name) @@ -85,7 +85,7 @@ def list_to_ids(self, nodes): if nodes is None: return None key_to_id = self._key_to_id - return [key_to_id[key] for key in nodes] + return np.fromiter((key_to_id[key] for key in nodes), np.uint64) def list_to_keys(self, indices): @@ -116,7 +116,17 @@ def vector_to_dict(self, v, *, mask=None, fill_value=None): elif fill_value is not None and v.nvals < v.size: v(mask=~v.S) << fill_value id_to_key = self.id_to_key - return {id_to_key[index]: value for index, value in zip(*v.to_coo(sort=False))} + return {id_to_key[index]: value for index, value in zip(*v.to_coo(sort=False), strict=True)} + + +def vector_to_list(self, v, *, values_are_keys=False): + id_to_key = self.id_to_key + return [ + id_to_key[idx] + for idx in v.to_coo(indices=not values_are_keys, values=values_are_keys, sort=True)[ + bool(values_are_keys) + ].tolist() + ] def vector_to_nodemap(self, v, *, mask=None, fill_value=None, values_are_keys=False): @@ -188,26 +198,29 @@ def matrix_to_dicts(self, A, *, use_row_index=False, use_column_index=False, val id_to_key = self.id_to_key if values_are_keys: values = [id_to_key[val] for val in values] - it = zip(rows, np.lib.stride_tricks.sliding_window_view(indptr, 2).tolist()) + it = zip(rows, np.lib.stride_tricks.sliding_window_view(indptr, 2).tolist(), strict=True) if use_row_index and use_column_index: return { - row: dict(zip(col_indices[start:stop], values[start:stop])) for row, (start, stop) in it + row: dict(zip(col_indices[start:stop], values[start:stop], strict=True)) + for row, (start, stop) in it } if use_row_index: return { row: { - id_to_key[col]: val for col, val in zip(col_indices[start:stop], values[start:stop]) + id_to_key[col]: val + for col, val in zip(col_indices[start:stop], values[start:stop], strict=True) } for row, (start, stop) in it } if use_column_index: return { - id_to_key[row]: dict(zip(col_indices[start:stop], values[start:stop])) + id_to_key[row]: dict(zip(col_indices[start:stop], values[start:stop], strict=True)) for row, (start, stop) in it } return { id_to_key[row]: { - id_to_key[col]: val for col, val in zip(col_indices[start:stop], values[start:stop]) + id_to_key[col]: val + for col, val in zip(col_indices[start:stop], values[start:stop], strict=True) } for row, (start, stop) in it } @@ -229,9 +242,9 @@ def to_networkx(self, edge_attribute="weight"): rows = (id_to_key[row] for row in rows.tolist()) cols = (id_to_key[col] for col in cols.tolist()) if edge_attribute is None: - G.add_edges_from(zip(rows, cols)) + G.add_edges_from(zip(rows, cols, strict=True)) else: - G.add_weighted_edges_from(zip(rows, cols, vals), weight=edge_attribute) + G.add_weighted_edges_from(zip(rows, cols, vals, strict=True), weight=edge_attribute) # What else should we copy over? return G @@ -240,3 +253,12 @@ def _cacheit(self, key, func, *args, **kwargs): if key not in self._cache: self._cache[key] = func(*args, **kwargs) return self._cache[key] + + +def renumber_key_to_id(self, indices): + """Create `key_to_id` for e.g. 
a subgraph with node ids from `indices`"""
+    id_to_key = self.id_to_key
+    return {id_to_key[index]: i for i, index in enumerate(indices)}
+    # Alternative (about the same performance)
+    # keys = self.list_to_keys(indices)
+    # return dict(zip(keys, range(len(indices)), strict=True))
diff --git a/graphblas_algorithms/classes/digraph.py b/graphblas_algorithms/classes/digraph.py
index 0bc1ec7..1e9fe5f 100644
--- a/graphblas_algorithms/classes/digraph.py
+++ b/graphblas_algorithms/classes/digraph.py
@@ -1,4 +1,5 @@
 from collections import defaultdict
+from copy import deepcopy

 import graphblas as gb
 from graphblas import Matrix, binary, replace, select, unary
@@ -441,6 +442,7 @@ def __missing__(self, key):
 class DiGraph(Graph):
+    __networkx_backend__ = "graphblas"
     __networkx_plugin__ = "graphblas"

     # "-" properties ignore self-edges, "+" properties include self-edges
@@ -548,10 +550,12 @@ def __init__(self, incoming_graph_data=None, *, key_to_id=None, **attr):
     set_to_vector = _utils.set_to_vector
     to_networkx = _utils.to_networkx
     vector_to_dict = _utils.vector_to_dict
+    vector_to_list = _utils.vector_to_list
     vector_to_nodemap = _utils.vector_to_nodemap
     vector_to_nodeset = _utils.vector_to_nodeset
     vector_to_set = _utils.vector_to_set
     _cacheit = _utils._cacheit
+    renumber_key_to_id = _utils.renumber_key_to_id

     # NetworkX methods
     def to_directed_class(self):
@@ -597,6 +601,25 @@ def is_multigraph(self):
     def is_directed(self):
         return True

+    def to_undirected(self, reciprocal=False, as_view=False, *, name=None):
+        if as_view:
+            raise NotImplementedError("`as_view=True` is not implemented in `G.to_undirected`")
+        A = self._A
+        if reciprocal:
+            B = binary.any(A & A.T).new(name=name)
+        else:
+            B = binary.any(A | A.T).new(name=name)
+        return Graph(B, key_to_id=self._key_to_id)
+
+    def reverse(self, copy=True):
+        # We could even reuse many of the cached values
+        A = self._A.T  # This probably mostly works, but does not yet support assignment
+        if copy:
+            A = A.new()
+        rv = type(self)(A, key_to_id=self._key_to_id)
+        rv.graph.update(deepcopy(self.graph))
+        return rv
+

 class MultiDiGraph(DiGraph):
     def is_multigraph(self):
diff --git a/graphblas_algorithms/classes/graph.py b/graphblas_algorithms/classes/graph.py
index 718264f..f3e2239 100644
--- a/graphblas_algorithms/classes/graph.py
+++ b/graphblas_algorithms/classes/graph.py
@@ -301,6 +301,7 @@ def __missing__(self, key):
 class Graph:
+    __networkx_backend__ = "graphblas"
     __networkx_plugin__ = "graphblas"

     # "-" properties ignore self-edges, "+" properties include self-edges
@@ -396,10 +397,12 @@ def __init__(self, incoming_graph_data=None, *, key_to_id=None, **attr):
     set_to_vector = _utils.set_to_vector
     to_networkx = _utils.to_networkx
     vector_to_dict = _utils.vector_to_dict
+    vector_to_list = _utils.vector_to_list
     vector_to_nodemap = _utils.vector_to_nodemap
     vector_to_nodeset = _utils.vector_to_nodeset
     vector_to_set = _utils.vector_to_set
     _cacheit = _utils._cacheit
+    renumber_key_to_id = _utils.renumber_key_to_id

     # NetworkX methods
     def to_directed_class(self):
diff --git a/graphblas_algorithms/classes/nodemap.py b/graphblas_algorithms/classes/nodemap.py
index 63b7a5e..2a32502 100644
--- a/graphblas_algorithms/classes/nodemap.py
+++ b/graphblas_algorithms/classes/nodemap.py
@@ -28,6 +28,7 @@ def __init__(self, v, *, fill_value=None, values_are_keys=False, key_to_id=None)
     set_to_vector = _utils.set_to_vector
     # to_networkx = _utils.to_networkx
     vector_to_dict = _utils.vector_to_dict
+    vector_to_list = _utils.vector_to_list
     vector_to_nodemap = _utils.vector_to_nodemap
     vector_to_nodeset = _utils.vector_to_nodeset
     vector_to_set = _utils.vector_to_set
@@ -95,6 +96,7 @@ def get(self, key, default=None):
             return default
         if self._values_are_keys:
             return self.id_to_key[rv]
+        return rv

     # items
     # keys
@@ -220,6 +222,7 @@ def _get_rows(self):
     set_to_vector = _utils.set_to_vector
     # to_networkx = _utils.to_networkx
     vector_to_dict = _utils.vector_to_dict
+    vector_to_list = _utils.vector_to_list
     vector_to_nodemap = _utils.vector_to_nodemap
     vector_to_nodeset = _utils.vector_to_nodeset
     vector_to_set = _utils.vector_to_set
@@ -335,6 +338,7 @@ def _get_rows(self):
     set_to_vector = _utils.set_to_vector
     # to_networkx = _utils.to_networkx
     vector_to_dict = _utils.vector_to_dict
+    vector_to_list = _utils.vector_to_list
     vector_to_nodemap = _utils.vector_to_nodemap
     vector_to_nodeset = _utils.vector_to_nodeset
     vector_to_set = _utils.vector_to_set
diff --git a/graphblas_algorithms/classes/nodeset.py b/graphblas_algorithms/classes/nodeset.py
index 1713a7d..b79895e 100644
--- a/graphblas_algorithms/classes/nodeset.py
+++ b/graphblas_algorithms/classes/nodeset.py
@@ -1,6 +1,6 @@
 from collections.abc import MutableSet

-from graphblas.semiring import lor_pair, plus_pair
+from graphblas.semiring import any_pair, plus_pair

 from . import _utils
@@ -26,6 +26,7 @@ def __init__(self, v, *, key_to_id=None):
     set_to_vector = _utils.set_to_vector
     # to_networkx = _utils.to_networkx
     vector_to_dict = _utils.vector_to_dict
+    vector_to_list = _utils.vector_to_list
     vector_to_nodemap = _utils.vector_to_nodemap
     vector_to_nodeset = _utils.vector_to_nodeset
     vector_to_set = _utils.vector_to_set
@@ -76,7 +77,7 @@ def clear(self):
     def isdisjoint(self, other):
         if isinstance(other, NodeSet):
-            return not lor_pair(self.vector @ other.vector)
+            return not any_pair[bool](self.vector @ other.vector)
         return super().isdisjoint(other)

     def pop(self):
@@ -104,3 +105,8 @@ def _from_iterable(self, it):
     # Add more set methods (as needed)
     def union(self, *args):
         return set(self).union(*args)  # TODO: can we make this better?
+
+    def copy(self):
+        rv = type(self)(self.vector.dup(), key_to_id=self._key_to_id)
+        rv._id_to_key = self._id_to_key
+        return rv
diff --git a/graphblas_algorithms/generators/__init__.py b/graphblas_algorithms/generators/__init__.py
new file mode 100644
index 0000000..65a6526
--- /dev/null
+++ b/graphblas_algorithms/generators/__init__.py
@@ -0,0 +1 @@
+from .ego import *
diff --git a/graphblas_algorithms/generators/ego.py b/graphblas_algorithms/generators/ego.py
new file mode 100644
index 0000000..4d95e0f
--- /dev/null
+++ b/graphblas_algorithms/generators/ego.py
@@ -0,0 +1,24 @@
+from ..algorithms.components.connected import _bfs_plain
+from ..algorithms.shortest_paths.weighted import single_source_bellman_ford_path_length
+
+__all__ = ["ego_graph"]
+
+
+def ego_graph(G, n, radius=1, center=True, undirected=False, is_weighted=False):
+    # TODO: should we have an option to keep the output matrix the same size?
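+    # Approach: gather every node within `radius` of n (plain BFS when the
+    # graph is unweighted, Bellman-Ford with a cutoff when weighted), then
+    # take the induced submatrix on those nodes and renumber keys for the
+    # smaller result graph.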
+ if undirected and G.is_directed(): + # NOT COVERED + G2 = G.to_undirected() + else: + G2 = G + if is_weighted: + v = single_source_bellman_ford_path_length(G2, n, cutoff=radius) + else: + v = _bfs_plain(G2, n, cutoff=radius) + if not center: + del v[G._key_to_id[n]] + + indices, _ = v.to_coo(values=False) + A = G._A[indices, indices].new(name="ego") + key_to_id = G.renumber_key_to_id(indices.tolist()) + return type(G)(A, key_to_id=key_to_id) diff --git a/graphblas_algorithms/interface.py b/graphblas_algorithms/interface.py index 1a142c3..c718371 100644 --- a/graphblas_algorithms/interface.py +++ b/graphblas_algorithms/interface.py @@ -1,106 +1,268 @@ from . import nxapi ####### -# NOTE: Remember to update README.md when adding or removing algorithms from Dispatcher +# NOTE: Remember to run `python scripts/maketree.py` when adding or removing algorithms +# to automatically add it to README.md. You must still add algorithms below. ####### class Dispatcher: - # Boundary - edge_boundary = nxapi.boundary.edge_boundary - node_boundary = nxapi.boundary.node_boundary - # Centrality - degree_centrality = nxapi.centrality.degree_alg.degree_centrality - eigenvector_centrality = nxapi.centrality.eigenvector.eigenvector_centrality - in_degree_centrality = nxapi.centrality.degree_alg.in_degree_centrality - katz_centrality = nxapi.centrality.katz.katz_centrality - out_degree_centrality = nxapi.centrality.degree_alg.out_degree_centrality - # Cluster - average_clustering = nxapi.cluster.average_clustering - clustering = nxapi.cluster.clustering - generalized_degree = nxapi.cluster.generalized_degree - square_clustering = nxapi.cluster.square_clustering - transitivity = nxapi.cluster.transitivity - triangles = nxapi.cluster.triangles - # Community - inter_community_edges = nxapi.community.quality.inter_community_edges - intra_community_edges = nxapi.community.quality.intra_community_edges - # Core - k_truss = nxapi.core.k_truss - # Cuts - boundary_expansion = nxapi.cuts.boundary_expansion - conductance = nxapi.cuts.conductance - cut_size = nxapi.cuts.cut_size - edge_expansion = nxapi.cuts.edge_expansion - mixing_expansion = nxapi.cuts.mixing_expansion - node_expansion = nxapi.cuts.node_expansion - normalized_cut_size = nxapi.cuts.normalized_cut_size - volume = nxapi.cuts.volume - # DAG - ancestors = nxapi.dag.ancestors - descendants = nxapi.dag.descendants - # Dominating - is_dominating_set = nxapi.dominating.is_dominating_set - # Isolate - is_isolate = nxapi.isolate.is_isolate - isolates = nxapi.isolate.isolates - number_of_isolates = nxapi.isolate.number_of_isolates - # Link Analysis - hits = nxapi.link_analysis.hits_alg.hits - pagerank = nxapi.link_analysis.pagerank_alg.pagerank - # Reciprocity + # Begin auto-generated code: dispatch + mod = nxapi.boundary + # ================== + edge_boundary = mod.edge_boundary + node_boundary = mod.node_boundary + + mod = nxapi.centrality + # ==================== + degree_centrality = mod.degree_alg.degree_centrality + in_degree_centrality = mod.degree_alg.in_degree_centrality + out_degree_centrality = mod.degree_alg.out_degree_centrality + eigenvector_centrality = mod.eigenvector.eigenvector_centrality + katz_centrality = mod.katz.katz_centrality + + mod = nxapi.cluster + # ================= + average_clustering = mod.average_clustering + clustering = mod.clustering + generalized_degree = mod.generalized_degree + square_clustering = mod.square_clustering + transitivity = mod.transitivity + triangles = mod.triangles + + mod = nxapi.community + # =================== 
+ inter_community_edges = mod.quality.inter_community_edges + intra_community_edges = mod.quality.intra_community_edges + + mod = nxapi.components + # ==================== + is_connected = mod.connected.is_connected + node_connected_component = mod.connected.node_connected_component + is_weakly_connected = mod.weakly_connected.is_weakly_connected + + mod = nxapi.core + # ============== + k_truss = mod.k_truss + + mod = nxapi.cuts + # ============== + boundary_expansion = mod.boundary_expansion + conductance = mod.conductance + cut_size = mod.cut_size + edge_expansion = mod.edge_expansion + mixing_expansion = mod.mixing_expansion + node_expansion = mod.node_expansion + normalized_cut_size = mod.normalized_cut_size + volume = mod.volume + + mod = nxapi.dag + # ============= + ancestors = mod.ancestors + descendants = mod.descendants + + mod = nxapi.dominating + # ==================== + is_dominating_set = mod.is_dominating_set + + mod = nxapi.efficiency_measures + # ============================= + efficiency = mod.efficiency + + mod = nxapi.generators + # ==================== + ego_graph = mod.ego.ego_graph + + mod = nxapi.isolate + # ================= + is_isolate = mod.is_isolate + isolates = mod.isolates + number_of_isolates = mod.number_of_isolates + + mod = nxapi.isomorphism + # ===================== + fast_could_be_isomorphic = mod.isomorph.fast_could_be_isomorphic + faster_could_be_isomorphic = mod.isomorph.faster_could_be_isomorphic + + mod = nxapi.linalg + # ================ + bethe_hessian_matrix = mod.bethehessianmatrix.bethe_hessian_matrix + adjacency_matrix = mod.graphmatrix.adjacency_matrix + laplacian_matrix = mod.laplacianmatrix.laplacian_matrix + normalized_laplacian_matrix = mod.laplacianmatrix.normalized_laplacian_matrix + directed_modularity_matrix = mod.modularitymatrix.directed_modularity_matrix + modularity_matrix = mod.modularitymatrix.modularity_matrix + + mod = nxapi.link_analysis + # ======================= + hits = mod.hits_alg.hits + google_matrix = mod.pagerank_alg.google_matrix + pagerank = mod.pagerank_alg.pagerank + + mod = nxapi.lowest_common_ancestors + # ================================= + lowest_common_ancestor = mod.lowest_common_ancestor + + mod = nxapi.operators + # =================== + compose = mod.binary.compose + difference = mod.binary.difference + disjoint_union = mod.binary.disjoint_union + full_join = mod.binary.full_join + intersection = mod.binary.intersection + symmetric_difference = mod.binary.symmetric_difference + union = mod.binary.union + complement = mod.unary.complement + reverse = mod.unary.reverse + + mod = nxapi.reciprocity + # ===================== overall_reciprocity = nxapi.overall_reciprocity reciprocity = nxapi.reciprocity - # Regular - is_k_regular = nxapi.regular.is_k_regular - is_regular = nxapi.regular.is_regular - # Shortest Paths - floyd_warshall = nxapi.shortest_paths.dense.floyd_warshall - floyd_warshall_predecessor_and_distance = ( - nxapi.shortest_paths.dense.floyd_warshall_predecessor_and_distance - ) - has_path = nxapi.shortest_paths.generic.has_path - all_pairs_bellman_ford_path_length = ( - nxapi.shortest_paths.weighted.all_pairs_bellman_ford_path_length - ) - single_source_bellman_ford_path_length = ( - nxapi.shortest_paths.weighted.single_source_bellman_ford_path_length - ) - # Simple Paths - is_simple_path = nxapi.simple_paths.is_simple_path - # S Metric - s_metric = nxapi.smetric.s_metric - # Structural Holes - mutual_weight = nxapi.structuralholes.mutual_weight - # Tournament - is_tournament = 
nxapi.tournament.is_tournament - score_sequence = nxapi.tournament.score_sequence - tournament_matrix = nxapi.tournament.tournament_matrix - # Triads - is_triad = nxapi.triads.is_triad + + mod = nxapi.regular + # ================= + is_k_regular = mod.is_k_regular + is_regular = mod.is_regular + + mod = nxapi.shortest_paths + # ======================== + floyd_warshall = mod.dense.floyd_warshall + floyd_warshall_numpy = mod.dense.floyd_warshall_numpy + floyd_warshall_predecessor_and_distance = mod.dense.floyd_warshall_predecessor_and_distance + has_path = mod.generic.has_path + all_pairs_shortest_path_length = mod.unweighted.all_pairs_shortest_path_length + single_source_shortest_path_length = mod.unweighted.single_source_shortest_path_length + single_target_shortest_path_length = mod.unweighted.single_target_shortest_path_length + all_pairs_bellman_ford_path_length = mod.weighted.all_pairs_bellman_ford_path_length + bellman_ford_path = mod.weighted.bellman_ford_path + bellman_ford_path_length = mod.weighted.bellman_ford_path_length + negative_edge_cycle = mod.weighted.negative_edge_cycle + single_source_bellman_ford_path_length = mod.weighted.single_source_bellman_ford_path_length + + mod = nxapi.simple_paths + # ====================== + is_simple_path = mod.is_simple_path + + mod = nxapi.smetric + # ================= + s_metric = mod.s_metric + + mod = nxapi.structuralholes + # ========================= + mutual_weight = mod.mutual_weight + + mod = nxapi.tournament + # ==================== + is_tournament = mod.is_tournament + score_sequence = mod.score_sequence + tournament_matrix = mod.tournament_matrix + + mod = nxapi.traversal + # =================== + bfs_layers = mod.breadth_first_search.bfs_layers + descendants_at_distance = mod.breadth_first_search.descendants_at_distance + + mod = nxapi.triads + # ================ + is_triad = mod.is_triad + + del mod + # End auto-generated code: dispatch @staticmethod - def convert_from_nx(graph, weight=None, *, name=None): + def convert_from_nx( + graph, + edge_attrs=None, + node_attrs=None, + preserve_edge_attrs=False, + preserve_node_attrs=False, + preserve_graph_attrs=False, + name=None, + graph_name=None, + *, + weight=None, # For nx.__version__ <= 3.1 + ): import networkx as nx from .classes import DiGraph, Graph, MultiDiGraph, MultiGraph + if preserve_edge_attrs: + if graph.is_multigraph(): + attrs = set().union( + *( + datadict + for nbrs in graph._adj.values() + for keydict in nbrs.values() + for datadict in keydict.values() + ) + ) + else: + attrs = set().union( + *(datadict for nbrs in graph._adj.values() for datadict in nbrs.values()) + ) + if len(attrs) == 1: + [attr] = attrs + edge_attrs = {attr: None} + elif attrs: + raise NotImplementedError("`preserve_edge_attrs=True` is not fully implemented") + if node_attrs: + raise NotImplementedError("non-None `node_attrs` is not yet implemented") + if preserve_node_attrs: + attrs = set().union(*(datadict for node, datadict in graph.nodes(data=True))) + if attrs: + raise NotImplementedError("`preserve_node_attrs=True` is not implemented") + if edge_attrs: + if len(edge_attrs) > 1: + raise NotImplementedError( + "Multiple edge attributes is not implemented (bad value for edge_attrs)" + ) + if weight is not None: + raise TypeError("edge_attrs and weight both given") + [[weight, default]] = edge_attrs.items() + if default is not None and default != 1: + raise NotImplementedError(f"edge default != 1 is not implemented; got {default}") + if isinstance(graph, nx.MultiDiGraph): - return 
MultiDiGraph.from_networkx(graph, weight=weight) - if isinstance(graph, nx.MultiGraph): - return MultiGraph.from_networkx(graph, weight=weight) - if isinstance(graph, nx.DiGraph): - return DiGraph.from_networkx(graph, weight=weight) - if isinstance(graph, nx.Graph): - return Graph.from_networkx(graph, weight=weight) - raise TypeError(f"Unsupported type of graph: {type(graph)}") + G = MultiDiGraph.from_networkx(graph, weight=weight) + elif isinstance(graph, nx.MultiGraph): + G = MultiGraph.from_networkx(graph, weight=weight) + elif isinstance(graph, nx.DiGraph): + G = DiGraph.from_networkx(graph, weight=weight) + elif isinstance(graph, nx.Graph): + G = Graph.from_networkx(graph, weight=weight) + else: + raise TypeError(f"Unsupported type of graph: {type(graph)}") + if preserve_graph_attrs: + G.graph.update(graph.graph) + return G @staticmethod def convert_to_nx(obj, *, name=None): + from graphblas import Matrix, io + from .classes import Graph if isinstance(obj, Graph): obj = obj.to_networkx() + elif isinstance(obj, Matrix): + if name in { + "adjacency_matrix", + "bethe_hessian_matrix", + "laplacian_matrix", + "normalized_laplacian_matrix", + "tournament_matrix", + }: + obj = io.to_scipy_sparse(obj) + elif name in { + "directed_modularity_matrix", + "floyd_warshall_numpy", + "google_matrix", + "modularity_matrix", + }: + obj = obj.to_dense(fill_value=False) + else: # pragma: no cover + raise RuntimeError(f"Should {name} return a numpy or scipy.sparse array?") return obj @staticmethod @@ -112,20 +274,38 @@ def on_start_tests(items): def key(testpath): filename, path = testpath.split(":") - classname, testname = path.split(".") - return (testname, frozenset({classname, filename})) + *names, testname = path.split(".") + if names: + [classname] = names + return (testname, frozenset({classname, filename})) + return (testname, frozenset({filename})) # Reasons to skip tests - multi_attributed = "unable to handle multi-attributed graphs" + # multi_attributed = "unable to handle multi-attributed graphs" multidigraph = "unable to handle MultiDiGraph" multigraph = "unable to handle MultiGraph" # Which tests to skip skip = { - key("test_mst.py:TestBoruvka.test_attributes"): multi_attributed, - key("test_mst.py:TestBoruvka.test_weight_attribute"): multi_attributed, + # key("test_mst.py:TestBoruvka.test_attributes"): multi_attributed, + # key("test_mst.py:TestBoruvka.test_weight_attribute"): multi_attributed, key("test_dense.py:TestFloyd.test_zero_weight"): multidigraph, + key("test_dense_numpy.py:test_zero_weight"): multidigraph, key("test_weighted.py:TestBellmanFordAndGoldbergRadzik.test_multigraph"): multigraph, + # key("test_binary.py:test_compose_multigraph"): multigraph, + # key("test_binary.py:test_difference_multigraph_attributes"): multigraph, + # key("test_binary.py:test_disjoint_union_multigraph"): multigraph, + # key("test_binary.py:test_full_join_multigraph"): multigraph, + # key("test_binary.py:test_intersection_multigraph_attributes"): multigraph, + # key( + # "test_binary.py:test_intersection_multigraph_attributes_node_set_different" + # ): multigraph, + # key("test_binary.py:test_symmetric_difference_multigraph"): multigraph, + # key("test_binary.py:test_union_attributes"): multi_attributed, + # TODO: move failing assertion from `test_union_and_compose` + # key("test_binary.py:test_union_and_compose"): multi_attributed, + # key("test_binary.py:test_union_multigraph"): multigraph, + # key("test_vf2pp.py:test_custom_multigraph4_different_labels"): multigraph, } for item in items: kset = 
set(item.keywords) diff --git a/graphblas_algorithms/linalg/__init__.py b/graphblas_algorithms/linalg/__init__.py new file mode 100644 index 0000000..5fb0b2b --- /dev/null +++ b/graphblas_algorithms/linalg/__init__.py @@ -0,0 +1,4 @@ +from .bethehessianmatrix import * +from .graphmatrix import * +from .laplacianmatrix import * +from .modularitymatrix import * diff --git a/graphblas_algorithms/linalg/bethehessianmatrix.py b/graphblas_algorithms/linalg/bethehessianmatrix.py new file mode 100644 index 0000000..edd000f --- /dev/null +++ b/graphblas_algorithms/linalg/bethehessianmatrix.py @@ -0,0 +1,25 @@ +from graphblas import Vector, binary + +__all__ = ["bethe_hessian_matrix"] + + +def bethe_hessian_matrix(G, r=None, nodelist=None, *, name="bethe_hessian_matrix"): + A = G._A + if nodelist is not None: + ids = G.list_to_ids(nodelist) + A = A[ids, ids].new() + d = A.reduce_rowwise().new(name="d") + else: + d = G.get_property("plus_rowwise+") + if r is None: + degrees = G.get_property("degrees+") + k = degrees.reduce().get(0) + k2 = (degrees @ degrees).get(0) + r = k2 / k - 1 + n = A.nrows + # result = (r**2 - 1) * I - r * A + D + ri = Vector.from_scalar(r**2 - 1.0, n, name="ri") + ri += d + rI = ri.diag(name=name) + rI(binary.plus) << binary.times(-r, A) # rI += -r * A + return rI diff --git a/graphblas_algorithms/linalg/graphmatrix.py b/graphblas_algorithms/linalg/graphmatrix.py new file mode 100644 index 0000000..0eff6ef --- /dev/null +++ b/graphblas_algorithms/linalg/graphmatrix.py @@ -0,0 +1,19 @@ +from graphblas import unary + +__all__ = ["adjacency_matrix"] + + +def adjacency_matrix(G, nodelist=None, dtype=None, is_weighted=False, *, name="adjacency_matrix"): + if dtype is None: + dtype = G._A.dtype + if G.is_multigraph(): + is_weighted = True # XXX + if nodelist is None: + if not is_weighted: + return unary.one[dtype](G._A).new(name=name) + return G._A.dup(dtype, name=name) + ids = G.list_to_ids(nodelist) + A = G._A[ids, ids].new(dtype, name=name) + if not is_weighted: + A << unary.one(A) + return A diff --git a/graphblas_algorithms/linalg/laplacianmatrix.py b/graphblas_algorithms/linalg/laplacianmatrix.py new file mode 100644 index 0000000..18ed65a --- /dev/null +++ b/graphblas_algorithms/linalg/laplacianmatrix.py @@ -0,0 +1,54 @@ +from graphblas import monoid, unary + +__all__ = [ + "laplacian_matrix", + "normalized_laplacian_matrix", +] + + +def _laplacian_helper(G, nodelist=None, is_weighted=False): + if G.is_multigraph(): + is_weighted = True # XXX + A = G._A + if nodelist is not None: + ids = G.list_to_ids(nodelist) + A = A[ids, ids].new() + if not is_weighted: + A << unary.one(A) + d = A.reduce_rowwise(monoid.plus).new() + elif is_weighted: + d = G.get_property("plus_rowwise+") + else: + d = G.get_property("degrees+") + A = unary.one(A).new() + return d, A + + +def laplacian_matrix(G, nodelist=None, is_weighted=False, *, name="laplacian_matrix"): + d, A = _laplacian_helper(G, nodelist, is_weighted) + D = d.diag(name="D") + return (D - A).new(name=name) + + +def normalized_laplacian_matrix( + G, nodelist=None, is_weighted=False, *, name="normalized_laplacian_matrix" +): + d, A = _laplacian_helper(G, nodelist, is_weighted) + d_invsqrt = unary.sqrt(d).new(name="d_invsqrt") + d_invsqrt << unary.minv(d_invsqrt) + + # XXX: what if `d` is 0 and `d_invsqrt` is infinity? 
(not tested) + # d_invsqrt(unary.isinf(d_invsqrt)) << 0 + + # Calculate: A_weighted = D_invsqrt @ A @ D_invsqrt + A_weighted = d_invsqrt.outer(d_invsqrt).new(mask=A.S, name=name) + A_weighted *= A + # Alt (no idea which implementation is better) + # D_invsqrt = d_invsqrt.diag(name="D_invsqrt") + # A_weighted = (D_invsqrt @ A).new(name=name) + # A_weighted @= D_invsqrt + + d_invsqrt << unary.one(d_invsqrt) + D = d_invsqrt.diag(name="D") + A_weighted << D - A_weighted + return A_weighted diff --git a/graphblas_algorithms/linalg/modularitymatrix.py b/graphblas_algorithms/linalg/modularitymatrix.py new file mode 100644 index 0000000..1efff65 --- /dev/null +++ b/graphblas_algorithms/linalg/modularitymatrix.py @@ -0,0 +1,37 @@ +from graphblas import monoid, unary + +from .laplacianmatrix import _laplacian_helper + +__all__ = ["modularity_matrix", "directed_modularity_matrix"] + + +def modularity_matrix(G, nodelist=None, is_weighted=False, *, name="modularity_matrix"): + k, A = _laplacian_helper(G, nodelist, is_weighted) + m = k.reduce().get(0) + X = k.outer(k).new(float, name=name) + X /= m + X << A - X + return X + + +def directed_modularity_matrix( + G, nodelist=None, is_weighted=False, *, name="directed_modularity_matrix" +): + A = G._A + if nodelist is not None: + ids = G.list_to_ids(nodelist) + A = A[ids, ids].new() + if not is_weighted: + A << unary.one(A) + k_out = A.reduce_rowwise(monoid.plus).new() + k_in = A.reduce_columnwise(monoid.plus).new() + elif is_weighted: + k_out, k_in = G.get_properties("plus_rowwise+ plus_columnwise+") + else: + A = unary.one(A).new() + k_out, k_in = G.get_properties("row_degrees+ column_degrees+") + m = k_out.reduce().get(0) + X = k_out.outer(k_in).new(float, name=name) + X /= m + X << A - X + return X diff --git a/graphblas_algorithms/nxapi/__init__.py b/graphblas_algorithms/nxapi/__init__.py index 75c7aa7..97d4249 100644 --- a/graphblas_algorithms/nxapi/__init__.py +++ b/graphblas_algorithms/nxapi/__init__.py @@ -2,23 +2,40 @@ from .centrality import * from .cluster import * from .community import * +from .components import * from .core import * from .cuts import * from .dag import * from .dominating import * +from .efficiency_measures import * +from .generators import * from .isolate import * +from .isomorphism import fast_could_be_isomorphic, faster_could_be_isomorphic +from .linalg import * from .link_analysis import * +from .lowest_common_ancestors import * +from .operators import * from .reciprocity import * from .regular import * from .shortest_paths import * from .simple_paths import * from .smetric import * from .structuralholes import * +from .traversal import * from .triads import * +from .tournament import is_tournament from . import centrality from . import cluster from . import community +from . import components +from . import efficiency_measures +from . import generators +from . import isomorphism +from . import linalg from . import link_analysis +from . import lowest_common_ancestors +from . import operators from . import shortest_paths from . import tournament +from . 
import traversal diff --git a/graphblas_algorithms/nxapi/_utils.py b/graphblas_algorithms/nxapi/_utils.py index db309a4..0bb9617 100644 --- a/graphblas_algorithms/nxapi/_utils.py +++ b/graphblas_algorithms/nxapi/_utils.py @@ -100,7 +100,7 @@ def partition(chunksize, L, *, evenly=True): yield from L return if evenly: - k = ceil(L / chunksize) + k = ceil(len(L) / chunksize) if k * chunksize != N: yield from split_evenly(k, L) return diff --git a/graphblas_algorithms/nxapi/boundary.py b/graphblas_algorithms/nxapi/boundary.py index 8907f09..662cfe4 100644 --- a/graphblas_algorithms/nxapi/boundary.py +++ b/graphblas_algorithms/nxapi/boundary.py @@ -29,15 +29,19 @@ def edge_boundary(G, nbunch1, nbunch2=None, data=False, keys=False, default=None (id_to_key[col] for col in cols), # Unsure about this; data argument may mean *all* edge attributes ({weight: val} for val in vals), + strict=True, ) else: it = zip( (id_to_key[row] for row in rows), (id_to_key[col] for col in cols), + strict=True, ) if is_multigraph: # Edge weights indicate number of times to repeat edges - it = itertools.chain.from_iterable(itertools.starmap(itertools.repeat, zip(it, vals))) + it = itertools.chain.from_iterable( + itertools.starmap(itertools.repeat, zip(it, vals, strict=True)) + ) return it diff --git a/graphblas_algorithms/nxapi/cluster.py b/graphblas_algorithms/nxapi/cluster.py index 425fd09..8e61f9b 100644 --- a/graphblas_algorithms/nxapi/cluster.py +++ b/graphblas_algorithms/nxapi/cluster.py @@ -78,19 +78,6 @@ def average_clustering(G, nodes=None, weight=None, count_zeros=True): return func(G, weighted=weighted, count_zeros=count_zeros, mask=mask) -def _split(L, k): - """Split a list into approximately-equal parts""" - N = len(L) - start = 0 - for i in range(1, k): - stop = (N * i + k - 1) // k - if stop != start: - yield L[start:stop] - start = stop - if stop != N: - yield L[stop:] - - # TODO: should this move into algorithms? 
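The `partition` fix in `nxapi/_utils.py` above is worth a second look: `L` is a list, so the old `ceil(L / chunksize)` raised `TypeError`, while `ceil(len(L) / chunksize)` computes the number of chunks. A minimal sanity check of the even-split behavior, reusing the `_split` logic removed from `cluster.py` above (the real `split_evenly` in `_utils.py` may differ):

```python
from math import ceil


def split_evenly(k, L):
    # Same logic as the removed `_split`: k approximately-equal contiguous parts.
    # (Assumes k >= 2; `partition` returns early for the trivial cases.)
    N = len(L)
    start = 0
    for i in range(1, k):
        stop = (N * i + k - 1) // k
        if stop != start:
            yield L[start:stop]
        start = stop
    if stop != N:
        yield L[stop:]


L = list(range(10))
k = ceil(len(L) / 4)  # the fix: len(L), not the list itself
assert [len(chunk) for chunk in split_evenly(k, L)] == [4, 3, 3]
```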
def _square_clustering_split(G, node_ids=None, *, chunksize): if node_ids is None: diff --git a/graphblas_algorithms/nxapi/components/__init__.py b/graphblas_algorithms/nxapi/components/__init__.py new file mode 100644 index 0000000..bb0aea6 --- /dev/null +++ b/graphblas_algorithms/nxapi/components/__init__.py @@ -0,0 +1,2 @@ +from .connected import * +from .weakly_connected import * diff --git a/graphblas_algorithms/nxapi/components/connected.py b/graphblas_algorithms/nxapi/components/connected.py new file mode 100644 index 0000000..d55a430 --- /dev/null +++ b/graphblas_algorithms/nxapi/components/connected.py @@ -0,0 +1,27 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.algorithms.exceptions import PointlessConcept +from graphblas_algorithms.classes.graph import to_undirected_graph +from graphblas_algorithms.utils import not_implemented_for + +from ..exception import NetworkXPointlessConcept + +__all__ = [ + "is_connected", + "node_connected_component", +] + + +@not_implemented_for("directed") +def is_connected(G): + G = to_undirected_graph(G) + try: + return algorithms.is_connected(G) + except PointlessConcept as e: + raise NetworkXPointlessConcept(*e.args) from e + + +@not_implemented_for("directed") +def node_connected_component(G, n): + G = to_undirected_graph(G) + rv = algorithms.node_connected_component(G, n) + return G.vector_to_nodeset(rv) diff --git a/graphblas_algorithms/nxapi/components/weakly_connected.py b/graphblas_algorithms/nxapi/components/weakly_connected.py new file mode 100644 index 0000000..c72b532 --- /dev/null +++ b/graphblas_algorithms/nxapi/components/weakly_connected.py @@ -0,0 +1,19 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.algorithms.exceptions import PointlessConcept +from graphblas_algorithms.classes.digraph import to_directed_graph +from graphblas_algorithms.utils import not_implemented_for + +from ..exception import NetworkXPointlessConcept + +__all__ = [ + "is_weakly_connected", +] + + +@not_implemented_for("undirected") +def is_weakly_connected(G): + G = to_directed_graph(G) + try: + return algorithms.is_weakly_connected(G) + except PointlessConcept as e: + raise NetworkXPointlessConcept(*e.args) from e diff --git a/graphblas_algorithms/nxapi/efficiency_measures.py b/graphblas_algorithms/nxapi/efficiency_measures.py new file mode 100644 index 0000000..06971a2 --- /dev/null +++ b/graphblas_algorithms/nxapi/efficiency_measures.py @@ -0,0 +1,9 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.graph import to_undirected_graph +from graphblas_algorithms.utils import not_implemented_for + + +@not_implemented_for("directed") +def efficiency(G, u, v): + G = to_undirected_graph(G) + return algorithms.efficiency(G, u, v) diff --git a/graphblas_algorithms/nxapi/exception.py b/graphblas_algorithms/nxapi/exception.py index 2630384..0804bb1 100644 --- a/graphblas_algorithms/nxapi/exception.py +++ b/graphblas_algorithms/nxapi/exception.py @@ -5,6 +5,9 @@ class NetworkXError(Exception): pass + class NetworkXNoPath(Exception): + pass + class NetworkXPointlessConcept(Exception): pass @@ -20,6 +23,7 @@ class PowerIterationFailedConvergence(Exception): else: from networkx import ( NetworkXError, + NetworkXNoPath, NetworkXPointlessConcept, NetworkXUnbounded, NodeNotFound, diff --git a/graphblas_algorithms/nxapi/generators/__init__.py b/graphblas_algorithms/nxapi/generators/__init__.py new file mode 100644 index 0000000..65a6526 --- /dev/null +++ 
b/graphblas_algorithms/nxapi/generators/__init__.py @@ -0,0 +1 @@ +from .ego import * diff --git a/graphblas_algorithms/nxapi/generators/ego.py b/graphblas_algorithms/nxapi/generators/ego.py new file mode 100644 index 0000000..e591cb3 --- /dev/null +++ b/graphblas_algorithms/nxapi/generators/ego.py @@ -0,0 +1,11 @@ +from graphblas_algorithms import generators +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = ["ego_graph"] + + +def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None): + G = to_graph(G, weight=distance) + return generators.ego_graph( + G, n, radius=radius, center=center, undirected=undirected, is_weighted=distance is not None + ) diff --git a/graphblas_algorithms/nxapi/isomorphism/__init__.py b/graphblas_algorithms/nxapi/isomorphism/__init__.py new file mode 100644 index 0000000..e701b70 --- /dev/null +++ b/graphblas_algorithms/nxapi/isomorphism/__init__.py @@ -0,0 +1 @@ +from .isomorph import * diff --git a/graphblas_algorithms/nxapi/isomorphism/isomorph.py b/graphblas_algorithms/nxapi/isomorphism/isomorph.py new file mode 100644 index 0000000..1dedb64 --- /dev/null +++ b/graphblas_algorithms/nxapi/isomorphism/isomorph.py @@ -0,0 +1,25 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = [ + "fast_could_be_isomorphic", + "faster_could_be_isomorphic", +] + + +def fast_could_be_isomorphic(G1, G2): + G1 = to_graph(G1) + G2 = to_graph(G2) + return algorithms.fast_could_be_isomorphic(G1, G2) + + +fast_graph_could_be_isomorphic = fast_could_be_isomorphic + + +def faster_could_be_isomorphic(G1, G2): + G1 = to_graph(G1) + G2 = to_graph(G2) + return algorithms.faster_could_be_isomorphic(G1, G2) + + +faster_graph_could_be_isomorphic = faster_could_be_isomorphic diff --git a/graphblas_algorithms/nxapi/linalg/__init__.py b/graphblas_algorithms/nxapi/linalg/__init__.py new file mode 100644 index 0000000..aada0f4 --- /dev/null +++ b/graphblas_algorithms/nxapi/linalg/__init__.py @@ -0,0 +1,5 @@ +from . 
import bethehessianmatrix, graphmatrix, laplacianmatrix, modularitymatrix +from .bethehessianmatrix import * +from .graphmatrix import * +from .laplacianmatrix import * +from .modularitymatrix import * diff --git a/graphblas_algorithms/nxapi/linalg/bethehessianmatrix.py b/graphblas_algorithms/nxapi/linalg/bethehessianmatrix.py new file mode 100644 index 0000000..7fa30b4 --- /dev/null +++ b/graphblas_algorithms/nxapi/linalg/bethehessianmatrix.py @@ -0,0 +1,12 @@ +from graphblas_algorithms import linalg +from graphblas_algorithms.classes.graph import to_undirected_graph +from graphblas_algorithms.utils import not_implemented_for + +__all__ = ["bethe_hessian_matrix"] + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +def bethe_hessian_matrix(G, r=None, nodelist=None): + G = to_undirected_graph(G) + return linalg.bethe_hessian_matrix(G, r=r, nodelist=nodelist) diff --git a/graphblas_algorithms/nxapi/linalg/graphmatrix.py b/graphblas_algorithms/nxapi/linalg/graphmatrix.py new file mode 100644 index 0000000..0b3e7d9 --- /dev/null +++ b/graphblas_algorithms/nxapi/linalg/graphmatrix.py @@ -0,0 +1,9 @@ +from graphblas_algorithms import linalg +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = ["adjacency_matrix"] + + +def adjacency_matrix(G, nodelist=None, dtype=None, weight="weight"): + G = to_graph(G, weight=weight, dtype=dtype) + return linalg.adjacency_matrix(G, nodelist, dtype, is_weighted=weight is not None) diff --git a/graphblas_algorithms/nxapi/linalg/laplacianmatrix.py b/graphblas_algorithms/nxapi/linalg/laplacianmatrix.py new file mode 100644 index 0000000..752ca1e --- /dev/null +++ b/graphblas_algorithms/nxapi/linalg/laplacianmatrix.py @@ -0,0 +1,14 @@ +from graphblas_algorithms import linalg +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = ["laplacian_matrix", "normalized_laplacian_matrix"] + + +def laplacian_matrix(G, nodelist=None, weight="weight"): + G = to_graph(G, weight=weight) + return linalg.laplacian_matrix(G, nodelist, is_weighted=weight is not None) + + +def normalized_laplacian_matrix(G, nodelist=None, weight="weight"): + G = to_graph(G, weight=weight) + return linalg.normalized_laplacian_matrix(G, nodelist, is_weighted=weight is not None) diff --git a/graphblas_algorithms/nxapi/linalg/modularitymatrix.py b/graphblas_algorithms/nxapi/linalg/modularitymatrix.py new file mode 100644 index 0000000..76e160f --- /dev/null +++ b/graphblas_algorithms/nxapi/linalg/modularitymatrix.py @@ -0,0 +1,20 @@ +from graphblas_algorithms import linalg +from graphblas_algorithms.classes.digraph import to_directed_graph +from graphblas_algorithms.classes.graph import to_undirected_graph +from graphblas_algorithms.utils import not_implemented_for + +__all__ = ["modularity_matrix", "directed_modularity_matrix"] + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +def modularity_matrix(G, nodelist=None, weight=None): + G = to_undirected_graph(G, weight=weight) + return linalg.modularity_matrix(G, nodelist, is_weighted=weight is not None) + + +@not_implemented_for("undirected") +@not_implemented_for("multigraph") +def directed_modularity_matrix(G, nodelist=None, weight=None): + G = to_directed_graph(G, weight=weight) + return linalg.directed_modularity_matrix(G, nodelist, is_weighted=weight is not None) diff --git a/graphblas_algorithms/nxapi/link_analysis/pagerank_alg.py b/graphblas_algorithms/nxapi/link_analysis/pagerank_alg.py index d40506f..22e977e 100644 --- 
a/graphblas_algorithms/nxapi/link_analysis/pagerank_alg.py +++ b/graphblas_algorithms/nxapi/link_analysis/pagerank_alg.py @@ -3,7 +3,7 @@ from ..exception import PowerIterationFailedConvergence -_all = ["pagerank"] +_all = ["pagerank", "google_matrix"] def pagerank( @@ -43,3 +43,21 @@ def pagerank( raise PowerIterationFailedConvergence(*e.args) from e else: return G.vector_to_nodemap(result, fill_value=0.0) + + +def google_matrix( + G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None +): + G = to_graph(G, weight=weight, dtype=float) + p = G.dict_to_vector(personalization, dtype=float, name="personalization") + if dangling is not None and G.get_property("row_degrees+").nvals < len(G): + dangling_weights = G.dict_to_vector(dangling, dtype=float, name="dangling") + else: + dangling_weights = None + return algorithms.google_matrix( + G, + alpha=alpha, + personalization=p, + nodelist=nodelist, + dangling=dangling_weights, + ) diff --git a/graphblas_algorithms/nxapi/lowest_common_ancestors.py b/graphblas_algorithms/nxapi/lowest_common_ancestors.py new file mode 100644 index 0000000..f94e8c2 --- /dev/null +++ b/graphblas_algorithms/nxapi/lowest_common_ancestors.py @@ -0,0 +1,11 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_directed_graph +from graphblas_algorithms.utils import not_implemented_for + +__all__ = ["lowest_common_ancestor"] + + +@not_implemented_for("undirected") +def lowest_common_ancestor(G, node1, node2, default=None): + G = to_directed_graph(G) + return algorithms.lowest_common_ancestor(G, node1, node2, default=default) diff --git a/graphblas_algorithms/nxapi/operators/__init__.py b/graphblas_algorithms/nxapi/operators/__init__.py new file mode 100644 index 0000000..c2742b9 --- /dev/null +++ b/graphblas_algorithms/nxapi/operators/__init__.py @@ -0,0 +1,2 @@ +from .binary import * +from .unary import * diff --git a/graphblas_algorithms/nxapi/operators/binary.py b/graphblas_algorithms/nxapi/operators/binary.py new file mode 100644 index 0000000..82e8f08 --- /dev/null +++ b/graphblas_algorithms/nxapi/operators/binary.py @@ -0,0 +1,77 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +from ..exception import NetworkXError + +__all__ = [ + "compose", + "difference", + "disjoint_union", + "full_join", + "intersection", + "symmetric_difference", + "union", +] + + +def union(G, H, rename=()): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.union(G, H, rename=rename) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + + +def disjoint_union(G, H): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.disjoint_union(G, H) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + + +def intersection(G, H): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.intersection(G, H) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + + +def difference(G, H): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.difference(G, H) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + + +def symmetric_difference(G, H): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.symmetric_difference(G, H) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + + +def 
compose(G, H): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.compose(G, H) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + + +def full_join(G, H, rename=()): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.full_join(G, H, rename=rename) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e diff --git a/graphblas_algorithms/nxapi/operators/unary.py b/graphblas_algorithms/nxapi/operators/unary.py new file mode 100644 index 0000000..6633b3b --- /dev/null +++ b/graphblas_algorithms/nxapi/operators/unary.py @@ -0,0 +1,22 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +from ..exception import NetworkXError + +__all__ = [ + "complement", + "reverse", +] + + +def complement(G): + G = to_graph(G) + return algorithms.complement(G) + + +def reverse(G, copy=True): + G = to_graph(G) + try: + return algorithms.reverse(G, copy=copy) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e diff --git a/graphblas_algorithms/nxapi/shortest_paths/__init__.py b/graphblas_algorithms/nxapi/shortest_paths/__init__.py index 9fc57fb..781db9d 100644 --- a/graphblas_algorithms/nxapi/shortest_paths/__init__.py +++ b/graphblas_algorithms/nxapi/shortest_paths/__init__.py @@ -1,3 +1,4 @@ from .dense import * from .generic import * +from .unweighted import * from .weighted import * diff --git a/graphblas_algorithms/nxapi/shortest_paths/dense.py b/graphblas_algorithms/nxapi/shortest_paths/dense.py index 4b62891..82c2eed 100644 --- a/graphblas_algorithms/nxapi/shortest_paths/dense.py +++ b/graphblas_algorithms/nxapi/shortest_paths/dense.py @@ -1,7 +1,9 @@ from graphblas_algorithms import algorithms from graphblas_algorithms.classes.digraph import to_graph -__all__ = ["floyd_warshall", "floyd_warshall_predecessor_and_distance"] +from ..exception import NetworkXError + +__all__ = ["floyd_warshall", "floyd_warshall_numpy", "floyd_warshall_predecessor_and_distance"] def floyd_warshall(G, weight="weight"): @@ -17,3 +19,19 @@ def floyd_warshall_predecessor_and_distance(G, weight="weight"): G.matrix_to_nodenodemap(P, values_are_keys=True), G.matrix_to_nodenodemap(D, fill_value=float("inf")), ) + + +def floyd_warshall_numpy(G, nodelist=None, weight="weight"): + G = to_graph(G, weight=weight) + if nodelist is not None: + if not (len(nodelist) == len(G) == len(set(nodelist))): + raise NetworkXError("nodelist must contain every node in G with no repeats.") + permutation = G.list_to_ids(nodelist) + else: + permutation = None + try: + return algorithms.floyd_warshall_predecessor_and_distance( + G, is_weighted=weight is not None, compute_predecessors=False, permutation=permutation + )[1] + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e diff --git a/graphblas_algorithms/nxapi/shortest_paths/unweighted.py b/graphblas_algorithms/nxapi/shortest_paths/unweighted.py new file mode 100644 index 0000000..f1700f3 --- /dev/null +++ b/graphblas_algorithms/nxapi/shortest_paths/unweighted.py @@ -0,0 +1,45 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +from .._utils import normalize_chunksize, partition +from ..exception import NodeNotFound + +__all__ = [ + "single_source_shortest_path_length", + "single_target_shortest_path_length", + "all_pairs_shortest_path_length", +] + + +def 
single_source_shortest_path_length(G, source, cutoff=None): + G = to_graph(G) + if source not in G: + raise NodeNotFound(f"Source {source} is not in G") + v = algorithms.single_source_shortest_path_length(G, source, cutoff) + return G.vector_to_nodemap(v) + + +def single_target_shortest_path_length(G, target, cutoff=None): + G = to_graph(G) + if target not in G: + raise NodeNotFound(f"Target {target} is not in G") + v = algorithms.single_target_shortest_path_length(G, target, cutoff) + return G.vector_to_nodemap(v) + + +def all_pairs_shortest_path_length(G, cutoff=None, *, chunksize="10 MiB"): + G = to_graph(G) + chunksize = normalize_chunksize(chunksize, len(G) * G._A.dtype.np_type.itemsize, len(G)) + if chunksize is None: + D = algorithms.all_pairs_shortest_path_length(G, cutoff) + yield from G.matrix_to_nodenodemap(D).items() + elif chunksize < 2: + for source in G: + d = algorithms.single_source_shortest_path_length(G, source, cutoff) + yield (source, G.vector_to_nodemap(d)) + else: + for cur_nodes in partition(chunksize, list(G)): + D = algorithms.all_pairs_shortest_path_length(G, cutoff, nodes=cur_nodes) + for i, source in enumerate(cur_nodes): + d = D[i, :].new(name=f"all_pairs_shortest_path_length_{i}") + yield (source, G.vector_to_nodemap(d)) diff --git a/graphblas_algorithms/nxapi/shortest_paths/weighted.py b/graphblas_algorithms/nxapi/shortest_paths/weighted.py index d6bf1d2..b08dd85 100644 --- a/graphblas_algorithms/nxapi/shortest_paths/weighted.py +++ b/graphblas_algorithms/nxapi/shortest_paths/weighted.py @@ -1,11 +1,14 @@ -from graphblas_algorithms import algorithms +from graphblas_algorithms import algorithms, exceptions from graphblas_algorithms.classes.digraph import to_graph from .._utils import normalize_chunksize, partition -from ..exception import NetworkXUnbounded, NodeNotFound +from ..exception import NetworkXNoPath, NetworkXUnbounded, NodeNotFound __all__ = [ "all_pairs_bellman_ford_path_length", + "bellman_ford_path", + "bellman_ford_path_length", + "negative_edge_cycle", "single_source_bellman_ford_path_length", ] @@ -52,3 +55,29 @@ def single_source_bellman_ford_path_length(G, source, weight="weight"): except KeyError as e: raise NodeNotFound(*e.args) from e return G.vector_to_nodemap(d) + + +def bellman_ford_path(G, source, target, weight="weight"): + # TODO: what if weight is a function? + G = to_graph(G, weight=weight) + try: + return algorithms.bellman_ford_path(G, source, target) + except KeyError as e: + raise NodeNotFound(*e.args) from e + + +def bellman_ford_path_length(G, source, target, weight="weight"): + G = to_graph(G, weight=weight) + try: + return algorithms.bellman_ford_path_length(G, source, target) + except KeyError as e: + raise NodeNotFound(*e.args) from e + except exceptions.NoPath as e: + raise NetworkXNoPath(*e.args) from e + + +def negative_edge_cycle(G, weight="weight", heuristic=True): + # TODO: what if weight is a function? 
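+    # (NetworkX also accepts a callable for `weight`; only an edge-attribute
+    # name is supported here, since `to_graph` reads a single attribute.)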
+ # TODO: use a heuristic to try to stop early + G = to_graph(G, weight=weight) + return algorithms.negative_edge_cycle(G) diff --git a/graphblas_algorithms/nxapi/smetric.py b/graphblas_algorithms/nxapi/smetric.py index a363e1e..a1f60ab 100644 --- a/graphblas_algorithms/nxapi/smetric.py +++ b/graphblas_algorithms/nxapi/smetric.py @@ -1,13 +1,22 @@ +import warnings + from graphblas_algorithms import algorithms from graphblas_algorithms.classes.digraph import to_graph -from .exception import NetworkXError - __all__ = ["s_metric"] -def s_metric(G, normalized=True): - if normalized: - raise NetworkXError("Normalization not implemented") +def s_metric(G, **kwargs): + if kwargs: + if "normalized" in kwargs: + warnings.warn( + "\n\nThe `normalized` keyword is deprecated and will be removed\n" + "in the future. To silence this warning, remove `normalized`\n" + "when calling `s_metric`.\n\nThe value of `normalized` is ignored.", + DeprecationWarning, + stacklevel=2, + ) + else: + raise TypeError(f"s_metric got an unexpected keyword argument '{kwargs.popitem()[0]}'") G = to_graph(G) return algorithms.s_metric(G) diff --git a/graphblas_algorithms/nxapi/tournament.py b/graphblas_algorithms/nxapi/tournament.py index d951ade..6c1bb1f 100644 --- a/graphblas_algorithms/nxapi/tournament.py +++ b/graphblas_algorithms/nxapi/tournament.py @@ -1,5 +1,3 @@ -from graphblas import io - from graphblas_algorithms import algorithms from graphblas_algorithms.classes.digraph import to_directed_graph from graphblas_algorithms.utils import not_implemented_for @@ -28,6 +26,5 @@ def score_sequence(G): @not_implemented_for("multigraph") def tournament_matrix(G): G = to_directed_graph(G) - T = algorithms.tournament_matrix(G) # TODO: can we return a different, more native object? - return io.to_scipy_sparse(T) + return algorithms.tournament_matrix(G) diff --git a/graphblas_algorithms/nxapi/traversal/__init__.py b/graphblas_algorithms/nxapi/traversal/__init__.py new file mode 100644 index 0000000..7811162 --- /dev/null +++ b/graphblas_algorithms/nxapi/traversal/__init__.py @@ -0,0 +1 @@ +from .breadth_first_search import * diff --git a/graphblas_algorithms/nxapi/traversal/breadth_first_search.py b/graphblas_algorithms/nxapi/traversal/breadth_first_search.py new file mode 100644 index 0000000..0b2c6a7 --- /dev/null +++ b/graphblas_algorithms/nxapi/traversal/breadth_first_search.py @@ -0,0 +1,27 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +from ..exception import NetworkXError + +__all__ = [ + "bfs_layers", + "descendants_at_distance", +] + + +def bfs_layers(G, sources): + G = to_graph(G) + try: + for layer in algorithms.bfs_layers(G, sources): + yield G.vector_to_list(layer) + except KeyError as e: + raise NetworkXError(*e.args) from e + + +def descendants_at_distance(G, source, distance): + G = to_graph(G) + try: + v = algorithms.descendants_at_distance(G, source, distance) + except KeyError as e: + raise NetworkXError(*e.args) from e + return G.vector_to_nodeset(v) diff --git a/graphblas_algorithms/tests/test_core.py b/graphblas_algorithms/tests/test_core.py index 7718ef6..68dbeb7 100644 --- a/graphblas_algorithms/tests/test_core.py +++ b/graphblas_algorithms/tests/test_core.py @@ -27,10 +27,13 @@ def test_packages(): path = pathlib.Path(ga.__file__).parent pkgs = [f"graphblas_algorithms.{x}" for x in setuptools.find_packages(path)] pkgs.append("graphblas_algorithms") + pkgs.append("_nx_graphblas") pkgs.sort() pyproject = path.parent / "pyproject.toml" if not 
pyproject.exists(): pytest.skip("Did not find pyproject.toml") with pyproject.open("rb") as f: pkgs2 = sorted(tomli.load(f)["tool"]["setuptools"]["packages"]) - assert pkgs == pkgs2 + assert ( + pkgs == pkgs2 + ), "If there are extra items on the left, add them to pyproject.toml:tool.setuptools.packages" diff --git a/graphblas_algorithms/tests/test_match_nx.py b/graphblas_algorithms/tests/test_match_nx.py index 6c42d54..1924ff7 100644 --- a/graphblas_algorithms/tests/test_match_nx.py +++ b/graphblas_algorithms/tests/test_match_nx.py @@ -11,6 +11,7 @@ """ import sys from collections import namedtuple +from pathlib import Path import pytest @@ -21,13 +22,29 @@ "Matching networkx namespace requires networkx to be installed", allow_module_level=True ) else: - from networkx.classes import backends # noqa: F401 + try: + from networkx.utils import backends + + IS_NX_30_OR_31 = False + except ImportError: # pragma: no cover (import) + # This is the location in nx 3.1 + from networkx.classes import backends # noqa: F401 + + IS_NX_30_OR_31 = True def isdispatched(func): """Can this NetworkX function dispatch to other backends?""" + if IS_NX_30_OR_31: + return ( + callable(func) + and hasattr(func, "dispatchname") + and func.__module__.startswith("networkx") + ) return ( - callable(func) and hasattr(func, "dispatchname") and func.__module__.startswith("networkx") + callable(func) + and hasattr(func, "preserve_edge_attrs") + and func.__module__.startswith("networkx") ) @@ -36,7 +53,9 @@ def dispatchname(func): # Haha, there should be a better way to get this if not isdispatched(func): raise ValueError(f"Function is not dispatched in NetworkX: {func.__name__}") - return func.dispatchname + if IS_NX_30_OR_31: + return func.dispatchname + return func.name def fullname(func): @@ -130,12 +149,20 @@ def nx_to_gb_info(info): ) +def module_exists(info): + return info[2].rsplit(".", 1)[0] in sys.modules + + @pytest.mark.checkstructure def test_dispatched_funcs_in_nxapi(nx_names_to_info, gb_names_to_info): """Are graphblas_algorithms functions in the correct locations in nxapi?""" failing = False for name in nx_names_to_info.keys() & gb_names_to_info.keys(): - nx_paths = {nx_to_gb_info(info) for info in nx_names_to_info[name]} + nx_paths = { + gbinfo + for info in nx_names_to_info[name] + if module_exists(gbinfo := nx_to_gb_info(info)) + } gb_paths = gb_names_to_info[name] if nx_paths != gb_paths: # pragma: no cover failing = True @@ -151,3 +178,60 @@ def test_dispatched_funcs_in_nxapi(nx_names_to_info, gb_names_to_info): print(" ", ":".join(path.rsplit(".", 1))) if failing: # pragma: no cover raise AssertionError + + +def get_fullname(info): + fullname = info.fullname + if not fullname.endswith(f".{info.dispatchname}"): + fullname += f" ({info.dispatchname})" + return fullname + + +def test_print_dispatched_not_implemented(nx_names_to_info, gb_names_to_info): + """It may be informative to see the results from this to identify functions to implement. 
+ + $ pytest -s -k test_print_dispatched_not_implemented + """ + not_implemented = nx_names_to_info.keys() - gb_names_to_info.keys() + fullnames = {get_fullname(next(iter(nx_names_to_info[name]))) for name in not_implemented} + print() + print("=================================================================================") + print("Functions dispatched in NetworkX that ARE NOT implemented in graphblas-algorithms") + print("---------------------------------------------------------------------------------") + for i, name in enumerate(sorted(fullnames)): + print(i, name) + print("=================================================================================") + + +def test_print_dispatched_implemented(nx_names_to_info, gb_names_to_info): + """It may be informative to see the results from this to identify implemented functions. + + $ pytest -s -k test_print_dispatched_implemented + """ + implemented = nx_names_to_info.keys() & gb_names_to_info.keys() + fullnames = {get_fullname(next(iter(nx_names_to_info[name]))) for name in implemented} + print() + print("=============================================================================") + print("Functions dispatched in NetworkX that ARE implemented in graphblas-algorithms") + print("-----------------------------------------------------------------------------") + for i, name in enumerate(sorted(fullnames)): + print(i, name) + print("=============================================================================") + + +def test_algorithms_in_readme(nx_names_to_info, gb_names_to_info): + """Ensure all algorithms are mentioned in README.md.""" + implemented = nx_names_to_info.keys() & gb_names_to_info.keys() + path = Path(__file__).parent.parent.parent / "README.md" + if not path.exists(): + return + with path.open("r") as f: + text = f.read() + missing = set() + for name in sorted(implemented): + if name not in text: + missing.add(name) + if missing: + msg = f"Algorithms missing in README.md: {', '.join(sorted(missing))}" + print(msg) + raise AssertionError(msg) diff --git a/pyproject.toml b/pyproject.toml index f1e4472..b1625c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,11 +10,12 @@ name = "graphblas-algorithms" dynamic = ["version"] description = "Graph algorithms written in GraphBLAS and backend for NetworkX" readme = "README.md" -requires-python = ">=3.8" +requires-python = ">=3.10" license = {file = "LICENSE"} authors = [ - {name = "Erik Welch"}, + {name = "Erik Welch", email = "erik.n.welch@gmail.com"}, {name = "Jim Kitchen"}, + {name = "Graphblas-algorithms contributors"}, ] maintainers = [ {name = "Erik Welch", email = "erik.n.welch@gmail.com"}, @@ -35,16 +36,16 @@ keywords = [ "math", ] classifiers = [ - "Development Status :: 3 - Alpha", + "Development Status :: 4 - Beta", "License :: OSI Approved :: Apache Software License", "Operating System :: MacOS :: MacOS X", "Operating System :: POSIX :: Linux", "Operating System :: Microsoft :: Windows", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3 :: Only", "Intended Audience :: Developers", "Intended Audience :: Other Audience", @@ -63,6 +64,12 @@ dependencies = [ [project.entry-points."networkx.plugins"] graphblas = "graphblas_algorithms.interface:Dispatcher" 
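For context on the entry points in this hunk: `networkx.plugins` is the discovery mechanism NetworkX 3.0/3.1 uses to find dispatchable backends, and the `networkx.backends`/`networkx.backend_info` pair added below is its replacement in newer NetworkX releases. A minimal sketch of exercising the backend once this package is installed; the `backend=` keyword and the automatic dispatch on backend graph types are newer-NetworkX dispatch behavior, and the graph here is purely illustrative:

```python
import networkx as nx
import graphblas_algorithms as ga

G = nx.erdos_renyi_graph(1000, 0.01, seed=42)

# Explicit dispatch: NetworkX converts G and runs the GraphBLAS implementation.
pr = nx.pagerank(G, backend="graphblas")

# Or convert once and reuse: calls that receive a backend graph type
# dispatch to graphblas-algorithms automatically.
G2 = ga.Graph.from_networkx(G)
pr2 = nx.pagerank(G2)
```

The `networkx.backend_info` entry point (`_nx_graphblas:get_info`) is what NetworkX queries for the backend's capabilities; keeping it in a small separate top-level module means that query does not have to import the full package.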
+[project.entry-points."networkx.backends"] +graphblas = "graphblas_algorithms.interface:Dispatcher" + +[project.entry-points."networkx.backend_info"] +graphblas = "_nx_graphblas:get_info" + [project.urls] homepage = "https://github.com/python-graphblas/graphblas-algorithms" # documentation = "https://graphblas-algorithms.readthedocs.io" @@ -73,38 +80,48 @@ changelog = "https://github.com/python-graphblas/graphblas-algorithms/releases" test = [ "pytest", "networkx >=3.0", - "scipy >=1.8", + "scipy >=1.9", "setuptools", "tomli", ] -complete = [ - "pytest", - "networkx >=3.0", - "scipy >=1.8", - "setuptools", - "tomli", +all = [ + "graphblas-algorithms[test]", ] [tool.setuptools] # Let's be explicit (we test this too) +# TODO: it would be nice if setuptools (or our build backend) could handle this automatically and reliably. # $ python -c 'from setuptools import find_packages ; [print(x) for x in sorted(find_packages())]' # $ find graphblas_algorithms/ -name __init__.py -print | sort | sed -e 's/\/__init__.py//g' -e 's/\//./g' # $ python -c 'import tomli ; [print(x) for x in sorted(tomli.load(open("pyproject.toml", "rb"))["tool"]["setuptools"]["packages"])]' packages = [ + "_nx_graphblas", "graphblas_algorithms", "graphblas_algorithms.algorithms", "graphblas_algorithms.algorithms.centrality", "graphblas_algorithms.algorithms.community", + "graphblas_algorithms.algorithms.components", + "graphblas_algorithms.algorithms.isomorphism", "graphblas_algorithms.algorithms.link_analysis", + "graphblas_algorithms.algorithms.operators", "graphblas_algorithms.algorithms.shortest_paths", "graphblas_algorithms.algorithms.tests", + "graphblas_algorithms.algorithms.traversal", "graphblas_algorithms.classes", + "graphblas_algorithms.generators", + "graphblas_algorithms.linalg", "graphblas_algorithms.nxapi", "graphblas_algorithms.nxapi.centrality", "graphblas_algorithms.nxapi.community", + "graphblas_algorithms.nxapi.components", + "graphblas_algorithms.nxapi.generators", + "graphblas_algorithms.nxapi.isomorphism", + "graphblas_algorithms.nxapi.linalg", "graphblas_algorithms.nxapi.link_analysis", + "graphblas_algorithms.nxapi.operators", "graphblas_algorithms.nxapi.shortest_paths", "graphblas_algorithms.nxapi.tests", + "graphblas_algorithms.nxapi.traversal", "graphblas_algorithms.tests", "graphblas_algorithms.utils", ] @@ -116,7 +133,7 @@ dirty_template = "{tag}+{ccount}.g{sha}.dirty" [tool.black] line-length = 100 -target-version = ["py38", "py39", "py310", "py311"] +target-version = ["py310", "py311", "py312"] [tool.isort] sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"] @@ -132,6 +149,7 @@ skip = [ ] [tool.pytest.ini_options] +minversion = "6.0" testpaths = "graphblas_algorithms" xfail_strict = false markers = [ @@ -158,7 +176,10 @@ exclude_lines = [ [tool.ruff] # https://github.com/charliermarsh/ruff/ line-length = 100 -target-version = "py38" +target-version = "py310" +unfixable = [ + "F841" # unused-variable (Note: can leave useless expression) +] select = [ "ALL", ] @@ -183,6 +204,7 @@ ignore = [ # "SIM401", # Use dict.get ... 
instead of if-else-block (Note: if-else better for coverage and sometimes clearer) # "TRY004", # Prefer `TypeError` exception for invalid type (Note: good advice, but not worth the nuisance) # "TRY200", # Use `raise from` to specify exception cause (Note: sometimes okay to raise original exception) + "UP038", # Use `X | Y` in `isinstance` call instead of `(X, Y)` (Note: using `|` seems to be slower) # Intentionally ignored "COM812", # Trailing comma missing @@ -203,12 +225,14 @@ ignore = [ "RET502", # Do not implicitly `return None` in function able to return non-`None` value "RET503", # Missing explicit `return` at the end of function able to return non-`None` value "RET504", # Unnecessary variable assignment before `return` statement + "RUF012", # Mutable class attributes should be annotated with `typing.ClassVar` (Note: no annotations yet) "S110", # `try`-`except`-`pass` detected, consider logging the exception (Note: good advice, but we don't log) "S112", # `try`-`except`-`continue` detected, consider logging the exception (Note: good advice, but we don't log) "SIM102", # Use a single `if` statement instead of nested `if` statements (Note: often necessary) "SIM105", # Use contextlib.suppress(...) instead of try-except-pass (Note: try-except-pass is much faster) "SIM108", # Use ternary operator ... instead of if-else-block (Note: if-else better for coverage and sometimes clearer) "TRY003", # Avoid specifying long messages outside the exception class (Note: why?) + "FIX001", "FIX002", "FIX003", "FIX004", # flake8-fixme (like flake8-todos) # Ignored categories "C90", # mccabe (Too strict, but maybe we should make things less complex) @@ -224,6 +248,7 @@ ignore = [ "TID", # flake8-tidy-imports (Rely on isort and our own judgement) "TCH", # flake8-type-checking (Note: figure out type checking later) "ARG", # flake8-unused-arguments (Sometimes helpful, but too strict) + "TD", # flake8-todos (Maybe okay to add some of these) "ERA", # eradicate (We like code in comments!) "PD", # pandas-vet (Intended for scripts that use pandas, not libraries) ] @@ -231,6 +256,7 @@ ignore = [ [tool.ruff.per-file-ignores] "__init__.py" = ["F401"] # Allow unused imports (w/o defining `__all__`) "graphblas_algorithms/**/tests/*py" = ["S101", "T201", "D103", "D100"] # Allow assert, print, and no docstring +"graphblas_algorithms/interface.py" = ["PIE794"] # Allow us to use `mod = nxapi.` repeatedly "graphblas_algorithms/nxapi/exception.py" = ["F401"] # Allow unused imports (w/o defining `__all__`) "scripts/*.py" = ["INP001", "S101", "T201"] # Not a package, allow assert, allow print diff --git a/run_nx_tests.sh b/run_nx_tests.sh index 08a5582..740ab26 100755 --- a/run_nx_tests.sh +++ b/run_nx_tests.sh @@ -1,3 +1,6 @@ #!/bin/bash -NETWORKX_GRAPH_CONVERT=graphblas pytest --pyargs networkx "$@" -# NETWORKX_GRAPH_CONVERT=graphblas pytest --pyargs networkx --cov --cov-report term-missing "$@" +NETWORKX_GRAPH_CONVERT=graphblas \ +NETWORKX_TEST_BACKEND=graphblas \ +NETWORKX_FALLBACK_TO_NX=True \ + pytest --pyargs networkx "$@" +# pytest --pyargs networkx --cov --cov-report term-missing "$@" diff --git a/scripts/bench.py b/scripts/bench.py index ba61300..3b3f4dc 100755 --- a/scripts/bench.py +++ b/scripts/bench.py @@ -19,7 +19,7 @@ datapaths = [ Path(__file__).parent / ".." 
/ "data", - Path("."), + Path(), ] @@ -37,7 +37,7 @@ def find_data(dataname): if dataname not in download_data.data_urls: raise FileNotFoundError(f"Unable to find data file for {dataname}") curpath = Path(download_data.main([dataname])[0]) - return curpath.resolve().relative_to(Path(".").resolve()) + return curpath.resolve().relative_to(Path().resolve()) def get_symmetry(file_or_mminfo): diff --git a/scripts/download_data.py b/scripts/download_data.py index 009ebf0..b01626c 100755 --- a/scripts/download_data.py +++ b/scripts/download_data.py @@ -47,7 +47,7 @@ def main(datanames, overwrite=False): for name in datanames: target = datapath / f"{name}.mtx" filenames.append(target) - relpath = target.resolve().relative_to(Path(".").resolve()) + relpath = target.resolve().relative_to(Path().resolve()) if not overwrite and target.exists(): print(f"{relpath} already exists; skipping", file=sys.stderr) continue diff --git a/scripts/maketree.py b/scripts/maketree.py new file mode 100755 index 0000000..e4deed5 --- /dev/null +++ b/scripts/maketree.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python +"""Run this script to auto-generate API when adding or removing nxapi functions. + +This updates API tree in README.md and dispatch functions in `graphblas_algorithms/interface.py`. + +""" +from io import StringIO +from pathlib import Path + +import rich +from graphblas.core.utils import _autogenerate_code +from rich.tree import Tree + +from graphblas_algorithms.tests import test_match_nx +from graphblas_algorithms.tests.test_match_nx import get_fullname + + +def get_fixture(attr): + return getattr(test_match_nx, attr).__wrapped__ + + +def trim(name): + for prefix in ["networkx.algorithms.", "networkx."]: + if name.startswith(prefix): + return name[len(prefix) :] + raise ValueError(f"{name!r} does not begin with a recognized prefix") + + +def get_names(): + nx_names_to_info = get_fixture("nx_names_to_info")(get_fixture("nx_info")()) + gb_names_to_info = get_fixture("gb_names_to_info")(get_fixture("gb_info")()) + implemented = nx_names_to_info.keys() & gb_names_to_info.keys() + return sorted(trim(get_fullname(next(iter(nx_names_to_info[name])))) for name in implemented) + + +# Dispatched functions that are only available from `nxapi` +SHORTPATH = { + "overall_reciprocity", + "reciprocity", +} + + +def main(print_to_console=True, update_readme=True, update_interface=True): + fullnames = get_names() + # Step 1: add to README.md + tree = Tree("graphblas_algorithms.nxapi") + subtrees = {} + + def addtree(path): + if path in subtrees: + rv = subtrees[path] + elif "." 
not in path: + rv = subtrees[path] = tree.add(path) + else: + subpath, last = path.rsplit(".", 1) + subtree = addtree(subpath) + rv = subtrees[path] = subtree.add(last) + return rv + + for fullname in fullnames: + addtree(fullname) + if print_to_console: + rich.print(tree) + if update_readme: + s = StringIO() + rich.print(tree, file=s) + s.seek(0) + text = s.read() + _autogenerate_code( + Path(__file__).parent.parent / "README.md", + f"\n```\n{text}```\n\n", + begin="[//]: # (Begin auto-generated code)", + end="[//]: # (End auto-generated code)", + callblack=False, + ) + # Step 2: add to interface.py + lines = [] + prev_mod = None + for fullname in fullnames: + mod, subpath = fullname.split(".", 1) + if mod != prev_mod: + if prev_mod is not None: + lines.append("") + prev_mod = mod + lines.append(f" mod = nxapi.{mod}") + lines.append(" # " + "=" * (len(mod) + 10)) + if " (" in subpath: + subpath, name = subpath.rsplit(" (", 1) + name = name.split(")")[0] + else: + name = subpath.rsplit(".", 1)[-1] + if name in SHORTPATH: + subpath = subpath.rsplit(".", 1)[-1] + lines.append(f" {name} = nxapi.{subpath}") + else: + lines.append(f" {name} = mod.{subpath}") + lines.append("") + lines.append(" del mod") + lines.append("") + text = "\n".join(lines) + if update_interface: + _autogenerate_code( + Path(__file__).parent.parent / "graphblas_algorithms" / "interface.py", + text, + specializer="dispatch", + ) + return tree + + +if __name__ == "__main__": + main() diff --git a/scripts/scipy_impl.py b/scripts/scipy_impl.py index 277cece..35815a6 100644 --- a/scripts/scipy_impl.py +++ b/scripts/scipy_impl.py @@ -43,14 +43,14 @@ def pagerank( is_dangling = np.where(S == 0)[0] # power iteration: make up to max_iter iterations - for _ in range(max_iter): + for _i in range(max_iter): xlast = x x = alpha * (x @ A + sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p # check convergence, l1 norm err = np.absolute(x - xlast).sum() if err < N * tol: return x - # return dict(zip(nodelist, map(float, x))) + # return dict(zip(nodelist, map(float, x), strict=True)) raise nx.PowerIterationFailedConvergence(max_iter)
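As a closing note on the `scipy_impl.py` hunk above, here is a self-contained dense-NumPy sketch of the same power iteration, mainly to make the stopping rule concrete. `pagerank_dense` is a hypothetical helper for illustration only: it assumes a pre-normalized, row-stochastic matrix and skips the dangling-node and personalization handling that the script performs.

```python
import numpy as np


def pagerank_dense(A, alpha=0.85, max_iter=100, tol=1e-6):
    """Power iteration on a dense, row-stochastic matrix A (rows sum to 1)."""
    n = A.shape[0]
    x = np.full(n, 1.0 / n)  # initial rank vector
    p = np.full(n, 1.0 / n)  # uniform teleportation vector
    for _ in range(max_iter):
        xlast = x
        x = alpha * (x @ A) + (1 - alpha) * p
        # Same l1-norm stopping rule as the loop in the diff above
        if np.absolute(x - xlast).sum() < n * tol:
            return x
    raise RuntimeError(f"power iteration failed to converge in {max_iter} iterations")


# Tiny check: a symmetric 2-cycle converges immediately to uniform ranks.
print(pagerank_dense(np.array([[0.0, 1.0], [1.0, 0.0]])))  # -> [0.5 0.5]
```

The real script raises `nx.PowerIterationFailedConvergence` instead of `RuntimeError`; a plain exception keeps this sketch free of the NetworkX dependency.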