diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 0000000..4fd4800 --- /dev/null +++ b/.codecov.yml @@ -0,0 +1,14 @@ +coverage: + status: + project: + default: + informational: true + patch: + default: + informational: true + changes: false +comment: + layout: "header, diff" + behavior: default +github_checks: + annotations: false diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..32a72cc --- /dev/null +++ b/.flake8 @@ -0,0 +1,15 @@ +[flake8] +max-line-length = 100 +inline-quotes = " +exclude = + graphblas_algorithms/*/tests/, + graphblas_algorithms/*/*/tests/, + build/ +extend-ignore = + E203, + SIM105, + SIM401, +# E203 whitespace before ':' (to be compatible with black) +per-file-ignores = + __init__.py:F401,F403, # allow unused and star imports + graphblas_algorithms/nxapi/exception.py:F401, diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index 2add182..0000000 --- a/.gitattributes +++ /dev/null @@ -1 +0,0 @@ -graphblas_algorithms/_version.py export-subst diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..b18fd29 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: 'github-actions' + directory: '/' + schedule: + interval: 'weekly' diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..97bb856 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,23 @@ +# Rely on pre-commit.ci instead +name: Lint via pre-commit + +on: + workflow_dispatch: + # pull_request: + # push: + # branches-ignore: + # - main + +permissions: + contents: read + +jobs: + pre-commit: + name: pre-commit-hooks + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + - uses: pre-commit/action@v3.0.0 diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml new file mode 100644 index 0000000..f848ad6 --- /dev/null +++ b/.github/workflows/publish_pypi.yml @@ -0,0 +1,41 @@ +name: Publish to PyPI + +on: + push: + tags: + - '20*' + +jobs: + build_and_deploy: + runs-on: ubuntu-latest + if: github.repository == 'python-graphblas/graphblas-algorithms' + defaults: + run: + shell: bash -l {0} + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + - name: Install build dependencies + run: | + python -m pip install --upgrade pip + python -m pip install build twine + - name: Build wheel and sdist + run: python -m build --sdist --wheel + - uses: actions/upload-artifact@v4 + with: + name: releases + path: dist + if-no-files-found: error + - name: Check with twine + run: python -m twine check --strict dist/* + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@v1.8.11 + with: + user: __token__ + password: ${{ secrets.PYPI_TOKEN }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7767834..47ca7bc 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,60 +12,56 @@ jobs: run: shell: bash -l {0} strategy: - fail-fast: false + fail-fast: true matrix: os: ["ubuntu-latest", "macos-latest", "windows-latest"] - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.10", "3.11", "3.12"] steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: fetch-depth: 0 - - name: Conda - uses: conda-incubator/setup-miniconda@v2 + - name: Setup 
mamba + uses: conda-incubator/setup-miniconda@v3 + id: setup_mamba + continue-on-error: true + with: + miniforge-variant: Mambaforge + miniforge-version: latest + use-mamba: true + python-version: ${{ matrix.python-version }} + channels: conda-forge,${{ contains(matrix.python-version, 'pypy') && 'defaults' || 'nodefaults' }} + channel-priority: ${{ contains(matrix.python-version, 'pypy') && 'flexible' || 'strict' }} + activate-environment: graphblas + auto-activate-base: false + - name: Setup conda + uses: conda-incubator/setup-miniconda@v3 + id: setup_conda + if: steps.setup_mamba.outcome == 'failure' + continue-on-error: false with: auto-update-conda: true python-version: ${{ matrix.python-version }} - channels: conda-forge - activate-environment: testing + channels: conda-forge,${{ contains(matrix.python-version, 'pypy') && 'defaults' || 'nodefaults' }} + channel-priority: ${{ contains(matrix.python-version, 'pypy') && 'flexible' || 'strict' }} + activate-environment: graphblas + auto-activate-base: false - name: Install dependencies run: | - conda install -c conda-forge python-graphblas networkx scipy pytest coverage black flake8 coveralls - pip install -e . - - name: Style checks - run: | - flake8 - black . --check --diff + $(command -v mamba || command -v conda) install python-suitesparse-graphblas scipy pandas donfig pyyaml numpy python-graphblas \ + pytest-cov pytest-randomly pytest-mpl networkx + # matplotlib lxml pygraphviz pydot sympy # Extra networkx deps we don't need yet + # Sometimes we prefer to use the latest release of NetworkX or the latest development from github + # pip install git+https://github.com/networkx/networkx.git@main --no-deps + pip install -e . --no-deps - name: PyTest run: | - coverage run --branch -m pytest + python -c 'import sys, graphblas_algorithms; assert "networkx" not in sys.modules' + coverage run --branch -m pytest --color=yes -v --check-structure + coverage report + # NETWORKX_GRAPH_CONVERT=graphblas pytest --color=yes --pyargs networkx --cov --cov-append + ./run_nx_tests.sh --color=yes --cov --cov-append + coverage report + coverage xml - name: Coverage - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - COVERALLS_FLAG_NAME: ${{ matrix.python-version}}/${{ matrix.os }} - COVERALLS_PARALLEL: true - run: | - coverage report --show-missing - # coveralls --service=github # Broken :( - -# finish: -# needs: test -# if: always() -# runs-on: ubuntu-latest -# defaults: -# run: -# shell: bash -l {0} -# steps: -# - name: Create env -# uses: conda-incubator/setup-miniconda@v2 -# with: -# auto-update-conda: true -# python-version: "3.10" -# activate-environment: finishing -# - name: Update env -# run: conda install -c conda-forge coveralls -# - name: Coveralls finished -# env: -# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -# run: | -# coveralls --finish + uses: codecov/codecov-action@v3 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fa4b778..e4525c5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,55 +2,104 @@ # # Before first use: `pre-commit install` # To run: `pre-commit run --all-files` +# To update: `pre-commit autoupdate` +# - &flake8_dependencies below needs updated manually +ci: + # See: https://pre-commit.ci/#configuration + autofix_prs: false + autoupdate_schedule: quarterly + autoupdate_commit_msg: "chore: update pre-commit hooks" + autofix_commit_msg: "style: pre-commit fixes" + skip: [no-commit-to-branch] +fail_fast: true +default_language_version: + python: python3 repos: - repo: 
https://github.com/pre-commit/pre-commit-hooks - rev: v4.2.0 + rev: v4.5.0 hooks: - id: check-added-large-files + - id: check-case-conflict + - id: check-merge-conflict + # - id: check-symlinks + - id: check-ast + - id: check-toml - id: check-yaml - id: debug-statements - id: end-of-file-fixer + exclude_types: [svg] - id: mixed-line-ending - id: trailing-whitespace - - repo: https://github.com/myint/autoflake - rev: v1.4 + - id: name-tests-test + args: ["--pytest-test-first"] + - repo: https://github.com/abravalheri/validate-pyproject + rev: v0.15 + hooks: + - id: validate-pyproject + name: Validate pyproject.toml + # I don't yet trust ruff to do what autoflake does + - repo: https://github.com/PyCQA/autoflake + rev: v2.2.1 hooks: - id: autoflake args: [--in-place] - repo: https://github.com/pycqa/isort - rev: 5.10.1 + rev: 5.13.2 hooks: - id: isort - language_version: python3 - repo: https://github.com/asottile/pyupgrade - rev: v2.32.0 + rev: v3.15.0 hooks: - id: pyupgrade - args: [--py38-plus] + args: [--py310-plus] + - repo: https://github.com/MarcoGorelli/auto-walrus + rev: v0.2.2 + hooks: + - id: auto-walrus + args: [--line-length, "100"] - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 23.12.1 hooks: - id: black - language_version: python3 - args: [--target-version=py38] + # - id: black-jupyter + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.9 + hooks: + - id: ruff + args: [--fix-only, --show-fixes] - repo: https://github.com/PyCQA/flake8 - rev: 4.0.1 + rev: 6.1.0 hooks: - id: flake8 additional_dependencies: &flake8_dependencies - - flake8==4.0.1 - - flake8-comprehensions==3.8.0 - - flake8-bugbear==22.3.23 + # These versions need updated manually + - flake8==6.1.0 + - flake8-bugbear==23.12.2 + - flake8-simplify==0.21.0 - repo: https://github.com/asottile/yesqa - rev: v1.3.0 + rev: v1.5.0 hooks: - id: yesqa additional_dependencies: *flake8_dependencies - repo: https://github.com/codespell-project/codespell - rev: v2.1.0 + rev: v2.2.6 hooks: - id: codespell types_or: [python, rst, markdown] + additional_dependencies: [tomli] files: ^(graphblas_algorithms|docs)/ - # args: ["--ignore-words-list=coo,ba"] -# Maybe: black-jupyter, blacken-docs, blackdoc mypy, velin + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.9 + hooks: + - id: ruff + # `pyroma` may help keep our package standards up to date if best practices change. + # This is probably a "low value" check though and safe to remove if we want faster pre-commit. + - repo: https://github.com/regebro/pyroma + rev: "4.2" + hooks: + - id: pyroma + args: [-n, "10", .] + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: no-commit-to-branch # no commit directly to main diff --git a/MANIFEST.in b/MANIFEST.in index 58956ba..c69947d 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,9 +1,6 @@ recursive-include graphblas_algorithms *.py +prune docs include setup.py -include setup.cfg include README.md include LICENSE include MANIFEST.in -include versioneer.py -include requirements.txt -include graphblas_algorithms/_version.py diff --git a/README.md b/README.md index aa64bbc..ed66df3 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,249 @@ -# **GraphBLAS Algorithms** - +![GraphBLAS Algorithms](https://raw.githubusercontent.com/python-graphblas/graphblas-algorithms/main/docs/_static/img/logo-name-medium.svg) +
+[![conda-forge](https://img.shields.io/conda/vn/conda-forge/graphblas-algorithms.svg)](https://anaconda.org/conda-forge/graphblas-algorithms) [![pypi](https://img.shields.io/pypi/v/graphblas-algorithms.svg)](https://pypi.python.org/pypi/graphblas-algorithms/) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/graphblas-algorithms)](https://pypi.python.org/pypi/graphblas-algorithms/) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/python-graphblas/graphblas-algorithms/blob/main/LICENSE) +
[![Tests](https://github.com/python-graphblas/graphblas-algorithms/workflows/Tests/badge.svg?branch=main)](https://github.com/python-graphblas/graphblas-algorithms/actions) - - +[![Coverage](https://codecov.io/gh/python-graphblas/graphblas-algorithms/branch/main/graph/badge.svg)](https://codecov.io/gh/python-graphblas/graphblas-algorithms) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7329185.svg)](https://doi.org/10.5281/zenodo.7329185) +[![Discord](https://img.shields.io/badge/Chat-Discord-blue)](https://discord.com/invite/vur45CbwMz) -GraphBLAS algorithms written in Python with [Python-graphblas](https://github.com/python-graphblas/python-graphblas). We are trying to target the NetworkX API algorithms where possible. +`graphblas-algorithms` is a collection of GraphBLAS algorithms written using +[`python-graphblas`](https://python-graphblas.readthedocs.io/en/latest/). +It may be used directly or as an experimental +[backend to NetworkX](https://networkx.org/documentation/stable/reference/classes/index.html#backends). + +Why use GraphBLAS Algorithms? Because it is *fast*, *flexible*, and *familiar* by using the NetworkX API. + +Are we missing any [algorithms](#Plugin-Algorithms) that you want? +[Please let us know!](https://github.com/python-graphblas/graphblas-algorithms/issues) +
+GraphBLAS vs NetworkX +
+GraphBLAS vs igraph

 ### Installation
 ```
+conda install -c conda-forge graphblas-algorithms
+```
+```
 pip install graphblas-algorithms
 ```
-This is a work in progress. Stay tuned (or come help 😃)!
+## Basic Usage
+
+First, create a GraphBLAS Matrix.
+
+```python
+import graphblas as gb
+
+M = gb.Matrix.from_coo(
+    [0, 0, 1, 2, 2, 3],
+    [1, 3, 0, 0, 1, 2],
+    [1., 2., 3., 4., 5., 6.],
+    nrows=4, ncols=4, dtype='float32'
+)
+```
+
+Next, wrap the Matrix as a `ga.Graph`.
+
+```python
+import graphblas_algorithms as ga
+
+G = ga.Graph(M)
+```
+
+Finally, call an algorithm.
+
+```python
+hubs, authorities = ga.hits(G)
+```
+
+When the result is a value per node, a `gb.Vector` will be returned.
+In the case of [HITS](https://en.wikipedia.org/wiki/HITS_algorithm),
+two Vectors are returned representing the hubs and authorities values.
+
+Algorithms whose result is a subgraph will return a `ga.Graph`.
+
+## Plugin for NetworkX
+
+Dispatching to plugins is a new feature in NetworkX 3.0.
+When both `networkx` and `graphblas-algorithms` are installed in an
+environment, calls to NetworkX algorithms can be dispatched to the
+equivalent version in `graphblas-algorithms`.
+
+### Dispatch Example
+```python
+import networkx as nx
+import graphblas_algorithms as ga
+
+# Generate a random graph (5000 nodes, 1_000_000 edges)
+G = nx.erdos_renyi_graph(5000, 0.08)
+
+# Explicitly convert to ga.Graph
+G2 = ga.Graph.from_networkx(G)
+
+# Pass G2 to NetworkX's k_truss
+T5 = nx.k_truss(G2, 5)
+```
+
+`G2` is not an `nx.Graph`, but it does have an attribute
+`__networkx_plugin__ = "graphblas"`. This tells NetworkX to
+dispatch the k_truss call to graphblas-algorithms. This connection
+exists because graphblas-algorithms registers itself as a
+"networkx.plugin" entry point.
+
+The result `T5` is a `ga.Graph` representing the 5-truss structure of the
+original graph. To convert it to a NetworkX Graph, use:
+```python
+T5.to_networkx()
+```
+
+Note that even with the conversions to and from `ga.Graph`, this example still runs 10x
+faster than using the native NetworkX k-truss implementation. Speed improvements scale
+with graph size, so larger graphs will see an even larger speed-up relative to NetworkX.
+
+### Plugin Algorithms
+
+The following NetworkX algorithms have been implemented
+by graphblas-algorithms and can be used following the
+dispatch pattern shown above.
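These algorithms can also be called directly on a `ga.Graph` without going through NetworkX dispatch. A minimal sketch, assuming the top-level `ga.pagerank` and `ga.triangles` exports suggested by the function tree below:

```python
import networkx as nx
import graphblas_algorithms as ga

# Build a ga.Graph from any NetworkX graph
G = ga.Graph.from_networkx(nx.erdos_renyi_graph(1000, 0.02))

pr = ga.pagerank(G)    # per-node result comes back as a gb.Vector
tri = ga.triangles(G)  # likewise a gb.Vector of per-node triangle counts
```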
+ +[//]: # (Begin auto-generated code) + +``` +graphblas_algorithms.nxapi +├── boundary +│ ├── edge_boundary +│ └── node_boundary +├── centrality +│ ├── degree_alg +│ │ ├── degree_centrality +│ │ ├── in_degree_centrality +│ │ └── out_degree_centrality +│ ├── eigenvector +│ │ └── eigenvector_centrality +│ └── katz +│ └── katz_centrality +├── cluster +│ ├── average_clustering +│ ├── clustering +│ ├── generalized_degree +│ ├── square_clustering +│ ├── transitivity +│ └── triangles +├── community +│ └── quality +│ ├── inter_community_edges +│ └── intra_community_edges +├── components +│ ├── connected +│ │ ├── is_connected +│ │ └── node_connected_component +│ └── weakly_connected +│ └── is_weakly_connected +├── core +│ └── k_truss +├── cuts +│ ├── boundary_expansion +│ ├── conductance +│ ├── cut_size +│ ├── edge_expansion +│ ├── mixing_expansion +│ ├── node_expansion +│ ├── normalized_cut_size +│ └── volume +├── dag +│ ├── ancestors +│ └── descendants +├── dominating +│ └── is_dominating_set +├── efficiency_measures +│ └── efficiency +├── generators +│ └── ego +│ └── ego_graph +├── isolate +│ ├── is_isolate +│ ├── isolates +│ └── number_of_isolates +├── isomorphism +│ └── isomorph +│ ├── fast_could_be_isomorphic +│ └── faster_could_be_isomorphic +├── linalg +│ ├── bethehessianmatrix +│ │ └── bethe_hessian_matrix +│ ├── graphmatrix +│ │ └── adjacency_matrix +│ ├── laplacianmatrix +│ │ ├── laplacian_matrix +│ │ └── normalized_laplacian_matrix +│ └── modularitymatrix +│ ├── directed_modularity_matrix +│ └── modularity_matrix +├── link_analysis +│ ├── hits_alg +│ │ └── hits +│ └── pagerank_alg +│ ├── google_matrix +│ └── pagerank +├── lowest_common_ancestors +│ └── lowest_common_ancestor +├── operators +│ ├── binary +│ │ ├── compose +│ │ ├── difference +│ │ ├── disjoint_union +│ │ ├── full_join +│ │ ├── intersection +│ │ ├── symmetric_difference +│ │ └── union +│ └── unary +│ ├── complement +│ └── reverse +├── reciprocity +│ ├── overall_reciprocity +│ └── reciprocity +├── regular +│ ├── is_k_regular +│ └── is_regular +├── shortest_paths +│ ├── dense +│ │ ├── floyd_warshall +│ │ ├── floyd_warshall_numpy +│ │ └── floyd_warshall_predecessor_and_distance +│ ├── generic +│ │ └── has_path +│ ├── unweighted +│ │ ├── all_pairs_shortest_path_length +│ │ ├── single_source_shortest_path_length +│ │ └── single_target_shortest_path_length +│ └── weighted +│ ├── all_pairs_bellman_ford_path_length +│ ├── bellman_ford_path +│ ├── bellman_ford_path_length +│ ├── negative_edge_cycle +│ └── single_source_bellman_ford_path_length +├── simple_paths +│ └── is_simple_path +├── smetric +│ └── s_metric +├── structuralholes +│ └── mutual_weight +├── tournament +│ ├── is_tournament +│ ├── score_sequence +│ └── tournament_matrix +├── traversal +│ └── breadth_first_search +│ ├── bfs_layers +│ └── descendants_at_distance +└── triads + └── is_triad +``` + +[//]: # (End auto-generated code) diff --git a/_nx_graphblas/__init__.py b/_nx_graphblas/__init__.py new file mode 100644 index 0000000..6ffa061 --- /dev/null +++ b/_nx_graphblas/__init__.py @@ -0,0 +1,107 @@ +def get_info(): + return { + "backend_name": "graphblas", + "project": "graphblas-algorithms", + "package": "graphblas_algorithms", + "url": "https://github.com/python-graphblas/graphblas-algorithms", + "short_summary": "OpenMP-enabled sparse linear algebra backend.", + # "description": "TODO", + "functions": { + "adjacency_matrix": {}, + "all_pairs_bellman_ford_path_length": { + "extra_parameters": { + "chunksize : int or str, optional": "Split the computation into 
chunks; " + 'may specify size as string or number of rows. Default "10 MiB"', + }, + }, + "all_pairs_shortest_path_length": { + "extra_parameters": { + "chunksize : int or str, optional": "Split the computation into chunks; " + 'may specify size as string or number of rows. Default "10 MiB"', + }, + }, + "ancestors": {}, + "average_clustering": {}, + "bellman_ford_path": {}, + "bellman_ford_path_length": {}, + "bethe_hessian_matrix": {}, + "bfs_layers": {}, + "boundary_expansion": {}, + "clustering": {}, + "complement": {}, + "compose": {}, + "conductance": {}, + "cut_size": {}, + "degree_centrality": {}, + "descendants": {}, + "descendants_at_distance": {}, + "difference": {}, + "directed_modularity_matrix": {}, + "disjoint_union": {}, + "edge_boundary": {}, + "edge_expansion": {}, + "efficiency": {}, + "ego_graph": {}, + "eigenvector_centrality": {}, + "fast_could_be_isomorphic": {}, + "faster_could_be_isomorphic": {}, + "floyd_warshall": {}, + "floyd_warshall_numpy": {}, + "floyd_warshall_predecessor_and_distance": {}, + "full_join": {}, + "generalized_degree": {}, + "google_matrix": {}, + "has_path": {}, + "hits": {}, + "in_degree_centrality": {}, + "inter_community_edges": {}, + "intersection": {}, + "intra_community_edges": {}, + "is_connected": {}, + "is_dominating_set": {}, + "is_isolate": {}, + "is_k_regular": {}, + "isolates": {}, + "is_regular": {}, + "is_simple_path": {}, + "is_tournament": {}, + "is_triad": {}, + "is_weakly_connected": {}, + "katz_centrality": {}, + "k_truss": {}, + "laplacian_matrix": {}, + "lowest_common_ancestor": {}, + "mixing_expansion": {}, + "modularity_matrix": {}, + "mutual_weight": {}, + "negative_edge_cycle": {}, + "node_boundary": {}, + "node_connected_component": {}, + "node_expansion": {}, + "normalized_cut_size": {}, + "normalized_laplacian_matrix": {}, + "number_of_isolates": {}, + "out_degree_centrality": {}, + "overall_reciprocity": {}, + "pagerank": {}, + "reciprocity": {}, + "reverse": {}, + "score_sequence": {}, + "single_source_bellman_ford_path_length": {}, + "single_source_shortest_path_length": {}, + "single_target_shortest_path_length": {}, + "s_metric": {}, + "square_clustering": { + "extra_parameters": { + "chunksize : int or str, optional": "Split the computation into chunks; " + 'may specify size as string or number of rows. 
Default "256 MiB"', + }, + }, + "symmetric_difference": {}, + "tournament_matrix": {}, + "transitivity": {}, + "triangles": {}, + "union": {}, + "volume": {}, + }, + } diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..89730c5 --- /dev/null +++ b/conftest.py @@ -0,0 +1,16 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption( + "--check-structure", + "--checkstructure", + default=None, + action="store_true", + help="Check that `graphblas_algorithms.nxapi` matches networkx structure", + ) + + +def pytest_runtest_setup(item): + if "checkstructure" in item.keywords and not item.config.getoption("--check-structure"): + pytest.skip("need --check-structure option to run") diff --git a/docs/_static/img/graphblas-vs-igraph.png b/docs/_static/img/graphblas-vs-igraph.png new file mode 100755 index 0000000..4c253d1 Binary files /dev/null and b/docs/_static/img/graphblas-vs-igraph.png differ diff --git a/docs/_static/img/graphblas-vs-networkx.png b/docs/_static/img/graphblas-vs-networkx.png new file mode 100755 index 0000000..bf9cb69 Binary files /dev/null and b/docs/_static/img/graphblas-vs-networkx.png differ diff --git a/docs/_static/img/logo-name-medium.svg b/docs/_static/img/logo-name-medium.svg new file mode 100644 index 0000000..81b7b01 --- /dev/null +++ b/docs/_static/img/logo-name-medium.svg @@ -0,0 +1 @@ + diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..06142d1 --- /dev/null +++ b/environment.yml @@ -0,0 +1,58 @@ +# To use: +# $ conda env create -f environment.yml +# $ conda activate graphblas-algorithms-dev +# +# Or use mamba instead of conda. +# +# pre-commit should be set up once after the repo is cloned (see .pre-commit-config.yaml). +# In the `graphblas-algorithms-dev` environment, run: +# $ pre-commit install +# +# At times, one may need to use a development version of networkx or python-graphblas. +# To do this, you will need to uninstall, git clone, and run setup.py develop. For example: +# +# $ conda remove --force python-graphblas +# $ git clone git@github.com:python-graphblas/python-graphblas.git +# $ cd python-graphblas +# $ python setup.py develop --no-deps +# +# $ conda remove --force networkx +# $ git clone git@github.com:networkx/networkx.git +# $ cd networkx +# $ python setup.py develop --no-deps +name: graphblas-algorithms-dev +channels: + - conda-forge + - nodefaults # Only install packages from conda-forge for faster solving +dependencies: + - python + - python-graphblas + - networkx + # python-graphblas default dependencies + - donfig + - numba + - python-suitesparse-graphblas + - pyyaml + # networkx default dependencies + - matplotlib + - pandas + - scipy + # networkx extra dependencies + - lxml + - pydot + - pygraphviz + - sympy + # For updating algorithm list in README + - rich + # For linting + - pre-commit + # For testing + - pytest-cov + # For benchmarking + - requests + # For debugging + - icecream + - ipykernel + - ipython + # For type annotations + - mypy diff --git a/graphblas_algorithms/__init__.py b/graphblas_algorithms/__init__.py index fce1f5d..e86efa9 100644 --- a/graphblas_algorithms/__init__.py +++ b/graphblas_algorithms/__init__.py @@ -1,5 +1,18 @@ -from . 
import _version -from .cluster import average_clustering, clustering, transitivity, triangles # noqa -from .link_analysis import pagerank # noqa +import importlib.metadata -__version__ = _version.get_versions()["version"] +from .classes import * +from .generators import * +from .linalg import * + +from .algorithms import * # isort:skip + +try: + __version__ = importlib.metadata.version("graphblas-algorithms") +except Exception as exc: # pragma: no cover (safety) + raise AttributeError( + "`graphblas_algorithms.__version__` not available. This may mean " + "graphblas-algorithms was incorrectly installed or not installed at all. " + "For local development, you may want to do an editable install via " + "`python -m pip install -e path/to/graphblas-algorithms`" + ) from exc +del importlib diff --git a/graphblas_algorithms/_utils.py b/graphblas_algorithms/_utils.py deleted file mode 100644 index a633e7c..0000000 --- a/graphblas_algorithms/_utils.py +++ /dev/null @@ -1,46 +0,0 @@ -import graphblas as gb -from graphblas import Vector, binary - - -def graph_to_adjacency(G, weight=None, dtype=None, *, name=None): - key_to_id = {k: i for i, k in enumerate(G)} - A = gb.io.from_networkx(G, nodelist=key_to_id, weight=weight, dtype=dtype, name=name) - return A, key_to_id - - -def dict_to_vector(d, key_to_id, *, size=None, dtype=None, name=None): - if d is None: - return None - if size is None: - size = len(key_to_id) - indices, values = zip(*((key_to_id[key], val) for key, val in d.items())) - return Vector.from_values(indices, values, size=size, dtype=dtype, name=name) - - -def list_to_vector(nodes, key_to_id, *, size=None, name=None): - if nodes is None: - return None, None - if size is None: - size = len(key_to_id) - id_to_key = {key_to_id[key]: key for key in nodes} - v = Vector.from_values(list(id_to_key), True, size=size, dtype=bool, name=name) - return v, id_to_key - - -def list_to_mask(nodes, key_to_id, *, size=None, name="mask"): - if nodes is None: - return None, None - v, id_to_key = list_to_vector(nodes, key_to_id, size=size, name=name) - return v.S, id_to_key - - -def vector_to_dict(v, key_to_id, id_to_key=None, *, mask=None, fillvalue=None): - # This mutates the vector to fill it! - if id_to_key is None: - id_to_key = {key_to_id[key]: key for key in key_to_id} - if mask is not None: - if fillvalue is not None and v.nvals < mask.parent.nvals: - v(mask, binary.first) << fillvalue - elif fillvalue is not None and v.nvals < v.size: - v(mask=~v.S) << fillvalue - return {id_to_key[index]: value for index, value in zip(*v.to_values(sort=False))} diff --git a/graphblas_algorithms/_version.py b/graphblas_algorithms/_version.py deleted file mode 100644 index 940c53a..0000000 --- a/graphblas_algorithms/_version.py +++ /dev/null @@ -1,681 +0,0 @@ -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. 
Generated by -# versioneer-0.22 (https://github.com/python-versioneer/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import functools -import os -import re -import subprocess -import sys -from typing import Callable, Dict - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). - git_refnames = "$Format:%d$" - git_full = "$Format:%H$" - git_date = "$Format:%ci$" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "pep440" - cfg.tag_prefix = "" - cfg.parentdir_prefix = "graphblas_algorithms-" - cfg.versionfile_source = "graphblas_algorithms/_version.py" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs, method): # decorator - """Create decorator to mark a method as the handler of a VCS.""" - - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - - popen_kwargs = {} - if sys.platform == "win32": - # This hides the console window if pythonw.exe is used - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - popen_kwargs["startupinfo"] = startupinfo - - for command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen( - [command] + args, - cwd=cwd, - env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr else None), - **popen_kwargs, - ) - break - except OSError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print(f"unable to find command, tried {commands}") - return None, None - stdout = process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, process.returncode - return stdout, process.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. 
We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return { - "version": dirname[len(parentdir_prefix) :], - "full-revisionid": None, - "dirty": False, - "error": None, - "date": None, - } - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print( - "Tried directories %s but none started with prefix %s" - % (str(rootdirs), parentdir_prefix) - ) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - with open(versionfile_abs) as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r"\d", r)} - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. 
"2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix) :] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r"\d", r): - continue - if verbose: - print("picking %s" % r) - return { - "version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": None, - "date": date, - } - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return { - "version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": "no suitable tags", - "date": None, - } - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - # GIT_DIR can interfere with correct operation of Versioneer. - # It may be intended to be passed to the Versioneer-versioned project, - # but that should not change where we get our version from. - env = os.environ.copy() - env.pop("GIT_DIR", None) - runner = functools.partial(runner, env=env) - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - MATCH_ARGS = ["--match", "%s*" % tag_prefix] if tag_prefix else [] - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner( - GITS, ["describe", "--tags", "--dirty", "--always", "--long", *MATCH_ARGS], cwd=root - ) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. If all else fails, we are on a branchless - # commit. - branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. 
- branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. - branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[: git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? - pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = f"tag '{full_tag}' doesn't start with prefix '{tag_prefix}'" - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix) :] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces): - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver): - """Split pep440 version string at the post-release segment. - - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). - """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces): - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) - else: - rendered += ".post0.dev%d" % (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return { - "version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None, - } - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return { - "version": rendered, - "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], - "error": None, - "date": pieces.get("date"), - } - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. 
- for _ in cfg.versionfile_source.split("/"): - root = os.path.dirname(root) - except NameError: - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None, - } - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", - "date": None, - } diff --git a/graphblas_algorithms/algorithms/__init__.py b/graphblas_algorithms/algorithms/__init__.py new file mode 100644 index 0000000..be06324 --- /dev/null +++ b/graphblas_algorithms/algorithms/__init__.py @@ -0,0 +1,25 @@ +from . import exceptions +from .boundary import * +from .centrality import * +from .cluster import * +from .community import * +from .components import * +from .core import * +from .cuts import * +from .dag import * +from .dominating import * +from .efficiency_measures import * +from .isolate import * +from .isomorphism import * +from .link_analysis import * +from .lowest_common_ancestors import * +from .operators import * +from .reciprocity import * +from .regular import * +from .shortest_paths import * +from .simple_paths import * +from .smetric import * +from .structuralholes import * +from .tournament import * +from .traversal import * +from .triads import * diff --git a/graphblas_algorithms/algorithms/_bfs.py b/graphblas_algorithms/algorithms/_bfs.py new file mode 100644 index 0000000..8189aae --- /dev/null +++ b/graphblas_algorithms/algorithms/_bfs.py @@ -0,0 +1,204 @@ +"""BFS routines used by other algorithms""" + +import numpy as np +from graphblas import Matrix, Vector, binary, indexunary, replace, semiring, unary +from graphblas.semiring import any_pair + + +def _get_cutoff(n, cutoff): + if cutoff is None or cutoff >= n: + return n # Everything + return cutoff + 1 # Inclusive + + +# Push-pull optimization is possible, but annoying to implement +def _bfs_plain( + G, source=None, target=None, *, index=None, cutoff=None, transpose=False, name="bfs_plain" +): + if source is not None: + if source not in G._key_to_id: + raise KeyError(f"The node {source} is not in the graph") + index = G._key_to_id[source] + if target is not None: + if target not in G._key_to_id: + raise KeyError(f"The node {target} is not in the graph") + dst_id = G._key_to_id[target] + else: + dst_id = None + A = G.get_property("offdiag") + if transpose and G.is_directed(): + A = A.T # TODO: should we use "AT" instead? + n = A.nrows + v = Vector(bool, n, name=name) + q = Vector(bool, n, name="q") + v[index] = True + q[index] = True + any_pair_bool = any_pair[bool] + cutoff = _get_cutoff(n, cutoff) + for _i in range(1, cutoff): + q(~v.S, replace) << any_pair_bool(q @ A) + if q.nvals == 0: + break + v(q.S) << True + if dst_id is not None and dst_id in q: + break + return v + + +def _bfs_level(G, source, target=None, *, cutoff=None, transpose=False, dtype=int): + if dtype == bool: + dtype = int + index = G._key_to_id[source] + if target is not None: + if target not in G._key_to_id: + raise KeyError(f"The node {target} is not in the graph") + dst_id = G._key_to_id[target] + else: + dst_id = None + A = G.get_property("offdiag") + if transpose and G.is_directed(): + A = A.T # TODO: should we use "AT" instead? 
+ n = A.nrows + v = Vector(dtype, n, name="bfs_level") + q = Vector(bool, n, name="q") + v[index] = 0 + q[index] = True + any_pair_bool = any_pair[bool] + cutoff = _get_cutoff(n, cutoff) + for i in range(1, cutoff): + q(~v.S, replace) << any_pair_bool(q @ A) + if q.nvals == 0: + break + v(q.S) << i + if dst_id is not None and dst_id in q: + break + return v + + +def _bfs_levels(G, nodes, *, cutoff=None, dtype=int): + if dtype == bool: + dtype = int + A = G.get_property("offdiag") + n = A.nrows + if nodes is None: + # TODO: `D = Vector.from_scalar(0, n, dtype).diag()` + D = Vector(dtype, n, name="bfs_levels_vector") + D << 0 + D = D.diag(name="bfs_levels") + else: + ids = G.list_to_ids(nodes) + D = Matrix.from_coo( + np.arange(len(ids), dtype=np.uint64), + ids, + 0, + dtype, + nrows=len(ids), + ncols=n, + name="bfs_levels", + ) + Q = unary.one[bool](D).new(name="Q") + any_pair_bool = any_pair[bool] + cutoff = _get_cutoff(n, cutoff) + for i in range(1, cutoff): + Q(~D.S, replace) << any_pair_bool(Q @ A) + if Q.nvals == 0: + break + D(Q.S) << i + return D + + +def _bfs_parent(G, source, target=None, *, cutoff=None, transpose=False, dtype=int): + if dtype == bool: + dtype = int + index = G._key_to_id[source] + if target is not None: + dst_id = G._key_to_id[target] + else: + dst_id = None + A = G.get_property("offdiag") + if transpose and G.is_directed(): + A = A.T # TODO: should we use "AT" instead? + n = A.nrows + v = Vector(dtype, n, name="bfs_parent") + q = Vector(dtype, n, name="q") + v[index] = index + q[index] = index + min_first = semiring.min_first[v.dtype] + index = indexunary.index[v.dtype] + cutoff = _get_cutoff(n, cutoff) + for _i in range(1, cutoff): + q(~v.S, replace) << min_first(q @ A) + if q.nvals == 0: + break + v(q.S) << q + if dst_id is not None and dst_id in q: + break + q << index(q) + return v + + +# TODO: benchmark this and the version commented out below +def _bfs_plain_bidirectional(G, source): + # Bi-directional BFS w/o symmetrizing the adjacency matrix + index = G._key_to_id[source] + A = G.get_property("offdiag") + # XXX: should we use `AT` if available? 
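+    # Strategy: alternately expand one frontier along out-edges (q @ A) and another
+    # along in-edges (A @ q), so nodes connected to `source` when edge direction is
+    # ignored are found without materializing A + A.T.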
+ n = A.nrows + v = Vector(bool, n, name="bfs_plain") + q_out = Vector(bool, n, name="q_out") + q_in = Vector(bool, n, name="q_in") + v[index] = True + q_in[index] = True + any_pair_bool = any_pair[bool] + is_out_empty = True + is_in_empty = False + for _i in range(1, n): + # Traverse out-edges from the most recent `q_in` and `q_out` + if is_out_empty: + q_out(~v.S) << any_pair_bool(q_in @ A) + else: + q_out << binary.any(q_out | q_in) + q_out(~v.S, replace) << any_pair_bool(q_out @ A) + is_out_empty = q_out.nvals == 0 + if not is_out_empty: + v(q_out.S) << True + elif is_in_empty: + break + # Traverse in-edges from the most recent `q_in` and `q_out` + if is_in_empty: + q_in(~v.S) << any_pair_bool(A @ q_out) + else: + q_in << binary.any(q_out | q_in) + q_in(~v.S, replace) << any_pair_bool(A @ q_in) + is_in_empty = q_in.nvals == 0 + if not is_in_empty: + v(q_in.S) << True + elif is_out_empty: + break + return v + + +""" +def _bfs_plain_bidirectional(G, source): + # Bi-directional BFS w/o symmetrizing the adjacency matrix + index = G._key_to_id[source] + A = G.get_property("offdiag") + n = A.nrows + v = Vector(bool, n, name="bfs_plain") + q = Vector(bool, n, name="q") + q2 = Vector(bool, n, name="q_2") + v[index] = True + q[index] = True + any_pair_bool = any_pair[bool] + for _i in range(1, n): + q2(~v.S, replace) << any_pair_bool(q @ A) + v(q2.S) << True + q(~v.S, replace) << any_pair_bool(A @ q) + if q.nvals == 0: + if q2.nvals == 0: + break + q, q2 = q2, q + elif q2.nvals != 0: + q << binary.any(q | q2) + return v +""" diff --git a/graphblas_algorithms/algorithms/_helpers.py b/graphblas_algorithms/algorithms/_helpers.py new file mode 100644 index 0000000..2c0a820 --- /dev/null +++ b/graphblas_algorithms/algorithms/_helpers.py @@ -0,0 +1,29 @@ +from graphblas import binary, monoid, unary + + +def normalize(x, how): + how = how.lower() + if how == "l1": + denom = x.reduce().get(0) + elif how == "l2": + denom = (x @ x).get(0) ** 0.5 + elif how == "linf": + denom = x.reduce(monoid.max).get(0) + else: + raise ValueError(f"Unknown normalization method: {how}") + try: + x *= 1.0 / denom + except ZeroDivisionError: # pragma: no cover + pass + return x + + +def is_converged(xprev, x, tol): + """Check convergence, L1 norm: err = sum(abs(xprev - x)); err < N * tol + + This modifies `xprev`. + """ + xprev << binary.minus(xprev | x) + xprev << unary.abs(xprev) + err = xprev.reduce().get(0) + return err < xprev.size * tol diff --git a/graphblas_algorithms/algorithms/boundary.py b/graphblas_algorithms/algorithms/boundary.py new file mode 100644 index 0000000..96bd275 --- /dev/null +++ b/graphblas_algorithms/algorithms/boundary.py @@ -0,0 +1,29 @@ +from graphblas import binary +from graphblas.semiring import any_pair + +__all__ = ["edge_boundary", "node_boundary"] + + +def edge_boundary(G, nbunch1, nbunch2=None, *, is_weighted=False): + if is_weighted: + B = binary.second(nbunch1 & G._A).new(name="boundary") + else: + B = binary.pair(nbunch1 & G._A).new(name="boundary") + if nbunch2 is None: + # Default nbunch2 is the complement of nbunch1. + # We get the row_degrees to better handle hypersparse data. 
+ nbunch2 = G.get_property("row_degrees+", mask=~nbunch1.S) + if is_weighted: + B << binary.first(B & nbunch2) + else: + B << binary.pair(B & nbunch2) + return B + + +def node_boundary(G, nbunch1, *, mask=None): + if mask is None: + mask = ~nbunch1.S + else: + mask = mask & (~nbunch1.S) + bdy = any_pair(nbunch1 @ G._A).new(mask=mask, name="boundary") + return bdy diff --git a/graphblas_algorithms/algorithms/centrality/__init__.py b/graphblas_algorithms/algorithms/centrality/__init__.py new file mode 100644 index 0000000..e5c137d --- /dev/null +++ b/graphblas_algorithms/algorithms/centrality/__init__.py @@ -0,0 +1,3 @@ +from .degree_alg import * +from .eigenvector import * +from .katz import * diff --git a/graphblas_algorithms/algorithms/centrality/degree_alg.py b/graphblas_algorithms/algorithms/centrality/degree_alg.py new file mode 100644 index 0000000..0237265 --- /dev/null +++ b/graphblas_algorithms/algorithms/centrality/degree_alg.py @@ -0,0 +1,32 @@ +from graphblas import Vector + +__all__ = ["degree_centrality", "in_degree_centrality", "out_degree_centrality"] + + +def _degree_centrality(G, degrees, name): + N = len(G) + rv = Vector(float, size=N, name=name) + if N <= 1: + rv << 1 + else: + s = 1 / (N - 1) + rv << s * degrees + return rv + + +def degree_centrality(G, *, name="degree_centrality"): + if G.is_directed(): + degrees = G.get_property("total_degrees+") + else: + degrees = G.get_property("degrees+") + return _degree_centrality(G, degrees, name) + + +def in_degree_centrality(G, *, name="in_degree_centrality"): + degrees = G.get_property("column_degrees+") + return _degree_centrality(G, degrees, name) + + +def out_degree_centrality(G, *, name="out_degree_centrality"): + degrees = G.get_property("row_degrees+") + return _degree_centrality(G, degrees, name) diff --git a/graphblas_algorithms/algorithms/centrality/eigenvector.py b/graphblas_algorithms/algorithms/centrality/eigenvector.py new file mode 100644 index 0000000..e9385f3 --- /dev/null +++ b/graphblas_algorithms/algorithms/centrality/eigenvector.py @@ -0,0 +1,33 @@ +from graphblas import Vector + +from .._helpers import is_converged, normalize +from ..exceptions import ConvergenceFailure, GraphBlasAlgorithmException, PointlessConcept + +__all__ = ["eigenvector_centrality"] + + +def eigenvector_centrality(G, max_iter=100, tol=1.0e-6, nstart=None, name="eigenvector_centrality"): + N = len(G) + if N == 0: + raise PointlessConcept("cannot compute centrality for the null graph") + x = Vector(float, N, name="x") + if nstart is None: + x << 1.0 / N + else: + x << nstart + denom = x.reduce().get(0) # why not use L2 norm? 
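+    # `x.reduce()` sums the entries, so the start vector is normalized by its sum here;
+    # each power iteration below re-normalizes `x` by its L2 norm.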
+ if denom == 0: + raise GraphBlasAlgorithmException("initial vector cannot have all zero values") + x *= 1.0 / denom + + # Power iteration: make up to max_iter iterations + A = G._A + xprev = Vector(float, N, name="x_prev") + for _i in range(max_iter): + xprev << x + x += x @ A + normalize(x, "L2") + if is_converged(xprev, x, tol): # sum(abs(xprev - x)) < N * tol + x.name = name + return x + raise ConvergenceFailure(max_iter) diff --git a/graphblas_algorithms/algorithms/centrality/katz.py b/graphblas_algorithms/algorithms/centrality/katz.py new file mode 100644 index 0000000..8087e85 --- /dev/null +++ b/graphblas_algorithms/algorithms/centrality/katz.py @@ -0,0 +1,55 @@ +from graphblas import Scalar, Vector +from graphblas.core.utils import output_type +from graphblas.semiring import plus_first, plus_times + +from .._helpers import is_converged, normalize +from ..exceptions import ConvergenceFailure, GraphBlasAlgorithmException + +__all__ = ["katz_centrality"] + + +def katz_centrality( + G, + alpha=0.1, + beta=1.0, + max_iter=1000, + tol=1.0e-6, + nstart=None, + normalized=True, + name="katz_centrality", +): + N = len(G) + x = Vector(float, N, name="x") + if nstart is None: + x << 0.0 + else: + x << nstart + if output_type(beta) is not Vector: + b = Scalar.from_value(beta, dtype=float, name="beta") + else: + b = beta + if b.nvals != N: + raise GraphBlasAlgorithmException("beta must have a value for every node") + + A = G._A + if (iso_value := G.get_property("iso_value")) is not None: + # Fold iso-value into alpha + alpha *= iso_value.get(1.0) + semiring = plus_first[float] + else: + semiring = plus_times[float] + + # Power iteration: make up to max_iter iterations + xprev = Vector(float, N, name="x_prev") + for _i in range(max_iter): + xprev, x = x, xprev + # x << alpha * semiring(xprev @ A) + beta + x << semiring(xprev @ A) + x *= alpha + x += b + if is_converged(xprev, x, tol): # sum(abs(xprev - x)) < N * tol + x.name = name + if normalized: + normalize(x, "L2") + return x + raise ConvergenceFailure(max_iter) diff --git a/graphblas_algorithms/algorithms/cluster.py b/graphblas_algorithms/algorithms/cluster.py new file mode 100644 index 0000000..0b9ab21 --- /dev/null +++ b/graphblas_algorithms/algorithms/cluster.py @@ -0,0 +1,318 @@ +import numpy as np +from graphblas import Matrix, Vector, binary, replace, unary +from graphblas.semiring import plus_first, plus_pair, plus_times + +__all__ = [ + "single_triangle", + "triangles", + "total_triangles", + "transitivity", + "transitivity_directed", + "clustering", + "clustering_directed", + "single_clustering", + "single_clustering_directed", + "average_clustering", + "average_clustering_directed", + "single_square_clustering", + "square_clustering", + "generalized_degree", + "single_generalized_degree", +] + + +def single_triangle(G, node, *, weighted=False): + index = G._key_to_id[node] + r = G._A[index, :].new() + # Pretty much all the time is spent here taking TRIL, which is used to ignore self-edges + L = G.get_property("L-") + if G.get_property("has_self_edges"): + del r[index] # Ignore self-edges + if weighted: + maxval = G.get_property("max_element-") + L = unary.cbrt(L / maxval) + r = unary.cbrt(r / maxval) + semiring = plus_times + else: + semiring = plus_pair + val = semiring(L @ r).new(mask=r.S) + if weighted: + val *= r + return val.reduce().get(0) + + +def triangles(G, *, weighted=False, mask=None): + # Ignores self-edges + # Can we apply the mask earlier in the computation? 
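+    # "L-" and "U-" are the strict lower/upper triangular parts of A (self-edges dropped).
+    # Counting wedges with plus_pair over these pieces gives, per node, the number of
+    # triangles it belongs to without double-counting.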
+ L, U = G.get_properties("L- U-") + if weighted: + maxval = G.get_property("max_element-") + L = unary.cbrt(L / maxval) + U = unary.cbrt(U / maxval) + semiring = plus_times + else: + semiring = plus_pair + C = semiring(L @ L.T).new(mask=L.S) + D = semiring(U @ L.T).new(mask=U.S) + if weighted: + C *= L + D *= U + return ( + C.reduce_rowwise().new(mask=mask) + + C.reduce_columnwise().new(mask=mask) + + D.reduce_rowwise().new(mask=mask) + ).new(name="triangles") + + +def total_triangles(G): + # We use SandiaDot method, because it's usually the fastest on large graphs. + # For smaller graphs, Sandia method is usually faster: plus_pair(L @ L).new(mask=L.S) + L, U = G.get_properties("L- U-") + return plus_pair(L @ U.T).new(mask=L.S).reduce_scalar().get(0) + + +def transitivity(G): + numerator = total_triangles(G) + if numerator == 0: + return 0 + degrees = G.get_property("degrees-") + denom = (degrees * (degrees - 1)).reduce().value + return 6 * numerator / denom + + +def transitivity_directed(G): + # XXX" is transitivity supposed to work on directed graphs like this? + A, AT = G.get_properties("offdiag AT") + numerator = plus_pair(A @ A.T).new(mask=A.S).reduce_scalar().get(0) + if numerator == 0: + return 0 + degrees = G.get_property("row_degrees-") + denom = (degrees * (degrees - 1)).reduce().value + return numerator / denom + + +def clustering(G, *, weighted=False, mask=None): + tri = triangles(G, weighted=weighted, mask=mask) + degrees = G.get_property("degrees-") + denom = degrees * (degrees - 1) + return (2 * tri / denom).new(name="clustering") + + +def clustering_directed(G, *, weighted=False, mask=None): + # Can we apply the mask earlier in the computation? + A, AT = G.get_properties("offdiag AT") + if weighted: + maxval = G.get_property("max_element-") + A = unary.cbrt(A / maxval) + AT = unary.cbrt(AT / maxval) + semiring = plus_times + else: + semiring = plus_pair + C = semiring(A @ A.T).new(mask=A.S) + D = semiring(AT @ A.T).new(mask=A.S) + E = semiring(AT @ AT.T).new(mask=A.S) + if weighted: + C *= A + D *= A + E *= A + tri = ( + C.reduce_rowwise().new(mask=mask) + + C.reduce_columnwise().new(mask=mask) + + D.reduce_rowwise().new(mask=mask) + + E.reduce_columnwise().new(mask=mask) + ) + recip_degrees, total_degrees = G.get_properties("recip_degrees- total_degrees-", mask=mask) + denom = total_degrees * (total_degrees - 1) - 2 * recip_degrees + return (tri / denom).new(name="clustering") + + +def single_clustering(G, node, *, weighted=False): + tri = single_triangle(G, node, weighted=weighted) + if tri == 0: + return 0 + index = G._key_to_id[node] + # TODO: it would be nice if we could clean this up, but still be fast + if "degrees-" in G._cache: + degrees = G.get_property("degrees-").get(index) + elif "degrees+" in G._cache: + degrees = G.get_property("degrees+").get(index) + if G.get_property("has_self_edges") and G._A.get(index, index) is not None: + degrees -= 1 + else: + row = G._A[index, :] + degrees = row.nvals + if G.get_property("has_self_edges") and row.get(index) is not None: + degrees -= 1 + denom = degrees * (degrees - 1) + return 2 * tri / denom + + +def single_clustering_directed(G, node, *, weighted=False): + A = G.get_property("offdiag") + index = G._key_to_id[node] + if weighted: + maxval = G.get_property("max_element-") + A = unary.cbrt(A / maxval) + semiring = plus_times + else: + semiring = plus_pair + r = A[index, :] + c = A[:, index] + tris = [] + for x, y in [(c, c), (c, r), (r, c), (r, r)]: + v = semiring(A @ x).new(mask=y.S) + if weighted: + v *= y + 
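+        # Each (x, y) pair counts one combination of in-/out-edges at `node`,
+        # i.e. one orientation of directed triangles through it.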
tris.append(v.reduce().new()) + # Getting Python scalars are blocking operations, so we do them last + tri = sum(t.get(0) for t in tris) + if tri == 0: + return 0 + total_degrees = c.nvals + r.nvals + recip_degrees = binary.pair(c & r).nvals + return tri / (total_degrees * (total_degrees - 1) - 2 * recip_degrees) + + +def average_clustering(G, *, count_zeros=True, weighted=False, mask=None): + c = clustering(G, weighted=weighted, mask=mask) + val = c.reduce().get(0) + if not count_zeros: + return val / c.nvals + if mask is not None: + return val / mask.parent.nvals + return val / c.size + + +def average_clustering_directed(G, *, count_zeros=True, weighted=False, mask=None): + c = clustering_directed(G, weighted=weighted, mask=mask) + val = c.reduce().get(0) + if not count_zeros: + return val / c.nvals + if mask is not None: + return val / mask.parent.nvals + return val / c.size + + +def single_square_clustering(G, idx): + A, degrees = G.get_properties("A degrees+") # TODO" how to handle self-edges? + deg = degrees.get(idx, 0) + if deg <= 1: + return 0 + # P2 from https://arxiv.org/pdf/2007.11111.pdf; we'll also use it as scratch + v = A[idx, :].new(name="v") + p2 = plus_pair(v @ A).new(name="p2") + del p2[idx] + # Denominator is thought of as the total number of squares that could exist. + # We use the definition from https://arxiv.org/pdf/0710.0117v1.pdf (equation 2). + # + # (1) Subtract 1 for each edge where u-w or w-u are connected (which would make triangles) + denom = -plus_first(p2 @ v).get(0) + # Numerator: number of squares + # Based on https://arxiv.org/pdf/2007.11111.pdf (sigma_12, c_4) + p2(binary.times) << p2 - 1 + squares = p2.reduce().get(0) // 2 + if squares == 0: + return 0 + # (2) Subtract 1 for each u and 1 for each w for all combos: degrees * (degrees - 1) + denom -= deg * (deg - 1) + # (3) The main contribution to the denominator: degrees[u] + degrees[w] for each u-w combo. + # This is the only positive term. + denom += plus_times(v @ degrees).value * (deg - 1) + # (4) Subtract the number of squares + denom -= squares + # And we're done! + return squares / denom + + +def square_clustering(G, node_ids=None): + # Warning: only tested on undirected graphs. + # Also, it may use a lot of memory, because we compute `P2 = A @ A.T` + # + # Pseudocode: + # P2(~degrees.diag().S) = plus_pair(A @ A.T) + # tri = first(P2 & A).reduce_rowwise() + # squares = (P2 * (P2 - 1)).reduce_rowwise() / 2 + # uw_count = degrees * (degrees - 1) + # uw_degrees = plus_times(A @ degrees) * (degrees - 1) + # square_clustering = squares / (uw_degrees - uw_count - tri - squares) + # + A, degrees = G.get_properties("A degrees+") # TODO" how to handle self-edges? + # P2 from https://arxiv.org/pdf/2007.11111.pdf; we'll also use it as scratch + if node_ids is not None: + v = Vector.from_coo(node_ids, True, size=degrees.size) + Asubset = binary.second(v & A).new(name="A_subset") + else: + Asubset = A + A = A.T + D = degrees.diag(name="D") + P2 = plus_pair(Asubset @ A).new(mask=~D.S, name="P2") + + # Denominator is thought of as the total number of squares that could exist. + # We use the definition from https://arxiv.org/pdf/0710.0117v1.pdf (equation 2). 
+ # denom = uw_degrees - uw_count - tri - squares + # + # (1) Subtract 1 for each edge where u-w or w-u are connected (i.e., triangles) + # tri = first(P2 & A).reduce_rowwise() + D << binary.first(P2 & Asubset) + neg_denom = D.reduce_rowwise().new(name="neg_denom") + del D + + # Numerator: number of squares + # Based on https://arxiv.org/pdf/2007.11111.pdf (sigma_12, c_4) + # squares = (P2 * (P2 - 1)).reduce_rowwise() / 2 + P2(binary.times) << P2 - 1 + squares = P2.reduce_rowwise().new(name="squares") + del P2 + squares(squares.V, replace) << binary.cdiv(squares, 2) # Drop zeros + + # (2) Subtract 1 for each u and 1 for each w for all combos: degrees * (degrees - 1) + # uw_count = degrees * (degrees - 1) + denom = (degrees - 1).new(mask=squares.S, name="denom") + neg_denom(binary.plus) << degrees * denom + + # (3) The main contribution to the denominator: degrees[u] + degrees[w] for each u-w combo. + # uw_degrees = plus_times(A @ degrees) * (degrees - 1) + # denom(binary.times) << plus_times(A @ degrees) + denom(binary.times, denom.S) << plus_times(Asubset @ degrees) + + # (4) Subtract the number of squares + denom(binary.minus) << binary.plus(neg_denom & squares) + + # And we're done! This result does not include 0s + return (squares / denom).new(name="square_clustering") + + +def generalized_degree(G, *, mask=None): + # Not benchmarked or optimized + A = G.get_property("offdiag") + Tri = Matrix(int, A.nrows, A.ncols, name="Tri") + if mask is not None: + if mask.structure and not mask.value: + v_mask = mask.parent + else: + v_mask = mask.new() # Not covered + Tri << binary.pair(v_mask & A) # Mask out rows + Tri(Tri.S) << 0 + else: + Tri(A.S) << 0 + Tri(Tri.S, binary.second) << plus_pair(Tri @ A.T) + rows, cols, vals = Tri.to_coo() + # The column index indicates the number of triangles an edge participates in. + # The largest this can be is `A.ncols - 1`. Values is count of edges. + return Matrix.from_coo( + rows, + vals, + np.ones(vals.size, dtype=int), + dup_op=binary.plus, + nrows=A.nrows, + ncols=A.ncols - 1, + name="generalized_degree", + ) + + +def single_generalized_degree(G, node): + # Not benchmarked or optimized + index = G._key_to_id[node] + v = Vector(bool, len(G)) + v[index] = True + return generalized_degree(G, mask=v.S)[index, :].new(name=f"generalized_degree_{index}") diff --git a/graphblas_algorithms/algorithms/community/__init__.py b/graphblas_algorithms/algorithms/community/__init__.py new file mode 100644 index 0000000..fd9f65d --- /dev/null +++ b/graphblas_algorithms/algorithms/community/__init__.py @@ -0,0 +1 @@ +from .quality import * diff --git a/graphblas_algorithms/algorithms/community/quality.py b/graphblas_algorithms/algorithms/community/quality.py new file mode 100644 index 0000000..d2b1d95 --- /dev/null +++ b/graphblas_algorithms/algorithms/community/quality.py @@ -0,0 +1,23 @@ +from graphblas.semiring import plus_pair + +__all__ = ["intra_community_edges", "inter_community_edges"] + + +def intra_community_edges(G, partition): + A = G._A + count = 0 + for block in partition: + # is A or A.T faster? + count += plus_pair(A @ block).new(mask=block.S).reduce().get(0) + return count + + +def inter_community_edges(G, partition): + A = G._A + count = 0 + for block in partition: + # is A or A.T faster? 
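+        # Masking with ~block.S keeps only rows outside the block, so this counts
+        # stored (i, j) pairs with exactly one endpoint in the community.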
+ count += plus_pair(A @ block).new(mask=~block.S).reduce().get(0) + return count + # Alternative implementation if partition is complete set: + # return G._A.nvals - intra_community_edges_core(G, partition) diff --git a/graphblas_algorithms/algorithms/components/__init__.py b/graphblas_algorithms/algorithms/components/__init__.py new file mode 100644 index 0000000..bb0aea6 --- /dev/null +++ b/graphblas_algorithms/algorithms/components/__init__.py @@ -0,0 +1,2 @@ +from .connected import * +from .weakly_connected import * diff --git a/graphblas_algorithms/algorithms/components/connected.py b/graphblas_algorithms/algorithms/components/connected.py new file mode 100644 index 0000000..3f19b86 --- /dev/null +++ b/graphblas_algorithms/algorithms/components/connected.py @@ -0,0 +1,12 @@ +from .._bfs import _bfs_plain +from ..exceptions import PointlessConcept + + +def is_connected(G): + if len(G) == 0: + raise PointlessConcept("Connectivity is undefined for the null graph.") + return _bfs_plain(G, next(iter(G))).nvals == len(G) + + +def node_connected_component(G, n): + return _bfs_plain(G, n) diff --git a/graphblas_algorithms/algorithms/components/weakly_connected.py b/graphblas_algorithms/algorithms/components/weakly_connected.py new file mode 100644 index 0000000..306d96e --- /dev/null +++ b/graphblas_algorithms/algorithms/components/weakly_connected.py @@ -0,0 +1,8 @@ +from .._bfs import _bfs_plain_bidirectional +from ..exceptions import PointlessConcept + + +def is_weakly_connected(G): + if len(G) == 0: + raise PointlessConcept("Connectivity is undefined for the null graph.") + return _bfs_plain_bidirectional(G, next(iter(G))).nvals == len(G) diff --git a/graphblas_algorithms/algorithms/core.py b/graphblas_algorithms/algorithms/core.py new file mode 100644 index 0000000..a6ff26d --- /dev/null +++ b/graphblas_algorithms/algorithms/core.py @@ -0,0 +1,37 @@ +from graphblas import Matrix, monoid, replace, select, semiring + +from graphblas_algorithms import Graph + +__all__ = ["k_truss"] + + +def k_truss(G: Graph, k) -> Graph: + # TODO: should we have an option to keep the output matrix the same size? 
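+    # The loop below repeatedly counts, for every remaining edge, how many triangles
+    # contain it (plus_pair(S @ S.T) masked to S) and drops edges supported by fewer
+    # than k - 2 triangles, until no more edges are removed.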
+ # Ignore self-edges + S = G.get_property("offdiag") + + if k < 3: + # Most implementations consider k < 3 invalid, + # but networkx leaves the graph unchanged + C = S + else: + # Remove edges not in k-truss + nvals_last = S.nvals + # TODO: choose dtype based on max number of triangles + plus_pair = semiring.plus_pair["int32"] + C = Matrix("int32", S.nrows, S.ncols) + while True: + C(S.S, replace) << plus_pair(S @ S.T) + C << select.value(k - 2 <= C) + if C.nvals == nvals_last: + break + nvals_last = C.nvals + S = C + + # Remove isolate nodes + indices, _ = C.reduce_rowwise(monoid.any).to_coo(values=False) + Ktruss = C[indices, indices].new() + + # Convert back to networkx graph with correct node ids + key_to_id = G.renumber_key_to_id(indices.tolist()) + return Graph(Ktruss, key_to_id=key_to_id) diff --git a/graphblas_algorithms/algorithms/cuts.py b/graphblas_algorithms/algorithms/cuts.py new file mode 100644 index 0000000..918dc56 --- /dev/null +++ b/graphblas_algorithms/algorithms/cuts.py @@ -0,0 +1,76 @@ +from graphblas import monoid +from graphblas.semiring import any_pair, plus_first + +from .boundary import edge_boundary, node_boundary + +__all__ = [ + "cut_size", + "volume", + "normalized_cut_size", + "conductance", + "edge_expansion", + "mixing_expansion", + "node_expansion", + "boundary_expansion", +] + + +def cut_size(G, S, T=None, *, is_weighted=False): + edges = edge_boundary(G, S, T, is_weighted=is_weighted) + if is_weighted: + rv = edges.reduce_scalar(monoid.plus).get(0) + else: + rv = edges.nvals + if G.is_directed(): + edges = edge_boundary(G, T, S, is_weighted=is_weighted) + if is_weighted: + rv += edges.reduce_scalar(monoid.plus).get(0) + else: + rv += edges.nvals + return rv + + +def volume(G, S, *, weighted=False): + if weighted: + degrees = plus_first(G._A @ S) + else: + degrees = G.get_property("row_degrees+", mask=S.S) + return degrees.reduce(monoid.plus).get(0) + + +def normalized_cut_size(G, S, T=None): + num_cut_edges = cut_size(G, S, T) + volume_S = volume(G, S) + volume_T = volume(G, T) + return num_cut_edges * ((1 / volume_S) + (1 / volume_T)) + + +def conductance(G, S, T=None): + num_cut_edges = cut_size(G, S, T) + volume_S = volume(G, S) + volume_T = volume(G, T) + return num_cut_edges / min(volume_S, volume_T) + + +def edge_expansion(G, S, T=None): + num_cut_edges = cut_size(G, S, T) + if T is None: + Tnvals = S.size - S.nvals + else: + Tnvals = T.nvals + return num_cut_edges / min(S.nvals, Tnvals) + + +def mixing_expansion(G, S, T=None): + num_cut_edges = cut_size(G, S, T) + return num_cut_edges / G._A.nvals # Why no factor of 2 in denominator? 
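+    # (For an undirected Graph the adjacency is stored symmetrically, so _A.nvals is
+    # already twice the number of edges, which may be where the factor of 2 went.)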
+ + +def node_expansion(G, S): + neighborhood = any_pair(S @ G._A) + return neighborhood.nvals / S.nvals + + +def boundary_expansion(G, S): + result = node_boundary(G, S) + return result.nvals / S.nvals diff --git a/graphblas_algorithms/algorithms/dag.py b/graphblas_algorithms/algorithms/dag.py new file mode 100644 index 0000000..63eb560 --- /dev/null +++ b/graphblas_algorithms/algorithms/dag.py @@ -0,0 +1,17 @@ +from ._bfs import _bfs_plain + +__all__ = ["descendants", "ancestors"] + + +def descendants(G, source): + rv = _bfs_plain(G, source, name="descendants") + index = G._key_to_id[source] + del rv[index] + return rv + + +def ancestors(G, source): + rv = _bfs_plain(G, source, transpose=True, name="ancestors") + index = G._key_to_id[source] + del rv[index] + return rv diff --git a/graphblas_algorithms/algorithms/dominating.py b/graphblas_algorithms/algorithms/dominating.py new file mode 100644 index 0000000..2894bd8 --- /dev/null +++ b/graphblas_algorithms/algorithms/dominating.py @@ -0,0 +1,8 @@ +from graphblas.semiring import any_pair + +__all__ = ["is_dominating_set"] + + +def is_dominating_set(G, nbunch): + nbrs = any_pair[bool](nbunch @ G._A).new(mask=~nbunch.S) # A or A.T? + return nbrs.size - nbunch.nvals - nbrs.nvals == 0 diff --git a/graphblas_algorithms/algorithms/efficiency_measures.py b/graphblas_algorithms/algorithms/efficiency_measures.py new file mode 100644 index 0000000..3d922ee --- /dev/null +++ b/graphblas_algorithms/algorithms/efficiency_measures.py @@ -0,0 +1,12 @@ +from .exceptions import NoPath +from .shortest_paths.unweighted import bidirectional_shortest_path_length + +__all__ = ["efficiency"] + + +def efficiency(G, u, v): + try: + eff = 1 / bidirectional_shortest_path_length(G, u, v) + except NoPath: + eff = 0 + return eff diff --git a/graphblas_algorithms/algorithms/exceptions.py b/graphblas_algorithms/algorithms/exceptions.py new file mode 100644 index 0000000..7c911c9 --- /dev/null +++ b/graphblas_algorithms/algorithms/exceptions.py @@ -0,0 +1,22 @@ +class GraphBlasAlgorithmException(Exception): + pass + + +class ConvergenceFailure(GraphBlasAlgorithmException): + pass + + +class EmptyGraphError(GraphBlasAlgorithmException): + pass + + +class PointlessConcept(GraphBlasAlgorithmException): + pass + + +class NoPath(GraphBlasAlgorithmException): + pass + + +class Unbounded(GraphBlasAlgorithmException): + pass diff --git a/graphblas_algorithms/algorithms/isolate.py b/graphblas_algorithms/algorithms/isolate.py new file mode 100644 index 0000000..8434017 --- /dev/null +++ b/graphblas_algorithms/algorithms/isolate.py @@ -0,0 +1,26 @@ +__all__ = ["is_isolate", "isolates", "number_of_isolates"] + + +def is_isolate(G, n): + index = G._key_to_id[n] + if G.is_directed(): + degrees = G.get_property("total_degrees+") + else: + degrees = G.get_property("degrees+") + return index not in degrees + + +def isolates(G): + if G.is_directed(): + degrees = G.get_property("total_degrees+") + else: + degrees = G.get_property("degrees+") + return (~degrees.S).new(name="isolates") + + +def number_of_isolates(G): + if G.is_directed(): + degrees = G.get_property("total_degrees+") + else: + degrees = G.get_property("degrees+") + return degrees.size - degrees.nvals diff --git a/graphblas_algorithms/algorithms/isomorphism/__init__.py b/graphblas_algorithms/algorithms/isomorphism/__init__.py new file mode 100644 index 0000000..e701b70 --- /dev/null +++ b/graphblas_algorithms/algorithms/isomorphism/__init__.py @@ -0,0 +1 @@ +from .isomorph import * diff --git 
a/graphblas_algorithms/algorithms/isomorphism/isomorph.py b/graphblas_algorithms/algorithms/isomorphism/isomorph.py new file mode 100644 index 0000000..12e5af4 --- /dev/null +++ b/graphblas_algorithms/algorithms/isomorphism/isomorph.py @@ -0,0 +1,56 @@ +import numpy as np +from graphblas import binary + +from ..cluster import triangles + +__all__ = [ + "fast_could_be_isomorphic", + "faster_could_be_isomorphic", +] + + +def fast_could_be_isomorphic(G1, G2): + if len(G1) != len(G2): + return False + d1 = G1.get_property("total_degrees+" if G1.is_directed() else "degrees+") + d2 = G2.get_property("total_degrees+" if G2.is_directed() else "degrees+") + if d1.nvals != d2.nvals: + return False + t1 = triangles(G1) + t2 = triangles(G2) + if t1.nvals != t2.nvals: + return False + # Make ds and ts the same shape as numpy arrays so we can sort them lexicographically. + if t1.nvals != d1.nvals: + # Assign 0 to t1 where present in d1 but not t1 + t1(~t1.S) << binary.second(d1, 0) + if t2.nvals != d2.nvals: + # Assign 0 to t2 where present in d2 but not t2 + t2(~t2.S) << binary.second(d2, 0) + d1 = d1.to_coo(indices=False)[1] + d2 = d2.to_coo(indices=False)[1] + t1 = t1.to_coo(indices=False)[1] + t2 = t2.to_coo(indices=False)[1] + ind1 = np.lexsort((d1, t1)) + ind2 = np.lexsort((d2, t2)) + if not np.array_equal(d1[ind1], d2[ind2]): + return False + if not np.array_equal(t1[ind1], t2[ind2]): + return False + return True + + +def faster_could_be_isomorphic(G1, G2): + if len(G1) != len(G2): + return False + d1 = G1.get_property("total_degrees+" if G1.is_directed() else "degrees+") + d2 = G2.get_property("total_degrees+" if G2.is_directed() else "degrees+") + if d1.nvals != d2.nvals: + return False + d1 = d1.to_coo(indices=False)[1] + d2 = d2.to_coo(indices=False)[1] + d1.sort() + d2.sort() + if not np.array_equal(d1, d2): + return False + return True diff --git a/graphblas_algorithms/algorithms/link_analysis/__init__.py b/graphblas_algorithms/algorithms/link_analysis/__init__.py new file mode 100644 index 0000000..938b30c --- /dev/null +++ b/graphblas_algorithms/algorithms/link_analysis/__init__.py @@ -0,0 +1,2 @@ +from .hits_alg import * +from .pagerank_alg import * diff --git a/graphblas_algorithms/algorithms/link_analysis/hits_alg.py b/graphblas_algorithms/algorithms/link_analysis/hits_alg.py new file mode 100644 index 0000000..662ac14 --- /dev/null +++ b/graphblas_algorithms/algorithms/link_analysis/hits_alg.py @@ -0,0 +1,62 @@ +from graphblas import Vector + +from .._helpers import is_converged, normalize +from ..exceptions import ConvergenceFailure + +__all__ = ["hits"] + + +def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True, *, with_authority=False): + """HITS algorithms with additional parameter `with_authority`. + + When `with_authority` is True, the authority matrix, ``A.T @ A`` will be + created and used. This may be faster, but requires more memory. 
+ """ + N = len(G) + h = Vector(float, N, name="h") + a = Vector(float, N, name="a") + if N == 0: + return h, a + if nstart is None: + h << 1.0 / N + else: + h << nstart + denom = h.reduce().get(0) + h *= 1.0 / denom + + # Power iteration: make up to max_iter iterations + A = G._A + if with_authority: + a, h = h, a + ATA = (A.T @ A).new(name="ATA") # Authority matrix + aprev = Vector(float, N, name="a_prev") + for _i in range(max_iter): + aprev, a = a, aprev + a << ATA @ aprev + normalize(a, "Linf") + if is_converged(aprev, a, tol): + h << A @ a + break + else: + raise ConvergenceFailure(max_iter) + else: + hprev = Vector(float, N, name="h_prev") + for _i in range(max_iter): + hprev, h = h, hprev + a << hprev @ A + h << A @ a + normalize(h, "Linf") + if is_converged(hprev, h, tol): + break + else: + raise ConvergenceFailure(max_iter) + if normalized: + normalize(h, "L1") + normalize(a, "L1") + elif with_authority: + normalize(h, "Linf") + else: + normalize(a, "Linf") + h.name = "hits_h" + a.name = "hits_a" + return h, a diff --git a/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py b/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py new file mode 100644 index 0000000..7391dbe --- /dev/null +++ b/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py @@ -0,0 +1,162 @@ +from graphblas import Matrix, Vector, binary, monoid +from graphblas.semiring import plus_first, plus_times + +from graphblas_algorithms import Graph + +from .._helpers import is_converged +from ..exceptions import ConvergenceFailure + +__all__ = ["pagerank", "google_matrix"] + + +def pagerank( + G: Graph, + alpha=0.85, + personalization=None, + max_iter=100, + tol=1e-06, + nstart=None, + dangling=None, + row_degrees=None, + name="pagerank", +) -> Vector: + A = G._A + N = A.nrows + if A.nvals == 0: + return Vector(float, N, name=name) + + # Initial vector + x = Vector(float, N, name="x") + if nstart is None: + x[:] = 1.0 / N + else: + denom = nstart.reduce().get(0) + if denom == 0: + raise ZeroDivisionError("nstart sums to 0") + x << nstart / denom + + # Personalization vector or scalar + if personalization is None: + p = 1.0 / N + else: + denom = personalization.reduce().get(0) + if denom == 0: + raise ZeroDivisionError("personalization sums to 0") + p = (personalization / denom).new(name="p") + + # Inverse of row_degrees + # Fold alpha constant into S + if row_degrees is None: + row_degrees = G.get_property("plus_rowwise+") # XXX: What about self-edges? 
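+    # S holds alpha divided by each node's out-weight; folding alpha in here means each
+    # iteration only needs one elementwise multiply (w = xprev * S) before the matvec.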
+ S = (alpha / row_degrees).new(name="S") + + if (iso_value := G.get_property("iso_value")) is not None: + # Fold iso-value of A into S + # This lets us use the plus_first semiring, which is faster + if iso_value.get(1) != 1: + S *= iso_value + semiring = plus_first[float] + else: + semiring = plus_times[float] + + is_dangling = S.nvals < N + if is_dangling: + dangling_mask = Vector(float, N, name="dangling_mask") + dangling_mask(mask=~S.S) << 1.0 + # Fold alpha constant into dangling_weights (or dangling_mask) + if dangling is not None: + dangling_weights = (alpha / dangling.reduce().get(0) * dangling).new( + name="dangling_weights" + ) + elif personalization is None: + # Fast case (and common case); is iso-valued + dangling_mask(mask=dangling_mask.S) << alpha * p + else: + dangling_weights = (alpha * p).new(name="dangling_weights") + + # Fold constant into p + p *= 1 - alpha + + # Power iteration: make up to max_iter iterations + xprev = Vector(float, N, name="x_prev") + w = Vector(float, N, name="w") + for _i in range(max_iter): + xprev, x = x, xprev + + # x << alpha * ((xprev * S) @ A + "dangling_weights") + (1 - alpha) * p + x << p + if is_dangling: + if dangling is None and personalization is None: + # Fast case: add a scalar; x is still iso-valued (b/c p is also scalar) + x += xprev @ dangling_mask + else: + # Add a vector + x += plus_first(xprev @ dangling_mask) * dangling_weights + w << xprev * S + x += semiring(w @ A) # plus_first if A.ss.is_iso else plus_times + + if is_converged(xprev, x, tol): # sum(abs(xprev - x)) < N * tol + x.name = name + return x + raise ConvergenceFailure(max_iter) + + +def google_matrix( + G: Graph, + alpha=0.85, + personalization=None, + nodelist=None, + dangling=None, + name="google_matrix", +) -> Matrix: + A = G._A + ids = G.list_to_ids(nodelist) + if ids is not None: + A = A[ids, ids].new(float, name=name) + else: + A = A.dup(float, name=name) + N = A.nrows + if N == 0: + return A + + # Personalization vector or scalar + if personalization is None: + p = 1.0 / N + else: + if ids is not None: + personalization = personalization[ids].new(name="personalization") + denom = personalization.reduce().get(0) + if denom == 0: + raise ZeroDivisionError("personalization sums to 0") + p = (personalization / denom).new(mask=personalization.V, name="p") + + if ids is None or len(ids) == len(G): + nonempty_rows = G.get_property("any_rowwise+") # XXX: What about self-edges? 
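+        # The structure of "any_rowwise+" marks rows with at least one stored entry;
+        # rows missing from it are the dangling nodes handled below.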
+ else: + nonempty_rows = A.reduce_rowwise(monoid.any).new(name="nonempty_rows") + + is_dangling = nonempty_rows.nvals < N + if is_dangling: + empty_rows = (~nonempty_rows.S).new(name="empty_rows") + if dangling is not None: + if ids is not None: + dangling = dangling[ids].new(name="dangling") + dangling_weights = (1.0 / dangling.reduce().get(0) * dangling).new( + mask=dangling.V, name="dangling_weights" + ) + A << binary.first(empty_rows.outer(dangling_weights) | A) + elif personalization is None: + A << binary.first((p * empty_rows) | A) + else: + A << binary.first(empty_rows.outer(p) | A) + + scale = A.reduce_rowwise(monoid.plus).new(float) + scale << alpha / scale + A << scale * A + p *= 1 - alpha + if personalization is None: + # Add a scalar everywhere, which makes A dense + A(binary.plus)[:, :] = p + else: + A << A + p + return A diff --git a/graphblas_algorithms/algorithms/lowest_common_ancestors.py b/graphblas_algorithms/algorithms/lowest_common_ancestors.py new file mode 100644 index 0000000..0dfac19 --- /dev/null +++ b/graphblas_algorithms/algorithms/lowest_common_ancestors.py @@ -0,0 +1,21 @@ +from graphblas import binary, replace +from graphblas.semiring import any_pair + +from ._bfs import _bfs_plain + +__all__ = ["lowest_common_ancestor"] + + +def lowest_common_ancestor(G, node1, node2, default=None): + common_ancestors = _bfs_plain(G, node1, name="common_ancestors", transpose=True) + other_ancestors = _bfs_plain(G, node2, name="other_ancestors", transpose=True) + common_ancestors << binary.pair(common_ancestors & other_ancestors) + if common_ancestors.nvals == 0: + return default + # Take one BFS step along predecessors. The lowest common ancestor is one we don't visit. + # An alternative strategy would be to walk along successors until there are no more. + other_ancestors(common_ancestors.S, replace) << any_pair[bool](G._A @ common_ancestors) + common_ancestors(~other_ancestors.S, replace) << common_ancestors + index = common_ancestors.to_coo(values=False)[0][0] + # XXX: should we return index or key? 
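+    # Returning the key rather than the integer index keeps the result consistent
+    # with networkx, which returns node labels.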
+ return G.id_to_key[index] diff --git a/graphblas_algorithms/algorithms/operators/__init__.py b/graphblas_algorithms/algorithms/operators/__init__.py new file mode 100644 index 0000000..c2742b9 --- /dev/null +++ b/graphblas_algorithms/algorithms/operators/__init__.py @@ -0,0 +1,2 @@ +from .binary import * +from .unary import * diff --git a/graphblas_algorithms/algorithms/operators/binary.py b/graphblas_algorithms/algorithms/operators/binary.py new file mode 100644 index 0000000..4c14a11 --- /dev/null +++ b/graphblas_algorithms/algorithms/operators/binary.py @@ -0,0 +1,155 @@ +from graphblas import Matrix, binary, dtypes, unary + +from ..exceptions import GraphBlasAlgorithmException + +__all__ = [ + "compose", + "difference", + "disjoint_union", + "full_join", + "intersection", + "symmetric_difference", + "union", +] + + +def union(G, H, rename=(), *, name="union"): + if G.is_multigraph() != H.is_multigraph(): + raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.") + if G.is_multigraph(): + raise NotImplementedError("Not yet implemented for multigraphs") + if rename: + prefix = rename[0] + if prefix is not None: + G = type(G)( + G._A, key_to_id={f"{prefix}{key}": val for key, val in G._key_to_id.items()} + ) + if len(rename) > 1: + prefix = rename[1] + if prefix is not None: + H = type(H)( + H._A, key_to_id={f"{prefix}{key}": val for key, val in H._key_to_id.items()} + ) + A = G._A + B = H._A + if not G._key_to_id.keys().isdisjoint(H._key_to_id.keys()): + raise GraphBlasAlgorithmException("The node sets of the graphs are not disjoint.") + C = Matrix(dtypes.unify(A.dtype, B.dtype), A.nrows + B.nrows, A.ncols + B.ncols, name=name) + C[: A.nrows, : A.ncols] = A + C[A.nrows :, A.ncols :] = B + offset = A.nrows + key_to_id = {key: val + offset for key, val in H._key_to_id.items()} + key_to_id.update(G._key_to_id) + return type(G)(C, key_to_id=key_to_id) + + +def disjoint_union(G, H, *, name="disjoint_union"): + if G.is_multigraph() != H.is_multigraph(): + raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.") + if G.is_multigraph(): + raise NotImplementedError("Not yet implemented for multigraphs") + A = G._A + B = H._A + C = Matrix(dtypes.unify(A.dtype, B.dtype), A.nrows + B.nrows, A.ncols + B.ncols, name=name) + C[: A.nrows, : A.ncols] = A + C[A.nrows :, A.ncols :] = B + return type(G)(C) + + +def intersection(G, H, *, name="intersection"): + if G.is_multigraph() != H.is_multigraph(): + raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.") + if G.is_multigraph(): + raise NotImplementedError("Not yet implemented for multigraphs") + keys = sorted(G._key_to_id.keys() & H._key_to_id.keys(), key=G._key_to_id.__getitem__) + ids = G.list_to_ids(keys) + A = G._A[ids, ids].new() + ids = H.list_to_ids(keys) + B = H._A[ids, ids].new(dtypes.unify(A.dtype, H._A.dtype), mask=A.S, name=name) + B << unary.one(B) + return type(G)(B, key_to_id=dict(zip(keys, range(len(keys)), strict=True))) + + +def difference(G, H, *, name="difference"): + if G.is_multigraph() != H.is_multigraph(): + raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.") + if G.is_multigraph(): + raise NotImplementedError("Not yet implemented for multigraphs") + if G._key_to_id.keys() != H._key_to_id.keys(): + raise GraphBlasAlgorithmException("Node sets of graphs not equal") + A = G._A + if G._key_to_id == H._key_to_id: + B = H._A + else: + # Need to perform a permutation + keys = sorted(G._key_to_id, key=G._key_to_id.__getitem__) + ids = 
H.list_to_ids(keys) + B = H._A[ids, ids].new() + C = unary.one(A).new(mask=~B.S, name=name) + return type(G)(C, key_to_id=G._key_to_id) + + +def symmetric_difference(G, H, *, name="symmetric_difference"): + if G.is_multigraph() != H.is_multigraph(): + raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.") + if G.is_multigraph(): + raise NotImplementedError("Not yet implemented for multigraphs") + if G._key_to_id.keys() != H._key_to_id.keys(): + raise GraphBlasAlgorithmException("Node sets of graphs not equal") + A = G._A + if G._key_to_id == H._key_to_id: + B = H._A + else: + # Need to perform a permutation + keys = sorted(G._key_to_id, key=G._key_to_id.__getitem__) + ids = H.list_to_ids(keys) + B = H._A[ids, ids].new() + Mask = binary.pair[bool](A & B).new(name="mask") + C = binary.pair(A | B, left_default=True, right_default=True).new(mask=~Mask.S, name=name) + return type(G)(C, key_to_id=G._key_to_id) + + +def compose(G, H, *, name="compose"): + if G.is_multigraph() != H.is_multigraph(): + raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.") + if G.is_multigraph(): + raise NotImplementedError("Not yet implemented for multigraphs") + A = G._A + B = H._A + if G._key_to_id.keys() == H._key_to_id.keys(): + if G._key_to_id != H._key_to_id: + # Need to perform a permutation + keys = sorted(G._key_to_id, key=G._key_to_id.__getitem__) + ids = H.list_to_ids(keys) + B = B[ids, ids].new() + C = binary.second(A | B).new(name=name) + key_to_id = G._key_to_id + else: + keys = sorted(G._key_to_id.keys() & H._key_to_id.keys(), key=G._key_to_id.__getitem__) + B = H._A + C = Matrix( + dtypes.unify(A.dtype, B.dtype), + A.nrows + B.nrows - len(keys), + A.ncols + B.ncols - len(keys), + name=name, + ) + C[: A.nrows, : A.ncols] = A + ids1 = G.list_to_ids(keys) + ids2 = H.list_to_ids(keys) + C[ids1, ids1] = B[ids2, ids2] + newkeys = sorted(H._key_to_id.keys() - G._key_to_id.keys(), key=H._key_to_id.__getitem__) + ids = H.list_to_ids(newkeys) + C[A.nrows :, A.ncols :] = B[ids, ids] + # Now make new `key_to_id` + ids += A.nrows + key_to_id = dict(zip(newkeys, ids.tolist(), strict=True)) + key_to_id.update(G._key_to_id) + return type(G)(C, key_to_id=key_to_id) + + +def full_join(G, H, rename=(), *, name="full_join"): + rv = union(G, H, rename, name=name) + nrows, ncols = G._A.shape + rv._A[:nrows, ncols:] = True + rv._A[nrows:, :ncols] = True + return rv diff --git a/graphblas_algorithms/algorithms/operators/unary.py b/graphblas_algorithms/algorithms/operators/unary.py new file mode 100644 index 0000000..e7c46d6 --- /dev/null +++ b/graphblas_algorithms/algorithms/operators/unary.py @@ -0,0 +1,18 @@ +from graphblas import select + +from ..exceptions import GraphBlasAlgorithmException + +__all__ = ["complement", "reverse"] + + +def complement(G, *, name="complement"): + A = G._A + R = (~A.S).new(A.dtype, name=name) + R << select.offdiag(R) + return type(G)(R, key_to_id=G._key_to_id) + + +def reverse(G, copy=True): + if not G.is_directed(): + raise GraphBlasAlgorithmException("Cannot reverse an undirected graph.") + return G.reverse(copy=copy) diff --git a/graphblas_algorithms/algorithms/reciprocity.py b/graphblas_algorithms/algorithms/reciprocity.py new file mode 100644 index 0000000..173cb1d --- /dev/null +++ b/graphblas_algorithms/algorithms/reciprocity.py @@ -0,0 +1,22 @@ +from graphblas import binary + +from .exceptions import EmptyGraphError + +__all__ = ["reciprocity", "overall_reciprocity"] + + +def reciprocity(G, mask=None): + overlap, total_degrees = 
G.get_properties("recip_degrees+ total_degrees+", mask=mask) + return binary.truediv(2 * overlap | total_degrees, left_default=0, right_default=0).new( + name="reciprocity" + ) + + +def overall_reciprocity(G): + n_all_edge = G._A.nvals + if n_all_edge == 0: + raise EmptyGraphError("Not defined for empty graphs") + n_overlap_edges, has_self_edges = G.get_properties("total_recip+ has_self_edges") + if has_self_edges: + n_overlap_edges -= G.get_property("diag").nvals + return n_overlap_edges / n_all_edge diff --git a/graphblas_algorithms/algorithms/regular.py b/graphblas_algorithms/algorithms/regular.py new file mode 100644 index 0000000..cf3a057 --- /dev/null +++ b/graphblas_algorithms/algorithms/regular.py @@ -0,0 +1,30 @@ +from graphblas import monoid + +__all__ = ["is_regular", "is_k_regular"] + + +def is_regular(G): + if not G.is_directed(): + degrees = G.get_property("degrees+") + if degrees.nvals != degrees.size: + return False + d = degrees.get(0) + return (degrees == d).reduce(monoid.land).get(True) + row_degrees = G.get_property("row_degrees+") + if row_degrees.nvals != row_degrees.size: + return False + column_degrees = G.get_property("column_degrees+") + if column_degrees.nvals != column_degrees.size: + return False + d = row_degrees.get(0) + if not (row_degrees == d).reduce(monoid.land): + return False + d = column_degrees.get(0) + return (column_degrees == d).reduce(monoid.land).get(True) + + +def is_k_regular(G, k): + degrees = G.get_property("degrees+") + if degrees.nvals != degrees.size: + return False + return (degrees == k).reduce(monoid.land).get(True) diff --git a/graphblas_algorithms/algorithms/shortest_paths/__init__.py b/graphblas_algorithms/algorithms/shortest_paths/__init__.py new file mode 100644 index 0000000..781db9d --- /dev/null +++ b/graphblas_algorithms/algorithms/shortest_paths/__init__.py @@ -0,0 +1,4 @@ +from .dense import * +from .generic import * +from .unweighted import * +from .weighted import * diff --git a/graphblas_algorithms/algorithms/shortest_paths/dense.py b/graphblas_algorithms/algorithms/shortest_paths/dense.py new file mode 100644 index 0000000..394d1b4 --- /dev/null +++ b/graphblas_algorithms/algorithms/shortest_paths/dense.py @@ -0,0 +1,111 @@ +from graphblas import Matrix, Vector, binary, indexunary, replace, select +from graphblas.semiring import any_plus, any_second + +from ..exceptions import GraphBlasAlgorithmException + +__all__ = ["floyd_warshall", "floyd_warshall_predecessor_and_distance"] + + +def floyd_warshall(G, is_weighted=False): + return floyd_warshall_predecessor_and_distance(G, is_weighted, compute_predecessors=False)[1] + + +def floyd_warshall_predecessor_and_distance( + G, is_weighted=False, *, compute_predecessors=True, permutation=None +): + # By using `offdiag` instead of `G._A`, we ensure that D will not become dense. + # Dense D may be better at times, but not including the diagonal will result in less work. + # Typically, Floyd-Warshall algorithms sets the diagonal of D to 0 at the beginning. + # This is unnecessary with sparse matrices, and we set the diagonal to 0 at the end. + # We also don't iterate over index `i` if either row i or column i are empty. 
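+    # Each pass over node i relaxes paths through i, D[u, v] = min(D[u, v], D[u, i] + D[i, v]),
+    # done below as an outer product of column i with row i, accumulated into D with binary.min.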
+ if is_directed := G.is_directed(): + A, row_degrees, column_degrees = G.get_properties("offdiag row_degrees- column_degrees-") + nonempty_nodes = binary.pair(row_degrees & column_degrees).new(name="nonempty_nodes") + else: + A, nonempty_nodes = G.get_properties("U- degrees-") + if permutation is not None: + if len(permutation) != nonempty_nodes.size: + raise GraphBlasAlgorithmException( + "permutation must contain every node in G with no repeats." + ) + A = A[permutation, permutation].new() + nonempty_nodes = nonempty_nodes[permutation].new(name="nonempty_nodes") + + if A.dtype == bool or not is_weighted: + dtype = int + else: + dtype = A.dtype + n = A.nrows + D = Matrix(dtype, nrows=n, ncols=n, name="floyd_warshall_dist") + if is_weighted: + D << A + else: + D(A.S) << 1 # Like `D << unary.one[int](A)` + del A + Row = Matrix(dtype, nrows=1, ncols=n, name="Row") + if is_directed: + Col = Matrix(dtype, nrows=n, ncols=1, name="Col") + else: + Col = None + Outer = Matrix(dtype, nrows=n, ncols=n, name="Outer") + if compute_predecessors: + Mask = Matrix(bool, nrows=n, ncols=n, name="Mask") + P = indexunary.rowindex(D).new(name="floyd_warshall_pred") + if P.dtype == dtype: + P_row = Row + else: + P_row = Matrix(P.dtype, nrows=1, ncols=n, name="P_row") + else: + Mask = P = P_row = None + + for i in nonempty_nodes: + Row << D[[i], :] + if is_directed: + Col << D[:, [i]] + else: + Row(binary.any) << D.T[[i], :] + Col = Row.T + Outer << any_plus(Col @ Row) # Like `col.outer(row, binary.plus)` + + if not compute_predecessors: + # It is faster (approx 10%-30%) to use a mask as is done below when computing + # predecessors, but we choose to use less memory here by not using a mask. + if is_directed: + D(binary.min) << select.offdiag(Outer) + else: + D(binary.min) << select.triu(Outer, 1) + else: + # Update Outer to only include off-diagonal values that will update D and P. + if is_directed: + Mask << indexunary.offdiag(Outer) + else: + Mask << indexunary.triu(Outer, 1) + Mask(binary.second) << binary.lt(Outer & D) + Outer(Mask.V, replace) << Outer + + # Update distances; like `D(binary.min) << offdiag(any_plus(Col @ Row))` + D(Outer.S) << Outer + + # Broadcast predecessors in P_row to updated values + P_row << P[[i], :] + if not is_directed: + P_row(binary.any) << P.T[[i], :] + Col = P_row.T + P(Outer.S) << any_second(Col @ P_row) + del Outer, Mask, Col, Row, P_row + + if not is_directed: + # Symmetrize the results. + # It may be nice to be able to return these as upper-triangular. + D(binary.any) << D.T + if compute_predecessors: + P(binary.any) << P.T + + # Set diagonal values to 0 (this way seems fast). + # The missing values are implied to be infinity, so we set diagonals explicitly to 0. 
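+    # diag_mask is dense, so the structure of its diagonal covers exactly the n diagonal
+    # cells; assigning 0 through that mask touches nothing else in D.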
+ diag_mask = Vector(bool, size=n, name="diag_mask") + diag_mask << True + Diag_mask = diag_mask.diag(name="Diag_mask") + D(Diag_mask.S) << 0 + + return P, D diff --git a/graphblas_algorithms/algorithms/shortest_paths/generic.py b/graphblas_algorithms/algorithms/shortest_paths/generic.py new file mode 100644 index 0000000..ef86f89 --- /dev/null +++ b/graphblas_algorithms/algorithms/shortest_paths/generic.py @@ -0,0 +1,12 @@ +from ..exceptions import NoPath +from .unweighted import bidirectional_shortest_path_length + +__all__ = ["has_path"] + + +def has_path(G, source, target): + try: + bidirectional_shortest_path_length(G, source, target) + except NoPath: + return False + return True diff --git a/graphblas_algorithms/algorithms/shortest_paths/unweighted.py b/graphblas_algorithms/algorithms/shortest_paths/unweighted.py new file mode 100644 index 0000000..ec87b65 --- /dev/null +++ b/graphblas_algorithms/algorithms/shortest_paths/unweighted.py @@ -0,0 +1,64 @@ +from graphblas import Matrix, Vector, replace +from graphblas.semiring import any_pair + +from .._bfs import _bfs_level, _bfs_levels +from ..exceptions import NoPath + +__all__ = [ + "single_source_shortest_path_length", + "single_target_shortest_path_length", + "all_pairs_shortest_path_length", +] + + +def single_source_shortest_path_length(G, source, cutoff=None): + return _bfs_level(G, source, cutoff=cutoff) + + +def single_target_shortest_path_length(G, target, cutoff=None): + return _bfs_level(G, target, cutoff=cutoff, transpose=True) + + +def all_pairs_shortest_path_length(G, cutoff=None, *, nodes=None, expand_output=False): + D = _bfs_levels(G, nodes, cutoff=cutoff) + if nodes is not None and expand_output and D.ncols != D.nrows: + ids = G.list_to_ids(nodes) + rv = Matrix(D.dtype, D.ncols, D.ncols, name=D.name) + rv[ids, :] = D + return rv + return D + + +def bidirectional_shortest_path_length(G, source, target): + # Perform bidirectional BFS from source to target and target to source + # TODO: have this raise NodeNotFound? 
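+    # The frontiers are grown alternately from the source and from the target; the loop
+    # counter is returned as the path length the first time the two frontiers meet.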
+ if source not in G or target not in G: + raise KeyError(f"Either source {source} or target {target} is not in G") # NodeNotFound + src = G._key_to_id[source] + dst = G._key_to_id[target] + if src == dst: + return 0 + A = G.get_property("offdiag") + q_src = Vector(bool, size=A.nrows, name="q_src") + q_src[src] = True + seen_src = q_src.dup(name="seen_src") + q_dst = Vector(bool, size=A.nrows, name="q_dst") + q_dst[dst] = True + seen_dst = q_dst.dup(name="seen_dst", clear=True) + any_pair_bool = any_pair[bool] + for i in range(1, A.nrows + 1, 2): + q_src(~seen_src.S, replace) << any_pair_bool(q_src @ A) + if q_src.nvals == 0: + raise NoPath(f"No path between {source} and {target}.") + if any_pair_bool(q_src @ q_dst): + return i + + seen_dst(q_dst.S) << True + q_dst(~seen_dst.S, replace) << any_pair_bool(A @ q_dst) + if q_dst.nvals == 0: + raise NoPath(f"No path between {source} and {target}.") + if any_pair_bool(q_src @ q_dst): + return i + 1 + + seen_src(q_src.S) << True + raise NoPath(f"No path between {source} and {target}.") diff --git a/graphblas_algorithms/algorithms/shortest_paths/weighted.py b/graphblas_algorithms/algorithms/shortest_paths/weighted.py new file mode 100644 index 0000000..a83a060 --- /dev/null +++ b/graphblas_algorithms/algorithms/shortest_paths/weighted.py @@ -0,0 +1,348 @@ +import numpy as np +from graphblas import Matrix, Vector, binary, indexunary, monoid, replace, select, unary +from graphblas.semiring import any_pair, min_plus + +from .._bfs import _bfs_level, _bfs_levels, _bfs_parent, _bfs_plain +from ..exceptions import NoPath, Unbounded + +__all__ = [ + "single_source_bellman_ford_path_length", + "bellman_ford_path", + "bellman_ford_path_length", + "bellman_ford_path_lengths", + "negative_edge_cycle", +] + + +def _bellman_ford_path_length(G, source, target=None, *, cutoff=None, name): + # No need for `is_weighted=` keyword, b/c this is assumed to be weighted (I think) + src_id = G._key_to_id[source] + if target is not None: + dst_id = G._key_to_id[target] + else: + dst_id = None + + if G.get_property("is_iso"): + # If the edges are iso-valued (and positive), then we can simply do level BFS + is_negative, iso_value = G.get_properties("has_negative_edges+ iso_value") + if not is_negative: + if cutoff is not None: + cutoff = int(cutoff // iso_value.get()) + d = _bfs_level(G, source, target, cutoff=cutoff, dtype=iso_value.dtype) + if dst_id is not None: + d = d.get(dst_id) + if d is None: + raise NoPath(f"node {target} not reachable from {source}") + if iso_value != 1: + d *= iso_value + return d + # It's difficult to detect negative cycles with BFS + if G._A[src_id, src_id].get() is not None: + raise Unbounded("Negative cycle detected.") + if not G.is_directed() and G._A[src_id, :].nvals > 0: + # For undirected graphs, any negative edge is a cycle + raise Unbounded("Negative cycle detected.") + + # Use `offdiag` instead of `A`, b/c self-loops don't contribute to the result, + # and negative self-loops are easy negative cycles to avoid. + # We check if we hit a self-loop negative cycle at the end. + if dst_id is None: + A, has_negative_diagonal = G.get_properties("offdiag has_negative_diagonal") + else: + A, is_negative, has_negative_diagonal = G.get_properties( + "offdiag has_negative_edges- has_negative_diagonal" + ) + if A.dtype == bool: + # Should we upcast e.g. INT8 to INT64 as well? 
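+        # A bool adjacency means unweighted edges of weight 1, so distances are integers.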
+ dtype = int + else: + dtype = A.dtype + n = A.nrows + d = Vector(dtype, n, name="single_source_bellman_ford_path_length") + d[src_id] = 0 + cur = d.dup(name="cur") + mask = Vector(bool, n, name="mask") + one = unary.one[bool] + for _i in range(n - 1): + # This is a slightly modified Bellman-Ford algorithm. + # `cur` is the current frontier of values that improved in the previous iteration. + # This means that in this iteration we drop values from `cur` that are not better. + cur << min_plus(cur @ A) + if cutoff is not None: + cur << select.valuele(cur, cutoff) + + # Mask is True where cur not in d or cur < d + mask << one(cur) + mask(binary.second) << binary.lt(cur & d) + + # Drop values from `cur` that didn't improve + cur(mask.V, replace) << cur + if cur.nvals == 0: + break + # Update `d` with values that improved + d(cur.S) << cur + if dst_id is not None and not is_negative: + # Limit exploration if we have a target + cutoff = cur.get(dst_id, cutoff) + else: + # Check for negative cycle when for loop completes without breaking + cur << min_plus(cur @ A) + if cutoff is not None: + cur << select.valuele(cur, cutoff) + mask << binary.lt(cur & d) + if dst_id is None and mask.reduce(monoid.lor) or dst_id is not None and mask.get(dst_id): + raise Unbounded("Negative cycle detected.") + if has_negative_diagonal: + # We removed diagonal entries above, so check if we visited one with a negative weight + diag = G.get_property("diag") + cur << select.valuelt(diag, 0) + if any_pair(d @ cur): + raise Unbounded("Negative cycle detected.") + if dst_id is not None: + d = d.get(dst_id) + if d is None: + raise NoPath(f"node {target} not reachable from {source}") + return d + + +def single_source_bellman_ford_path_length( + G, source, *, cutoff=None, name="single_source_bellman_ford_path_length" +): + return _bellman_ford_path_length(G, source, cutoff=cutoff, name=name) + + +def bellman_ford_path_length(G, source, target): + return _bellman_ford_path_length(G, source, target, name="bellman_ford_path_length") + + +def bellman_ford_path_lengths(G, nodes=None, *, expand_output=False): + """Extra parameter: expand_output + + Parameters + ---------- + expand_output : bool, default False + When False, the returned Matrix has one row per node in nodes. + When True, the returned Matrix has the same shape as the input Matrix. + """ + # Same algorithms as in `single_source_bellman_ford_path_length`, but with + # `Cur` as a Matrix with each row corresponding to a source node. 
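+    # Rows of `Cur` and `D` hold per-source tentative distances, so a single min_plus
+    # matrix-matrix multiply relaxes every source at once.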
+ if G.get_property("is_iso"): + is_negative, iso_value = G.get_properties("has_negative_edges+ iso_value") + if not is_negative: + D = _bfs_levels(G, nodes, dtype=iso_value.dtype) + if iso_value != 1: + D *= iso_value + if nodes is not None and expand_output and D.ncols != D.nrows: + ids = G.list_to_ids(nodes) + rv = Matrix(D.dtype, D.ncols, D.ncols, name=D.name) + rv[ids, :] = D + return rv + return D + if not G.is_directed(): + # For undirected graphs, any negative edge is a cycle + if nodes is not None: + ids = G.list_to_ids(nodes) + if G._A[ids, :].nvals > 0: + raise Unbounded("Negative cycle detected.") + elif G._A.nvals > 0: + raise Unbounded("Negative cycle detected.") + + A, has_negative_diagonal = G.get_properties("offdiag has_negative_diagonal") + if A.dtype == bool: + dtype = int + else: + dtype = A.dtype + n = A.nrows + if nodes is None: + # TODO: `D = Vector.from_scalar(0, n, dtype).diag()` + D = Vector(dtype, n, name="bellman_ford_path_lengths_vector") + D << 0 + D = D.diag(name="bellman_ford_path_lengths") + else: + ids = G.list_to_ids(nodes) + D = Matrix.from_coo( + np.arange(len(ids), dtype=np.uint64), + ids, + 0, + dtype, + nrows=len(ids), + ncols=n, + name="bellman_ford_path_lengths", + ) + Cur = D.dup(name="Cur") + Mask = Matrix(bool, D.nrows, D.ncols, name="Mask") + one = unary.one[bool] + for _i in range(n - 1): + Cur << min_plus(Cur @ A) + Mask << one(Cur) + Mask(binary.second) << binary.lt(Cur & D) + Cur(Mask.V, replace) << Cur + if Cur.nvals == 0: + break + D(Cur.S) << Cur + else: + Cur << min_plus(Cur @ A) + Mask << binary.lt(Cur & D) + if Mask.reduce_scalar(monoid.lor): + raise Unbounded("Negative cycle detected.") + if has_negative_diagonal: + diag = G.get_property("diag") + cur = select.valuelt(diag, 0) + if any_pair(D @ cur).nvals > 0: + raise Unbounded("Negative cycle detected.") + if nodes is not None and expand_output and D.ncols != D.nrows: + rv = Matrix(D.dtype, n, n, name=D.name) + rv[ids, :] = D + return rv + return D + + +def _reconstruct_path_from_parents(G, parents, src, dst): + indices, values = parents.to_coo(sort=False) + d = dict(zip(indices.tolist(), values.tolist(), strict=True)) + if dst not in d: + return [] + cur = dst + path = [cur] + while cur != src: + cur = d[cur] + path.append(cur) + return G.list_to_keys(reversed(path)) + + +def bellman_ford_path(G, source, target): + src_id = G._key_to_id[source] + dst_id = G._key_to_id[target] + if G.get_property("is_iso"): + # If the edges are iso-valued (and positive), then we can simply do level BFS + is_negative = G.get_property("has_negative_edges+") + if not is_negative: + p = _bfs_parent(G, source, target) + return _reconstruct_path_from_parents(G, p, src_id, dst_id) + raise Unbounded("Negative cycle detected.") + A, is_negative, has_negative_diagonal = G.get_properties( + "offdiag has_negative_edges- has_negative_diagonal" + ) + if A.dtype == bool: + # Should we upcast e.g. INT8 to INT64 as well? + dtype = int + else: + dtype = A.dtype + cutoff = None + n = A.nrows + d = Vector(dtype, n, name="bellman_ford_path_length") + d[src_id] = 0 + p = Vector(int, n, name="bellman_ford_path_parent") + p[src_id] = src_id + + prev = d.dup(name="prev") + cur = Vector(dtype, n, name="cur") + indices = Vector(int, n, name="indices") + mask = Vector(bool, n, name="mask") + B = Matrix(dtype, n, n, name="B") + Indices = Matrix(int, n, n, name="Indices") + cols = prev.to_coo(values=False)[0] + one = unary.one[bool] + for _i in range(n - 1): + # This is a slightly modified Bellman-Ford algorithm. 
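+        # Compared to the length-only version above, this also reverse-engineers a parent
+        # for each improved node (see the B / Indices block below) so the path can be rebuilt.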
+ # `cur` is the current frontier of values that improved in the previous iteration. + # This means that in this iteration we drop values from `cur` that are not better. + cur << min_plus(prev @ A) + if cutoff is not None: + cur << select.valuele(cur, cutoff) + + # Mask is True where cur not in d or cur < d + mask << one(cur) + mask(binary.second) << binary.lt(cur & d) + + # Drop values from `cur` that didn't improve + cur(mask.V, replace) << cur + if cur.nvals == 0: + break + # Update `d` with values that improved + d(cur.S) << cur + if not is_negative: + # Limit exploration if we have a target + cutoff = cur.get(dst_id, cutoff) + + # Now try to find the parents! + # This is also not standard. Typically, UDTs and UDFs are used to keep + # track of both the minimum element and the parent id at the same time. + # Only include rows and columns that were used this iteration. + rows = cols + cols = cur.to_coo(values=False)[0] + B.clear() + B[rows, cols] = A[rows, cols] + + # Reverse engineer to determine parent + B << binary.plus(prev & B) + B << binary.iseq(B & cur) + B << select.valuene(B, False) + Indices << indexunary.rowindex(B) + indices << Indices.reduce_columnwise(monoid.min) + p(indices.S) << indices + prev, cur = cur, prev + else: + # Check for negative cycle when for loop completes without breaking + cur << min_plus(prev @ A) + if cutoff is not None: + cur << select.valuele(cur, cutoff) + mask << binary.lt(cur & d) + if mask.get(dst_id): + raise Unbounded("Negative cycle detected.") + path = _reconstruct_path_from_parents(G, p, src_id, dst_id) + if has_negative_diagonal and path: + mask.clear() + mask[G.list_to_ids(path)] = True + diag = G.get_property("diag", mask=mask.S) + if diag.nvals > 0: + raise Unbounded("Negative cycle detected.") + mask << binary.first(mask & cur) # mask(cur.S, replace) << mask + if mask.nvals > 0: + # Is there a path from any visited node with negative self-loop to target? + # We could actually stop as soon as any from `path` is visited + indices, _ = mask.to_coo(values=False)[0] + q = _bfs_plain(G, target=target, index=indices, cutoff=_i) + if dst_id in q: + raise Unbounded("Negative cycle detected.") + return path + + +def negative_edge_cycle(G): + # TODO: use a heuristic to try to stop early + if G.is_directed(): + deg = "total_degrees-" + else: + deg = "degrees-" + A, degrees, has_negative_diagonal, has_negative_edges = G.get_properties( + f"offdiag {deg} has_negative_diagonal has_negative_edges-" + ) + if has_negative_diagonal: + return True + if not has_negative_edges: + return False + if A.dtype == bool: + # Should we upcast e.g. INT8 to INT64 as well? 
+ dtype = int + else: + dtype = A.dtype + n = A.nrows + # Begin from every node that has edges + d = Vector(dtype, n, name="negative_edge_cycle") + d(degrees.S) << 0 + cur = d.dup(name="cur") + mask = Vector(bool, n, name="mask") + one = unary.one[bool] + for _i in range(n - 1): + cur << min_plus(cur @ A) + mask << one(cur) + mask(binary.second) << binary.lt(cur & d) + cur(mask.V, replace) << cur + if cur.nvals == 0: + return False + d(cur.S) << cur + cur << min_plus(cur @ A) + mask << binary.lt(cur & d) + if mask.reduce(monoid.lor): + return True + return False diff --git a/graphblas_algorithms/algorithms/simple_paths.py b/graphblas_algorithms/algorithms/simple_paths.py new file mode 100644 index 0000000..44ba66e --- /dev/null +++ b/graphblas_algorithms/algorithms/simple_paths.py @@ -0,0 +1,29 @@ +from graphblas import Matrix, binary + +__all__ = ["is_simple_path"] + + +def is_simple_path(G, nodes): + if len(nodes) == 0: + return False + if len(nodes) == 1: + return nodes[0] in G + A = G._A # offdiag instead? + if A.nvals < len(nodes) - 1: + return False + key_to_id = G._key_to_id + indices = [key_to_id[key] for key in nodes if key in key_to_id] + if len(indices) != len(nodes) or len(indices) > len(set(indices)): + return False + # Check all steps in path at once + P = Matrix.from_coo(indices[:-1], indices[1:], True, nrows=A.nrows, ncols=A.ncols) + P << binary.second(A & P) + return P.nvals == len(indices) - 1 + # Alternative + # it = iter(indices) + # prev = next(it) + # for cur in it: + # if (prev, cur) not in A: + # return False + # prev = cur + # return True diff --git a/graphblas_algorithms/algorithms/smetric.py b/graphblas_algorithms/algorithms/smetric.py new file mode 100644 index 0000000..e2e85bb --- /dev/null +++ b/graphblas_algorithms/algorithms/smetric.py @@ -0,0 +1,14 @@ +from graphblas import binary + +__all__ = ["s_metric"] + + +def s_metric(G): + if G.is_directed(): + degrees = G.get_property("total_degrees+") + else: + degrees = G.get_property("degrees+") + return (binary.first(degrees & G._A) @ degrees).reduce().get(0) / 2 + # Alternatives + # return (degrees @ binary.second(G._A & degrees)).reduce().get(0) / 2 + # return degrees.outer(degrees).new(mask=G._A.S).reduce_scalar().get(0) / 2 diff --git a/graphblas_algorithms/algorithms/structuralholes.py b/graphblas_algorithms/algorithms/structuralholes.py new file mode 100644 index 0000000..7c4bec8 --- /dev/null +++ b/graphblas_algorithms/algorithms/structuralholes.py @@ -0,0 +1,11 @@ +__all__ = ["mutual_weight"] + + +def mutual_weight(G, u, v): + key_to_id = G._key_to_id + if u not in key_to_id or v not in key_to_id: + return 0 + u = key_to_id[u] + v = key_to_id[v] + A = G._A + return A.get(u, v, 0) + A.get(v, u, 0) diff --git a/graphblas_algorithms/algorithms/tests/__init__.py b/graphblas_algorithms/algorithms/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/graphblas_algorithms/algorithms/tests/test_cluster.py b/graphblas_algorithms/algorithms/tests/test_cluster.py new file mode 100644 index 0000000..50a3196 --- /dev/null +++ b/graphblas_algorithms/algorithms/tests/test_cluster.py @@ -0,0 +1,53 @@ +import graphblas as gb + +from graphblas_algorithms import Graph +from graphblas_algorithms.algorithms import cluster + + +def test_triangles_full(): + # Including self-edges! 
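+ # The graph below is the complete graph K5 plus self-edges, so each node is in
+ # C(4, 2) = 6 triangles and there are C(5, 3) = 10 triangles in total.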
+ G = gb.Matrix(bool, 5, 5) + G[:, :] = True + G2 = gb.select.offdiag(G).new() + G = Graph(G) + G2 = Graph(G2) + result = cluster.triangles(G) + expected = gb.Vector(int, 5) + expected[:] = 6 + assert result.isequal(expected) + result = cluster.triangles(G2) + assert result.isequal(expected) + mask = gb.Vector(bool, 5) + mask[0] = True + mask[3] = True + result = cluster.triangles(G, mask=mask.S) + expected = gb.Vector(int, 5) + expected[0] = 6 + expected[3] = 6 + assert result.isequal(expected) + result = cluster.triangles(G2, mask=mask.S) + assert result.isequal(expected) + assert cluster.single_triangle(G, 1) == 6 + assert cluster.single_triangle(G, 0) == 6 + assert cluster.single_triangle(G2, 0) == 6 + assert cluster.total_triangles(G2) == 10 + assert cluster.total_triangles(G) == 10 + assert cluster.transitivity(G) == 1.0 + assert cluster.transitivity(G2) == 1.0 + result = cluster.clustering(G) + expected = gb.Vector(float, 5) + expected[:] = 1 + assert result.isequal(expected) + result = cluster.clustering(G2) + assert result.isequal(expected) + assert cluster.single_clustering(G, 0) == 1 + assert cluster.single_clustering(G2, 0) == 1 + expected(mask.S, replace=True) << 1 + result = cluster.clustering(G, mask=mask.S) + assert result.isequal(expected) + result = cluster.clustering(G2, mask=mask.S) + assert result.isequal(expected) + assert cluster.average_clustering(G) == 1 + assert cluster.average_clustering(G2) == 1 + assert cluster.average_clustering(G, mask=mask.S) == 1 + assert cluster.average_clustering(G2, mask=mask.S) == 1 diff --git a/graphblas_algorithms/algorithms/tournament.py b/graphblas_algorithms/algorithms/tournament.py new file mode 100644 index 0000000..32624ff --- /dev/null +++ b/graphblas_algorithms/algorithms/tournament.py @@ -0,0 +1,27 @@ +import numpy as np +from graphblas import select + +__all__ = ["is_tournament", "score_sequence", "tournament_matrix"] + + +def is_tournament(G): + A = G._A + if A.nvals != A.nrows * (A.ncols - 1) // 2 or G.get_property("has_self_edges"): + return False + # Alternative: do `select.triu(A.T).new(mask=A.S)` and don't check "has_self_edges" + val = select.triu(A.T, 1).new(mask=A.S) + return val.nvals == 0 + + +def score_sequence(G): + degrees = G.get_property("row_degrees+") + _, values = degrees.to_coo(indices=False, sort=False) + values.sort() + if degrees.nvals != degrees.size: + values = np.pad(values, (degrees.size - degrees.nvals, 0)) + return values + + +def tournament_matrix(G): + A = G._A + return (A - A.T).new(name="tournament_matrix") diff --git a/graphblas_algorithms/algorithms/traversal/__init__.py b/graphblas_algorithms/algorithms/traversal/__init__.py new file mode 100644 index 0000000..7811162 --- /dev/null +++ b/graphblas_algorithms/algorithms/traversal/__init__.py @@ -0,0 +1 @@ +from .breadth_first_search import * diff --git a/graphblas_algorithms/algorithms/traversal/breadth_first_search.py b/graphblas_algorithms/algorithms/traversal/breadth_first_search.py new file mode 100644 index 0000000..a761134 --- /dev/null +++ b/graphblas_algorithms/algorithms/traversal/breadth_first_search.py @@ -0,0 +1,45 @@ +from graphblas import Vector, replace +from graphblas.semiring import any_pair + +__all__ = [ + "bfs_layers", + "descendants_at_distance", +] + + +def bfs_layers(G, sources): + if sources in G: + sources = [sources] + ids = G.list_to_ids(sources) + if ids is None or len(ids) == 0: + return + A = G.get_property("offdiag") + n = A.nrows + v = Vector(bool, size=n, name="bfs_layers") + q = Vector.from_coo(ids, True, 
size=n, name="q") + any_pair_bool = any_pair[bool] + yield q.dup(name="bfs_layer_0") + for i in range(1, n): + v(q.S) << True + q(~v.S, replace) << any_pair_bool(q @ A) + if q.nvals == 0: + return + yield q.dup(name=f"bfs_layer_{i}") + + +def descendants_at_distance(G, source, distance): + index = G._key_to_id[source] + A = G.get_property("offdiag") + n = A.nrows + q = Vector(bool, size=n, name=f"descendants_at_distance_{distance}") + q[index] = True + if distance == 0: + return q + v = Vector(bool, size=n, name="bfs_seen") + any_pair_bool = any_pair[bool] + for _i in range(1, distance + 1): + v(q.S) << True + q(~v.S, replace) << any_pair_bool(q @ A) + if q.nvals == 0: + break + return q diff --git a/graphblas_algorithms/algorithms/triads.py b/graphblas_algorithms/algorithms/triads.py new file mode 100644 index 0000000..e6ec2be --- /dev/null +++ b/graphblas_algorithms/algorithms/triads.py @@ -0,0 +1,12 @@ +from graphblas_algorithms import DiGraph, Graph + +__all__ = ["is_triad"] + + +def is_triad(G): + return ( + isinstance(G, (Graph, DiGraph)) + and G.is_directed() + and G.order() == 3 + and not G.get_property("has_self_edges") + ) diff --git a/graphblas_algorithms/classes/__init__.py b/graphblas_algorithms/classes/__init__.py new file mode 100644 index 0000000..7fa216d --- /dev/null +++ b/graphblas_algorithms/classes/__init__.py @@ -0,0 +1,2 @@ +from .digraph import * +from .graph import * diff --git a/graphblas_algorithms/classes/_caching.py b/graphblas_algorithms/classes/_caching.py new file mode 100644 index 0000000..3074bed --- /dev/null +++ b/graphblas_algorithms/classes/_caching.py @@ -0,0 +1,185 @@ +from graphblas import Scalar, dtypes, op +from graphblas.core import operator + +NONNEGATIVE_DTYPES = {dtypes.BOOL, dtypes.UINT8, dtypes.UINT16, dtypes.UINT32, dtypes.UINT64} + + +def get_reduce_to_vector(key, opname, methodname): + op_ = op.from_string(opname) + op_, opclass = operator.find_opclass(op_) + keybase = key[:-1] + if key[-1] == "-": + + def get_reduction(G, mask=None): + cache = G._cache + if mask is not None: + if key in cache: + return cache[key].dup(mask=mask) + if cache.get("has_self_edges") is False and f"{keybase}+" in cache: + cache[key] = cache[f"{keybase}+"] + return cache[key].dup(mask=mask) + if "offdiag" in cache: + return getattr(cache["offdiag"], methodname)(op_).new(mask=mask, name=key) + if ( + "L-" in cache + and "U-" in cache + and opclass in {"BinaryOp", "Monoid"} + and G.get_property("has_self_edges") + ): + return op_( + getattr(cache["L-"], methodname)(op_).new(mask=mask) + | getattr(cache["U-"], methodname)(op_).new(mask=mask) + ).new(name=key) + if not G.get_property("has_self_edges"): + return G.get_property(f"{keybase}+", mask=mask) + return getattr(G.get_property("offdiag"), methodname)(op_).new(mask=mask, name=key) + if key not in cache: + if cache.get("has_self_edges") is False and f"{keybase}+" in cache: + cache[key] = cache[f"{keybase}+"] + elif "offdiag" in cache: + cache[key] = getattr(cache["offdiag"], methodname)(op_).new(name=key) + elif ( + "L-" in cache + and "U-" in cache + and opclass in {"BinaryOp", "Monoid"} + and G.get_property("has_self_edges") + ): + cache[key] = op_( + getattr(cache["L-"], methodname)(op_) + | getattr(cache["U-"], methodname)(op_) + ).new(name=key) + elif not G.get_property("has_self_edges"): + cache[key] = G.get_property(f"{keybase}+") + else: + cache[key] = getattr(G.get_property("offdiag"), methodname)(op_).new(name=key) + if ( + "has_self_edges" not in cache + and f"{keybase}+" in cache + and 
cache[key].nvals != cache[f"{keybase}+"].nvals + ): + cache["has_self_edges"] = True + elif cache.get("has_self_edges") is False: + cache[f"{keybase}+"] = cache[key] + return cache[key] + + else: + + def get_reduction(G, mask=None): + A = G._A + cache = G._cache + if mask is not None: + if key in cache: + return cache[key].dup(mask=mask) + if cache.get("has_self_edges") is False and f"{keybase}-" in cache: + cache[key] = cache[f"{keybase}-"] + return cache[key].dup(mask=mask) + if methodname == "reduce_columnwise" and "AT" in cache: + return cache["AT"].reduce_rowwise(op_).new(mask=mask, name=key) + return getattr(A, methodname)(op_).new(mask=mask, name=key) + if key not in cache: + if cache.get("has_self_edges") is False and f"{keybase}-" in cache: + cache[key] = cache[f"{keybase}-"] + elif methodname == "reduce_columnwise" and "AT" in cache: + cache[key] = cache["AT"].reduce_rowwise(op_).new(name=key) + else: + cache[key] = getattr(A, methodname)(op_).new(name=key) + if ( + "has_self_edges" not in cache + and f"{keybase}-" in cache + and cache[key].nvals != cache[f"{keybase}-"].nvals + ): + cache["has_self_edges"] = True + elif cache.get("has_self_edges") is False: + cache[f"{keybase}-"] = cache[key] + return cache[key] + + return get_reduction + + +def get_reduce_to_scalar(key, opname): + op_ = op.from_string(opname) + op_, opclass = operator.find_opclass(op_) + keybase = key[:-1] + if key[-1] == "-": + + def get_reduction(G, mask=None): + cache = G._cache + if key not in cache: + if cache.get("has_self_edges") is False and f"{keybase}+" in cache: + cache[key] = cache[f"{keybase}+"] + elif f"{opname}_rowwise-" in cache: + cache[key] = cache[f"{opname}_rowwise-"].reduce(op_).new(name=key) + elif f"{opname}_columnwise-" in cache: + cache[key] = cache[f"{opname}_columnwise-"].reduce(op_).new(name=key) + elif cache.get("has_self_edges") is False and f"{opname}_rowwise+" in cache: + cache[key] = cache[f"{opname}_rowwise+"].reduce(op_).new(name=key) + elif cache.get("has_self_edges") is False and f"{opname}_columnwise+" in cache: + cache[key] = cache[f"{opname}_columnwise+"].reduce(op_).new(name=key) + elif "offdiag" in cache: + cache[key] = cache["offdiag"].reduce_scalar(op_).new(name=key) + elif ( + "L-" in cache + and "U-" in cache + and opclass in {"BinaryOp", "Monoid"} + and G.get_property("has_self_edges") + ): + return op_( + cache["L-"].reduce(op_)._as_vector() | cache["U-"].reduce(op_)._as_vector() + )[0].new(name=key) + elif not G.get_property("has_self_edges"): + cache[key] = G.get_property(f"{keybase}+") + else: + cache[key] = G.get_property("offdiag").reduce_scalar(op_).new(name=key) + if ( + "has_self_edges" not in cache + and f"{keybase}+" in cache + and cache[key] != cache[f"{keybase}+"] + ): + cache["has_self_edges"] = True + elif cache.get("has_self_edges") is False: + cache[f"{keybase}+"] = cache[key] + return cache[key] + + elif key[-1] == "+": + + def get_reduction(G, mask=None): + A = G._A + cache = G._cache + if key not in cache: + if cache.get("has_self_edges") is False and f"{keybase}-" in cache: + cache[key] = cache[f"{keybase}-"] + elif f"{opname}_rowwise+" in cache: + cache[key] = cache[f"{opname}_rowwise+"].reduce(op_).new(name=key) + elif f"{opname}_columnwise+" in cache: + cache[key] = cache[f"{opname}_columnwise+"].reduce(op_).new(name=key) + elif cache.get("has_self_edges") is False and f"{opname}_rowwise-" in cache: + cache[key] = cache[f"{opname}_rowwise-"].reduce(op_).new(name=key) + elif cache.get("has_self_edges") is False and f"{opname}_columnwise-" in 
cache: + cache[key] = cache[f"{opname}_columnwise-"].reduce(op_).new(name=key) + else: + cache[key] = A.reduce_scalar(op_).new(name=key) + if ( + "has_self_edges" not in cache + and f"{keybase}-" in cache + and cache[key] != cache[f"{keybase}-"] + ): + cache["has_self_edges"] = True + elif cache.get("has_self_edges") is False: + cache[f"{keybase}-"] = cache[key] + return cache[key] + + elif key.endswith("_diagonal"): + + def get_reduction(G, mask=None): + A = G._A + cache = G._cache + if key not in cache: + if not G.get_property("has_self_edges"): + cache[key] = Scalar(op_[A.dtype].return_type, name=key) + else: + cache[key] = G.get_property("diag").reduce(op_).new(name=key) + return cache[key] + + else: # pragma: no cover (sanity) + raise RuntimeError + return get_reduction diff --git a/graphblas_algorithms/classes/_utils.py b/graphblas_algorithms/classes/_utils.py new file mode 100644 index 0000000..ecf66d9 --- /dev/null +++ b/graphblas_algorithms/classes/_utils.py @@ -0,0 +1,264 @@ +import graphblas as gb +import numpy as np +from graphblas import Matrix, Vector, binary +from graphblas.core.matrix import TransposedMatrix + +################ +# Classmethods # +################ + + +def from_networkx(cls, G, weight=None, dtype=None): + rv = cls() + rv._key_to_id = {k: i for i, k in enumerate(G)} + if rv._key_to_id: + rv._A = gb.io.from_networkx(G, nodelist=rv._key_to_id, weight=weight, dtype=dtype) + else: + rv._A = Matrix(dtype if dtype is not None else float) + return rv + + +############## +# Properties # +############## + + +def id_to_key(self): + if self._id_to_key is None: + self._id_to_key = {val: key for key, val in self._key_to_id.items()} + return self._id_to_key + + +########### +# Methods # +########### + + +def get_property(self, name, *, mask=None): + return self._get_property[self._cache_aliases.get(name, name)](self, mask) + + +def get_properties(self, names, *, mask=None): + if isinstance(names, str): + # Separated by commas and/or spaces + names = [ + self._cache_aliases.get(name, name) + for name in names.replace(" ", ",").split(",") + if name + ] + else: + names = [self._cache_aliases.get(name, name) for name in names] + results = { + name: self._get_property[name](self, mask) + for name in sorted(names, key=self._property_priority.__getitem__) + } + return [results[name] for name in names] + + +def dict_to_vector(self, d, *, size=None, dtype=None, name=None): + if d is None: + return None + if size is None: + size = len(self) + key_to_id = self._key_to_id + indices, values = zip(*((key_to_id[key], val) for key, val in d.items()), strict=True) + return Vector.from_coo(indices, values, size=size, dtype=dtype, name=name) + + +def list_to_vector(self, nodes, dtype=None, *, values=True, size=None, name=None): + if nodes is None: + return None + if size is None: + size = len(self) + key_to_id = self._key_to_id + index = [key_to_id[key] for key in nodes] + return Vector.from_coo(index, values, size=size, dtype=dtype, name=name) + + +def list_to_mask(self, nodes, *, size=None, name="mask"): + if nodes is None: + return None + return self.list_to_vector(nodes, size=size, name=name).S + + +def list_to_ids(self, nodes): + if nodes is None: + return None + key_to_id = self._key_to_id + return np.fromiter((key_to_id[key] for key in nodes), np.uint64) + + +def list_to_keys(self, indices): + if indices is None: + return None + id_to_key = self.id_to_key + return [id_to_key[idx] for idx in indices] + + +def set_to_vector(self, nodes, dtype=bool, *, ignore_extra=False, size=None, 
name=None): + if nodes is None: + return None + if size is None: + size = len(self) + key_to_id = self._key_to_id + if ignore_extra: + if not isinstance(nodes, set): + nodes = set(nodes) + nodes = nodes & key_to_id.keys() + index = [key_to_id[key] for key in nodes] + return Vector.from_coo(index, True, size=size, dtype=dtype, name=name) + + +def vector_to_dict(self, v, *, mask=None, fill_value=None): + if mask is not None: + if fill_value is not None and v.nvals < mask.parent.nvals: + v(mask, binary.first) << fill_value + elif fill_value is not None and v.nvals < v.size: + v(mask=~v.S) << fill_value + id_to_key = self.id_to_key + return {id_to_key[index]: value for index, value in zip(*v.to_coo(sort=False), strict=True)} + + +def vector_to_list(self, v, *, values_are_keys=False): + id_to_key = self.id_to_key + return [ + id_to_key[idx] + for idx in v.to_coo(indices=not values_are_keys, values=values_are_keys, sort=True)[ + bool(values_are_keys) + ].tolist() + ] + + +def vector_to_nodemap(self, v, *, mask=None, fill_value=None, values_are_keys=False): + from .nodemap import NodeMap + + if mask is not None: + if fill_value is not None and v.nvals < mask.parent.nvals: + v(mask, binary.first) << fill_value + fill_value = None + + rv = NodeMap( + v, fill_value=fill_value, values_are_keys=values_are_keys, key_to_id=self._key_to_id + ) + rv._id_to_key = self._id_to_key + return rv + + +def vector_to_nodeset(self, v): + from .nodeset import NodeSet + + rv = NodeSet(v, key_to_id=self._key_to_id) + rv._id_to_key = self._id_to_key + return rv + + +def vector_to_set(self, v): + id_to_key = self.id_to_key + indices, _ = v.to_coo(values=False, sort=False) + return {id_to_key[index] for index in indices} + + +def matrix_to_nodenodemap(self, A, *, fill_value=None, values_are_keys=False): + from .nodemap import NodeNodeMap + + rv = NodeNodeMap( + A, fill_value=fill_value, values_are_keys=values_are_keys, key_to_id=self._key_to_id + ) + rv._id_to_key = self._id_to_key + return rv + + +def matrix_to_vectornodemap(self, A): + from .nodemap import VectorNodeMap + + rv = VectorNodeMap(A, key_to_id=self._key_to_id) + rv._id_to_key = self._id_to_key + return rv + + +def matrix_to_dicts(self, A, *, use_row_index=False, use_column_index=False, values_are_keys=False): + """Convert a Matrix to a dict of dicts of the form ``{row: {col: val}}`` + + Use ``use_row_index=True`` to return the row index as keys in the dict, + and likewise for `use_column_index=True``. 
+ + """ + if isinstance(A, TransposedMatrix): + # Not covered + d = A.T.ss.export("hypercsc") + rows = d["cols"].tolist() + col_indices = d["row_indices"].tolist() + use_row_index, use_column_index = use_column_index, use_row_index + else: + d = A.ss.export("hypercsr") + rows = d["rows"].tolist() + col_indices = d["col_indices"].tolist() + indptr = d["indptr"] + values = d["values"].tolist() + id_to_key = self.id_to_key + if values_are_keys: + values = [id_to_key[val] for val in values] + it = zip(rows, np.lib.stride_tricks.sliding_window_view(indptr, 2).tolist(), strict=True) + if use_row_index and use_column_index: + return { + row: dict(zip(col_indices[start:stop], values[start:stop], strict=True)) + for row, (start, stop) in it + } + if use_row_index: + return { + row: { + id_to_key[col]: val + for col, val in zip(col_indices[start:stop], values[start:stop], strict=True) + } + for row, (start, stop) in it + } + if use_column_index: + return { + id_to_key[row]: dict(zip(col_indices[start:stop], values[start:stop], strict=True)) + for row, (start, stop) in it + } + return { + id_to_key[row]: { + id_to_key[col]: val + for col, val in zip(col_indices[start:stop], values[start:stop], strict=True) + } + for row, (start, stop) in it + } + + +def to_networkx(self, edge_attribute="weight"): + import networkx as nx + + # Not covered yet, but will probably be useful soon + if self.is_directed(): + G = nx.DiGraph() + A = self._A + else: + G = nx.Graph() + A = self.get_property("L+") + G.add_nodes_from(self._key_to_id) + id_to_key = self.id_to_key + rows, cols, vals = A.to_coo() + rows = (id_to_key[row] for row in rows.tolist()) + cols = (id_to_key[col] for col in cols.tolist()) + if edge_attribute is None: + G.add_edges_from(zip(rows, cols, strict=True)) + else: + G.add_weighted_edges_from(zip(rows, cols, vals, strict=True), weight=edge_attribute) + # What else should we copy over? + return G + + +def _cacheit(self, key, func, *args, **kwargs): + if key not in self._cache: + self._cache[key] = func(*args, **kwargs) + return self._cache[key] + + +def renumber_key_to_id(self, indices): + """Create `key_to_id` for e.g. a subgraph with node ids from `indices`""" + id_to_key = self.id_to_key + return {id_to_key[index]: i for i, index in enumerate(indices)} + # Alternative (about the same performance) + # keys = self.list_to_keys(indices) + # return dict(zip(keys, range(len(indices)), strict=True)) diff --git a/graphblas_algorithms/classes/digraph.py b/graphblas_algorithms/classes/digraph.py new file mode 100644 index 0000000..1e9fe5f --- /dev/null +++ b/graphblas_algorithms/classes/digraph.py @@ -0,0 +1,629 @@ +from collections import defaultdict +from copy import deepcopy + +import graphblas as gb +from graphblas import Matrix, binary, replace, select, unary + +import graphblas_algorithms as ga + +from . 
import _utils +from ._caching import get_reduce_to_scalar, get_reduce_to_vector +from .graph import ( + Graph, + get_A, + get_diag, + get_iso_value, + get_offdiag, + has_negative_diagonal, + has_negative_edgesm, + has_negative_edgesp, + is_iso, +) + + +def get_AT(G, mask=None): + """A.T""" + A = G._A + cache = G._cache + if "AT" not in cache: + cache["AT"] = A.T.new() + return cache["AT"] + + +def get_Up(G, mask=None): + """select.triu(A)""" + A = G._A + cache = G._cache + if "U+" not in cache: + if "U-" in cache and not G.get_property("has_self_edges"): + cache["U+"] = cache["U-"] + else: + cache["U+"] = select.triu(A).new(name="U+") + if "has_self_edges" not in cache: + if "U-" in cache: + cache["has_self_edges"] = cache["U+"].nvals > cache["U-"].nvals + elif "L+" in cache: + cache["has_self_edges"] = cache["U+"].nvals + cache["L+"].nvals > A.nvals + if cache.get("has_self_edges") is False: + cache["U-"] = cache["U+"] + return cache["U+"] + + +def get_Lp(G, mask=None): + """select.tril(A)""" + A = G._A + cache = G._cache + if "L+" not in cache: + if "L-" in cache and not G.get_property("has_self_edges"): + cache["L+"] = cache["L-"] + else: + cache["L+"] = select.tril(A).new(name="L+") + if "has_self_edges" not in cache: + if "L-" in cache: + cache["has_self_edges"] = cache["L+"].nvals > cache["L-"].nvals + elif "U+" in cache: + cache["has_self_edges"] = cache["L+"].nvals + cache["U+"].nvals > A.nvals + if cache.get("has_self_edges") is False: + cache["L-"] = cache["L+"] + return cache["L+"] + + +def get_Um(G, mask=None): + """select.triu(A, 1)""" + A = G._A + cache = G._cache + if "U-" not in cache: + if "U+" in cache: + if cache.get("has_self_edges") is False: + cache["U-"] = cache["U+"] + else: + cache["U-"] = select.triu(cache["U+"], 1).new(name="U-") + elif "offdiag" in cache: + cache["U-"] = select.triu(cache["offdiag"], 1).new(name="U-") + else: + cache["U-"] = select.triu(A, 1).new(name="U-") + if "has_self_edges" not in cache: + if "U+" in cache: + cache["has_self_edges"] = cache["U-"].nvals < cache["U+"].nvals + elif "L-" in cache: + cache["has_self_edges"] = cache["U-"].nvals + cache["L-"].nvals < A.nvals + if cache.get("has_self_edges") is False: + cache["U+"] = cache["U-"] + return cache["U-"] + + +def get_Lm(G, mask=None): + """select.tril(A, -1)""" + A = G._A + cache = G._cache + if "L-" not in cache: + if "L+" in cache: + if cache.get("has_self_edges") is False: + cache["L-"] = cache["L+"] + else: + cache["L-"] = select.tril(cache["L+"], -1).new(name="L-") + elif "offdiag" in cache: + cache["L-"] = select.tril(cache["offdiag"], -1).new(name="L-") + else: + cache["L-"] = select.tril(A, -1).new(name="L-") + if "has_self_edges" not in cache: + if "L+" in cache: + cache["has_self_edges"] = cache["L-"].nvals < cache["L+"].nvals + elif "U-" in cache: + cache["has_self_edges"] = cache["L-"].nvals + cache["U-"].nvals < A.nvals + if cache.get("has_self_edges") is False: + cache["L+"] = cache["L-"] + return cache["L-"] + + +def get_recip_degreesp(G, mask=None): + """pair(A & A.T).reduce_rowwise()""" + A = G._A + cache = G._cache + AT = cache.get("AT", A.T) + if mask is not None: + if "recip_degrees+" in cache: + return cache["recip_degrees+"].dup(mask=mask) + if cache.get("has_self_edges") is False and "recip_degrees-" in cache: + cache["recip_degrees+"] = cache["recip_degrees-"] + return cache["recip_degrees-"].dup(mask=mask) + if "recip_degrees-" in cache and "diag" in cache: + return (unary.one(cache["diag"]) + cache["recip_degrees-"]).new( + mask=mask, name="recip_degrees+" + ) 
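+ # A "reciprocal" edge is one stored in both A and A.T, which is what pair(A & AT) keeps.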
+ if "recip_degrees-" in cache and not G.get_property("has_self_edges"): + return cache["recip_degrees-"].dup(mask=mask) + return binary.pair(A & AT).reduce_rowwise().new(mask=mask, name="recip_degrees+") + if "recip_degrees+" not in cache: + if cache.get("has_self_edges") is False and "recip_degrees-" in cache: + cache["recip_degrees+"] = cache["recip_degrees-"] + elif "recip_degrees-" in cache and "diag" in cache: + cache["recip_degrees+"] = (unary.one(cache["diag"]) + cache["recip_degrees-"]).new( + name="recip_degrees+" + ) + elif "recip_degrees-" in cache and not G.get_property("has_self_edges"): + cache["recip_degrees+"] = cache["recip_degrees-"] + else: + cache["recip_degrees+"] = ( + binary.pair(A & AT).reduce_rowwise().new(name="recip_degrees+") + ) + if ( + "has_self_edges" not in cache + and "recip_degrees-" in cache + and cache["recip_degrees-"].nvals != cache["recip_degrees+"].nvals + ): + cache["has_self_edges"] = True + elif cache.get("has_self_edges") is False: + cache["recip_degrees-"] = cache["recip_degrees+"] + return cache["recip_degrees+"] + + +def get_recip_degreesm(G, mask=None): + """C = select.offdiag(A) ; pair(C & C.T).reduce_rowwise()""" + A = G._A + cache = G._cache + if "AT" in cache: + AT = cache["AT"] + elif "offdiag" in cache: + AT = cache["offdiag"].T + else: + AT = A.T + if mask is not None: + if "recip_degrees-" in cache: + return cache["recip_degrees-"].dup(mask=mask) + if cache.get("has_self_edges") is False and "recip_degrees+" in cache: + cache["recip_degrees-"] = cache["recip_degrees+"] + return cache["recip_degrees-"].dup(mask=mask) + if "recip_degrees+" in cache and "diag" in cache: + rv = binary.minus(cache["recip_degrees+"] | unary.one(cache["diag"])).new( + mask=mask, name="recip_degrees-" + ) + rv(rv.V, replace) << rv # drop 0s + return rv + if not G.get_property("has_self_edges"): + return G.get_property("recip_degrees+", mask=mask) + if "offdiag" in cache: + return ( + binary.pair(cache["offdiag"] & AT) + .reduce_rowwise() + .new(mask=mask, name="recip_degrees-") + ) + if "L-" in cache and "U-" in cache: + return ( + binary.pair(cache["L-"] & AT).reduce_rowwise().new(mask=mask) + + binary.pair(cache["U-"] & AT).reduce_rowwise().new(mask=mask) + ).new(name="recip_degrees-") + diag = G.get_property("diag", mask=mask) + overlap = binary.pair(A & AT).reduce_rowwise().new(mask=mask) + rv = binary.minus(overlap | unary.one(diag)).new(name="recip_degrees-") + rv(rv.V, replace) << rv # drop 0s + return rv + if "recip_degrees-" not in cache: + if cache.get("has_self_edges") is False and "recip_degrees+" in cache: + cache["recip_degrees-"] = cache["recip_degrees+"] + elif "recip_degrees+" in cache and "diag" in cache: + rv = binary.minus(cache["recip_degrees+"] | unary.one(cache["diag"])).new( + name="recip_degrees-" + ) + rv(rv.V, replace) << rv # drop 0s + cache["recip_degrees-"] = rv + elif not G.get_property("has_self_edges"): + cache["recip_degrees-"] = G.get_property("recip_degrees+") + elif "offdiag" in cache: + cache["recip_degrees-"] = ( + binary.pair(cache["offdiag"] & AT).reduce_rowwise().new(name="recip_degrees-") + ) + elif "L-" in cache and "U-" in cache: + cache["recip_degrees-"] = ( + binary.pair(cache["L-"] & AT).reduce_rowwise().new() + + binary.pair(cache["U-"] & AT).reduce_rowwise().new() + ).new(name="recip_degrees-") + else: + diag = G.get_property("diag") + overlap = binary.pair(A & AT).reduce_rowwise().new() + rv = binary.minus(overlap | unary.one(diag)).new(name="recip_degrees-") + rv(rv.V, replace) << rv # drop 0s + 
cache["recip_degrees-"] = rv + if ( + "has_self_edges" not in cache + and "recip_degrees+" in cache + and cache["recip_degrees-"].nvals != cache["recip_degrees+"].nvals + ): + cache["has_self_edges"] = True + elif cache.get("has_self_edges") is False: + cache["recip_degrees+"] = cache["recip_degrees-"] + return cache["recip_degrees-"] + + +def get_total_degreesp(G, mask=None): + """A.reduce_rowwise(agg.count) + A.reduce_columnwise(agg.count)""" + cache = G._cache + if mask is not None: + if "total_degrees+" in cache: + return cache["total_degrees+"].dup(mask=mask) + if cache.get("has_self_edges") is False and "total_degrees-" in cache: + cache["total_degrees+"] = cache["total_degrees-"] + return cache["total_degrees+"].dup(mask=mask) + return ( + G.get_property("row_degrees+", mask=mask) + G.get_property("column_degrees+", mask=mask) + ).new(name="total_degrees+") + if "total_degrees+" not in cache: + if cache.get("has_self_edges") is False and "total_degrees-" in cache: + cache["total_degrees+"] = cache["total_degrees-"] + else: + cache["total_degrees+"] = ( + G.get_property("row_degrees+") + G.get_property("column_degrees+") + ).new(name="total_degrees+") + if ( + "has_self_edges" not in cache + and "total_degrees-" in cache + and cache["total_degrees-"].nvals != cache["total_degrees+"].nvals + ): + cache["has_self_edges"] = True + elif cache.get("has_self_edges") is False: + cache["total_degrees-"] = cache["total_degrees+"] + return cache["total_degrees+"] + + +def get_total_degreesm(G, mask=None): + """C = select.offdiag(A) ; C.reduce_rowwise(agg.count) + C.reduce_columnwise(agg.count)""" + cache = G._cache + if mask is not None: + if "total_degrees-" in cache: + return cache["total_degrees-"].dup(mask=mask) + if cache.get("has_self_edges") is False and "total_degrees+" in cache: + cache["total_degrees-"] = cache["total_degrees+"] + return cache["total_degrees-"].dup(mask=mask) + return ( + G.get_property("row_degrees-", mask=mask) + G.get_property("column_degrees-", mask=mask) + ).new(name="total_degrees-") + if "total_degrees-" not in cache: + if cache.get("has_self_edges") is False and "total_degrees+" in cache: + cache["total_degrees-"] = cache["total_degrees+"] + else: + cache["total_degrees-"] = ( + G.get_property("row_degrees-") + G.get_property("column_degrees-") + ).new(name="total_degrees-") + if ( + "has_self_edges" not in cache + and "total_degrees+" in cache + and cache["total_degrees-"].nvals != cache["total_degrees+"].nvals + ): + cache["has_self_edges"] = True + elif cache.get("has_self_edges") is False: + cache["total_degrees+"] = cache["total_degrees-"] + return cache["total_degrees-"] + + +def get_total_recipp(G, mask=None): + """pair(A & A.T).reduce_scalar()""" + A = G._A + cache = G._cache + if "total_recip+" not in cache: + if "total_recip-" in cache and cache.get("has_self_edges") is False: + cache["total_recip+"] = cache["total_recip-"] + elif "recip_degrees+" in cache: + cache["total_recip+"] = cache["recip_degrees+"].reduce().get(0) + else: + AT = cache.get("AT", A.T) + cache["total_recip+"] = binary.pair(A & AT).reduce_scalar().get(0) + if "has_self_edges" not in cache and "total_recip-" in cache: + cache["has_self_edges"] = cache["total_recip+"] > cache["total_recip-"] + if cache.get("has_self_edges") is False: + cache["total_recip-"] = cache["total_recip+"] + return cache["total_recip+"] + + +def get_total_recipm(G, mask=None): + """C = select.offdiag(A) ; pair(C & C.T).reduce_scalar()""" + cache = G._cache + if "total_recip-" not in cache: + if 
"total_recip+" in cache and cache.get("has_self_edges") is False: + cache["total_recip-"] = cache["total_recip+"] + else: + cache["total_recip-"] = G.get_property("recip_degrees-").reduce().get(0) + if "has_self_edges" not in cache and "total_recip+" in cache: + cache["has_self_edges"] = cache["total_recip+"] > cache["total_recip-"] + if cache.get("has_self_edges") is False: + cache["total_recip+"] = cache["total_recip-"] + return cache["total_recip-"] + + +def has_self_edges(G, mask=None): + """A.diag().nvals > 0""" + A = G._A + cache = G._cache + if "has_self_edges" not in cache: + if "offdiag" in cache: + cache["has_self_edges"] = A.nvals > cache["offdiag"].nvals + elif "L+" in cache and ("L-" in cache or "U+" in cache): + if "L-" in cache: + cache["has_self_edges"] = cache["L-"].nvals < cache["L+"].nvals + else: + cache["has_self_edges"] = cache["L+"].nvals + cache["U+"].nvals > A.nvals + elif "U-" in cache and ("U+" in cache or "L-" in cache): + if "U+" in cache: + cache["has_self_edges"] = cache["U-"].nvals < cache["U+"].nvals + else: + cache["has_self_edges"] = cache["U-"].nvals + cache["L-"].nvals < A.nvals + elif cache.get("has_negative_diagonal") is True: + cache["has_self_edges"] = True + elif "total_recip-" in cache and "total_recip+" in cache: + cache["has_self_edges"] = cache["total_recip+"] > cache["total_recip-"] + elif "row_degrees-" in cache and "row_degrees+" in cache: + cache["has_self_edges"] = not cache["row_degrees-"].isequal(cache["row_degrees+"]) + elif "column_degrees-" in cache and "column_degrees+" in cache: + cache["has_self_edges"] = not cache["column_degrees-"].isequal(cache["column_degrees+"]) + elif "total_degrees-" in cache and "total_degrees+" in cache: + cache["has_self_edges"] = not cache["total_degrees-"].isequal(cache["total_degrees+"]) + elif "recip_degrees-" in cache and "recip_degrees+" in cache: + cache["has_self_edges"] = not cache["recip_degrees-"].isequal(cache["recip_degrees+"]) + elif "row_degrees-" in cache: + cache["has_self_edges"] = cache["row_degrees-"].reduce().get(0) < A.nvals + elif "column_degrees-" in cache: + cache["has_self_edges"] = cache["column_degrees-"].reduce().get(0) < A.nvals + elif "total_degrees-" in cache: + cache["has_self_edges"] = cache["total_degrees-"].reduce().get(0) < 2 * A.nvals + elif "total_degrees+" in cache: + cache["has_self_edges"] = cache["total_degrees+"].reduce().get(0) > 2 * A.nvals + else: + G.get_property("diag") + return cache["has_self_edges"] + + +def to_directed_graph(G, weight=None, dtype=None): + # We should do some sanity checks here to ensure we're returning a valid directed graph + if isinstance(G, DiGraph): + return G + try: + return DiGraph(G) + except TypeError: + pass + + try: + import networkx as nx + + if isinstance(G, nx.DiGraph): + return DiGraph.from_networkx(G, weight=weight, dtype=dtype) + except ImportError: + pass + raise TypeError + + +def to_graph(G, weight=None, dtype=None): + if isinstance(G, (DiGraph, ga.Graph)): + return G + try: + # Should we check if it can be undirected? 
+ return DiGraph(G) + except TypeError: + pass + + try: + import networkx as nx + + if isinstance(G, nx.DiGraph): + return DiGraph.from_networkx(G, weight=weight, dtype=dtype) + if isinstance(G, nx.Graph): + return ga.Graph.from_networkx(G, weight=weight, dtype=dtype) + except ImportError: + pass + raise TypeError + + +class AutoDict(dict): + def __missing__(self, key): + # Automatically compute keys such as "plus_rowwise-" and "max_element+" + if key[-1] in {"-", "+"}: + keybase = key[:-1] + if keybase.endswith("_rowwise"): + opname = keybase[: -len("_rowwise")] + methodname = "reduce_rowwise" + elif keybase.endswith("_columnwise"): + opname = keybase[: -len("_columnwise")] + methodname = "reduce_columnwise" + elif keybase.endswith("_element"): + opname = keybase[: -len("_element")] + methodname = "reduce_scalar" + else: + raise KeyError(key) + if methodname == "reduce_scalar": + get_reduction = get_reduce_to_scalar(key, opname) + else: + get_reduction = get_reduce_to_vector(key, opname, methodname) + elif key.endswith("_diagonal"): + # e.g., min_diagonal + opname = key[: -len("_diagonal")] + get_reduction = get_reduce_to_scalar(key, opname) + else: + raise KeyError(key) + self[key] = get_reduction + return get_reduction + + +class DiGraph(Graph): + __networkx_backend__ = "graphblas" + __networkx_plugin__ = "graphblas" + + # "-" properties ignore self-edges, "+" properties include self-edges + # Ideally, we would have "max_rowwise+" come before "max_element+". + _property_priority = defaultdict( + lambda: DiGraph._property_priority["has_self_edges"] - 0.5, + { + key: i + for i, key in enumerate( + [ + "A", + "AT", + "offdiag", + "U+", + "L+", + "U-", + "L-", + "diag", + "count_rowwise+", # row_degrees + "count_columnwise+", # column_degrees + "count_rowwise-", + "count_columnwise-", + "recip_degrees+", + "recip_degrees-", + "total_degrees+", + "total_degrees-", + "total_recip+", # scalar; I don't like this name + "total_recip-", # scalar; I don't like this name + "min_diagonal", + "min_element+", + "min_element-", + "has_negative_diagonal", + "has_negative_edges-", + "has_negative_edges+", + "has_self_edges", + ] + ) + }, + ) + _get_property = AutoDict( + { + "A": get_A, + "AT": get_AT, + "offdiag": get_offdiag, + "U+": get_Up, + "L+": get_Lp, + "U-": get_Um, + "L-": get_Lm, + "diag": get_diag, + "recip_degrees+": get_recip_degreesp, + "recip_degrees-": get_recip_degreesm, + "total_degrees+": get_total_degreesp, + "total_degrees-": get_total_degreesm, + "total_recip+": get_total_recipp, + "total_recip-": get_total_recipm, + "is_iso": is_iso, + "iso_value": get_iso_value, + "has_negative_diagonal": has_negative_diagonal, + "has_negative_edges-": has_negative_edgesm, + "has_negative_edges+": has_negative_edgesp, + "has_self_edges": has_self_edges, + } + ) + _cache_aliases = { + "row_degrees+": "count_rowwise+", + "column_degrees+": "count_columnwise+", + "row_degrees-": "count_rowwise-", + "column_degrees-": "count_columnwise-", + } + graph_attr_dict_factory = dict + + def __init__(self, incoming_graph_data=None, *, key_to_id=None, **attr): + if incoming_graph_data is not None: + # Does not copy if A is a Matrix! 
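+ # i.e. the graph wraps the caller's Matrix directly, so mutations are visible both ways.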
+ A = gb.core.utils.ensure_type(incoming_graph_data, Matrix) + if A.nrows != A.ncols: + raise ValueError(f"Adjacency matrix must be square; got {A.nrows} x {A.ncols}") + else: + A = Matrix() + self.graph_attr_dict_factory = self.graph_attr_dict_factory + self.graph = self.graph_attr_dict_factory() # dictionary for graph attributes + self.graph.update(attr) + + # Graphblas-specific properties + self._A = A + if key_to_id is None: + key_to_id = {i: i for i in range(A.nrows)} + self._key_to_id = key_to_id + self._id_to_key = None + self._cache = {} + + # Graphblas-specific methods + from_networkx = classmethod(_utils.from_networkx) + id_to_key = property(_utils.id_to_key) + get_property = _utils.get_property + get_properties = _utils.get_properties + dict_to_vector = _utils.dict_to_vector + list_to_vector = _utils.list_to_vector + list_to_mask = _utils.list_to_mask + list_to_ids = _utils.list_to_ids + list_to_keys = _utils.list_to_keys + matrix_to_dicts = _utils.matrix_to_dicts + matrix_to_nodenodemap = _utils.matrix_to_nodenodemap + matrix_to_vectornodemap = _utils.matrix_to_vectornodemap + set_to_vector = _utils.set_to_vector + to_networkx = _utils.to_networkx + vector_to_dict = _utils.vector_to_dict + vector_to_list = _utils.vector_to_list + vector_to_nodemap = _utils.vector_to_nodemap + vector_to_nodeset = _utils.vector_to_nodeset + vector_to_set = _utils.vector_to_set + _cacheit = _utils._cacheit + renumber_key_to_id = _utils.renumber_key_to_id + + # NetworkX methods + def to_directed_class(self): + return DiGraph + + def to_undirected_class(self): + return ga.Graph + + @property + def name(self): + return self.graph.get("name", "") + + @name.setter + def name(self, s): + self._A.name = s + self.graph["name"] = s + + @property + def matrix(self): + return self._A + + def __iter__(self): + return iter(self._key_to_id) + + def __contains__(self, n): + try: + return n in self._key_to_id + except TypeError: + return False + + def __len__(self): + return self._A.nrows + + def number_of_nodes(self): + return self._A.nrows + + def order(self): + return self._A.nrows + + def is_multigraph(self): + return False + + def is_directed(self): + return True + + def to_undirected(self, reciprocal=False, as_view=False, *, name=None): + if as_view: + raise NotImplementedError("`as_vew=True` is not implemented in `G.to_undirected`") + A = self._A + if reciprocal: + B = binary.any(A & A.T).new(name=name) + else: + B = binary.any(A | A.T).new(name=name) + return Graph(B, key_to_id=self._key_to_id) + + def reverse(self, copy=True): + # We could even reuse many of the cached values + A = self._A.T # This probably mostly works, but does not yet support assignment + if copy: + A = A.new() + rv = type(self)(A, key_to_id=self._key_to_id) + rv.graph.update(deepcopy(self.graph)) + return rv + + +class MultiDiGraph(DiGraph): + def is_multigraph(self): + return True + + +__all__ = ["DiGraph", "MultiDiGraph"] diff --git a/graphblas_algorithms/classes/graph.py b/graphblas_algorithms/classes/graph.py new file mode 100644 index 0000000..f3e2239 --- /dev/null +++ b/graphblas_algorithms/classes/graph.py @@ -0,0 +1,457 @@ +from collections import defaultdict + +import graphblas as gb +from graphblas import Matrix, Vector, select + +import graphblas_algorithms as ga + +from . 
import _utils +from ._caching import NONNEGATIVE_DTYPES, get_reduce_to_scalar, get_reduce_to_vector + + +def get_A(G, mask=None): + """A""" + return G._A + + +def get_AT(G, mask=None): + """A.T""" + A = G._A + G._cache["AT"] = A + return A + + +def get_offdiag(G, mask=None): + """select.offdiag(A)""" + A = G._A + cache = G._cache + if "offdiag" not in cache: + if cache.get("has_self_edges") is False: + cache["offdiag"] = A + else: + cache["offdiag"] = select.offdiag(A).new(name="offdiag") + if "has_self_edges" not in cache: + cache["has_self_edges"] = A.nvals > cache["offdiag"].nvals + if not cache["has_self_edges"]: + cache["offdiag"] = A + return cache["offdiag"] + + +def get_Up(G, mask=None): + """select.triu(A)""" + A = G._A + cache = G._cache + if "U+" not in cache: + if "U-" in cache and not G.get_property("has_self_edges"): + cache["U+"] = cache["U-"] + else: + cache["U+"] = select.triu(A).new(name="U+") + if "has_self_edges" not in cache: + cache["has_self_edges"] = 2 * cache["U+"].nvals > A.nvals + if not cache["has_self_edges"]: + cache["U-"] = cache["U+"] + return cache["U+"] + + +def get_Lp(G, mask=None): + """select.tril(A)""" + A = G._A + cache = G._cache + if "L+" not in cache: + if "L-" in cache and not G.get_property("has_self_edges"): + cache["L+"] = cache["L-"] + else: + cache["L+"] = select.tril(A).new(name="L+") + if "has_self_edges" not in cache: + cache["has_self_edges"] = 2 * cache["L+"].nvals > A.nvals + if not cache["has_self_edges"]: + cache["L-"] = cache["L+"] + return cache["L+"] + + +def get_Um(G, mask=None): + """select.triu(A, 1)""" + A = G._A + cache = G._cache + if "U-" not in cache: + if "U+" in cache: + if G.get_property("has_self_edges"): + cache["U-"] = select.triu(cache["U+"], 1).new(name="U-") + else: + cache["U-"] = cache["U+"] + elif "offdiag" in cache: + cache["U-"] = select.triu(cache["offdiag"], 1).new(name="U-") + else: + cache["U-"] = select.triu(A, 1).new(name="U-") + if "has_self_edges" not in cache: + cache["has_self_edges"] = 2 * cache["U-"].nvals < A.nvals + if not cache["has_self_edges"]: + cache["U+"] = cache["U-"] + return cache["U-"] + + +def get_Lm(G, mask=None): + """select.tril(A, -1)""" + A = G._A + cache = G._cache + if "L-" not in cache: + if "L+" in cache: + if G.get_property("has_self_edges"): + cache["L-"] = select.tril(cache["L+"], -1).new(name="L-") + else: + cache["L-"] = cache["L+"] + elif "offdiag" in cache: + cache["L-"] = select.tril(cache["offdiag"], -1).new(name="L-") + else: + cache["L-"] = select.tril(A, -1).new(name="L-") + if "has_self_edges" not in cache: + cache["has_self_edges"] = 2 * cache["L-"].nvals < A.nvals + if not cache["has_self_edges"]: + cache["L+"] = cache["L-"] + return cache["L-"] + + +def get_diag(G, mask=None): + """A.diag()""" + A = G._A + cache = G._cache + if "diag" not in cache: + if cache.get("has_self_edges") is False: + cache["diag"] = Vector(A.dtype, size=A.nrows, name="diag") + elif "U+" in cache: + cache["diag"] = cache["U+"].diag(name="diag") + elif "L+" in cache: + cache["diag"] = cache["L+"].diag(name="diag") + else: + cache["diag"] = A.diag(name="diag") + if "has_self_edges" not in cache: + cache["has_self_edges"] = cache["diag"].nvals > 0 + if mask is not None: + return cache["diag"].dup(mask=mask) + return cache["diag"] + + +def has_negative_diagonal(G, mask=None): + A = G._A + cache = G._cache + if "has_negative_diagonal" not in cache: + if A.dtype in NONNEGATIVE_DTYPES or A.dtype._is_udt or cache.get("has_self_edges") is False: + cache["has_negative_diagonal"] = False + elif ( 
+ cache.get("has_negative_edges+") is True + and cache.get("has_negative_edges-") is False + or cache.get("has_negative_edges+") is True + and cache.get("min_element-", 0) >= 0 + or cache.get("min_element+", 0) < 0 + and cache.get("min_element+", 0) < cache.get("min_element-", 0) + ): + cache["has_negative_diagonal"] = True + else: + cache["has_negative_diagonal"] = G.get_property("min_diagonal").get(0) < 0 + return cache["has_negative_diagonal"] + + +def has_negative_edgesp(G, mask=None): + A = G._A + cache = G._cache + if "has_negative_edges+" not in cache: + if A.dtype in NONNEGATIVE_DTYPES or A.dtype._is_udt: + cache["has_negative_edges+"] = False + elif ( + cache.get("has_negative_edges-") + or cache.get("min_element+", 0) < 0 + or cache.get("min_element-", 0) < 0 + or cache.get("min_diagonal", 0) < 0 + or cache.get("has_negative_diagonal") + ): + cache["has_negative_edges+"] = True + elif cache.get("iso_value") is not None: + cache["has_negative_edges+"] = cache["iso_value"].get(0) < 0 + elif cache.get("has_negative_edges-") is False: + cache["has_negative_edges+"] = G.get_property("min_diagonal").get(0) < 0 + else: + cache["has_negative_edges+"] = G.get_property("min_element+").get(0) < 0 + return cache["has_negative_edges+"] + + +def has_negative_edgesm(G, mask=None): + A = G._A + cache = G._cache + if "has_negative_edges-" not in cache: + if A.dtype in NONNEGATIVE_DTYPES or A.dtype._is_udt: + cache["has_negative_edges-"] = False + elif ( + cache.get("has_negative_edges+") + and cache.get("has_self_edges") is False + or cache.get("has_negative_edges+") + and cache.get("has_negative_diagonal") is False + ): + cache["has_negative_edges-"] = True + else: + cache["has_negative_edges-"] = G.get_property("min_element-").get(0) < 0 + return cache["has_negative_edges-"] + + +def has_self_edges(G, mask=None): + """A.diag().nvals > 0""" + A = G._A + cache = G._cache + if "has_self_edges" not in cache: + if "L+" in cache: + cache["has_self_edges"] = 2 * cache["L+"].nvals > A.nvals + elif "L-" in cache: + cache["has_self_edges"] = 2 * cache["L-"].nvals < A.nvals + elif "U+" in cache: + cache["has_self_edges"] = 2 * cache["U+"].nvals > A.nvals + elif "U-" in cache: + cache["has_self_edges"] = 2 * cache["U-"].nvals < A.nvals + elif "offdiag" in cache: + cache["has_self_edges"] = A.nvals > cache["offdiag"].nvals + elif cache.get("has_negative_diagonal") is True: + cache["has_self_edges"] = True + else: + G.get_property("diag") + return cache["has_self_edges"] + + +def is_iso(G, mask=None): + A = G._A + cache = G._cache + if "is_iso" not in cache: + if "iso_value" in cache: + cache["is_iso"] = cache["iso_value"] is not None + else: + # SuiteSparse:GraphBLAS. `A` may still be iso-valued even if `A.ss.is_iso` is False. + # Should we check this or rely on `A.ss.is_iso` b/c it's fast and should usually work? 
+ cache["is_iso"] = A.ss.is_iso + return cache["is_iso"] + + +def get_iso_value(G, mask=None): + A = G._A + cache = G._cache + if "iso_value" not in cache: + if "is_iso" in cache: + if cache["is_iso"]: + # SuiteSparse:GraphBLAS + cache["iso_value"] = A.ss.iso_value + else: + cache["iso_value"] + + # min_val, max_val = G.get_properties('min_element+ max_element+') + # SuiteSparse:GraphBLAS + elif A.ss.is_iso: + cache["iso_value"] = A.ss.iso_value + cache["is_iso"] = True + else: + cache["iso_value"] = None + cache["is_iso"] = False + return cache["iso_value"] + + +def to_undirected_graph(G, weight=None, dtype=None): + # We should do some sanity checks here to ensure we're returning a valid undirected graph + if isinstance(G, Graph): + return G + try: + return Graph(G) + except TypeError: + pass + + try: + import networkx as nx + + if isinstance(G, nx.Graph): + return Graph.from_networkx(G, weight=weight, dtype=dtype) + except ImportError: + pass + + raise TypeError + + +class AutoDict(dict): + def __missing__(self, key): + # Automatically compute keys such as "plus_rowwise-" and "max_element+" + if key[-1] in {"-", "+"}: + keybase = key[:-1] + if keybase.endswith("_rowwise"): + opname = keybase[: -len("_rowwise")] + methodname = "reduce_rowwise" + elif keybase.endswith("_columnwise"): + opname = keybase[: -len("_columnwise")] + methodname = "reduce_rowwise" + elif keybase.endswith("_element"): + opname = keybase[: -len("_element")] + methodname = "reduce_scalar" + else: + raise KeyError(key) + if methodname == "reduce_scalar": + get_reduction = get_reduce_to_scalar(key, opname) + else: + get_reduction = get_reduce_to_vector(key, opname, methodname) + self[f"{opname}_columnwise{key[-1]}"] = get_reduction + elif key.endswith("_diagonal"): + # e.g., min_diagonal + opname = key[: -len("_diagonal")] + get_reduction = get_reduce_to_scalar(key, opname) + else: + raise KeyError(key) + self[key] = get_reduction + return get_reduction + + +class Graph: + __networkx_backend__ = "graphblas" + __networkx_plugin__ = "graphblas" + + # "-" properties ignore self-edges, "+" properties include self-edges + # Ideally, we would have "max_rowwise+" come before "max_element+". + _property_priority = defaultdict( + lambda: Graph._property_priority["has_self_edges"] - 0.5, + { + key: i + for i, key in enumerate( + [ + "A", + "AT", + "offdiag", + "U+", + "L+", + "U-", + "L-", + "diag", + "count_rowwise+", + "count_rowwise-", + "min_diagonal", + "min_element+", + "min_element-", + "has_negative_diagonal", + "has_negative_edges-", + "has_negative_edges+", + "has_self_edges", + ] + ) + }, + ) + _get_property = AutoDict( + { + "A": get_A, + "AT": get_AT, + "offdiag": get_offdiag, + "U+": get_Up, + "L+": get_Lp, + "U-": get_Um, + "L-": get_Lm, + "diag": get_diag, + "is_iso": is_iso, + "iso_value": get_iso_value, + "has_negative_diagonal": has_negative_diagonal, + "has_negative_edges-": has_negative_edgesm, + "has_negative_edges+": has_negative_edgesp, + "has_self_edges": has_self_edges, + } + ) + _cache_aliases = { + "degrees+": "count_rowwise+", + "degrees-": "count_rowwise-", + "row_degrees+": "count_rowwise+", + "row_degrees-": "count_rowwise-", + "column_degrees+": "count_rowwise+", + "column_degrees-": "count_rowwise-", + } + graph_attr_dict_factory = dict + + def __init__(self, incoming_graph_data=None, *, key_to_id=None, **attr): + if incoming_graph_data is not None: + # Does not copy if A is a Matrix! 
+ A = gb.core.utils.ensure_type(incoming_graph_data, Matrix) + if A.nrows != A.ncols: + raise ValueError(f"Adjacency matrix must be square; got {A.nrows} x {A.ncols}") + else: + A = Matrix() + self.graph_attr_dict_factory = self.graph_attr_dict_factory + self.graph = self.graph_attr_dict_factory() # dictionary for graph attributes + self.graph.update(attr) + + # Graphblas-specific properties + self._A = A + if key_to_id is None: + key_to_id = {i: i for i in range(A.nrows)} + self._key_to_id = key_to_id + self._id_to_key = None + self._cache = {} + + # Graphblas-specific methods + from_networkx = classmethod(_utils.from_networkx) + id_to_key = property(_utils.id_to_key) + get_property = _utils.get_property + get_properties = _utils.get_properties + dict_to_vector = _utils.dict_to_vector + list_to_vector = _utils.list_to_vector + list_to_mask = _utils.list_to_mask + list_to_ids = _utils.list_to_ids + list_to_keys = _utils.list_to_keys + matrix_to_dicts = _utils.matrix_to_dicts + matrix_to_nodenodemap = _utils.matrix_to_nodenodemap + matrix_to_vectornodemap = _utils.matrix_to_vectornodemap + set_to_vector = _utils.set_to_vector + to_networkx = _utils.to_networkx + vector_to_dict = _utils.vector_to_dict + vector_to_list = _utils.vector_to_list + vector_to_nodemap = _utils.vector_to_nodemap + vector_to_nodeset = _utils.vector_to_nodeset + vector_to_set = _utils.vector_to_set + _cacheit = _utils._cacheit + renumber_key_to_id = _utils.renumber_key_to_id + + # NetworkX methods + def to_directed_class(self): + return ga.DiGraph + + def to_undirected_class(self): + return Graph + + @property + def name(self): + return self.graph.get("name", "") + + @name.setter + def name(self, s): + self._A.name = s + self.graph["name"] = s + + @property + def matrix(self): + return self._A + + def __iter__(self): + return iter(self._key_to_id) + + def __contains__(self, n): + try: + return n in self._key_to_id + except TypeError: + return False + + def __len__(self): + return self._A.nrows + + def number_of_nodes(self): + return self._A.nrows + + def order(self): + return self._A.nrows + + def is_multigraph(self): + return False + + def is_directed(self): + return False + + +class MultiGraph(Graph): + def is_multigraph(self): + return True + + +__all__ = ["Graph", "MultiGraph"] diff --git a/graphblas_algorithms/classes/nodemap.py b/graphblas_algorithms/classes/nodemap.py new file mode 100644 index 0000000..2a32502 --- /dev/null +++ b/graphblas_algorithms/classes/nodemap.py @@ -0,0 +1,447 @@ +from collections.abc import MutableMapping + +from graphblas import Vector, monoid + +from . 
import _utils + + +class NodeMap(MutableMapping): + def __init__(self, v, *, fill_value=None, values_are_keys=False, key_to_id=None): + self.vector = v + if key_to_id is None: + self._key_to_id = {i: i for i in range(v.size)} + else: + self._key_to_id = key_to_id + self._id_to_key = None + self._fill_value = fill_value + self._values_are_keys = values_are_keys + + id_to_key = property(_utils.id_to_key) + # get_property = _utils.get_property + # get_properties = _utils.get_properties + dict_to_vector = _utils.dict_to_vector + list_to_vector = _utils.list_to_vector + list_to_mask = _utils.list_to_mask + list_to_ids = _utils.list_to_ids + list_to_keys = _utils.list_to_keys + matrix_to_dicts = _utils.matrix_to_dicts + set_to_vector = _utils.set_to_vector + # to_networkx = _utils.to_networkx + vector_to_dict = _utils.vector_to_dict + vector_to_list = _utils.vector_to_list + vector_to_nodemap = _utils.vector_to_nodemap + vector_to_nodeset = _utils.vector_to_nodeset + vector_to_set = _utils.vector_to_set + # _cacheit = _utils._cacheit + + # Requirements for MutableMapping + def __delitem__(self, key): + idx = self._key_to_id[key] + del self.vector[idx] + + def __getitem__(self, key): + idx = self._key_to_id[key] + if (rv := self.vector.get(idx)) is not None: + if self._values_are_keys: + return self.id_to_key[rv] + return rv + if self._fill_value is not None: + return self._fill_value + raise KeyError(key) + + def __iter__(self): + if self._fill_value is not None: + return iter(self._key_to_id) + # Slow if we iterate over one; fast if we iterate over all + return map( + self.id_to_key.__getitem__, self.vector.to_coo(values=False, sort=False)[0].tolist() + ) + + def __len__(self): + if self._fill_value is not None: + return len(self._key_to_id) + return self.vector.nvals + + def __setitem__(self, key, val): + idx = self._key_to_id[key] + if self._values_are_keys: + val = self._key_to_id[val] + self.vector[idx] = val + + # Override other MutableMapping methods + def __contains__(self, key): + idx = self._key_to_id[key] + return self._fill_value is not None or idx in self.vector + + def __eq__(self, other): + if isinstance(other, NodeMap): + return ( + self._values_are_keys == other._values_are_keys + and self._fill_value == other._fill_value + and self.vector.isequal(other.vector) + and self._key_to_id == other._key_to_id + ) + return super().__eq__(other) + + def clear(self): + self.vector.clear() + self._fill_value = None + + def get(self, key, default=None): + idx = self._key_to_id[key] + rv = self.vector.get(idx) + if rv is None: + if self._fill_value is not None: + return self._fill_value + return default + if self._values_are_keys: + return self.id_to_key[rv] + return rv + + # items + # keys + # pop + + def popitem(self): + v = self.vector + try: + idx, value = next(v.ss.iteritems()) + except StopIteration: + raise KeyError from None + del v[idx] + if self._values_are_keys: + value = self.id_to_key[value] + return self.id_to_key[idx], value + + def setdefault(self, key, default=None): + idx = self._key_to_id[key] + if (value := self.vector.get(idx)) is not None: + if self._values_are_keys: + return self.id_to_key[value] + return value + if self._fill_value is not None: + return self._fill_value + if default is not None: + self.vector[idx] = default + return default + + # update + # values + + +class VectorMap(MutableMapping): + def __init__(self, v): + self.vector = v + + # Requirements for MutableMapping + def __delitem__(self, key): + del self.vector[key] + + def __getitem__(self, key): + 
return self.vector.get(key) + + def __iter__(self): + # Slow if we iterate over one; fast if we iterate over all + return iter(self.vector.to_coo(values=False, sort=False)[0].tolist()) + + def __len__(self): + return self.vector.nvals + + def __setitem__(self, key, val): + self.vector[key] = val + + # Override other MutableMapping methods + def __contains__(self, key): + return key in self.vector + + def __eq__(self, other): + if isinstance(other, VectorMap): + return self.vector.isequal(other.vector) + if isinstance(other, Vector): + return self.vector.isequal(other) + if isinstance(other, NodeMap): + return self.vector.isequal(other.vector) and other._key_to_id == { + i: i for i in range(self.vector.size) + } + return super().__eq__(other) + + def clear(self): + self.vector.clear() + + def get(self, key, default=None): + return self.vector.get(key, default) + + # items + # keys + # pop + + def popitem(self): + v = self.vector + try: + idx, value = next(v.ss.iteritems()) + except StopIteration: + raise KeyError from None + del v[idx] + return idx, value + + def setdefault(self, key, default=None): + if (value := self.vector.get(key)) is not None: + return value + self.vector[key] = default + return default + + # update + # values + + +class VectorNodeMap(MutableMapping): + def __init__(self, A, *, key_to_id=None): + self.matrix = A + if key_to_id is None: + self._key_to_id = {i: i for i in range(A.size)} + else: + self._key_to_id = key_to_id + self._id_to_key = None + self._rows = None + + def _get_rows(self): + if self._rows is None: + self._rows = self.matrix.reduce_rowwise(monoid.any).new() + self._rows(self._rows.S) << 1 # Make iso-valued + return self._rows + + id_to_key = property(_utils.id_to_key) + # get_property = _utils.get_property + # get_properties = _utils.get_properties + dict_to_vector = _utils.dict_to_vector + list_to_vector = _utils.list_to_vector + list_to_mask = _utils.list_to_mask + list_to_ids = _utils.list_to_ids + list_to_keys = _utils.list_to_keys + matrix_to_dicts = _utils.matrix_to_dicts + set_to_vector = _utils.set_to_vector + # to_networkx = _utils.to_networkx + vector_to_dict = _utils.vector_to_dict + vector_to_list = _utils.vector_to_list + vector_to_nodemap = _utils.vector_to_nodemap + vector_to_nodeset = _utils.vector_to_nodeset + vector_to_set = _utils.vector_to_set + # _cacheit = _utils._cacheit + + # Requirements for MutableMapping + def __delitem__(self, key): + idx = self._key_to_id[key] + del self.matrix[idx, :] + if self._rows is not None: + del self._rows[idx] + + def __getitem__(self, key): + idx = self._key_to_id[key] + if self._get_rows().get(idx) is None: + raise KeyError(key) + return VectorMap(self.matrix[idx, :].new()) + + def __iter__(self): + # Slow if we iterate over one; fast if we iterate over all + return map( + self.id_to_key.__getitem__, + self._get_rows().to_coo(values=False, sort=False)[0].tolist(), + ) + + def __len__(self): + return self._get_rows().nvals + + def __setitem__(self, key, val): + idx = self._key_to_id[key] + if isinstance(val, VectorMap): + val = val.vector + elif isinstance(val, dict): + val = Vector.from_dict(val, self.matrix.dtype, size=self.matrix.ncols) + else: + raise TypeError + if val.nvals == 0: + del self.matrix[idx, :] + if self._rows is not None: + del self._rows[idx] + else: + self.matrix[idx, :] = val + if self._rows is not None: + self._rows[idx] = 1 + + # Override other MutableMapping methods + def __contains__(self, key): + idx = self._key_to_id[key] + return idx in self._get_rows() + + def 
__eq__(self, other): + if isinstance(other, VectorNodeMap): + return self.matrix.isequal(other.matrix) and self._key_to_id == other._key_to_id + return super().__eq__(other) + + def clear(self): + self.matrix.clear() + self._rows = None + + def get(self, key, default=None): + idx = self._key_to_id[key] + if self._get_rows().get(idx) is None: + return default + return VectorMap(self.matrix[idx, :].new()) + + # items + # keys + # pop + + def popitem(self): + rows = self._get_rows() + try: + idx = next(rows.ss.iterkeys()) + except StopIteration: + raise KeyError from None + value = VectorMap(self.matrix[idx, :].new()) + del self.matrix[idx, :] + del rows[idx] + return self.id_to_key[idx], value + + # setdefault + # update + # values + + +class NodeNodeMap(MutableMapping): + def __init__(self, A, *, fill_value=None, values_are_keys=False, key_to_id=None): + self.matrix = A + if key_to_id is None: + self._key_to_id = {i: i for i in range(A.size)} + else: + self._key_to_id = key_to_id + self._id_to_key = None + self._rows = None + self._fill_value = fill_value + self._values_are_keys = values_are_keys + + def _get_rows(self): + if self._rows is None: + self._rows = self.matrix.reduce_rowwise(monoid.any).new() + self._rows(self._rows.S) << 1 # Make iso-valued + return self._rows + + id_to_key = property(_utils.id_to_key) + # get_property = _utils.get_property + # get_properties = _utils.get_properties + dict_to_vector = _utils.dict_to_vector + list_to_vector = _utils.list_to_vector + list_to_mask = _utils.list_to_mask + list_to_ids = _utils.list_to_ids + list_to_keys = _utils.list_to_keys + matrix_to_dicts = _utils.matrix_to_dicts + set_to_vector = _utils.set_to_vector + # to_networkx = _utils.to_networkx + vector_to_dict = _utils.vector_to_dict + vector_to_list = _utils.vector_to_list + vector_to_nodemap = _utils.vector_to_nodemap + vector_to_nodeset = _utils.vector_to_nodeset + vector_to_set = _utils.vector_to_set + # _cacheit = _utils._cacheit + + # Requirements for MutableMapping + def __delitem__(self, key): + idx = self._key_to_id[key] + del self.matrix[idx, :] + if self._rows is not None: + del self._rows[idx] + + def __getitem__(self, key): + idx = self._key_to_id[key] + if self._fill_value is None and self._get_rows().get(idx) is None: + raise KeyError(key) + return self.vector_to_nodemap( + self.matrix[idx, :].new(), + fill_value=self._fill_value, + values_are_keys=self._values_are_keys, + ) + + def __iter__(self): + if self._fill_value is not None: + return iter(self._key_to_id) + # Slow if we iterate over one; fast if we iterate over all + return map( + self.id_to_key.__getitem__, + self._get_rows().to_coo(values=False, sort=False)[0].tolist(), + ) + + def __len__(self): + if self._fill_value is not None: + return len(self._key_to_id) + return self._get_rows().nvals + + def __setitem__(self, key, val): + idx = self._key_to_id[key] + if isinstance(val, NodeMap): + # TODO: check val._key_to_id? 
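+            # (Sketch of that check, not enforced here: one could require
+            # `val._key_to_id == self._key_to_id` before reusing `val.vector`,
+            # since a NodeMap built with a different node ordering would silently
+            # assign values to the wrong ids.)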
+ val = val.vector + elif isinstance(val, dict): + val = Vector.from_dict(val, self.matrix.dtype, size=self.matrix.ncols) + else: + raise TypeError + if val.nvals == 0: + del self.matrix[idx, :] + if self._rows is not None: + del self._rows[idx] + else: + self.matrix[idx, :] = val + if self._rows is not None: + self._rows[idx] = 1 + + # Override other MutableMapping methods + def __contains__(self, key): + idx = self._key_to_id[key] + return self._fill_value is not None or idx in self._get_rows() + + def __eq__(self, other): + if isinstance(other, NodeNodeMap): + return ( + self._fill_value == other._fill_value + and self._values_are_keys == other._values_are_keys + and self.matrix.isequal(other.matrix) + and self._key_to_id == other._key_to_id + ) + return super().__eq__(other) + + def clear(self): + self.matrix.clear() + self._rows = None + self._fill_value = None + + def get(self, key, default=None): + idx = self._key_to_id[key] + if self._fill_value is None and self._get_rows().get(idx) is None: + return default + self.vector_to_nodemap( + self.matrix[idx, :].new(), + fill_value=self._fill_value, + values_are_keys=self._values_are_keys, + ) + + # items + # keys + # pop + + def popitem(self): + rows = self._get_rows() + try: + idx = next(rows.ss.iterkeys()) + except StopIteration: + raise KeyError from None + value = self.vector_to_nodemap( + self.matrix[idx, :].new(), + fill_value=self._fill_value, + values_are_keys=self._values_are_keys, + ) + del self.matrix[idx, :] + del rows[idx] + return self.id_to_key[idx], value + + # setdefault + # update + # values diff --git a/graphblas_algorithms/classes/nodeset.py b/graphblas_algorithms/classes/nodeset.py new file mode 100644 index 0000000..b79895e --- /dev/null +++ b/graphblas_algorithms/classes/nodeset.py @@ -0,0 +1,112 @@ +from collections.abc import MutableSet + +from graphblas.semiring import any_pair, plus_pair + +from . 
import _utils + + +class NodeSet(MutableSet): + def __init__(self, v, *, key_to_id=None): + self.vector = v + if key_to_id is None: + self._key_to_id = {i: i for i in range(v.size)} + else: + self._key_to_id = key_to_id + self._id_to_key = None + + id_to_key = property(_utils.id_to_key) + # get_property = _utils.get_property + # get_properties = _utils.get_properties + dict_to_vector = _utils.dict_to_vector + list_to_vector = _utils.list_to_vector + list_to_mask = _utils.list_to_mask + list_to_ids = _utils.list_to_ids + list_to_keys = _utils.list_to_keys + matrix_to_dicts = _utils.matrix_to_dicts + set_to_vector = _utils.set_to_vector + # to_networkx = _utils.to_networkx + vector_to_dict = _utils.vector_to_dict + vector_to_list = _utils.vector_to_list + vector_to_nodemap = _utils.vector_to_nodemap + vector_to_nodeset = _utils.vector_to_nodeset + vector_to_set = _utils.vector_to_set + # _cacheit = _utils._cacheit + + # Requirements for MutableSet + def __contains__(self, x): + idx = self._key_to_id[x] + return idx in self.vector + + def __iter__(self): + # Slow if we iterate over one; fast if we iterate over all + return map( + self.id_to_key.__getitem__, self.vector.to_coo(values=False, sort=False)[0].tolist() + ) + + def __len__(self): + return self.vector.nvals + + def add(self, value): + idx = self._key_to_id[value] + self.vector[idx] = True + + def discard(self, value): + idx = self._key_to_id[value] + del self.vector[idx] + + # Override other MutableSet methods + def __eq__(self, other): + if isinstance(other, NodeSet): + a = self.vector + b = other.vector + return ( + a.size == b.size + and (nvals := a.nvals) == b.nvals + and plus_pair(a @ b).get(0) == nvals + and self._key_to_id == other._key_to_id + ) + return super().__eq__(other) + + # __and__ + # __or__ + # __sub__ + # __xor__ + + def clear(self): + self.vector.clear() + + def isdisjoin(self, other): + if isinstance(other, NodeSet): + return not any_pair[bool](self.vector @ other.vector) + return super().isdisjoint(other) + + def pop(self): + try: + idx = next(self.vector.ss.iterkeys()) + except StopIteration: + raise KeyError from None + del self.vector[idx] + return self.id_to_key[idx] + + def remove(self, value): + idx = self._key_to_id[value] + if idx not in self.vector: + raise KeyError(value) + del self.vector[idx] + + def _from_iterable(self, it): + # The elements in the iterable must be contained within key_to_id + rv = object.__new__(type(self)) + rv._key_to_id = self._key_to_id + rv._id_to_key = self._id_to_key + rv.vector = rv.set_to_vector(it, size=self.vector.size) + return rv + + # Add more set methods (as needed) + def union(self, *args): + return set(self).union(*args) # TODO: can we make this better? 
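+        # (A GraphBLAS-native sketch, assuming every argument is a NodeSet sharing
+        # this `_key_to_id`: ewise-add the boolean vectors, e.g.
+        # `v = self.vector.ewise_add(other.vector, graphblas.monoid.any).new()`,
+        # then wrap the result the same way `copy` below does. The Python-set
+        # fallback above keeps `set.union` semantics for arbitrary iterables.)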
+ + def copy(self): + rv = type(self)(self.vector.dup(), key_to_id=self._key_to_id) + rv._id_to_key = self._id_to_key + return rv diff --git a/graphblas_algorithms/cluster.py b/graphblas_algorithms/cluster.py deleted file mode 100644 index 90745b3..0000000 --- a/graphblas_algorithms/cluster.py +++ /dev/null @@ -1,180 +0,0 @@ -import graphblas as gb -import networkx as nx -from graphblas import Matrix, agg, select -from graphblas.semiring import any_pair, plus_pair -from networkx import average_clustering as _nx_average_clustering -from networkx import clustering as _nx_clustering -from networkx.utils import not_implemented_for - -from ._utils import graph_to_adjacency, list_to_mask, vector_to_dict - - -def get_properties(G, names, *, L=None, U=None, degrees=None, has_self_edges=True): - """Calculate properties of undirected graph""" - if isinstance(names, str): - # Separated by commas and/or spaces - names = [name for name in names.replace(" ", ",").split(",") if name] - rv = [] - for name in names: - if name == "L": - if L is None: - L = select.tril(G, -1).new(name="L") - rv.append(L) - elif name == "U": - if U is None: - U = select.triu(G, 1).new(name="U") - rv.append(U) - elif name == "degrees": - if degrees is None: - degrees = get_degrees(G, L=L, U=U, has_self_edges=has_self_edges) - rv.append(degrees) - elif name == "has_self_edges": - # Compute if cheap - if L is not None: - has_self_edges = G.nvals > 2 * L.nvals - elif U is not None: - has_self_edges = G.nvals > 2 * U.nvals - rv.append(has_self_edges) - else: - raise ValueError(f"Unknown property name: {name}") - if len(rv) == 1: - return rv[0] - return rv - - -def get_degrees(G, mask=None, *, L=None, U=None, has_self_edges=True): - if L is not None: - has_self_edges = G.nvals > 2 * L.nvals - elif U is not None: - has_self_edges = G.nvals > 2 * U.nvals - if has_self_edges: - if L is None or U is None: - L, U = get_properties(G, "L U", L=L, U=U) - degrees = ( - L.reduce_rowwise(agg.count).new(mask=mask) + U.reduce_rowwise(agg.count).new(mask=mask) - ).new(name="degrees") - else: - degrees = G.reduce_rowwise(agg.count).new(mask=mask, name="degrees") - return degrees - - -def single_triangle_core(G, index, *, L=None, has_self_edges=True): - M = Matrix(bool, G.nrows, G.ncols) - M[index, index] = True - C = any_pair(G.T @ M.T).new(name="C") # select.coleq(G.T, index) - has_self_edges = get_properties(G, "has_self_edges", L=L, has_self_edges=has_self_edges) - if has_self_edges: - del C[index, index] # Ignore self-edges - R = C.T.new(name="R") - if has_self_edges: - # Pretty much all the time is spent here taking TRIL, which is used to ignore self-edges - L = get_properties(G, "L", L=L) - return plus_pair(L @ R.T).new(mask=C.S).reduce_scalar(allow_empty=False).value - else: - return plus_pair(G @ R.T).new(mask=C.S).reduce_scalar(allow_empty=False).value // 2 - - -def triangles_core(G, mask=None, *, L=None, U=None): - # Ignores self-edges - L, U = get_properties(G, "L U", L=L, U=U) - C = plus_pair(L @ L.T).new(mask=L.S) - return ( - C.reduce_rowwise().new(mask=mask) - + C.reduce_columnwise().new(mask=mask) - + plus_pair(U @ L.T).new(mask=U.S).reduce_rowwise().new(mask=mask) - ).new(name="triangles") - - -@not_implemented_for("directed") -def triangles(G, nodes=None): - if len(G) == 0: - return {} - A, key_to_id = graph_to_adjacency(G, dtype=bool) - if nodes in G: - return single_triangle_core(A, key_to_id[nodes]) - mask, id_to_key = list_to_mask(nodes, key_to_id) - result = triangles_core(A, mask=mask) - return vector_to_dict(result, 
key_to_id, id_to_key, mask=mask, fillvalue=0) - - -def total_triangles_core(G, *, L=None, U=None): - # We use SandiaDot method, because it's usually the fastest on large graphs. - # For smaller graphs, Sandia method is usually faster: plus_pair(L @ L).new(mask=L.S) - L, U = get_properties(G, "L U", L=L, U=U) - return plus_pair(L @ U.T).new(mask=L.S).reduce_scalar(allow_empty=False).value - - -def transitivity_core(G, *, L=None, U=None, degrees=None): - L, U = get_properties(G, "L U", L=L, U=U) - numerator = total_triangles_core(G, L=L, U=U) - if numerator == 0: - return 0 - degrees = get_properties(G, "degrees", L=L, U=U, degrees=degrees) - denom = (degrees * (degrees - 1)).reduce().value - return 6 * numerator / denom - - -@not_implemented_for("directed") # Should we implement it for directed? -def transitivity(G): - if len(G) == 0: - return 0 - A = gb.io.from_networkx(G, weight=None, dtype=bool) - return transitivity_core(A) - - -def clustering_core(G, mask=None, *, L=None, U=None, degrees=None): - L, U = get_properties(G, "L U", L=L, U=U) - tri = triangles_core(G, mask=mask, L=L, U=U) - degrees = get_degrees(G, mask=mask, L=L, U=U) - denom = degrees * (degrees - 1) - return (2 * tri / denom).new(name="clustering") - - -def single_clustering_core(G, index, *, L=None, degrees=None, has_self_edges=True): - has_self_edges = get_properties(G, "has_self_edges", L=L, has_self_edges=has_self_edges) - tri = single_triangle_core(G, index, L=L, has_self_edges=has_self_edges) - if tri == 0: - return 0 - if degrees is not None: - degrees = degrees[index].value - else: - row = G[index, :].new() - degrees = row.reduce(agg.count).value - if has_self_edges and row[index].value is not None: - degrees -= 1 - denom = degrees * (degrees - 1) - return 2 * tri / denom - - -def clustering(G, nodes=None, weight=None): - if len(G) == 0: - return {} - if isinstance(G, nx.DiGraph) or weight is not None: - # TODO: Not yet implemented. Clustering implemented only for undirected and unweighted. - return _nx_clustering(G, nodes=nodes, weight=weight) - A, key_to_id = graph_to_adjacency(G, weight=weight) - if nodes in G: - return single_clustering_core(A, key_to_id[nodes]) - mask, id_to_key = list_to_mask(nodes, key_to_id) - result = clustering_core(A, mask=mask) - return vector_to_dict(result, key_to_id, id_to_key, mask=mask, fillvalue=0.0) - - -def average_clustering_core(G, mask=None, count_zeros=True, *, L=None, U=None, degrees=None): - c = clustering_core(G, mask=mask, L=L, U=U, degrees=degrees) - val = c.reduce(allow_empty=False).value - if not count_zeros: - return val / c.nvals - elif mask is not None: - return val / mask.parent.nvals - else: - return val / c.size - - -def average_clustering(G, nodes=None, weight=None, count_zeros=True): - if len(G) == 0 or isinstance(G, nx.DiGraph) or weight is not None: - # TODO: Not yet implemented. Clustering implemented only for undirected and unweighted. 
- return _nx_average_clustering(G, nodes=nodes, weight=weight, count_zeros=count_zeros) - A, key_to_id = graph_to_adjacency(G, weight=weight) - mask, _ = list_to_mask(nodes, key_to_id) - return average_clustering_core(A, mask=mask, count_zeros=count_zeros) diff --git a/graphblas_algorithms/conftest.py b/graphblas_algorithms/conftest.py index c598c08..8d42a7d 100644 --- a/graphblas_algorithms/conftest.py +++ b/graphblas_algorithms/conftest.py @@ -1 +1,13 @@ -from networkx.conftest import * # noqa +import pytest + + +@pytest.fixture(scope="session", autouse=True) +def ic(): + """Make `ic` available everywhere during testing for easier debugging""" + try: + import icecream + except ImportError: + return + icecream.install() + # icecream.ic.disable() # do ic.enable() to re-enable + return icecream.ic diff --git a/graphblas_algorithms/generators/__init__.py b/graphblas_algorithms/generators/__init__.py new file mode 100644 index 0000000..65a6526 --- /dev/null +++ b/graphblas_algorithms/generators/__init__.py @@ -0,0 +1 @@ +from .ego import * diff --git a/graphblas_algorithms/generators/ego.py b/graphblas_algorithms/generators/ego.py new file mode 100644 index 0000000..4d95e0f --- /dev/null +++ b/graphblas_algorithms/generators/ego.py @@ -0,0 +1,24 @@ +from ..algorithms.components.connected import _bfs_plain +from ..algorithms.shortest_paths.weighted import single_source_bellman_ford_path_length + +__all__ = ["ego_graph"] + + +def ego_graph(G, n, radius=1, center=True, undirected=False, is_weighted=False): + # TODO: should we have an option to keep the output matrix the same size? + if undirected and G.is_directed(): + # NOT COVERED + G2 = G.to_undirected() + else: + G2 = G + if is_weighted: + v = single_source_bellman_ford_path_length(G2, n, cutoff=radius) + else: + v = _bfs_plain(G2, n, cutoff=radius) + if not center: + del v[G._key_to_id[n]] + + indices, _ = v.to_coo(values=False) + A = G._A[indices, indices].new(name="ego") + key_to_id = G.renumber_key_to_id(indices.tolist()) + return type(G)(A, key_to_id=key_to_id) diff --git a/graphblas_algorithms/interface.py b/graphblas_algorithms/interface.py new file mode 100644 index 0000000..c718371 --- /dev/null +++ b/graphblas_algorithms/interface.py @@ -0,0 +1,314 @@ +from . import nxapi + +####### +# NOTE: Remember to run `python scripts/maketree.py` when adding or removing algorithms +# to automatically add it to README.md. You must still add algorithms below. 
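+# For example (hypothetical workflow; adjust to your checkout):
+#     python scripts/maketree.py
+#     git diff README.md   # review the regenerated algorithm tree before committing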
+####### + + +class Dispatcher: + # Begin auto-generated code: dispatch + mod = nxapi.boundary + # ================== + edge_boundary = mod.edge_boundary + node_boundary = mod.node_boundary + + mod = nxapi.centrality + # ==================== + degree_centrality = mod.degree_alg.degree_centrality + in_degree_centrality = mod.degree_alg.in_degree_centrality + out_degree_centrality = mod.degree_alg.out_degree_centrality + eigenvector_centrality = mod.eigenvector.eigenvector_centrality + katz_centrality = mod.katz.katz_centrality + + mod = nxapi.cluster + # ================= + average_clustering = mod.average_clustering + clustering = mod.clustering + generalized_degree = mod.generalized_degree + square_clustering = mod.square_clustering + transitivity = mod.transitivity + triangles = mod.triangles + + mod = nxapi.community + # =================== + inter_community_edges = mod.quality.inter_community_edges + intra_community_edges = mod.quality.intra_community_edges + + mod = nxapi.components + # ==================== + is_connected = mod.connected.is_connected + node_connected_component = mod.connected.node_connected_component + is_weakly_connected = mod.weakly_connected.is_weakly_connected + + mod = nxapi.core + # ============== + k_truss = mod.k_truss + + mod = nxapi.cuts + # ============== + boundary_expansion = mod.boundary_expansion + conductance = mod.conductance + cut_size = mod.cut_size + edge_expansion = mod.edge_expansion + mixing_expansion = mod.mixing_expansion + node_expansion = mod.node_expansion + normalized_cut_size = mod.normalized_cut_size + volume = mod.volume + + mod = nxapi.dag + # ============= + ancestors = mod.ancestors + descendants = mod.descendants + + mod = nxapi.dominating + # ==================== + is_dominating_set = mod.is_dominating_set + + mod = nxapi.efficiency_measures + # ============================= + efficiency = mod.efficiency + + mod = nxapi.generators + # ==================== + ego_graph = mod.ego.ego_graph + + mod = nxapi.isolate + # ================= + is_isolate = mod.is_isolate + isolates = mod.isolates + number_of_isolates = mod.number_of_isolates + + mod = nxapi.isomorphism + # ===================== + fast_could_be_isomorphic = mod.isomorph.fast_could_be_isomorphic + faster_could_be_isomorphic = mod.isomorph.faster_could_be_isomorphic + + mod = nxapi.linalg + # ================ + bethe_hessian_matrix = mod.bethehessianmatrix.bethe_hessian_matrix + adjacency_matrix = mod.graphmatrix.adjacency_matrix + laplacian_matrix = mod.laplacianmatrix.laplacian_matrix + normalized_laplacian_matrix = mod.laplacianmatrix.normalized_laplacian_matrix + directed_modularity_matrix = mod.modularitymatrix.directed_modularity_matrix + modularity_matrix = mod.modularitymatrix.modularity_matrix + + mod = nxapi.link_analysis + # ======================= + hits = mod.hits_alg.hits + google_matrix = mod.pagerank_alg.google_matrix + pagerank = mod.pagerank_alg.pagerank + + mod = nxapi.lowest_common_ancestors + # ================================= + lowest_common_ancestor = mod.lowest_common_ancestor + + mod = nxapi.operators + # =================== + compose = mod.binary.compose + difference = mod.binary.difference + disjoint_union = mod.binary.disjoint_union + full_join = mod.binary.full_join + intersection = mod.binary.intersection + symmetric_difference = mod.binary.symmetric_difference + union = mod.binary.union + complement = mod.unary.complement + reverse = mod.unary.reverse + + mod = nxapi.reciprocity + # ===================== + overall_reciprocity = 
nxapi.overall_reciprocity + reciprocity = nxapi.reciprocity + + mod = nxapi.regular + # ================= + is_k_regular = mod.is_k_regular + is_regular = mod.is_regular + + mod = nxapi.shortest_paths + # ======================== + floyd_warshall = mod.dense.floyd_warshall + floyd_warshall_numpy = mod.dense.floyd_warshall_numpy + floyd_warshall_predecessor_and_distance = mod.dense.floyd_warshall_predecessor_and_distance + has_path = mod.generic.has_path + all_pairs_shortest_path_length = mod.unweighted.all_pairs_shortest_path_length + single_source_shortest_path_length = mod.unweighted.single_source_shortest_path_length + single_target_shortest_path_length = mod.unweighted.single_target_shortest_path_length + all_pairs_bellman_ford_path_length = mod.weighted.all_pairs_bellman_ford_path_length + bellman_ford_path = mod.weighted.bellman_ford_path + bellman_ford_path_length = mod.weighted.bellman_ford_path_length + negative_edge_cycle = mod.weighted.negative_edge_cycle + single_source_bellman_ford_path_length = mod.weighted.single_source_bellman_ford_path_length + + mod = nxapi.simple_paths + # ====================== + is_simple_path = mod.is_simple_path + + mod = nxapi.smetric + # ================= + s_metric = mod.s_metric + + mod = nxapi.structuralholes + # ========================= + mutual_weight = mod.mutual_weight + + mod = nxapi.tournament + # ==================== + is_tournament = mod.is_tournament + score_sequence = mod.score_sequence + tournament_matrix = mod.tournament_matrix + + mod = nxapi.traversal + # =================== + bfs_layers = mod.breadth_first_search.bfs_layers + descendants_at_distance = mod.breadth_first_search.descendants_at_distance + + mod = nxapi.triads + # ================ + is_triad = mod.is_triad + + del mod + # End auto-generated code: dispatch + + @staticmethod + def convert_from_nx( + graph, + edge_attrs=None, + node_attrs=None, + preserve_edge_attrs=False, + preserve_node_attrs=False, + preserve_graph_attrs=False, + name=None, + graph_name=None, + *, + weight=None, # For nx.__version__ <= 3.1 + ): + import networkx as nx + + from .classes import DiGraph, Graph, MultiDiGraph, MultiGraph + + if preserve_edge_attrs: + if graph.is_multigraph(): + attrs = set().union( + *( + datadict + for nbrs in graph._adj.values() + for keydict in nbrs.values() + for datadict in keydict.values() + ) + ) + else: + attrs = set().union( + *(datadict for nbrs in graph._adj.values() for datadict in nbrs.values()) + ) + if len(attrs) == 1: + [attr] = attrs + edge_attrs = {attr: None} + elif attrs: + raise NotImplementedError("`preserve_edge_attrs=True` is not fully implemented") + if node_attrs: + raise NotImplementedError("non-None `node_attrs` is not yet implemented") + if preserve_node_attrs: + attrs = set().union(*(datadict for node, datadict in graph.nodes(data=True))) + if attrs: + raise NotImplementedError("`preserve_node_attrs=True` is not implemented") + if edge_attrs: + if len(edge_attrs) > 1: + raise NotImplementedError( + "Multiple edge attributes is not implemented (bad value for edge_attrs)" + ) + if weight is not None: + raise TypeError("edge_attrs and weight both given") + [[weight, default]] = edge_attrs.items() + if default is not None and default != 1: + raise NotImplementedError(f"edge default != 1 is not implemented; got {default}") + + if isinstance(graph, nx.MultiDiGraph): + G = MultiDiGraph.from_networkx(graph, weight=weight) + elif isinstance(graph, nx.MultiGraph): + G = MultiGraph.from_networkx(graph, weight=weight) + elif isinstance(graph, nx.DiGraph): 
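+            # (Order matters in these isinstance checks: nx.MultiDiGraph subclasses
+            # nx.DiGraph and nx.MultiGraph subclasses nx.Graph, so the multigraph
+            # branches above must come first.)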
+ G = DiGraph.from_networkx(graph, weight=weight) + elif isinstance(graph, nx.Graph): + G = Graph.from_networkx(graph, weight=weight) + else: + raise TypeError(f"Unsupported type of graph: {type(graph)}") + if preserve_graph_attrs: + G.graph.update(graph.graph) + return G + + @staticmethod + def convert_to_nx(obj, *, name=None): + from graphblas import Matrix, io + + from .classes import Graph + + if isinstance(obj, Graph): + obj = obj.to_networkx() + elif isinstance(obj, Matrix): + if name in { + "adjacency_matrix", + "bethe_hessian_matrix", + "laplacian_matrix", + "normalized_laplacian_matrix", + "tournament_matrix", + }: + obj = io.to_scipy_sparse(obj) + elif name in { + "directed_modularity_matrix", + "floyd_warshall_numpy", + "google_matrix", + "modularity_matrix", + }: + obj = obj.to_dense(fill_value=False) + else: # pragma: no cover + raise RuntimeError(f"Should {name} return a numpy or scipy.sparse array?") + return obj + + @staticmethod + def on_start_tests(items): + try: + import pytest + except ImportError: # pragma: no cover (import) + return + + def key(testpath): + filename, path = testpath.split(":") + *names, testname = path.split(".") + if names: + [classname] = names + return (testname, frozenset({classname, filename})) + return (testname, frozenset({filename})) + + # Reasons to skip tests + # multi_attributed = "unable to handle multi-attributed graphs" + multidigraph = "unable to handle MultiDiGraph" + multigraph = "unable to handle MultiGraph" + + # Which tests to skip + skip = { + # key("test_mst.py:TestBoruvka.test_attributes"): multi_attributed, + # key("test_mst.py:TestBoruvka.test_weight_attribute"): multi_attributed, + key("test_dense.py:TestFloyd.test_zero_weight"): multidigraph, + key("test_dense_numpy.py:test_zero_weight"): multidigraph, + key("test_weighted.py:TestBellmanFordAndGoldbergRadzik.test_multigraph"): multigraph, + # key("test_binary.py:test_compose_multigraph"): multigraph, + # key("test_binary.py:test_difference_multigraph_attributes"): multigraph, + # key("test_binary.py:test_disjoint_union_multigraph"): multigraph, + # key("test_binary.py:test_full_join_multigraph"): multigraph, + # key("test_binary.py:test_intersection_multigraph_attributes"): multigraph, + # key( + # "test_binary.py:test_intersection_multigraph_attributes_node_set_different" + # ): multigraph, + # key("test_binary.py:test_symmetric_difference_multigraph"): multigraph, + # key("test_binary.py:test_union_attributes"): multi_attributed, + # TODO: move failing assertion from `test_union_and_compose` + # key("test_binary.py:test_union_and_compose"): multi_attributed, + # key("test_binary.py:test_union_multigraph"): multigraph, + # key("test_vf2pp.py:test_custom_multigraph4_different_labels"): multigraph, + } + for item in items: + kset = set(item.keywords) + for (test_name, keywords), reason in skip.items(): + if item.name == test_name and keywords.issubset(kset): + item.add_marker(pytest.mark.xfail(reason=reason)) diff --git a/graphblas_algorithms/linalg/__init__.py b/graphblas_algorithms/linalg/__init__.py new file mode 100644 index 0000000..5fb0b2b --- /dev/null +++ b/graphblas_algorithms/linalg/__init__.py @@ -0,0 +1,4 @@ +from .bethehessianmatrix import * +from .graphmatrix import * +from .laplacianmatrix import * +from .modularitymatrix import * diff --git a/graphblas_algorithms/linalg/bethehessianmatrix.py b/graphblas_algorithms/linalg/bethehessianmatrix.py new file mode 100644 index 0000000..edd000f --- /dev/null +++ b/graphblas_algorithms/linalg/bethehessianmatrix.py @@ 
-0,0 +1,25 @@ +from graphblas import Vector, binary + +__all__ = ["bethe_hessian_matrix"] + + +def bethe_hessian_matrix(G, r=None, nodelist=None, *, name="bethe_hessian_matrix"): + A = G._A + if nodelist is not None: + ids = G.list_to_ids(nodelist) + A = A[ids, ids].new() + d = A.reduce_rowwise().new(name="d") + else: + d = G.get_property("plus_rowwise+") + if r is None: + degrees = G.get_property("degrees+") + k = degrees.reduce().get(0) + k2 = (degrees @ degrees).get(0) + r = k2 / k - 1 + n = A.nrows + # result = (r**2 - 1) * I - r * A + D + ri = Vector.from_scalar(r**2 - 1.0, n, name="ri") + ri += d + rI = ri.diag(name=name) + rI(binary.plus) << binary.times(-r, A) # rI += -r * A + return rI diff --git a/graphblas_algorithms/linalg/graphmatrix.py b/graphblas_algorithms/linalg/graphmatrix.py new file mode 100644 index 0000000..0eff6ef --- /dev/null +++ b/graphblas_algorithms/linalg/graphmatrix.py @@ -0,0 +1,19 @@ +from graphblas import unary + +__all__ = ["adjacency_matrix"] + + +def adjacency_matrix(G, nodelist=None, dtype=None, is_weighted=False, *, name="adjacency_matrix"): + if dtype is None: + dtype = G._A.dtype + if G.is_multigraph(): + is_weighted = True # XXX + if nodelist is None: + if not is_weighted: + return unary.one[dtype](G._A).new(name=name) + return G._A.dup(dtype, name=name) + ids = G.list_to_ids(nodelist) + A = G._A[ids, ids].new(dtype, name=name) + if not is_weighted: + A << unary.one(A) + return A diff --git a/graphblas_algorithms/linalg/laplacianmatrix.py b/graphblas_algorithms/linalg/laplacianmatrix.py new file mode 100644 index 0000000..18ed65a --- /dev/null +++ b/graphblas_algorithms/linalg/laplacianmatrix.py @@ -0,0 +1,54 @@ +from graphblas import monoid, unary + +__all__ = [ + "laplacian_matrix", + "normalized_laplacian_matrix", +] + + +def _laplacian_helper(G, nodelist=None, is_weighted=False): + if G.is_multigraph(): + is_weighted = True # XXX + A = G._A + if nodelist is not None: + ids = G.list_to_ids(nodelist) + A = A[ids, ids].new() + if not is_weighted: + A << unary.one(A) + d = A.reduce_rowwise(monoid.plus).new() + elif is_weighted: + d = G.get_property("plus_rowwise+") + else: + d = G.get_property("degrees+") + A = unary.one(A).new() + return d, A + + +def laplacian_matrix(G, nodelist=None, is_weighted=False, *, name="laplacian_matrix"): + d, A = _laplacian_helper(G, nodelist, is_weighted) + D = d.diag(name="D") + return (D - A).new(name=name) + + +def normalized_laplacian_matrix( + G, nodelist=None, is_weighted=False, *, name="normalized_laplacian_matrix" +): + d, A = _laplacian_helper(G, nodelist, is_weighted) + d_invsqrt = unary.sqrt(d).new(name="d_invsqrt") + d_invsqrt << unary.minv(d_invsqrt) + + # XXX: what if `d` is 0 and `d_invsqrt` is infinity? 
(not tested) + # d_invsqrt(unary.isinf(d_invsqrt)) << 0 + + # Calculate: A_weighted = D_invsqrt @ A @ D_invsqrt + A_weighted = d_invsqrt.outer(d_invsqrt).new(mask=A.S, name=name) + A_weighted *= A + # Alt (no idea which implementation is better) + # D_invsqrt = d_invsqrt.diag(name="D_invsqrt") + # A_weighted = (D_invsqrt @ A).new(name=name) + # A_weighted @= D_invsqrt + + d_invsqrt << unary.one(d_invsqrt) + D = d_invsqrt.diag(name="D") + A_weighted << D - A_weighted + return A_weighted diff --git a/graphblas_algorithms/linalg/modularitymatrix.py b/graphblas_algorithms/linalg/modularitymatrix.py new file mode 100644 index 0000000..1efff65 --- /dev/null +++ b/graphblas_algorithms/linalg/modularitymatrix.py @@ -0,0 +1,37 @@ +from graphblas import monoid, unary + +from .laplacianmatrix import _laplacian_helper + +__all__ = ["modularity_matrix", "directed_modularity_matrix"] + + +def modularity_matrix(G, nodelist=None, is_weighted=False, *, name="modularity_matrix"): + k, A = _laplacian_helper(G, nodelist, is_weighted) + m = k.reduce().get(0) + X = k.outer(k).new(float, name=name) + X /= m + X << A - X + return X + + +def directed_modularity_matrix( + G, nodelist=None, is_weighted=False, *, name="directed_modularity_matrix" +): + A = G._A + if nodelist is not None: + ids = G.list_to_ids(nodelist) + A = A[ids, ids].new() + if not is_weighted: + A << unary.one(A) + k_out = A.reduce_rowwise(monoid.plus).new() + k_in = A.reduce_columnwise(monoid.plus).new() + elif is_weighted: + k_out, k_in = G.get_properties("plus_rowwise+ plus_columnwise+") + else: + A = unary.one(A).new() + k_out, k_in = G.get_properties("row_degrees+ column_degrees+") + m = k_out.reduce().get(0) + X = k_out.outer(k_in).new(float, name=name) + X /= m + X << A - X + return X diff --git a/graphblas_algorithms/link_analysis.py b/graphblas_algorithms/link_analysis.py deleted file mode 100644 index bf8389d..0000000 --- a/graphblas_algorithms/link_analysis.py +++ /dev/null @@ -1,141 +0,0 @@ -from warnings import warn - -import networkx as nx -from graphblas import Vector, binary, unary -from graphblas.semiring import plus_first, plus_times - -from ._utils import dict_to_vector, graph_to_adjacency, vector_to_dict - - -def pagerank_core( - A, - alpha=0.85, - personalization=None, - max_iter=100, - tol=1e-06, - nstart=None, - dangling=None, - row_degrees=None, - name="pagerank", -): - N = A.nrows - if A.nvals == 0: - return Vector.new(float, N, name=name) - - # Initial vector - x = Vector.new(float, N, name="x") - if nstart is None: - x[:] = 1.0 / N - else: - denom = nstart.reduce(allow_empty=False).value - if denom == 0: - raise ZeroDivisionError() - x << nstart / denom - - # Personalization vector or scalar - if personalization is None: - p = 1.0 / N - else: - denom = personalization.reduce(allow_empty=False).value - if denom == 0: - raise ZeroDivisionError() - p = (personalization / denom).new(name="p") - - # Inverse of row_degrees - # Fold alpha constant into S - if row_degrees is None: - S = A.reduce_rowwise().new(float, name="S") # XXX: What about self-edges - S << alpha / S - else: - S = (alpha / row_degrees).new(name="S") - - if A.ss.is_iso: - # Fold iso-value of A into S - # This lets us use the plus_first semiring, which is faster - iso_value = A.ss.iso_value - if iso_value != 1: - S *= iso_value - semiring = plus_first[float] - else: - semiring = plus_times[float] - - is_dangling = S.nvals < N - if is_dangling: - dangling_mask = Vector.new(float, N, name="dangling_mask") - dangling_mask(mask=~S.S) << 1.0 - # Fold alpha 
constant into dangling_weights (or dangling_mask) - if dangling is not None: - dangling_weights = (alpha / dangling.reduce(allow_empty=False).value * dangling).new( - name="dangling_weights" - ) - elif personalization is None: - # Fast case (and common case); is iso-valued - dangling_mask(mask=dangling_mask.S) << alpha * p - else: - dangling_weights = (alpha * p).new(name="dangling_weights") - - # Fold constant into p - p *= 1 - alpha - - # Power iteration: make up to max_iter iterations - xprev = Vector.new(float, N, name="x_prev") - w = Vector.new(float, N, name="w") - for _ in range(max_iter): - xprev, x = x, xprev - - # x << alpha * ((xprev * S) @ A + "dangling_weights") + (1 - alpha) * p - x << p - if is_dangling: - if dangling is None and personalization is None: - # Fast case: add a scalar; x is still iso-valued (b/c p is also scalar) - x += xprev @ dangling_mask - else: - # Add a vector - x += plus_first(xprev @ dangling_mask) * dangling_weights - w << xprev * S - x += semiring(w @ A) # plus_first if A.ss.is_iso else plus_times - - # Check convergence, l1 norm: err = sum(abs(xprev - x)) - xprev << binary.minus(xprev | x, require_monoid=False) - xprev << unary.abs(xprev) - err = xprev.reduce().value - if err < N * tol: - x.name = name - return x - raise nx.PowerIterationFailedConvergence(max_iter) - - -def pagerank( - G, - alpha=0.85, - personalization=None, - max_iter=100, - tol=1e-06, - nstart=None, - weight="weight", - dangling=None, -): - warn("", DeprecationWarning, stacklevel=2) - N = len(G) - if N == 0: - return {} - A, key_to_id = graph_to_adjacency(G, weight=weight, dtype=float) - # We'll normalize initial, personalization, and dangling vectors later - x = dict_to_vector(nstart, key_to_id, dtype=float, name="nstart") - p = dict_to_vector(personalization, key_to_id, dtype=float, name="personalization") - row_degrees = A.reduce_rowwise().new(name="row_degrees") # XXX: What about self-edges? - if dangling is not None and row_degrees.nvals < N: - dangling_weights = dict_to_vector(dangling, key_to_id, dtype=float, name="dangling") - else: - dangling_weights = None - result = pagerank_core( - A, - alpha=alpha, - personalization=p, - max_iter=max_iter, - tol=tol, - nstart=x, - dangling=dangling_weights, - row_degrees=row_degrees, - ) - return vector_to_dict(result, key_to_id, fillvalue=0.0) diff --git a/graphblas_algorithms/nxapi/__init__.py b/graphblas_algorithms/nxapi/__init__.py new file mode 100644 index 0000000..97d4249 --- /dev/null +++ b/graphblas_algorithms/nxapi/__init__.py @@ -0,0 +1,41 @@ +from .boundary import * +from .centrality import * +from .cluster import * +from .community import * +from .components import * +from .core import * +from .cuts import * +from .dag import * +from .dominating import * +from .efficiency_measures import * +from .generators import * +from .isolate import * +from .isomorphism import fast_could_be_isomorphic, faster_could_be_isomorphic +from .linalg import * +from .link_analysis import * +from .lowest_common_ancestors import * +from .operators import * +from .reciprocity import * +from .regular import * +from .shortest_paths import * +from .simple_paths import * +from .smetric import * +from .structuralholes import * +from .traversal import * +from .triads import * +from .tournament import is_tournament + +from . import centrality +from . import cluster +from . import community +from . import components +from . import efficiency_measures +from . import generators +from . import isomorphism +from . import linalg +from . 
import link_analysis +from . import lowest_common_ancestors +from . import operators +from . import shortest_paths +from . import tournament +from . import traversal diff --git a/graphblas_algorithms/nxapi/_utils.py b/graphblas_algorithms/nxapi/_utils.py new file mode 100644 index 0000000..0bb9617 --- /dev/null +++ b/graphblas_algorithms/nxapi/_utils.py @@ -0,0 +1,127 @@ +from math import ceil +from numbers import Number + +try: + from itertools import pairwise # Added in Python 3.10 +except ImportError: + + def pairwise(it): + it = iter(it) + for prev in it: + for cur in it: + yield (prev, cur) + prev = cur + + +BYTES_UNITS = { + "": 1, + "b": 1, + "kb": 1000, + "mb": 1000**2, + "gb": 1000**3, + "tb": 1000**4, + "pb": 1000**5, + "eb": 1000**6, + "zb": 1000**7, + "kib": 1024, + "mib": 1024**2, + "gib": 1024**3, + "tib": 1024**4, + "pib": 1024**5, + "eib": 1024**6, + "zib": 1024**7, +} + + +def normalize_chunksize(chunksize, itemsize=1, N=None): + if chunksize is None: + return None + if isinstance(chunksize, Number): + rv = int(chunksize) + if rv <= 0 or N is not None and rv >= N: + return None + return rv + if not isinstance(chunksize, str): + raise TypeError(f"chunksize must be a number or a string; got {type(chunksize)}") + chunkstring = chunksize.replace(" ", "").replace("_", "").lower() + if not chunkstring or chunkstring == "all": + return None + for i, c in enumerate(reversed(chunkstring)): + if c.isdigit(): + index = len(chunkstring) - i + break + else: + chunkstring = f"1{chunkstring}" + index = 1 + + prefix = chunkstring[:index] + suffix = chunkstring[index:] + + try: + number = float(prefix) + except ValueError as exc: + raise ValueError( + f"Bad chunksize: {chunksize!r}. Could not interpret {prefix!r} as a number." + ) from exc + + if suffix in {"chunk", "chunks"}: + if number <= 1: + return None + if N is None: + raise TypeError( + f"N argument is required to determine chunksize to split into {int(number)} chunks" + ) + rv = ceil(N / number) + else: + scale = BYTES_UNITS.get(suffix) + if scale is None: + raise ValueError( + f"Bad chunksize: {chunksize!r}. Could not interpret {suffix!r} as a bytes unit." 
+ ) + number *= scale + if chunkstring[-1] == "b": + number = max(1, number / itemsize) + rv = int(round(number)) + if rv <= 0 or N is not None and rv >= N: + return None + return rv + + +def partition(chunksize, L, *, evenly=True): + """Partition a list into chunks""" + N = len(L) + if N == 0: + return + chunksize = int(chunksize) + if chunksize <= 0 or chunksize >= N: + yield L + return + if chunksize == 1: + yield from L + return + if evenly: + k = ceil(len(L) / chunksize) + if k * chunksize != N: + yield from split_evenly(k, L) + return + for start, stop in pairwise(range(0, N + chunksize, chunksize)): + yield L[start:stop] + + +def split_evenly(k, L): + """Split a list into approximately-equal parts""" + N = len(L) + if N == 0: + return + k = int(k) + if k <= 1: + yield L + return + start = 0 + for i in range(1, k): + stop = (N * i + k - 1) // k + if stop != start: + yield L[start:stop] + start = stop + if stop != N: + yield L[stop:] diff --git a/graphblas_algorithms/nxapi/boundary.py b/graphblas_algorithms/nxapi/boundary.py new file mode 100644 index 0000000..662cfe4 --- /dev/null +++ b/graphblas_algorithms/nxapi/boundary.py @@ -0,0 +1,56 @@ +import itertools + +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = ["edge_boundary", "node_boundary"] + + +def edge_boundary(G, nbunch1, nbunch2=None, data=False, keys=False, default=None): + # TODO: figure out data, keys, and default arguments and handle multigraph + # data=True will be tested in test_mst.py + is_multigraph = G.is_multigraph() + # This may be wrong for multi-attributed graphs + if data is True: + weight = "weight" + elif not data: + weight = None + else: + weight = data + G = to_graph(G, weight=weight) + v1 = G.set_to_vector(nbunch1, ignore_extra=True) + v2 = G.set_to_vector(nbunch2, ignore_extra=True) + result = algorithms.edge_boundary(G, v1, v2, is_weighted=is_multigraph or data) + rows, cols, vals = result.to_coo(values=is_multigraph or data) + id_to_key = G.id_to_key + if data: + it = zip( + (id_to_key[row] for row in rows), + (id_to_key[col] for col in cols), + # Unsure about this; data argument may mean *all* edge attributes + ({weight: val} for val in vals), + strict=True, + ) + else: + it = zip( + (id_to_key[row] for row in rows), + (id_to_key[col] for col in cols), + strict=True, + ) + if is_multigraph: + # Edge weights indicate number of times to repeat edges + it = itertools.chain.from_iterable( + itertools.starmap(itertools.repeat, zip(it, vals, strict=True)) + ) + return it + + +def node_boundary(G, nbunch1, nbunch2=None): + G = to_graph(G) + v1 = G.set_to_vector(nbunch1, ignore_extra=True) + if nbunch2 is not None: + mask = G.set_to_vector(nbunch2, ignore_extra=True).S + else: + mask = None + result = algorithms.node_boundary(G, v1, mask=mask) + return G.vector_to_nodeset(result) diff --git a/graphblas_algorithms/nxapi/centrality/__init__.py b/graphblas_algorithms/nxapi/centrality/__init__.py new file mode 100644 index 0000000..e5c137d --- /dev/null +++ b/graphblas_algorithms/nxapi/centrality/__init__.py @@ -0,0 +1,3 @@ +from .degree_alg import * +from .eigenvector import * +from .katz import * diff --git a/graphblas_algorithms/nxapi/centrality/degree_alg.py b/graphblas_algorithms/nxapi/centrality/degree_alg.py new file mode 100644 index 0000000..66776b0 --- /dev/null +++ b/graphblas_algorithms/nxapi/centrality/degree_alg.py @@ -0,0 +1,25 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import 
to_directed_graph, to_graph +from graphblas_algorithms.utils import not_implemented_for + +__all__ = ["degree_centrality", "in_degree_centrality", "out_degree_centrality"] + + +def degree_centrality(G): + G = to_graph(G) + result = algorithms.degree_centrality(G) + return G.vector_to_nodemap(result, fill_value=0.0) + + +@not_implemented_for("undirected") +def in_degree_centrality(G): + G = to_directed_graph(G) + result = algorithms.in_degree_centrality(G) + return G.vector_to_nodemap(result, fill_value=0.0) + + +@not_implemented_for("undirected") +def out_degree_centrality(G): + G = to_directed_graph(G) + result = algorithms.out_degree_centrality(G) + return G.vector_to_nodemap(result, fill_value=0.0) diff --git a/graphblas_algorithms/nxapi/centrality/eigenvector.py b/graphblas_algorithms/nxapi/centrality/eigenvector.py new file mode 100644 index 0000000..29dae1d --- /dev/null +++ b/graphblas_algorithms/nxapi/centrality/eigenvector.py @@ -0,0 +1,22 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph +from graphblas_algorithms.utils import not_implemented_for + +from ..exception import NetworkXError, NetworkXPointlessConcept, PowerIterationFailedConvergence + +__all__ = ["eigenvector_centrality"] + + +@not_implemented_for("multigraph") +def eigenvector_centrality(G, max_iter=100, tol=1.0e-6, nstart=None, weight=None): + G = to_graph(G, weight=weight, dtype=float) + if len(G) == 0: + raise NetworkXPointlessConcept("cannot compute centrality for the null graph") + x = G.dict_to_vector(nstart, dtype=float, name="nstart") + try: + result = algorithms.eigenvector_centrality(G, max_iter=max_iter, tol=tol, nstart=x) + except algorithms.exceptions.ConvergenceFailure as e: + raise PowerIterationFailedConvergence(*e.args) from e + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + return G.vector_to_nodemap(result) diff --git a/graphblas_algorithms/nxapi/centrality/katz.py b/graphblas_algorithms/nxapi/centrality/katz.py new file mode 100644 index 0000000..2611a59 --- /dev/null +++ b/graphblas_algorithms/nxapi/centrality/katz.py @@ -0,0 +1,40 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph +from graphblas_algorithms.utils import not_implemented_for + +from ..exception import NetworkXError, PowerIterationFailedConvergence + +__all__ = ["katz_centrality"] + + +@not_implemented_for("multigraph") +def katz_centrality( + G, + alpha=0.1, + beta=1.0, + max_iter=1000, + tol=1.0e-6, + nstart=None, + normalized=True, + weight=None, +): + G = to_graph(G, weight=weight, dtype=float) + if len(G) == 0: + return {} + x = G.dict_to_vector(nstart, dtype=float, name="nstart") + try: + b = float(beta) + except (TypeError, ValueError): + try: + b = G.dict_to_vector(beta, dtype=float, name="beta") + except (TypeError, ValueError, AttributeError) as e: + raise NetworkXError(*e.args) from e + try: + result = algorithms.katz_centrality( + G, alpha=alpha, beta=b, max_iter=max_iter, tol=tol, nstart=x, normalized=normalized + ) + except algorithms.exceptions.ConvergenceFailure as e: + raise PowerIterationFailedConvergence(*e.args) from e + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + return G.vector_to_nodemap(result) diff --git a/graphblas_algorithms/nxapi/cluster.py b/graphblas_algorithms/nxapi/cluster.py new file mode 100644 index 0000000..8e61f9b --- /dev/null +++ b/graphblas_algorithms/nxapi/cluster.py 
@@ -0,0 +1,134 @@ +from graphblas import monoid + +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph +from graphblas_algorithms.classes.graph import to_undirected_graph +from graphblas_algorithms.utils import not_implemented_for + +from ._utils import normalize_chunksize, partition + +__all__ = [ + "triangles", + "transitivity", + "clustering", + "average_clustering", + "square_clustering", + "generalized_degree", +] + + +@not_implemented_for("directed") +def triangles(G, nodes=None): + G = to_undirected_graph(G, dtype=bool) + if len(G) == 0: + return {} + if nodes in G: + return algorithms.single_triangle(G, nodes) + mask = G.list_to_mask(nodes) + result = algorithms.triangles(G, mask=mask) + return G.vector_to_nodemap(result, mask=mask, fill_value=0) + + +def transitivity(G): + G = to_graph(G, dtype=bool) # directed or undirected + if len(G) == 0: + return 0 + if G.is_directed(): + func = algorithms.transitivity_directed + else: + func = algorithms.transitivity + return G._cacheit("transitivity", func, G) + + +def clustering(G, nodes=None, weight=None): + G = to_graph(G, weight=weight) # to directed or undirected + if len(G) == 0: + return {} + weighted = weight is not None + if nodes in G: + if G.is_directed(): + return algorithms.single_clustering_directed(G, nodes, weighted=weighted) + return algorithms.single_clustering(G, nodes, weighted=weighted) + mask = G.list_to_mask(nodes) + if G.is_directed(): + result = algorithms.clustering_directed(G, weighted=weighted, mask=mask) + else: + result = algorithms.clustering(G, weighted=weighted, mask=mask) + return G.vector_to_nodemap(result, mask=mask, fill_value=0.0) + + +def average_clustering(G, nodes=None, weight=None, count_zeros=True): + G = to_graph(G, weight=weight) # to directed or undirected + if len(G) == 0: + raise ZeroDivisionError + weighted = weight is not None + mask = G.list_to_mask(nodes) + if G.is_directed(): + func = algorithms.average_clustering_directed + else: + func = algorithms.average_clustering + if mask is None: + return G._cacheit( + f"average_clustering(count_zeros={count_zeros})", + func, + G, + weighted=weighted, + count_zeros=count_zeros, + ) + return func(G, weighted=weighted, count_zeros=count_zeros, mask=mask) + + +# TODO: should this move into algorithms? +def _square_clustering_split(G, node_ids=None, *, chunksize): + if node_ids is None: + node_ids, _ = G._A.reduce_rowwise(monoid.any).to_coo(values=False) + result = None + for chunk_ids in partition(chunksize, node_ids): + res = algorithms.square_clustering(G, chunk_ids) + if result is None: + result = res + else: + result << monoid.any(result | res) + return result + + +def square_clustering(G, nodes=None, *, chunksize="256 MiB"): + # `chunksize` is used to split the computation into chunks. + # square_clustering computes `A @ A`, which can get very large, even dense. + # The default `chunksize` is to choose the number so that `Asubset @ A` + # will be about 256 MB if dense. + G = to_undirected_graph(G) + if len(G) == 0: + return {} + + chunksize = normalize_chunksize(chunksize, len(G) * G._A.dtype.np_type.itemsize, len(G)) + + if nodes is None: + # Should we use this one for subsets of nodes as well? 
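+        # (Rough sizing sketch for the chunking used below: a dense row of
+        # `Asubset @ A` holds `len(G)` values of `itemsize` bytes, so a "256 MiB"
+        # budget allows roughly `256 * 2**20 // (len(G) * itemsize)` rows per
+        # chunk; see `normalize_chunksize` in `nxapi/_utils.py`.)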
+ if chunksize is None: + result = algorithms.square_clustering(G) + else: + result = _square_clustering_split(G, chunksize=chunksize) + return G.vector_to_nodemap(result, fill_value=0) + if nodes in G: + idx = G._key_to_id[nodes] + return algorithms.single_square_clustering(G, idx) + ids = G.list_to_ids(nodes) + if chunksize is None: + result = algorithms.square_clustering(G, ids) + else: + result = _square_clustering_split(G, ids, chunksize=chunksize) + return G.vector_to_nodemap(result) + + +@not_implemented_for("directed") +def generalized_degree(G, nodes=None): + G = to_undirected_graph(G) + if len(G) == 0: + return {} + if nodes in G: + result = algorithms.single_generalized_degree(G, nodes) + return G.vector_to_nodemap(result) + mask = G.list_to_mask(nodes) + result = algorithms.generalized_degree(G, mask=mask) + return G.matrix_to_vectornodemap(result) diff --git a/graphblas_algorithms/nxapi/community/__init__.py b/graphblas_algorithms/nxapi/community/__init__.py new file mode 100644 index 0000000..fd9f65d --- /dev/null +++ b/graphblas_algorithms/nxapi/community/__init__.py @@ -0,0 +1 @@ +from .quality import * diff --git a/graphblas_algorithms/nxapi/community/quality.py b/graphblas_algorithms/nxapi/community/quality.py new file mode 100644 index 0000000..bcb96ea --- /dev/null +++ b/graphblas_algorithms/nxapi/community/quality.py @@ -0,0 +1,16 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = [] + + +def intra_community_edges(G, partition): + G = to_graph(G) + partition = [G.set_to_vector(block, ignore_extra=True) for block in partition] + return algorithms.intra_community_edges(G, partition) + + +def inter_community_edges(G, partition): + G = to_graph(G) + partition = [G.set_to_vector(block, ignore_extra=True) for block in partition] + return algorithms.inter_community_edges(G, partition) diff --git a/graphblas_algorithms/nxapi/components/__init__.py b/graphblas_algorithms/nxapi/components/__init__.py new file mode 100644 index 0000000..bb0aea6 --- /dev/null +++ b/graphblas_algorithms/nxapi/components/__init__.py @@ -0,0 +1,2 @@ +from .connected import * +from .weakly_connected import * diff --git a/graphblas_algorithms/nxapi/components/connected.py b/graphblas_algorithms/nxapi/components/connected.py new file mode 100644 index 0000000..d55a430 --- /dev/null +++ b/graphblas_algorithms/nxapi/components/connected.py @@ -0,0 +1,27 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.algorithms.exceptions import PointlessConcept +from graphblas_algorithms.classes.graph import to_undirected_graph +from graphblas_algorithms.utils import not_implemented_for + +from ..exception import NetworkXPointlessConcept + +__all__ = [ + "is_connected", + "node_connected_component", +] + + +@not_implemented_for("directed") +def is_connected(G): + G = to_undirected_graph(G) + try: + return algorithms.is_connected(G) + except PointlessConcept as e: + raise NetworkXPointlessConcept(*e.args) from e + + +@not_implemented_for("directed") +def node_connected_component(G, n): + G = to_undirected_graph(G) + rv = algorithms.node_connected_component(G, n) + return G.vector_to_nodeset(rv) diff --git a/graphblas_algorithms/nxapi/components/weakly_connected.py b/graphblas_algorithms/nxapi/components/weakly_connected.py new file mode 100644 index 0000000..c72b532 --- /dev/null +++ b/graphblas_algorithms/nxapi/components/weakly_connected.py @@ -0,0 +1,19 @@ +from graphblas_algorithms import algorithms +from 
graphblas_algorithms.algorithms.exceptions import PointlessConcept +from graphblas_algorithms.classes.digraph import to_directed_graph +from graphblas_algorithms.utils import not_implemented_for + +from ..exception import NetworkXPointlessConcept + +__all__ = [ + "is_weakly_connected", +] + + +@not_implemented_for("undirected") +def is_weakly_connected(G): + G = to_directed_graph(G) + try: + return algorithms.is_weakly_connected(G) + except PointlessConcept as e: + raise NetworkXPointlessConcept(*e.args) from e diff --git a/graphblas_algorithms/nxapi/core.py b/graphblas_algorithms/nxapi/core.py new file mode 100644 index 0000000..7da6e81 --- /dev/null +++ b/graphblas_algorithms/nxapi/core.py @@ -0,0 +1,13 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.graph import to_undirected_graph +from graphblas_algorithms.utils import not_implemented_for + +__all__ = ["k_truss"] + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +def k_truss(G, k): + G = to_undirected_graph(G, dtype=bool) + result = algorithms.k_truss(G, k) + return result diff --git a/graphblas_algorithms/nxapi/cuts.py b/graphblas_algorithms/nxapi/cuts.py new file mode 100644 index 0000000..8343109 --- /dev/null +++ b/graphblas_algorithms/nxapi/cuts.py @@ -0,0 +1,74 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = [ + "cut_size", + "volume", + "normalized_cut_size", + "conductance", + "edge_expansion", + "mixing_expansion", + "node_expansion", + "boundary_expansion", +] + + +def cut_size(G, S, T=None, weight=None): + is_multigraph = G.is_multigraph() + G = to_graph(G, weight=weight) + S = G.set_to_vector(S, ignore_extra=True) + T = G.set_to_vector(T, ignore_extra=True) + return algorithms.cut_size(G, S, T, is_weighted=is_multigraph or weight is not None) + + +def volume(G, S, weight=None): + is_multigraph = G.is_multigraph() + G = to_graph(G, weight=weight) + S = G.list_to_vector(S) + return algorithms.volume(G, S, weighted=is_multigraph or weight is not None) + + +def normalized_cut_size(G, S, T=None, weight=None): + G = to_graph(G, weight=weight) + S = G.set_to_vector(S, ignore_extra=True) + if T is None: + T = (~S.S).new() + else: + T = G.set_to_vector(T, ignore_extra=True) + return algorithms.normalized_cut_size(G, S, T) + + +def conductance(G, S, T=None, weight=None): + G = to_graph(G, weight=weight) + S = G.set_to_vector(S, ignore_extra=True) + if T is None: + T = (~S.S).new() + else: + T = G.set_to_vector(T, ignore_extra=True) + return algorithms.conductance(G, S, T) + + +def edge_expansion(G, S, T=None, weight=None): + G = to_graph(G, weight=weight) + S = G.set_to_vector(S, ignore_extra=True) + T = G.set_to_vector(T, ignore_extra=True) + return algorithms.edge_expansion(G, S, T) + + +def mixing_expansion(G, S, T=None, weight=None): + G = to_graph(G, weight=weight) + S = G.set_to_vector(S, ignore_extra=True) + T = G.set_to_vector(T, ignore_extra=True) + return algorithms.mixing_expansion(G, S, T) + + +def node_expansion(G, S): + G = to_graph(G) + S = G.list_to_vector(S) + return algorithms.node_expansion(G, S) + + +def boundary_expansion(G, S): + G = to_graph(G) + S = G.set_to_vector(S, ignore_extra=True) + return algorithms.boundary_expansion(G, S) diff --git a/graphblas_algorithms/nxapi/dag.py b/graphblas_algorithms/nxapi/dag.py new file mode 100644 index 0000000..56150e3 --- /dev/null +++ b/graphblas_algorithms/nxapi/dag.py @@ -0,0 +1,26 @@ +from graphblas_algorithms import algorithms +from 
graphblas_algorithms.classes.digraph import to_graph + +from .exception import NetworkXError + +__all__ = ["descendants", "ancestors"] + + +def descendants(G, source): + G = to_graph(G) + try: + result = algorithms.descendants(G, source) + except KeyError as e: + raise NetworkXError(*e.args) from e + else: + return G.vector_to_nodeset(result) + + +def ancestors(G, source): + G = to_graph(G) + try: + result = algorithms.ancestors(G, source) + except KeyError as e: + raise NetworkXError(*e.args) from e + else: + return G.vector_to_nodeset(result) diff --git a/graphblas_algorithms/nxapi/dominating.py b/graphblas_algorithms/nxapi/dominating.py new file mode 100644 index 0000000..b11a242 --- /dev/null +++ b/graphblas_algorithms/nxapi/dominating.py @@ -0,0 +1,10 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = ["is_dominating_set"] + + +def is_dominating_set(G, nbunch): + G = to_graph(G) + v = G.set_to_vector(nbunch, ignore_extra=True) + return algorithms.is_dominating_set(G, v) diff --git a/graphblas_algorithms/nxapi/efficiency_measures.py b/graphblas_algorithms/nxapi/efficiency_measures.py new file mode 100644 index 0000000..06971a2 --- /dev/null +++ b/graphblas_algorithms/nxapi/efficiency_measures.py @@ -0,0 +1,9 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.graph import to_undirected_graph +from graphblas_algorithms.utils import not_implemented_for + + +@not_implemented_for("directed") +def efficiency(G, u, v): + G = to_undirected_graph(G) + return algorithms.efficiency(G, u, v) diff --git a/graphblas_algorithms/nxapi/exception.py b/graphblas_algorithms/nxapi/exception.py new file mode 100644 index 0000000..0804bb1 --- /dev/null +++ b/graphblas_algorithms/nxapi/exception.py @@ -0,0 +1,43 @@ +try: + import networkx as nx +except ImportError: + + class NetworkXError(Exception): + pass + + class NetworkXNoPath(Exception): + pass + + class NetworkXPointlessConcept(Exception): + pass + + class NetworkXUnbounded(Exception): + pass + + class NodeNotFound(Exception): + pass + + class PowerIterationFailedConvergence(Exception): + pass + +else: + from networkx import ( + NetworkXError, + NetworkXNoPath, + NetworkXPointlessConcept, + NetworkXUnbounded, + NodeNotFound, + PowerIterationFailedConvergence, + ) +try: + import scipy as sp +except ImportError: + + class ArpackNoConvergence(Exception): + def __init__(self, msg, eigenvalues, eigenvectors): + super().__init__(msg) + self.eigenvalues = eigenvalues + self.eigenvectors = eigenvectors + +else: + from scipy.sparse.linalg import ArpackNoConvergence diff --git a/graphblas_algorithms/nxapi/generators/__init__.py b/graphblas_algorithms/nxapi/generators/__init__.py new file mode 100644 index 0000000..65a6526 --- /dev/null +++ b/graphblas_algorithms/nxapi/generators/__init__.py @@ -0,0 +1 @@ +from .ego import * diff --git a/graphblas_algorithms/nxapi/generators/ego.py b/graphblas_algorithms/nxapi/generators/ego.py new file mode 100644 index 0000000..e591cb3 --- /dev/null +++ b/graphblas_algorithms/nxapi/generators/ego.py @@ -0,0 +1,11 @@ +from graphblas_algorithms import generators +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = ["ego_graph"] + + +def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None): + G = to_graph(G, weight=distance) + return generators.ego_graph( + G, n, radius=radius, center=center, undirected=undirected, is_weighted=distance is not None + ) diff --git 
a/graphblas_algorithms/nxapi/isolate.py b/graphblas_algorithms/nxapi/isolate.py new file mode 100644 index 0000000..728e9d8 --- /dev/null +++ b/graphblas_algorithms/nxapi/isolate.py @@ -0,0 +1,20 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = ["is_isolate", "isolates", "number_of_isolates"] + + +def is_isolate(G, n): + G = to_graph(G) + return algorithms.is_isolate(G, n) + + +def isolates(G): + G = to_graph(G) + result = algorithms.isolates(G) + return G.vector_to_nodeset(result) # Return type is iterable + + +def number_of_isolates(G): + G = to_graph(G) + return algorithms.number_of_isolates(G) diff --git a/graphblas_algorithms/nxapi/isomorphism/__init__.py b/graphblas_algorithms/nxapi/isomorphism/__init__.py new file mode 100644 index 0000000..e701b70 --- /dev/null +++ b/graphblas_algorithms/nxapi/isomorphism/__init__.py @@ -0,0 +1 @@ +from .isomorph import * diff --git a/graphblas_algorithms/nxapi/isomorphism/isomorph.py b/graphblas_algorithms/nxapi/isomorphism/isomorph.py new file mode 100644 index 0000000..1dedb64 --- /dev/null +++ b/graphblas_algorithms/nxapi/isomorphism/isomorph.py @@ -0,0 +1,25 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = [ + "fast_could_be_isomorphic", + "faster_could_be_isomorphic", +] + + +def fast_could_be_isomorphic(G1, G2): + G1 = to_graph(G1) + G2 = to_graph(G2) + return algorithms.fast_could_be_isomorphic(G1, G2) + + +fast_graph_could_be_isomorphic = fast_could_be_isomorphic + + +def faster_could_be_isomorphic(G1, G2): + G1 = to_graph(G1) + G2 = to_graph(G2) + return algorithms.faster_could_be_isomorphic(G1, G2) + + +faster_graph_could_be_isomorphic = faster_could_be_isomorphic diff --git a/graphblas_algorithms/nxapi/linalg/__init__.py b/graphblas_algorithms/nxapi/linalg/__init__.py new file mode 100644 index 0000000..aada0f4 --- /dev/null +++ b/graphblas_algorithms/nxapi/linalg/__init__.py @@ -0,0 +1,5 @@ +from . 
import bethehessianmatrix, graphmatrix, laplacianmatrix, modularitymatrix +from .bethehessianmatrix import * +from .graphmatrix import * +from .laplacianmatrix import * +from .modularitymatrix import * diff --git a/graphblas_algorithms/nxapi/linalg/bethehessianmatrix.py b/graphblas_algorithms/nxapi/linalg/bethehessianmatrix.py new file mode 100644 index 0000000..7fa30b4 --- /dev/null +++ b/graphblas_algorithms/nxapi/linalg/bethehessianmatrix.py @@ -0,0 +1,12 @@ +from graphblas_algorithms import linalg +from graphblas_algorithms.classes.graph import to_undirected_graph +from graphblas_algorithms.utils import not_implemented_for + +__all__ = ["bethe_hessian_matrix"] + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +def bethe_hessian_matrix(G, r=None, nodelist=None): + G = to_undirected_graph(G) + return linalg.bethe_hessian_matrix(G, r=r, nodelist=nodelist) diff --git a/graphblas_algorithms/nxapi/linalg/graphmatrix.py b/graphblas_algorithms/nxapi/linalg/graphmatrix.py new file mode 100644 index 0000000..0b3e7d9 --- /dev/null +++ b/graphblas_algorithms/nxapi/linalg/graphmatrix.py @@ -0,0 +1,9 @@ +from graphblas_algorithms import linalg +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = ["adjacency_matrix"] + + +def adjacency_matrix(G, nodelist=None, dtype=None, weight="weight"): + G = to_graph(G, weight=weight, dtype=dtype) + return linalg.adjacency_matrix(G, nodelist, dtype, is_weighted=weight is not None) diff --git a/graphblas_algorithms/nxapi/linalg/laplacianmatrix.py b/graphblas_algorithms/nxapi/linalg/laplacianmatrix.py new file mode 100644 index 0000000..752ca1e --- /dev/null +++ b/graphblas_algorithms/nxapi/linalg/laplacianmatrix.py @@ -0,0 +1,14 @@ +from graphblas_algorithms import linalg +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = ["laplacian_matrix", "normalized_laplacian_matrix"] + + +def laplacian_matrix(G, nodelist=None, weight="weight"): + G = to_graph(G, weight=weight) + return linalg.laplacian_matrix(G, nodelist, is_weighted=weight is not None) + + +def normalized_laplacian_matrix(G, nodelist=None, weight="weight"): + G = to_graph(G, weight=weight) + return linalg.normalized_laplacian_matrix(G, nodelist, is_weighted=weight is not None) diff --git a/graphblas_algorithms/nxapi/linalg/modularitymatrix.py b/graphblas_algorithms/nxapi/linalg/modularitymatrix.py new file mode 100644 index 0000000..76e160f --- /dev/null +++ b/graphblas_algorithms/nxapi/linalg/modularitymatrix.py @@ -0,0 +1,20 @@ +from graphblas_algorithms import linalg +from graphblas_algorithms.classes.digraph import to_directed_graph +from graphblas_algorithms.classes.graph import to_undirected_graph +from graphblas_algorithms.utils import not_implemented_for + +__all__ = ["modularity_matrix", "directed_modularity_matrix"] + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +def modularity_matrix(G, nodelist=None, weight=None): + G = to_undirected_graph(G, weight=weight) + return linalg.modularity_matrix(G, nodelist, is_weighted=weight is not None) + + +@not_implemented_for("undirected") +@not_implemented_for("multigraph") +def directed_modularity_matrix(G, nodelist=None, weight=None): + G = to_directed_graph(G, weight=weight) + return linalg.directed_modularity_matrix(G, nodelist, is_weighted=weight is not None) diff --git a/graphblas_algorithms/nxapi/link_analysis/__init__.py b/graphblas_algorithms/nxapi/link_analysis/__init__.py new file mode 100644 index 0000000..938b30c --- /dev/null +++ 
b/graphblas_algorithms/nxapi/link_analysis/__init__.py @@ -0,0 +1,2 @@ +from .hits_alg import * +from .pagerank_alg import * diff --git a/graphblas_algorithms/nxapi/link_analysis/hits_alg.py b/graphblas_algorithms/nxapi/link_analysis/hits_alg.py new file mode 100644 index 0000000..70e9ed6 --- /dev/null +++ b/graphblas_algorithms/nxapi/link_analysis/hits_alg.py @@ -0,0 +1,22 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +from ..exception import ArpackNoConvergence + +__all__ = ["hits"] + + +def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True): + G = to_graph(G, weight="weight", dtype=float) + if len(G) == 0: + return {}, {} + x = G.dict_to_vector(nstart, dtype=float, name="nstart") + try: + h, a = algorithms.hits(G, max_iter=max_iter, tol=tol, nstart=x, normalized=normalized) + except algorithms.exceptions.ConvergenceFailure as e: + if max_iter < 1: + raise ValueError(*e.args) from e + raise ArpackNoConvergence(*e.args, (), ()) from e + # TODO: it would be nice if networkx raised their own exception, such as: + # raise nx.PowerIterationFailedConvergence(*e.args) from e + return G.vector_to_nodemap(h, fill_value=0), G.vector_to_nodemap(a, fill_value=0) diff --git a/graphblas_algorithms/nxapi/link_analysis/pagerank_alg.py b/graphblas_algorithms/nxapi/link_analysis/pagerank_alg.py new file mode 100644 index 0000000..22e977e --- /dev/null +++ b/graphblas_algorithms/nxapi/link_analysis/pagerank_alg.py @@ -0,0 +1,63 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +from ..exception import PowerIterationFailedConvergence + +_all = ["pagerank", "google_matrix"] + + +def pagerank( + G, + alpha=0.85, + personalization=None, + max_iter=100, + tol=1e-06, + nstart=None, + weight="weight", + dangling=None, +): + G = to_graph(G, weight=weight, dtype=float) + N = len(G) + if N == 0: + return {} + # We'll normalize initial, personalization, and dangling vectors later + x = G.dict_to_vector(nstart, dtype=float, name="nstart") + p = G.dict_to_vector(personalization, dtype=float, name="personalization") + row_degrees = G.get_property("plus_rowwise+") # XXX: What about self-edges? 
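+    # `row_degrees.nvals < N` means at least one row of the adjacency matrix is empty,
+    # i.e. the graph has dangling nodes, so the user-supplied `dangling` dict is only
+    # converted to a vector when it can actually affect the result.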
+ if dangling is not None and row_degrees.nvals < N: + dangling_weights = G.dict_to_vector(dangling, dtype=float, name="dangling") + else: + dangling_weights = None + try: + result = algorithms.pagerank( + G, + alpha=alpha, + personalization=p, + max_iter=max_iter, + tol=tol, + nstart=x, + dangling=dangling_weights, + row_degrees=row_degrees, + ) + except algorithms.exceptions.ConvergenceFailure as e: + raise PowerIterationFailedConvergence(*e.args) from e + else: + return G.vector_to_nodemap(result, fill_value=0.0) + + +def google_matrix( + G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None +): + G = to_graph(G, weight=weight, dtype=float) + p = G.dict_to_vector(personalization, dtype=float, name="personalization") + if dangling is not None and G.get_property("row_degrees+").nvals < len(G): + dangling_weights = G.dict_to_vector(dangling, dtype=float, name="dangling") + else: + dangling_weights = None + return algorithms.google_matrix( + G, + alpha=alpha, + personalization=p, + nodelist=nodelist, + dangling=dangling_weights, + ) diff --git a/graphblas_algorithms/nxapi/lowest_common_ancestors.py b/graphblas_algorithms/nxapi/lowest_common_ancestors.py new file mode 100644 index 0000000..f94e8c2 --- /dev/null +++ b/graphblas_algorithms/nxapi/lowest_common_ancestors.py @@ -0,0 +1,11 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_directed_graph +from graphblas_algorithms.utils import not_implemented_for + +__all__ = ["lowest_common_ancestor"] + + +@not_implemented_for("undirected") +def lowest_common_ancestor(G, node1, node2, default=None): + G = to_directed_graph(G) + return algorithms.lowest_common_ancestor(G, node1, node2, default=default) diff --git a/graphblas_algorithms/nxapi/operators/__init__.py b/graphblas_algorithms/nxapi/operators/__init__.py new file mode 100644 index 0000000..c2742b9 --- /dev/null +++ b/graphblas_algorithms/nxapi/operators/__init__.py @@ -0,0 +1,2 @@ +from .binary import * +from .unary import * diff --git a/graphblas_algorithms/nxapi/operators/binary.py b/graphblas_algorithms/nxapi/operators/binary.py new file mode 100644 index 0000000..82e8f08 --- /dev/null +++ b/graphblas_algorithms/nxapi/operators/binary.py @@ -0,0 +1,77 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +from ..exception import NetworkXError + +__all__ = [ + "compose", + "difference", + "disjoint_union", + "full_join", + "intersection", + "symmetric_difference", + "union", +] + + +def union(G, H, rename=()): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.union(G, H, rename=rename) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + + +def disjoint_union(G, H): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.disjoint_union(G, H) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + + +def intersection(G, H): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.intersection(G, H) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + + +def difference(G, H): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.difference(G, H) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + + +def symmetric_difference(G, H): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.symmetric_difference(G, H) + 
except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + + +def compose(G, H): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.compose(G, H) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e + + +def full_join(G, H, rename=()): + G = to_graph(G) + H = to_graph(H) + try: + return algorithms.full_join(G, H, rename=rename) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e diff --git a/graphblas_algorithms/nxapi/operators/unary.py b/graphblas_algorithms/nxapi/operators/unary.py new file mode 100644 index 0000000..6633b3b --- /dev/null +++ b/graphblas_algorithms/nxapi/operators/unary.py @@ -0,0 +1,22 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +from ..exception import NetworkXError + +__all__ = [ + "complement", + "reverse", +] + + +def complement(G): + G = to_graph(G) + return algorithms.complement(G) + + +def reverse(G, copy=True): + G = to_graph(G) + try: + return algorithms.reverse(G, copy=copy) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e diff --git a/graphblas_algorithms/nxapi/reciprocity.py b/graphblas_algorithms/nxapi/reciprocity.py new file mode 100644 index 0000000..54192e5 --- /dev/null +++ b/graphblas_algorithms/nxapi/reciprocity.py @@ -0,0 +1,33 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_directed_graph +from graphblas_algorithms.utils import not_implemented_for + +from .exception import NetworkXError + +__all__ = ["reciprocity", "overall_reciprocity"] + + +@not_implemented_for("undirected", "multigraph") +def reciprocity(G, nodes=None): + if nodes is None: + return overall_reciprocity(G) + G = to_directed_graph(G, dtype=bool) + if nodes in G: + mask = G.list_to_mask([nodes]) + result = algorithms.reciprocity(G, mask=mask) + rv = result.get(G._key_to_id[nodes]) + if rv is None: + raise NetworkXError("Not defined for isolated nodes.") + return rv + mask = G.list_to_mask(nodes) + result = algorithms.reciprocity(G, mask=mask) + return G.vector_to_nodemap(result, mask=mask) + + +@not_implemented_for("undirected", "multigraph") +def overall_reciprocity(G): + G = to_directed_graph(G, dtype=bool) + try: + return algorithms.overall_reciprocity(G) + except algorithms.exceptions.EmptyGraphError as e: + raise NetworkXError("Not defined for empty graphs") from e diff --git a/graphblas_algorithms/nxapi/regular.py b/graphblas_algorithms/nxapi/regular.py new file mode 100644 index 0000000..a1fb650 --- /dev/null +++ b/graphblas_algorithms/nxapi/regular.py @@ -0,0 +1,17 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph +from graphblas_algorithms.classes.graph import to_undirected_graph +from graphblas_algorithms.utils import not_implemented_for + +__all__ = ["is_regular", "is_k_regular"] + + +def is_regular(G): + G = to_graph(G) + return algorithms.is_regular(G) + + +@not_implemented_for("directed") +def is_k_regular(G, k): + G = to_undirected_graph(G) + return algorithms.is_k_regular(G, k) diff --git a/graphblas_algorithms/nxapi/shortest_paths/__init__.py b/graphblas_algorithms/nxapi/shortest_paths/__init__.py new file mode 100644 index 0000000..781db9d --- /dev/null +++ b/graphblas_algorithms/nxapi/shortest_paths/__init__.py @@ -0,0 +1,4 @@ +from .dense import * +from .generic import * +from 
.unweighted import * +from .weighted import * diff --git a/graphblas_algorithms/nxapi/shortest_paths/dense.py b/graphblas_algorithms/nxapi/shortest_paths/dense.py new file mode 100644 index 0000000..82c2eed --- /dev/null +++ b/graphblas_algorithms/nxapi/shortest_paths/dense.py @@ -0,0 +1,37 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +from ..exception import NetworkXError + +__all__ = ["floyd_warshall", "floyd_warshall_numpy", "floyd_warshall_predecessor_and_distance"] + + +def floyd_warshall(G, weight="weight"): + G = to_graph(G, weight=weight) + D = algorithms.floyd_warshall(G, is_weighted=weight is not None) + return G.matrix_to_nodenodemap(D) + + +def floyd_warshall_predecessor_and_distance(G, weight="weight"): + G = to_graph(G, weight=weight) + P, D = algorithms.floyd_warshall_predecessor_and_distance(G, is_weighted=weight is not None) + return ( + G.matrix_to_nodenodemap(P, values_are_keys=True), + G.matrix_to_nodenodemap(D, fill_value=float("inf")), + ) + + +def floyd_warshall_numpy(G, nodelist=None, weight="weight"): + G = to_graph(G, weight=weight) + if nodelist is not None: + if not (len(nodelist) == len(G) == len(set(nodelist))): + raise NetworkXError("nodelist must contain every node in G with no repeats.") + permutation = G.list_to_ids(nodelist) + else: + permutation = None + try: + return algorithms.floyd_warshall_predecessor_and_distance( + G, is_weighted=weight is not None, compute_predecessors=False, permutation=permutation + )[1] + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e diff --git a/graphblas_algorithms/nxapi/shortest_paths/generic.py b/graphblas_algorithms/nxapi/shortest_paths/generic.py new file mode 100644 index 0000000..b8df8cb --- /dev/null +++ b/graphblas_algorithms/nxapi/shortest_paths/generic.py @@ -0,0 +1,14 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +from ..exception import NodeNotFound + +__all__ = ["has_path"] + + +def has_path(G, source, target): + G = to_graph(G) + try: + return algorithms.has_path(G, source, target) + except KeyError as e: + raise NodeNotFound(*e.args) from e diff --git a/graphblas_algorithms/nxapi/shortest_paths/unweighted.py b/graphblas_algorithms/nxapi/shortest_paths/unweighted.py new file mode 100644 index 0000000..f1700f3 --- /dev/null +++ b/graphblas_algorithms/nxapi/shortest_paths/unweighted.py @@ -0,0 +1,45 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +from .._utils import normalize_chunksize, partition +from ..exception import NodeNotFound + +__all__ = [ + "single_source_shortest_path_length", + "single_target_shortest_path_length", + "all_pairs_shortest_path_length", +] + + +def single_source_shortest_path_length(G, source, cutoff=None): + G = to_graph(G) + if source not in G: + raise NodeNotFound(f"Source {source} is not in G") + v = algorithms.single_source_shortest_path_length(G, source, cutoff) + return G.vector_to_nodemap(v) + + +def single_target_shortest_path_length(G, target, cutoff=None): + G = to_graph(G) + if target not in G: + raise NodeNotFound(f"Target {target} is not in G") + v = algorithms.single_target_shortest_path_length(G, target, cutoff) + return G.vector_to_nodemap(v) + + +def all_pairs_shortest_path_length(G, cutoff=None, *, chunksize="10 MiB"): + G = to_graph(G) + chunksize = normalize_chunksize(chunksize, len(G) * G._A.dtype.np_type.itemsize, len(G)) + 
if chunksize is None: + D = algorithms.all_pairs_shortest_path_length(G, cutoff) + yield from G.matrix_to_nodenodemap(D).items() + elif chunksize < 2: + for source in G: + d = algorithms.single_source_shortest_path_length(G, source, cutoff) + yield (source, G.vector_to_nodemap(d)) + else: + for cur_nodes in partition(chunksize, list(G)): + D = algorithms.all_pairs_shortest_path_length(G, cutoff, nodes=cur_nodes) + for i, source in enumerate(cur_nodes): + d = D[i, :].new(name=f"all_pairs_shortest_path_length_{i}") + yield (source, G.vector_to_nodemap(d)) diff --git a/graphblas_algorithms/nxapi/shortest_paths/weighted.py b/graphblas_algorithms/nxapi/shortest_paths/weighted.py new file mode 100644 index 0000000..b08dd85 --- /dev/null +++ b/graphblas_algorithms/nxapi/shortest_paths/weighted.py @@ -0,0 +1,83 @@ +from graphblas_algorithms import algorithms, exceptions +from graphblas_algorithms.classes.digraph import to_graph + +from .._utils import normalize_chunksize, partition +from ..exception import NetworkXNoPath, NetworkXUnbounded, NodeNotFound + +__all__ = [ + "all_pairs_bellman_ford_path_length", + "bellman_ford_path", + "bellman_ford_path_length", + "negative_edge_cycle", + "single_source_bellman_ford_path_length", +] + + +def all_pairs_bellman_ford_path_length(G, weight="weight", *, chunksize="10 MiB"): + # Larger chunksize offers more parallelism, but uses more memory. + # Chunksize indicates for how many source nodes to compute at one time. + # The default is to choose the number of rows so the result, if dense, + # will be about 10MB. + G = to_graph(G, weight=weight) + chunksize = normalize_chunksize(chunksize, len(G) * G._A.dtype.np_type.itemsize, len(G)) + if chunksize is None: + # All at once + try: + D = algorithms.bellman_ford_path_lengths(G) + except algorithms.exceptions.Unbounded as e: + raise NetworkXUnbounded(*e.args) from e + yield from G.matrix_to_nodenodemap(D).items() + elif chunksize < 2: + for source in G: + try: + d = algorithms.single_source_bellman_ford_path_length(G, source) + except algorithms.exceptions.Unbounded as e: + raise NetworkXUnbounded(*e.args) from e + yield (source, G.vector_to_nodemap(d)) + else: + for cur_nodes in partition(chunksize, list(G)): + try: + D = algorithms.bellman_ford_path_lengths(G, cur_nodes) + except algorithms.exceptions.Unbounded as e: + raise NetworkXUnbounded(*e.args) from e + for i, source in enumerate(cur_nodes): + d = D[i, :].new(name=f"all_pairs_bellman_ford_path_length_{i}") + yield (source, G.vector_to_nodemap(d)) + + +def single_source_bellman_ford_path_length(G, source, weight="weight"): + # TODO: what if weight is a function? + G = to_graph(G, weight=weight) + try: + d = algorithms.single_source_bellman_ford_path_length(G, source) + except algorithms.exceptions.Unbounded as e: + raise NetworkXUnbounded(*e.args) from e + except KeyError as e: + raise NodeNotFound(*e.args) from e + return G.vector_to_nodemap(d) + + +def bellman_ford_path(G, source, target, weight="weight"): + # TODO: what if weight is a function? 
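+    # As in the other wrappers in this module, `weight` is assumed to be an
+    # edge-attribute name; callable weights (also accepted by NetworkX) are not
+    # handled yet (see the TODO above).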
+ G = to_graph(G, weight=weight) + try: + return algorithms.bellman_ford_path(G, source, target) + except KeyError as e: + raise NodeNotFound(*e.args) from e + + +def bellman_ford_path_length(G, source, target, weight="weight"): + G = to_graph(G, weight=weight) + try: + return algorithms.bellman_ford_path_length(G, source, target) + except KeyError as e: + raise NodeNotFound(*e.args) from e + except exceptions.NoPath as e: + raise NetworkXNoPath(*e.args) from e + + +def negative_edge_cycle(G, weight="weight", heuristic=True): + # TODO: what if weight is a function? + # TODO: use a heuristic to try to stop early + G = to_graph(G, weight=weight) + return algorithms.negative_edge_cycle(G) diff --git a/graphblas_algorithms/nxapi/simple_paths.py b/graphblas_algorithms/nxapi/simple_paths.py new file mode 100644 index 0000000..fd9a60d --- /dev/null +++ b/graphblas_algorithms/nxapi/simple_paths.py @@ -0,0 +1,9 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = ["is_simple_path"] + + +def is_simple_path(G, nodes): + G = to_graph(G) + return algorithms.is_simple_path(G, nodes) diff --git a/graphblas_algorithms/nxapi/smetric.py b/graphblas_algorithms/nxapi/smetric.py new file mode 100644 index 0000000..a1f60ab --- /dev/null +++ b/graphblas_algorithms/nxapi/smetric.py @@ -0,0 +1,22 @@ +import warnings + +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = ["s_metric"] + + +def s_metric(G, **kwargs): + if kwargs: + if "normalized" in kwargs: + warnings.warn( + "\n\nThe `normalized` keyword is deprecated and will be removed\n" + "in the future. To silence this warning, remove `normalized`\n" + "when calling `s_metric`.\n\nThe value of `normalized` is ignored.", + DeprecationWarning, + stacklevel=2, + ) + else: + raise TypeError(f"s_metric got an unexpected keyword argument '{kwargs.popitem()[0]}'") + G = to_graph(G) + return algorithms.s_metric(G) diff --git a/graphblas_algorithms/nxapi/structuralholes.py b/graphblas_algorithms/nxapi/structuralholes.py new file mode 100644 index 0000000..20227cd --- /dev/null +++ b/graphblas_algorithms/nxapi/structuralholes.py @@ -0,0 +1,9 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = [] + + +def mutual_weight(G, u, v, weight=None): + G = to_graph(G, weight=weight) + return algorithms.mutual_weight(G, u, v) diff --git a/graphblas_algorithms/nxapi/tests/__init__.py b/graphblas_algorithms/nxapi/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/graphblas_algorithms/nxapi/tests/test_cluster.py b/graphblas_algorithms/nxapi/tests/test_cluster.py new file mode 100644 index 0000000..21884f7 --- /dev/null +++ b/graphblas_algorithms/nxapi/tests/test_cluster.py @@ -0,0 +1,34 @@ +import networkx as nx + +from graphblas_algorithms import DiGraph, nxapi + + +def test_directed(): + # XXX" is transitivity supposed to work on directed graphs like this? 
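+    # Build a small directed example (a complete digraph with two edges removed and an
+    # isolated node added) and check that the nxapi results match NetworkX exactly.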
+ G = nx.complete_graph(5, create_using=nx.DiGraph()) + G.remove_edge(1, 2) + G.remove_edge(2, 3) + G.add_node(5) + G2 = DiGraph.from_networkx(G) + expected = nx.transitivity(G) + result = nxapi.transitivity(G2) + assert expected == result + # clustering + expected = nx.clustering(G) + result = nxapi.clustering(G2) + assert result == expected + expected = nx.clustering(G, [0, 1, 2]) + result = nxapi.clustering(G2, [0, 1, 2]) + assert result == expected + for i in range(6): + assert nx.clustering(G, i) == nxapi.clustering(G2, i) + # average_clustering + expected = nx.average_clustering(G) + result = nxapi.average_clustering(G2) + assert result == expected + expected = nx.average_clustering(G, [0, 1, 2]) + result = nxapi.average_clustering(G2, [0, 1, 2]) + assert result == expected + expected = nx.average_clustering(G, count_zeros=False) + result = nxapi.average_clustering(G2, count_zeros=False) + assert result == expected diff --git a/graphblas_algorithms/nxapi/tests/test_utils.py b/graphblas_algorithms/nxapi/tests/test_utils.py new file mode 100644 index 0000000..0fda5d9 --- /dev/null +++ b/graphblas_algorithms/nxapi/tests/test_utils.py @@ -0,0 +1,33 @@ +import pytest + +from graphblas_algorithms.nxapi._utils import normalize_chunksize + + +def test_normalize_chunksize(): + assert normalize_chunksize(None) is None + assert normalize_chunksize("all") is None + assert normalize_chunksize("") is None + assert normalize_chunksize(-1) is None + assert normalize_chunksize("-1") is None + assert normalize_chunksize(10, N=10) is None + assert normalize_chunksize("1 MB", N=100) is None + assert normalize_chunksize("1 chunk") is None + assert normalize_chunksize("2 chunks", N=20) == 10 + assert normalize_chunksize(10) == 10 + assert normalize_chunksize(10.0) == 10 + assert normalize_chunksize("10") == 10 + assert normalize_chunksize("10.0") == 10 + assert normalize_chunksize("1_0 B") == 10 + assert normalize_chunksize("1e1") == 10 + assert normalize_chunksize("1e-2 kb") == 10 + assert normalize_chunksize("Mb") == 1000**2 + assert normalize_chunksize(" mb") == 1000**2 + assert normalize_chunksize("gib") == 1024**3 + with pytest.raises(TypeError, match="chunksize must be"): + normalize_chunksize(object()) + with pytest.raises(ValueError, match="as a bytes"): + normalize_chunksize("10 badbytes") + with pytest.raises(ValueError, match="as a number"): + normalize_chunksize("1bad0 TB") + with pytest.raises(TypeError, match="N argument is required"): + normalize_chunksize("10 chunks") diff --git a/graphblas_algorithms/nxapi/tournament.py b/graphblas_algorithms/nxapi/tournament.py new file mode 100644 index 0000000..6c1bb1f --- /dev/null +++ b/graphblas_algorithms/nxapi/tournament.py @@ -0,0 +1,30 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_directed_graph +from graphblas_algorithms.utils import not_implemented_for + +from .simple_paths import is_simple_path as is_path # noqa: F401 + +__all__ = ["is_tournament", "score_sequence", "tournament_matrix"] + + +@not_implemented_for("undirected") +@not_implemented_for("multigraph") +def is_tournament(G): + G = to_directed_graph(G) + return algorithms.is_tournament(G) + + +@not_implemented_for("undirected") +@not_implemented_for("multigraph") +def score_sequence(G): + G = to_directed_graph(G) + # TODO: can we return a different, more native object? 
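+    # For now, `.tolist()` converts the result to a plain Python list, presumably to
+    # match the return type of `networkx.tournament.score_sequence`.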
+ return algorithms.score_sequence(G).tolist() + + +@not_implemented_for("undirected") +@not_implemented_for("multigraph") +def tournament_matrix(G): + G = to_directed_graph(G) + # TODO: can we return a different, more native object? + return algorithms.tournament_matrix(G) diff --git a/graphblas_algorithms/nxapi/traversal/__init__.py b/graphblas_algorithms/nxapi/traversal/__init__.py new file mode 100644 index 0000000..7811162 --- /dev/null +++ b/graphblas_algorithms/nxapi/traversal/__init__.py @@ -0,0 +1 @@ +from .breadth_first_search import * diff --git a/graphblas_algorithms/nxapi/traversal/breadth_first_search.py b/graphblas_algorithms/nxapi/traversal/breadth_first_search.py new file mode 100644 index 0000000..0b2c6a7 --- /dev/null +++ b/graphblas_algorithms/nxapi/traversal/breadth_first_search.py @@ -0,0 +1,27 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +from ..exception import NetworkXError + +__all__ = [ + "bfs_layers", + "descendants_at_distance", +] + + +def bfs_layers(G, sources): + G = to_graph(G) + try: + for layer in algorithms.bfs_layers(G, sources): + yield G.vector_to_list(layer) + except KeyError as e: + raise NetworkXError(*e.args) from e + + +def descendants_at_distance(G, source, distance): + G = to_graph(G) + try: + v = algorithms.descendants_at_distance(G, source, distance) + except KeyError as e: + raise NetworkXError(*e.args) from e + return G.vector_to_nodeset(v) diff --git a/graphblas_algorithms/nxapi/triads.py b/graphblas_algorithms/nxapi/triads.py new file mode 100644 index 0000000..ca13685 --- /dev/null +++ b/graphblas_algorithms/nxapi/triads.py @@ -0,0 +1,9 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = ["is_triad"] + + +def is_triad(G): + G = to_graph(G) + return algorithms.is_triad(G) diff --git a/graphblas_algorithms/tests/test_cluster.py b/graphblas_algorithms/tests/test_cluster.py deleted file mode 100644 index af5ec59..0000000 --- a/graphblas_algorithms/tests/test_cluster.py +++ /dev/null @@ -1,92 +0,0 @@ -import inspect - -import graphblas as gb -import networkx as nx - -import graphblas_algorithms as ga -from graphblas_algorithms import average_clustering, clustering, transitivity, triangles - -nx_triangles = nx.triangles -nx.triangles = triangles -nx.algorithms.triangles = triangles -nx.algorithms.cluster.triangles = triangles - -nx_transitivity = nx.transitivity -nx.transitivity = transitivity -nx.algorithms.transitivity = transitivity -nx.algorithms.cluster.transitivity = transitivity - -nx_clustering = nx.clustering -nx.clustering = clustering -nx.algorithms.clustering = clustering -nx.algorithms.cluster.clustering = clustering - -nx_average_clustering = nx.average_clustering -nx.average_clustering = average_clustering -nx.algorithms.average_clustering = average_clustering -nx.algorithms.cluster.average_clustering = average_clustering - - -def test_signatures(): - nx_sig = inspect.signature(nx_triangles) - sig = inspect.signature(triangles) - assert nx_sig == sig - nx_sig = inspect.signature(nx_transitivity) - sig = inspect.signature(transitivity) - assert nx_sig == sig - nx_sig = inspect.signature(nx_clustering) - sig = inspect.signature(clustering) - assert nx_sig == sig - - -def test_triangles_full(): - # Including self-edges! 
- G = gb.Matrix(bool, 5, 5) - G[:, :] = True - G2 = gb.select.offdiag(G).new() - L = gb.select.tril(G, -1).new(name="L") - U = gb.select.triu(G, 1).new(name="U") - result = ga.cluster.triangles_core(G, L=L, U=U) - expected = gb.Vector(int, 5) - expected[:] = 6 - assert result.isequal(expected) - result = ga.cluster.triangles_core(G2, L=L, U=U) - assert result.isequal(expected) - mask = gb.Vector(bool, 5) - mask[0] = True - mask[3] = True - result = ga.cluster.triangles_core(G, mask=mask.S) - expected = gb.Vector(int, 5) - expected[0] = 6 - expected[3] = 6 - assert result.isequal(expected) - result = ga.cluster.triangles_core(G2, mask=mask.S) - assert result.isequal(expected) - assert ga.cluster.single_triangle_core(G, 1) == 6 - assert ga.cluster.single_triangle_core(G, 0, L=L) == 6 - assert ga.cluster.single_triangle_core(G2, 0, has_self_edges=False) == 6 - assert ga.cluster.total_triangles_core(G2) == 10 - assert ga.cluster.total_triangles_core(G) == 10 - assert ga.cluster.total_triangles_core(G, L=L, U=U) == 10 - assert ga.cluster.transitivity_core(G) == 1.0 - assert ga.cluster.transitivity_core(G2) == 1.0 - result = ga.cluster.clustering_core(G) - expected = gb.Vector(float, 5) - expected[:] = 1 - assert result.isequal(expected) - result = ga.cluster.clustering_core(G2) - assert result.isequal(expected) - assert ga.cluster.single_clustering_core(G, 0) == 1 - assert ga.cluster.single_clustering_core(G2, 0) == 1 - expected(mask.S, replace=True) << 1 - result = ga.cluster.clustering_core(G, mask=mask.S) - assert result.isequal(expected) - result = ga.cluster.clustering_core(G2, mask=mask.S) - assert result.isequal(expected) - assert ga.cluster.average_clustering_core(G) == 1 - assert ga.cluster.average_clustering_core(G2) == 1 - assert ga.cluster.average_clustering_core(G, mask=mask.S) == 1 - assert ga.cluster.average_clustering_core(G2, mask=mask.S) == 1 - - -from networkx.algorithms.tests.test_cluster import * # noqa isort:skip diff --git a/graphblas_algorithms/tests/test_core.py b/graphblas_algorithms/tests/test_core.py new file mode 100644 index 0000000..68dbeb7 --- /dev/null +++ b/graphblas_algorithms/tests/test_core.py @@ -0,0 +1,39 @@ +import pathlib + +import pytest + +import graphblas_algorithms as ga + +try: + import setuptools +except ImportError: # pragma: no cover (import) + setuptools = None + +try: + import tomli +except ImportError: # pragma: no cover (import) + tomli = None + + +def test_version(): + assert ga.__version__ > "2022.11.0" + + +@pytest.mark.skipif("not setuptools or not tomli or not ga.__file__") +def test_packages(): + """Ensure all packages are declared in pyproject.toml.""" + # Currently assume s`pyproject.toml` is at the same level as `graphblas_algorithms` folder. + # This probably isn't always True, and we can probably do a better job of finding it. 
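+    # Collect the packages that exist on disk, then compare them against the package
+    # list declared under [tool.setuptools] in pyproject.toml.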
+ path = pathlib.Path(ga.__file__).parent + pkgs = [f"graphblas_algorithms.{x}" for x in setuptools.find_packages(path)] + pkgs.append("graphblas_algorithms") + pkgs.append("_nx_graphblas") + pkgs.sort() + pyproject = path.parent / "pyproject.toml" + if not pyproject.exists(): + pytest.skip("Did not find pyproject.toml") + with pyproject.open("rb") as f: + pkgs2 = sorted(tomli.load(f)["tool"]["setuptools"]["packages"]) + assert ( + pkgs == pkgs2 + ), "If there are extra items on the left, add them to pyproject.toml:tool.setuptools.packages" diff --git a/graphblas_algorithms/tests/test_match_nx.py b/graphblas_algorithms/tests/test_match_nx.py new file mode 100644 index 0000000..1924ff7 --- /dev/null +++ b/graphblas_algorithms/tests/test_match_nx.py @@ -0,0 +1,237 @@ +"""Test that `graphblas.nxapi` structure matches that of networkx. + +This currently checks the locations and names of all networkx-dispatchable functions +that are implemented by `graphblas_algorithms`. It ignores names that begin with `_`. + +The `test_dispatched_funcs_in_nxap` test below will say what to add and delete under `nxapi`. + +We should consider removing any test here that becomes too much of a nuisance. +For now, though, let's try to match and stay up-to-date with NetworkX! + +""" +import sys +from collections import namedtuple +from pathlib import Path + +import pytest + +try: + import networkx as nx # noqa: F401 +except ImportError: + pytest.skip( + "Matching networkx namespace requires networkx to be installed", allow_module_level=True + ) +else: + try: + from networkx.utils import backends + + IS_NX_30_OR_31 = False + except ImportError: # pragma: no cover (import) + # This is the location in nx 3.1 + from networkx.classes import backends # noqa: F401 + + IS_NX_30_OR_31 = True + + +def isdispatched(func): + """Can this NetworkX function dispatch to other backends?""" + if IS_NX_30_OR_31: + return ( + callable(func) + and hasattr(func, "dispatchname") + and func.__module__.startswith("networkx") + ) + return ( + callable(func) + and hasattr(func, "preserve_edge_attrs") + and func.__module__.startswith("networkx") + ) + + +def dispatchname(func): + """The dispatched name of the dispatchable NetworkX function""" + # Haha, there should be a better way to get this + if not isdispatched(func): + raise ValueError(f"Function is not dispatched in NetworkX: {func.__name__}") + if IS_NX_30_OR_31: + return func.dispatchname + return func.name + + +def fullname(func): + return f"{func.__module__}.{func.__name__}" + + +NameInfo = namedtuple("NameInfo", ["dispatchname", "fullname", "curpath"]) + + +@pytest.fixture(scope="module") +def nx_info(): + rv = {} # {modulepath: {dispatchname: NameInfo}} + for modname, module in sys.modules.items(): + cur = {} + if not modname.startswith("networkx.") and modname != "networkx" or "tests" in modname: + continue + for key, val in vars(module).items(): + if not key.startswith("_") and isdispatched(val): + dname = dispatchname(val) + cur[dname] = NameInfo(dname, fullname(val), f"{modname}.{key}") + if cur: + rv[modname] = cur + return rv + + +@pytest.fixture(scope="module") +def gb_info(): + rv = {} # {modulepath: {dispatchname: NameInfo}} + from graphblas_algorithms import nxapi # noqa: F401 + from graphblas_algorithms.interface import Dispatcher + + ga_map = { + fullname(val): key + for key, val in vars(Dispatcher).items() + if callable(val) and fullname(val).startswith("graphblas_algorithms.nxapi.") + } + for modname, module in sys.modules.items(): + cur = {} + if not 
modname.startswith("graphblas_algorithms.nxapi") or "tests" in modname: + continue + for key, val in vars(module).items(): + try: + fname = fullname(val) + except Exception: + continue + if key.startswith("_") or fname not in ga_map: + continue + dname = ga_map[fname] + cur[dname] = NameInfo(dname, fname, f"{modname}.{key}") + if cur: + rv[modname] = cur + return rv + + +@pytest.fixture(scope="module") +def nx_names_to_info(nx_info): + rv = {} # {dispatchname: {NameInfo}} + for names in nx_info.values(): + for name, info in names.items(): + if name not in rv: + rv[name] = set() + rv[name].add(info) + return rv + + +@pytest.fixture(scope="module") +def gb_names_to_info(gb_info): + rv = {} # {dispatchname: {NameInfo}} + for names in gb_info.values(): + for name, info in names.items(): + if name not in rv: + rv[name] = set() + rv[name].add(info) + return rv + + +@pytest.mark.checkstructure +def test_nonempty(nx_info, gb_info, nx_names_to_info, gb_names_to_info): + assert len(nx_info) > 15 + assert len(gb_info) > 15 + assert len(nx_names_to_info) > 30 + assert len(gb_names_to_info) > 30 + + +def nx_to_gb_info(info): + gb = "graphblas_algorithms.nxapi" + return NameInfo( + info[0], + info[1].replace("networkx.algorithms", gb).replace("networkx", gb), + info[2].replace("networkx.algorithms", gb).replace("networkx", gb), + ) + + +def module_exists(info): + return info[2].rsplit(".", 1)[0] in sys.modules + + +@pytest.mark.checkstructure +def test_dispatched_funcs_in_nxapi(nx_names_to_info, gb_names_to_info): + """Are graphblas_algorithms functions in the correct locations in nxapi?""" + failing = False + for name in nx_names_to_info.keys() & gb_names_to_info.keys(): + nx_paths = { + gbinfo + for info in nx_names_to_info[name] + if module_exists(gbinfo := nx_to_gb_info(info)) + } + gb_paths = gb_names_to_info[name] + if nx_paths != gb_paths: # pragma: no cover + failing = True + if missing := (nx_paths - gb_paths): + from_ = ":".join(next(iter(missing))[1].rsplit(".", 1)) + print(f"Add `{name}` from `{from_}` here:") + for _, _, path in sorted(missing): + print(" ", ":".join(path.rsplit(".", 1))) + if extra := (gb_paths - nx_paths): + from_ = ":".join(next(iter(extra))[1].rsplit(".", 1)) + print(f"Remove `{name}` from `{from_}` here:") + for _, _, path in sorted(extra): + print(" ", ":".join(path.rsplit(".", 1))) + if failing: # pragma: no cover + raise AssertionError + + +def get_fullname(info): + fullname = info.fullname + if not fullname.endswith(f".{info.dispatchname}"): + fullname += f" ({info.dispatchname})" + return fullname + + +def test_print_dispatched_not_implemented(nx_names_to_info, gb_names_to_info): + """It may be informative to see the results from this to identify functions to implement. 
+ + $ pytest -s -k test_print_dispatched_not_implemented + """ + not_implemented = nx_names_to_info.keys() - gb_names_to_info.keys() + fullnames = {get_fullname(next(iter(nx_names_to_info[name]))) for name in not_implemented} + print() + print("=================================================================================") + print("Functions dispatched in NetworkX that ARE NOT implemented in graphblas-algorithms") + print("---------------------------------------------------------------------------------") + for i, name in enumerate(sorted(fullnames)): + print(i, name) + print("=================================================================================") + + +def test_print_dispatched_implemented(nx_names_to_info, gb_names_to_info): + """It may be informative to see the results from this to identify implemented functions. + + $ pytest -s -k test_print_dispatched_implemented + """ + implemented = nx_names_to_info.keys() & gb_names_to_info.keys() + fullnames = {get_fullname(next(iter(nx_names_to_info[name]))) for name in implemented} + print() + print("=============================================================================") + print("Functions dispatched in NetworkX that ARE implemented in graphblas-algorithms") + print("-----------------------------------------------------------------------------") + for i, name in enumerate(sorted(fullnames)): + print(i, name) + print("=============================================================================") + + +def test_algorithms_in_readme(nx_names_to_info, gb_names_to_info): + """Ensure all algorithms are mentioned in README.md.""" + implemented = nx_names_to_info.keys() & gb_names_to_info.keys() + path = Path(__file__).parent.parent.parent / "README.md" + if not path.exists(): + return + with path.open("r") as f: + text = f.read() + missing = set() + for name in sorted(implemented): + if name not in text: + missing.add(name) + if missing: + msg = f"Algorithms missing in README.md: {', '.join(sorted(missing))}" + print(msg) + raise AssertionError(msg) diff --git a/graphblas_algorithms/tests/test_pagerank.py b/graphblas_algorithms/tests/test_pagerank.py deleted file mode 100644 index cdbda24..0000000 --- a/graphblas_algorithms/tests/test_pagerank.py +++ /dev/null @@ -1,21 +0,0 @@ -import inspect - -import networkx as nx - -from graphblas_algorithms import pagerank - -nx_pagerank = nx.pagerank -nx_pagerank_scipy = nx.pagerank_scipy - -nx.pagerank = pagerank -nx.pagerank_scipy = pagerank -nx.algorithms.link_analysis.pagerank_alg.pagerank_scipy = pagerank - - -def test_signatures(): - nx_sig = inspect.signature(nx_pagerank) - sig = inspect.signature(pagerank) - assert nx_sig == sig - - -from networkx.algorithms.link_analysis.tests.test_pagerank import * # noqa isort:skip diff --git a/graphblas_algorithms/utils/__init__.py b/graphblas_algorithms/utils/__init__.py new file mode 100644 index 0000000..48bad83 --- /dev/null +++ b/graphblas_algorithms/utils/__init__.py @@ -0,0 +1 @@ +from .decorators import * diff --git a/graphblas_algorithms/utils/decorators.py b/graphblas_algorithms/utils/decorators.py new file mode 100644 index 0000000..054a084 --- /dev/null +++ b/graphblas_algorithms/utils/decorators.py @@ -0,0 +1,20 @@ +from graphblas import Matrix + +__all__ = ["not_implemented_for"] + + +def not_implemented_for(*graph_types): + import networkx.utils.decorators + + rv = networkx.utils.decorators.not_implemented_for(*graph_types) + func = rv._func + + def inner(g): + if not isinstance(g, Matrix): + return func(g) + # Let Matrix objects 
pass through and check later. + # We could check now and convert to appropriate graph type. + return g + + rv._func = inner + return rv diff --git a/pyproject.toml b/pyproject.toml index 90ccf9c..b1625c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,271 @@ [build-system] -requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" +requires = [ + "setuptools >=64", + "setuptools-git-versioning", +] + +[project] +name = "graphblas-algorithms" +dynamic = ["version"] +description = "Graph algorithms written in GraphBLAS and backend for NetworkX" +readme = "README.md" +requires-python = ">=3.10" +license = {file = "LICENSE"} +authors = [ + {name = "Erik Welch", email = "erik.n.welch@gmail.com"}, + {name = "Jim Kitchen"}, + {name = "Graphblas-algorithms contributors"}, +] +maintainers = [ + {name = "Erik Welch", email = "erik.n.welch@gmail.com"}, + {name = "Jim Kitchen", email = "jim22k@gmail.com"}, +] +keywords = [ + "graphblas", + "graph", + "sparse", + "matrix", + "lagraph", + "suitesparse", + "Networks", + "Graph Theory", + "Mathematics", + "network", + "discrete mathematics", + "math", +] +classifiers = [ + "Development Status :: 4 - Beta", + "License :: OSI Approved :: Apache Software License", + "Operating System :: MacOS :: MacOS X", + "Operating System :: POSIX :: Linux", + "Operating System :: Microsoft :: Windows", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3 :: Only", + "Intended Audience :: Developers", + "Intended Audience :: Other Audience", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Topic :: Scientific/Engineering :: Information Analysis", + "Topic :: Scientific/Engineering :: Mathematics", + "Topic :: Scientific/Engineering :: Physics", + "Topic :: Software Development :: Libraries :: Python Modules", +] +dependencies = [ + "python-graphblas >=2023.1.0", +] + +[project.entry-points."networkx.plugins"] +graphblas = "graphblas_algorithms.interface:Dispatcher" + +[project.entry-points."networkx.backends"] +graphblas = "graphblas_algorithms.interface:Dispatcher" + +[project.entry-points."networkx.backend_info"] +graphblas = "_nx_graphblas:get_info" + +[project.urls] +homepage = "https://github.com/python-graphblas/graphblas-algorithms" +# documentation = "https://graphblas-algorithms.readthedocs.io" +repository = "https://github.com/python-graphblas/graphblas-algorithms" +changelog = "https://github.com/python-graphblas/graphblas-algorithms/releases" + +[project.optional-dependencies] +test = [ + "pytest", + "networkx >=3.0", + "scipy >=1.9", + "setuptools", + "tomli", +] +all = [ + "graphblas-algorithms[test]", +] + +[tool.setuptools] +# Let's be explicit (we test this too) +# TODO: it would be nice if setuptools (or our build backend) could handle this automatically and reliably. 
+# $ python -c 'from setuptools import find_packages ; [print(x) for x in sorted(find_packages())]' +# $ find graphblas_algorithms/ -name __init__.py -print | sort | sed -e 's/\/__init__.py//g' -e 's/\//./g' +# $ python -c 'import tomli ; [print(x) for x in sorted(tomli.load(open("pyproject.toml", "rb"))["tool"]["setuptools"]["packages"])]' +packages = [ + "_nx_graphblas", + "graphblas_algorithms", + "graphblas_algorithms.algorithms", + "graphblas_algorithms.algorithms.centrality", + "graphblas_algorithms.algorithms.community", + "graphblas_algorithms.algorithms.components", + "graphblas_algorithms.algorithms.isomorphism", + "graphblas_algorithms.algorithms.link_analysis", + "graphblas_algorithms.algorithms.operators", + "graphblas_algorithms.algorithms.shortest_paths", + "graphblas_algorithms.algorithms.tests", + "graphblas_algorithms.algorithms.traversal", + "graphblas_algorithms.classes", + "graphblas_algorithms.generators", + "graphblas_algorithms.linalg", + "graphblas_algorithms.nxapi", + "graphblas_algorithms.nxapi.centrality", + "graphblas_algorithms.nxapi.community", + "graphblas_algorithms.nxapi.components", + "graphblas_algorithms.nxapi.generators", + "graphblas_algorithms.nxapi.isomorphism", + "graphblas_algorithms.nxapi.linalg", + "graphblas_algorithms.nxapi.link_analysis", + "graphblas_algorithms.nxapi.operators", + "graphblas_algorithms.nxapi.shortest_paths", + "graphblas_algorithms.nxapi.tests", + "graphblas_algorithms.nxapi.traversal", + "graphblas_algorithms.tests", + "graphblas_algorithms.utils", +] + +[tool.setuptools-git-versioning] +enabled = true +dev_template = "{tag}+{ccount}.g{sha}" +dirty_template = "{tag}+{ccount}.g{sha}.dirty" [tool.black] line-length = 100 +target-version = ["py310", "py311", "py312"] + +[tool.isort] +sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"] +profile = "black" +skip_gitignore = true +float_to_top = true +default_section = "THIRDPARTY" +known_first_party = "graphblas_algorithms" +line_length = 100 +skip = [ + "graphblas_algorithms/nxapi/__init__.py", + "scripts/bench.py", +] + +[tool.pytest.ini_options] +minversion = "6.0" +testpaths = "graphblas_algorithms" +xfail_strict = false +markers = [ + "checkstructure: Skipped unless --check-structure passed", +] + +[tool.coverage.run] +branch = true +source = ["graphblas_algorithms"] +omit = [] + +[tool.coverage.report] +ignore_errors = false +precision = 1 +fail_under = 0 +skip_covered = true +skip_empty = true +exclude_lines = [ + "pragma: no cover", + "raise AssertionError", + "raise NotImplementedError", +] + +[tool.ruff] +# https://github.com/charliermarsh/ruff/ +line-length = 100 +target-version = "py310" +unfixable = [ + "F841" # unused-variable (Note: can leave useless expression) +] +select = [ + "ALL", +] +external = [ + # noqa codes that ruff doesn't know about: https://github.com/charliermarsh/ruff#external +] +ignore = [ + # Would be nice to fix these + "D100", # Missing docstring in public module + "D101", # Missing docstring in public class + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D104", # Missing docstring in public package + "D105", # Missing docstring in magic method + # "D107", # Missing docstring in `__init__` + "D401", # First line of docstring should be in imperative mood: + # "D417", # Missing argument description in the docstring: + "PLE0605", # Invalid format for `__all__`, must be `tuple` or `list` (Note: broken in v0.0.237) + + # Maybe consider + # "SIM300", # Yoda conditions are 
discouraged, use ... instead (Note: we're not this picky) + # "SIM401", # Use dict.get ... instead of if-else-block (Note: if-else better for coverage and sometimes clearer) + # "TRY004", # Prefer `TypeError` exception for invalid type (Note: good advice, but not worth the nuisance) + # "TRY200", # Use `raise from` to specify exception cause (Note: sometimes okay to raise original exception) + "UP038", # Use `X | Y` in `isinstance` call instead of `(X, Y)` (Note: using `|` seems to be slower) + + # Intentionally ignored + "COM812", # Trailing comma missing + "D203", # 1 blank line required before class docstring (Note: conflicts with D211, which is preferred) + "D400", # First line should end with a period (Note: prefer D415, which also allows "?" and "!") + "F403", # `from .classes import *` used; unable to detect undefined names (Note: used to match networkx) + "N802", # Function name ... should be lowercase + "N803", # Argument name ... should be lowercase (Maybe okay--except in tests) + "N806", # Variable ... in function should be lowercase + "N807", # Function name should not start and end with `__` + "N818", # Exception name ... should be named with an Error suffix (Note: good advice) + "PLR0911", # Too many return statements + "PLR0912", # Too many branches + "PLR0913", # Too many arguments to function call + "PLR0915", # Too many statements + "PLR2004", # Magic number used in comparison, consider replacing magic with a constant variable + "PLW2901", # Outer for loop variable ... overwritten by inner assignment target (Note: good advice, but too strict) + "RET502", # Do not implicitly `return None` in function able to return non-`None` value + "RET503", # Missing explicit `return` at the end of function able to return non-`None` value + "RET504", # Unnecessary variable assignment before `return` statement + "RUF012", # Mutable class attributes should be annotated with `typing.ClassVar` (Note: no annotations yet) + "S110", # `try`-`except`-`pass` detected, consider logging the exception (Note: good advice, but we don't log) + "S112", # `try`-`except`-`continue` detected, consider logging the exception (Note: good advice, but we don't log) + "SIM102", # Use a single `if` statement instead of nested `if` statements (Note: often necessary) + "SIM105", # Use contextlib.suppress(...) instead of try-except-pass (Note: try-except-pass is much faster) + "SIM108", # Use ternary operator ... instead of if-else-block (Note: if-else better for coverage and sometimes clearer) + "TRY003", # Avoid specifying long messages outside the exception class (Note: why?) + "FIX001", "FIX002", "FIX003", "FIX004", # flake8-fixme (like flake8-todos) + + # Ignored categories + "C90", # mccabe (Too strict, but maybe we should make things less complex) + "I", # isort (Should we replace `isort` with this?) + "ANN", # flake8-annotations (We don't use annotations yet) + "BLE", # flake8-blind-except (Maybe consider) + "FBT", # flake8-boolean-trap (Why?) + "DJ", # flake8-django (We don't use django) + "EM", # flake8-errmsg (Perhaps nicer, but too much work) + "ICN", # flake8-import-conventions (Doesn't allow "_" prefix such as `_np`) + "PYI", # flake8-pyi (We don't have stub files yet) + "SLF", # flake8-self (We can use our own private variables--sheesh!) 
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index d6ac249..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-python-graphblas >=2022.4.2
-networkx
diff --git a/run_nx_tests.sh b/run_nx_tests.sh
new file mode 100755
index 0000000..740ab26
--- /dev/null
+++ b/run_nx_tests.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+NETWORKX_GRAPH_CONVERT=graphblas \
+NETWORKX_TEST_BACKEND=graphblas \
+NETWORKX_FALLBACK_TO_NX=True \
+    pytest --pyargs networkx "$@"
+# pytest --pyargs networkx --cov --cov-report term-missing "$@"
diff --git a/scripts/bench.py b/scripts/bench.py
new file mode 100755
index 0000000..3b3f4dc
--- /dev/null
+++ b/scripts/bench.py
@@ -0,0 +1,268 @@
+#!/usr/bin/env python
+import argparse
+import gc
+import json
+from pathlib import Path
+import statistics
+import sys
+import timeit
+
+import download_data
+import graphblas as gb
+import networkx as nx
+import numpy as np
+import scipy.sparse
+
+import graphblas_algorithms as ga
+import scipy_impl
+from graphblas_algorithms.interface import Dispatcher
+
+datapaths = [
+    Path(__file__).parent / ".."
/ "data", + Path(), +] + + +def find_data(dataname): + curpath = Path(dataname) + if not curpath.exists(): + for path in datapaths: + curpath = path / f"{dataname}.mtx" + if curpath.exists(): + break + curpath = path / f"{dataname}" + if curpath.exists(): + break + else: + if dataname not in download_data.data_urls: + raise FileNotFoundError(f"Unable to find data file for {dataname}") + curpath = Path(download_data.main([dataname])[0]) + return curpath.resolve().relative_to(Path().resolve()) + + +def get_symmetry(file_or_mminfo): + if not isinstance(file_or_mminfo, tuple): + mminfo = scipy.io.mminfo(file_or_mminfo) + else: + mminfo = file_or_mminfo + return mminfo[5] + + +def readfile(filepath, is_symmetric, backend): + if backend == "graphblas": + A = gb.io.mmread(filepath, name=filepath.stem) + A.wait() + if is_symmetric: + return ga.Graph(A) + return ga.DiGraph(A) + a = scipy.io.mmread(filepath) + if backend == "networkx": + create_using = nx.Graph if is_symmetric else nx.DiGraph + return nx.from_scipy_sparse_array(a, create_using=create_using) + if backend == "scipy": + return scipy.sparse.csr_array(a) + raise ValueError( + f"Backend {backend!r} not understood; must be 'graphblas', 'networkx', or 'scipy'" + ) + + +def best_units(num): + """Returns scale factor and prefix such that 1 <= num*scale < 1000""" + if num < 1e-12: + return 1e15, "f" + if num < 1e-9: + return 1e12, "p" + if num < 1e-6: + return 1e9, "n" + if num < 1e-3: + return 1e6, "\N{MICRO SIGN}" + if num < 1: + return 1e3, "m" + if num < 1e3: + return 1.0, "" + if num < 1e6: + return 1e-3, "k" + if num < 1e9: + return 1e-6, "M" + if num < 1e12: + return 1e-9, "G" + return 1e-12, "T" + + +def stime(time): + scale, units = best_units(time) + return f"{time * scale:4.3g} {units}s" + + +# Functions that aren't available in the main networkx namespace +functionpaths = { + "inter_community_edges": "community.quality.inter_community_edges", + "intra_community_edges": "community.quality.intra_community_edges", + "is_tournament": "tournament.is_tournament", + "mutual_weight": "structuralholes.mutual_weight", + "score_sequence": "tournament.score_sequence", + "tournament_matrix": "tournament.tournament_matrix", +} +functioncall = { + "s_metric": "func(G, normalized=False)", +} +poweriteration = {"eigenvector_centrality", "katz_centrality", "pagerank"} +directed_only = { + "in_degree_centrality", + "is_tournament", + "out_degree_centrality", + "score_sequence", + "tournament_matrix", + "reciprocity", + "overall_reciprocity", +} +# Is square_clustering undirected only? graphblas-algorthms doesn't implement it for directed +undirected_only = {"generalized_degree", "k_truss", "triangles", "square_clustering"} +returns_iterators = {"all_pairs_bellman_ford_path_length", "isolates"} + + +def getfunction(functionname, backend): + if backend == "graphblas": + return getattr(Dispatcher, functionname) + if backend == "scipy": + return getattr(scipy_impl, functionname) + if functionname in functionpaths: + func = nx + for attr in functionpaths[functionname].split("."): + func = getattr(func, attr) + return func + return getattr(nx, functionname) + + +def getgraph(dataname, backend="graphblas", functionname=None): + filename = find_data(dataname) + is_symmetric = get_symmetry(filename) == "symmetric" + if not is_symmetric and functionname is not None and functionname in undirected_only: + # Should we automatically symmetrize? 
+ raise ValueError( + f"Data {dataname!r} is not symmetric, but {functionname} only works on undirected" + ) + if is_symmetric and functionname in directed_only: + is_symmetric = False # Make into directed graph + rv = readfile(filename, is_symmetric, backend) + return rv + + +def main( + dataname, backend, functionname, time=3.0, n=None, extra=None, display=True, enable_gc=False +): + G = getgraph(dataname, backend, functionname) + func = getfunction(functionname, backend) + benchstring = functioncall.get(functionname, "func(G)") + if extra is not None: + benchstring = f"{benchstring[:-1]}, {extra})" + if functionname in returns_iterators: + benchstring = f"for _ in {benchstring}: pass" + globals_ = {"func": func, "G": G} + if functionname in poweriteration: + benchstring = f"try:\n {benchstring}\nexcept exc:\n pass" + globals_["exc"] = nx.PowerIterationFailedConvergence + if backend == "graphblas": + benchstring = f"G._cache.clear()\n{benchstring}" + if enable_gc: + setup = "gc.enable()" + globals_["gc"] = gc + else: + setup = "pass" + timer = timeit.Timer(benchstring, setup=setup, globals=globals_) + if display: + line = f"Backend = {backend}, function = {functionname}, data = {dataname}" + if extra is not None: + line += f", extra = {extra}" + print("=" * len(line)) + print(line) + print("-" * len(line)) + info = {"backend": backend, "function": functionname, "data": dataname} + if extra is not None: + info["extra"] = extra + try: + first_time = timer.timeit(1) + except Exception as exc: + if display: + print(f"EXCEPTION: {exc}") + print("=" * len(line)) + raise + info["exception"] = str(exc) + return info + if time == 0: + n = 1 + elif n is None: + n = 2 ** max(0, int(np.ceil(np.log2(time / first_time)))) + if display: + print("Number of runs:", n) + print("first: ", stime(first_time)) + info["n"] = n + info["first"] = first_time + if n > 1: + results = timer.repeat(n - 1, 1) + results.append(first_time) + if display: + print("median:", stime(statistics.median(results))) + print("mean: ", stime(statistics.mean(results))) + print("stdev: ", stime(statistics.stdev(results))) + print("min: ", stime(min(results))) + print("max: ", stime(max(results))) + info["median"] = statistics.median(results) + info["mean"] = statistics.mean(results) + info["stdev"] = statistics.stdev(results) + info["min"] = min(results) + info["max"] = max(results) + if display: + print("=" * len(line)) + return info + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=f"Example usage: python {sys.argv[0]} -b graphblas -f pagerank -d amazon0302" + ) + parser.add_argument( + "-b", "--backend", choices=["graphblas", "networkx", "scipy"], default="graphblas" + ) + parser.add_argument( + "-t", "--time", type=float, default=3.0, help="Target minimum time to run benchmarks" + ) + parser.add_argument( + "-n", + type=int, + help="The number of times to run the benchmark (the default is to run according to time)", + ) + parser.add_argument( + "-d", + "--data", + required=True, + help="The path to a mtx file or one of the following data names: {" + + ", ".join(sorted(download_data.data_urls)) + + "}; data will be downloaded if necessary", + ) + parser.add_argument( + "-j", + "--json", + action="store_true", + help="Print results as json instead of human-readable text", + ) + parser.add_argument( + "--gc", + action="store_true", + help="Enable the garbage collector during timing (may help if running out of memory)", + ) + parser.add_argument("-f", "--func", required=True, help="Which function to 
benchmark") + parser.add_argument("--extra", help="Extra string to add to the function call") + args = parser.parse_args() + info = main( + args.data, + args.backend, + args.func, + time=args.time, + n=args.n, + extra=args.extra, + display=not args.json, + enable_gc=args.gc, + ) + if args.json: + print(json.dumps(info)) diff --git a/scripts/bench_pagerank.py b/scripts/bench_pagerank.py deleted file mode 100644 index 5162cf3..0000000 --- a/scripts/bench_pagerank.py +++ /dev/null @@ -1,250 +0,0 @@ -import click -import networkx as nx - - -def best_units(num): - """Returns scale factor and prefix such that 1 <= num*scale < 1000""" - if num < 1e-12: - return 1e15, "f" - if num < 1e-9: - return 1e12, "p" - if num < 1e-6: - return 1e9, "n" - if num < 1e-3: - return 1e6, "u" - if num < 1: - return 1e3, "m" - if num < 1e3: - return 1.0, "" - if num < 1e6: - return 1e-3, "k" - if num < 1e9: - return 1e-6, "M" - if num < 1e12: - return 1e-9, "G" - return 1e-12, "T" - - -def stime(time): - scale, units = best_units(time) - return f"{time * scale:4.3g} {units}s" - - -# Copied and modified from networkx -def pagerank_scipy( - A, - alpha=0.85, - personalization=None, - max_iter=100, - tol=1.0e-6, - nstart=None, - weight="weight", - dangling=None, -): - import numpy as np - import scipy as sp - import scipy.sparse # call as sp.sparse - - N = A.shape[0] - if A.nnz == 0: - return {} - - # nodelist = list(G) - S = A.sum(axis=1) - S[S != 0] = 1.0 / S[S != 0] - # TODO: csr_array - Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape)) - A = Q @ A - - # initial vector - if nstart is None: - x = np.repeat(1.0 / N, N) - else: - raise NotImplementedError() - # Personalization vector - if personalization is None: - p = np.repeat(1.0 / N, N) - else: - raise NotImplementedError() - # Dangling nodes - if dangling is None: - dangling_weights = p - else: - raise NotImplementedError() - is_dangling = np.where(S == 0)[0] - - # power iteration: make up to max_iter iterations - for _ in range(max_iter): - xlast = x - x = alpha * (x @ A + sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p - # check convergence, l1 norm - err = np.absolute(x - xlast).sum() - if err < N * tol: - return x - # return dict(zip(nodelist, map(float, x))) - raise nx.PowerIterationFailedConvergence(max_iter) - - -@click.command() -@click.argument("filename") -@click.option( - "-b", - "--backend", - default="graphblas", - type=click.Choice(["graphblas", "networkx", "scipy", "gb", "nx", "sp", "gbnx"]), -) -@click.option( - "-t", - "--time", - default=3, - type=click.FloatRange(min=0, min_open=True), -) -@click.option( - "-n", - default=None, - type=click.IntRange(min=1), -) -@click.option( - "--verify", - is_flag=True, -) -@click.option( - "--alpha", - default=0.85, - type=click.FloatRange(min=0, max=1), -) -@click.option( - "--tol", - default=1e-06, - type=click.FloatRange(min=0, min_open=True), -) -def main(filename, backend, time, n, verify, alpha, tol, _get_result=False): - import statistics - import timeit - import warnings - - import numpy as np - - warnings.simplefilter("ignore") - if verify: - gb_result = main.callback(filename, "gb", None, None, False, alpha, tol, _get_result=True) - sp_result = main.callback(filename, "sp", None, None, False, alpha, tol, _get_result=True) - rtol = tol / gb_result.size - atol = 1e-16 - np.testing.assert_allclose(gb_result, sp_result, rtol=rtol, atol=atol) - print(" |- graphblas and scipy.sparse match") - nx_result = main.callback(filename, "nx", None, None, False, alpha, tol, _get_result=True) - 
np.testing.assert_allclose(gb_result, nx_result, rtol=rtol, atol=atol) - print(" |- graphblas and networkx match") - np.testing.assert_allclose(sp_result, nx_result, rtol=rtol, atol=atol) - print(" |- scipy.sparse and networkx match") - gbnx_result = main.callback( - filename, "gbnx", None, None, False, alpha, tol, _get_result=True - ) - np.testing.assert_allclose(gbnx_result, gb_result, rtol=rtol, atol=atol) - np.testing.assert_allclose(gbnx_result, sp_result, rtol=rtol, atol=atol) - np.testing.assert_allclose(gbnx_result, nx_result, rtol=rtol, atol=atol) - print("All good!") - # Show a grid of total absolute differences between results - results = { - "gb": gb_result, - "sp": sp_result, - "nx": nx_result, - "gbnx": gbnx_result, - } - print(" ", end="") - for k1 in results: - print("%9s" % k1, end="") - print() - for k1, v1 in results.items(): - print("%5s" % k1, end="") - for v2 in results.values(): - print("%9.2g" % np.abs(v1 - v2).sum(), end="") - print() - return - - backend = { - "gb": "graphblas", - "nx": "networkx", - "sp": "scipy", - }.get(backend, backend) - print(f"Filename: {filename} ; backend: {backend}") - - if backend == "graphblas": - import pandas as pd - from graphblas import Matrix - - from graphblas_algorithms.link_analysis import pagerank_core as pagerank - - start = timeit.default_timer() - df = pd.read_csv(filename, delimiter="\t", names=["row", "col"]) - G = Matrix.from_values(df["row"].values, df["col"].values, 1) - stop = timeit.default_timer() - num_nodes = G.nrows - num_edges = G.nvals - if _get_result: - result = pagerank(G, alpha=alpha, tol=tol) - result(~result.S) << 0 # Densify just in case - return result.to_values()[1] - - elif backend == "scipy": - import pandas as pd - import scipy.sparse - - start = timeit.default_timer() - df = pd.read_csv(filename, delimiter="\t", names=["row", "col"]) - G = scipy.sparse.csr_array((np.repeat(1.0, len(df)), (df["row"].values, df["col"].values))) - pagerank = pagerank_scipy - stop = timeit.default_timer() - num_nodes = G.shape[0] - num_edges = G.nnz - if _get_result: - return pagerank(G, alpha=alpha, tol=tol) - else: - if backend == "networkx": - from networkx import pagerank - else: - from graphblas_algorithms.link_analysis import pagerank - - start = timeit.default_timer() - G = nx.read_edgelist(filename, delimiter="\t", nodetype=int, create_using=nx.DiGraph) - N = max(G) - for i in range(N): - if i not in G: - G.add_node(i) - stop = timeit.default_timer() - num_nodes = len(G.nodes) - num_edges = len(G.edges) - - if _get_result: - result = pagerank(G, alpha=alpha, tol=tol) - return np.array([result.get(key, 0) for key in range(N + 1)]) - - print("Num nodes:", num_nodes) - print("Num edges:", num_edges) - print("Load time:", stime(stop - start)) - timer = timeit.Timer( - "pagerank(G, alpha=alpha, tol=tol)", - globals={"pagerank": pagerank, "G": G, "alpha": alpha, "tol": tol}, - ) - first_time = timer.timeit(1) - if time == 0: - n = 1 - elif n is None: - n = 2 ** max(0, int(np.ceil(np.log2(time / first_time)))) - print("Number of runs:", n) - print("first: ", stime(first_time)) - if n > 1: - results = timer.repeat(n - 1, 1) - results.append(first_time) - print("median:", stime(statistics.median(results))) - print("mean: ", stime(statistics.mean(results))) - # print("hmean: ", stime(statistics.harmonic_mean(results))) - # print("gmean: ", stime(statistics.geometric_mean(results))) - print("stdev: ", stime(statistics.stdev(results))) - print("min: ", stime(min(results))) - print("max: ", stime(max(results))) - - -if 
__name__ == "__main__": - main() diff --git a/scripts/download_data.py b/scripts/download_data.py new file mode 100755 index 0000000..b01626c --- /dev/null +++ b/scripts/download_data.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +import argparse +import gzip +import io +import os +import sys +import tarfile +from pathlib import Path + +import requests + +datapath = Path(__file__).parent.parent / "data" + +data_urls = { + "amazon0302": "https://sparse.tamu.edu/MM/SNAP/amazon0302.tar.gz", + "web-Google": "https://sparse.tamu.edu/MM/SNAP/web-Google.tar.gz", + "soc-Pokec": "https://sparse.tamu.edu/MM/SNAP/soc-Pokec.tar.gz", + "email-Enron": "https://sparse.tamu.edu/MM/SNAP/email-Enron.tar.gz", + "preferentialAttachment": "https://sparse.tamu.edu/MM/DIMACS10/preferentialAttachment.tar.gz", + "caidaRouterLevel": "https://sparse.tamu.edu/MM/DIMACS10/caidaRouterLevel.tar.gz", + "dblp-2010": "https://sparse.tamu.edu/MM/LAW/dblp-2010.tar.gz", + "citationCiteseer": "https://sparse.tamu.edu/MM/DIMACS10/citationCiteseer.tar.gz", + "coAuthorsDBLP": "https://sparse.tamu.edu/MM/DIMACS10/coAuthorsDBLP.tar.gz", + "as-Skitter": "https://sparse.tamu.edu/MM/SNAP/as-Skitter.tar.gz", + "coPapersCiteseer": "https://sparse.tamu.edu/MM/DIMACS10/coPapersCiteseer.tar.gz", + "coPapersDBLP": "https://sparse.tamu.edu/MM/DIMACS10/coPapersDBLP.tar.gz", +} + + +def download(url, target=None): + req = requests.request("GET", url) + assert req.ok, req.reason + tar = tarfile.open(fileobj=io.BytesIO(gzip.decompress(req.content))) + for member in tar.members: + if not member.name.endswith(".mtx"): + continue + tar.extract(member) + if target: + member = Path(member.name) + target.parent.mkdir(parents=True, exist_ok=True) + member.replace(target) + os.removedirs(member.parent) + + +def main(datanames, overwrite=False): + filenames = [] + for name in datanames: + target = datapath / f"{name}.mtx" + filenames.append(target) + relpath = target.resolve().relative_to(Path().resolve()) + if not overwrite and target.exists(): + print(f"{relpath} already exists; skipping", file=sys.stderr) + continue + url = data_urls[name] + print(f"Downloading {relpath} from {url}", file=sys.stderr) + download(url, target) + return filenames + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("datanames", nargs="*", choices=[*data_urls, []]) + args = parser.parse_args() + datanames = args.datanames + if not datanames: + # None specified, so download all that are missing + datanames = data_urls + overwrite = False + else: + overwrite = True + main(datanames, overwrite=overwrite) diff --git a/scripts/maketree.py b/scripts/maketree.py new file mode 100755 index 0000000..e4deed5 --- /dev/null +++ b/scripts/maketree.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python +"""Run this script to auto-generate API when adding or removing nxapi functions. + +This updates API tree in README.md and dispatch functions in `graphblas_algorithms/interface.py`. 
+ +""" +from io import StringIO +from pathlib import Path + +import rich +from graphblas.core.utils import _autogenerate_code +from rich.tree import Tree + +from graphblas_algorithms.tests import test_match_nx +from graphblas_algorithms.tests.test_match_nx import get_fullname + + +def get_fixture(attr): + return getattr(test_match_nx, attr).__wrapped__ + + +def trim(name): + for prefix in ["networkx.algorithms.", "networkx."]: + if name.startswith(prefix): + return name[len(prefix) :] + raise ValueError(f"{name!r} does not begin with a recognized prefix") + + +def get_names(): + nx_names_to_info = get_fixture("nx_names_to_info")(get_fixture("nx_info")()) + gb_names_to_info = get_fixture("gb_names_to_info")(get_fixture("gb_info")()) + implemented = nx_names_to_info.keys() & gb_names_to_info.keys() + return sorted(trim(get_fullname(next(iter(nx_names_to_info[name])))) for name in implemented) + + +# Dispatched functions that are only available from `nxapi` +SHORTPATH = { + "overall_reciprocity", + "reciprocity", +} + + +def main(print_to_console=True, update_readme=True, update_interface=True): + fullnames = get_names() + # Step 1: add to README.md + tree = Tree("graphblas_algorithms.nxapi") + subtrees = {} + + def addtree(path): + if path in subtrees: + rv = subtrees[path] + elif "." not in path: + rv = subtrees[path] = tree.add(path) + else: + subpath, last = path.rsplit(".", 1) + subtree = addtree(subpath) + rv = subtrees[path] = subtree.add(last) + return rv + + for fullname in fullnames: + addtree(fullname) + if print_to_console: + rich.print(tree) + if update_readme: + s = StringIO() + rich.print(tree, file=s) + s.seek(0) + text = s.read() + _autogenerate_code( + Path(__file__).parent.parent / "README.md", + f"\n```\n{text}```\n\n", + begin="[//]: # (Begin auto-generated code)", + end="[//]: # (End auto-generated code)", + callblack=False, + ) + # Step 2: add to interface.py + lines = [] + prev_mod = None + for fullname in fullnames: + mod, subpath = fullname.split(".", 1) + if mod != prev_mod: + if prev_mod is not None: + lines.append("") + prev_mod = mod + lines.append(f" mod = nxapi.{mod}") + lines.append(" # " + "=" * (len(mod) + 10)) + if " (" in subpath: + subpath, name = subpath.rsplit(" (", 1) + name = name.split(")")[0] + else: + name = subpath.rsplit(".", 1)[-1] + if name in SHORTPATH: + subpath = subpath.rsplit(".", 1)[-1] + lines.append(f" {name} = nxapi.{subpath}") + else: + lines.append(f" {name} = mod.{subpath}") + lines.append("") + lines.append(" del mod") + lines.append("") + text = "\n".join(lines) + if update_interface: + _autogenerate_code( + Path(__file__).parent.parent / "graphblas_algorithms" / "interface.py", + text, + specializer="dispatch", + ) + return tree + + +if __name__ == "__main__": + main() diff --git a/scripts/scipy_impl.py b/scripts/scipy_impl.py new file mode 100644 index 0000000..35815a6 --- /dev/null +++ b/scripts/scipy_impl.py @@ -0,0 +1,65 @@ +import networkx as nx +import numpy as np +import scipy as sp +import scipy.sparse # call as sp.sparse + + +def pagerank( + A, + alpha=0.85, + personalization=None, + max_iter=100, + tol=1.0e-6, + nstart=None, + weight="weight", + dangling=None, +): + N = A.shape[0] + if A.nnz == 0: + return {} + + # nodelist = list(G) + S = A.sum(axis=1) + S[S != 0] = 1.0 / S[S != 0] + # TODO: csr_array + Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape)) + A = Q @ A + + # initial vector + if nstart is None: + x = np.repeat(1.0 / N, N) + else: + raise NotImplementedError + # Personalization vector + if 
personalization is None: + p = np.repeat(1.0 / N, N) + else: + raise NotImplementedError + # Dangling nodes + if dangling is None: + dangling_weights = p + else: + raise NotImplementedError + is_dangling = np.where(S == 0)[0] + + # power iteration: make up to max_iter iterations + for _i in range(max_iter): + xlast = x + x = alpha * (x @ A + sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p + # check convergence, l1 norm + err = np.absolute(x - xlast).sum() + if err < N * tol: + return x + # return dict(zip(nodelist, map(float, x), strict=True)) + raise nx.PowerIterationFailedConvergence(max_iter) + + +def all_pairs_bellman_ford_path_length(A, weight="weight"): + for source in range(A.shape[0]): + d = single_source_bellman_ford_path_length(A, source) + yield (source, d) + + +def single_source_bellman_ford_path_length(A, source, weight="weight"): + return scipy.sparse.csgraph.bellman_ford(A, indices=source) # So slow! + # return scipy.sparse.csgraph.dijkstra(A, indices=source) # Faster diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 3ad34d1..0000000 --- a/setup.cfg +++ /dev/null @@ -1,54 +0,0 @@ -[aliases] -test=pytest - -[tool:pytest] -testpaths = graphblas_algorithms/tests - -[flake8] -max-line-length = 100 -inline-quotes = " -exclude = - versioneer.py, - graphblas_algorithms/tests/, - build/ -ignore = - E203, # whitespace before ':' - E231, # Multiple spaces around "," - W503, # line break before binary operator - -[isort] -sections = FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER -profile = black -skip_gitignore = true -float_to_top = true -default_section = THIRDPARTY -known_first_party = graphblas_algorithms -line_length = 100 - -[coverage:run] -source = graphblas_algorithms -branch = True -omit = - graphblas_algorithms/_version.py - -[coverage:report] -# Regexes for lines to exclude from consideration -exclude_lines = - pragma: no cover - - raise AssertionError - raise NotImplementedError - -ignore_errors = True -precision = 1 -fail_under = 0 -skip_covered = True -skip_empty = True - -[versioneer] -VCS = git -style = pep440 -versionfile_source = graphblas_algorithms/_version.py -versionfile_build = graphblas_algorithms/_version.py -tag_prefix = -parentdir_prefix = graphblas_algorithms- diff --git a/setup.py b/setup.py index e83e92d..6068493 100644 --- a/setup.py +++ b/setup.py @@ -1,67 +1,3 @@ -from setuptools import find_packages, setup +from setuptools import setup -import versioneer - -extras_require = { - "test": ["pytest", "scipy"], -} -extras_require["complete"] = sorted({v for req in extras_require.values() for v in req}) - -with open("requirements.txt") as f: - install_requires = f.read().strip().split("\n") -with open("README.md") as f: - long_description = f.read() - -setup( - name="graphblas-algorithms", - version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), - description="Graph algorithms written in GraphBLAS", - long_description=long_description, - long_description_content_type="text/markdown", - author="Jim Kitchen and Erik Welch", - author_email="erik.n.welch@gmail.com,jim22k@gmail.com", - url="https://github.com/python-graphblas/graphblas-algorithms", - packages=find_packages(), - python_requires=">=3.8", - install_requires=install_requires, - extras_require=extras_require, - include_package_data=True, - license="Apache License 2.0", - keywords=[ - "graphblas", - "graph", - "sparse", - "matrix", - "lagraph", - "suitesparse", - "Networks", - "Graph Theory", - "Mathematics", - "network", - "discrete mathematics", - "math", - 
], - classifiers=[ - "Development Status :: 3 - Alpha", - "License :: OSI Approved :: Apache Software License", - "Operating System :: MacOS :: MacOS X", - "Operating System :: POSIX :: Linux", - "Operating System :: Microsoft :: Windows", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3 :: Only", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "Topic :: Scientific/Engineering", - "Topic :: Scientific/Engineering :: Bio-Informatics", - "Topic :: Scientific/Engineering :: Information Analysis", - "Topic :: Scientific/Engineering :: Mathematics", - "Topic :: Scientific/Engineering :: Physics", - "Topic :: Software Development :: Libraries :: Python Modules", - ], - zip_safe=False, -) +setup() diff --git a/versioneer.py b/versioneer.py deleted file mode 100644 index 905f625..0000000 --- a/versioneer.py +++ /dev/null @@ -1,2173 +0,0 @@ -# Version: 0.22 - -"""The Versioneer - like a rocketeer, but for versions. - -The Versioneer -============== - -* like a rocketeer, but for versions! -* https://github.com/python-versioneer/python-versioneer -* Brian Warner -* License: Public Domain -* Compatible with: Python 3.6, 3.7, 3.8, 3.9, 3.10 and pypy3 -* [![Latest Version][pypi-image]][pypi-url] -* [![Build Status][travis-image]][travis-url] - -This is a tool for managing a recorded version number in distutils/setuptools-based -python projects. The goal is to remove the tedious and error-prone "update -the embedded version string" step from your release process. Making a new -release should be as easy as recording a new tag in your version-control -system, and maybe making new tarballs. - - -## Quick Install - -* `pip install versioneer` to somewhere in your $PATH -* add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md)) -* run `versioneer install` in your source tree, commit the results -* Verify version information with `python setup.py version` - -## Version Identifiers - -Source trees come from a variety of places: - -* a version-control system checkout (mostly used by developers) -* a nightly tarball, produced by build automation -* a snapshot tarball, produced by a web-based VCS browser, like github's - "tarball from tag" feature -* a release tarball, produced by "setup.py sdist", distributed through PyPI - -Within each source tree, the version identifier (either a string or a number, -this tool is format-agnostic) can come from a variety of places: - -* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows - about recent "tags" and an absolute revision-id -* the name of the directory into which the tarball was unpacked -* an expanded VCS keyword ($Id$, etc) -* a `_version.py` created by some earlier build step - -For released software, the version identifier is closely related to a VCS -tag. Some projects use tag names that include more than just the version -string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool -needs to strip the tag prefix to extract the version identifier. For -unreleased software (between tags), the version identifier should provide -enough information to help developers recreate the same tree, while also -giving them an idea of roughly how old the tree is (after version 1.2, before -version 1.3). 
Many VCS systems can report a description that captures this, -for example `git describe --tags --dirty --always` reports things like -"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the -0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes). - -The version identifier is used for multiple purposes: - -* to allow the module to self-identify its version: `myproject.__version__` -* to choose a name and prefix for a 'setup.py sdist' tarball - -## Theory of Operation - -Versioneer works by adding a special `_version.py` file into your source -tree, where your `__init__.py` can import it. This `_version.py` knows how to -dynamically ask the VCS tool for version information at import time. - -`_version.py` also contains `$Revision$` markers, and the installation -process marks `_version.py` to have this marker rewritten with a tag name -during the `git archive` command. As a result, generated tarballs will -contain enough information to get the proper version. - -To allow `setup.py` to compute a version too, a `versioneer.py` is added to -the top level of your source tree, next to `setup.py` and the `setup.cfg` -that configures it. This overrides several distutils/setuptools commands to -compute the version when invoked, and changes `setup.py build` and `setup.py -sdist` to replace `_version.py` with a small static file that contains just -the generated version data. - -## Installation - -See [INSTALL.md](./INSTALL.md) for detailed installation instructions. - -## Version-String Flavors - -Code which uses Versioneer can learn about its version string at runtime by -importing `_version` from your main `__init__.py` file and running the -`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can -import the top-level `versioneer.py` and run `get_versions()`. - -Both functions return a dictionary with different flavors of version -information: - -* `['version']`: A condensed version string, rendered using the selected - style. This is the most commonly used value for the project's version - string. The default "pep440" style yields strings like `0.11`, - `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section - below for alternative styles. - -* `['full-revisionid']`: detailed revision identifier. For Git, this is the - full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". - -* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the - commit date in ISO 8601 format. This will be None if the date is not - available. - -* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that - this is only accurate if run in a VCS checkout, otherwise it is likely to - be False or None - -* `['error']`: if the version string could not be computed, this will be set - to a string describing the problem, otherwise it will be None. It may be - useful to throw an exception in setup.py if this is set, to avoid e.g. - creating tarballs with a version string of "unknown". - -Some variants are more useful than others. Including `full-revisionid` in a -bug report should allow developers to reconstruct the exact code being tested -(or indicate the presence of local changes that should be shared with the -developers). `version` is suitable for display in an "about" box or a CLI -`--version` output: it can be easily compared against release notes and lists -of bugs fixed in various releases. 
- -The installer adds the following text to your `__init__.py` to place a basic -version in `YOURPROJECT.__version__`: - - from ._version import get_versions - __version__ = get_versions()['version'] - del get_versions - -## Styles - -The setup.cfg `style=` configuration controls how the VCS information is -rendered into a version string. - -The default style, "pep440", produces a PEP440-compliant string, equal to the -un-prefixed tag name for actual releases, and containing an additional "local -version" section with more detail for in-between builds. For Git, this is -TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags ---dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the -tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and -that this commit is two revisions ("+2") beyond the "0.11" tag. For released -software (exactly equal to a known tag), the identifier will only contain the -stripped tag, e.g. "0.11". - -Other styles are available. See [details.md](details.md) in the Versioneer -source tree for descriptions. - -## Debugging - -Versioneer tries to avoid fatal errors: if something goes wrong, it will tend -to return a version of "0+unknown". To investigate the problem, run `setup.py -version`, which will run the version-lookup code in a verbose mode, and will -display the full contents of `get_versions()` (including the `error` string, -which may help identify what went wrong). - -## Known Limitations - -Some situations are known to cause problems for Versioneer. This details the -most significant ones. More can be found on Github -[issues page](https://github.com/python-versioneer/python-versioneer/issues). - -### Subprojects - -Versioneer has limited support for source trees in which `setup.py` is not in -the root directory (e.g. `setup.py` and `.git/` are *not* siblings). The are -two common reasons why `setup.py` might not be in the root: - -* Source trees which contain multiple subprojects, such as - [Buildbot](https://github.com/buildbot/buildbot), which contains both - "master" and "slave" subprojects, each with their own `setup.py`, - `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI - distributions (and upload multiple independently-installable tarballs). -* Source trees whose main purpose is to contain a C library, but which also - provide bindings to Python (and perhaps other languages) in subdirectories. - -Versioneer will look for `.git` in parent directories, and most operations -should get the right version string. However `pip` and `setuptools` have bugs -and implementation details which frequently cause `pip install .` from a -subproject directory to fail to find a correct version string (so it usually -defaults to `0+unknown`). - -`pip install --editable .` should work correctly. `setup.py install` might -work too. - -Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in -some later version. - -[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking -this issue. The discussion in -[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the -issue from the Versioneer side in more detail. -[pip PR#3176](https://github.com/pypa/pip/pull/3176) and -[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve -pip to let Versioneer work correctly. 
- -Versioneer-0.16 and earlier only looked for a `.git` directory next to the -`setup.cfg`, so subprojects were completely unsupported with those releases. - -### Editable installs with setuptools <= 18.5 - -`setup.py develop` and `pip install --editable .` allow you to install a -project into a virtualenv once, then continue editing the source code (and -test) without re-installing after every change. - -"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a -convenient way to specify executable scripts that should be installed along -with the python package. - -These both work as expected when using modern setuptools. When using -setuptools-18.5 or earlier, however, certain operations will cause -`pkg_resources.DistributionNotFound` errors when running the entrypoint -script, which must be resolved by re-installing the package. This happens -when the install happens with one version, then the egg_info data is -regenerated while a different version is checked out. Many setup.py commands -cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into -a different virtualenv), so this can be surprising. - -[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes -this one, but upgrading to a newer version of setuptools should probably -resolve it. - - -## Updating Versioneer - -To upgrade your project to a new release of Versioneer, do the following: - -* install the new Versioneer (`pip install -U versioneer` or equivalent) -* edit `setup.cfg`, if necessary, to include any new configuration settings - indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. -* re-run `versioneer install` in your source tree, to replace - `SRC/_version.py` -* commit any changed files - -## Future Directions - -This tool is designed to make it easily extended to other version-control -systems: all VCS-specific components are in separate directories like -src/git/ . The top-level `versioneer.py` script is assembled from these -components by running make-versioneer.py . In the future, make-versioneer.py -will take a VCS name as an argument, and will construct a version of -`versioneer.py` that is specific to the given VCS. It might also take the -configuration arguments that are currently provided manually during -installation by editing setup.py . Alternatively, it might go the other -direction and include code from all supported VCS systems, reducing the -number of intermediate scripts. - -## Similar projects - -* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time - dependency -* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of - versioneer -* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools - plugin - -## License - -To make Versioneer easier to embed, all its code is dedicated to the public -domain. The `_version.py` that it creates is also in the public domain. -Specifically, both are released under the Creative Commons "Public Domain -Dedication" license (CC0-1.0), as described in -https://creativecommons.org/publicdomain/zero/1.0/ . 
- -[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg -[pypi-url]: https://pypi.python.org/pypi/versioneer/ -[travis-image]: -https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg -[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer - -""" -# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring -# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements -# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error -# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with -# pylint:disable=attribute-defined-outside-init,too-many-arguments - -import configparser -import errno -import functools -import json -import os -import re -import subprocess -import sys -from typing import Callable, Dict - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_root(): - """Get the project root directory. - - We require that all commands are run from the project root, i.e. the - directory that contains setup.py, setup.cfg, and versioneer.py . - """ - root = os.path.realpath(os.path.abspath(os.getcwd())) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - # allow 'python path/to/setup.py COMMAND' - root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - err = ( - "Versioneer was unable to run the project root directory. " - "Versioneer requires setup.py to be executed from " - "its immediate directory (like 'python setup.py COMMAND'), " - "or in a way that lets it use sys.argv[0] to find the root " - "(like 'python path/to/setup.py COMMAND')." - ) - raise VersioneerBadRootError(err) - try: - # Certain runtime workflows (setup.py install/develop in a setuptools - # tree) execute all dependencies in a single python process, so - # "versioneer" may be imported multiple times, and python's shared - # module-import table will cache the first one. So we can't use - # os.path.dirname(__file__), as that will find whichever - # versioneer.py was first imported, even in later projects. - my_path = os.path.realpath(os.path.abspath(__file__)) - me_dir = os.path.normcase(os.path.splitext(my_path)[0]) - vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) - if me_dir != vsr_dir: - print( - "Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(my_path), versioneer_py) - ) - except NameError: - pass - return root - - -def get_config_from_root(root): - """Read the project setup.cfg file to determine Versioneer config.""" - # This might raise OSError (if setup.cfg is missing), or - # configparser.NoSectionError (if it lacks a [versioneer] section), or - # configparser.NoOptionError (if it lacks "VCS="). See the docstring at - # the top of versioneer.py for instructions on writing your setup.cfg . 
- setup_cfg = os.path.join(root, "setup.cfg") - parser = configparser.ConfigParser() - with open(setup_cfg) as cfg_file: - parser.read_file(cfg_file) - VCS = parser.get("versioneer", "VCS") # mandatory - - # Dict-like interface for non-mandatory entries - section = parser["versioneer"] - - cfg = VersioneerConfig() - cfg.VCS = VCS - cfg.style = section.get("style", "") - cfg.versionfile_source = section.get("versionfile_source") - cfg.versionfile_build = section.get("versionfile_build") - cfg.tag_prefix = section.get("tag_prefix") - if cfg.tag_prefix in ("''", '""'): - cfg.tag_prefix = "" - cfg.parentdir_prefix = section.get("parentdir_prefix") - cfg.verbose = section.get("verbose") - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -# these dictionaries contain VCS-specific tools -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs, method): # decorator - """Create decorator to mark a method as the handler of a VCS.""" - - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - HANDLERS.setdefault(vcs, {})[method] = f - return f - - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - - popen_kwargs = {} - if sys.platform == "win32": - # This hides the console window if pythonw.exe is used - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - popen_kwargs["startupinfo"] = startupinfo - - for command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen( - [command] + args, - cwd=cwd, - env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr else None), - **popen_kwargs, - ) - break - except OSError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print(f"unable to find command, tried {commands}") - return None, None - stdout = process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, process.returncode - return stdout, process.returncode - - -LONG_VERSION_PY[ - "git" -] = r''' -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.22 (https://github.com/python-versioneer/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys -from typing import Callable, Dict -import functools - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). 
- git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" - git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" - git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "%(STYLE)s" - cfg.tag_prefix = "%(TAG_PREFIX)s" - cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" - cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs, method): # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - - popen_kwargs = {} - if sys.platform == "win32": - # This hides the console window if pythonw.exe is used - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - popen_kwargs["startupinfo"] = startupinfo - - for command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None), **popen_kwargs) - break - except OSError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %%s" %% dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %%s" %% (commands,)) - return None, None - stdout = process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %%s (error)" %% dispcmd) - print("stdout was %%s" %% stdout) - return None, process.returncode - return stdout, process.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. 
We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %%s but none started with prefix %%s" %% - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - with open(versionfile_abs, "r") as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %%d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} - if verbose: - print("discarding '%%s', no digits" %% ",".join(refs - tags)) - if verbose: - print("likely tags: %%s" %% ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. 
"2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r'\d', r): - continue - if verbose: - print("picking %%s" %% r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - # GIT_DIR can interfere with correct operation of Versioneer. - # It may be intended to be passed to the Versioneer-versioned project, - # but that should not change where we get our version from. - env = os.environ.copy() - env.pop("GIT_DIR", None) - runner = functools.partial(runner, env=env) - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %%s not under git control" %% root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - MATCH_ARGS = ["--match", "%%s*" %% tag_prefix] if tag_prefix else [] - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", *MATCH_ARGS], - cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. If all else fails, we are on a branchless - # commit. - branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. - branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. 
Good or bad. - branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%%s'" - %% describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%%s' doesn't start with prefix '%%s'" - print(fmt %% (full_tag, tag_prefix)) - pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" - %% (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces): - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver): - """Split pep440 version string at the post-release segment. - - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). - """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces): - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%%d.dev%%d" %% (post_version+1, pieces["distance"]) - else: - rendered += ".post0.dev%%d" %% (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%%d" %% pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%%s'" %% style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. 
- for _ in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} -''' - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - with open(versionfile_abs) as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". 
- tags = {r for r in refs if re.search(r"\d", r)} - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix) :] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r"\d", r): - continue - if verbose: - print("picking %s" % r) - return { - "version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": None, - "date": date, - } - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return { - "version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, - "error": "no suitable tags", - "date": None, - } - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - # GIT_DIR can interfere with correct operation of Versioneer. - # It may be intended to be passed to the Versioneer-versioned project, - # but that should not change where we get our version from. - env = os.environ.copy() - env.pop("GIT_DIR", None) - runner = functools.partial(runner, env=env) - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - MATCH_ARGS = ["--match", "%s*" % tag_prefix] if tag_prefix else [] - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner( - GITS, ["describe", "--tags", "--dirty", "--always", "--long", *MATCH_ARGS], cwd=root - ) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. If all else fails, we are on a branchless - # commit. 
- branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. - branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. - branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[: git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? - pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = f"tag '{full_tag}' doesn't start with prefix '{tag_prefix}'" - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix) :] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def do_vcs_install(manifest_in, versionfile_source, ipy): - """Git-specific installation logic for Versioneer. - - For Git, this means creating/changing .gitattributes to mark _version.py - for export-subst keyword substitution. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - files = [manifest_in, versionfile_source] - if ipy: - files.append(ipy) - try: - my_path = __file__ - if my_path.endswith(".pyc") or my_path.endswith(".pyo"): - my_path = os.path.splitext(my_path)[0] + ".py" - versioneer_file = os.path.relpath(my_path) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) - present = False - try: - with open(".gitattributes") as fobj: - for line in fobj: - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - break - except OSError: - pass - if not present: - with open(".gitattributes", "a+") as fobj: - fobj.write(f"{versionfile_source} export-subst\n") - files.append(".gitattributes") - run_command(GITS, ["add", "--"] + files) - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. 
- - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return { - "version": dirname[len(parentdir_prefix) :], - "full-revisionid": None, - "dirty": False, - "error": None, - "date": None, - } - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print( - "Tried directories %s but none started with prefix %s" - % (str(rootdirs), parentdir_prefix) - ) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.22) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. Distribution tarballs contain a pre-generated copy -# of this file. - -import json - -version_json = ''' -%s -''' # END VERSION_JSON - - -def get_versions(): - return json.loads(version_json) -""" - - -def versions_from_file(filename): - """Try to determine the version from _version.py if present.""" - try: - with open(filename) as f: - contents = f.read() - except OSError: - raise NotThisMethod("unable to read _version.py") - mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S) - if not mo: - mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", contents, re.M | re.S) - if not mo: - raise NotThisMethod("no version_json in _version.py") - return json.loads(mo.group(1)) - - -def write_to_version_file(filename, versions): - """Write the given version number to the given _version.py file.""" - os.unlink(filename) - contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": ")) - with open(filename, "w") as f: - f.write(SHORT_VERSION_PY % contents) - - print("set {} to '{}'".format(filename, versions["version"])) - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces): - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver): - """Split pep440 version string at the post-release segment. - - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). - """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces): - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) - else: - rendered += ".post0.dev%d" % (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return { - "version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None, - } - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return { - "version": rendered, - "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], - "error": None, - "date": pieces.get("date"), - } - - -class VersioneerBadRootError(Exception): - """The project root directory is unknown or missing key files.""" - - -def get_versions(verbose=False): - """Get the project version from whatever source is available. - - Returns dict with two keys: 'version' and 'full'. - """ - if "versioneer" in sys.modules: - # see the discussion in cmdclass.py:get_cmdclass() - del sys.modules["versioneer"] - - root = get_root() - cfg = get_config_from_root(root) - - assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" - handlers = HANDLERS.get(cfg.VCS) - assert handlers, "unrecognized VCS '%s'" % cfg.VCS - verbose = verbose or cfg.verbose - assert cfg.versionfile_source is not None, "please set versioneer.versionfile_source" - assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" - - versionfile_abs = os.path.join(root, cfg.versionfile_source) - - # extract version from first of: _version.py, VCS command (e.g. 'git - # describe'), parentdir. 
This is meant to work for developers using a - # source checkout, for users of a tarball created by 'setup.py sdist', - # and for users of a tarball/zipball created by 'git archive' or github's - # download-from-tag feature or the equivalent in other VCSes. - - get_keywords_f = handlers.get("get_keywords") - from_keywords_f = handlers.get("keywords") - if get_keywords_f and from_keywords_f: - try: - keywords = get_keywords_f(versionfile_abs) - ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) - if verbose: - print("got version from expanded keyword %s" % ver) - return ver - except NotThisMethod: - pass - - try: - ver = versions_from_file(versionfile_abs) - if verbose: - print(f"got version from file {versionfile_abs} {ver}") - return ver - except NotThisMethod: - pass - - from_vcs_f = handlers.get("pieces_from_vcs") - if from_vcs_f: - try: - pieces = from_vcs_f(cfg.tag_prefix, root, verbose) - ver = render(pieces, cfg.style) - if verbose: - print("got version from VCS %s" % ver) - return ver - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - if verbose: - print("got version from parentdir %s" % ver) - return ver - except NotThisMethod: - pass - - if verbose: - print("unable to compute version") - - return { - "version": "0+unknown", - "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", - "date": None, - } - - -def get_version(): - """Get the short version string for this project.""" - return get_versions()["version"] - - -def get_cmdclass(cmdclass=None): - """Get the custom setuptools/distutils subclasses used by Versioneer. - - If the package uses a different cmdclass (e.g. one from numpy), it - should be provide as an argument. - """ - if "versioneer" in sys.modules: - del sys.modules["versioneer"] - # this fixes the "python setup.py develop" case (also 'install' and - # 'easy_install .'), in which subdependencies of the main project are - # built (using setup.py bdist_egg) in the same python process. Assume - # a main project A and a dependency B, which use different versions - # of Versioneer. A's setup.py imports A's Versioneer, leaving it in - # sys.modules by the time B's setup.py is executed, causing B to run - # with the wrong versioneer. Setuptools wraps the sub-dep builds in a - # sandbox that restores sys.modules to it's pre-build state, so the - # parent is protected against the child's "import versioneer". By - # removing ourselves from sys.modules here, before the child build - # happens, we protect the child from the parent's versioneer too. 
- # Also see https://github.com/python-versioneer/python-versioneer/issues/52 - - cmds = {} if cmdclass is None else cmdclass.copy() - - # we add "version" to both distutils and setuptools - try: - from setuptools import Command - except ImportError: - from distutils.core import Command - - class cmd_version(Command): - description = "report generated version string" - user_options = [] - boolean_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - vers = get_versions(verbose=True) - print("Version: %s" % vers["version"]) - print(" full-revisionid: %s" % vers.get("full-revisionid")) - print(" dirty: %s" % vers.get("dirty")) - print(" date: %s" % vers.get("date")) - if vers["error"]: - print(" error: %s" % vers["error"]) - - cmds["version"] = cmd_version - - # we override "build_py" in both distutils and setuptools - # - # most invocation pathways end up running build_py: - # distutils/build -> build_py - # distutils/install -> distutils/build ->.. - # setuptools/bdist_wheel -> distutils/install ->.. - # setuptools/bdist_egg -> distutils/install_lib -> build_py - # setuptools/install -> bdist_egg ->.. - # setuptools/develop -> ? - # pip install: - # copies source tree to a tempdir before running egg_info/etc - # if .git isn't copied too, 'git describe' will fail - # then does setup.py bdist_wheel, or sometimes setup.py install - # setup.py egg_info -> ? - - # we override different "build_py" commands for both environments - if "build_py" in cmds: - _build_py = cmds["build_py"] - elif "setuptools" in sys.modules: - from setuptools.command.build_py import build_py as _build_py - else: - from distutils.command.build_py import build_py as _build_py - - class cmd_build_py(_build_py): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_py.run(self) - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if cfg.versionfile_build: - target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - cmds["build_py"] = cmd_build_py - - if "build_ext" in cmds: - _build_ext = cmds["build_ext"] - elif "setuptools" in sys.modules: - from setuptools.command.build_ext import build_ext as _build_ext - else: - from distutils.command.build_ext import build_ext as _build_ext - - class cmd_build_ext(_build_ext): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_ext.run(self) - if self.inplace: - # build_ext --inplace will only build extensions in - # build/lib<..> dir with no _version.py to write to. - # As in place builds will already have a _version.py - # in the module dir, we do not need to write one. - return - # now locate _version.py in the new build/ directory and replace - # it with an updated value - target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - cmds["build_ext"] = cmd_build_ext - - if "cx_Freeze" in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe - - # nczeczulin reports that py2exe won't like the pep440-style string - # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. 
- # setup(console=[{ - # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION - # "product_version": versioneer.get_version(), - # ... - - class cmd_build_exe(_build_exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _build_exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write( - LONG - % { - "DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - } - ) - - cmds["build_exe"] = cmd_build_exe - del cmds["build_py"] - - if "py2exe" in sys.modules: # py2exe enabled? - from py2exe.distutils_buildexe import py2exe as _py2exe - - class cmd_py2exe(_py2exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _py2exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write( - LONG - % { - "DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - } - ) - - cmds["py2exe"] = cmd_py2exe - - # we override different "sdist" commands for both environments - if "sdist" in cmds: - _sdist = cmds["sdist"] - elif "setuptools" in sys.modules: - from setuptools.command.sdist import sdist as _sdist - else: - from distutils.command.sdist import sdist as _sdist - - class cmd_sdist(_sdist): - def run(self): - versions = get_versions() - self._versioneer_generated_versions = versions - # unless we update this, the command will keep using the old - # version - self.distribution.metadata.version = versions["version"] - return _sdist.run(self) - - def make_release_tree(self, base_dir, files): - root = get_root() - cfg = get_config_from_root(root) - _sdist.make_release_tree(self, base_dir, files) - # now locate _version.py in the new base_dir directory - # (remembering that it may be a hardlink) and replace it with an - # updated value - target_versionfile = os.path.join(base_dir, cfg.versionfile_source) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, self._versioneer_generated_versions) - - cmds["sdist"] = cmd_sdist - - return cmds - - -CONFIG_ERROR = """ -setup.cfg is missing the necessary Versioneer configuration. You need -a section like: - - [versioneer] - VCS = git - style = pep440 - versionfile_source = src/myproject/_version.py - versionfile_build = myproject/_version.py - tag_prefix = - parentdir_prefix = myproject- - -You will also need to edit your setup.py to use the results: - - import versioneer - setup(version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), ...) - -Please read the docstring in ./versioneer.py for configuration instructions, -edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. -""" - -SAMPLE_CONFIG = """ -# See the docstring in versioneer.py for instructions. Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. 
- -[versioneer] -#VCS = git -#style = pep440 -#versionfile_source = -#versionfile_build = -#tag_prefix = -#parentdir_prefix = - -""" - -OLD_SNIPPET = """ -from ._version import get_versions -__version__ = get_versions()['version'] -del get_versions -""" - -INIT_PY_SNIPPET = """ -from . import {0} -__version__ = {0}.get_versions()['version'] -""" - - -def do_setup(): - """Do main VCS-independent setup function for installing Versioneer.""" - root = get_root() - try: - cfg = get_config_from_root(root) - except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: - if isinstance(e, (OSError, configparser.NoSectionError)): - print("Adding sample versioneer config to setup.cfg", file=sys.stderr) - with open(os.path.join(root, "setup.cfg"), "a") as f: - f.write(SAMPLE_CONFIG) - print(CONFIG_ERROR, file=sys.stderr) - return 1 - - print(" creating %s" % cfg.versionfile_source) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write( - LONG - % { - "DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - } - ) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") - if os.path.exists(ipy): - try: - with open(ipy) as f: - old = f.read() - except OSError: - old = "" - module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] - snippet = INIT_PY_SNIPPET.format(module) - if OLD_SNIPPET in old: - print(" replacing boilerplate in %s" % ipy) - with open(ipy, "w") as f: - f.write(old.replace(OLD_SNIPPET, snippet)) - elif snippet not in old: - print(" appending to %s" % ipy) - with open(ipy, "a") as f: - f.write(snippet) - else: - print(" %s unmodified" % ipy) - else: - print(" %s doesn't exist, ok" % ipy) - ipy = None - - # Make sure both the top-level "versioneer.py" and versionfile_source - # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so - # they'll be copied into source distributions. Pip won't be able to - # install the package without this. - manifest_in = os.path.join(root, "MANIFEST.in") - simple_includes = set() - try: - with open(manifest_in) as f: - for line in f: - if line.startswith("include "): - for include in line.split()[1:]: - simple_includes.add(include) - except OSError: - pass - # That doesn't cover everything MANIFEST.in can do - # (http://docs.python.org/2/distutils/sourcedist.html#commands), so - # it might give some false negatives. Appending redundant 'include' - # lines is safe, though. - if "versioneer.py" not in simple_includes: - print(" appending 'versioneer.py' to MANIFEST.in") - with open(manifest_in, "a") as f: - f.write("include versioneer.py\n") - else: - print(" 'versioneer.py' already in MANIFEST.in") - if cfg.versionfile_source not in simple_includes: - print(" appending versionfile_source ('%s') to MANIFEST.in" % cfg.versionfile_source) - with open(manifest_in, "a") as f: - f.write("include %s\n" % cfg.versionfile_source) - else: - print(" versionfile_source already in MANIFEST.in") - - # Make VCS-specific changes. For git, this means creating/changing - # .gitattributes to mark _version.py for export-subst keyword - # substitution. 
- do_vcs_install(manifest_in, cfg.versionfile_source, ipy) - return 0 - - -def scan_setup_py(): - """Validate the contents of setup.py against Versioneer's expectations.""" - found = set() - setters = False - errors = 0 - with open("setup.py") as f: - for line in f.readlines(): - if "import versioneer" in line: - found.add("import") - if "versioneer.get_cmdclass()" in line: - found.add("cmdclass") - if "versioneer.get_version()" in line: - found.add("get_version") - if "versioneer.VCS" in line: - setters = True - if "versioneer.versionfile_source" in line: - setters = True - if len(found) != 3: - print("") - print("Your setup.py appears to be missing some important items") - print("(but I might be wrong). Please make sure it has something") - print("roughly like the following:") - print("") - print(" import versioneer") - print(" setup( version=versioneer.get_version(),") - print(" cmdclass=versioneer.get_cmdclass(), ...)") - print("") - errors += 1 - if setters: - print("You should remove lines like 'versioneer.VCS = ' and") - print("'versioneer.versionfile_source = ' . This configuration") - print("now lives in setup.cfg, and should be removed from setup.py") - print("") - errors += 1 - return errors - - -if __name__ == "__main__": - cmd = sys.argv[1] - if cmd == "setup": - errors = do_setup() - errors += scan_setup_py() - if errors: - sys.exit(1)
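
A minimal illustrative sketch (not part of the patch) of the behavior this removal drops: the default "pep440" render style documented above builds TAG[+DISTANCE.gHEX[.dirty]] from the pieces that git_pieces_from_vcs extracts from git describe output. The helper name sketch_render_pep440 and the example values below are hypothetical; the logic mirrors the removed render_pep440/plus_or_dot pair.

def sketch_render_pep440(pieces):
    # pieces comes from "git describe --tags --dirty --always --long",
    # e.g. {"closest-tag": "1.2", "distance": 3, "short": "abc1234", "dirty": False}
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            # plus_or_dot(): use "." if the tag already carries a local "+" segment
            rendered += "." if "+" in pieces["closest-tag"] else "+"
            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
            if pieces["dirty"]:
                rendered += ".dirty"
    else:
        # no tags at all: fall back to 0+untagged.DISTANCE.gHEX[.dirty]
        rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
        if pieces["dirty"]:
            rendered += ".dirty"
    return rendered

# Example (hypothetical values):
# sketch_render_pep440({"closest-tag": "1.2", "distance": 3, "short": "abc1234", "dirty": False})
# -> "1.2+3.gabc1234"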