From e458cadb01fb1c57cdf01161472c1cf832edce18 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Sat, 18 Feb 2023 23:21:44 +0100 Subject: [PATCH 01/15] Read Matrix Market with `fast_matrix_market` --- .github/workflows/test_and_build.yml | 3 +- environment.yml | 2 +- graphblas/io.py | 75 +++++++++++++++++++++++++--- graphblas/tests/test_io.py | 54 +++++++++++++++----- pyproject.toml | 2 + scripts/check_versions.sh | 1 + 6 files changed, 115 insertions(+), 22 deletions(-) diff --git a/.github/workflows/test_and_build.yml b/.github/workflows/test_and_build.yml index ea82052f8..c85641061 100644 --- a/.github/workflows/test_and_build.yml +++ b/.github/workflows/test_and_build.yml @@ -143,6 +143,7 @@ jobs: nxver=$(python -c 'import random ; print(random.choice(["=2.7", "=2.8", "=3.0", ""]))') yamlver=$(python -c 'import random ; print(random.choice(["=5.4", "=6.0", ""]))') sparsever=$(python -c 'import random ; print(random.choice(["=0.12", "=0.13", ""]))') + fmmver=$(python -c 'import random ; print(random.choice(["=1.4", ""]))') if [[ ${{ steps.pyver.outputs.selected }} == "3.8" ]]; then npver=$(python -c 'import random ; print(random.choice(["=1.21", "=1.22", "=1.23", ""]))') spver=$(python -c 'import random ; print(random.choice(["=1.8", "=1.9", "=1.10", ""]))') @@ -188,7 +189,7 @@ jobs: # Once we have wheels for all OSes, we can delete the last two lines. 
mamba install pytest coverage coveralls=3.3.1 pytest-randomly cffi donfig pyyaml${yamlver} sparse${sparsever} \ - pandas${pdver} scipy${spver} numpy${npver} awkward${akver} networkx${nxver} numba${numbaver} \ + pandas${pdver} scipy${spver} numpy${npver} awkward${akver} networkx${nxver} numba${numbaver} fast_matrix_market${fmmver} \ ${{ matrix.slowtask == 'pytest_bizarro' && 'black' || '' }} \ ${{ matrix.slowtask == 'notebooks' && 'matplotlib nbconvert jupyter "ipython>=7"' || '' }} \ ${{ steps.sourcetype.outputs.selected == 'upstream' && 'cython' || '' }} \ diff --git a/environment.yml b/environment.yml index 90675527c..5153b7aae 100644 --- a/environment.yml +++ b/environment.yml @@ -23,7 +23,7 @@ dependencies: - pandas # For I/O - awkward - # - fast_matrix_market # Coming soon... + - fast_matrix_market - networkx - scipy - sparse diff --git a/graphblas/io.py b/graphblas/io.py index 32a5f3f0d..2c7cec80a 100644 --- a/graphblas/io.py +++ b/graphblas/io.py @@ -603,16 +603,27 @@ def to_pydata_sparse(A, format="coo"): return s.asformat(format) -def mmread(source, *, dup_op=None, name=None): +def mmread(source, engine="auto", *, dup_op=None, name=None, **kwargs): """Create a GraphBLAS Matrix from the contents of a Matrix Market file. This uses `scipy.io.mmread - `_. + `_ + or `fast_matrix_market.mmread + `_. + + By default, ``fast_matrix_market`` will be used if available, because it + is faster. Additional keyword arguments in ``**kwargs`` will be passed + to the engine's ``mmread``. For example, ``parallelism=8`` will set the + number of threads to use to 8 when using ``fast_matrix_market``. Parameters ---------- - filename : str or file + source : str or file Filename (.mtx or .mtz.gz) or file-like object + engine : {"auto", "scipy", "fmm", "fast_matrix_market"}, default "auto" + How to read the matrix market file. 
"scipy" uses ``scipy.io.mmread``, + "fmm" and "fast_matrix_market" uses ``fast_matrix_market.mmread``, + and "auto" will use "fast_matrix_market" if available. dup_op : BinaryOp, optional Aggregation function for duplicate coordinates (if found) name : str, optional @@ -627,7 +638,21 @@ def mmread(source, *, dup_op=None, name=None): from scipy.sparse import isspmatrix_coo except ImportError: # pragma: no cover (import) raise ImportError("scipy is required to read Matrix Market files") from None - array = mmread(source) + engine = engine.lower() + if engine in {"auto", "fmm", "fast_matrix_market"}: + try: + from fast_matrix_market import mmread # noqa: F811 + except ImportError: # pragma: no cover (import) + if engine != "auto": + raise ImportError( + "fast_matrix_market is required to read Matrix Market files " + f'using the "{engine}" engine' + ) from None + elif engine != "scipy": + raise ValueError( + f'Bad engine value: {engine!r}. Must be "auto", "scipy", "fmm", or "fast_matrix_market"' + ) + array = mmread(source, **kwargs) if isspmatrix_coo(array): nrows, ncols = array.shape return _Matrix.from_coo( @@ -636,7 +661,17 @@ def mmread(source, *, dup_op=None, name=None): return _Matrix.from_dense(array, name=name) -def mmwrite(target, matrix, *, comment="", field=None, precision=None, symmetry=None): +def mmwrite( + target, + matrix, + engine="auto", + *, + comment="", + field=None, + precision=None, + symmetry=None, + **kwargs, +): """Write a Matrix Market file from the contents of a GraphBLAS Matrix. This uses `scipy.io.mmwrite @@ -644,10 +679,14 @@ def mmwrite(target, matrix, *, comment="", field=None, precision=None, symmetry= Parameters ---------- - filename : str or file target + target : str or file target Filename (.mtx) or file-like object opened for writing matrix : Matrix Matrix to be written + engine : {"auto", "scipy", "fmm", "fast_matrix_market"}, default "auto" + How to read the matrix market file. 
"scipy" uses ``scipy.io.mmwrite``, + "fmm" and "fast_matrix_market" uses ``fast_matrix_market.mmwrite``, + and "auto" will use "fast_matrix_market" if available. comment : str, optional Comments to be prepended to the Matrix Market file field : str @@ -661,8 +700,30 @@ def mmwrite(target, matrix, *, comment="", field=None, precision=None, symmetry= from scipy.io import mmwrite except ImportError: # pragma: no cover (import) raise ImportError("scipy is required to write Matrix Market files") from None + engine = engine.lower() + if engine in {"auto", "fmm", "fast_matrix_market"}: + try: + from fast_matrix_market import mmwrite # noqa: F811 + except ImportError: # pragma: no cover (import) + if engine != "auto": + raise ImportError( + "fast_matrix_market is required to write Matrix Market files " + f'using the "{engine}" engine' + ) from None + elif engine != "scipy": + raise ValueError( + f'Bad engine value: {engine!r}. Must be "auto", "scipy", "fmm", or "fast_matrix_market"' + ) if _backend == "suitesparse" and matrix.ss.format in {"fullr", "fullc"}: array = matrix.ss.export()["values"] else: array = to_scipy_sparse(matrix, format="coo") - mmwrite(target, array, comment=comment, field=field, precision=precision, symmetry=symmetry) + mmwrite( + target, + array, + comment=comment, + field=field, + precision=precision, + symmetry=symmetry, + **kwargs, + ) diff --git a/graphblas/tests/test_io.py b/graphblas/tests/test_io.py index 0f78430c1..c8b1cfc28 100644 --- a/graphblas/tests/test_io.py +++ b/graphblas/tests/test_io.py @@ -30,6 +30,10 @@ except ImportError: # pragma: no cover (import) ak = None +try: + import fast_matrix_market as fmm +except ImportError: # pragma: no cover (import) + fmm = None suitesparse = gb.backend == "suitesparse" @@ -161,7 +165,10 @@ def test_matrix_to_from_networkx(): @pytest.mark.skipif("not ss") -def test_mmread_mmwrite(): +@pytest.mark.parametrize("engine", ["auto", "scipy", "fmm"]) +def test_mmread_mmwrite(engine): + if engine == "fmm" 
and fmm is None: # pragma: no cover (import) + pytest.skip("needs fast_matrix_market") from scipy.io.tests import test_mmio p31 = 2**31 @@ -258,10 +265,16 @@ def test_mmread_mmwrite(): continue mm_in = StringIO(getattr(test_mmio, example)) if over64: - with pytest.raises(OverflowError): - M = gb.io.mmread(mm_in) + with pytest.raises((OverflowError, ValueError)): + # fast_matrix_market v1.4.5 raises ValueError instead of OverflowError + M = gb.io.mmread(mm_in, engine) else: - M = gb.io.mmread(mm_in) + if example == "_empty_lines_example" and engine in {"fmm", "auto"} and fmm is not None: + # TODO MAINT: is this a bug in fast_matrix_market, or does scipy.io.mmread + # read an invalid file? `fast_matrix_market` v1.4.5 does not handle this. + continue + print(example) + M = gb.io.mmread(mm_in, engine) if not M.isequal(expected): # pragma: no cover (debug) print(example) print("Expected:") @@ -270,12 +283,12 @@ def test_mmread_mmwrite(): print(M) raise AssertionError("Matrix M not as expected. 
See print output above") mm_out = BytesIO() - gb.io.mmwrite(mm_out, M) + gb.io.mmwrite(mm_out, M, engine) mm_out.flush() mm_out.seek(0) mm_out_str = b"".join(mm_out.readlines()).decode() mm_out.seek(0) - M2 = gb.io.mmread(mm_out) + M2 = gb.io.mmread(mm_out, engine) if not M2.isequal(expected): # pragma: no cover (debug) print(example) print("Expected:") @@ -304,23 +317,38 @@ def test_from_scipy_sparse_duplicates(): @pytest.mark.skipif("not ss") -def test_matrix_market_sparse_duplicates(): - mm = StringIO( - """%%MatrixMarket matrix coordinate real general +@pytest.mark.parametrize("engine", ["auto", "scipy", "fast_matrix_market"]) +def test_matrix_market_sparse_duplicates(engine): + if engine == "fast_matrix_market" and fmm is None: # pragma: no cover (import) + pytest.skip("needs fast_matrix_market") + string = """%%MatrixMarket matrix coordinate real general 3 3 4 1 3 1 2 2 2 3 1 3 3 1 4""" - ) + mm = StringIO(string) with pytest.raises(ValueError, match="Duplicate indices found"): - gb.io.mmread(mm) - mm.seek(0) - a = gb.io.mmread(mm, dup_op=gb.binary.plus) + gb.io.mmread(mm, engine) + # mm.seek(0) # Doesn't work with `fast_matrix_market` 1.4.5 + mm = StringIO(string) + a = gb.io.mmread(mm, engine, dup_op=gb.binary.plus) expected = gb.Matrix.from_coo([0, 1, 2], [2, 1, 0], [1, 2, 7]) assert a.isequal(expected) +@pytest.mark.skipif("not ss") +def test_matrix_market_bad_engine(): + A = gb.Matrix.from_coo([0, 0, 3, 5], [1, 4, 0, 2], [1, 0, 2, -1], nrows=7, ncols=6) + with pytest.raises(ValueError, match="Bad engine value"): + gb.io.mmwrite(BytesIO(), A, engine="bad_engine") + mm_out = BytesIO() + gb.io.mmwrite(mm_out, A) + mm_out.seek(0) + with pytest.raises(ValueError, match="Bad engine value"): + gb.io.mmread(mm_out, engine="bad_engine") + + @pytest.mark.skipif("not ss") def test_scipy_sparse(): a = np.arange(12).reshape(3, 4) diff --git a/pyproject.toml b/pyproject.toml index 7dea49e98..16d8b5cc2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,7 @@ 
io = [ "scipy >=1.8", "awkward >=1.9", "sparse >=0.12", + "fast-matrix-market >=1.4.5", ] viz = [ "matplotlib >=3.5", @@ -85,6 +86,7 @@ complete = [ "scipy >=1.8", "awkward >=1.9", "sparse >=0.12", + "fast-matrix-market >=1.4.5", "matplotlib >=3.5", "pytest", ] diff --git a/scripts/check_versions.sh b/scripts/check_versions.sh index ff7c88b32..8b356d834 100755 --- a/scripts/check_versions.sh +++ b/scripts/check_versions.sh @@ -8,6 +8,7 @@ conda search 'pandas[channel=conda-forge]>=1.5.3' conda search 'scipy[channel=conda-forge]>=1.10.0' conda search 'networkx[channel=conda-forge]>=3.0' conda search 'awkward[channel=conda-forge]>=2.0.8' +conda search 'fast_matrix_market[channel=conda-forge]>=1.4.5' conda search 'numba[channel=conda-forge]>=0.56.4' conda search 'pyyaml[channel=conda-forge]>=6.0' conda search 'flake8-comprehensions[channel=conda-forge]>=3.10.1' From 8795df1eb171aa689729ef92c77dec4f88a7a722 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Sun, 19 Feb 2023 05:06:36 +0100 Subject: [PATCH 02/15] oops print statement --- graphblas/tests/test_io.py | 1 - 1 file changed, 1 deletion(-) diff --git a/graphblas/tests/test_io.py b/graphblas/tests/test_io.py index c8b1cfc28..b546c5457 100644 --- a/graphblas/tests/test_io.py +++ b/graphblas/tests/test_io.py @@ -273,7 +273,6 @@ def test_mmread_mmwrite(engine): # TODO MAINT: is this a bug in fast_matrix_market, or does scipy.io.mmread # read an invalid file? `fast_matrix_market` v1.4.5 does not handle this. 
continue - print(example) M = gb.io.mmread(mm_in, engine) if not M.isequal(expected): # pragma: no cover (debug) print(example) From faaf1c7b42f87f1a37783ee703ca772407dcd5a5 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Fri, 24 Feb 2023 17:43:18 +0100 Subject: [PATCH 03/15] Update usage of `from_dense` and `to_dense` in docs and notebooks --- .pre-commit-config.yaml | 2 +- README.md | 12 +++++++----- docs/user_guide/io.rst | 14 +++++++------- graphblas/core/operator.py | 9 ++++----- graphblas/dtypes.py | 3 +-- graphblas/viz.py | 2 +- notebooks/Louvain.ipynb | 6 +++--- pyproject.toml | 1 + 8 files changed, 25 insertions(+), 24 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index df37d4461..0c9c94988 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -71,7 +71,7 @@ repos: additional_dependencies: [tomli] files: ^(graphblas|docs)/ - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.249 + rev: v0.0.252 hooks: - id: ruff - repo: https://github.com/sphinx-contrib/sphinx-lint diff --git a/README.md b/README.md index 0b8318b3f..34c1c1994 100644 --- a/README.md +++ b/README.md @@ -186,11 +186,6 @@ Similar methods exist for BinaryOp, Monoid, and Semiring. 
```python import graphblas as gb -# numpy arrays -# 1-D array becomes Vector, 2-D array becomes Matrix -A = gb.io.from_numpy(m) -m = gb.io.to_numpy(A) - # scipy.sparse matrices A = gb.io.from_scipy_sparse(m) m = gb.io.to_scipy_sparse(m, format='csr') @@ -198,4 +193,11 @@ m = gb.io.to_scipy_sparse(m, format='csr') # networkx graphs A = gb.io.from_networkx(g) g = gb.io.to_networkx(A) + +# numpy arrays can use `from_dense` and `to_dense` on Vector and Matrix +v = gb.Vector.from_dense(m) +m = v.to_dense() + +A = gb.Matrix.from_dense(m, missing_value=0) +m = A.to_dense(fill_value=0) ``` diff --git a/docs/user_guide/io.rst b/docs/user_guide/io.rst index 52211c465..9431ff413 100644 --- a/docs/user_guide/io.rst +++ b/docs/user_guide/io.rst @@ -14,7 +14,7 @@ Matrix and Vector, instead, have a ``.from_coo()`` and a ``.to_coo()`` method. ``.from_coo()`` takes index(es) and values as either: - Python lists - - Numpy arrays + - NumPy arrays If no dtype is provided, the data type is inferred from the values. @@ -77,17 +77,17 @@ A python-graphblas Matrix can be created from a 2-D (PyData) sparse array or mat ``gb.io.to_pydata_sparse()`` will output a 2-D (PyData) sparse array given a python-graphblas Matrix. The sparse format can be specified. It defaults to "coo". -Numpy (Dense) +NumPy (Dense) ------------- While not useful for very large graphs, converting to and from small dense numpy arrays can be useful. -``gb.io.from_numpy()`` will convert a 1-D array into a Vector and a 2-D array into a Matrix. When converting -from numpy, zeros are treated as missing values. - -``gb.io.to_numpy()`` will convert a Vector or Matrix into the dense equivalent in numpy, filling missing -values with zero. +``Vector.from_dense()`` converts a 1-D array into a Vector and +``Matrix.from_dense()`` a 2-D array into a Matrix. When converting from numpy, a value may be +chosen to become a missing value, such as ``Matrix.from_dense(a, missing_value=0)``. 
+``.to_dense()`` converts a Vector or Matrix into a numpy array. If there are missing values, a fill +value should be given such as ``.to_dense(fill_value=0)``. SuiteSparse Export/Import ------------------------- diff --git a/graphblas/core/operator.py b/graphblas/core/operator.py index eca7c9d75..bfd03d9df 100644 --- a/graphblas/core/operator.py +++ b/graphblas/core/operator.py @@ -941,8 +941,7 @@ def __contains__(self, type_): self[type_] except (TypeError, KeyError, numba.NumbaError): return False - else: - return True + return True @classmethod def _remove_nesting(cls, funcname, *, module=None, modname=None, strict=True): @@ -2740,9 +2739,9 @@ def _initialize(cls): cur_op._typed_ops[dtype] = bool_op # Builtin monoids that are idempotent; i.e., `op(x, x) == x` for any x - for name in {"any", "band", "bor", "land", "lor", "max", "min"}: + for name in ["any", "band", "bor", "land", "lor", "max", "min"]: getattr(monoid, name)._is_idempotent = True - for name in { + for name in [ "bitwise_and", "bitwise_or", "fmax", @@ -2752,7 +2751,7 @@ def _initialize(cls): "logical_or", "maximum", "minimum", - }: + ]: getattr(monoid.numpy, name)._is_idempotent = True # Allow some functions to work on UDTs diff --git a/graphblas/dtypes.py b/graphblas/dtypes.py index 2f8b40e43..22d98b8f1 100644 --- a/graphblas/dtypes.py +++ b/graphblas/dtypes.py @@ -44,8 +44,7 @@ def __lt__(self, other): t2 = lookup_dtype(other).np_type except ValueError: raise TypeError(f"Invalid or unknown datatype: {other}") from None - else: - return (t1.kind, t1.itemsize, t1.name) < (t2.kind, t2.itemsize, t2.name) + return (t1.kind, t1.itemsize, t1.name) < (t2.kind, t2.itemsize, t2.name) def __reduce__(self): if self._is_udt: diff --git a/graphblas/viz.py b/graphblas/viz.py index 72e18361a..89010bc3d 100644 --- a/graphblas/viz.py +++ b/graphblas/viz.py @@ -182,7 +182,7 @@ def datashade(M, agg="count", *, width=None, height=None, opts_kwargs=None, **kw images.extend(image_row) return 
hv.Layout(images).cols(ncols) - kwds = dict( # noqa: C408 + kwds = dict( # noqa: C408 pylint: disable=use-dict-literal x="col", y="row", c="val", diff --git a/notebooks/Louvain.ipynb b/notebooks/Louvain.ipynb index f1b042a49..eb523ef9e 100644 --- a/notebooks/Louvain.ipynb +++ b/notebooks/Louvain.ipynb @@ -270,7 +270,7 @@ " [0, 0, 1, 0, 0, 1, 0],\n", " ]\n", ")\n", - "g = gb.io.from_numpy(m)" + "g = Matrix.from_dense(m, missing_value=0)" ] }, { @@ -370,7 +370,7 @@ " [0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", " ]\n", ")\n", - "g = gb.io.from_numpy(m)" + "g = Matrix.from_dense(m, missing_value=0)" ] }, { @@ -469,7 +469,7 @@ " [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],\n", " ]\n", ")\n", - "g = gb.io.from_numpy(m)" + "g = Matrix.from_dense(m, missing_value=0)" ] }, { diff --git a/pyproject.toml b/pyproject.toml index 8e673eb81..7c2d7dba0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -273,6 +273,7 @@ ignore = [ "PLR0913", # Too many arguments to function call "PLR0915", # Too many statements "PLR2004", # Magic number used in comparison, consider replacing magic with a constant variable + "PLW2901", # Outer for loop variable ... 
overwritten by inner assignment target (Note: good advice, but too strict) "RET502", # Do not implicitly `return None` in function able to return non-`None` value "RET503", # Missing explicit `return` at the end of function able to return non-`None` value "RET504", # Unnecessary variable assignment before `return` statement From 00c653969013636824ce34c96ea79b38f7297d37 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Fri, 24 Feb 2023 22:46:14 +0100 Subject: [PATCH 04/15] Ignore warning from new version of pydata/sparse (we should investigate later) --- .github/workflows/test_and_build.yml | 2 +- pyproject.toml | 3 +++ scripts/check_versions.sh | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_and_build.yml b/.github/workflows/test_and_build.yml index c85641061..c4474dd8a 100644 --- a/.github/workflows/test_and_build.yml +++ b/.github/workflows/test_and_build.yml @@ -142,7 +142,7 @@ jobs: # Consider removing old versions when they become problematic or very old (>=2 years). 
nxver=$(python -c 'import random ; print(random.choice(["=2.7", "=2.8", "=3.0", ""]))') yamlver=$(python -c 'import random ; print(random.choice(["=5.4", "=6.0", ""]))') - sparsever=$(python -c 'import random ; print(random.choice(["=0.12", "=0.13", ""]))') + sparsever=$(python -c 'import random ; print(random.choice(["=0.12", "=0.13", "=0.14", ""]))') fmmver=$(python -c 'import random ; print(random.choice(["=1.4", ""]))') if [[ ${{ steps.pyver.outputs.selected }} == "3.8" ]]; then npver=$(python -c 'import random ; print(random.choice(["=1.21", "=1.22", "=1.23", ""]))') diff --git a/pyproject.toml b/pyproject.toml index 7c2d7dba0..c7d7b30b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -149,6 +149,9 @@ filterwarnings = [ "error", # MAINT: we can drop support for sparse <0.13 at any time "ignore:`np.bool` is a deprecated alias:DeprecationWarning:sparse._umath", # sparse <0.13 + # sparse 0.14.0 (2022-02-24) began raising this warning; we should investigate to understand why. + "ignore:coords should be an ndarray. This will raise a ValueError:DeprecationWarning:sparse._coo.core", + # setuptools v67.3.0 deprecated `pkg_resources.declare_namespace` on 13 Feb 2023. See: # https://setuptools.pypa.io/en/latest/history.html#v67-3-0 # MAINT: check if this is still necessary in 2025 diff --git a/scripts/check_versions.sh b/scripts/check_versions.sh index 8b356d834..714109951 100755 --- a/scripts/check_versions.sh +++ b/scripts/check_versions.sh @@ -5,9 +5,10 @@ # Tip: add `--json` for more information. 
conda search 'numpy[channel=conda-forge]>=1.24.2' conda search 'pandas[channel=conda-forge]>=1.5.3' -conda search 'scipy[channel=conda-forge]>=1.10.0' +conda search 'scipy[channel=conda-forge]>=1.10.1' conda search 'networkx[channel=conda-forge]>=3.0' conda search 'awkward[channel=conda-forge]>=2.0.8' +conda search 'sparse[channel=conda-forge]>=0.14.0' conda search 'fast_matrix_market[channel=conda-forge]>=1.4.5' conda search 'numba[channel=conda-forge]>=0.56.4' conda search 'pyyaml[channel=conda-forge]>=6.0' From 632ebefebcdf32837d7de30577a057039e1ac43d Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Sat, 25 Feb 2023 14:19:21 +0100 Subject: [PATCH 05/15] `to_pydata_sparse(v)` on Vector should create 1-d array --- graphblas/io.py | 17 ++++++++++------- graphblas/tests/test_io.py | 9 ++++++--- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/graphblas/io.py b/graphblas/io.py index 107d1d09b..fa9a0e5e7 100644 --- a/graphblas/io.py +++ b/graphblas/io.py @@ -550,14 +550,17 @@ def to_pydata_sparse(A, format="coo"): if format not in {"coo", "dok", "gcxs"}: raise ValueError(f"Invalid format: {format}") - if format == "gcxs": - B = to_scipy_sparse(A, format="csr") + if _output_type(A) is _Vector: + indices, values = A.to_coo(sort=False) + s = COO(indices, values, shape=A.shape) else: - # obtain an intermediate conversion via hardcoded 'coo' intermediate object - B = to_scipy_sparse(A, format="coo") - - # convert to pydata.sparse - s = COO.from_scipy_sparse(B) + if format == "gcxs": + B = to_scipy_sparse(A, format="csr") + else: + # obtain an intermediate conversion via hardcoded 'coo' intermediate object + B = to_scipy_sparse(A, format="coo") + # convert to pydata.sparse + s = COO.from_scipy_sparse(B) # express in the desired format return s.asformat(format) diff --git a/graphblas/tests/test_io.py b/graphblas/tests/test_io.py index 404acac44..ada092025 100644 --- a/graphblas/tests/test_io.py +++ b/graphblas/tests/test_io.py @@ -433,7 +433,8 @@ def 
test_vector_to_from_pydata_sparse(): assert v.isequal(gb.Vector.from_coo(coords, data, dtype=dtypes.INT64), check_dtype=True) t = gb.io.to_pydata_sparse(v) - assert t == s + assert t.shape == s.shape + assert (t == s).all() @pytest.mark.skipif("not sparse") @@ -445,7 +446,8 @@ def test_matrix_to_from_pydata_sparse(): assert v.isequal(gb.Matrix.from_coo(*coords, data, dtype=dtypes.INT64), check_dtype=False) t = gb.io.to_pydata_sparse(v) - assert t == s + assert t.shape == s.shape + assert (t == s).all() # test ndim e = sparse.random(shape=(5, 5, 5), density=0) @@ -464,7 +466,8 @@ def test_matrix_to_from_pydata_sparse(): assert w.isequal(gb.Matrix.from_coo(*coords, data, dtype=dtypes.INT64), check_dtype=False) r = gb.io.to_pydata_sparse(w, format="gcxs") - assert r == g + assert r.shape == g.shape + assert (r == g).all() with pytest.raises(ValueError, match="format"): gb.io.to_pydata_sparse(w, format="badformat") with pytest.raises(TypeError, match="sparse.pydata"): From 64d32a03d7ba77c381b86eea88e2452d73a89f4b Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 27 Feb 2023 22:20:16 +0100 Subject: [PATCH 06/15] ruff 253 --- .pre-commit-config.yaml | 2 +- graphblas/core/expr.py | 5 ++--- graphblas/core/matrix.py | 11 ++++------- graphblas/core/ss/matrix.py | 31 +++++++++++-------------------- graphblas/core/ss/vector.py | 15 ++++++--------- graphblas/core/vector.py | 9 ++++----- graphblas/tests/test_matrix.py | 15 +++++++-------- pyproject.toml | 1 + 8 files changed, 36 insertions(+), 53 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0c9c94988..2ea865ad9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -71,7 +71,7 @@ repos: additional_dependencies: [tomli] files: ^(graphblas|docs)/ - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.252 + rev: v0.0.253 hooks: - id: ruff - repo: https://github.com/sphinx-contrib/sphinx-lint diff --git a/graphblas/core/expr.py b/graphblas/core/expr.py index 
9046795db..affe06112 100644 --- a/graphblas/core/expr.py +++ b/graphblas/core/expr.py @@ -160,9 +160,8 @@ def parse_indices(self, indices, shape): raise TypeError(f"Index for {type(self.obj).__name__} cannot be a tuple") # Convert to tuple for consistent processing indices = (indices,) - else: # len(shape) == 2 - if type(indices) is not tuple or len(indices) != 2: - raise TypeError(f"Index for {type(self.obj).__name__} must be a 2-tuple") + elif type(indices) is not tuple or len(indices) != 2: + raise TypeError(f"Index for {type(self.obj).__name__} must be a 2-tuple") out = [] for i, idx in enumerate(indices): diff --git a/graphblas/core/matrix.py b/graphblas/core/matrix.py index 8b9b4b678..1935fcee7 100644 --- a/graphblas/core/matrix.py +++ b/graphblas/core/matrix.py @@ -3154,14 +3154,11 @@ def _prep_for_assign(self, resolved_indexes, value, mask, is_submask, replace, o mask = _vanilla_subassign_mask( self, mask, rowidx, colidx, replace, opts ) + elif backend == "suitesparse": + cfunc_name = "GxB_Matrix_subassign_Scalar" else: - if backend == "suitesparse": - cfunc_name = "GxB_Matrix_subassign_Scalar" - else: - cfunc_name = "GrB_Matrix_assign_Scalar" - mask = _vanilla_subassign_mask( - self, mask, rowidx, colidx, replace, opts - ) + cfunc_name = "GrB_Matrix_assign_Scalar" + mask = _vanilla_subassign_mask(self, mask, rowidx, colidx, replace, opts) expr_repr = ( "[[{2._expr_name} rows], [{4._expr_name} cols]]" f"({mask.name})" diff --git a/graphblas/core/ss/matrix.py b/graphblas/core/ss/matrix.py index b455d760e..b1869f198 100644 --- a/graphblas/core/ss/matrix.py +++ b/graphblas/core/ss/matrix.py @@ -895,9 +895,8 @@ def _export(self, format=None, *, sort=False, give_ownership=False, raw=False, m if is_iso: if values.size > 1: # pragma: no branch (suitesparse) values = values[:1] - else: - if values.size > nvals: # pragma: no branch (suitesparse) - values = values[:nvals] + elif values.size > nvals: # pragma: no branch (suitesparse) + values = values[:nvals] # Note: 
nvals is also at `indptr[nrows]` rv = { "indptr": indptr, @@ -937,9 +936,8 @@ def _export(self, format=None, *, sort=False, give_ownership=False, raw=False, m if is_iso: if values.size > 1: # pragma: no cover (suitesparse) values = values[:1] - else: - if values.size > nvals: - values = values[:nvals] + elif values.size > nvals: + values = values[:nvals] # Note: nvals is also at `indptr[ncols]` rv = { "indptr": indptr, @@ -989,9 +987,8 @@ def _export(self, format=None, *, sort=False, give_ownership=False, raw=False, m if is_iso: if values.size > 1: # pragma: no cover (suitesparse) values = values[:1] - else: - if values.size > nvals: - values = values[:nvals] + elif values.size > nvals: + values = values[:nvals] # Note: nvals is also at `indptr[nvec]` rv = { "indptr": indptr, @@ -1044,9 +1041,8 @@ def _export(self, format=None, *, sort=False, give_ownership=False, raw=False, m if is_iso: if values.size > 1: # pragma: no cover (suitesparse) values = values[:1] - else: - if values.size > nvals: - values = values[:nvals] + elif values.size > nvals: + values = values[:nvals] # Note: nvals is also at `indptr[nvec]` rv = { "indptr": indptr, @@ -3480,15 +3476,10 @@ def _import_any( format = "cooc" else: format = "coo" + elif isinstance(values, np.ndarray) and values.ndim == 2 and values.flags.f_contiguous: + format = "fullc" else: - if ( - isinstance(values, np.ndarray) - and values.ndim == 2 - and values.flags.f_contiguous - ): - format = "fullc" - else: - format = "fullr" + format = "fullr" else: format = format.lower() if method == "pack": diff --git a/graphblas/core/ss/vector.py b/graphblas/core/ss/vector.py index d13d78ac3..343335773 100644 --- a/graphblas/core/ss/vector.py +++ b/graphblas/core/ss/vector.py @@ -551,9 +551,8 @@ def _export(self, format=None, *, sort=False, give_ownership=False, raw=False, m if is_iso: if values.size > 1: # pragma: no cover (suitesparse) values = values[:1] - else: - if values.size > nvals: - values = values[:nvals] + elif values.size 
> nvals: + values = values[:nvals] rv = { "size": size, "indices": indices, @@ -589,9 +588,8 @@ def _export(self, format=None, *, sort=False, give_ownership=False, raw=False, m if is_iso: if values.size > 1: # pragma: no cover (suitesparse) values = values[:1] - else: - if values.size > size: # pragma: no branch (suitesparse) - values = values[:size] + elif values.size > size: # pragma: no branch (suitesparse) + values = values[:size] rv = { "bitmap": bitmap, "nvals": nvals[0], @@ -616,9 +614,8 @@ def _export(self, format=None, *, sort=False, give_ownership=False, raw=False, m if is_iso: if values.size > 1: values = values[:1] - else: - if values.size > size: # pragma: no branch (suitesparse) - values = values[:size] + elif values.size > size: # pragma: no branch (suitesparse) + values = values[:size] rv = {} if raw or is_iso: rv["size"] = size diff --git a/graphblas/core/vector.py b/graphblas/core/vector.py index dd183d856..8231691c6 100644 --- a/graphblas/core/vector.py +++ b/graphblas/core/vector.py @@ -1868,12 +1868,11 @@ def _prep_for_assign(self, resolved_indexes, value, mask, is_submask, replace, o else: cfunc_name = f"GrB_Vector_assign_{dtype_name}" mask = _vanilla_subassign_mask(self, mask, idx, replace, opts) + elif backend == "suitesparse": + cfunc_name = "GxB_Vector_subassign_Scalar" else: - if backend == "suitesparse": - cfunc_name = "GxB_Vector_subassign_Scalar" - else: - cfunc_name = "GrB_Vector_assign_Scalar" - mask = _vanilla_subassign_mask(self, mask, idx, replace, opts) + cfunc_name = "GrB_Vector_assign_Scalar" + mask = _vanilla_subassign_mask(self, mask, idx, replace, opts) expr_repr = ( "[[{2._expr_name} elements]]" f"({mask.name})" # fmt: skip diff --git a/graphblas/tests/test_matrix.py b/graphblas/tests/test_matrix.py index 40676f71a..1d42035a3 100644 --- a/graphblas/tests/test_matrix.py +++ b/graphblas/tests/test_matrix.py @@ -2173,15 +2173,14 @@ def test_ss_import_export(A, do_iso, methods): C1.ss.pack_any(**d) assert C1.isequal(C) assert 
C1.ss.is_iso is do_iso + elif in_method == "import": + D1 = Matrix.ss.import_any(**d) + assert D1.isequal(C) + assert D1.ss.is_iso is do_iso else: - if in_method == "import": - D1 = Matrix.ss.import_any(**d) - assert D1.isequal(C) - assert D1.ss.is_iso is do_iso - else: - C1.ss.pack_any(**d) - assert C1.isequal(C) - assert C1.ss.is_iso is do_iso + C1.ss.pack_any(**d) + assert C1.isequal(C) + assert C1.ss.is_iso is do_iso C2 = C.dup() d = getattr(C2.ss, out_method)("fullc") diff --git a/pyproject.toml b/pyproject.toml index 9be600f04..73871c718 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -278,6 +278,7 @@ ignore = [ "PLR0913", # Too many arguments to function call "PLR0915", # Too many statements "PLR2004", # Magic number used in comparison, consider replacing magic with a constant variable + "PLW0603", # Using the global statement to update ... is discouraged (Note: yeah, discouraged, but too strict) "PLW2901", # Outer for loop variable ... overwritten by inner assignment target (Note: good advice, but too strict) "RET502", # Do not implicitly `return None` in function able to return non-`None` value "RET503", # Missing explicit `return` at the end of function able to return non-`None` value From 46e0c623a58271814a195c954525d7341658503b Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Fri, 10 Mar 2023 11:21:32 -0600 Subject: [PATCH 07/15] bump awkward to 2.1.0 --- .github/workflows/test_and_build.yml | 8 ++++---- .pre-commit-config.yaml | 4 ++-- scripts/check_versions.sh | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test_and_build.yml b/.github/workflows/test_and_build.yml index 31008e156..0dfa08859 100644 --- a/.github/workflows/test_and_build.yml +++ b/.github/workflows/test_and_build.yml @@ -148,22 +148,22 @@ jobs: npver=$(python -c 'import random ; print(random.choice(["=1.21", "=1.22", "=1.23", ""]))') spver=$(python -c 'import random ; print(random.choice(["=1.8", "=1.9", "=1.10", ""]))') pdver=$(python -c 
'import random ; print(random.choice(["=1.2", "=1.3", "=1.4", "=1.5", ""]))') - akver=$(python -c 'import random ; print(random.choice(["=1.9", "=1.10", "=2.0", ""]))') + akver=$(python -c 'import random ; print(random.choice(["=1.9", "=1.10", "=2.0", "=2.1", ""]))') elif [[ ${{ steps.pyver.outputs.selected }} == "3.9" ]]; then npver=$(python -c 'import random ; print(random.choice(["=1.21", "=1.22", "=1.23", ""]))') spver=$(python -c 'import random ; print(random.choice(["=1.8", "=1.9", "=1.10", ""]))') pdver=$(python -c 'import random ; print(random.choice(["=1.2", "=1.3", "=1.4", "=1.5", ""]))') - akver=$(python -c 'import random ; print(random.choice(["=1.9", "=1.10", "=2.0", ""]))') + akver=$(python -c 'import random ; print(random.choice(["=1.9", "=1.10", "=2.0", "=2.1", ""]))') elif [[ ${{ steps.pyver.outputs.selected }} == "3.10" ]]; then npver=$(python -c 'import random ; print(random.choice(["=1.21", "=1.22", "=1.23", ""]))') spver=$(python -c 'import random ; print(random.choice(["=1.8", "=1.9", "=1.10", ""]))') pdver=$(python -c 'import random ; print(random.choice(["=1.3", "=1.4", "=1.5", ""]))') - akver=$(python -c 'import random ; print(random.choice(["=1.9", "=1.10", "=2.0", ""]))') + akver=$(python -c 'import random ; print(random.choice(["=1.9", "=1.10", "=2.0", "=2.1", ""]))') else # Python 3.11 npver=$(python -c 'import random ; print(random.choice(["=1.23", ""]))') spver=$(python -c 'import random ; print(random.choice(["=1.9", "=1.10", ""]))') pdver=$(python -c 'import random ; print(random.choice(["=1.5", ""]))') - akver=$(python -c 'import random ; print(random.choice(["=1.10", "=2.0.5", "=2.0.6", "=2.0.7", "=2.0.8", ""]))') + akver=$(python -c 'import random ; print(random.choice(["=1.10", "=2.0", "=2.1", ""]))') fi if [[ ${{ steps.sourcetype.outputs.selected }} == "source" || ${{ steps.sourcetype.outputs.selected }} == "upstream" ]]; then # TODO: there are currently issues with some numpy versions when diff --git a/.pre-commit-config.yaml 
b/.pre-commit-config.yaml index 2ea865ad9..dfa1c5ee0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -64,14 +64,14 @@ repos: - id: yesqa additional_dependencies: *flake8_dependencies - repo: https://github.com/codespell-project/codespell - rev: v2.2.2 + rev: v2.2.4 hooks: - id: codespell types_or: [python, rst, markdown] additional_dependencies: [tomli] files: ^(graphblas|docs)/ - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.253 + rev: v0.0.254 hooks: - id: ruff - repo: https://github.com/sphinx-contrib/sphinx-lint diff --git a/scripts/check_versions.sh b/scripts/check_versions.sh index 714109951..bc813b28c 100755 --- a/scripts/check_versions.sh +++ b/scripts/check_versions.sh @@ -7,7 +7,7 @@ conda search 'numpy[channel=conda-forge]>=1.24.2' conda search 'pandas[channel=conda-forge]>=1.5.3' conda search 'scipy[channel=conda-forge]>=1.10.1' conda search 'networkx[channel=conda-forge]>=3.0' -conda search 'awkward[channel=conda-forge]>=2.0.8' +conda search 'awkward[channel=conda-forge]>=2.1.0' conda search 'sparse[channel=conda-forge]>=0.14.0' conda search 'fast_matrix_market[channel=conda-forge]>=1.4.5' conda search 'numba[channel=conda-forge]>=0.56.4' From 2faf4996f2713fd7f58d04ad5fb8f8950eff5d5e Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 13 Mar 2023 08:23:40 -0500 Subject: [PATCH 08/15] Maybe trust `ruff` to fix some things But run `autoflake`, `isort`, `pyupgrade`, and `black` first (for now). 
--- .pre-commit-config.yaml | 7 ++++++- pyproject.toml | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dfa1c5ee0..9d61cdede 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,7 +25,7 @@ repos: - id: validate-pyproject name: Validate pyproject.toml - repo: https://github.com/myint/autoflake - rev: v2.0.1 + rev: v2.0.2 hooks: - id: autoflake args: [--in-place] @@ -48,6 +48,11 @@ repos: hooks: - id: black - id: black-jupyter + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.0.254 + hooks: + - id: ruff + args: [--fix-only] - repo: https://github.com/PyCQA/flake8 rev: 6.0.0 hooks: diff --git a/pyproject.toml b/pyproject.toml index b80938267..3dda43430 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ license = {file = "LICENSE"} authors = [ {name = "Erik Welch"}, {name = "Jim Kitchen"}, + {name = "Python-graphblas contributors"}, ] maintainers = [ {name = "Erik Welch", email = "erik.n.welch@gmail.com"}, From b0544d8e75c5705d634f35a62b4c1487adf9ad38 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 13 Mar 2023 08:57:26 -0500 Subject: [PATCH 09/15] Add a few notes to pre-commit hooks --- .pre-commit-config.yaml | 15 ++++++++++++++- pyproject.toml | 2 +- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9d61cdede..db152d265 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,15 +24,19 @@ repos: hooks: - id: validate-pyproject name: Validate pyproject.toml + # We can probably remove `autoflake` if we come to trust `ruff --fix` - repo: https://github.com/myint/autoflake rev: v2.0.2 hooks: - id: autoflake args: [--in-place] + # We can probably remove `isort` if we come to trust `ruff --fix`, + # but we'll need to figure out the configuration to do this in `ruff` - repo: https://github.com/pycqa/isort rev: 5.12.0 hooks: - id: isort + # Let's keep 
`pyupgrade` even though `ruff --fix` probably does most of it - repo: https://github.com/asottile/pyupgrade rev: v3.3.1 hooks: @@ -53,6 +57,8 @@ repos: hooks: - id: ruff args: [--fix-only] + # Let's keep `flake8` even though `ruff` does much of the same. + # `flake8-bugbear` and `flake8-simplify` have caught things missed by `ruff`. - repo: https://github.com/PyCQA/flake8 rev: 6.0.0 hooks: @@ -60,7 +66,7 @@ repos: additional_dependencies: &flake8_dependencies # These versions need updated manually - flake8==6.0.0 - - flake8-comprehensions==3.10.1 + - flake8-comprehensions==3.10.1 # Consider removing and rely on `ruff` instead - flake8-bugbear==23.2.13 - flake8-simplify==0.19.3 - repo: https://github.com/asottile/yesqa @@ -84,6 +90,13 @@ repos: hooks: - id: sphinx-lint args: [--enable, all, "--disable=line-too-long,leaked-markup"] + # `pyroma` may help keep our package standards up to date if best practices change. + # This is probably a "low value" check though and safe to remove if we want faster pre-commit. + - repo: https://github.com/regebro/pyroma + rev: "4.2" + hooks: + - id: pyroma + args: [-n, "10", .] 
- repo: local hooks: # Add `--hook-stage manual` to pre-commit command to run (very slow) diff --git a/pyproject.toml b/pyproject.toml index 3dda43430..0b3f38577 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ readme = "README.md" requires-python = ">=3.8" license = {file = "LICENSE"} authors = [ - {name = "Erik Welch"}, + {name = "Erik Welch", email = "erik.n.welch@gmail.com"}, {name = "Jim Kitchen"}, {name = "Python-graphblas contributors"}, ] From 5969089c9c77987b2a98bfbd4c32e90183f135f9 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 13 Mar 2023 09:06:34 -0500 Subject: [PATCH 10/15] Add `fast-matrix-market` to optional dependencies documentation --- README.md | 3 ++- docs/getting_started/index.rst | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2c4b2d1b7..dab91782a 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,8 @@ The following are not required by python-graphblas, but may be needed for certai - `pandas` – required for nicer `__repr__`; - `matplotlib` – required for basic plotting of graphs; - `scipy` – used in io module to read/write `scipy.sparse` format; -- `networkx` – used in `io` module to interface with `networkx` graphs. +- `networkx` – used in `io` module to interface with `networkx` graphs; +- `fast-matrix-market` – for faster read/write of Matrix Market files with `gb.io.mmread` and `gb.io.mmwrite`. ## Description Currently works with [SuiteSparse:GraphBLAS](https://github.com/DrTimothyAldenDavis/GraphBLAS), but the goal is to make it work with all implementations of the GraphBLAS spec. diff --git a/docs/getting_started/index.rst b/docs/getting_started/index.rst index 661550803..d603df30b 100644 --- a/docs/getting_started/index.rst +++ b/docs/getting_started/index.rst @@ -34,6 +34,7 @@ to work.
- `matplotlib `__ -- required for basic plotting of graphs - `scipy `__ -- used in ``io`` module to read/write ``scipy.sparse`` format - `networkx `__ -- used in ``io`` module to interface with networkx graphs + - `fast-matrix-market `__ -- for faster read/write of Matrix Market files with ``gb.io.mmread`` and ``gb.io.mmwrite`` GraphBLAS Fundamentals ---------------------- From d9ceebcf1214e0e96663b91c6a89ec42bdf1fd55 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 13 Mar 2023 18:44:32 -0500 Subject: [PATCH 11/15] Drop autoflake in pre-commit (use ruff instead) --- .pre-commit-config.yaml | 11 ++------- graphblas/viz.py | 48 +++++++++++++++++++-------------------- pyproject.toml | 2 +- scripts/check_versions.sh | 1 - 4 files changed, 27 insertions(+), 35 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index db152d265..75d1d0b89 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,12 +24,6 @@ repos: hooks: - id: validate-pyproject name: Validate pyproject.toml - # We can probably remove `autoflake` if we come to trust `ruff --fix` - - repo: https://github.com/myint/autoflake - rev: v2.0.2 - hooks: - - id: autoflake - args: [--in-place] # We can probably remove `isort` if we come to trust `ruff --fix`, # but we'll need to figure out the configuration to do this in `ruff` - repo: https://github.com/pycqa/isort @@ -53,7 +47,7 @@ repos: - id: black - id: black-jupyter - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.254 + rev: v0.0.255 hooks: - id: ruff args: [--fix-only] @@ -66,7 +60,6 @@ repos: additional_dependencies: &flake8_dependencies # These versions need updated manually - flake8==6.0.0 - - flake8-comprehensions==3.10.1 # Consider removing and rely on `ruff` instead - flake8-bugbear==23.2.13 - flake8-simplify==0.19.3 - repo: https://github.com/asottile/yesqa @@ -82,7 +75,7 @@ repos: additional_dependencies: [tomli] files: ^(graphblas|docs)/ - repo: 
https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.254 + rev: v0.0.255 hooks: - id: ruff - repo: https://github.com/sphinx-contrib/sphinx-lint diff --git a/graphblas/viz.py b/graphblas/viz.py index 89010bc3d..d8a96d343 100644 --- a/graphblas/viz.py +++ b/graphblas/viz.py @@ -182,30 +182,30 @@ def datashade(M, agg="count", *, width=None, height=None, opts_kwargs=None, **kw images.extend(image_row) return hv.Layout(images).cols(ncols) - kwds = dict( # noqa: C408 pylint: disable=use-dict-literal - x="col", - y="row", - c="val", - aggregator=agg, - frame_width=width, - frame_height=height, - cmap="fire", - cnorm="eq_hist", - xlim=(0, M.ncols), - ylim=(0, M.nrows), - rasterize=True, - flip_yaxis=True, - hover=True, - xlabel="", - ylabel="", - data_aspect=1, - x_sampling=1, - y_sampling=1, - xaxis="top", - xformatter="%d", - yformatter="%d", - rot=60, - ) + kwds = { + "x": "col", + "y": "row", + "c": "val", + "aggregator": agg, + "frame_width": width, + "frame_height": height, + "cmap": "fire", + "cnorm": "eq_hist", + "xlim": (0, M.ncols), + "ylim": (0, M.nrows), + "rasterize": True, + "flip_yaxis": True, + "hover": True, + "xlabel": "", + "ylabel": "", + "data_aspect": 1, + "x_sampling": 1, + "y_sampling": 1, + "xaxis": "top", + "xformatter": "%d", + "yformatter": "%d", + "rot": 60, + } # Only show axes on outer-most plots if kwargs.pop("_col", 0) != 0: kwds["yaxis"] = None diff --git a/pyproject.toml b/pyproject.toml index 0b3f38577..2603e72b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -209,7 +209,7 @@ select = [ "B", # flake8-bugbear "A", # flake8-builtins "COM", # flake8-commas - "C4", # flake8-comprehensions + "C40", # flake8-comprehensions "DTZ", # flake8-datetimez "T10", # flake8-debugger # "DJ", # flake8-django (We don't use django) diff --git a/scripts/check_versions.sh b/scripts/check_versions.sh index bc813b28c..ade1c3313 100755 --- a/scripts/check_versions.sh +++ b/scripts/check_versions.sh @@ -12,6 +12,5 @@ conda search 
'sparse[channel=conda-forge]>=0.14.0' conda search 'fast_matrix_market[channel=conda-forge]>=1.4.5' conda search 'numba[channel=conda-forge]>=0.56.4' conda search 'pyyaml[channel=conda-forge]>=6.0' -conda search 'flake8-comprehensions[channel=conda-forge]>=3.10.1' conda search 'flake8-bugbear[channel=conda-forge]>=23.2.13' conda search 'flake8-simplify[channel=conda-forge]>=0.19.3' From 47c9bdfda0b0e191f0187a284ef6450d8ae6d3c4 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Wed, 15 Mar 2023 08:41:27 -0500 Subject: [PATCH 12/15] bump --- .pre-commit-config.yaml | 6 +++--- pyproject.toml | 2 +- scripts/check_versions.sh | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 75d1d0b89..b8e74e3e3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -47,7 +47,7 @@ repos: - id: black - id: black-jupyter - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.255 + rev: v0.0.256 hooks: - id: ruff args: [--fix-only] @@ -60,7 +60,7 @@ repos: additional_dependencies: &flake8_dependencies # These versions need updated manually - flake8==6.0.0 - - flake8-bugbear==23.2.13 + - flake8-bugbear==23.3.12 - flake8-simplify==0.19.3 - repo: https://github.com/asottile/yesqa rev: v1.4.0 @@ -75,7 +75,7 @@ repos: additional_dependencies: [tomli] files: ^(graphblas|docs)/ - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.255 + rev: v0.0.256 hooks: - id: ruff - repo: https://github.com/sphinx-contrib/sphinx-lint diff --git a/pyproject.toml b/pyproject.toml index 2603e72b3..0b3f38577 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -209,7 +209,7 @@ select = [ "B", # flake8-bugbear "A", # flake8-builtins "COM", # flake8-commas - "C40", # flake8-comprehensions + "C4", # flake8-comprehensions "DTZ", # flake8-datetimez "T10", # flake8-debugger # "DJ", # flake8-django (We don't use django) diff --git a/scripts/check_versions.sh b/scripts/check_versions.sh index 
ade1c3313..63eb8dba5 100755 --- a/scripts/check_versions.sh +++ b/scripts/check_versions.sh @@ -12,5 +12,5 @@ conda search 'sparse[channel=conda-forge]>=0.14.0' conda search 'fast_matrix_market[channel=conda-forge]>=1.4.5' conda search 'numba[channel=conda-forge]>=0.56.4' conda search 'pyyaml[channel=conda-forge]>=6.0' -conda search 'flake8-bugbear[channel=conda-forge]>=23.2.13' +conda search 'flake8-bugbear[channel=conda-forge]>=23.3.12' conda search 'flake8-simplify[channel=conda-forge]>=0.19.3' From 7bf394d240940d3403cc66b52a108044031b78e9 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Sat, 18 Mar 2023 13:58:27 -0500 Subject: [PATCH 13/15] bump ruff --- .pre-commit-config.yaml | 4 ++-- graphblas/io.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b8e74e3e3..ab097216e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -47,7 +47,7 @@ repos: - id: black - id: black-jupyter - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.256 + rev: v0.0.257 hooks: - id: ruff args: [--fix-only] @@ -75,7 +75,7 @@ repos: additional_dependencies: [tomli] files: ^(graphblas|docs)/ - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.256 + rev: v0.0.257 hooks: - id: ruff - repo: https://github.com/sphinx-contrib/sphinx-lint diff --git a/graphblas/io.py b/graphblas/io.py index fa9a0e5e7..3286eb98c 100644 --- a/graphblas/io.py +++ b/graphblas/io.py @@ -26,6 +26,7 @@ def draw(m): # pragma: no cover _warn( "`graphblas.io.draw` is deprecated; it has been moved to `graphblas.viz.draw`", DeprecationWarning, + stacklevel=2, ) viz.draw(m) @@ -93,6 +94,7 @@ def from_numpy(m): # pragma: no cover (deprecated) "`graphblas.io.from_numpy` is deprecated; " "use `Matrix.from_dense` and `Vector.from_dense` instead.", DeprecationWarning, + stacklevel=2, ) if m.ndim > 2: raise _GraphblasException("m.ndim must be <= 2") @@ -336,6 +338,7 @@ def to_numpy(m): # pragma: no cover 
(deprecated) "`graphblas.io.to_numpy` is deprecated; " "use `Matrix.to_dense` and `Vector.to_dense` instead.", DeprecationWarning, + stacklevel=2, ) try: import scipy # noqa: F401 From eb60ec56dd03cc272a8b86b60c998926b6799964 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 20 Mar 2023 11:10:17 -0500 Subject: [PATCH 14/15] Note that `scipy` is needed for all backends for mmread and mmwrite --- graphblas/io.py | 2 ++ scripts/check_versions.sh | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/graphblas/io.py b/graphblas/io.py index 3286eb98c..bc57c2084 100644 --- a/graphblas/io.py +++ b/graphblas/io.py @@ -600,6 +600,7 @@ def mmread(source, engine="auto", *, dup_op=None, name=None, **kwargs): :class:`~graphblas.Matrix` """ try: + # scipy is currently needed for *all* engines from scipy.io import mmread from scipy.sparse import isspmatrix_coo except ImportError: # pragma: no cover (import) @@ -663,6 +664,7 @@ def mmwrite( {"general", "symmetric", "skew-symmetric", "hermetian"} """ try: + # scipy is currently needed for *all* engines from scipy.io import mmwrite except ImportError: # pragma: no cover (import) raise ImportError("scipy is required to write Matrix Market files") from None diff --git a/scripts/check_versions.sh b/scripts/check_versions.sh index 63eb8dba5..d08ad6476 100755 --- a/scripts/check_versions.sh +++ b/scripts/check_versions.sh @@ -7,7 +7,7 @@ conda search 'numpy[channel=conda-forge]>=1.24.2' conda search 'pandas[channel=conda-forge]>=1.5.3' conda search 'scipy[channel=conda-forge]>=1.10.1' conda search 'networkx[channel=conda-forge]>=3.0' -conda search 'awkward[channel=conda-forge]>=2.1.0' +conda search 'awkward[channel=conda-forge]>=2.1.1' conda search 'sparse[channel=conda-forge]>=0.14.0' conda search 'fast_matrix_market[channel=conda-forge]>=1.4.5' conda search 'numba[channel=conda-forge]>=0.56.4' From 00045205c6d9c0611263fd3ecd0bc1670c6e7de1 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Tue, 21 Mar 2023 16:37:31 -0500 
Subject: [PATCH 15/15] Add Matrix Market to `io` docs --- dev-requirements.txt | 2 ++ docs/user_guide/io.rst | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/dev-requirements.txt b/dev-requirements.txt index b84c0e849..273980db9 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -6,6 +6,7 @@ pyyaml pandas # For I/O awkward +fast_matrix_market networkx scipy sparse @@ -16,6 +17,7 @@ matplotlib # For linting pre-commit # For testing +packaging pytest-cov # For debugging icecream diff --git a/docs/user_guide/io.rst b/docs/user_guide/io.rst index 9431ff413..c13fda5d6 100644 --- a/docs/user_guide/io.rst +++ b/docs/user_guide/io.rst @@ -129,3 +129,19 @@ Note that A is unchanged in the above example. The SuiteSparse export has a ``give_ownership`` option. This performs a zero-copy move operation and invalidates the original python-graphblas object. When extreme speed is needed or memory is too limited to make a copy, this option may be needed. + +Matrix Market files +------------------- + +The `Matrix Market file format `_ is a common +file format for storing sparse arrays in human-readable ASCII. +Matrix Market files--also called MM files--often use the ".mtx" file extension. +For example, many datasets in MM format can be found in `the SuiteSparse Matrix Collection `_. + +Use ``gb.io.mmread()`` to read a Matrix Market file to a python-graphblas Matrix, +and ``gb.io.mmwrite()`` to write a Matrix to a Matrix Market file. +These names match the equivalent functions in `scipy.sparse `_. + +``scipy`` is required to be installed to read and write Matrix Market files. +If ``fast_matrix_market`` is installed, it will be used by default for +`much better performance `_.