diff --git a/.github/workflows/test_and_build.yml b/.github/workflows/test_and_build.yml index 7c4159379..d3f193c51 100644 --- a/.github/workflows/test_and_build.yml +++ b/.github/workflows/test_and_build.yml @@ -162,7 +162,7 @@ jobs: npver=$(python -c 'import random ; print(random.choice(["=1.23", ""]))') spver=$(python -c 'import random ; print(random.choice(["=1.9", "=1.10", ""]))') pdver=$(python -c 'import random ; print(random.choice(["=1.5", ""]))') - akver=$(python -c 'import random ; print(random.choice(["=1.10", "=2.0.5", "=2.0.6", ""]))') + akver=$(python -c 'import random ; print(random.choice(["=1.10", "=2.0.5", "=2.0.6", "=2.0.7", ""]))') fi if [[ ${{ steps.sourcetype.outputs.selected }} == "source" || ${{ steps.sourcetype.outputs.selected }} == "upstream" ]]; then # TODO: there are currently issues with some numpy versions when diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dc514793f..4eb2db4d0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,7 +25,7 @@ repos: - id: validate-pyproject name: Validate pyproject.toml - repo: https://github.com/myint/autoflake - rev: v2.0.0 + rev: v2.0.1 hooks: - id: autoflake args: [--in-place] @@ -44,7 +44,7 @@ repos: - id: auto-walrus args: [--line-length, "100"] - repo: https://github.com/psf/black - rev: 22.12.0 + rev: 23.1.0 hooks: - id: black - id: black-jupyter @@ -71,7 +71,7 @@ repos: additional_dependencies: [tomli] files: ^(graphblas|docs)/ - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.237 + rev: v0.0.241 hooks: - id: ruff - repo: https://github.com/sphinx-contrib/sphinx-lint diff --git a/graphblas/core/matrix.py b/graphblas/core/matrix.py index 9dbfd2320..a1e18152b 100644 --- a/graphblas/core/matrix.py +++ b/graphblas/core/matrix.py @@ -872,7 +872,7 @@ def from_coo( """ rows = ints_to_numpy_buffer(rows, np.uint64, name="row indices") columns = ints_to_numpy_buffer(columns, np.uint64, name="column indices") - values, new_dtype = values_to_numpy_buffer(values, dtype) + values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=1) # Compute nrows and ncols if not provided if nrows is None: if rows.size == 0: @@ -882,11 +882,8 @@ def from_coo( if columns.size == 0: raise ValueError("No column indices provided. Unable to infer ncols.") ncols = int(columns.max()) + 1 - if dtype is None and values.ndim > 1: - # Look for array-subtdype - new_dtype = lookup_dtype(np.dtype((new_dtype.np_type, values.shape[1:]))) # Create the new matrix - C = cls(new_dtype, nrows, ncols, name=name) + C = cls(dtype, nrows, ncols, name=name) if values.ndim == 0: if dup_op is not None: raise ValueError( @@ -1004,7 +1001,7 @@ def _from_csx(cls, fmt, indptr, indices, values, dtype, num, check_num, name): indices_name = "row indices" indptr = ints_to_numpy_buffer(indptr, np.uint64, name="index pointers") indices = ints_to_numpy_buffer(indices, np.uint64, name=indices_name) - values, new_dtype = values_to_numpy_buffer(values, dtype) + values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=1) if num is None: if indices.size > 0: num = int(indices.max()) + 1 @@ -1026,9 +1023,6 @@ def _from_csx(cls, fmt, indptr, indices, values, dtype, num, check_num, name): "ncols must be None or equal to len(indptr) - 1; " f"expected {check_num}, got {ncols}" ) - if dtype is None and values.ndim > 1: - # Look for array-subtdype - new_dtype = lookup_dtype(np.dtype((new_dtype.np_type, values.shape[1:]))) if values.ndim == 0: if backend == "suitesparse": # SuiteSparse GxB can handle iso-value @@ -1055,21 +1049,21 @@ def _from_csx(cls, fmt, indptr, indices, values, dtype, num, check_num, name): ) values = np.broadcast_to(values, indices.size) new_mat = ffi_new("GrB_Matrix*") - rv = Matrix._from_obj(new_mat, new_dtype, nrows, ncols, name=name) - if new_dtype._is_udt: + rv = Matrix._from_obj(new_mat, dtype, nrows, ncols, name=name) + if dtype._is_udt: dtype_name = "UDT" else: - dtype_name = new_dtype.name + dtype_name = dtype.name call( f"GrB_Matrix_import_{dtype_name}", [ _Pointer(rv), - new_dtype, + dtype, _as_scalar(nrows, _INDEX, is_cscalar=True), _as_scalar(ncols, _INDEX, is_cscalar=True), _CArray(indptr), _CArray(indices), - _CArray(values, dtype=new_dtype), + _CArray(values, dtype=dtype), _as_scalar(indptr.size, _INDEX, is_cscalar=True), _as_scalar(indices.size, _INDEX, is_cscalar=True), _as_scalar(values.shape[0], _INDEX, is_cscalar=True), @@ -1436,12 +1430,14 @@ def from_dicts( col_indices = np.fromiter(itertools.chain.from_iterable(dicts), np.uint64) iter_values = itertools.chain.from_iterable(v.values() for v in dicts) if dtype is None: - values = np.array(list(iter_values)) - dtype = lookup_dtype(values.dtype) + values, dtype = values_to_numpy_buffer(list(iter_values), subarray_after=1) else: # If we know the dtype, then using `np.fromiter` is much faster dtype = lookup_dtype(dtype) - values = np.fromiter(iter_values, dtype.np_type) + if dtype.np_type.subdtype is not None and np.__version__[:5] in {"1.21.", "1.22."}: + values, dtype = values_to_numpy_buffer(list(iter_values), dtype) + else: + values = np.fromiter(iter_values, dtype.np_type) return getattr(cls, methodname)( *args, indptr, col_indices, values, dtype, nrows=nrows, ncols=ncols, name=name ) diff --git a/graphblas/core/operator.py b/graphblas/core/operator.py index 6a726cc0c..02f9bff52 100644 --- a/graphblas/core/operator.py +++ b/graphblas/core/operator.py @@ -1313,7 +1313,7 @@ def _initialize(cls): op._typed_ops[dtype] = typed_op op.coercions[dtype] = target_type # Allow some functions to work on UDTs - for (unop, func) in [ + for unop, func in [ (unary.identity, _identity), (unary.one, _one), ]: @@ -2287,7 +2287,7 @@ def _initialize(cls): # If the inputs are FP32, we use DIV_FP32; use DIV_FP64 for all other input dtypes truediv = binary.truediv = op.truediv = BinaryOp("truediv") rtruediv = binary.rtruediv = op.rtruediv = BinaryOp("rtruediv") - for (new_op, builtin_op) in [(truediv, binary.cdiv), (rtruediv, binary.rdiv)]: + for new_op, builtin_op in [(truediv, binary.cdiv), (rtruediv, binary.rdiv)]: for dtype in builtin_op.types: if dtype.name in {"FP32", "FC32", "FC64"}: orig_dtype = dtype @@ -2420,7 +2420,7 @@ def _initialize(cls): left._semiring_commutes_to = right right._semiring_commutes_to = left # Allow some functions to work on UDTs - for (binop, func) in [ + for binop, func in [ (binary.first, _first), (binary.second, _second), (binary.pair, _pair), diff --git a/graphblas/core/ss/matrix.py b/graphblas/core/ss/matrix.py index fcfdabf3b..6c4809a83 100644 --- a/graphblas/core/ss/matrix.py +++ b/graphblas/core/ss/matrix.py @@ -1305,7 +1305,9 @@ def _import_csr( ) if method == "pack": dtype = matrix.dtype - values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True) + values, dtype = values_to_numpy_buffer( + values, dtype, copy=copy, ownable=True, subarray_after=1 + ) if col_indices is values: values = np.copy(values) Ap = ffi_new("GrB_Index**", ffi.from_buffer("GrB_Index*", indptr)) @@ -1493,7 +1495,9 @@ def _import_csc( ) if method == "pack": dtype = matrix.dtype - values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True) + values, dtype = values_to_numpy_buffer( + values, dtype, copy=copy, ownable=True, subarray_after=1 + ) if row_indices is values: values = np.copy(values) Ap = ffi_new("GrB_Index**", ffi.from_buffer("GrB_Index*", indptr)) @@ -1696,7 +1700,9 @@ def _import_hypercsr( ) if method == "pack": dtype = matrix.dtype - values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True) + values, dtype = values_to_numpy_buffer( + values, dtype, copy=copy, ownable=True, subarray_after=1 + ) if not is_iso and values.ndim == 0: is_iso = True if col_indices is values: @@ -1917,7 +1923,9 @@ def _import_hypercsc( ) if method == "pack": dtype = matrix.dtype - values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True) + values, dtype = values_to_numpy_buffer( + values, dtype, copy=copy, ownable=True, subarray_after=1 + ) if row_indices is values: values = np.copy(values) if not is_iso and values.ndim == 0: @@ -2122,7 +2130,9 @@ def _import_bitmapr( ) if method == "pack": dtype = matrix.dtype - values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True, order="C") + values, dtype = values_to_numpy_buffer( + values, dtype, copy=copy, ownable=True, order="C", subarray_after=2 + ) if bitmap is values: values = np.copy(values) if method == "import": @@ -2313,7 +2323,9 @@ def _import_bitmapc( ) if method == "pack": dtype = matrix.dtype - values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True, order="F") + values, dtype = values_to_numpy_buffer( + values, dtype, copy=copy, ownable=True, order="F", subarray_after=2 + ) if bitmap is values: values = np.copy(values) if method == "import": @@ -2486,7 +2498,9 @@ def _import_fullr( copy = not take_ownership if method == "pack": dtype = matrix.dtype - values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, order="C", ownable=True) + values, dtype = values_to_numpy_buffer( + values, dtype, copy=copy, order="C", ownable=True, subarray_after=2 + ) if method == "import": nrows, ncols = get_shape(nrows, ncols, dtype, values=values) else: @@ -2643,7 +2657,9 @@ def _import_fullc( copy = not take_ownership if method == "pack": dtype = matrix.dtype - values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, order="F", ownable=True) + values, dtype = values_to_numpy_buffer( + values, dtype, copy=copy, order="F", ownable=True, subarray_after=2 + ) if method == "import": nrows, ncols = get_shape(nrows, ncols, dtype, values=values) else: @@ -2848,7 +2864,7 @@ def _import_coo( if method == "pack": dtype = matrix.dtype - values, dtype = values_to_numpy_buffer(values, dtype) + values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=1) if method == "import": matrix = gb.Matrix(dtype, nrows=nrows, ncols=ncols, name=name) if is_iso: diff --git a/graphblas/core/ss/vector.py b/graphblas/core/ss/vector.py index 2395eb13f..9635e8fb9 100644 --- a/graphblas/core/ss/vector.py +++ b/graphblas/core/ss/vector.py @@ -970,7 +970,9 @@ def _import_sparse( indices = ints_to_numpy_buffer(indices, np.uint64, copy=copy, ownable=True, name="indices") if method == "pack": dtype = vector.dtype - values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True) + values, dtype = values_to_numpy_buffer( + values, dtype, copy=copy, ownable=True, subarray_after=1 + ) if indices is values: values = np.copy(values) vi = ffi_new("GrB_Index**", ffi.from_buffer("GrB_Index*", indices)) @@ -1150,7 +1152,9 @@ def _import_bitmap( if method == "pack": dtype = vector.dtype size = vector._size - values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True) + values, dtype = values_to_numpy_buffer( + values, dtype, copy=copy, ownable=True, subarray_after=1 + ) if bitmap is values: values = np.copy(values) vhandle = ffi_new("GrB_Vector*") @@ -1320,7 +1324,9 @@ def _import_full( if method == "pack": dtype = vector.dtype size = vector._size - values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True) + values, dtype = values_to_numpy_buffer( + values, dtype, copy=copy, ownable=True, subarray_after=1 + ) vhandle = ffi_new("GrB_Vector*") vx = ffi_new("void**", ffi.from_buffer("void*", values)) if size is None: diff --git a/graphblas/core/utils.py b/graphblas/core/utils.py index 8d07eadea..b09f71713 100644 --- a/graphblas/core/utils.py +++ b/graphblas/core/utils.py @@ -72,7 +72,21 @@ def _get_subdtype(dtype): return dtype -def values_to_numpy_buffer(array, dtype=None, *, copy=False, ownable=False, order="C"): +def values_to_numpy_buffer( + array, dtype=None, *, copy=False, ownable=False, order="C", subarray_after=None +): + """Convert an array-like object to a numpy array and infer the dtype if necessary. + + Parameters + ---------- + subarray_after : int, optional + If dtype is not provided, infer "sub-array" dtype if the array has extra dimensions. + + Returns + ------- + np.ndarray + dtype + """ if dtype is not None: dtype = lookup_dtype(dtype) array = np.array(array, _get_subdtype(dtype.np_type), copy=copy, order=order) @@ -85,6 +99,8 @@ def values_to_numpy_buffer(array, dtype=None, *, copy=False, ownable=False, orde # fix for win64 numpy handling of ints array = array.astype(np.int64) dtype = lookup_dtype(array.dtype) + if subarray_after is not None and array.ndim > subarray_after: + dtype = lookup_dtype(np.dtype((dtype.np_type, array.shape[subarray_after:]))) if ownable and (not array.flags.owndata or not array.flags.writeable): array = array.copy(order) return array, dtype diff --git a/graphblas/core/vector.py b/graphblas/core/vector.py index e0d55cc99..a39a92f40 100644 --- a/graphblas/core/vector.py +++ b/graphblas/core/vector.py @@ -725,17 +725,14 @@ def from_coo(cls, indices, values=1.0, dtype=None, *, size=None, dup_op=None, na Vector """ indices = ints_to_numpy_buffer(indices, np.uint64, name="indices") - values, new_dtype = values_to_numpy_buffer(values, dtype) + values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=1) # Compute size if not provided if size is None: if indices.size == 0: raise ValueError("No indices provided. Unable to infer size.") size = int(indices.max()) + 1 - if dtype is None and values.ndim > 1: - # Look for array-subtdype - new_dtype = lookup_dtype(np.dtype((new_dtype.np_type, values.shape[1:]))) # Create the new vector - w = cls(new_dtype, size, name=name) + w = cls(dtype, size, name=name) if values.ndim == 0: if dup_op is not None: raise ValueError( @@ -1791,12 +1788,14 @@ def from_dict(cls, d, dtype=None, *, size=None, name=None): """ indices = np.fromiter(d.keys(), np.uint64) if dtype is None: - values = np.array(list(d.values())) # let numpy infer dtype - dtype = lookup_dtype(values.dtype) + values, dtype = values_to_numpy_buffer(list(d.values()), subarray_after=1) else: # If we know the dtype, then using `np.fromiter` is much faster dtype = lookup_dtype(dtype) - values = np.fromiter(d.values(), dtype.np_type) + if dtype.np_type.subdtype is not None and np.__version__[:5] in {"1.21.", "1.22."}: + values, dtype = values_to_numpy_buffer(list(d.values()), dtype) + else: + values = np.fromiter(d.values(), dtype.np_type) if size is None and indices.size == 0: size = 0 return cls.from_coo(indices, values, dtype, size=size, name=name) diff --git a/graphblas/tests/test_matrix.py b/graphblas/tests/test_matrix.py index 57bf6e5e3..351e44e64 100644 --- a/graphblas/tests/test_matrix.py +++ b/graphblas/tests/test_matrix.py @@ -3612,6 +3612,7 @@ def test_ss_iteration(A): assert next(A.ss.iteritems()) is not None +@pytest.mark.slow def test_udt(): record_dtype = np.dtype([("x", np.bool_), ("y", np.float64)], align=True) udt = dtypes.register_anonymous(record_dtype, "MatrixUDT") @@ -4231,3 +4232,37 @@ def test_ss_descriptors(A): else: with pytest.raises(ValueError, match="escriptor"): (A @ A).new(nthreads=4, axb_method="dot", sort=True) + + +def test_subarray_dtypes(): + a = np.arange(3 * 4, dtype=np.int64).reshape(3, 4) + A = Matrix.from_coo([1, 3, 5], [0, 1, 3], a) + B = Matrix("INT64[4]", nrows=6, ncols=4) + B[1, 0] = [0, 1, 2, 3] + B[3, 1] = [4, 5, 6, 7] + B[5, 3] = [8, 9, 10, 11] + assert A.isequal(B, check_dtype=True) + for method in ["coo", "csr", "csc", "dcsr", "dcsc", "edgelist"]: + B = getattr(A, f"from_{method}")(*getattr(A, f"to_{method}")()) + B = Matrix.from_dicts(A.to_dicts()) + assert A.isequal(B, check_dtype=True) + B = Matrix.from_dicts(A.to_dicts(), A.dtype) + assert A.isequal(B, check_dtype=True) + + b1 = np.arange(2 * 3 * 4, dtype=np.int64).reshape(2 * 3, 4) + b2 = np.arange(2 * 3 * 4, dtype=np.int64).reshape(2, 3, 4) + Full1 = Matrix.from_coo([0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2], b1) + Full2 = Matrix("INT64[4]", nrows=2, ncols=3) + Full2[0, 0] = [0, 1, 2, 3] + Full2[0, 1] = [4, 5, 6, 7] + Full2[0, 2] = [8, 9, 10, 11] + Full2[1, 0] = [12, 13, 14, 15] + Full2[1, 1] = [16, 17, 18, 19] + Full2[1, 2] = [20, 21, 22, 23] + assert Full1.isequal(Full2, check_dtype=True) + Full2 = Matrix("INT64[4]", nrows=2, ncols=3) + Full2[:, :] = b2 + assert Full1.isequal(Full2, check_dtype=True) + if suitesparse: + Full2 = Matrix.ss.import_fullr(b2) + assert Full1.isequal(Full2, check_dtype=True) diff --git a/graphblas/tests/test_vector.py b/graphblas/tests/test_vector.py index c7f90c10c..90d936a6c 100644 --- a/graphblas/tests/test_vector.py +++ b/graphblas/tests/test_vector.py @@ -1316,7 +1316,7 @@ def import_func(x, import_name, **kwargs): w(w.S) << 1 w_orig = w.dup() format = "full" - for (raw, import_format, give_ownership, take_ownership, import_name) in itertools.product( + for raw, import_format, give_ownership, take_ownership, import_name in itertools.product( [False, True], [format, None], [False, True], @@ -2516,3 +2516,38 @@ def test_ss_sort(v): expected_p = Vector.from_coo([0, 1, 2, 3], [6, 4, 3, 1], size=7) assert p.isequal(expected_p) w, p = v.ss.sort(monoid.lxor) # Weird, but user-defined monoids may not commute, so okay + + +def test_subarray_dtypes(): + a = np.arange(3 * 4, dtype=np.int64).reshape(3, 4) + v = Vector.from_coo([1, 3, 5], a) + w = Vector("INT64[4]", size=6) + w[1] = [0, 1, 2, 3] + w[3] = [4, 5, 6, 7] + w[5] = [8, 9, 10, 11] + assert v.isequal(w, check_dtype=True) + w = Vector.from_coo(*v.to_coo()) + assert v.isequal(w, check_dtype=True) + w = Vector.from_dict(v.to_dict()) + assert v.isequal(w, check_dtype=True) + w = Vector.from_dict(v.to_dict(), v.dtype) + assert v.isequal(w, check_dtype=True) + w = Vector.from_pairs([[1, [0, 1, 2, 3]], [3, [4, 5, 6, 7]], [5, [8, 9, 10, 11]]]) + assert v.isequal(w, check_dtype=True) + + full1 = Vector.from_coo([0, 1, 2], a) + full2 = Vector("INT64[4]", size=3) + full2[0] = [0, 1, 2, 3] + full2[1] = [4, 5, 6, 7] + full2[2] = [8, 9, 10, 11] + assert full1.isequal(full2, check_dtype=True) + full2 = Vector("INT64[4]", size=3) + full2[:] = a + assert full1.isequal(full2, check_dtype=True) + if suitesparse: + w = Vector.ss.import_sparse(indices=[1, 3, 5], values=a, size=6) + assert v.isequal(w, check_dtype=True) + full2 = Vector.ss.import_full(a) + assert full1.isequal(full2, check_dtype=True) + full2 = Vector.ss.import_bitmap(values=a, bitmap=[True, True, True]) + assert full1.isequal(full2, check_dtype=True) diff --git a/pyproject.toml b/pyproject.toml index d88f1bdc8..d23d2079a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -214,6 +214,8 @@ select = [ "PLR", # pylint Refactor "PLW", # pylint Warning "TRY", # tryceratops + # "RSE", # flake8-raise + # "SLF", # flake8-self "RUF", # ruff-specific rules ] external = [ @@ -242,6 +244,8 @@ ignore = [ # Intentionally ignored "COM812", # Trailing comma missing "D203", # 1 blank line required before class docstring (Note: conflicts with D211, which is preferred) + "PLR0913", # Too many arguments to function call + "PLR0915", # Too many statements "PLR2004", # Magic number used in comparison, consider replacing magic with a constant variable "PT001", # Use `@pytest.fixture()` over `@pytest.fixture` (Note: why?) "PT003", # `scope='function'` is implied in `@pytest.fixture()` (Note: no harm in being explicit) diff --git a/scripts/check_versions.sh b/scripts/check_versions.sh index b14f62206..f1fb8246e 100755 --- a/scripts/check_versions.sh +++ b/scripts/check_versions.sh @@ -7,7 +7,7 @@ conda search 'numpy[channel=conda-forge]>=1.24.1' conda search 'pandas[channel=conda-forge]>=1.5.3' conda search 'scipy[channel=conda-forge]>=1.10.0' conda search 'networkx[channel=conda-forge]>=3.0' -conda search 'awkward[channel=conda-forge]>=2.0.6' +conda search 'awkward[channel=conda-forge]>=2.0.7' conda search 'numba[channel=conda-forge]>=0.56.4' conda search 'pyyaml[channel=conda-forge]>=6.0' conda search 'flake8-comprehensions[channel=conda-forge]>=3.10.1'