Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Improve construction when inferring sub-array dtype (a.k.a. array subdtype) #381

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test_and_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ jobs:
npver=$(python -c 'import random ; print(random.choice(["=1.23", ""]))')
spver=$(python -c 'import random ; print(random.choice(["=1.9", "=1.10", ""]))')
pdver=$(python -c 'import random ; print(random.choice(["=1.5", ""]))')
akver=$(python -c 'import random ; print(random.choice(["=1.10", "=2.0.5", "=2.0.6", ""]))')
akver=$(python -c 'import random ; print(random.choice(["=1.10", "=2.0.5", "=2.0.6", "=2.0.7", ""]))')
fi
if [[ ${{ steps.sourcetype.outputs.selected }} == "source" || ${{ steps.sourcetype.outputs.selected }} == "upstream" ]]; then
# TODO: there are currently issues with some numpy versions when
Expand Down
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ repos:
- id: validate-pyproject
name: Validate pyproject.toml
- repo: https://github.com/myint/autoflake
rev: v2.0.0
rev: v2.0.1
hooks:
- id: autoflake
args: [--in-place]
Expand All @@ -44,7 +44,7 @@ repos:
- id: auto-walrus
args: [--line-length, "100"]
- repo: https://github.com/psf/black
rev: 22.12.0
rev: 23.1.0
hooks:
- id: black
- id: black-jupyter
Expand All @@ -71,7 +71,7 @@ repos:
additional_dependencies: [tomli]
files: ^(graphblas|docs)/
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.0.237
rev: v0.0.241
hooks:
- id: ruff
- repo: https://github.com/sphinx-contrib/sphinx-lint
Expand Down
30 changes: 13 additions & 17 deletions graphblas/core/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -872,7 +872,7 @@ def from_coo(
"""
rows = ints_to_numpy_buffer(rows, np.uint64, name="row indices")
columns = ints_to_numpy_buffer(columns, np.uint64, name="column indices")
values, new_dtype = values_to_numpy_buffer(values, dtype)
values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=1)
# Compute nrows and ncols if not provided
if nrows is None:
if rows.size == 0:
Expand All @@ -882,11 +882,8 @@ def from_coo(
if columns.size == 0:
raise ValueError("No column indices provided. Unable to infer ncols.")
ncols = int(columns.max()) + 1
if dtype is None and values.ndim > 1:
# Look for array-subdtype
new_dtype = lookup_dtype(np.dtype((new_dtype.np_type, values.shape[1:])))
# Create the new matrix
C = cls(new_dtype, nrows, ncols, name=name)
C = cls(dtype, nrows, ncols, name=name)
if values.ndim == 0:
if dup_op is not None:
raise ValueError(
Expand Down Expand Up @@ -1004,7 +1001,7 @@ def _from_csx(cls, fmt, indptr, indices, values, dtype, num, check_num, name):
indices_name = "row indices"
indptr = ints_to_numpy_buffer(indptr, np.uint64, name="index pointers")
indices = ints_to_numpy_buffer(indices, np.uint64, name=indices_name)
values, new_dtype = values_to_numpy_buffer(values, dtype)
values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=1)
if num is None:
if indices.size > 0:
num = int(indices.max()) + 1
Expand All @@ -1026,9 +1023,6 @@ def _from_csx(cls, fmt, indptr, indices, values, dtype, num, check_num, name):
"ncols must be None or equal to len(indptr) - 1; "
f"expected {check_num}, got {ncols}"
)
if dtype is None and values.ndim > 1:
# Look for array-subdtype
new_dtype = lookup_dtype(np.dtype((new_dtype.np_type, values.shape[1:])))
if values.ndim == 0:
if backend == "suitesparse":
# SuiteSparse GxB can handle iso-value
Expand All @@ -1055,21 +1049,21 @@ def _from_csx(cls, fmt, indptr, indices, values, dtype, num, check_num, name):
)
values = np.broadcast_to(values, indices.size)
new_mat = ffi_new("GrB_Matrix*")
rv = Matrix._from_obj(new_mat, new_dtype, nrows, ncols, name=name)
if new_dtype._is_udt:
rv = Matrix._from_obj(new_mat, dtype, nrows, ncols, name=name)
if dtype._is_udt:
dtype_name = "UDT"
else:
dtype_name = new_dtype.name
dtype_name = dtype.name
call(
f"GrB_Matrix_import_{dtype_name}",
[
_Pointer(rv),
new_dtype,
dtype,
_as_scalar(nrows, _INDEX, is_cscalar=True),
_as_scalar(ncols, _INDEX, is_cscalar=True),
_CArray(indptr),
_CArray(indices),
_CArray(values, dtype=new_dtype),
_CArray(values, dtype=dtype),
_as_scalar(indptr.size, _INDEX, is_cscalar=True),
_as_scalar(indices.size, _INDEX, is_cscalar=True),
_as_scalar(values.shape[0], _INDEX, is_cscalar=True),
Expand Down Expand Up @@ -1436,12 +1430,14 @@ def from_dicts(
col_indices = np.fromiter(itertools.chain.from_iterable(dicts), np.uint64)
iter_values = itertools.chain.from_iterable(v.values() for v in dicts)
if dtype is None:
values = np.array(list(iter_values))
dtype = lookup_dtype(values.dtype)
values, dtype = values_to_numpy_buffer(list(iter_values), subarray_after=1)
else:
# If we know the dtype, then using `np.fromiter` is much faster
dtype = lookup_dtype(dtype)
values = np.fromiter(iter_values, dtype.np_type)
if dtype.np_type.subdtype is not None and np.__version__[:5] in {"1.21.", "1.22."}:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is the first time we actually use the version of a dependency. Comparing a prefix of `np.__version__` is pretty low-tech, but hopefully fine. If we need to add more version-dependent behavior, we'll probably want to use a helper utility such as `packaging.version.parse`.

Also, CI testing of dependency versions FTW!

We can drop 1.21 in June, and 1.22 next January.

values, dtype = values_to_numpy_buffer(list(iter_values), dtype)
else:
values = np.fromiter(iter_values, dtype.np_type)
return getattr(cls, methodname)(
*args, indptr, col_indices, values, dtype, nrows=nrows, ncols=ncols, name=name
)
Expand Down
6 changes: 3 additions & 3 deletions graphblas/core/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -1313,7 +1313,7 @@ def _initialize(cls):
op._typed_ops[dtype] = typed_op
op.coercions[dtype] = target_type
# Allow some functions to work on UDTs
for (unop, func) in [
for unop, func in [
(unary.identity, _identity),
(unary.one, _one),
]:
Expand Down Expand Up @@ -2287,7 +2287,7 @@ def _initialize(cls):
# If the inputs are FP32, we use DIV_FP32; use DIV_FP64 for all other input dtypes
truediv = binary.truediv = op.truediv = BinaryOp("truediv")
rtruediv = binary.rtruediv = op.rtruediv = BinaryOp("rtruediv")
for (new_op, builtin_op) in [(truediv, binary.cdiv), (rtruediv, binary.rdiv)]:
for new_op, builtin_op in [(truediv, binary.cdiv), (rtruediv, binary.rdiv)]:
for dtype in builtin_op.types:
if dtype.name in {"FP32", "FC32", "FC64"}:
orig_dtype = dtype
Expand Down Expand Up @@ -2420,7 +2420,7 @@ def _initialize(cls):
left._semiring_commutes_to = right
right._semiring_commutes_to = left
# Allow some functions to work on UDTs
for (binop, func) in [
for binop, func in [
(binary.first, _first),
(binary.second, _second),
(binary.pair, _pair),
Expand Down
34 changes: 25 additions & 9 deletions graphblas/core/ss/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -1305,7 +1305,9 @@ def _import_csr(
)
if method == "pack":
dtype = matrix.dtype
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True)
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, ownable=True, subarray_after=1
)
if col_indices is values:
values = np.copy(values)
Ap = ffi_new("GrB_Index**", ffi.from_buffer("GrB_Index*", indptr))
Expand Down Expand Up @@ -1493,7 +1495,9 @@ def _import_csc(
)
if method == "pack":
dtype = matrix.dtype
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True)
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, ownable=True, subarray_after=1
)
if row_indices is values:
values = np.copy(values)
Ap = ffi_new("GrB_Index**", ffi.from_buffer("GrB_Index*", indptr))
Expand Down Expand Up @@ -1696,7 +1700,9 @@ def _import_hypercsr(
)
if method == "pack":
dtype = matrix.dtype
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True)
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, ownable=True, subarray_after=1
)
if not is_iso and values.ndim == 0:
is_iso = True
if col_indices is values:
Expand Down Expand Up @@ -1917,7 +1923,9 @@ def _import_hypercsc(
)
if method == "pack":
dtype = matrix.dtype
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True)
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, ownable=True, subarray_after=1
)
if row_indices is values:
values = np.copy(values)
if not is_iso and values.ndim == 0:
Expand Down Expand Up @@ -2122,7 +2130,9 @@ def _import_bitmapr(
)
if method == "pack":
dtype = matrix.dtype
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True, order="C")
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, ownable=True, order="C", subarray_after=2
)
if bitmap is values:
values = np.copy(values)
if method == "import":
Expand Down Expand Up @@ -2313,7 +2323,9 @@ def _import_bitmapc(
)
if method == "pack":
dtype = matrix.dtype
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True, order="F")
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, ownable=True, order="F", subarray_after=2
)
if bitmap is values:
values = np.copy(values)
if method == "import":
Expand Down Expand Up @@ -2486,7 +2498,9 @@ def _import_fullr(
copy = not take_ownership
if method == "pack":
dtype = matrix.dtype
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, order="C", ownable=True)
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, order="C", ownable=True, subarray_after=2
)
if method == "import":
nrows, ncols = get_shape(nrows, ncols, dtype, values=values)
else:
Expand Down Expand Up @@ -2643,7 +2657,9 @@ def _import_fullc(
copy = not take_ownership
if method == "pack":
dtype = matrix.dtype
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, order="F", ownable=True)
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, order="F", ownable=True, subarray_after=2
)
if method == "import":
nrows, ncols = get_shape(nrows, ncols, dtype, values=values)
else:
Expand Down Expand Up @@ -2848,7 +2864,7 @@ def _import_coo(

if method == "pack":
dtype = matrix.dtype
values, dtype = values_to_numpy_buffer(values, dtype)
values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=1)
if method == "import":
matrix = gb.Matrix(dtype, nrows=nrows, ncols=ncols, name=name)
if is_iso:
Expand Down
12 changes: 9 additions & 3 deletions graphblas/core/ss/vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -970,7 +970,9 @@ def _import_sparse(
indices = ints_to_numpy_buffer(indices, np.uint64, copy=copy, ownable=True, name="indices")
if method == "pack":
dtype = vector.dtype
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True)
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, ownable=True, subarray_after=1
)
if indices is values:
values = np.copy(values)
vi = ffi_new("GrB_Index**", ffi.from_buffer("GrB_Index*", indices))
Expand Down Expand Up @@ -1150,7 +1152,9 @@ def _import_bitmap(
if method == "pack":
dtype = vector.dtype
size = vector._size
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True)
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, ownable=True, subarray_after=1
)
if bitmap is values:
values = np.copy(values)
vhandle = ffi_new("GrB_Vector*")
Expand Down Expand Up @@ -1320,7 +1324,9 @@ def _import_full(
if method == "pack":
dtype = vector.dtype
size = vector._size
values, dtype = values_to_numpy_buffer(values, dtype, copy=copy, ownable=True)
values, dtype = values_to_numpy_buffer(
values, dtype, copy=copy, ownable=True, subarray_after=1
)
vhandle = ffi_new("GrB_Vector*")
vx = ffi_new("void**", ffi.from_buffer("void*", values))
if size is None:
Expand Down
18 changes: 17 additions & 1 deletion graphblas/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,21 @@ def _get_subdtype(dtype):
return dtype


def values_to_numpy_buffer(array, dtype=None, *, copy=False, ownable=False, order="C"):
def values_to_numpy_buffer(
array, dtype=None, *, copy=False, ownable=False, order="C", subarray_after=None
):
"""Convert an array-like object to a numpy array and infer the dtype if necessary.

Parameters
----------
subarray_after : int, optional
If dtype is not provided and the array has more than ``subarray_after`` dimensions,
infer a "sub-array" dtype whose sub-shape is ``array.shape[subarray_after:]``.

Returns
-------
np.ndarray
dtype
"""
if dtype is not None:
dtype = lookup_dtype(dtype)
array = np.array(array, _get_subdtype(dtype.np_type), copy=copy, order=order)
Expand All @@ -85,6 +99,8 @@ def values_to_numpy_buffer(array, dtype=None, *, copy=False, ownable=False, orde
# fix for win64 numpy handling of ints
array = array.astype(np.int64)
dtype = lookup_dtype(array.dtype)
if subarray_after is not None and array.ndim > subarray_after:
dtype = lookup_dtype(np.dtype((dtype.np_type, array.shape[subarray_after:])))
if ownable and (not array.flags.owndata or not array.flags.writeable):
array = array.copy(order)
return array, dtype
Expand Down
15 changes: 7 additions & 8 deletions graphblas/core/vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,17 +725,14 @@ def from_coo(cls, indices, values=1.0, dtype=None, *, size=None, dup_op=None, na
Vector
"""
indices = ints_to_numpy_buffer(indices, np.uint64, name="indices")
values, new_dtype = values_to_numpy_buffer(values, dtype)
values, dtype = values_to_numpy_buffer(values, dtype, subarray_after=1)
# Compute size if not provided
if size is None:
if indices.size == 0:
raise ValueError("No indices provided. Unable to infer size.")
size = int(indices.max()) + 1
if dtype is None and values.ndim > 1:
# Look for array-subdtype
new_dtype = lookup_dtype(np.dtype((new_dtype.np_type, values.shape[1:])))
# Create the new vector
w = cls(new_dtype, size, name=name)
w = cls(dtype, size, name=name)
if values.ndim == 0:
if dup_op is not None:
raise ValueError(
Expand Down Expand Up @@ -1791,12 +1788,14 @@ def from_dict(cls, d, dtype=None, *, size=None, name=None):
"""
indices = np.fromiter(d.keys(), np.uint64)
if dtype is None:
values = np.array(list(d.values())) # let numpy infer dtype
dtype = lookup_dtype(values.dtype)
values, dtype = values_to_numpy_buffer(list(d.values()), subarray_after=1)
else:
# If we know the dtype, then using `np.fromiter` is much faster
dtype = lookup_dtype(dtype)
values = np.fromiter(d.values(), dtype.np_type)
if dtype.np_type.subdtype is not None and np.__version__[:5] in {"1.21.", "1.22."}:
values, dtype = values_to_numpy_buffer(list(d.values()), dtype)
else:
values = np.fromiter(d.values(), dtype.np_type)
if size is None and indices.size == 0:
size = 0
return cls.from_coo(indices, values, dtype, size=size, name=name)
Expand Down
35 changes: 35 additions & 0 deletions graphblas/tests/test_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -3612,6 +3612,7 @@ def test_ss_iteration(A):
assert next(A.ss.iteritems()) is not None


@pytest.mark.slow
def test_udt():
record_dtype = np.dtype([("x", np.bool_), ("y", np.float64)], align=True)
udt = dtypes.register_anonymous(record_dtype, "MatrixUDT")
Expand Down Expand Up @@ -4231,3 +4232,37 @@ def test_ss_descriptors(A):
else:
with pytest.raises(ValueError, match="escriptor"):
(A @ A).new(nthreads=4, axb_method="dot", sort=True)


def test_subarray_dtypes():
    """Check that sub-array dtypes are inferred when values have extra dimensions.

    A ``(3, 4)`` int64 values array passed with three COO coordinates must yield
    an ``INT64[4]`` matrix, and that matrix must survive a round-trip through
    every export/import pair exercised below, as well as through ``from_dicts``
    with and without an explicit dtype.
    """
    a = np.arange(3 * 4, dtype=np.int64).reshape(3, 4)
    A = Matrix.from_coo([1, 3, 5], [0, 1, 3], a)
    # Build the expected matrix element by element with an explicit sub-array dtype.
    B = Matrix("INT64[4]", nrows=6, ncols=4)
    B[1, 0] = [0, 1, 2, 3]
    B[3, 1] = [4, 5, 6, 7]
    B[5, 3] = [8, 9, 10, 11]
    assert A.isequal(B, check_dtype=True)
    for method in ["coo", "csr", "csc", "dcsr", "dcsc", "edgelist"]:
        B = getattr(A, f"from_{method}")(*getattr(A, f"to_{method}")())
        # Check every round-trip; otherwise `B` is silently overwritten below
        # and a broken import/export pair would go unnoticed.
        assert A.isequal(B, check_dtype=True), method
    B = Matrix.from_dicts(A.to_dicts())
    assert A.isequal(B, check_dtype=True)
    B = Matrix.from_dicts(A.to_dicts(), A.dtype)
    assert A.isequal(B, check_dtype=True)

    # Same 24 values viewed as 6 entries of length 4 (COO) and as a 2x3 grid of
    # length-4 entries (full assignment / full import).
    b1 = np.arange(2 * 3 * 4, dtype=np.int64).reshape(2 * 3, 4)
    b2 = np.arange(2 * 3 * 4, dtype=np.int64).reshape(2, 3, 4)
    Full1 = Matrix.from_coo([0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2], b1)
    Full2 = Matrix("INT64[4]", nrows=2, ncols=3)
    Full2[0, 0] = [0, 1, 2, 3]
    Full2[0, 1] = [4, 5, 6, 7]
    Full2[0, 2] = [8, 9, 10, 11]
    Full2[1, 0] = [12, 13, 14, 15]
    Full2[1, 1] = [16, 17, 18, 19]
    Full2[1, 2] = [20, 21, 22, 23]
    assert Full1.isequal(Full2, check_dtype=True)
    Full2 = Matrix("INT64[4]", nrows=2, ncols=3)
    Full2[:, :] = b2
    assert Full1.isequal(Full2, check_dtype=True)
    if suitesparse:
        # The dense row-major import takes a 3-D array here; the assertion
        # below requires it to produce the same INT64[4] matrix.
        Full2 = Matrix.ss.import_fullr(b2)
        assert Full1.isequal(Full2, check_dtype=True)
Loading