Issue #1445 fix performance big model #1446


Merged: 7 commits, Feb 21, 2025
Changes from all commits
4 changes: 4 additions & 0 deletions docs/api/changelog.rst
@@ -91,6 +91,10 @@ Fixed
like MetaSWAP expects.
- Models imported with :meth:`imod.msw.MetaSwapModel.from_imod5_data` can be
written with ``validate`` set to True.
- Fixed performance issue when converting very large numbers of wells (>10k) with
:meth:`imod.mf6.Well.to_mf6_pkg` and :meth:`imod.mf6.LayeredWell.to_mf6_pkg`,
such as those created with :meth:`imod.mf6.LayeredWell.from_imod5_cap_data`
for a large grid.


[1.0.0rc1] - 2024-12-20
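As an illustration of the code path this changelog entry refers to, here is a minimal sketch modeled on the new test and the cap_data_sprinkling_grid__big fixture added in this PR; the grid construction and the order of the to_mf6_pkg arguments are copied from those test files and are otherwise an assumption, not documented API guidance.

import numpy as np
import xarray as xr
from imod.mf6 import LayeredWell

# Build an n x n cap-data grid, one well candidate per cell, mirroring the
# cap_data_sprinkling_grid__big fixture further down in this PR.
n = 1000
x = np.arange(n, dtype=float) + 1.0
y = np.arange(n, dtype=float)[::-1] + 1.0
zeros = xr.DataArray(
    np.zeros((n, n)),
    coords={"x": x, "y": y, "dx": 1.0, "dy": -1.0},
    dims=("y", "x"),
).chunk({"x": n, "y": n})  # dask-backed single chunk, as in the fixture

cap_data = {
    "cap": {
        "boundary": zeros + 1,
        "wetted_area": zeros + 0.5,
        "urban_area": zeros + 0.25,
        "artificial_recharge": zeros + 2.0,  # values mirror the big fixture
        "artificial_recharge_layer": xr.ones_like(zeros),  # fixture also marks one column as layer 2; omitted here
        "artificial_recharge_capacity": xr.DataArray(25.0),
    }
}

layer = xr.DataArray([1, 1], coords={"layer": [1, 2]}, dims=["layer"])
bnd = layer * cap_data["cap"]["boundary"]

# The conversion below is the call path that was slow for >10k wells before this fix.
well = LayeredWell.from_imod5_cap_data(cap_data)
mf6_wel = well.to_mf6_pkg(bnd.astype(bool), bnd, bnd, bnd)

With n = 1000 this yields roughly one million well candidates, comfortably past the >10k threshold mentioned in the entry.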
13 changes: 7 additions & 6 deletions imod/mf6/wel.py
@@ -531,12 +531,13 @@ def _to_mf6_pkg(
def gather_filtered_well_ids(
self, well_data_filtered: pd.DataFrame | xr.Dataset, well_data: pd.DataFrame
) -> list[str]:
filtered_well_ids = [
id
for id in well_data["id"].unique()
if id not in well_data_filtered["id"].values
]
return filtered_well_ids
# Work around performance issue with xarray isin for large datasets.
if isinstance(well_data_filtered, xr.Dataset):
Collaborator

Do you need to convert it to a dataframe? Can't you directly access the values via well_data_filtered["id"].values?

Or, if it needs to be a dataframe, wouldn't it be better to construct it like pd.DataFrame(well_data_filtered["id"].values)?

Contributor Author

The function now accepts both xr.Dataset and pd.Series. It is called twice in the _to_mf6_pkg method, once with a pd.Series and once with an xr.Dataset.

The problem with using .values is that there is a difference between xarray DataArrays and pandas Series: xarray has a .values method, whereas pandas has a values property. I therefore thought calling to_dataframe made it more explicit that we are converting an xarray DataArray to a pandas DataFrame.

filtered_ids = well_data_filtered["id"].to_dataframe()["id"]
else:
filtered_ids = well_data_filtered["id"]
is_missing_id = ~well_data["id"].isin(filtered_ids.unique())
return well_data["id"].loc[is_missing_id].unique()

def to_mf6_package_information(
self, filtered_wells: list[str], reason_text: str
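To make the review discussion above concrete, here is a standalone sketch of the vectorized id-filtering pattern that replaces the old list comprehension; well_data and kept are toy stand-ins, not names from the package.

import pandas as pd
import xarray as xr

# Toy stand-ins: all well ids, and the subset that survived filtering
# (provided as an xr.Dataset, one of the two input types the method accepts).
well_data = pd.DataFrame({"id": [f"well_{i}" for i in range(100_000)]})
kept = xr.Dataset({"id": ("index", well_data["id"].to_numpy()[::2])})

# Old approach: a Python-level membership test per id, roughly O(n * m).
# dropped_slow = [i for i in well_data["id"].unique() if i not in kept["id"].values]

# New approach: convert the xarray variable to a pandas Series explicitly,
# then use the vectorized, hash-based pandas isin.
kept_ids = kept["id"].to_dataframe()["id"]          # xr.DataArray -> pd.Series
is_missing = ~well_data["id"].isin(kept_ids.unique())
dropped = well_data["id"].loc[is_missing].unique()  # ids that were filtered out
assert len(dropped) == 50_000

The list comprehension rescans the kept ids once per well id, while isin builds a hash table once; for tens of thousands of wells that is the difference the changelog entry describes.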
1 change: 1 addition & 0 deletions imod/tests/conftest.py
@@ -24,6 +24,7 @@
from .fixtures.flow_transport_simulation_fixture import flow_transport_simulation
from .fixtures.imod5_cap_data import (
cap_data_sprinkling_grid,
cap_data_sprinkling_grid__big,
cap_data_sprinkling_points,
)
from .fixtures.imod5_well_data import (
61 changes: 46 additions & 15 deletions imod/tests/fixtures/imod5_cap_data.py
@@ -6,9 +6,9 @@
from imod.typing import Imod5DataDict


def zeros_grid():
x = [1.0, 2.0, 3.0]
y = [3.0, 2.0, 1.0]
def zeros_grid(n):
x = np.arange(n, dtype=float) + 1.0
y = np.arange(n, dtype=float)[::-1] + 1.0
dx = 1.0
dy = -1.0

@@ -19,13 +19,19 @@ def zeros_grid():
return xr.DataArray(data, coords=coords, dims=("y", "x"))


def zeros_dask_grid(n):
da = zeros_grid(n)
return da.chunk({"x": n, "y": n})


@pytest.fixture(scope="function")
def cap_data_sprinkling_grid() -> Imod5DataDict:
boundary = zeros_grid() + 1
wetted_area = zeros_grid() + 0.5
urban_area = zeros_grid() + 0.25
n = 3
boundary = zeros_grid(n) + 1
wetted_area = zeros_grid(n) + 0.5
urban_area = zeros_grid(n) + 0.25

artificial_rch_type = zeros_grid()
artificial_rch_type = zeros_grid(n)
artificial_rch_type[:, 1] = 1
artificial_rch_type[:, 2] = 2
layer = xr.ones_like(artificial_rch_type)
@@ -43,13 +49,37 @@ def cap_data_sprinkling_grid() -> Imod5DataDict:
return {"cap": cap_data}


@pytest.fixture(scope="function")
def cap_data_sprinkling_grid__big() -> Imod5DataDict:
n = 1000
boundary = zeros_dask_grid(n) + 1
wetted_area = zeros_dask_grid(n) + 0.5
urban_area = zeros_dask_grid(n) + 0.25

artificial_rch_type = zeros_dask_grid(n) + 2.0
layer = xr.ones_like(artificial_rch_type)
layer[:, 1] = 2

cap_data = {
"boundary": boundary,
"wetted_area": wetted_area,
"urban_area": urban_area,
"artificial_recharge": artificial_rch_type,
"artificial_recharge_layer": layer,
"artificial_recharge_capacity": xr.DataArray(25.0),
}

return {"cap": cap_data}


@pytest.fixture(scope="function")
def cap_data_sprinkling_grid_with_layer() -> Imod5DataDict:
boundary = zeros_grid() + 1
wetted_area = zeros_grid() + 0.5
urban_area = zeros_grid() + 0.25
n = 3
boundary = zeros_grid(n) + 1
wetted_area = zeros_grid(n) + 0.5
urban_area = zeros_grid(n) + 0.25

artificial_rch_type = zeros_grid()
artificial_rch_type = zeros_grid(n)
artificial_rch_type[:, 1] = 1
artificial_rch_type[:, 2] = 2
layer = xr.ones_like(artificial_rch_type)
@@ -69,11 +99,12 @@ def cap_data_sprinkling_grid_with_layer() -> Imod5DataDict:

@pytest.fixture(scope="function")
def cap_data_sprinkling_points() -> Imod5DataDict:
boundary = zeros_grid() + 1
wetted_area = zeros_grid() + 0.5
urban_area = zeros_grid() + 0.25
n = 3
boundary = zeros_grid(n) + 1
wetted_area = zeros_grid(n) + 0.5
urban_area = zeros_grid(n) + 0.25

artificial_rch_type = zeros_grid()
artificial_rch_type = zeros_grid(n)
artificial_rch_type[:, 1] = 3000
artificial_rch_type[:, 2] = 4000

14 changes: 14 additions & 0 deletions imod/tests/test_mf6/test_mf6_wel.py
@@ -1255,6 +1255,20 @@ def test_from_imod5_cap_data__grid(cap_data_sprinkling_grid):
np.testing.assert_array_equal(ds["y"].to_numpy(), expected_y)


@pytest.mark.timeout(300)
def test_from_imod5_cap_data__big_grid(cap_data_sprinkling_grid__big):
"""Test if performance is acceptable for large grids."""
# Arrange
bnd_2d = cap_data_sprinkling_grid__big["cap"]["boundary"]
layer = xr.DataArray([1, 1], coords={"layer": [1, 2]}, dims=["layer"])
bnd = layer * bnd_2d
# Act
well = LayeredWell.from_imod5_cap_data(cap_data_sprinkling_grid__big)
mf6_wel = well.to_mf6_pkg(bnd.astype(bool), bnd, bnd, bnd)
# Assert
assert mf6_wel.dataset.sizes["ncellid"] == bnd_2d.size


def test_from_imod5_cap_data__points(cap_data_sprinkling_points):
with pytest.raises(NotImplementedError):
LayeredWell.from_imod5_cap_data(cap_data_sprinkling_points)
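
A note on the timeout marker used in the new test above: assuming the pytest-timeout plugin is what provides pytest.mark.timeout (it is not part of core pytest), the marker acts as a coarse performance guard; a minimal sketch:

import pytest

@pytest.mark.timeout(300)  # fail the test if it runs for more than 300 seconds
def test_conversion_finishes_in_time():
    ...  # placeholder: exercise the expensive conversion here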