Issue #1445 fix performance big model #1446


Merged: 7 commits, Feb 21, 2025
Changes from all commits
4 changes: 4 additions & 0 deletions docs/api/changelog.rst
@@ -91,6 +91,10 @@ Fixed
like MetaSWAP expects.
- Models imported with :meth:`imod.msw.MetaSwapModel.from_imod5_data` can be
written with ``validate`` set to True.
- Fixed performance issue when converting very large numbers of wells (>10k) with
:meth:`imod.mf6.Well.to_mf6_pkg` and :meth:`imod.mf6.LayeredWell.to_mf6_pkg`,
such as those created with :meth:`imod.mf6.LayeredWell.from_imod5_cap_data`
for a large grid.


[1.0.0rc1] - 2024-12-20
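As an illustration of the code path this changelog entry refers to, here is a minimal sketch modeled on the new test and the cap_data_sprinkling_grid__big fixture added in this PR; the grid construction and the order of the to_mf6_pkg arguments are copied from those test files and are otherwise an assumption, not documented API guidance.

import numpy as np
import xarray as xr
from imod.mf6 import LayeredWell

# Build an n x n cap-data grid, one well candidate per cell, mirroring the
# cap_data_sprinkling_grid__big fixture further down in this PR.
n = 1000
x = np.arange(n, dtype=float) + 1.0
y = np.arange(n, dtype=float)[::-1] + 1.0
zeros = xr.DataArray(
    np.zeros((n, n)),
    coords={"x": x, "y": y, "dx": 1.0, "dy": -1.0},
    dims=("y", "x"),
).chunk({"x": n, "y": n})  # dask-backed single chunk, as in the fixture

cap_data = {
    "cap": {
        "boundary": zeros + 1,
        "wetted_area": zeros + 0.5,
        "urban_area": zeros + 0.25,
        "artificial_recharge": zeros + 2.0,  # values mirror the big fixture
        "artificial_recharge_layer": xr.ones_like(zeros),  # fixture also marks one column as layer 2; omitted here
        "artificial_recharge_capacity": xr.DataArray(25.0),
    }
}

layer = xr.DataArray([1, 1], coords={"layer": [1, 2]}, dims=["layer"])
bnd = layer * cap_data["cap"]["boundary"]

# The conversion below is the call path that was slow for >10k wells before this fix.
well = LayeredWell.from_imod5_cap_data(cap_data)
mf6_wel = well.to_mf6_pkg(bnd.astype(bool), bnd, bnd, bnd)

With n = 1000 this yields roughly one million well candidates, comfortably past the >10k threshold mentioned in the entry.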
13 changes: 7 additions & 6 deletions imod/mf6/wel.py
@@ -531,12 +531,13 @@ def _to_mf6_pkg(
def gather_filtered_well_ids(
self, well_data_filtered: pd.DataFrame | xr.Dataset, well_data: pd.DataFrame
) -> list[str]:
filtered_well_ids = [
id
for id in well_data["id"].unique()
if id not in well_data_filtered["id"].values
]
return filtered_well_ids
# Work around performance issue with xarray isin for large datasets.
if isinstance(well_data_filtered, xr.Dataset):
Collaborator

Do you need to convert it to a dataframe? Can't you directly access the values via well_data_filtered["id"].values?

Or, if it needs to be a dataframe, wouldn't it be better to construct it like pd.DataFrame(well_data_filtered["id"].values)?

Contributor Author

The function now accepts both xr.Dataset and pd.Series. It is called twice in the _to_mf6_pkg method, once with a pd.Series and once with an xr.Dataset.

The problem with using .values is that there is a difference between xarray DataArrays and pandas Series: xarray has a .values method, whereas pandas has a values property. I therefore thought calling to_dataframe made it more explicit that we are converting an xarray DataArray to a pandas DataFrame.

filtered_ids = well_data_filtered["id"].to_dataframe()["id"]
else:
filtered_ids = well_data_filtered["id"]
is_missing_id = ~well_data["id"].isin(filtered_ids.unique())
return well_data["id"].loc[is_missing_id].unique()

def to_mf6_package_information(
self, filtered_wells: list[str], reason_text: str
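To make the review discussion above concrete, here is a standalone sketch of the vectorized id-filtering pattern that replaces the old list comprehension; well_data and kept are toy stand-ins, not names from the package.

import pandas as pd
import xarray as xr

# Toy stand-ins: all well ids, and the subset that survived filtering
# (provided as an xr.Dataset, one of the two input types the method accepts).
well_data = pd.DataFrame({"id": [f"well_{i}" for i in range(100_000)]})
kept = xr.Dataset({"id": ("index", well_data["id"].to_numpy()[::2])})

# Old approach: a Python-level membership test per id, roughly O(n * m).
# dropped_slow = [i for i in well_data["id"].unique() if i not in kept["id"].values]

# New approach: convert the xarray variable to a pandas Series explicitly,
# then use the vectorized, hash-based pandas isin.
kept_ids = kept["id"].to_dataframe()["id"]          # xr.DataArray -> pd.Series
is_missing = ~well_data["id"].isin(kept_ids.unique())
dropped = well_data["id"].loc[is_missing].unique()  # ids that were filtered out
assert len(dropped) == 50_000

The list comprehension rescans the kept ids once per well id, while isin builds a hash table once; for tens of thousands of wells that is the difference the changelog entry describes.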
1 change: 1 addition & 0 deletions imod/tests/conftest.py
@@ -24,6 +24,7 @@
from .fixtures.flow_transport_simulation_fixture import flow_transport_simulation
from .fixtures.imod5_cap_data import (
cap_data_sprinkling_grid,
cap_data_sprinkling_grid__big,
cap_data_sprinkling_points,
)
from .fixtures.imod5_well_data import (
61 changes: 46 additions & 15 deletions imod/tests/fixtures/imod5_cap_data.py
@@ -6,9 +6,9 @@
from imod.typing import Imod5DataDict


def zeros_grid():
x = [1.0, 2.0, 3.0]
y = [3.0, 2.0, 1.0]
def zeros_grid(n):
x = np.arange(n, dtype=float) + 1.0
y = np.arange(n, dtype=float)[::-1] + 1.0
dx = 1.0
dy = -1.0

@@ -19,13 +19,19 @@ def zeros_grid():
return xr.DataArray(data, coords=coords, dims=("y", "x"))


def zeros_dask_grid(n):
da = zeros_grid(n)
return da.chunk({"x": n, "y": n})


@pytest.fixture(scope="function")
def cap_data_sprinkling_grid() -> Imod5DataDict:
boundary = zeros_grid() + 1
wetted_area = zeros_grid() + 0.5
urban_area = zeros_grid() + 0.25
n = 3
boundary = zeros_grid(n) + 1
wetted_area = zeros_grid(n) + 0.5
urban_area = zeros_grid(n) + 0.25

artificial_rch_type = zeros_grid()
artificial_rch_type = zeros_grid(n)
artificial_rch_type[:, 1] = 1
artificial_rch_type[:, 2] = 2
layer = xr.ones_like(artificial_rch_type)
@@ -43,13 +49,37 @@ def cap_data_sprinkling_grid() -> Imod5DataDict:
return {"cap": cap_data}


@pytest.fixture(scope="function")
def cap_data_sprinkling_grid__big() -> Imod5DataDict:
n = 1000
boundary = zeros_dask_grid(n) + 1
wetted_area = zeros_dask_grid(n) + 0.5
urban_area = zeros_dask_grid(n) + 0.25

artificial_rch_type = zeros_dask_grid(n) + 2.0
layer = xr.ones_like(artificial_rch_type)
layer[:, 1] = 2

cap_data = {
"boundary": boundary,
"wetted_area": wetted_area,
"urban_area": urban_area,
"artificial_recharge": artificial_rch_type,
"artificial_recharge_layer": layer,
"artificial_recharge_capacity": xr.DataArray(25.0),
}

return {"cap": cap_data}


@pytest.fixture(scope="function")
def cap_data_sprinkling_grid_with_layer() -> Imod5DataDict:
boundary = zeros_grid() + 1
wetted_area = zeros_grid() + 0.5
urban_area = zeros_grid() + 0.25
n = 3
boundary = zeros_grid(n) + 1
wetted_area = zeros_grid(n) + 0.5
urban_area = zeros_grid(n) + 0.25

artificial_rch_type = zeros_grid()
artificial_rch_type = zeros_grid(n)
artificial_rch_type[:, 1] = 1
artificial_rch_type[:, 2] = 2
layer = xr.ones_like(artificial_rch_type)
@@ -69,11 +99,12 @@ def cap_data_sprinkling_grid_with_layer() -> Imod5DataDict:

@pytest.fixture(scope="function")
def cap_data_sprinkling_points() -> Imod5DataDict:
boundary = zeros_grid() + 1
wetted_area = zeros_grid() + 0.5
urban_area = zeros_grid() + 0.25
n = 3
boundary = zeros_grid(n) + 1
wetted_area = zeros_grid(n) + 0.5
urban_area = zeros_grid(n) + 0.25

artificial_rch_type = zeros_grid()
artificial_rch_type = zeros_grid(n)
artificial_rch_type[:, 1] = 3000
artificial_rch_type[:, 2] = 4000

14 changes: 14 additions & 0 deletions imod/tests/test_mf6/test_mf6_wel.py
@@ -1255,6 +1255,20 @@ def test_from_imod5_cap_data__grid(cap_data_sprinkling_grid):
np.testing.assert_array_equal(ds["y"].to_numpy(), expected_y)


@pytest.mark.timeout(300)
def test_from_imod5_cap_data__big_grid(cap_data_sprinkling_grid__big):
"""Test if performance is acceptable for large grids."""
# Arrange
bnd_2d = cap_data_sprinkling_grid__big["cap"]["boundary"]
layer = xr.DataArray([1, 1], coords={"layer": [1, 2]}, dims=["layer"])
bnd = layer * bnd_2d
# Act
well = LayeredWell.from_imod5_cap_data(cap_data_sprinkling_grid__big)
mf6_wel = well.to_mf6_pkg(bnd.astype(bool), bnd, bnd, bnd)
# Assert
assert mf6_wel.dataset.sizes["ncellid"] == bnd_2d.size


def test_from_imod5_cap_data__points(cap_data_sprinkling_points):
with pytest.raises(NotImplementedError):
LayeredWell.from_imod5_cap_data(cap_data_sprinkling_points)
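
A note on the timeout marker used in the new test above: assuming the pytest-timeout plugin is what provides pytest.mark.timeout (it is not part of core pytest), the marker acts as a coarse performance guard; a minimal sketch:

import pytest

@pytest.mark.timeout(300)  # fail the test if it runs for more than 300 seconds
def test_conversion_finishes_in_time():
    ...  # placeholder: exercise the expensive conversion here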