diff --git a/.gitignore b/.gitignore
index efdd953..9410386 100644
--- a/.gitignore
+++ b/.gitignore
@@ -171,4 +171,12 @@ cython_debug/
.pypirc
# Ignore model save files (too large)
-*.pt
\ No newline at end of file
+*.pt
+
+# Scratch, development, old, and other temporary files
+*scratch*
+*_dev*
+*_old*
+temp/
+tmp/
+*.tmp
\ No newline at end of file
diff --git a/bmi_config_files/bmi_config.yaml b/bmi_config_files/bmi_config.yaml
index 8c66286..3390cd7 100644
--- a/bmi_config_files/bmi_config.yaml
+++ b/bmi_config_files/bmi_config.yaml
@@ -1,5 +1,5 @@
### Full-catchment configuration ###
-train_cfg_file: ../trained_models/merit_conus_40yr/config.yaml
+config_model: models/hydrofabric_15yr/config.yaml
initial_state: zero # zero: set initial states of the model to zero
verbose: 1 # 0: no output, 1: print output
diff --git a/bmi_config_files/bmi_config_cat_88306_5yr.yaml b/bmi_config_files/bmi_config_cat-88306_5yr.yaml
similarity index 82%
rename from bmi_config_files/bmi_config_cat_88306_5yr.yaml
rename to bmi_config_files/bmi_config_cat-88306_5yr.yaml
index 32c7267..01e2f19 100644
--- a/bmi_config_files/bmi_config_cat_88306_5yr.yaml
+++ b/bmi_config_files/bmi_config_cat-88306_5yr.yaml
@@ -3,37 +3,39 @@ catchment_id: 'cat-88306'
catchment_name: 'JRB-88306'
# Static catchment attributes
+aridity: 1.041
+meanP: 991.3
+ETPOT_Hargr: 1031.0
+NDVI: 0.594
FW: 0.004622
+meanslope: 10.55
+SoilGrids1km_sand: 38.65
+SoilGrids1km_clay: 23.31
+SoilGrids1km_silt: 38.06
+glaciers: 0.0
HWSD_clay: 25.0
+HWSD_gravel: 15.0
HWSD_sand: 33.0
-T_clay: 20.0
-uparea: 3.254
-T_gravel: 10.0
+HWSD_silt: 42.0
meanelevation: 335.1
-meanP: 991.3
-HWSD_gravel: 15.0
-seasonality_P: 0.1035
-T_sand: 41.0
-SoilGrids1km_silt: 38.06
+meanTa: 9.845
permafrost: 0.0
-snowfall_fraction: 0.1049
-SoilGrids1km_sand: 38.65
-Porosity: 0.01
-T_silt: 39.0
-glaciers: 0.0
-HWSD_silt: 42.0
-meanslope: 10.55
permeability: -15.05
+seasonality_P: 0.1035
seasonality_PET: 0.5703
-ETPOT_Hargr: 1031.0
-meanTa: 9.845
-SoilGrids1km_clay: 23.31
snow_fraction: 0.02967
-aridity: 1.041
-NDVI: 0.594
+snowfall_fraction: 0.1049
+T_clay: 20.0
+T_gravel: 10.0
+T_sand: 41.0
+T_silt: 39.0
+Porosity: 0.01
+uparea: 3.254
-train_cfg_file: ../trained_models/merit_conus_40yr/config.yaml
+config_model: models/hydrofabric_15yr/config.yaml
+stepwise: False # True: stepwise inference; False: single forward simulation on all data in one go
initial_state: zero # zero: set initial states of the model to zero
+dtype: np.float32
verbose: 1 # 0: no output, 1: print output
# Simulation window
diff --git a/environment.yaml b/envs/environment.yaml
similarity index 100%
rename from environment.yaml
rename to envs/environment.yaml
diff --git a/example/bmi_demo.ipynb b/example/bmi_demo.ipynb
new file mode 100644
index 0000000..65b963b
--- /dev/null
+++ b/example/bmi_demo.ipynb
@@ -0,0 +1,181 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Forward $\\delta$ HBV 2.0UH with BMI\n",
+ "\n",
+ "\n",
+ "Notes:\n",
+ "- This example uses a subset of AORC forcings that can be downloaded from S3 here.\n",
+ "\n",
+ "- An environment can be set up with either pip or conda using `./envs/requirements.txt` or `./envs/ngen_env.yaml`, respectively.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import sys\n",
+ "sys.path.append('../../')\n",
+ " \n",
+ "# import numpy as np\n",
+ "\n",
+ "from dHBV_2_0.bmi import DeltaModelBmi\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "NameError",
+ "evalue": "name 'dHbv2Bmi' is not defined",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[1;32mIn[15], line 28\u001b[0m\n\u001b[0;32m 24\u001b[0m attr \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mload(attr_path)\n\u001b[0;32m 25\u001b[0m \u001b[38;5;66;03m# obs = np.load(obs_path)\u001b[39;00m\n\u001b[0;32m 26\u001b[0m \n\u001b[0;32m 27\u001b[0m \u001b[38;5;66;03m# Create an instance of the dHBV 2.0 through BMI\u001b[39;00m\n\u001b[1;32m---> 28\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mdHbv2Bmi\u001b[49m()\n\u001b[0;32m 30\u001b[0m \u001b[38;5;66;03m### BMI initialization ###\u001b[39;00m\n\u001b[0;32m 31\u001b[0m model\u001b[38;5;241m.\u001b[39minitialize(bmi_cfg_file\u001b[38;5;241m=\u001b[39mPath(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mC:/Users/LeoLo/Desktop/noaa_owp/dHBV_2_0/bmi_config_files/bmi_config_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbasin_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.yaml\u001b[39m\u001b[38;5;124m'\u001b[39m))\n",
+ "\u001b[1;31mNameError\u001b[0m: name 'dHbv2Bmi' is not defined"
+ ]
+ }
+ ],
+ "source": [
+ "from pathlib import Path\n",
+ "\n",
+ "\n",
+ "\n",
+ "# os.chdir(os.path.expanduser('../dHBV_2_0/'))\n",
+ "\n",
+ "\n",
+ "### Select a basin from the sample data ###\n",
+ "basin_id = \"cat-88306\"\n",
+ "### ----------------------------------- ###\n",
+ "\n",
+ "\n",
+ "# Load the USGS data\n",
+ "# REPLACE THIS PATH WITH YOUR LOCAL FILE PATH:\n",
+ "forc_path = f'C:/Users/LeoLo/Desktop/noaa_owp/dHBV_2_0/data/aorc/juniata_river_basin/forcings_5yr_{basin_id}.npy'\n",
+ "attr_path = f'C:/Users/LeoLo/Desktop/noaa_owp/dHBV_2_0/data/aorc/juniata_river_basin/attributes_5yr_{basin_id}.npy'\n",
+ "# obs_path = f'/Users/LeoLo/Desktop/noaa_owp/dHBV_2_0/data/aorc/juniata_river_basin/obs_5yr_{basin_id}.npy'\n",
+ "\n",
+ "forc = np.load(forc_path)\n",
+ "attr = np.load(attr_path)\n",
+ "# obs = np.load(obs_path)\n",
+ "\n",
+ "# Create an instance of the dHBV 2.0 through BMI\n",
+ "model = dHbv2Bmi()\n",
+ "\n",
+ "### BMI initialization ###\n",
+ "model.initialize(bmi_cfg_file=Path(f'C:/Users/LeoLo/Desktop/noaa_owp/dHBV_2_0/bmi_config_files/bmi_config_{basin_id}.yaml'))\n",
+ "\n",
+ "streamflow_pred = np.zeros(forc.shape[0])\n",
+ "\n",
+ "for i in range(0, forc.shape[0]):\n",
+ " # Extract forcing/attribute data for the current time step\n",
+ " prcp = forc[i, :0, 0]\n",
+ " temp = forc[i, :0, 1]\n",
+ " pet = forc[i, :0, 2]\n",
+ "\n",
+ " # # Check if any of the inputs are NaN\n",
+ " # if np.isnan([prcp, temp, pet]).any():\n",
+ " # if model.verbose > 0:\n",
+ " # print(f\"Skipping timestep {i} due to NaN values in inputs.\")\n",
+ " # continue\n",
+ "\n",
+ " model.set_value('atmosphere_water__liquid_equivalent_precipitation_rate', prcp)\n",
+ " model.set_value('land_surface_air__temperature', temp)\n",
+ " model.set_value('land_surface_water__potential_evaporation_volume_flux', pet)\n",
+ "\n",
+ " ### BMI update ###\n",
+ " model.update()\n",
+ "\n",
+ " # Retrieve and scale the runoff output\n",
+ " dest_array = np.zeros(1)\n",
+ " model.get_value('land_surface_water__runoff_volume_flux', dest_array)\n",
+ " \n",
+ " # streamflow_pred[i] = dest_array[0] * 1000 # Convert to mm/hr\n",
+ " streamflow_pred[i] = dest_array[0]\n",
+ "\n",
+ "\n",
+ "# Calculate NSE for the model predictions\n",
+ "obs = obs.dropna()\n",
+ "sim = streamflow_pred.dropna()\n",
+ "\n",
+ "denom = ((obs - obs.mean()) ** 2).sum()\n",
+ "num = ((sim - obs) ** 2).sum()\n",
+ "nse = 1 - num / denom\n",
+ "print(f\"NSE: {nse:.2f}\")"
+ ]
+  }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/example/data_processing.ipynb b/example/data_processing.ipynb
index 2b98c34..d8b5a88 100644
--- a/example/data_processing.ipynb
+++ b/example/data_processing.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 45,
+ "execution_count": 1,
"metadata": {},
"outputs": [
{
@@ -33,7 +33,7 @@
"\n",
"gdf = gpd.read_file(r\"C:\\Users\\LeoLo\\Desktop\\jrb\\jrb_2.gpkg\", layer=\"flowpaths\")\n",
"nexus = gpd.read_file(r\"C:\\Users\\LeoLo\\Desktop\\jrb\\jrb_2.gpkg\", layer=\"nexus\")\n",
- "# Many more layers 'flowpaths', 'divides', 'lakes', 'nexus', 'pois', 'hydrolocations', 'flowpath-attributes', \n",
+ "# Many more layers 'flowpaths', 'divides', 'lakes', 'nexus', 'pois', 'hydrolocations', 'flowpath-attributes',\n",
"# 'flowpath-attributes-ml', 'network', 'divide-attributes'\n",
"\n",
"# print(gdf.head())\n",
@@ -845,7 +845,7 @@
},
{
"cell_type": "code",
- "execution_count": 72,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -915,6 +915,441 @@
"print(f\"\\n --------\\nForcing data has {len(duplicates)} duplicate divide_id values.\")"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+   {
+    "data": {
+     "text/plain": [
+      "<xarray.Dataset> Size: 225MB\n",
+      "Dimensions:            (divide_id: 839543)\n",
+      "Coordinates:\n",
+      "  * divide_id          (divide_id) <U11 37MB 'cat-1068193' ... 'cat-3014411'\n",
+      "Data variables: (12/28)\n",
+      "    FW                 (divide_id) float64 7MB ...\n",
+      "    HWSD_clay          (divide_id) float64 7MB ...\n",
+      "    HWSD_sand          (divide_id) float64 7MB ...\n",
+      "    T_clay             (divide_id) float64 7MB ...\n",
+      "    uparea             (divide_id) float64 7MB ...\n",
+      "    T_gravel           (divide_id) float64 7MB ...\n",
+      "    ...                 ...\n",
+      "    ETPOT_Hargr        (divide_id) float64 7MB ...\n",
+      "    meanTa             (divide_id) float64 7MB ...\n",
+      "    SoilGrids1km_clay  (divide_id) float64 7MB ...\n",
+      "    snow_fraction      (divide_id) float64 7MB ...\n",
+      "    aridity            (divide_id) float64 7MB ...\n",
+      "    NDVI               (divide_id) float64 7MB ..."
+     ]
+    }
+   }
+  ],
diff --git a/pyproject.toml b/pyproject.toml
--- a/pyproject.toml
+++ b/pyproject.toml
+# requires = ["setuptools>=42", "wheel"]
+# build-backend = "setuptools.build_meta"
[project]
name = "dHBV_2_0"
-description = "Hydrology models and modules manager"
-requires-python = ">=3.9"
+description = "NextGen-compatible dHBV 2.0 model with UH routing."
+readme = "README.md"
license = {file = "LICENSE"}
authors = [
{name = "Leo Lonzarich"},
@@ -15,27 +17,67 @@ maintainers = [
{name = "Tadd Bindas", email = "taddbindas@gmail.com"},
{name = "Yalan Song", email = "songyalan1@gmail.com"},
]
+requires-python = ">=3.9"
+dynamic = ["version"]
dependencies = [
- "torch",
- "numpy",
- "pandas",
+ "hydra-core>=1.3.2",
+ "ipykernel>=6.29.5",
+ "matplotlib>=3.10.0",
+ "numpy>=1.22.4",
+ "omegaconf>=2.3.0",
+ "pandas>=1.4.3",
+ "pydantic>=2.0.0",
+ "scikit-learn>=1.0.2",
+ "scipy>=1.7.3",
+ "torch>=1.10.1",
+ "torchaudio>=0.10.1",
+ "torchvision>=0.11.2",
+ "tqdm>=4.67.1",
+ "uv>=0.6.6",
+ "zarr>=3.0.5",
+]
+classifiers = [
+ "Programming Language :: Python :: 3",
+ "License :: OSI Approved :: Apache Software License",
+ "Operating System :: OS Independent",
]
-dynamic = ["version"] # Add this line to indicate dynamic versioning
+[project.urls]
+Homepage = "https://mhpi.github.io/codes/frameworks/"
+Repository = "https://github.com/mhpi/dHBV_2_0"
[project.optional-dependencies]
-test = [
- "pytest",
- "pytest-cov",
+hydrodl2 = [
+ "hydroDL2 @ git+https://github.com/mhpi/hydroDL2.git@master"
+]
+dev = [
+ "mypy>=0.940",
+ "pytest>=7.4.2",
+ "ruff>=0.9.10",
]
[tool.hatch]
version.source = "vcs"
-build.hooks.vcs.version-file = "src/hydroDL2/_version.py"
+build.hooks.vcs.version-file = "src/dHBV_2_0/_version.py"
+
+[tool.hatch.metadata]
+allow-direct-references = true
+
+[tool.mypy]
+python_version = "3.12"
+strict = true
+disallow_untyped_defs = false
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+xfail_strict = true
+addopts = [
+ "--import-mode=importlib", # allow using test files with same name
+]
[tool.ruff]
src = ["src"]
-line-length = 120
+line-length = 88
lint.select = [
"F", # Errors detected by Pyflakes
"E", # Error detected by Pycodestyle
@@ -49,6 +91,12 @@ lint.select = [
"UP", # pyupgrade
"RUF100", # Report unused noqa directives
]
+exclude = [
+ ".git",
+ ".vscode",
+ "archive",
+ "dev",
+]
lint.ignore = [
# line too long -> we accept long comment lines; black gets rid of long code lines
"E501",
@@ -71,10 +119,21 @@ lint.ignore = [
# First line should be in imperative mood; try rephrasing
"D401",
## Disable one in each pair of mutually incompatible rules
- # We don’t want a blank line before a class docstring
+ # We don't want a blank line before a class docstring
"D203",
# We want docstrings to start immediately after the opening triple quote
"D213",
+ # Keep standard multi-package import format from isort
+ "I001",
+ # Allow blank lines in docstrings, between code
+ "W293",
+  # Ignore escape sequences used in strings for LaTeX formatting
+ "W605",
+ # Ignore extra line requirements at end of docstring and docstring summary
+ "D204",
+ "D205",
+ # Missing docstring in public method
+ "D102",
]
[tool.ruff.lint.pydocstyle]
diff --git a/setup.py b/setup.py
index 216b8fb..b3ed071 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
#!/usr/bin/env python
-from setuptools import setup, find_packages
+from setuptools import find_packages, setup
setup(
name='dHBV_2_0',
diff --git a/src/dHBV_2_0/__init__.py b/src/dHBV_2_0/__init__.py
index ed3c4a6..6f86f29 100644
--- a/src/dHBV_2_0/__init__.py
+++ b/src/dHBV_2_0/__init__.py
@@ -1,6 +1,19 @@
-# This is not necessary and may cause confusion.
-# If class name (bmi_LSTM) were to be exactly the same
-# as module name (bmi_lstm), this would cause trouble
-# because the module itself would no longer be accessible. (NOAA)
+from __future__ import annotations
-# from .bmi_dm import BmiDm
\ No newline at end of file
+import sys
+from dataclasses import dataclass
+
+import numpy.typing as npt
+
+if sys.version_info < (3, 10):
+ import typing_extensions as typing
+else:
+ import typing
+
+# The `slots` feature was added to `dataclass` in Python 3.10.
+# see: https://docs.python.org/3.12/library/dataclasses.html#dataclasses.dataclass
+if sys.version_info < (3, 10):
+ dataclass_kwargs = {}
+else:
+ dataclass_kwargs = {"slots": True}
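+
+# Example use of `dataclass_kwargs` (illustrative; `State` is a hypothetical class):
+#   @dataclass(**dataclass_kwargs)
+#   class State:
+#       storage: float = 0.0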
diff --git a/src/dHBV_2_0/__main__.py b/src/dHBV_2_0/__main__.py
index 44dacc3..c652fb3 100644
--- a/src/dHBV_2_0/__main__.py
+++ b/src/dHBV_2_0/__main__.py
@@ -1,5 +1,4 @@
from dHBV_2_0.run_bmi_aorc import execute
-# TODO: Add tests here later on -- NOAA
if __name__ == '__main__':
execute()
diff --git a/src/dHBV_2_0/_version.py b/src/dHBV_2_0/_version.py
new file mode 100644
index 0000000..cd7f15f
--- /dev/null
+++ b/src/dHBV_2_0/_version.py
@@ -0,0 +1,21 @@
+# file generated by setuptools-scm
+# don't change, don't track in version control
+
+__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
+
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+ from typing import Tuple
+ from typing import Union
+
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
+else:
+ VERSION_TUPLE = object
+
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
+
+__version__ = version = '0.1.dev25+g5f43f62.d20250406'
+__version_tuple__ = version_tuple = (0, 1, 'dev25', 'g5f43f62.d20250406')
diff --git a/src/dHBV_2_0/bmi.py b/src/dHBV_2_0/bmi.py
new file mode 100644
index 0000000..9519e17
--- /dev/null
+++ b/src/dHBV_2_0/bmi.py
@@ -0,0 +1,1064 @@
+"""BMI wrapper for interfacing dHBV 2.0 with NOAA-OWP NextGen framework.
+
+Author: Leo Lonzarich
+
+Motivated by LSTM BMI implementation of Austin Raney, Jonathan Frame.
+"""
+import json
+import logging
+import os
+import time
+from pathlib import Path
+from typing import Optional, Union
+
+import numpy as np
+import torch
+import yaml
+from bmipy import Bmi
+from dMG import ModelHandler, import_data_sampler, utils
+from numpy.typing import NDArray
+from sklearn.exceptions import DataDimensionalityWarning
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger(__name__)
+
+script_dir = os.path.dirname(os.path.abspath(__file__))
+
+
+
+# -------------------------------------------- #
+# Dynamic input variables (CSDMS standard names)
+# -------------------------------------------- #
+_dynamic_input_vars = [
+ ('atmosphere_water__liquid_equivalent_precipitation_rate', 'mm d-1'),
+ ('land_surface_air__temperature', 'degC'),
+ ('land_surface_water__potential_evaporation_volume_flux', 'mm d-1'),
+]
+
+# ------------------------------------------- #
+# Static input variables (CSDMS standard names)
+# ------------------------------------------- #
+_static_input_vars = [
+ ('ratio__mean_potential_evapotranspiration__mean_precipitation', '-'),
+ ('atmosphere_water__daily_mean_of_liquid_equivalent_precipitation_rate', 'mm d-1'),
+ ('land_surface_water__Hargreaves_potential_evaporation_volume_flux', 'mm d-1'),
+ ('land_vegetation__normalized_diff_vegetation_index', '-'),
+ ('free_land_surface_water', 'mm d-1'),
+ ('basin__mean_of_slope', 'm km-1'),
+ ('soil_sand__grid', 'km2'),
+ ('soil_clay__grid', 'km2'),
+ ('soil_silt__grid', 'km2'),
+ ('land_surface_water__glacier_fraction', 'percent'),
+ ('soil_clay__attr', 'percent'),
+ ('soil_gravel__attr', 'percent'),
+ ('soil_sand__attr', 'percent'),
+ ('soil_silt__attr', 'percent'),
+ ('basin__mean_of_elevation', 'm'),
+ ('atmosphere_water__daily_mean_of_temperature', 'degC'),
+ ('land_surface_water__permafrost_fraction', '-'),
+ ('bedrock__permeability', 'm2'),
+ ('p_seasonality', '-'),
+ ('land_surface_water__potential_evaporation_volume_flux_seasonality', '-'),
+ ('land_surface_water__snow_fraction', 'percent'),
+ ('atmosphere_water__precipitation_falling_as_snow_fraction', 'percent'),
+ ('soil_clay__volume_fraction', 'percent'),
+ ('soil_gravel__volume_fraction', 'percent'),
+ ('soil_sand__volume_fraction', 'percent'),
+ ('soil_silt__volume_fraction', 'percent'),
+ ('soil_active-layer__porosity', '-'),
+ ('basin__area', 'km2'),
+]
+
+# ------------------------------------- #
+# Output variables (CSDMS standard names)
+# ------------------------------------- #
+_output_vars = [
+ ('land_surface_water__runoff_volume_flux', 'm3 s-1'),
+]
+
+# ---------------------------------------------- #
+# Internal variable names <-> CSDMS standard names
+# ---------------------------------------------- #
+_var_name_internal_map = {
+ # ----------- Dynamic inputs -----------
+ 'P': 'atmosphere_water__liquid_equivalent_precipitation_rate',
+ 'Temp': 'land_surface_air__temperature',
+ 'PET': 'land_surface_water__potential_evaporation_volume_flux',
+ # ----------- Static inputs -----------
+ 'aridity': 'ratio__mean_potential_evapotranspiration__mean_precipitation',
+ 'meanP': 'atmosphere_water__daily_mean_of_liquid_equivalent_precipitation_rate',
+ 'ETPOT_Hargr': 'land_surface_water__Hargreaves_potential_evaporation_volume_flux',
+ 'NDVI': 'land_vegetation__normalized_diff_vegetation_index',
+ 'FW': 'free_land_surface_water',
+ 'meanslope': 'basin__mean_of_slope',
+ 'SoilGrids1km_sand': 'soil_sand__grid',
+ 'SoilGrids1km_clay': 'soil_clay__grid',
+ 'SoilGrids1km_silt': 'soil_silt__grid',
+ 'glaciers': 'land_surface_water__glacier_fraction',
+ 'HWSD_clay': 'soil_clay__attr',
+ 'HWSD_gravel': 'soil_gravel__attr',
+ 'HWSD_sand': 'soil_sand__attr',
+ 'HWSD_silt': 'soil_silt__attr',
+ 'meanelevation': 'basin__mean_of_elevation',
+ 'meanTa': 'atmosphere_water__daily_mean_of_temperature',
+ 'permafrost': 'land_surface_water__permafrost_fraction',
+ 'permeability': 'bedrock__permeability',
+ 'seasonality_P': 'p_seasonality',
+ 'seasonality_PET': 'land_surface_water__potential_evaporation_volume_flux_seasonality',
+ 'snow_fraction': 'land_surface_water__snow_fraction',
+ 'snowfall_fraction': 'atmosphere_water__precipitation_falling_as_snow_fraction',
+ 'T_clay': 'soil_clay__volume_fraction',
+ 'T_gravel': 'soil_gravel__volume_fraction',
+ 'T_sand': 'soil_sand__volume_fraction',
+ 'T_silt': 'soil_silt__volume_fraction',
+ 'Porosity': 'soil_active-layer__porosity',
+ 'uparea': 'basin__area',
+ # ----------- Outputs -----------
+ 'flow_sim': 'land_surface_water__runoff_volume_flux',
+}
+
+_var_name_external_map = {v: k for k, v in _var_name_internal_map.items()}
+
+
+def map_to_external(name: str):
+ """Return the external name (exposed via BMI) for a given internal name."""
+ return _var_name_internal_map[name]
+
+
+def map_to_internal(name: str):
+ """Return the internal name for a given external name (exposed via BMI)."""
+ return _var_name_external_map[name]
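+
+
+# For example (illustrative):
+#   map_to_external('P')  -> 'atmosphere_water__liquid_equivalent_precipitation_rate'
+#   map_to_internal('land_surface_air__temperature')  -> 'Temp'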
+
+
+def bmi_array(arr: list[float]) -> NDArray:
+ """Trivial wrapper function to ensure the expected numpy array datatype is used."""
+ return np.array(arr, dtype="float64")
+
+
+
+#==============================================================================#
+#==============================================================================#
+#==============================================================================#
+
+
+# MAIN BMI >>>>
+
+
+#==============================================================================#
+#==============================================================================#
+#==============================================================================#
+
+
+
+class DeltaModelBmi(Bmi):
+ """
+ dHBV 2.0UH BMI: NextGen-compatible, differentiable, physics-informed ML
+ model for hydrologic forecasting. (Song et al., 2024)
+
+ Note: This dHBV 2.0UH BMI can only run forward inference. To train,
+ see dMG package (https://github.com/mhpi/generic_deltaModel).
+ """
+ _att_map = {
+        'model_name': 'dHBV 2.0UH for NextGen',
+ 'version': '2.0',
+ 'author_name': 'Leo Lonzarich',
+ 'time_step_size': 86400,
+ 'time_units': 'seconds',
+ # 'time_step_type': '',
+ # 'grid_type': 'scalar',
+ # 'step_method': '',
+ }
+
+ def __init__(
+ self,
+ config_path: Optional[str] = None,
+ verbose=False,
+ ) -> None:
+ """Create a BMI dHBV 2.0UH model ready for initialization.
+
+ Parameters
+ ----------
+ config_path
+ Path to the BMI configuration file.
+ verbose
+ Enables debug print statements if True.
+ """
+ super().__init__()
+ self._model = None
+ self._initialized = False
+ self.verbose = verbose
+
+ self._var_loc = 'node'
+ self._var_grid_id = 0
+
+ self._start_time = 0.0
+ self._end_time = np.finfo('d').max
+ self._time_units = 's'
+ self._timestep = 0
+
+ self.config_bmi = None
+ self.config_model = None
+
+ # Timing BMI computations
+ t_start = time.time()
+ self.bmi_process_time = 0
+
+ # Read BMI and model configuration files.
+ if config_path is not None:
+ if not Path(config_path).is_file():
+ raise FileNotFoundError(f"Configuration file not found: {config_path}")
+ with open(config_path) as f:
+ self.config_bmi = yaml.safe_load(f)
+ self.stepwise = self.config_bmi.get('stepwise', True)
+
+ try:
+ model_config_path = os.path.join(
+ script_dir, '..', '..', self.config_bmi.get('config_model')
+ )
+ with open(model_config_path) as f:
+ self.config_model = yaml.safe_load(f)
+ except Exception as e:
+ raise RuntimeError(f"Failed to load model configuration: {e}") from e
+
+ # Initialize variables.
+ self._dynamic_var = self._set_vars(_dynamic_input_vars, bmi_array([]))
+ self._static_var = self._set_vars(_static_input_vars, bmi_array([]))
+ self._output_vars = self._set_vars(_output_vars, bmi_array([]))
+
+ # Track total BMI runtime.
+ self.bmi_process_time += time.time() - t_start
+ if self.verbose:
+ log.info(f"BMI init took {time.time() - t_start} s")
+
+ @staticmethod
+ def _set_vars(
+ vars: list[tuple[str, str]],
+ var_value: NDArray,
+ ) -> dict[str, dict[str, Union[NDArray, str]]]:
+ """Set the values of the given variables."""
+ var_dict = {}
+ for item in vars:
+ var_dict[item[0]] = {'value': var_value.copy(), 'units': item[1]}
+ return var_dict
+
+ def initialize(self, config_path: Optional[str] = None) -> None:
+ """(Control function) Initialize the BMI model.
+
+        This BMI operates in two modes. (Necessitated by the fact that dHBV
+        2.0's internal NN must forward on all data at once: forwarding each
+        timestep one-by-one with saving/loading of hidden states would slash
+        LSTM performance, and feeding hidden states day-by-day leads to large
+        efficiency losses vs simply feeding all data at once, due to carrying
+        gradients at each step.)
+
+        1) All-at-once: feed all input data to the BMI before
+           'bmi.initialize()'. The internal model is then forwarded on all
+           data and generates predictions during '.initialize()'.
+
+        2) Stepwise: run '.initialize()', then pass data day by day as normal
+           during 'bmi.update()'. If the forwarding period is sufficiently
+           small (say, <100 days), forwarding the LSTM on individual days
+           with saved states is reasonable.
+
+        To this end, a configuration file can be passed either to
+        `bmi.__init__()` or to `.initialize()`. If running the BMI in mode
+        (1), the config must be passed to the former; for mode (2), it is
+        passed to the latter.
+
+ Parameters
+ ----------
+ config_path
+ Path to the BMI configuration file.
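+
+        Example
+        -------
+        A minimal stepwise (mode 2) sketch; the config path is hypothetical::
+
+            model = DeltaModelBmi()
+            model.initialize(config_path='bmi_config_files/bmi_config.yaml')
+            model.set_value('land_surface_air__temperature', temp)  # ...and the other inputs
+            model.update()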
+ """
+ t_start = time.time()
+
+ # Read BMI configuration file if provided.
+ if config_path is not None:
+ if not Path(config_path).is_file():
+ raise FileNotFoundError(f"Configuration file not found: {config_path}")
+ with open(config_path) as f:
+ self.config_bmi = yaml.safe_load(f)
+ self.stepwise = self.config_bmi.get('stepwise', True)
+
+ if self.config_bmi is None:
+ raise ValueError("No configuration file given. A config path" \
+ "must be passed at time of bmi init() or" \
+ "initialize() call.")
+
+ # Load model configuration.
+ if self.config_model is None:
+ try:
+ model_config_path = os.path.join(
+ script_dir, '..', '..', self.config_bmi.get('config_model')
+ )
+ with open(model_config_path) as f:
+ self.config_model = yaml.safe_load(f)
+ except Exception as e:
+ raise RuntimeError(f"Failed to load model configuration: {e}") from e
+
+ self.config_model = utils.initialize_config(self.config_model)
+ self.config_model['model_path'] = os.path.join(
+ script_dir, '..', '..', self.config_model.get('trained_model')
+ )
+ self.device = self.config_model['device']
+ self.internal_dtype = self.config_model['dtype']
+        # Resolve the dtype name (e.g., 'np.float32') without using eval.
+        self.external_dtype = np.dtype(self.config_bmi['dtype'].split('.')[-1]).type
+ self.sampler = import_data_sampler(self.config_model['data_sampler'])(self.config_model)
+
+ # Load static variables from BMI conf
+        for name in self._static_var.keys():
+            internal_name = map_to_internal(name)
+            if internal_name in self.config_bmi.keys():
+                self._static_var[name]['value'] = bmi_array(self.config_bmi[internal_name])
+            else:
+                log.warning(f"Static variable '{name}' not in BMI config. Skipping.")
+
+        # Set simulation parameters.
+        self._current_time = self.config_bmi.get('start_time', 0.0)
+        self._time_step_size = self.config_bmi.get('time_step_size', 86400)  # Default to 1 day in seconds.
+        self._end_time = self.config_bmi.get('end_time', np.finfo('d').max)
+ # Load a trained model.
+ try:
+ self._model = self._load_trained_model(self.config_model).to(self.device)
+ self._initialized = True
+ except Exception as e:
+ raise RuntimeError(f"Failed to load trained model: {e}") from e
+
+ # Forward simulation on all data in one go.
+ if not self.stepwise:
+ predictions = self._do_forward()
+ self._format_outputs(predictions) # Process and store predictions.
+
+ # Track total BMI runtime.
+ self.bmi_process_time += time.time() - t_start
+ if self.verbose:
+ log.info(f"BMI Initialize took {time.time() - t_start} s | Total runtime: {self.bmi_process_time} s")
+
+ def update(self) -> None:
+ """(Control function) Advance model state by one time step."""
+        t_start = time.time()
+
+        # Forward model on individual timesteps when running in stepwise mode.
+        if self.stepwise:
+            predictions = self._do_forward()
+            self._format_outputs(predictions)
+
+        # Increment model time.
+        self._timestep += 1
+        self._current_time += self._time_step_size
+
+ # Track total BMI runtime.
+ self.bmi_process_time += time.time() - t_start
+ if self.verbose:
+ log.info(f"BMI Update took {time.time() - t_start} s | Total runtime: {self.bmi_process_time} s")
+
+ def update_until(self, end_time: float) -> None:
+ """(Control function) Update model until a particular time.
+
+ Note: Models should be trained standalone with dPLHydro_PMI first before
+ forward predictions with this BMI.
+
+ Parameters
+ ----------
+ end_time : float
+ Time to run model until.
+ """
+ t_start = time.time()
+
+ n_steps = (end_time - self.get_current_time()) / self.get_time_step()
+
+        for _ in range(int(n_steps)):
+            self.update()
+        if n_steps != int(n_steps):
+            self.update_frac(n_steps - int(n_steps))
+
+ # Keep running total of BMI runtime.
+ self.bmi_process_time += time.time() - t_start
+ if self.verbose:
+ log.info(f"BMI Update Until took {time.time() - t_start} s | Total runtime: {self.bmi_process_time} s")
+
+ def finalize(self) -> None:
+ """(Control function) Finalize model."""
+ if self._model is not None:
+ del self._model
+ torch.cuda.empty_cache()
+ self._initialized = False
+ if self.verbose:
+ log.info("BMI model finalized.")
+
+
+
+#==============================================================================#
+#==============================================================================#
+
+ # Helper functions for BMI
+
+#==============================================================================#
+#==============================================================================#
+
+
+
+ def _do_forward(self):
+ """Forward model and save outputs to return on update call."""
+ data_dict = self._format_inputs()
+
+ n_samples = data_dict['xc_nn_norm'].shape[1]
+ batch_start = np.arange(0, n_samples, self.config_model['predict']['batch_size'])
+ batch_end = np.append(batch_start[1:], n_samples)
+
+ batch_predictions = []
+ # Forward through basins in batches.
+ with torch.no_grad():
+ for i in range(len(batch_start)):
+ dataset_sample = self.sampler.get_validation_sample(
+ data_dict,
+ batch_start[i],
+ batch_end[i],
+ )
+
+ # Forward dPLHydro model
+ self.prediction = self._model.forward(dataset_sample, eval=True)
+
+ # For single hydrology model.
+ model_name = self.config_model['dpl_model']['phy_model']['model'][0]
+ prediction = {
+ key: tensor.cpu().detach() for key, tensor in self.prediction[model_name].items()
+ }
+ batch_predictions.append(prediction)
+
+ return self._batch_data(batch_predictions)
+
+ @staticmethod
+ def _load_trained_model(config: dict):
+ """Load a pre-trained model based on the configuration."""
+ model_path = config.get('model_path')
+ if not model_path:
+ raise ValueError("No model path specified in configuration.")
+ if not Path(model_path).exists():
+ raise FileNotFoundError(f"Model file not found: {model_path}")
+ return ModelHandler(config, verbose=True)
+
+ def update_frac(self, time_frac: float) -> None:
+ """
+ Update model by a fraction of a time step.
+
+ Parameters
+ ----------
+ time_frac : float
+            Fraction of a time step.
+ """
+ if self.verbose:
+ print("Warning: This model is trained to make predictions on one day timesteps.")
+ time_step = self.get_time_step()
+ self._time_step_size = self._time_step_size * time_frac
+ self.update()
+ self._time_step_size = time_step
+
+    def _format_outputs(self, outputs):
+        """Format model outputs as BMI outputs."""
+        flow = outputs['flow_sim']
+        if not isinstance(flow, np.ndarray):
+            flow = flow.detach().cpu().numpy()
+        self._output_vars[map_to_external('flow_sim')]['value'] = flow
+
+ def _format_inputs(self):
+ """Format dynamic and static inputs for the model."""
+ #=====================================================================#
+ x_list = []
+ c_list = []
+
+ for name, data in self._dynamic_var.items():
+ if data['value'].size == 0:
+ raise ValueError(f"Dynamic variable '{name}' has no value.")
+ if data['value'].ndim == 1:
+ data['value'] = np.expand_dims(data['value'], axis=(1, 2)) # Shape: (n, 1, 1)
+ elif data['value'].ndim == 2:
+ data['value'] = np.expand_dims(data['value'], axis=2) # Shape: (n, m, 1)
+ elif data['value'].ndim != 3:
+ raise ValueError(f"Dynamic variable '{name}' has unsupported " \
+ f"dimensions ({data['value'].ndim}).")
+ x_list.append(data['value'])
+
+ for name, data in self._static_var.items():
+ if data['value'].size == 0:
+ raise ValueError(f"Static variable '{name}' has no value.")
+ if data['value'].ndim != 2:
+ data['value'] = np.expand_dims(data['value'], axis=(0,1))
+ c_list.append(data['value'])
+
+ x = np.concatenate(x_list, axis=2)
+ x = self._fill_nan(x)
+ c = np.concatenate(c_list, axis=1)
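+        # Assumed shapes at this point: x is (time, n_basins, n_dynamic_vars),
+        # c is (n_basins, n_static_vars).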
+
+ xc_nn_norm, c_nn_norm = self.normalize(x.copy(), c)
+
+        # Get upstream area and elevation.
+        try:
+            ac_name = self.config_model['observations']['upstream_area_name']
+            ac_array = self._static_var[map_to_external(ac_name)]['value']
+        except KeyError as e:
+            raise KeyError("Upstream area is not provided. This is needed for the high-resolution streamflow model.") from e
+        try:
+            elevation_name = self.config_model['observations']['elevation_name']
+            elev_array = self._static_var[map_to_external(elevation_name)]['value']
+        except KeyError as e:
+            raise KeyError("Elevation is not provided. This is needed for the high-resolution streamflow model.") from e
+
+ dataset = {
+ 'ac_all': ac_array.squeeze(-1),
+ 'elev_all': elev_array.squeeze(-1),
+ 'c_nn': c,
+ 'xc_nn_norm': xc_nn_norm,
+ 'c_nn_norm': c_nn_norm,
+ 'x_phy': x,
+ }
+ return dataset
+ #=====================================================================#
+
+    def normalize(
+        self,
+        x_nn: NDArray[np.float32],
+        c_nn: NDArray[np.float32],
+    ) -> tuple[NDArray[np.float32], NDArray[np.float32]]:
+        """Normalize data for the neural network."""
+
+ self.load_norm_stats()
+ x_nn_norm = self._to_norm(x_nn, _dynamic_input_vars)
+ c_nn_norm = self._to_norm(c_nn, _static_input_vars)
+
+ # Remove nans
+ x_nn_norm[x_nn_norm != x_nn_norm] = 0
+ c_nn_norm[c_nn_norm != c_nn_norm] = 0
+
+ c_nn_norm_repeat = np.repeat(
+ np.expand_dims(c_nn_norm, 0),
+ x_nn_norm.shape[0],
+ axis=0,
+ )
+
+ xc_nn_norm = np.concatenate((x_nn_norm, c_nn_norm_repeat), axis=2)
+ del x_nn_norm, x_nn
+
+ return xc_nn_norm, c_nn_norm
+
+ def _to_norm(
+ self,
+ data: NDArray[np.float32],
+ vars: list[str],
+ ) -> NDArray[np.float32]:
+ """Standard data normalization."""
+ log_norm_vars = self.config_model['dpl_model']['phy_model']['use_log_norm']
+
+ data_norm = np.zeros(data.shape)
+
+ for k, var in enumerate(vars):
+ stat = self.norm_stats[map_to_internal(var[0])]
+
+ if len(data.shape) == 3:
+ if map_to_internal(var[0]) in log_norm_vars:
+ data[:, :, k] = np.log10(np.sqrt(data[:, :, k]) + 0.1)
+ data_norm[:, :, k] = (data[:, :, k] - stat[2]) / stat[3]
+ elif len(data.shape) == 2:
+                if map_to_internal(var[0]) in log_norm_vars:
+ data[:, k] = np.log10(np.sqrt(data[:, k]) + 0.1)
+ data_norm[:, k] = (data[:, k] - stat[2]) / stat[3]
+ else:
+ raise DataDimensionalityWarning("Data dimension must be 2 or 3.")
+ return data_norm
+
+ def load_norm_stats(self) -> None:
+ """Load normalization statistics."""
+ path = os.path.join(
+ self.config_model['model_path'],
+ '..',
+ 'normalization_statistics.json',
+ )
+        try:
+            with open(path) as f:
+                self.norm_stats = json.load(f)
+        except (FileNotFoundError, json.JSONDecodeError) as e:
+            raise ValueError(f"Normalization statistics not found or invalid at {path}.") from e
+
+ def _process_predictions(self, predictions):
+ """Process model predictions and store them in output variables."""
+ for var_name, prediction in predictions.items():
+ if var_name in self._output_vars:
+ self._output_vars[var_name]['value'] = prediction.cpu().numpy()
+ else:
+ log.warning(f"Output variable '{var_name}' not recognized. Skipping.")
+
+    def _batch_data(
+        self,
+        batch_list: list[dict[str, torch.Tensor]],
+        target_key: Optional[str] = None,
+    ) -> Union[dict[str, NDArray], NDArray]:
+        """Merge a list of batch data dictionaries into a single dictionary."""
+ data = {}
+ try:
+ if target_key:
+ return torch.cat([x[target_key] for x in batch_list], dim=1).numpy()
+
+ for key in batch_list[0].keys():
+ if len(batch_list[0][key].shape) == 3:
+ dim = 1
+ else:
+ dim = 0
+ data[key] = torch.cat([d[key] for d in batch_list], dim=dim).cpu().numpy()
+ return data
+
+ except ValueError as e:
+ raise ValueError(f"Error concatenating batch data: {e}") from e
+
+    @staticmethod
+    def _fill_nan(array_3d):
+        """Linearly interpolate NaNs along the second axis of a 3D array."""
+        # Define the x-axis for interpolation
+        x = np.arange(array_3d.shape[1])
+ x = np.arange(array_3d.shape[1])
+
+ # Iterate over the first and third dimensions to interpolate the second dimension
+ for i in range(array_3d.shape[0]):
+ for j in range(array_3d.shape[2]):
+ # Select the 1D slice for interpolation
+ slice_1d = array_3d[i, :, j]
+
+ # Find indices of NaNs and non-NaNs
+ nans = np.isnan(slice_1d)
+ non_nans = ~nans
+
+ # Only interpolate if there are NaNs and at least two non-NaN values for reference
+ if np.any(nans) and np.sum(non_nans) > 1:
+ # Perform linear interpolation using numpy.interp
+ array_3d[i, :, j] = np.interp(x, x[non_nans], slice_1d[non_nans], left=None, right=None)
+ return array_3d
+
+ def array_to_tensor(self) -> None:
+ """Converts input values into Torch tensor object to be read by model."""
+ raise NotImplementedError("array_to_tensor")
+
+ def tensor_to_array(self) -> None:
+ """
+        Converts model output Torch tensor into data + gradient arrays to be
+ passed out of BMI for backpropagation, loss, optimizer tuning.
+ """
+ raise NotImplementedError("tensor_to_array")
+
+ def get_tensor_slice(self):
+ """Get tensor of input data for a single timestep."""
+ # sample_dict = take_sample_test(self.bmi_config, self.dataset_dict)
+ # self.input_tensor = torch.Tensor()
+
+ raise NotImplementedError("get_tensor_slice")
+
+ def get_var_type(self, var_name):
+ """
+ Data type of variable.
+
+ Parameters
+ ----------
+ var_name : str
+ Name of variable as CSDMS Standard Name.
+
+ Returns
+ -------
+ str
+ Data type.
+ """
+ return str(self.get_value_ptr(var_name).dtype)
+
+ def get_var_units(self, var_standard_name):
+ """Get units of variable.
+
+ Parameters
+ ----------
+ var_standard_name : str
+ Name of variable as CSDMS Standard Name.
+
+ Returns
+ -------
+ str
+ Variable units.
+ """
+        for var_dict in (self._dynamic_var, self._static_var, self._output_vars):
+            if var_standard_name in var_dict:
+                return var_dict[var_standard_name]['units']
+        raise KeyError(f"Unknown variable: {var_standard_name}")
+
+ def get_var_nbytes(self, var_name):
+ """Get units of variable.
+
+ Parameters
+ ----------
+ var_name : str
+ Name of variable as CSDMS Standard Name.
+
+ Returns
+ -------
+ int
+ Size of data array in bytes.
+ """
+ return self.get_value_ptr(var_name).nbytes
+
+ def get_var_itemsize(self, name):
+ return np.dtype(self.get_var_type(name)).itemsize
+
+ def get_var_location(self, name):
+        return self._var_loc
+
+ def get_var_grid(self, var_name):
+ """Grid id for a variable.
+
+ Parameters
+ ----------
+ var_name : str
+ Name of variable as CSDMS Standard Name.
+
+ Returns
+ -------
+ int
+ Grid id.
+ """
+ # for grid_id, var_name_list in self._grids.items():
+ # if var_name in var_name_list:
+ # return grid_id
+ raise NotImplementedError("get_var_grid")
+
+ def get_grid_rank(self, grid_id: int):
+ """Rank of grid.
+
+ Parameters
+ ----------
+ grid_id
+ Identifier of a grid.
+
+ Returns
+ -------
+ int
+ Rank of grid.
+ """
+ if grid_id == 0:
+ return 1
+ raise ValueError(f"Unsupported grid rank: {grid_id!s}. only support 0")
+
+ def get_grid_size(self, grid_id):
+ """Size of grid.
+
+ Parameters
+ ----------
+ grid_id : int
+ Identifier of a grid.
+
+ Returns
+ -------
+ int
+ Size of grid.
+ """
+ # return int(np.prod(self._model.shape))
+ raise NotImplementedError("get_grid_size")
+
+ def get_value_ptr(self, var_standard_name: str) -> np.ndarray:
+ """Reference to values."""
+        return self._output_vars[var_standard_name]['value']
+
+ def get_value(self, var_name: str, dest: NDArray):
+ """Return copy of variable values."""
+ # TODO: will need to properly account for multiple basins.
+ try:
+ dest[:] = self.get_value_ptr(var_name)[self._timestep-1,].flatten()
+ except RuntimeError as e:
+ raise e
+ return dest
+
+ def get_value_at_indices(self, var_name, dest, indices):
+ """Get values at particular indices.
+
+ Parameters
+ ----------
+ var_name : str
+ Name of variable as CSDMS Standard Name.
+ dest : ndarray
+ A numpy array into which to place the values.
+ indices : array_like
+ Array of indices.
+
+ Returns
+ -------
+ array_like
+ Values at indices.
+ """
+ dest[:] = self.get_value_ptr(var_name).take(indices)
+ return dest
+
+    def set_value(self, var_name, values: np.ndarray):
+        """Set variable value."""
+        for var_dict in (self._dynamic_var, self._static_var):
+            if var_name in var_dict.keys():
+                if self.stepwise:
+                    # Stepwise mode: replace the value each timestep.
+                    var_dict[var_name]['value'] = values
+                else:
+                    # All-at-once mode: accumulate values across set_value calls.
+                    var_dict[var_name]['value'] = np.append(
+                        var_dict[var_name]['value'], values
+                    )
+                break
+
+    def set_value_at_indices(self, name, inds, src):
+        """Set model values at particular indices.
+
+        Parameters
+        ----------
+        name : str
+            Name of variable as CSDMS Standard Name.
+        inds : array_like
+            Array of indices.
+        src : array_like
+            Array of new values.
+        """
+ val = self.get_value_ptr(name)
+ val.flat[inds] = src
+
+    def get_component_name(self):
+        """Name of the component."""
+        return self._att_map['model_name']
+
+    def get_input_item_count(self):
+        """Get the number of input variables."""
+        return len(self._dynamic_var) + len(self._static_var)
+
+    def get_output_item_count(self):
+        """Get the number of output variables."""
+        return len(self._output_vars)
+
+    def get_input_var_names(self):
+        """Get names of input variables."""
+        return list(self._dynamic_var.keys()) + list(self._static_var.keys())
+
+    def get_output_var_names(self):
+        """Get names of output variables."""
+        return list(self._output_vars.keys())
+
+ def get_grid_shape(self, grid_id, shape):
+ """Number of rows and columns of uniform rectilinear grid."""
+ # var_name = self._grids[grid_id][0]
+ # shape[:] = self.get_value_ptr(var_name).shape
+ # return shape
+ raise NotImplementedError("get_grid_shape")
+
+ def get_grid_spacing(self, grid_id, spacing):
+ """Spacing of rows and columns of uniform rectilinear grid."""
+ # spacing[:] = self._model.spacing
+ # return spacing
+ raise NotImplementedError("get_grid_spacing")
+
+ def get_grid_origin(self, grid_id, origin):
+ """Origin of uniform rectilinear grid."""
+ # origin[:] = self._model.origin
+ # return origin
+ raise NotImplementedError("get_grid_origin")
+
+ def get_grid_type(self, grid_id):
+ """Type of grid."""
+ # return self._grid_type[grid_id]
+ raise NotImplementedError("get_grid_type")
+
+ def get_start_time(self):
+ """Start time of model."""
+ return self._start_time
+
+ def get_end_time(self):
+ """End time of model."""
+ return self._end_time
+
+ def get_current_time(self):
+ return self._current_time
+
+ def get_time_step(self):
+ return self._time_step_size
+
+ def get_time_units(self):
+ return self._time_units
+
+ def get_grid_edge_count(self, grid):
+ raise NotImplementedError("get_grid_edge_count")
+
+ def get_grid_edge_nodes(self, grid, edge_nodes):
+ raise NotImplementedError("get_grid_edge_nodes")
+
+ def get_grid_face_count(self, grid):
+ raise NotImplementedError("get_grid_face_count")
+
+ def get_grid_face_nodes(self, grid, face_nodes):
+ raise NotImplementedError("get_grid_face_nodes")
+
+ def get_grid_node_count(self, grid):
+ raise NotImplementedError("get_grid_node_count")
+
+ def get_grid_nodes_per_face(self, grid, nodes_per_face):
+ raise NotImplementedError("get_grid_nodes_per_face")
+
+ def get_grid_face_edges(self, grid, face_edges):
+ raise NotImplementedError("get_grid_face_edges")
+
+ def get_grid_x(self, grid, x):
+ raise NotImplementedError("get_grid_x")
+
+ def get_grid_y(self, grid, y):
+ raise NotImplementedError("get_grid_y")
+
+ def get_grid_z(self, grid, z):
+ raise NotImplementedError("get_grid_z")
+
+    def initialize_config(self, config_path: str) -> dict:
+        """Check that config_path is a valid path and load the config as a
+        dictionary.
+        """
+        config_path = Path(config_path).resolve()
+
+        if not config_path:
+            raise RuntimeError("No BMI configuration path provided.")
+        elif not config_path.is_file():
+            raise RuntimeError(f"BMI configuration not found at path {config_path}.")
+        else:
+            with config_path.open('r') as f:
+                self.config = yaml.safe_load(f)
+        return self.config
diff --git a/src/dHBV_2_0/bmi_dm.py b/src/dHBV_2_0/bmi_dm.py
deleted file mode 100644
index ca8785b..0000000
--- a/src/dHBV_2_0/bmi_dm.py
+++ /dev/null
@@ -1,883 +0,0 @@
-"""BMI wrapper for interfacing dHBV 2.0 with NOAA-OWP NextGen framework."""
-import sys
-
-
-import logging
-import os
-import time
-from pathlib import Path
-from typing import Any, Dict, Optional, Union
-
-import numpy as np
-import torch
-import yaml
-from bmipy import Bmi
-from conf import config
-from core.data import take_sample_test
-from models.model_handler import ModelHandler
-from omegaconf import DictConfig, OmegaConf
-from pydantic import ValidationError
-from ruamel.yaml import YAML
-
-log = logging.getLogger(__name__)
-
-
-class BmiDm(Bmi):
- def __init__(self, config_filepath: Optional[str] = None, verbose=False):
- """
- Create an instance of a differentiable, physics-informed ML model BMI
- for dHBV 2.0UH (Song et al., 2024).
-
- Parameters
- ----------
- config_filepath : str, optional
- Path to the BMI configuration file.
- verbose : bool, optional
- Enables debug print statements if True.
- """
- super().__init__()
- self._model = None
- self._initialized = False
- self.verbose = verbose
-
- self._values = {}
- self._nn_values = {}
- self._pm_values = {}
- self._start_time = 0.0
- self._end_time = np.finfo(float).max
- self._time_units = 'day' # NOTE: NextGen currently only supports seconds.
- self._time_step_size = 1.0
- self._var_array_lengths = 1
-
- # Timing BMI computations
- t_start = time.time()
- self.bmi_process_time = 0
-
- # Basic model attributes
- _att_map = {
- 'model_name': "Differentiable, Physics-informed ML BMI",
- 'version': '1.5',
- 'author_name': 'MHPI, Leo Lonzarich',
- }
-
- # Input forcing/attribute CSDMS Standard Names
- self._input_var_names = [
- ############## Forcings ##############
- 'atmosphere_water__liquid_equivalent_precipitation_rate',
- 'land_surface_air__temperature',
- 'land_surface_air__max_of_temperature', # custom name
- 'land_surface_air__min_of_temperature', # custom name
- 'day__length', # custom name
- 'land_surface_water__potential_evaporation_volume_flux', # check name,
- ############## Attributes ##############
- # ------------- CAMELS ------------- #
- 'atmosphere_water__daily_mean_of_liquid_equivalent_precipitation_rate',
- 'land_surface_water__daily_mean_of_potential_evaporation_flux',
- 'p_seasonality', # custom name
- 'atmosphere_water__precipitation_falling_as_snow_fraction',
- 'ratio__mean_potential_evapotranspiration__mean_precipitation',
- 'atmosphere_water__frequency_of_high_precipitation_events',
- 'atmosphere_water__mean_duration_of_high_precipitation_events',
- 'atmosphere_water__precipitation_frequency',
- 'atmosphere_water__low_precipitation_duration',
- 'basin__mean_of_elevation',
- 'basin__mean_of_slope',
- 'basin__area',
- 'land_vegetation__forest_area_fraction',
- 'land_vegetation__max_monthly_mean_of_leaf-area_index',
- 'land_vegetation__diff_max_min_monthly_mean_of_leaf-area_index',
- 'land_vegetation__max_monthly_mean_of_green_vegetation_fraction',
- 'land_vegetation__diff__max_min_monthly_mean_of_green_vegetation_fraction',
- 'region_state_land~covered__area_fraction', # custom name
- 'region_state_land~covered__area', # custom name
- 'root__depth', # custom name
- 'soil_bedrock_top__depth__pelletier',
- 'soil_bedrock_top__depth__statsgo',
- 'soil__porosity',
- 'soil__saturated_hydraulic_conductivity',
- 'maximum_water_content',
- 'soil_sand__volume_fraction',
- 'soil_silt__volume_fraction',
- 'soil_clay__volume_fraction',
- 'geol_1st_class', # custom name
- 'geol_1st_class__fraction', # custom name
- 'geol_2nd_class', # custom name
- 'geol_2nd_class__fraction', # custom name
- 'basin__carbonate_rocks_area_fraction',
- 'soil_active-layer__porosity', # check name
- 'bedrock__permeability'
- # -------------- CONUS -------------- #
- # 'land_surface_water__Hargreaves_potential_evaporation_volume_flux',
- # 'free_land_surface_water', # check name
- # 'soil_clay__attr', # custom name; need to confirm
- # 'soil_gravel__attr', # custom name; need to confirm
- # 'soil_sand__attr', # custo=m name; need to confirm
- # 'soil_silt__attr', # custom name; need to confirm
- # 'land_vegetation__normalized_diff_vegitation_index', # custom name
- # 'soil_clay__grid', # custom name
- # 'soil_sand__grid', # custom name
- # 'soil_silt__grid', # custom name
- # 'land_surface_water__glacier_fraction', # custom name
- # 'atmosphere_water__daily_mean_of_liquid_equivalent_precipitation_rate',
- # 'atmosphere_water__daily_mean_of_temperature', # custom name
- # 'land_surface_water__potential_evaporation_volume_flux_seasonality', # custom name
- # 'land_surface_water__snow_fraction',
- ]
-
- # Output variable names (CSDMS standard names)
- self._output_var_names = [
- 'land_surface_water__runoff_volume_flux',
- 'srflow',
- 'ssflow',
- 'gwflow',
- 'AET_hydro',
- 'PET_hydro',
- 'flow_sim_no_rout',
- 'srflow_no_rout',
- 'ssflow_no_rout',
- 'gwflow_no_rout',
- 'excs',
- 'evapfactor',
- 'tosoil',
- 'percolation',
- 'BFI_sim'
- ]
-
- # Map CSDMS Standard Names to the model's internal variable names (For CAMELS, CONUS).
- self._var_name_units_map = {
- ############## Forcings ##############
- # ------------- CAMELS ------------- #
- 'atmosphere_water__liquid_equivalent_precipitation_rate':['prcp(mm/day)', 'mm d-1'],
- 'land_surface_air__temperature':['tmean(C)','degC'],
- 'land_surface_air__max_of_temperature':['tmax(C)', 'degC'], # custom name
- 'land_surface_air__min_of_temperature':['tmin(C)', 'degC'], # custom name
- 'day__length':['dayl(s)', 's'], # custom name
- 'land_surface_water__potential_evaporation_volume_flux':['PET_hargreaves(mm/day)', 'mm d-1'], # check name
- # -------------- CONUS -------------- #
- # 'atmosphere_water__liquid_equivalent_precipitation_rate':['P', 'mm d-1'],
- # 'land_surface_air__temperature':['Temp','degC'],
- # 'land_surface_water__potential_evaporation_volume_flux':['PET', 'mm d-1'], # check name
- ############## Attributes ##############
- # -------------- CAMELS -------------- #
- 'atmosphere_water__daily_mean_of_liquid_equivalent_precipitation_rate':['p_mean','mm d-1'],
- 'land_surface_water__daily_mean_of_potential_evaporation_flux':['pet_mean','mm d-1'],
- 'p_seasonality':['p_seasonality', '-'], # custom name
- 'atmosphere_water__precipitation_falling_as_snow_fraction':['frac_snow','-'],
- 'ratio__mean_potential_evapotranspiration__mean_precipitation':['aridity','-'],
- 'atmosphere_water__frequency_of_high_precipitation_events':['high_prec_freq','d yr-1'],
- 'atmosphere_water__mean_duration_of_high_precipitation_events':['high_prec_dur','d'],
- 'atmosphere_water__precipitation_frequency':['low_prec_freq','d yr-1'],
- 'atmosphere_water__low_precipitation_duration':['low_prec_dur','d'],
- 'basin__mean_of_elevation':['elev_mean','m'],
- 'basin__mean_of_slope':['slope_mean','m km-1'],
- 'basin__area':['area_gages2','km2'],
- 'land_vegetation__forest_area_fraction':['frac_forest','-'],
- 'land_vegetation__max_monthly_mean_of_leaf-area_index':['lai_max','-'],
- 'land_vegetation__diff_max_min_monthly_mean_of_leaf-area_index':['lai_diff','-'],
- 'land_vegetation__max_monthly_mean_of_green_vegetation_fraction':['gvf_max','-'],
- 'land_vegetation__diff__max_min_monthly_mean_of_green_vegetation_fraction':['gvf_diff','-'],
- 'region_state_land~covered__area_fraction':['dom_land_cover_frac', 'percent'], # custom name
- 'region_state_land~covered__area':['dom_land_cover', '-'], # custom name
- 'root__depth':['root_depth_50', '-'], # custom name
- 'soil_bedrock_top__depth__pelletier':['soil_depth_pelletier','m'],
- 'soil_bedrock_top__depth__statsgo':['soil_depth_statsgo','m'],
- 'soil__porosity':['soil_porosity','-'],
- 'soil__saturated_hydraulic_conductivity':['soil_conductivity','cm hr-1'],
- 'maximum_water_content':['max_water_content','m'],
- 'soil_sand__volume_fraction':['sand_frac','percent'],
- 'soil_silt__volume_fraction':['silt_frac','percent'],
- 'soil_clay__volume_fraction':['clay_frac','percent'],
- 'geol_1st_class':['geol_1st_class', '-'], # custom name
- 'geol_1st_class__fraction':['glim_1st_class_frac', '-'], # custom name
- 'geol_2nd_class':['geol_2nd_class', '-'], # custom name
- 'geol_2nd_class__fraction':['glim_2nd_class_frac', '-'], # custom name
- 'basin__carbonate_rocks_area_fraction':['carbonate_rocks_frac','-'],
- 'soil_active-layer__porosity':['geol_porosity', '-'], # check name
- 'bedrock__permeability':['geol_permeability','m2'],
- 'drainage__area':['DRAIN_SQKM', 'km2'], # custom name
- 'land_surface__latitude':['lat','degrees'],
- # --------------- CONUS --------------- #
- # 'basin__area':['uparea','km2'],
- # 'land_surface_water__Hargreaves_potential_evaporation_volume_flux':['ETPOT_Hargr', 'mm d-1'], # check name
- # 'free_land_surface_water':['FW', 'mm d-1'], # check name
- # 'soil_clay__attr':['HWSD_clay','percent'], # custom name; need to confirm
- # 'soil_gravel__attr':['HWSD_gravel','percent'], # custom name; need to confirm
- # 'soil_sand__attr':['HWSD_sand','percent'], # custom name; need to confirm
- # 'soil_silt__attr':['HWSD_silt','percent'], # custom name; need to confirm
- # 'land_vegetation__normalized_diff_vegitation_index':['NDVI','-'], # custom name
- # 'soil_active-layer__porosity':['Porosity', '-'], # check name
- # 'soil_clay__grid':['SoilGrids1km_clay','km2'], # custom name
- # 'soil_sand__grid':['SoilGrids1km_sand','km2'], # custom name
- # 'soil_silt__grid':['SoilGrids1km_silt','km2'], # custom name
- # 'soil_clay__volume_fraction':['T_clay','percent'],
- # 'soil_gravel__volume_fraction':['T_gravel','percent'],
- # 'soil_sand__volume_fraction':['T_sand','percent'],
- # 'soil_silt__volume_fraction':['T_silt','percent'],
- # # Aridity in camels
- # 'land_surface_water__glacier_fraction':['glaciers','percent'], # custom name
- # 'atmosphere_water__daily_mean_of_liquid_equivalent_precipitation_rate':['meanP','mm d-1'],
- # 'atmosphere_water__daily_mean_of_temperature':['meanTa','mm d-1'], # custom name
- # 'basin__mean_of_elevation':['meanelevation','m'],
- # 'basin__mean_of_slope':['meanslope','m km-1'],
- # 'bedrock__permeability':['permeability','m2'],
- # 'p_seasonality':['seasonality_P', '-'], # custom name
- # 'land_surface_water__potential_evaporation_volume_flux_seasonality':['seasonality_PET', '-'], # custom name
- # 'land_surface_water__snow_fraction':['snow_fraction','percent'],
- # 'atmosphere_water__precipitation_falling_as_snow_fraction':['snowfall_fraction','percent'],
- ############## Outputs ##############
- # --------- CAMELS/CONUS ---------- #
- 'land_surface_water__runoff_volume_flux':['flow_sim','m3 s-1'],
- 'srflow':['srflow','m3 s-1'],
- 'ssflow':['ssflow','m3 s-1'],
- 'gwflow':['gwflow','m3 s-1'],
- 'AET_hydro':['AET_hydro','m3 s-1'],
- 'PET_hydro':['PET_hydro','m3 s-1'],
- 'flow_sim_no_rout':['flow_sim_no_rout','m3 s-1'],
- 'srflow_no_rout':['srflow_no_rout','m3 s-1'],
- 'ssflow_no_rout':['ssflow_no_rout','m3 s-1'],
- 'gwflow_no_rout':['gwflow_no_rout','m3 s-1'],
- 'excs':['excs','-'],
- 'evapfactor':['evapfactor','-'],
- 'tosoil':['tosoil','m3 s-1'],
- 'percolation':['percolation','-'],
- 'BFI_sim':['BFI_sim','-'],
- }
-
- if config_filepath:
- # Read in model & BMI configurations.
- self.initialize_config(config_filepath)
-
- # Create lookup tables for CSDMS variables + init variable arrays.
- self.init_var_dicts()
-
- # Track total BMI runtime.
- self.bmi_process_time += time.time() - t_start
- if self.verbose:
- log.info(f"BMI init took {time.time() - t_start} s")
-
- def initialize(self, config_filepath: Optional[str] = None) -> None:
- """
- (BMI Control function) Initialize the dPLHydro model.
-
- dPL model BMI operates in two modes, necessitated by the fact that the
- dPL model's pNN is forwarded on all of a prediction period's data at once:
- forwarding on each timestep individually without saving/loading hidden
- states would slash LSTM performance, while feeding hidden states back in
- day by day incurs large efficiency losses (relative to feeding all data
- at once) because gradients are carried at each step.
-
- 1) All attributes/forcings that will be forwarded on are fed to the BMI
- before 'bmi.initialize()'. The internal model is then forwarded on all
- data and generates predictions during '.initialize()'.
-
- 2) Run '.initialize()' first, then pass data day by day as normal during
- 'bmi.update()'. If the forwarding period is sufficiently small (say,
- <100 days), forwarding the LSTM on individual days with saved states is
- reasonable.
-
- To this end, a configuration file can be specified either during
- `bmi.__init__()` or during `.initialize()`. If running the BMI in mode
- (1), the config must be passed to the former; for mode (2), pass it to
- the latter.
-
- Parameters
- ----------
- config_filepath : str, optional
- Path to the BMI configuration file.
- """
- t_start = time.time()
-
- if not self.config:
- # Read in model & BMI configurations.
- self.initialize_config(config_filepath)
-
- # Create lookup tables for CSDMS variables + init variable arrays.
- self.init_var_dicts()
-
- if not config_filepath:
- raise ValueError(
- "No configuration file given. A config path must be passed "
- "at time of BMI init or the .initialize() call."
- )
-
- # Set the simulation start time and get the timestep size.
- self.current_time = self._start_time
- self._time_step_size = self.config['time_step_delta']
-
- # Load a trained model.
- self._model = ModelHandler(self.config).to(self.config['device'])
- self._initialized = True
-
- if self.config['forward_init']:
- # Forward model on all data in this .initialize() step.
- self.run_forward()
-
- # Track total BMI runtime.
- self.bmi_process_time += time.time() - t_start
- if self.verbose:
- log.info(f"BMI initialize [ctrl fn] took {time.time() - t_start} s | Total runtime: {self.bmi_process_time} s")
-
- def update(self) -> None:
- """
- (BMI Control function) Advance model state by one time step.
-
- Note: models should be trained standalone with dPLHydro_PMI before forward predictions are made with this BMI.
- """
- t_start = time.time()
- self.current_time += self._time_step_size
-
- if not self.config['forward_init']:
- # Conventional forward pass during .update()
- self.run_forward()
-
- # Track total BMI runtime.
- self.bmi_process_time += time.time() - t_start
- if self.verbose:
- log.info(f"BMI update [ctrl fn] took {time.time() - t_start} s | Total runtime: {self.bmi_process_time} s")
-
- def run_forward(self):
- """
- Forward model and save outputs to return on update call.
- """
- # Format inputs
- self._values_to_dict()
-
- ngrid = self.dataset_dict['inputs_nn_scaled'].shape[1]
- i_start = np.arange(0, ngrid, self.config['batch_basins'])
- i_end = np.append(i_start[1:], ngrid)
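- # e.g., with ngrid = 10 and batch_basins = 4:
- # i_start = [0, 4, 8], i_end = [4, 8, 10] -> batches [0:4], [4:8], [8:10]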
-
- batched_preds_list = []
- # Forward through basins in batches.
- for i in range(len(i_start)):
- dataset_dict_sample = self._get_batch_sample(self.config, self.dataset_dict,
- i_start[i], i_end[i])
-
- # TODO: Include architecture here for saving/loading states of hydro
- # model and pNN for single timestep updates.
-
- # Forward dPLHydro model
- self.preds = self._model.forward(dataset_dict_sample, eval=True)
-
- # For single hydrology model.
- model_name = self.config['hydro_models'][0]
- batched_preds_list.append({key: tensor.cpu().detach() for key,
- tensor in self.preds[model_name].items()})
-
- # TODO: Expand list of supported outputs (e.g., a dict of output vars).
- # NOTE: this concatenated array is not yet stored or returned;
- # scale_output() below reads self.preds from the last batch only.
- preds = torch.cat([d['flow_sim'] for d in batched_preds_list], dim=1)
- preds = preds.numpy()
-
- # Scale and check output
- self.scale_output()
-
- def update_frac(self, time_frac: float) -> None:
- """
- Update model by a fraction of a time step.
-
- Parameters
- ----------
- time_frac : float
- Fraction of a time step.
- """
- if self.verbose:
- print("Warning: This model is trained to make predictions on one day timesteps.")
- time_step = self.get_time_step()
- self._time_step_size = self._time_step_size * time_frac
- self.update()
- self._time_step_size = time_step
-
- def update_until(self, end_time: float) -> None:
- """
- (BMI Control function) Update model until a particular time.
- Note: models should be trained standalone with dPLHydro_PMI before forward predictions are made with this BMI.
-
- Parameters
- ----------
- end_time : float
- Time to run model until.
- """
- t_start = time.time()
-
- n_steps = (end_time - self.get_current_time()) / self.get_time_step()
-
- for _ in range(int(n_steps)):
- self.update()
- self.update_frac(n_steps - int(n_steps))
-
- # Keep running total of BMI runtime.
- self.bmi_process_time += time.time() - t_start
- if self.verbose:
- log.info(f"BMI update_until [ctrl fn] took {time.time() - t_start} s | Total runtime: {self.bmi_process_time} s")
-
- def finalize(self) -> None:
- """
- (BMI Control function) Finalize model.
- """
- # TODO: Force destruction of ESMF and other objects when testing is done
- # to save space.
-
- torch.cuda.empty_cache()
- self._model = None
-
- def array_to_tensor(self) -> None:
- """
- Convert input values into a Torch tensor object to be read by the model.
- """
- raise NotImplementedError("array_to_tensor")
-
- def tensor_to_array(self) -> None:
- """
- Convert the model output Torch tensor into data + gradient arrays to be
- passed out of the BMI for backpropagation, loss, and optimizer tuning.
- """
- raise NotImplementedError("tensor_to_array")
-
- def get_tensor_slice(self):
- """
- Get tensor of input data for a single timestep.
- """
- # sample_dict = take_sample_test(self.bmi_config, self.dataset_dict)
- # self.input_tensor = torch.Tensor()
-
- raise NotImplementedError("get_tensor_slice")
-
- def get_var_type(self, var_name):
- """
- Data type of variable.
-
- Parameters
- ----------
- var_name : str
- Name of variable as CSDMS Standard Name.
-
- Returns
- -------
- str
- Data type.
- """
- return str(self.get_value_ptr(var_name).dtype)
-
- def get_var_units(self, var_standard_name):
- """Get units of variable.
-
- Parameters
- ----------
- var_standard_name : str
- Name of variable as CSDMS Standard Name.
-
- Returns
- -------
- str
- Variable units.
- """
- return self._var_units_map[var_standard_name]
-
- def get_var_nbytes(self, var_name):
- """Get units of variable.
-
- Parameters
- ----------
- var_name : str
- Name of variable as CSDMS Standard Name.
-
- Returns
- -------
- int
- Size of data array in bytes.
- """
- return self.get_value_ptr(var_name).nbytes
-
- def get_var_itemsize(self, name):
- return np.dtype(self.get_var_type(name)).itemsize
-
- def get_var_location(self, name):
- return self._var_loc[name]
-
- def get_var_grid(self, var_name):
- """Grid id for a variable.
-
- Parameters
- ----------
- var_name : str
- Name of variable as CSDMS Standard Name.
-
- Returns
- -------
- int
- Grid id.
- """
- # for grid_id, var_name_list in self._grids.items():
- # if var_name in var_name_list:
- # return grid_id
- raise NotImplementedError("get_var_grid")
-
- def get_grid_rank(self, grid_id):
- """Rank of grid.
-
- Parameters
- ----------
- grid_id : int
- Identifier of a grid.
-
- Returns
- -------
- int
- Rank of grid.
- """
- # return len(self._model.shape)
- raise NotImplementedError("get_grid_rank")
-
- def get_grid_size(self, grid_id):
- """Size of grid.
-
- Parameters
- ----------
- grid_id : int
- Identifier of a grid.
-
- Returns
- -------
- int
- Size of grid.
- """
- # return int(np.prod(self._model.shape))
- raise NotImplementedError("get_grid_size")
-
- def get_value_ptr(self, var_standard_name: str, model: str) -> np.ndarray:
- """Reference to values.
-
- Parameters
- ----------
- var_standard_name : str
- Name of variable as CSDMS Standard Name.
- model : str
- Which internal model the variable belongs to: 'nn' for the
- neural network or 'pm' for the physics model.
-
- Returns
- -------
- array_like
- Value array.
- """
- if model == 'nn':
- if var_standard_name not in self._nn_values.keys():
- raise ValueError(f"No known variable in BMI model: {var_standard_name}")
- return self._nn_values[var_standard_name]
-
- elif model == 'pm':
- if var_standard_name not in self._pm_values.keys():
- raise ValueError(f"No known variable in BMI model: {var_standard_name}")
- return self._pm_values[var_standard_name]
-
- else:
- raise ValueError("Valid model type (nn or pm) must be specified.")
-
- def get_value(self, var_name, dest):
- """Copy of values.
-
- Parameters
- ----------
- var_name : str
- Name of variable as CSDMS Standard Name.
- dest : ndarray
- A numpy array into which to place the values.
-
- Returns
- -------
- array_like
- Copy of values.
- """
- # NOTE: get_value_ptr() requires a 'model' argument ('nn' or 'pm');
- # this call and several other accessors below need updating to supply one.
- dest[:] = self.get_value_ptr(var_name).flatten()
- return dest
-
- def get_value_at_indices(self, var_name, dest, indices):
- """Get values at particular indices.
-
- Parameters
- ----------
- var_name : str
- Name of variable as CSDMS Standard Name.
- dest : ndarray
- A numpy array into which to place the values.
- indices : array_like
- Array of indices.
-
- Returns
- -------
- array_like
- Values at indices.
- """
- dest[:] = self.get_value_ptr(var_name).take(indices)
- return dest
-
- def set_value(self, var_name, values: np.ndarray, model:str):
- """Set model values.
-
- Parameters
- ----------
- var_name : str
- Name of variable as CSDMS Standard Name.
- values : array_like
- Array of new values.
- """
- if not isinstance(values, (np.ndarray, list, tuple)):
- values = np.array([values])
-
- val = self.get_value_ptr(var_name, model=model)
-
- # val = values.reshape(val.shape)
- val[:] = values
-
- def set_value_at_indices(self, name, inds, src):
- """Set model values at particular indices.
-
- Parameters
- ----------
- var_name : str
- Name of variable as CSDMS Standard Name.
- src : array_like
- Array of new values.
- indices : array_like
- Array of indices.
- """
- val = self.get_value_ptr(name)
- val.flat[inds] = src
-
- def get_component_name(self):
- """Name of the component."""
- return self._name
-
- def get_input_item_count(self):
- """Get the number of input variables."""
- return len(self._input_var_names)
-
- def get_output_item_count(self):
- """Get the number of output variables."""
- return len(self._output_var_names)
-
- def get_input_var_names(self):
- """Get names of input variables."""
- return self._input_var_names
-
- def get_output_var_names(self):
- """Get names of output variables."""
- return self._output_var_names
-
- def get_grid_shape(self, grid_id, shape):
- """Number of rows and columns of uniform rectilinear grid."""
- # var_name = self._grids[grid_id][0]
- # shape[:] = self.get_value_ptr(var_name).shape
- # return shape
- raise NotImplementedError("get_grid_shape")
-
- def get_grid_spacing(self, grid_id, spacing):
- """Spacing of rows and columns of uniform rectilinear grid."""
- # spacing[:] = self._model.spacing
- # return spacing
- raise NotImplementedError("get_grid_spacing")
-
- def get_grid_origin(self, grid_id, origin):
- """Origin of uniform rectilinear grid."""
- # origin[:] = self._model.origin
- # return origin
- raise NotImplementedError("get_grid_origin")
-
- def get_grid_type(self, grid_id):
- """Type of grid."""
- # return self._grid_type[grid_id]
- raise NotImplementedError("get_grid_type")
-
- def get_start_time(self):
- """Start time of model."""
- return self._start_time
-
- def get_end_time(self):
- """End time of model."""
- return self._end_time
-
- def get_current_time(self):
- return self._current_time
-
- def get_time_step(self):
- return self._time_step_size
-
- def get_time_units(self):
- return self._time_units
-
- def get_grid_edge_count(self, grid):
- raise NotImplementedError("get_grid_edge_count")
-
- def get_grid_edge_nodes(self, grid, edge_nodes):
- raise NotImplementedError("get_grid_edge_nodes")
-
- def get_grid_face_count(self, grid):
- raise NotImplementedError("get_grid_face_count")
-
- def get_grid_face_nodes(self, grid, face_nodes):
- raise NotImplementedError("get_grid_face_nodes")
-
- def get_grid_node_count(self, grid):
- """Number of grid nodes.
-
- Parameters
- ----------
- grid : int
- Identifier of a grid.
-
- Returns
- -------
- int
- Size of grid.
- """
- # return self.get_grid_size(grid)
- raise NotImplementedError("get_grid_node_count")
-
- def get_grid_nodes_per_face(self, grid, nodes_per_face):
- raise NotImplementedError("get_grid_nodes_per_face")
-
- def get_grid_face_edges(self, grid, face_edges):
- raise NotImplementedError("get_grid_face_edges")
-
- def get_grid_x(self, grid, x):
- raise NotImplementedError("get_grid_x")
-
- def get_grid_y(self, grid, y):
- raise NotImplementedError("get_grid_y")
-
- def get_grid_z(self, grid, z):
- raise NotImplementedError("get_grid_z")
-
- def initialize_config(self, config_path: str) -> Dict:
- """
- Check that config_path is valid path and convert config into a
- dictionary object.
- """
- config_path = Path(config_path).resolve()
-
- if not config_path:
- raise RuntimeError("No BMI configuration path provided.")
- elif not config_path.is_file():
- raise RuntimeError(f"BMI configuration not found at path {config_path}.")
- else:
- with config_path.open('r') as f:
- self.config = yaml.safe_load(f)
-
-
- # USE BELOW FOR HYDRA + OMEGACONF:
- # try:
- # config_dict: Union[Dict[str, Any], Any] = OmegaConf.to_container(
- # cfg, resolve=True
- # )
- # config = Config(**config_dict)
- # except ValidationError as e:
- # log.exception(e)
- # raise e
- # return config, config_dict
-
- def init_var_dicts(self):
- """
- Create lookup tables for CSDMS variables and init variable arrays.
- """
- # Make lookup tables for variable name (Peckham et al.).
- self._var_name_map_long_first = {
- long_name:self._var_name_units_map[long_name][0] for \
- long_name in self._var_name_units_map.keys()
- }
- self._var_name_map_short_first = {
- self._var_name_units_map[long_name][0]:long_name for \
- long_name in self._var_name_units_map.keys()}
- self._var_units_map = {
- long_name:self._var_name_units_map[long_name][1] for \
- long_name in self._var_name_units_map.keys()
- }
-
- # Initialize inputs and outputs.
- for var in self.config['observations']['var_t_nn'] + self.config['observations']['var_c_nn']:
- standard_name = self._var_name_map_short_first[var]
- self._nn_values[standard_name] = []
- # setattr(self, var, 0)
-
- for var in self.config['observations']['var_t_hydro_model'] + self.config['observations']['var_c_hydro_model']:
- standard_name = self._var_name_map_short_first[var]
- self._pm_values[standard_name] = []
- # setattr(self, var, 0)
-
- def scale_output(self) -> None:
- """
- Scale and return more meaningful output from the wrapped model.
- """
- models = self.config['hydro_models'][0]
-
- # TODO: still have to finish finding and undoing scaling applied before
- # model run. (See some checks used in bmi_lstm.py.)
-
- # Strip unnecessary time and variable dims. This gives 1D array of flow
- # at each basin.
- # TODO: setup properly for multiple models later.
- self.streamflow_cms = self.preds[models]['flow_sim'].squeeze()
-
- def _get_batch_sample(self, config: Dict, dataset_dictionary: Dict[str, torch.Tensor],
- i_s: int, i_e: int) -> Dict[str, torch.Tensor]:
- """
- Take sample of data for testing batch.
- """
- dataset_sample = {}
- for key, value in dataset_dictionary.items():
- if value.ndim == 3:
- # TODO: I don't think we actually need this.
- # Remove the warmup period for all except airtemp_memory and hydro inputs.
- if key in ['airT_mem_temp_model', 'x_phy', 'inputs_nn_scaled']:
- warm_up = 0
- else:
- warm_up = config['warm_up']
- dataset_sample[key] = value[warm_up:, i_s:i_e, :].to(config['device'])
- elif value.ndim == 2:
- dataset_sample[key] = value[i_s:i_e, :].to(config['device'])
- else:
- raise ValueError(f"Incorrect input dimensions. {key} array must have 2 or 3 dimensions.")
- return dataset_sample
-
- def _values_to_dict(self) -> None:
- """
- Take CSDMS Standard Name-mapped forcings + attributes and construct data
- dictionary for NN and physics model.
- """
- # n_basins = self.config['batch_basins']
- n_basins = 671 # FIXME: hardcoded basin count; should be read from config.
- rho = self.config['rho']
-
- # Initialize dict arrays.
- # NOTE: used to have rho+1 here but this is no longer necessary?
- x_nn = np.zeros((rho + 1, n_basins, len(self.config['observations']['var_t_nn'])))
- c_nn = np.zeros((rho + 1, n_basins, len(self.config['observations']['var_c_nn'])))
- x_phy = np.zeros((rho + 1, n_basins, len(self.config['observations']['var_t_hydro_model'])))
- c_hydro_model = np.zeros((n_basins, len(self.config['observations']['var_c_hydro_model'])))
-
- for i, var in enumerate(self.config['observations']['var_t_nn']):
- standard_name = self._var_name_map_short_first[var]
- # NOTE: Using _values is a bit hacky. Should use get_values I think.
- x_nn[:, :, i] = np.array([self._nn_values[standard_name]])
-
- for i, var in enumerate(self.config['observations']['var_c_nn']):
- standard_name = self._var_name_map_short_first[var]
- c_nn[:, :, i] = np.array([self._nn_values[standard_name]])
-
- for i, var in enumerate(self.config['observations']['var_t_hydro_model']):
- standard_name = self._var_name_map_short_first[var]
- x_phy[:, :, i] = np.array([self._pm_values[standard_name]])
-
- for i, var in enumerate(self.config['observations']['var_c_hydro_model']):
- standard_name = self._var_name_map_short_first[var]
- c_hydro_model[:, i] = np.array([self._pm_values[standard_name]])
-
- self.dataset_dict = {
- 'inputs_nn_scaled': np.concatenate((x_nn, c_nn), axis=2), #[np.newaxis,:,:],
- 'x_phy': x_phy, #[np.newaxis,:,:],
- 'c_hydro_model': c_hydro_model
- }
- if self.verbose:
- print(self.dataset_dict['inputs_nn_scaled'].shape)
-
- # Convert numpy arrays to torch tensors:
- for key in self.dataset_dict.keys():
- if isinstance(self.dataset_dict[key], np.ndarray):
- self.dataset_dict[key] = torch.from_numpy(self.dataset_dict[key]).float() #.to(self.config['device'])
-
- def get_csdms_name(self, var_name):
- """
- Get CSDMS Standard Name from variable name.
- """
- return self._var_name_map_long_first[var_name]
-
\ No newline at end of file
diff --git a/src/dHBV_2_0/example_dev.py b/src/dHBV_2_0/example_dev.py
new file mode 100644
index 0000000..57daafa
--- /dev/null
+++ b/src/dHBV_2_0/example_dev.py
@@ -0,0 +1,1011 @@
+# Need these for BMI
+import os
+import time
+import typing # needed for the dict annotation on coerce_config below
+from pathlib import Path
+
+# Here is the LSTM model we want to run
+# import nextgen_cuda_lstm
+import lstm.nextgen_cuda_lstm as nextgen_cuda_lstm # (SDP)
+# Import data_tools
+# Basic utilities
+import numpy as np
+import pandas as pd
+# LSTM here is based on PyTorch
+import torch
+# Configuration file functionality
+import yaml
+from bmipy import Bmi
+
+# These are not used (SDP)
+### from torch import nn
+### import sys
+
+#------------------------------------------------------------------------
+USE_PATH = True # (SDP) use pathlib.Path objects for file paths
+# Note: `os` is already imported above, so no conditional re-import is needed.
+
+class bmi_LSTM(Bmi):
+
+ def __init__(self):
+ """Create a Bmi LSTM model that is ready for initialization."""
+ super(bmi_LSTM, self).__init__()
+ self._name = "LSTM for Next Generation NWM"
+ self._values = {}
+ self._var_loc = "node"
+ self._var_grid_id = 0
+ self._var_grid_type = "scalar"
+ self._start_time = 0
+ self._end_time = np.finfo("d").max
+ self._time_units = "hour" # (SDP)
+ self._time_step_size = 1.0 # (SDP)
+
+ #----------------------------------------------
+ # Required, static attributes of the model
+ #----------------------------------------------
+ # Note: not currently in use
+ _att_map = {
+ 'model_name': 'LSTM for Next Generation NWM',
+ 'version': '1.0',
+ 'author_name': 'Jonathan Martin Frame' }
+
+ #---------------------------------------------
+ # Input variable names (CSDMS standard names)
+ #---------------------------------------------
+ _input_var_names = [
+ 'land_surface_radiation~incoming~longwave__energy_flux',
+ 'land_surface_air__pressure',
+ 'atmosphere_air_water~vapor__relative_saturation',
+ 'atmosphere_water__liquid_equivalent_precipitation_rate', ### SDP, 08/30/22
+ ##### 'atmosphere_water__time_integral_of_precipitation_mass_flux', #### SDP
+ 'land_surface_radiation~incoming~shortwave__energy_flux',
+ 'land_surface_air__temperature',
+ 'land_surface_wind__x_component_of_velocity',
+ 'land_surface_wind__y_component_of_velocity']
+ # (Next line didn't fix ngen pointer error)
+ # _input_var_names = []
+
+ #---------------------------------------------
+ # Output variable names (CSDMS standard names)
+ #---------------------------------------------
+ _output_var_names = ['land_surface_water__runoff_depth',
+ 'land_surface_water__runoff_volume_flux']
+ # (Next line didn't fix ngen pointer error)
+ # _output_var_names = ['land_surface_water__runoff_volume_flux']
+
+ #------------------------------------------------------
+ # Create a Python dictionary that maps CSDMS Standard
+ # Names to the model's internal variable names.
+ # This is going to get long,
+ # since the input variable names could come from any forcing...
+ #------------------------------------------------------
+ _var_name_units_map = {
+ 'land_surface_water__runoff_volume_flux':['streamflow_cms','m3 s-1'],
+ 'land_surface_water__runoff_depth':['streamflow_m','m'],
+ #-------------- Dynamic inputs --------------------------------
+ #NJF Let the model assume equivalence of `kg m-2` == `mm h-1` since we can't convert
+ #mass flux automatically from the ngen framework
+ 'atmosphere_water__liquid_equivalent_precipitation_rate':['APCP_surface','mm h-1'],
+ 'land_surface_radiation~incoming~longwave__energy_flux':['DLWRF_surface','W m-2'],
+ 'land_surface_radiation~incoming~shortwave__energy_flux':['DSWRF_surface','W m-2'],
+ 'atmosphere_air_water~vapor__relative_saturation':['SPFH_2maboveground','kg kg-1'],
+ 'land_surface_air__pressure':['PRES_surface','Pa'],
+ 'land_surface_air__temperature':['TMP_2maboveground','degK'],
+ 'land_surface_wind__x_component_of_velocity':['UGRD_10maboveground','m s-1'],
+ 'land_surface_wind__y_component_of_velocity':['VGRD_10maboveground','m s-1'],
+ #-------------- STATIC Attributes -----------------------------
+ 'basin__mean_of_elevation':['elev_mean','m'],
+ 'basin__mean_of_slope':['slope_mean','m km-1'],
+ }
+
+ _static_attributes_list = ['elev_mean','slope_mean']
+
+ def __getattribute__(self, item):
+ """
+ Customize instance attribute access.
+
+ For those items that correspond to BMI input or output variables (which should be in numpy arrays) and have
+ values that are just a single-element array, deviate from the standard behavior and return the single array
+ element. Fall back to the default behavior in any other case.
+
+ This supports having a BMI variable be backed by a numpy array, while also allowing the attribute to be used as
+ just a scalar, as it is in many places for this type.
+
+ Parameters
+ ----------
+ item
+ The name of the attribute item to get.
+
+ Returns
+ -------
+ The value of the named item.
+ """
+ # Have these work explicitly (or else loops)
+ if item == '_input_var_names' or item == '_output_var_names':
+ return super(bmi_LSTM, self).__getattribute__(item)
+
+ # By default, for things other than BMI variables, use normal behavior
+ if item not in super(bmi_LSTM, self).__getattribute__('_input_var_names') and item not in super(bmi_LSTM, self).__getattribute__('_output_var_names'):
+ return super(bmi_LSTM, self).__getattribute__(item)
+
+ # Return the single scalar value from any ndarray of size 1
+ value = super(bmi_LSTM, self).__getattribute__(item)
+ if isinstance(value, np.ndarray) and value.size == 1:
+ return value[0]
+ else:
+ return value
+
+ def __setattr__(self, key, value):
+ """
+ Customized instance attribute mutator functionality.
+
+ For those attribute with keys indicating they are a BMI input or output variable (which should be in numpy
+ arrays), wrap any scalar ``value`` as a one-element numpy array and use that in a nested call to the superclass
+ implementation of this function. In any other cases, just pass the given ``key`` and ``value`` to a nested
+ call.
+
+ This supports automatically having a BMI variable be backed by a numpy array, even if it is initialized using a
+ scalar, while otherwise maintaining standard behavior.
+
+ Parameters
+ ----------
+ key
+ value
+
+ Returns
+ -------
+
+ """
+ # Have these work explicitly (or else loops)
+ if key == '_input_var_names' or key == '_output_var_names':
+ super(bmi_LSTM, self).__setattr__(key, value)
+
+ # Pass thru if value is already an array
+ if isinstance(value, np.ndarray):
+ super(bmi_LSTM, self).__setattr__(key, value)
+ # Override to put scalars into ndarray for BMI input/output variables
+ elif key in self._input_var_names or key in self._output_var_names:
+ super(bmi_LSTM, self).__setattr__(key, np.array([value]))
+ # By default, use normal behavior
+ else:
+ super(bmi_LSTM, self).__setattr__(key, value)
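+
+ # Illustration of the wrap/unwrap behavior above (a sketch, not executed
+ # here): for a BMI variable name such as 'land_surface_water__runoff_depth',
+ # self.land_surface_water__runoff_depth = 2.5
+ # stores np.array([2.5]) via __setattr__, while reading the attribute back
+ # through __getattribute__ returns the scalar 2.5. Attributes that are not
+ # BMI input/output variables keep normal Python behavior.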
+
+ #------------------------------------------------------------
+ #------------------------------------------------------------
+ # BMI: Model Control Functions
+ #------------------------------------------------------------
+ #------------------------------------------------------------
+
+ #-------------------------------------------------------------------
+ def initialize( self, bmi_cfg_file=None ):
+ #NJF ensure this is a Path type so the follow open works as expected
+ #When used with NGen, the bmi_cfg_file is just a string...
+
+ bmi_cfg_file = Path(bmi_cfg_file)
+ # ----- Create some lookup tables from the long variable names --------#
+ self._var_name_map_long_first = {long_name:self._var_name_units_map[long_name][0] for \
+ long_name in self._var_name_units_map.keys()}
+ self._var_name_map_short_first = {self._var_name_units_map[long_name][0]:long_name for \
+ long_name in self._var_name_units_map.keys()}
+ self._var_units_map = {long_name:self._var_name_units_map[long_name][1] for \
+ long_name in self._var_name_units_map.keys()}
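+ # e.g., _var_name_map_long_first['land_surface_air__temperature'] returns
+ # 'TMP_2maboveground', and _var_name_map_short_first inverts that mapping.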
+
+ # -------------- Initialize all the variables ------------------------#
+ # -------------- so that they'll be picked up with the get functions --#
+ for var_name in list(self._var_name_units_map.keys()):
+ # ---------- All the variables are single values ------------------#
+ # ---------- so just set to zero for now. ------------------#
+ self._values[var_name] = 0.0
+ setattr( self, var_name, 0.0 )
+
+ # -------------- Read in the BMI configuration -------------------------#
+ # This will direct all the next moves.
+ if bmi_cfg_file is not None:
+ #----------------------------------------------------------
+ # Note: bmi_cfg_file should have type 'str', vs. being a
+ # Path object. So apply Path in initialize(). (SDP)
+ #----------------------------------------------------------
+ ### with bmi_cfg_file.open('r') as fp: # (orig)
+ with open(bmi_cfg_file,'r') as fp: # (SDP)
+ cfg = yaml.safe_load(fp)
+ self.cfg_bmi = self._parse_config(cfg)
+ else:
+ print("Error: No configuration provided, nothing to do...")
+
+ # Number of individual ensemble members
+ self.N_ENS = len(self.cfg_bmi['train_cfg_file'])
+
+ # Note: these need to be initialized here as scale_output() called in update()
+ self.lstm_output = {i_ens:0.0 for i_ens in range(self.N_ENS)}
+ self.streamflow_cms = {i_ens:0.0 for i_ens in range(self.N_ENS)}
+ self.streamflow_fms = {i_ens:0.0 for i_ens in range(self.N_ENS)}
+ self.surface_runoff_mm = {i_ens:0.0 for i_ens in range(self.N_ENS)}
+
+ # Gather verbosity lvl from bmi-config for stdout printing, etc.
+ self.verbose = self.cfg_bmi['verbose']
+ if self.verbose == 0:
+ print("Will not print anything except errors because verbosity set to", self.verbose)
+ if self.verbose == 1:
+ print("Will print warnings and errors because verbosity set to", self.verbose)
+ if self.verbose > 1:
+ print("Will print warnings, errors and random information because verbosity set to", self.verbose)
+ print("self.verbose", self.verbose)
+
+ # ------------- Load in the configuration file for the specific LSTM --#
+ # This will include all the details about how the model was trained
+ # Inputs, outputs, hyper-parameters, scalers, weights, etc. etc.
+ self.get_training_configurations()
+ self.get_scaler_values()
+
+ # ------------- Initialize an ENSEMBLE OF LSTM models ------------------------------#
+ self.lstm = {}
+ self.h_t = {}
+ self.c_t = {}
+
+ for i_ens in range(self.N_ENS):
+ self.lstm[i_ens] = nextgen_cuda_lstm.Nextgen_CudaLSTM(input_size=self.input_size[i_ens],
+ hidden_layer_size=self.hidden_layer_size[i_ens],
+ output_size=self.output_size[i_ens],
+ batch_size=1,
+ seq_length=1)
+
+ # ------------ Load in the trained weights ----------------------------#
+ # Save the default model weights. We need to make sure we have the same keys.
+ default_state_dict = self.lstm[i_ens].state_dict()
+
+ # Trained model weights from Neuralhydrology.
+ if (USE_PATH): # (SDP)
+
+ if self.verbose > 0:
+ print(self.cfg_train[i_ens]['run_dir'])
+
+ trained_model_file = self.cfg_train[i_ens]['run_dir'] / 'model_epoch{}.pt'.format(str(self.cfg_train[i_ens]['epochs']).zfill(3))
+ else:
+ str1 = self.cfg_train[i_ens]['run_dir'] + '/' + 'model_epoch{}.pt'
+ trained_model_file = str1.format(str(self.cfg_train[i_ens]['epochs']).zfill(3))
+
+ trained_state_dict = torch.load(trained_model_file, map_location=torch.device('cpu'))
+
+ # Changing the name of the head weights, since different in NH
+ trained_state_dict['head.weight'] = trained_state_dict.pop('head.net.0.weight')
+ trained_state_dict['head.bias'] = trained_state_dict.pop('head.net.0.bias')
+ trained_state_dict = {x:trained_state_dict[x] for x in default_state_dict.keys()}
+
+ # Load in the trained weights.
+ self.lstm[i_ens].load_state_dict(trained_state_dict)
+
+ # ------------- Initialize the values for the input to the LSTM -----#
+ # jmframe(jan 27): If we assume all models have the same inputs, this only needs to happen once.
+ if i_ens == 0:
+ self.set_static_attributes()
+ self.initialize_forcings()
+
+ if self.cfg_bmi['initial_state'] == 'zero':
+ self.h_t[i_ens] = torch.zeros(1, self.batch_size, self.hidden_layer_size[i_ens]).float()
+ self.c_t[i_ens] = torch.zeros(1, self.batch_size, self.hidden_layer_size[i_ens]).float()
+
+ # ------------- Start a simulation time -----------------------------#
+ # jmframe: The simulation time here doesn't really matter, so just use
+ # seconds and set the time to zero. Some logic could be added later to
+ # allow starting at an arbitrary time.
+ self.t = self._start_time
+
+ # ----------- The output is area-normalized; this factor un-normalizes it:
+ # mm -> m (1/1000), km2 -> m2 (*1000*1000), per hour -> per second (1/3600)
+ self.output_factor_cms = (1/1000) * (self.cfg_bmi['area_sqkm'] * 1000*1000) * (1/3600)
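+ # Worked example (illustrative numbers, not from a real config): for
+ # area_sqkm = 100, a runoff of 1 mm/h becomes
+ # (1/1000 m) * (100 * 1e6 m2) / (3600 s) ~= 27.8 m3/s,
+ # i.e. output_factor_cms ~= 27.8 for that basin.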
+
+ #------------------------------------------------------------
+ def update(self):
+ with torch.no_grad():
+
+ self.create_scaled_input_tensor()
+
+ for i_ens in range(self.N_ENS):
+
+ self.lstm_output[i_ens], self.h_t[i_ens], self.c_t[i_ens] = self.lstm[i_ens].forward(self.input_tensor[i_ens], self.h_t[i_ens], self.c_t[i_ens])
+
+ self.scale_output(i_ens)
+
+ self.ensemble_output()
+
+ #self.t += self._time_step_size
+ self.t += self.get_time_step()
+
+ #------------------------------------------------------------
+ def update_frac(self, time_frac):
+ """Update model by a fraction of a time step.
+ Parameters
+ ----------
+ time_frac : float
+ Fraction of a time step.
+ """
+ if self.verbose > 0:
+ print("Warning: This version of the LSTM is designed to make predictions on one hour timesteps.")
+ time_step = self.get_time_step()
+ self._time_step_size = time_frac * self._time_step_size
+ self.update()
+ self._time_step_size = time_step
+
+ #------------------------------------------------------------
+ def update_until(self, then):
+ """Update model until a particular time.
+ Parameters
+ ----------
+ then : float
+ Time to run model until.
+ """
+ if self.verbose > 0:
+ print("then", then)
+ print("self.get_current_time()", self.get_current_time())
+ print("self.get_time_step()", self.get_time_step())
+ n_steps = (then - self.get_current_time()) / self.get_time_step()
+
+ for _ in range(int(n_steps)):
+ self.update()
+ self.update_frac(n_steps - int(n_steps))
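+
+ # Sketch: with hourly steps, update_until(5.5) from t = 0 runs five full
+ # update() calls plus update_frac(0.5) for the remaining half step.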
+
+ #------------------------------------------------------------
+ def finalize( self ):
+ """Finalize model."""
+ self._model = None
+
+ #------------------------------------------------------------
+ #------------------------------------------------------------
+ # LSTM: SETUP Functions
+ #------------------------------------------------------------
+ #------------------------------------------------------------
+
+ #-------------------------------------------------------------------
+ def get_training_configurations(self):
+
+ self.cfg_train = {}
+ self.input_size = {}
+ self.hidden_layer_size = {}
+ self.output_size = {}
+ self.all_lstm_inputs = {}
+ self.train_data_scaler = {}
+
+ for i_ens in range(self.N_ENS):
+
+ if self.cfg_bmi['train_cfg_file'][i_ens] is not None:
+ if (USE_PATH): # (SDP)
+ with self.cfg_bmi['train_cfg_file'][i_ens].open('r') as fp:
+ cfg = yaml.safe_load(fp)
+ self.cfg_train[i_ens] = self._parse_config(cfg)
+ else:
+ with open(self.cfg_bmi['train_cfg_file'][i_ens],'r') as fp: # (SDP)
+ cfg = yaml.safe_load(fp)
+ self.cfg_train[i_ens] = self._parse_config(cfg)
+
+ # Including a list of the model input names.
+ if self.verbose > 0:
+ print("Setting the LSTM arcitecture based on the last run ensemble configuration")
+ print(self.cfg_train[i_ens])
+ # Collect the LSTM model architecture details from the configuration file
+ self.input_size[i_ens] = len(self.cfg_train[i_ens]['dynamic_inputs']) + len(self.cfg_train[i_ens]['static_attributes'])
+ self.hidden_layer_size[i_ens] = self.cfg_train[i_ens]['hidden_size']
+ self.output_size[i_ens] = len(self.cfg_train[i_ens]['target_variables'])
+
+ self.all_lstm_inputs[i_ens] = []
+ self.all_lstm_inputs[i_ens].extend(self.cfg_train[i_ens]['dynamic_inputs'])
+ self.all_lstm_inputs[i_ens].extend(self.cfg_train[i_ens]['static_attributes'])
+
+ # WARNING: This implementation of the LSTM can only handle a batch size of 1,
+ # so there is no need to support different batch sizes.
+ self.batch_size = 1
+
+ scaler_file = os.path.join(self.cfg_train[i_ens]['run_dir'], 'train_data', 'train_data_scaler.yml')
+
+ with open(scaler_file, 'r') as f:
+ scaler_data = yaml.safe_load(f)
+
+ self.train_data_scaler[i_ens] = scaler_data
+
+ # Scaler data from the training set. This is used to normalize the data (input and output).
+ if self.verbose > 1:
+ print(f"ensemble member {i_ens}")
+ print(self.cfg_train[i_ens]['run_dir'])
+
+ #------------------------------------------------------------
+ def get_scaler_values(self):
+
+ """Mean and standard deviation for the inputs and LSTM outputs"""
+
+ self.input_mean = {}
+ self.input_std = {}
+ self.out_mean = {}
+ self.out_std = {}
+
+ for i_ens in range(self.N_ENS):
+
+ self.out_mean[i_ens] = self.train_data_scaler[i_ens]['xarray_feature_center']['data_vars'][self.cfg_train[i_ens]['target_variables'][0]]['data']
+ self.out_std[i_ens] = self.train_data_scaler[i_ens]['xarray_feature_scale']['data_vars'][self.cfg_train[i_ens]['target_variables'][0]]['data']
+
+ self.input_mean[i_ens] = []
+ self.input_mean[i_ens].extend([self.train_data_scaler[i_ens]['xarray_feature_center']['data_vars'][x]['data'] for x in self.cfg_train[i_ens]['dynamic_inputs']])
+ self.input_mean[i_ens].extend([self.train_data_scaler[i_ens]['attribute_means'][x] for x in self.cfg_train[i_ens]['static_attributes']])
+ self.input_mean[i_ens] = np.array(self.input_mean[i_ens])
+
+ self.input_std[i_ens] = []
+ self.input_std[i_ens].extend([self.train_data_scaler[i_ens]['xarray_feature_scale']['data_vars'][x]['data'] for x in self.cfg_train[i_ens]['dynamic_inputs']])
+ self.input_std[i_ens].extend([self.train_data_scaler[i_ens]['attribute_stds'][x] for x in self.cfg_train[i_ens]['static_attributes']])
+ self.input_std[i_ens] = np.array(self.input_std[i_ens])
+ if self.verbose > 1:
+ print('###########################')
+ print('input_mean')
+ print(self.input_mean[i_ens])
+ print('input_std')
+ print(self.input_std[i_ens])
+ print('out_mean')
+ print(self.out_mean[i_ens])
+ print('out_std')
+ print(self.out_std[i_ens])
+
+ #------------------------------------------------------------
+ def create_scaled_input_tensor(self):
+
+ self.input_list = {}
+ self.input_array = {}
+ self.input_array_scaled = {}
+ self.input_tensor = {}
+
+ #------------------------------------------------------------
+ # Note: A BMI-enabled model should not use long var names
+ # internally (i.e. saved into self); it should just
+ # use convenient short names. For the BMI functions
+ # that require a long var name, it should be mapped
+ # to the model's short name before taking action.
+ #------------------------------------------------------------
+ # TODO: Choose to store values in dictionary or not.
+
+ #--------------------------------------------------------------
+ # Note: The code in this block is more verbose, but makes it
+ # much easier to test and debug and helped find a bug
+ # in the lines above (long vs. short names.)
+ #--------------------------------------------------------------
+ for i_ens in range(self.N_ENS):
+ if self.verbose > 1:
+ print('Creating scaled input tensor...')
+ n_inputs = len(self.all_lstm_inputs[i_ens])
+ self.input_list[i_ens] = [] #############
+ DEBUG = False
+ for k in range(n_inputs):
+ short_name = self.all_lstm_inputs[i_ens][k]
+ long_name = self._var_name_map_short_first[ short_name ]
+ # vals = self.get_value( self, long_name )
+ vals = getattr( self, short_name ) ####################
+
+ self.input_list[i_ens].append( vals )
+ if self.verbose > 1:
+ print(' short_name =', short_name )
+ print(' long_name =', long_name )
+ array = getattr( self, short_name )
+ ## array = self.get_value( long_name )
+ print(' type =', type(vals) )
+ print(' vals =', vals )
+
+ #--------------------------------------------------------
+ # W/o setting dtype here, it was "object_", and crashed
+ #--------------------------------------------------------
+ ## self.input_array = np.array( self.input_list )
+ self.input_array[i_ens] = np.array( self.input_list[i_ens], dtype='float64' ) # SDP
+ if self.verbose > 0:
+ print('Normalizing the tensor...')
+ print(' input_mean =', self.input_mean[i_ens] )
+ print(' input_std =', self.input_std[i_ens] )
+ print()
+ # Center and scale the input values for use in torch
+ self.input_array_scaled[i_ens] = (self.input_array[i_ens] - self.input_mean[i_ens]) / self.input_std[i_ens]
+ if self.verbose > 1:
+ print('### input_list =', self.input_list[i_ens])
+ print('### input_array =', self.input_array[i_ens])
+ print('### dtype(input_array) =', self.input_array[i_ens].dtype )
+ print('### type(input_array_scaled) =', type(self.input_array_scaled[i_ens]))
+ print('### dtype(input_array_scaled) =', self.input_array_scaled[i_ens].dtype )
+ print()
+ self.input_tensor[i_ens] = torch.tensor(self.input_array_scaled[i_ens])
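+
+ # The scaling above is a standard z-score normalization with training-set
+ # statistics: x_scaled = (x - mean) / std. With illustrative numbers,
+ # x = 12.0, mean = 10.0, std = 4.0 gives x_scaled = 0.5.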
+
+ #------------------------------------------------------------
+ def scale_output(self, i_ens):
+
+ if self.verbose > 1:
+ print("model output:", self.lstm_output[i_ens][0,0,0].numpy().tolist())
+
+ if self.cfg_train[i_ens]['target_variables'][0] in ['qobs_mm_per_hour', 'QObs(mm/hr)', 'QObs(mm/h)']:
+ self.surface_runoff_mm[i_ens] = (self.lstm_output[i_ens][0,0,0].numpy().tolist() * self.out_std[i_ens] + self.out_mean[i_ens])
+
+ elif self.cfg_train[i_ens]['target_variables'][0] in ['QObs(mm/d)']:
+ self.surface_runoff_mm[i_ens] = (self.lstm_output[i_ens][0,0,0].numpy().tolist() * self.out_std[i_ens] + self.out_mean[i_ens]) * (1/24)
+
+ self.surface_runoff_mm[i_ens] = max(self.surface_runoff_mm[i_ens],0.0)
+
+ setattr(self, 'land_surface_water__runoff_depth', self.surface_runoff_mm[i_ens]/1000.0)
+ self.streamflow_cms[i_ens] = self.surface_runoff_mm[i_ens] * self.output_factor_cms
+
+ if self.verbose > 1:
+ print("streamflow:", self.streamflow_cms[i_ens])
+
+
+ #-------------------------------------------------------------------
+ def ensemble_output(self):
+ # Calculate mean surface runoff (mm) across ensemble members
+ ens_mean_surface_runoff_mm = np.mean([self.surface_runoff_mm[i_ens] for i_ens in range(self.N_ENS)])
+
+ # Set the land_surface_water__runoff_depth attribute (convert mm to m)
+ setattr(self, 'land_surface_water__runoff_depth', ens_mean_surface_runoff_mm / 1000.0)
+
+ # Calculate mean streamflow (cms) across ensemble members
+ ens_mean_streamflow_cms = np.mean([self.streamflow_cms[i_ens] for i_ens in range(self.N_ENS)])
+
+ # Set the land_surface_water__runoff_volume_flux attribute
+ setattr(self, 'land_surface_water__runoff_volume_flux', ens_mean_streamflow_cms)
+
+ #----------------------------------------------------------------------------
+ def set_static_attributes(self):
+ """ Get the static attributes from the configuration file
+ """
+ i_ens = 0
+ for attribute in self._static_attributes_list:
+ if attribute in self.cfg_train[i_ens]['static_attributes']:
+ # Note: use convenient short names internally (see the long- vs.
+ # short-name note in create_scaled_input_tensor()).
+ setattr(self, attribute, self.cfg_bmi[attribute]) # SDP
+
+ #----------------------------------------------------------------------------
+ def initialize_forcings(self):
+
+ if self.verbose > 0:
+ print('Initializing all forcings to 0...')
+ i_ens = 0
+
+ for forcing_name in self.cfg_train[i_ens]['dynamic_inputs']:
+ if self.verbose > 1:
+ print(' forcing_name =', forcing_name)
+ # Note: use convenient short names internally (see the long- vs.
+ # short-name note in create_scaled_input_tensor()).
+ setattr(self, forcing_name, 0)
+
+ #-------------------------------------------------------------------
+ #-------------------------------------------------------------------
+ # BMI: Model Information Functions
+ #-------------------------------------------------------------------
+ #-------------------------------------------------------------------
+
+ # Note: not currently using _att_map{}
+ # def get_attribute(self, att_name):
+
+ # try:
+ # return self._att_map[ att_name.lower() ]
+ # except:
+ # print(' ERROR: Could not find attribute: ' + att_name)
+
+ #--------------------------------------------------------
+ # Note: These are currently variables needed from other
+ # components vs. those read from files or GUI.
+ #--------------------------------------------------------
+ def get_input_var_names(self):
+
+ return self._input_var_names
+
+ def get_output_var_names(self):
+
+ return self._output_var_names
+
+ #------------------------------------------------------------
+ def get_component_name(self):
+ """Name of the component."""
+ #return self.get_attribute( 'model_name' )
+ return self._name
+
+ #------------------------------------------------------------
+ def get_input_item_count(self):
+ """Get names of input variables."""
+ return len(self._input_var_names)
+
+ #------------------------------------------------------------
+ def get_output_item_count(self):
+ """Get names of output variables."""
+ return len(self._output_var_names)
+
+ #------------------------------------------------------------
+ def get_value(self, var_name: str, dest: np.ndarray) -> np.ndarray:
+ """
+ Copy values for the named variable into the provided destination array.
+
+ Parameters
+ ----------
+ var_name : str
+ Name of variable as CSDMS Standard Name.
+ dest : np.ndarray
+ A numpy array into which to copy the variable values.
+ Returns
+ -------
+ np.ndarray
+ Copy of values.
+ """
+ dest[:] = self.get_value_ptr(var_name)
+
+ if self.verbose > 1:
+ print("self.verbose", self.verbose)
+ print("get value dest", dest)
+
+ return dest
+
+ #-------------------------------------------------------------------
+ def get_value_ptr(self, var_name: str) -> np.ndarray:
+ """
+ Get reference to values.
+
+ Get the backing reference - i.e., the backing numpy array - for the given variable.
+
+ Parameters
+ ----------
+ var_name : str
+ Name of variable as CSDMS Standard Name.
+ Returns
+ -------
+ np.ndarray
+ Value array.
+ """
+ # We actually need this function to return the backing array, so bypass override of __getattribute__ (that
+ # extracts scalar) and use the base implementation
+ return super(bmi_LSTM, self).__getattribute__(var_name)
+
+ #-------------------------------------------------------------------
+ #-------------------------------------------------------------------
+ # BMI: Variable Information Functions
+ #-------------------------------------------------------------------
+ #-------------------------------------------------------------------
+ def get_var_name(self, long_var_name):
+
+ return self._var_name_map_long_first[ long_var_name ]
+
+ #-------------------------------------------------------------------
+ def get_var_units(self, long_var_name):
+
+ return self._var_units_map[ long_var_name ]
+
+ #-------------------------------------------------------------------
+ def get_var_type(self, long_var_name):
+ """Data type of variable.
+
+ Parameters
+ ----------
+ var_name : str
+ Name of variable as CSDMS Standard Name.
+
+ Returns
+ -------
+ str
+ Data type.
+ """
+
+ #JG MW 03.01.23 - otherwise Bmi_py_Adaptor.hpp `get_analogous_cxx_type` fails
+ return self.get_value_ptr(long_var_name).dtype.name
+ #------------------------------------------------------------
+ def get_var_grid(self, name):
+
+ # Note: all vars have grid 0, but check that it's in the names list first
+ if name in (self._output_var_names + self._input_var_names):
+ return self._var_grid_id
+
+ #------------------------------------------------------------
+ def get_var_itemsize(self, name):
+ # JG get_value_ptr is already an np.array
+ return self.get_value_ptr(name).itemsize
+
+ #------------------------------------------------------------
+ def get_var_location(self, name):
+
+ # Note: all vars have location node, but check that it's in the names list first
+ if name in (self._output_var_names + self._input_var_names):
+ return self._var_loc
+
+ #-------------------------------------------------------------------
+ # JG Note: what is this used for?
+ def get_var_rank(self, long_var_name):
+
+ return np.int16(0)
+
+ #-------------------------------------------------------------------
+ def get_start_time( self ):
+
+ return self._start_time
+
+ #-------------------------------------------------------------------
+ def get_end_time( self ):
+
+ return self._end_time
+
+
+ #-------------------------------------------------------------------
+ def get_current_time( self ):
+
+ return self.t
+
+ #-------------------------------------------------------------------
+ def get_time_step( self ):
+
+ return self._time_step_size
+
+ #-------------------------------------------------------------------
+ def get_time_units( self ):
+
+ # Note: get_attribute() is not a BMI v2 method
+ return self._time_units
+
+ #-------------------------------------------------------------------
+ def set_value(self, var_name: str, values:np.ndarray):
+ """Set model values.
+
+ Parameters
+ ----------
+ var_name : str
+ Name of variable as CSDMS Standard Name.
+ values : array_like
+ Array of new values.
+ """
+
+ internal_array = self.get_value_ptr(var_name)
+ internal_array[:] = values
+
+ short_name = self._var_name_map_long_first[ var_name ]
+
+ if (internal_array.ndim > 0):
+ setattr( self, short_name, internal_array[0])
+ else:
+ setattr( self, short_name, internal_array )
+
+ try:
+ #NJF From NGEN, `internal_array` is a singleton array
+ setattr( self, var_name, internal_array[0] )
+
+ # jmframe: this next line is basically a duplicate.
+ # I guess we should stick with the attribute names instead of a dictionary approach.
+ self._values[var_name] = internal_array[0]
+ # JLG 03242022: this isn't really an "error" block as standalone considers value as scalar?
+ except TypeError:
+ setattr( self, var_name, internal_array )
+
+ # jmframe: this next line is basically a duplicate.
+ # I guess we should stick with the attribute names instead of a dictionary approach.
+ self._values[var_name] = internal_array
+
+ #------------------------------------------------------------
+ def set_value_at_indices(self, var_name: str, inds: np.ndarray, src: np.ndarray):
+ """
+ Set model values at particular indices.
+
+ Parameters
+ ----------
+ var_name : str
+ Name of variable as CSDMS Standard Name.
+ inds : np.ndarray
+ Array of corresponding indices into which to copy the values within ``src``.
+ src : np.ndarray
+ Array of new values.
+ """
+ internal_array = self.get_value_ptr(var_name)
+ for i in range(inds.shape[0]):
+ internal_array[inds[i]] = src[i]
+
+ #------------------------------------------------------------
+ def get_var_nbytes(self, var_name):
+ """Get units of variable.
+ Parameters
+ ----------
+ var_name : str
+ Name of variable as CSDMS Standard Name.
+ Returns
+ -------
+ int
+ Size of data array in bytes.
+ """
+ return self.get_var_itemsize(var_name)*len(self.get_value_ptr(var_name))
+
+ #------------------------------------------------------------
+ def get_value_at_indices(self, var_name: str, dest:np.ndarray, indices:np.ndarray) -> np.ndarray:
+ """Get values at particular indices.
+ Parameters
+ ----------
+ var_name : str
+ Name of variable as CSDMS Standard Name.
+ dest : ndarray
+ A numpy array into which to place the values.
+ indices : array_like
+ Array of indices.
+ Returns
+ -------
+ array_like
+ Values at indices.
+ """
+ #NJF This must copy into dest!!!
+ # Copy the requested values element-by-element into dest.
+ original: np.ndarray = self.get_value_ptr(var_name)
+ for i in range(indices.shape[0]):
+ value_index = indices[i]
+ dest[i] = original[value_index]
+ return dest
+
+ # Note: remaining grid funcs do not apply for type 'scalar'
+ # Yet all functions in the BMI must be implemented
+ # See https://bmi.readthedocs.io/en/latest/bmi.best_practices.html
+ #------------------------------------------------------------
+ def get_grid_edge_count(self, grid):
+ raise NotImplementedError("get_grid_edge_count")
+
+ #------------------------------------------------------------
+ def get_grid_edge_nodes(self, grid, edge_nodes):
+ raise NotImplementedError("get_grid_edge_nodes")
+
+ #------------------------------------------------------------
+ def get_grid_face_count(self, grid):
+ raise NotImplementedError("get_grid_face_count")
+
+ #------------------------------------------------------------
+ def get_grid_face_edges(self, grid, face_edges):
+ raise NotImplementedError("get_grid_face_edges")
+
+ #------------------------------------------------------------
+ def get_grid_face_nodes(self, grid, face_nodes):
+ raise NotImplementedError("get_grid_face_nodes")
+
+ #------------------------------------------------------------
+ def get_grid_node_count(self, grid):
+ raise NotImplementedError("get_grid_node_count")
+
+ #------------------------------------------------------------
+ def get_grid_nodes_per_face(self, grid, nodes_per_face):
+ raise NotImplementedError("get_grid_nodes_per_face")
+
+ #------------------------------------------------------------
+ def get_grid_origin(self, grid_id, origin):
+ raise NotImplementedError("get_grid_origin")
+
+ #------------------------------------------------------------
+    def get_grid_rank(self, grid_id):
+        # 0 is the only grid id we have.
+        if grid_id == 0:
+            return 1
+        raise ValueError(f"Unsupported grid id: {grid_id}")
+
+ #------------------------------------------------------------
+ def get_grid_shape(self, grid_id, shape):
+ raise NotImplementedError("get_grid_shape")
+
+ #------------------------------------------------------------
+    def get_grid_size(self, grid_id):
+        # 0 is the only grid id we have.
+        if grid_id == 0:
+            return 1
+        raise ValueError(f"Unsupported grid id: {grid_id}")
+
+ #------------------------------------------------------------
+ def get_grid_spacing(self, grid_id, spacing):
+ raise NotImplementedError("get_grid_spacing")
+
+ #------------------------------------------------------------
+    def get_grid_type(self, grid_id=0):
+        # 0 is the only grid id we have.
+        if grid_id == 0:
+            return 'scalar'
+        raise ValueError(f"Unsupported grid id: {grid_id}")
+
+ #------------------------------------------------------------
+    def get_grid_x(self, grid, x):
+        raise NotImplementedError("get_grid_x")
+
+    #------------------------------------------------------------
+    def get_grid_y(self, grid, y):
+        raise NotImplementedError("get_grid_y")
+
+    #------------------------------------------------------------
+    def get_grid_z(self, grid, z):
+        raise NotImplementedError("get_grid_z")
+
+
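+    # Query sketch for the 'scalar' grid convention used above (illustrative):
+    #   model.get_grid_type(0)  # -> 'scalar'
+    #   model.get_grid_rank(0)  # -> 1
+    #   model.get_grid_size(0)  # -> 1
+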
+ #------------------------------------------------------------
+ #------------------------------------------------------------
+    #-- Miscellaneous utility functions
+ #------------------------------------------------------------
+ #------------------------------------------------------------
+
+    def _parse_config(self, cfg):
+        """Coerce raw config values (paths, dates) into their expected types."""
+        for key, val in cfg.items():
+            # Handle 'train_cfg_file' specifically to ensure it is always a list.
+            if key == 'train_cfg_file':
+                if val is not None and val != "None":
+                    if isinstance(val, list):
+                        cfg[key] = [Path(element) if USE_PATH else element for element in val]
+                    else:
+                        cfg[key] = [Path(val)] if USE_PATH else [val]
+                else:
+                    cfg[key] = []
+
+            # Convert all path strings to PosixPath objects for other keys.
+            elif any(key.endswith(x) for x in ['_dir', '_path', '_file', '_files']):
+                if val is not None and val != "None":
+                    if isinstance(val, list):
+                        cfg[key] = [Path(element) if USE_PATH else element for element in val]  # (SDP)
+                    else:
+                        cfg[key] = Path(val) if USE_PATH else val  # (SDP)
+                else:
+                    cfg[key] = None
+
+            # Convert dates to pandas datetime objects.
+            elif key.endswith('_date'):
+                if isinstance(val, list):
+                    cfg[key] = [pd.to_datetime(elem, format='%d/%m/%Y') for elem in val]
+                else:
+                    cfg[key] = pd.to_datetime(val, format='%d/%m/%Y')
+
+        # Add more config parsing here if necessary.
+        return cfg
+
+
+
+def coerce_config(cfg: dict[str, typing.Any]) -> dict[str, typing.Any]:
+    """Coerce raw config values (paths, dates) into their expected types.
+
+    The dict is modified in place and also returned for convenience.
+    """
+    for key, val in cfg.items():
+        # Handle 'train_cfg_file' specifically to ensure it is always a list.
+        if key == "train_cfg_file":
+            if val is not None and val != "None":
+                if isinstance(val, list):
+                    cfg[key] = [Path(element) for element in val]
+                else:
+                    cfg[key] = [Path(val)]
+            else:
+                cfg[key] = []
+
+        # Convert all path strings to PosixPath objects for other keys.
+        elif any(key.endswith(x) for x in ["_dir", "_path", "_file", "_files"]):
+            if val is not None and val != "None":
+                if isinstance(val, list):
+                    cfg[key] = [Path(element) for element in val]
+                else:
+                    cfg[key] = Path(val)
+            else:
+                cfg[key] = None
+
+        # Convert dates to pandas datetime objects.
+        elif key.endswith("_date"):
+            if isinstance(val, list):
+                cfg[key] = [pd.to_datetime(elem, format="%d/%m/%Y") for elem in val]
+            else:
+                cfg[key] = pd.to_datetime(val, format="%d/%m/%Y")
+
+    return cfg
+
+
+
+
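+# A minimal usage sketch (keys and paths below are illustrative assumptions,
+# not a shipped config) showing how ``coerce_config`` normalizes a raw dict:
+if __name__ == '__main__':
+    _demo_cfg = {
+        'train_cfg_file': 'models/hydrofabric_15yr/config.yaml',
+        'forcings_dir': 'data/aorc',
+        'start_date': '01/10/1999',
+    }
+    coerce_config(_demo_cfg)
+    # 'train_cfg_file' -> [PosixPath('models/hydrofabric_15yr/config.yaml')],
+    # 'forcings_dir' -> PosixPath('data/aorc'), 'start_date' -> pandas Timestamp
+    print(_demo_cfg)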
diff --git a/src/dHBV_2_0/hbv_2_0.py b/src/dHBV_2_0/hbv_2_0.py
index d8e3d28..54fb267 100644
--- a/src/dHBV_2_0/hbv_2_0.py
+++ b/src/dHBV_2_0/hbv_2_0.py
@@ -1,7 +1,6 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Optional, Union
import torch
-
from hydroDL2.core.calc import change_param_range
from hydroDL2.core.calc.uh_routing import UH_conv, UH_gamma
@@ -35,7 +34,7 @@ class HBVUnitBasin(torch.nn.Module):
"""
def __init__(
self,
- config: Optional[Dict[str, Any]] = None,
+ config: Optional[dict[str, Any]] = None,
device: Optional[torch.device] = None
) -> None:
super().__init__()
@@ -96,12 +95,12 @@ def __init__(
def set_parameters(self) -> None:
"""Get physical parameters."""
self.phy_param_names = self.parameter_bounds.keys()
- if self.routing == True:
+ if self.routing:
self.routing_param_names = self.routing_parameter_bounds.keys()
else:
self.routing_param_names = []
- self.learnable_param_count1 = len(self.dynamic_params) * self.nmul
+ self.learnable_param_count1 = len(self.dynamic_params) * self.nmul
self.learnable_param_count2 = (len(self.phy_param_names) - len(self.dynamic_params)) * self.nmul \
+ len(self.routing_param_names)
self.learnable_param_count = self.learnable_param_count1 + self.learnable_param_count2
@@ -109,7 +108,7 @@ def set_parameters(self) -> None:
def unpack_parameters(
self,
parameters: torch.Tensor,
- ) -> Dict[str, torch.Tensor]:
+ ) -> dict[str, torch.Tensor]:
"""Extract physical model and routing parameters from NN output.
Parameters
@@ -144,7 +143,7 @@ def unpack_parameters(
# Routing parameters
routing_params = None
- if self.routing == True:
+ if self.routing:
routing_params = parameters[1][:, dif_count * self.nmul:]
return phy_dy_params, phy_static_params, routing_params
@@ -179,7 +178,7 @@ def descale_phy_dy_parameters(
staPar = phy_dy_params[-1, :, i,:].unsqueeze(0).repeat([n_steps, 1, 1])
dynPar = phy_dy_params[:, :, i,:]
- drmask = torch.bernoulli(pmat).detach_().cuda()
+ drmask = torch.bernoulli(pmat).detach_().cuda()
comPar = dynPar * (1 - drmask) + staPar * drmask
param_dict[name] = change_param_range(
param=comPar,
@@ -242,9 +241,9 @@ def descale_rout_parameters(
def forward(
self,
- x_dict: Dict[str, torch.Tensor],
+ x_dict: dict[str, torch.Tensor],
parameters: torch.Tensor
- ) -> Union[Tuple, Dict[str, torch.Tensor]]:
+ ) -> Union[tuple, dict[str, torch.Tensor]]:
"""Forward pass for HBV1.1p.
Parameters
@@ -302,7 +301,7 @@ def forward(
)
# Run the model for the remainder of simulation period.
- return self.PBM(
+ return self.PBM(
x,
Ac,
Elevation,
@@ -314,12 +313,12 @@ def forward(
def PBM(
self,
forcing: torch.Tensor,
- Ac:torch.Tensor,
- Elevation:torch.Tensor,
- states: Tuple,
- phy_dy_params_dict: Dict,
- phy_static_params_dict: Dict
- ) -> Union[Tuple, Dict[str, torch.Tensor]]:
+ Ac: torch.Tensor,
+ Elevation: torch.Tensor,
+ states: tuple,
+ phy_dy_params_dict: dict,
+ phy_static_params_dict: dict
+ ) -> Union[tuple, dict[str, torch.Tensor]]:
"""Run the HBV1.1p model forward.
Parameters
@@ -459,7 +458,7 @@ def PBM(
tosoil_sim[t, :, :] = tosoil
PERC_sim[t, :, :] = PERC
- # Get the overall average
+ # Get the overall average
# or weighted average using learned weights.
if self.muwts is None:
Qsimavg = Qsimmu.mean(-1)
@@ -493,7 +492,7 @@ def PBM(
rf_Q2 = Q2_sim.mean(-1, keepdim=True).permute([1, 2, 0])
Q2_rout = UH_conv(rf_Q2, UH).permute([2, 0, 1])
- if self.comprout:
+ if self.comprout:
# Qs is now shape [time, [gages*num models], vars]
Qstemp = Qsrout.view(n_steps, n_grid, self.nmul)
if self.muwts is None:
diff --git a/src/dHBV_2_0/run_bmi_aorc.py b/src/dHBV_2_0/run_bmi_aorc.py
index 567657e..c9211b5 100644
--- a/src/dHBV_2_0/run_bmi_aorc.py
+++ b/src/dHBV_2_0/run_bmi_aorc.py
@@ -1,105 +1,83 @@
-## TODO: needs to be updated to latest dHBV 2.0 dMG implementation.
-
import numpy as np
-from pathlib import Path
-import bmi_dm # Load module bmi_dm (bmi_dm.py) from dhbv_2_0 package.
-import os, os.path
-lstm_dir = os.path.expanduser('../dhbv_2_0/')
-os.chdir( lstm_dir )
-import pandas as pd
+from dHBV_2_0.bmi import DeltaModelBmi as Bmi
-basin_id = "cat-88306"
+### Select a basin from the sample data ###
+basin_id = "cat-88306"
+bmi_config_path = f'C:/Users/LeoLo/Desktop/noaa_owp/dHBV_2_0/bmi_config_files/bmi_config_{basin_id}_5yr.yaml'
+### ----------------------------------- ###
-# Load the USGS data
-# REPLACE THIS PATH WITH YOUR LOCAL FILE PATH:
-file_path = f"/Users/jmframe/CAMELS_data_sample/hourly/usgs-streamflow/{basin_id}-usgs-hourly.csv"
-df_runoff = pd.read_csv(file_path)
-df_runoff = df_runoff.set_index("date")
-df_runoff.index = pd.to_datetime(df_runoff.index)
-df_runoff = df_runoff[["QObs(mm/h)"]].rename(columns={"QObs(mm/h)": "usgs_obs"})
-df_runoff["model_pred"] = None
+# Load the sample AORC forcing and catchment attribute data
# REPLACE THIS PATH WITH YOUR LOCAL FILE PATH:
-forcing_file_path = f"/Users/jmframe/CAMELS_data_sample/hourly/aorc_hourly/{basin_id}_1980_to_2024_agg_rounded.csv"
-df_forcing = pd.read_csv(forcing_file_path)
-df_forcing = df_forcing.set_index("time")
-df_forcing.index = pd.to_datetime(df_forcing.index)
-df_forcing = df_forcing[df_runoff.index[0]:df_runoff.index[-1]]
-
-# Create an instance of the LSTM model with BMI
-model_instance = bmi_lstm.bmi_LSTM()
-
-# Initialize the model with a configuration file
-model_instance.initialize(bmi_cfg_file=Path(f'../bmi_config_files/{basin_id}_nh_AORC_hourly_ensemble.yml'))
-
-# Add ensemble columns to the runoff DataFrame
-for i_ens in range(model_instance.N_ENS):
- df_runoff[f"ensemble_{i_ens+1}"] = None # Initialize ensemble columns with None
-
-
-# Iterate through the forcing DataFrame and calculate model predictions
-print('Working, please wait...')
-for i, (idx, row) in enumerate(df_forcing.iterrows()):
- # Extract forcing data for the current timestep
- precip = row["APCP_surface"]
- temp = row["TMP_2maboveground"]
- dlwrf = row["DLWRF_surface"]
- dswrf = row["DSWRF_surface"]
- pres = row["PRES_surface"]
- spfh = row["SPFH_2maboveground"]
- ugrd = row["UGRD_10maboveground"]
- vgrd = row["VGRD_10maboveground"]
-
- # Check if any of the inputs are NaN
- if np.isnan([precip, temp, dlwrf, dswrf, pres, spfh, ugrd, vgrd]).any():
- if model_instance.verbose > 0:
- print(f"Skipping timestep {idx} due to NaN values in inputs.")
+forc_path = f'C:/Users/LeoLo/Desktop/noaa_owp/dHBV_2_0/data/aorc/juniata_river_basin/forcings_5yr_{basin_id}.npy'
+attr_path = f'C:/Users/LeoLo/Desktop/noaa_owp/dHBV_2_0/data/aorc/juniata_river_basin/attributes_5yr_{basin_id}.npy'
+# obs_path = f'C:/Users/LeoLo/Desktop/noaa_owp/dHBV_2_0/data/aorc/juniata_river_basin/obs_5yr_{basin_id}.npy'
+
+forc = np.load(forc_path)
+attr = np.load(attr_path)
+# obs = np.load(obs_path)
+
+# Create an instance of dHBV 2.0 through the BMI
+model = Bmi(config_path=bmi_config_path)
+
+streamflow_pred = np.zeros(forc.shape[0])
+nan_idx = []
+
+# 1) Stage forcing data inside the BMI for a single batched run.
+for i in range(forc.shape[0]):
+    # Extract forcing data for the current time step.
+ prcp = forc[i, :, 0]
+ temp = forc[i, :, 1]
+ pet = forc[i, :, 2]
+
+    # Check whether any of the inputs are NaN.
+    if np.isnan([prcp, temp, pet]).any():
+        # if model.verbose > 0:
+        print(f"Skipping timestep {i} due to NaN values in inputs.")
+        nan_idx.append(i)
continue
- # Set the model forcings
- model_instance.set_value('atmosphere_water__liquid_equivalent_precipitation_rate', precip)
- model_instance.set_value('land_surface_air__temperature', temp)
- model_instance.set_value('land_surface_radiation~incoming~longwave__energy_flux', dlwrf)
- model_instance.set_value('land_surface_radiation~incoming~shortwave__energy_flux', dswrf)
- model_instance.set_value('land_surface_air__pressure', pres)
- model_instance.set_value('atmosphere_air_water~vapor__relative_saturation', spfh)
- model_instance.set_value('land_surface_wind__x_component_of_velocity', ugrd)
- model_instance.set_value('land_surface_wind__y_component_of_velocity', vgrd)
-
- # Update the model
- model_instance.update()
-
- # Retrieve and scale the runoff output
- dest_array = np.zeros(1)
- model_instance.get_value('land_surface_water__runoff_depth', dest_array)
- land_surface_water__runoff_depth = dest_array[0] * 1000 # Convert to mm/hr
+ model.set_value('atmosphere_water__liquid_equivalent_precipitation_rate', prcp)
+ model.set_value('land_surface_air__temperature', temp)
+ model.set_value('land_surface_water__potential_evaporation_volume_flux', pet)
- # Add ensemble member values to the DataFrame
- for i_ens in range(model_instance.N_ENS):
- df_runoff.loc[idx, f"ensemble_{i_ens+1}"] = model_instance.surface_runoff_mm[i_ens] # Add individual ensemble member values
+### BMI initialization ###
+model.initialize()
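+# Note (hedged): with `stepwise: False` in the BMI config, initialization can
+# run the forward simulation on all staged data in one go; `update()` below
+# then steps through the pre-computed results.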
- # Add the output to the DataFrame
- df_runoff.loc[idx, "model_pred"] = land_surface_water__runoff_depth
-
- if i > 10000:
- break
-
-
-# Ensure "model_pred" is numeric
-df_runoff["model_pred"] = pd.to_numeric(df_runoff["model_pred"], errors="coerce")
-
-# Calculate NSE for the model predictions
-obs = df_runoff["usgs_obs"].dropna()
-sim = df_runoff["model_pred"].dropna()
+# 2) Pseudo forward pass: return the pre-computed prediction at each timestep.
+for i in range(forc.shape[0]):
+ if i in nan_idx:
+ # Skip the update for this timestep
+ continue
-# Align indices of observation and simulation for metric calculation
-common_index = obs.index.intersection(sim.index)
-obs = obs.loc[common_index].values
-sim = sim.loc[common_index].values
+ ### BMI update ###
+ model.update()
-denominator = ((obs - obs.mean()) ** 2).sum()
-numerator = ((sim - obs) ** 2).sum()
-nse = 1 - numerator / denominator
-print(f"NSE: {nse:.2f}")
\ No newline at end of file
+ # Retrieve and scale the runoff output
+ dest_array = np.zeros(1)
+ model.get_value('land_surface_water__runoff_volume_flux', dest_array)
+
+    streamflow_pred[i] = dest_array[0]  # runoff volume flux, in mm/day
+
+### BMI finalization ###
+model.finalize()
+
+print("\n=/= -- Streamflow prediction completed -- =/=")
+print(f" Basin ID: {basin_id}")
+print(f" Total Process Time: {model.bmi_process_time:.4f} seconds")
+print(f" Mean streamflow: {streamflow_pred.mean():.4f} mm/day")
+print(f" Max streamflow: {streamflow_pred.max():.4f} mm/day")
+print(f" Min streamflow: {streamflow_pred.min():.4f} mm/day")
+print("=/= ------------------------------------ =/=")
+
+
+# # Calculate NSE for the model predictions
+# mask = ~np.isnan(obs) & ~np.isnan(streamflow_pred)
+# obs_v, sim_v = obs[mask], streamflow_pred[mask]
+# denom = ((obs_v - obs_v.mean()) ** 2).sum()
+# num = ((sim_v - obs_v) ** 2).sum()
+# nse = 1 - num / denom
+# print(f"NSE: {nse:.2f}")
diff --git a/src/dHBV_2_0/run _bmi_unit_test.py b/src/dHBV_2_0/run_bmi_unit_test.py
similarity index 99%
rename from src/dHBV_2_0/run _bmi_unit_test.py
rename to src/dHBV_2_0/run_bmi_unit_test.py
index 9b4e5b3..515c609 100644
--- a/src/dHBV_2_0/run _bmi_unit_test.py
+++ b/src/dHBV_2_0/run_bmi_unit_test.py
@@ -6,11 +6,11 @@
"""
import os
import sys
-import numpy as np
from pathlib import Path
-from pathlib import Path
-import bmi_dm
+import numpy as np
+
+from dHBV_2_0.bmi import DeltaModelBmi
# setup a "success counter" for number of passing and failing bmi functions
# keep track of function def fails (vs function call)
@@ -46,7 +46,7 @@ def bmi_except(fstring):
print("No configuration file found, exiting...")
sys.exit()
-bmi=bmi_dm.BmiDm(cfg_file)
+bmi = DeltaModelBmi(cfg_file)
#-------------------------------------------------------------------
# initialize()
@@ -385,11 +385,11 @@ def bmi_except(fstring):
bmi.finalize()
print (" finalizing...")
pass_count += 1
-except:
+except Exception:
bmi_except('finalize()')
# lastly - print test summary
print ("\n Total BMI function PASS: " + str(pass_count))
print (" Total BMI function FAIL: " + str(fail_count))
for ff in fail_list:
- print (" " + ff)
+ print (" " + ff)