diff --git a/.gitignore b/.gitignore index efdd953..9410386 100644 --- a/.gitignore +++ b/.gitignore @@ -171,4 +171,12 @@ cython_debug/ .pypirc # Ignore model save files (too large) -*.pt \ No newline at end of file +*.pt + +# Scratch / development / old / and other temporary files +*scratch* +*_dev* +*_old* +temp/ +tmp/ +*.tmp \ No newline at end of file diff --git a/bmi_config_files/bmi_config.yaml b/bmi_config_files/bmi_config.yaml index 8c66286..3390cd7 100644 --- a/bmi_config_files/bmi_config.yaml +++ b/bmi_config_files/bmi_config.yaml @@ -1,5 +1,5 @@ ### Full-catchment configuration ### -train_cfg_file: ../trained_models/merit_conus_40yr/config.yaml +train_cfg_file: models/hydrofabric_15yr/config.yaml initial_state: zero # zero: set initial states of the model to zero verbose: 1 # 0: no output, 1: print output diff --git a/bmi_config_files/bmi_config_cat_88306_5yr.yaml b/bmi_config_files/bmi_config_cat-88306_5yr.yaml similarity index 82% rename from bmi_config_files/bmi_config_cat_88306_5yr.yaml rename to bmi_config_files/bmi_config_cat-88306_5yr.yaml index 32c7267..01e2f19 100644 --- a/bmi_config_files/bmi_config_cat_88306_5yr.yaml +++ b/bmi_config_files/bmi_config_cat-88306_5yr.yaml @@ -3,37 +3,39 @@ catchment_id: 'cat-88306' catchment_name: 'JRB-88306' # Static catchment attributes +aridity: 1.041 +meanP: 991.3 +ETPOT_Hargr: 1031.0 +NDVI: 0.594 FW: 0.004622 +meanslope: 10.55 +SoilGrids1km_sand: 38.65 +SoilGrids1km_clay: 23.31 +SoilGrids1km_silt: 38.06 +glaciers: 0.0 HWSD_clay: 25.0 +HWSD_gravel: 15.0 HWSD_sand: 33.0 -T_clay: 20.0 -uparea: 3.254 -T_gravel: 10.0 +HWSD_silt: 42.0 meanelevation: 335.1 -meanP: 991.3 -HWSD_gravel: 15.0 -seasonality_P: 0.1035 -T_sand: 41.0 -SoilGrids1km_silt: 38.06 +meanTa: 9.845 permafrost: 0.0 -snowfall_fraction: 0.1049 -SoilGrids1km_sand: 38.65 -Porosity: 0.01 -T_silt: 39.0 -glaciers: 0.0 -HWSD_silt: 42.0 -meanslope: 10.55 permeability: -15.05 +seasonality_P: 0.1035 seasonality_PET: 0.5703 -ETPOT_Hargr: 1031.0 -meanTa: 9.845 -SoilGrids1km_clay: 23.31 snow_fraction: 0.02967 -aridity: 1.041 -NDVI: 0.594 +snowfall_fraction: 0.1049 +T_clay: 20.0 +T_gravel: 10.0 +T_sand: 41.0 +T_silt: 39.0 +Porosity: 0.01 +uparea: 3.254 -train_cfg_file: ../trained_models/merit_conus_40yr/config.yaml +config_model: models/hydrofabric_15yr/config.yaml +stepwise: False # True: stepwise inference, False: Single forward simulation on all data in one go initial_state: zero # zero: set initial states of the model to zero +dtype: np.float32 verbose: 1 # 0: no output, 1: print output # Simulation window diff --git a/environment.yaml b/envs/environment.yaml similarity index 100% rename from environment.yaml rename to envs/environment.yaml diff --git a/example/bmi_demo.ipynb b/example/bmi_demo.ipynb new file mode 100644 index 0000000..65b963b --- /dev/null +++ b/example/bmi_demo.ipynb @@ -0,0 +1,181 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Forward $\\delta$ HBV 2.0UH with BMI\n", + "\n", + "\n", + "Notes:\n", + "- This example uses a subset of AORC forcings that can be downloaded from S3 here.\n", + "\n", + "- An environment can be set up with either pip or conda using `./envs/requirements.txt` or `./envs/ngen_env.yaml`, respectively.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../../')\n", + " \n", + "# import numpy as np\n", + "\n", + "from dHBV_2_0.bmi import DeltaModelBmi\n" + ] + }, + { + "cell_type": "code", + "execution_count": 
null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "\n",
+    "import numpy as np\n",
+    "\n",
+    "# os.chdir(os.path.expanduser('../dHBV_2_0/'))\n",
+    "\n",
+    "\n",
+    "### Select a basin from the sample data ###\n",
+    "basin_id = \"cat-88306\"\n",
+    "### ----------------------------------- ###\n",
+    "\n",
+    "\n",
+    "# Load the AORC forcing, attribute, and observation data\n",
+    "# REPLACE THESE PATHS WITH YOUR LOCAL FILE PATHS:\n",
+    "forc_path = f'C:/Users/LeoLo/Desktop/noaa_owp/dHBV_2_0/data/aorc/juniata_river_basin/forcings_5yr_{basin_id}.npy'\n",
+    "attr_path = f'C:/Users/LeoLo/Desktop/noaa_owp/dHBV_2_0/data/aorc/juniata_river_basin/attributes_5yr_{basin_id}.npy'\n",
+    "obs_path = f'C:/Users/LeoLo/Desktop/noaa_owp/dHBV_2_0/data/aorc/juniata_river_basin/obs_5yr_{basin_id}.npy'\n",
+    "\n",
+    "forc = np.load(forc_path)\n",
+    "attr = np.load(attr_path)\n",
+    "obs = np.load(obs_path)\n",
+    "\n",
+    "# Create an instance of dHBV 2.0 through BMI\n",
+    "model = DeltaModelBmi()\n",
+    "\n",
+    "### BMI initialization ###\n",
+    "model.initialize(config_path=Path(f'C:/Users/LeoLo/Desktop/noaa_owp/dHBV_2_0/bmi_config_files/bmi_config_{basin_id}.yaml'))\n",
+    "\n",
+    "streamflow_pred = np.zeros(forc.shape[0])\n",
+    "\n",
+    "for i in range(0, forc.shape[0]):\n",
+    "    # Extract forcing data for the current time step\n",
+    "    prcp = forc[i, :, 0]\n",
+    "    temp = forc[i, :, 1]\n",
+    "    pet = forc[i, :, 2]\n",
+    "\n",
+    "    # # Check if any of the inputs are NaN\n",
+    "    # if np.isnan([prcp, temp, pet]).any():\n",
+    "    #     if model.verbose > 0:\n",
+    "    #         print(f\"Skipping timestep {i} due to NaN values in inputs.\")\n",
+    "    #     continue\n",
+    "\n",
+    "    model.set_value('atmosphere_water__liquid_equivalent_precipitation_rate', prcp)\n",
+    "    model.set_value('land_surface_air__temperature', temp)\n",
+    "    model.set_value('land_surface_water__potential_evaporation_volume_flux', pet)\n",
+    "\n",
+    "    ### BMI update ###\n",
+    "    model.update()\n",
+    "\n",
+    "    # Retrieve the runoff output\n",
+    "    dest_array = np.zeros(1)\n",
+    "    model.get_value('land_surface_water__runoff_volume_flux', dest_array)\n",
+    "\n",
+    "    # streamflow_pred[i] = dest_array[0] * 1000  # Convert to mm/hr\n",
+    "    streamflow_pred[i] = dest_array[0]\n",
+    "\n",
+    "\n",
+    "# Calculate NSE for the model predictions\n",
+    "# (mask NaNs with numpy; ndarrays have no .dropna())\n",
+    "mask = ~np.isnan(obs) & ~np.isnan(streamflow_pred)\n",
+    "obs_m, sim_m = obs[mask], streamflow_pred[mask]\n",
+    "\n",
+    "denom = ((obs_m - obs_m.mean()) ** 2).sum()\n",
+    "num = ((sim_m - obs_m) ** 2).sum()\n",
+    "nse = 1 - num / denom\n",
+    "print(f\"NSE: {nse:.2f}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/example/data_processing.ipynb b/example/data_processing.ipynb
index 2b98c34..d8b5a88 100644
--- a/example/data_processing.ipynb
+++ b/example/data_processing.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
@@ -33,7 +33,7 @@
     "\n",
     "gdf = gpd.read_file(r\"C:\\Users\\LeoLo\\Desktop\\jrb\\jrb_2.gpkg\", layer=\"flowpaths\")\n",
     "nexus = gpd.read_file(r\"C:\\Users\\LeoLo\\Desktop\\jrb\\jrb_2.gpkg\", layer=\"nexus\")\n",
-    "# Many more layers 'flowpaths', 'divides', 'lakes', 'nexus', 'pois', 'hydrolocations', 'flowpath-attributes', \n",
+    "# Many more layers 'flowpaths', 'divides', 'lakes', 'nexus', 'pois', 'hydrolocations', 'flowpath-attributes',\n",
     "# 'flowpath-attributes-ml', 'network', 'divide-attributes'\n",
     "\n",
     "# print(gdf.head())\n",
@@ -845,7 +845,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 72,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
@@ -915,6 +915,441 @@
    "print(f\"\\n --------\\nForcing data has {len(duplicates)} duplicate divide_id values.\")"
   ]
  },
+ {
+  "cell_type": "code",
+  "execution_count": 7,
+  "metadata": {},
+  "outputs": [
+   {
+    "data": {
+     "text/html": [
" + ], + "text/plain": [ + " Size: 225MB\n", + "Dimensions: (divide_id: 839543)\n", + "Coordinates:\n", + " * divide_id (divide_id) =42", "wheel"] +# build-backend = "setuptools.build_meta" [project] name = "dHBV_2_0" -description = "Hydrology models and modules manager" -requires-python = ">=3.9" +description = "NextGen Compatible dHBV 2.0 model with UH routing." +readme = "README.md" license = {file = "LICENSE"} authors = [ {name = "Leo Lonzarich"}, @@ -15,27 +17,67 @@ maintainers = [ {name = "Tadd Bindas", email = "taddbindas@gmail.com"}, {name = "Yalan Song", email = "songyalan1@gmail.com"}, ] +requires-python = ">=3.9" +dynamic = ["version"] dependencies = [ - "torch", - "numpy", - "pandas", + "hydra-core>=1.3.2", + "ipykernel>=6.29.5", + "matplotlib>=3.10.0", + "numpy>=1.22.4", + "omegaconf>=2.3.0", + "pandas>=1.4.3", + "pydantic>=2.0.0", + "scikit-learn>=1.0.2", + "scipy>=1.7.3", + "torch>=1.10.1", + "torchaudio>=0.10.1", + "torchvision>=0.11.2", + "tqdm>=4.67.1", + "uv>=0.6.6", + "zarr>=3.0.5", +] +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", ] -dynamic = ["version"] # Add this line to indicate dynamic versioning +[project.urls] +Homepage = "https://mhpi.github.io/codes/frameworks/" +Repository = "https://github.com/mhpi/dHBV_2_0" [project.optional-dependencies] -test = [ - "pytest", - "pytest-cov", +hydrodl2 = [ + "hydroDL2 @ git+https://github.com/mhpi/hydroDL2.git@master" +] +dev = [ + "mypy>=0.940", + "pytest>=7.4.2", + "ruff>=0.9.10", ] [tool.hatch] version.source = "vcs" -build.hooks.vcs.version-file = "src/hydroDL2/_version.py" +build.hooks.vcs.version-file = "src/dHBV_2_0/_version.py" + +[tool.hatch.metadata] +allow-direct-references = true + +[tool.mypy] +python_version = "3.12" +strict = true +disallow_untyped_defs = false + +[tool.pytest.ini_options] +testpaths = ["tests"] +xfail_strict = true +addopts = [ + "--import-mode=importlib", # allow using test files with same name +] [tool.ruff] src = ["src"] -line-length = 120 +line-length = 88 lint.select = [ "F", # Errors detected by Pyflakes "E", # Error detected by Pycodestyle @@ -49,6 +91,12 @@ lint.select = [ "UP", # pyupgrade "RUF100", # Report unused noqa directives ] +exclude = [ + ".git", + ".vscode", + "archive", + "dev", +] lint.ignore = [ # line too long -> we accept long comment lines; black gets rid of long code lines "E501", @@ -71,10 +119,21 @@ lint.ignore = [ # First line should be in imperative mood; try rephrasing "D401", ## Disable one in each pair of mutually incompatible rules - # We don’t want a blank line before a class docstring + # We don't want a blank line before a class docstring "D203", # We want docstrings to start immediately after the opening triple quote "D213", + # Keep standard multi-package import format from isort + "I001", + # Allow blank lines in docstrings, between code + "W293", + # Ignore escape sequences used in strings for latex formating + "W605", + # Ignore extra line requirements at end of docstring and docstring summary + "D204", + "D205", + # Missing docstring in public method + "D102", ] [tool.ruff.lint.pydocstyle] diff --git a/setup.py b/setup.py index 216b8fb..b3ed071 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ #!/usr/bin/env python -from setuptools import setup, find_packages +from setuptools import find_packages, setup setup( name='dHBV_2_0', diff --git a/src/dHBV_2_0/__init__.py b/src/dHBV_2_0/__init__.py index ed3c4a6..6f86f29 100644 --- 
a/src/dHBV_2_0/__init__.py +++ b/src/dHBV_2_0/__init__.py @@ -1,6 +1,19 @@ -# This is not necessary and may cause confusion. -# If class name (bmi_LSTM) were to be exactly the same -# as module name (bmi_lstm), this would cause trouble -# because the module itself would no longer be accessible. (NOAA) +from __future__ import annotations -# from .bmi_dm import BmiDm \ No newline at end of file +import sys +import typing +from dataclasses import dataclass + +import numpy.typing as npt + +if sys.version_info < (3, 10): + import typing_extensions as typing +else: + import typing + +# `slots` feature added to of `dataclass` in 3.10 +# see: https://docs.python.org/3.12/library/dataclasses.html#dataclasses.dataclass +if sys.version_info < (3, 10): + dataclass_kwargs = {} +else: + dataclass_kwargs = {"slots": True} diff --git a/src/dHBV_2_0/__main__.py b/src/dHBV_2_0/__main__.py index 44dacc3..c652fb3 100644 --- a/src/dHBV_2_0/__main__.py +++ b/src/dHBV_2_0/__main__.py @@ -1,5 +1,4 @@ from dHBV_2_0.run_bmi_aorc import execute -# TODO: Add tests here later on -- NOAA if __name__ == '__main__': execute() diff --git a/src/dHBV_2_0/_version.py b/src/dHBV_2_0/_version.py new file mode 100644 index 0000000..cd7f15f --- /dev/null +++ b/src/dHBV_2_0/_version.py @@ -0,0 +1,21 @@ +# file generated by setuptools-scm +# don't change, don't track in version control + +__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"] + +TYPE_CHECKING = False +if TYPE_CHECKING: + from typing import Tuple + from typing import Union + + VERSION_TUPLE = Tuple[Union[int, str], ...] +else: + VERSION_TUPLE = object + +version: str +__version__: str +__version_tuple__: VERSION_TUPLE +version_tuple: VERSION_TUPLE + +__version__ = version = '0.1.dev25+g5f43f62.d20250406' +__version_tuple__ = version_tuple = (0, 1, 'dev25', 'g5f43f62.d20250406') diff --git a/src/dHBV_2_0/bmi.py b/src/dHBV_2_0/bmi.py new file mode 100644 index 0000000..9519e17 --- /dev/null +++ b/src/dHBV_2_0/bmi.py @@ -0,0 +1,1064 @@ +"""BMI wrapper for interfacing dHBV 2.0 with NOAA-OWP NextGen framework. + +Author: Leo Lonzarich + +Motivated by LSTM BMI implementation of Austin Raney, Jonathan Frame. 
+""" +import logging +import os +import time +import json +from pathlib import Path +from typing import Optional, Union + +import numpy as np +import torch +import yaml +from bmipy import Bmi +from dMG import ModelHandler, import_data_sampler, utils +from numpy.typing import NDArray +from sklearn.exceptions import DataDimensionalityWarning + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + +script_dir = os.path.dirname(os.path.abspath(__file__)) + + + +# -------------------------------------------- # +# Dynamic input variables (CSDMS standard names) +# -------------------------------------------- # +_dynamic_input_vars = [ + ('atmosphere_water__liquid_equivalent_precipitation_rate', 'mm d-1'), + ('land_surface_air__temperature', 'degC'), + ('land_surface_water__potential_evaporation_volume_flux', 'mm d-1'), +] + +# ------------------------------------------- # +# Static input variables (CSDMS standard names) +# ------------------------------------------- # +_static_input_vars = [ + ('ratio__mean_potential_evapotranspiration__mean_precipitation', '-'), + ('atmosphere_water__daily_mean_of_liquid_equivalent_precipitation_rate', 'mm d-1'), + ('land_surface_water__Hargreaves_potential_evaporation_volume_flux', 'mm d-1'), + ('land_vegetation__normalized_diff_vegetation_index', '-'), + ('free_land_surface_water', 'mm d-1'), + ('basin__mean_of_slope', 'm km-1'), + ('soil_sand__grid', 'km2'), + ('soil_clay__grid', 'km2'), + ('soil_silt__grid', 'km2'), + ('land_surface_water__glacier_fraction', 'percent'), + ('soil_clay__attr', 'percent'), + ('soil_gravel__attr', 'percent'), + ('soil_sand__attr', 'percent'), + ('soil_silt__attr', 'percent'), + ('basin__mean_of_elevation', 'm'), + ('atmosphere_water__daily_mean_of_temperature', 'degC'), + ('land_surface_water__permafrost_fraction', '-'), + ('bedrock__permeability', 'm2'), + ('p_seasonality', '-'), + ('land_surface_water__potential_evaporation_volume_flux_seasonality', '-'), + ('land_surface_water__snow_fraction', 'percent'), + ('atmosphere_water__precipitation_falling_as_snow_fraction', 'percent'), + ('soil_clay__volume_fraction', 'percent'), + ('soil_gravel__volume_fraction', 'percent'), + ('soil_sand__volume_fraction', 'percent'), + ('soil_silt__volume_fraction', 'percent'), + ('soil_active-layer__porosity', '-'), + ('basin__area', 'km2'), +] + +# ------------------------------------- # +# Output variables (CSDMS standard names) +# ------------------------------------- # +_output_vars = [ + ('land_surface_water__runoff_volume_flux', 'm3 s-1'), +] + +# ---------------------------------------------- # +# Internal variable names <-> CSDMS standard names +# ---------------------------------------------- # +_var_name_internal_map = { + # ----------- Dynamic inputs ----------- + 'P': 'atmosphere_water__liquid_equivalent_precipitation_rate', + 'Temp': 'land_surface_air__temperature', + 'PET': 'land_surface_water__potential_evaporation_volume_flux', + # ----------- Static inputs ----------- + 'aridity': 'ratio__mean_potential_evapotranspiration__mean_precipitation', + 'meanP': 'atmosphere_water__daily_mean_of_liquid_equivalent_precipitation_rate', + 'ETPOT_Hargr': 'land_surface_water__Hargreaves_potential_evaporation_volume_flux', + 'NDVI': 'land_vegetation__normalized_diff_vegetation_index', + 'FW': 'free_land_surface_water', + 'meanslope': 'basin__mean_of_slope', + 'SoilGrids1km_sand': 'soil_sand__grid', + 'SoilGrids1km_clay': 'soil_clay__grid', + 'SoilGrids1km_silt': 'soil_silt__grid', + 'glaciers': 
'land_surface_water__glacier_fraction',
+    'HWSD_clay': 'soil_clay__attr',
+    'HWSD_gravel': 'soil_gravel__attr',
+    'HWSD_sand': 'soil_sand__attr',
+    'HWSD_silt': 'soil_silt__attr',
+    'meanelevation': 'basin__mean_of_elevation',
+    'meanTa': 'atmosphere_water__daily_mean_of_temperature',
+    'permafrost': 'land_surface_water__permafrost_fraction',
+    'permeability': 'bedrock__permeability',
+    'seasonality_P': 'p_seasonality',
+    'seasonality_PET': 'land_surface_water__potential_evaporation_volume_flux_seasonality',
+    'snow_fraction': 'land_surface_water__snow_fraction',
+    'snowfall_fraction': 'atmosphere_water__precipitation_falling_as_snow_fraction',
+    'T_clay': 'soil_clay__volume_fraction',
+    'T_gravel': 'soil_gravel__volume_fraction',
+    'T_sand': 'soil_sand__volume_fraction',
+    'T_silt': 'soil_silt__volume_fraction',
+    'Porosity': 'soil_active-layer__porosity',
+    'uparea': 'basin__area',
+    # ----------- Outputs -----------
+    'flow_sim': 'land_surface_water__runoff_volume_flux',
+}
+
+_var_name_external_map = {v: k for k, v in _var_name_internal_map.items()}
+
+
+def map_to_external(name: str):
+    """Return the external name (exposed via BMI) for a given internal name."""
+    return _var_name_internal_map[name]
+
+
+def map_to_internal(name: str):
+    """Return the internal name for a given external name (exposed via BMI)."""
+    return _var_name_external_map[name]
+
+
+def bmi_array(arr: list[float]) -> NDArray:
+    """Trivial wrapper function to ensure the expected numpy array datatype is used."""
+    return np.array(arr, dtype="float64")
+
+
+
+#==============================================================================#
+#==============================================================================#
+#==============================================================================#
+
+
+# MAIN BMI >>>>
+
+
+#==============================================================================#
+#==============================================================================#
+#==============================================================================#
+
+
+
+class DeltaModelBmi(Bmi):
+    """
+    dHBV 2.0UH BMI: NextGen-compatible, differentiable, physics-informed ML
+    model for hydrologic forecasting. (Song et al., 2024)
+
+    Note: This dHBV 2.0UH BMI can only run forward inference. To train,
+    see the dMG package (https://github.com/mhpi/generic_deltaModel).
+    """
+    _att_map = {
+        'model_name': 'dHBV 2.0UH for NextGen',
+        'version': '2.0',
+        'author_name': 'Leo Lonzarich',
+        'time_step_size': 86400,
+        'time_units': 'seconds',
+        # 'time_step_type': '',
+        # 'grid_type': 'scalar',
+        # 'step_method': '',
+    }
+
+    def __init__(
+        self,
+        config_path: Optional[str] = None,
+        verbose=False,
+    ) -> None:
+        """Create a BMI dHBV 2.0UH model ready for initialization.
+
+        Parameters
+        ----------
+        config_path
+            Path to the BMI configuration file.
+        verbose
+            Enables debug print statements if True.
+        """
+        super().__init__()
+        self._model = None
+        self._initialized = False
+        self.verbose = verbose
+
+        self._var_loc = 'node'
+        self._var_grid_id = 0
+
+        self._start_time = 0.0
+        self._end_time = np.finfo('d').max
+        self._time_units = 's'
+        self._timestep = 0
+        # Seconds per model timestep (1 day); read by get_time_step() and
+        # scaled temporarily by update_frac().
+        self._time_step_size = self._att_map['time_step_size']
+        self.current_time = self._start_time
+
+        self.config_bmi = None
+        self.config_model = None
+
+        # Timing BMI computations
+        t_start = time.time()
+        self.bmi_process_time = 0
+
+        # Read BMI and model configuration files.
+        if config_path is not None:
+            if not Path(config_path).is_file():
+                raise FileNotFoundError(f"Configuration file not found: {config_path}")
+            with open(config_path) as f:
+                self.config_bmi = yaml.safe_load(f)
+            self.stepwise = self.config_bmi.get('stepwise', True)
+
+            try:
+                model_config_path = os.path.join(
+                    script_dir, '..', '..', self.config_bmi.get('config_model')
+                )
+                with open(model_config_path) as f:
+                    self.config_model = yaml.safe_load(f)
+            except Exception as e:
+                raise RuntimeError(f"Failed to load model configuration: {e}") from e
+
+        # Initialize variables.
+        self._dynamic_var = self._set_vars(_dynamic_input_vars, bmi_array([]))
+        self._static_var = self._set_vars(_static_input_vars, bmi_array([]))
+        self._output_vars = self._set_vars(_output_vars, bmi_array([]))
+
+        # Track total BMI runtime.
+        self.bmi_process_time += time.time() - t_start
+        if self.verbose:
+            log.info(f"BMI init took {time.time() - t_start} s")
+
+    @staticmethod
+    def _set_vars(
+        vars: list[tuple[str, str]],
+        var_value: NDArray,
+    ) -> dict[str, dict[str, Union[NDArray, str]]]:
+        """Set the values of the given variables."""
+        var_dict = {}
+        for item in vars:
+            var_dict[item[0]] = {'value': var_value.copy(), 'units': item[1]}
+        return var_dict
+
+    def initialize(self, config_path: Optional[str] = None) -> None:
+        """(Control function) Initialize the BMI model.
+
+        This BMI operates in two modes, necessitated by the fact that
+        dHBV 2.0's internal NN must forward on all data at once: forwarding
+        each timestep one-by-one while saving/loading hidden states would
+        slash LSTM performance, and feeding in hidden states day-by-day
+        leads to large efficiency losses vs. simply feeding all data at
+        once, due to carrying gradients at each step.
+
+        1) Feed all input data to the BMI before 'bmi.initialize()'. The
+        internal model is then forwarded on all data and generates
+        predictions during '.initialize()'.
+
+        2) Run '.initialize()', then pass data day by day as normal during
+        'bmi.update()'. If the forwarding period is sufficiently small (say,
+        <100 days), then forwarding the LSTM on individual days with saved
+        states is reasonable.
+
+        To this end, a configuration file can be specified either during
+        `bmi.__init__()` or during `.initialize()`. If running the BMI in
+        mode (1), the config must be passed to the former; for mode (2), it
+        is passed to the latter.
+
+        Parameters
+        ----------
+        config_path
+            Path to the BMI configuration file.
+        """
+        t_start = time.time()
+
+        # Read BMI configuration file if provided.
+        if config_path is not None:
+            if not Path(config_path).is_file():
+                raise FileNotFoundError(f"Configuration file not found: {config_path}")
+            with open(config_path) as f:
+                self.config_bmi = yaml.safe_load(f)
+            self.stepwise = self.config_bmi.get('stepwise', True)
+
+        if self.config_bmi is None:
+            raise ValueError("No configuration file given. A config path "
+                             "must be passed at time of the bmi __init__() or "
+                             "initialize() call.")
+
+        # Load model configuration.
+ if self.config_model is None: + try: + model_config_path = os.path.join( + script_dir, '..', '..', self.config_bmi.get('config_model') + ) + with open(model_config_path) as f: + self.config_model = yaml.safe_load(f) + except Exception as e: + raise RuntimeError(f"Failed to load model configuration: {e}") from e + + self.config_model = utils.initialize_config(self.config_model) + self.config_model['model_path'] = os.path.join( + script_dir, '..', '..', self.config_model.get('trained_model') + ) + self.device = self.config_model['device'] + self.internal_dtype = self.config_model['dtype'] + self.external_dtype = eval(self.config_bmi['dtype']) + self.sampler = import_data_sampler(self.config_model['data_sampler'])(self.config_model) + + # Load static variables from BMI conf + for name in self._static_var.keys(): + ext_name = map_to_internal(name) + if ext_name in self.config_bmi.keys(): + self._static_var[name]['value'] = bmi_array(self.config_bmi[ext_name]) + else: + log.warning(f"Static variable '{name}' not in BMI config. Skipping.") + + # # Set simulation parameters. + self.current_time = self.config_bmi.get('start_time', 0.0) + # self._time_step_size = self.config_bmi.get('time_step_size', 86400) # Default to 1 day in seconds. + # self._end_time = self.config_bmi.get('end_time', np.finfo('d').max)\ + + # Load a trained model. + try: + self._model = self._load_trained_model(self.config_model).to(self.device) + self._initialized = True + except Exception as e: + raise RuntimeError(f"Failed to load trained model: {e}") from e + + # Forward simulation on all data in one go. + if not self.stepwise: + predictions = self._do_forward() + self._format_outputs(predictions) # Process and store predictions. + + # Track total BMI runtime. + self.bmi_process_time += time.time() - t_start + if self.verbose: + log.info(f"BMI Initialize took {time.time() - t_start} s | Total runtime: {self.bmi_process_time} s") + + def update(self) -> None: + """(Control function) Advance model state by one time step.""" + t_start = time.time() + # self.current_time += self._time_step_size + + # Forward model on individual timesteps if not initialized with forward_init. + if self.stepwise: + predictions = self._do_forward() + self._format_outputs(predictions) + + # Increment model time. + self._timestep += 1 + + # Track total BMI runtime. + self.bmi_process_time += time.time() - t_start + if self.verbose: + log.info(f"BMI Update took {time.time() - t_start} s | Total runtime: {self.bmi_process_time} s") + + def update_until(self, end_time: float) -> None: + """(Control function) Update model until a particular time. + + Note: Models should be trained standalone with dPLHydro_PMI first before + forward predictions with this BMI. + + Parameters + ---------- + end_time : float + Time to run model until. + """ + t_start = time.time() + + n_steps = (end_time - self.get_current_time()) / self.get_time_step() + + for _ in range(int(n_steps)): + self.update() + self.update_frac(n_steps - int(n_steps)) + + # Keep running total of BMI runtime. 
+ self.bmi_process_time += time.time() - t_start + if self.verbose: + log.info(f"BMI Update Until took {time.time() - t_start} s | Total runtime: {self.bmi_process_time} s") + + def finalize(self) -> None: + """(Control function) Finalize model.""" + if self._model is not None: + del self._model + torch.cuda.empty_cache() + self._initialized = False + if self.verbose: + log.info("BMI model finalized.") + + + +#==============================================================================# +#==============================================================================# + + # Helper functions for BMI + +#==============================================================================# +#==============================================================================# + + + + def _do_forward(self): + """Forward model and save outputs to return on update call.""" + data_dict = self._format_inputs() + + n_samples = data_dict['xc_nn_norm'].shape[1] + batch_start = np.arange(0, n_samples, self.config_model['predict']['batch_size']) + batch_end = np.append(batch_start[1:], n_samples) + + batch_predictions = [] + # Forward through basins in batches. + with torch.no_grad(): + for i in range(len(batch_start)): + dataset_sample = self.sampler.get_validation_sample( + data_dict, + batch_start[i], + batch_end[i], + ) + + # Forward dPLHydro model + self.prediction = self._model.forward(dataset_sample, eval=True) + + # For single hydrology model. + model_name = self.config_model['dpl_model']['phy_model']['model'][0] + prediction = { + key: tensor.cpu().detach() for key, tensor in self.prediction[model_name].items() + } + batch_predictions.append(prediction) + + return self._batch_data(batch_predictions) + + # preds = torch.cat([d['flow_sim'] for d in batched_preds_list], dim=1) + # preds = preds.numpy() + + # # Scale and check output + # self.scale_output() + + @staticmethod + def _load_trained_model(config: dict): + """Load a pre-trained model based on the configuration.""" + model_path = config.get('model_path') + if not model_path: + raise ValueError("No model path specified in configuration.") + if not Path(model_path).exists(): + raise FileNotFoundError(f"Model file not found: {model_path}") + return ModelHandler(config, verbose=True) + + def update_frac(self, time_frac: float) -> None: + """ + Update model by a fraction of a time step. + + Parameters + ---------- + time_frac : float + Fraction fo a time step. 
+ """ + if self.verbose: + print("Warning: This model is trained to make predictions on one day timesteps.") + time_step = self.get_time_step() + self._time_step_size = self._time_step_size * time_frac + self.update() + self._time_step_size = time_step + + def _format_outputs(self, outputs): + """Format model outputs as BMI outputs.""" + if not isinstance(outputs['flow_sim'], np.ndarray): + outputs = outputs['flow_sim'].detach().cpu().numpy() + else: + outputs = outputs['flow_sim'] + self._output_vars[_output_vars[0][0]] = outputs + + def _format_inputs(self): + """Format dynamic and static inputs for the model.""" + #=====================================================================# + x_list = [] + c_list = [] + + for name, data in self._dynamic_var.items(): + if data['value'].size == 0: + raise ValueError(f"Dynamic variable '{name}' has no value.") + if data['value'].ndim == 1: + data['value'] = np.expand_dims(data['value'], axis=(1, 2)) # Shape: (n, 1, 1) + elif data['value'].ndim == 2: + data['value'] = np.expand_dims(data['value'], axis=2) # Shape: (n, m, 1) + elif data['value'].ndim != 3: + raise ValueError(f"Dynamic variable '{name}' has unsupported " \ + f"dimensions ({data['value'].ndim}).") + x_list.append(data['value']) + + for name, data in self._static_var.items(): + if data['value'].size == 0: + raise ValueError(f"Static variable '{name}' has no value.") + if data['value'].ndim != 2: + data['value'] = np.expand_dims(data['value'], axis=(0,1)) + c_list.append(data['value']) + + x = np.concatenate(x_list, axis=2) + x = self._fill_nan(x) + c = np.concatenate(c_list, axis=1) + + xc_nn_norm, c_nn_norm = self.normalize(x.copy(), c) + + + # Get upstream area and elevation + try: + ac_name = self.config_model['observations']['upstream_area_name'] + ac_array = self._static_var[map_to_external(ac_name)]['value'] + except ValueError as e: + raise ValueError("Upstream area is not provided. This is needed for high-resolution streamflow model.") from e + try: + elevation_name = self.config_model['observations']['elevation_name'] + elev_array = self._static_var[map_to_external(elevation_name)]['value'] + except ValueError as e: + raise ValueError("Elevation is not provided. This is needed for high-resolution streamflow model.") from e + + + # Convert to torch tensors. 
+ # dataset = { + # 'ac_all': torch.tensor(ac_array, dtype=torch.float32, device=self.device).squeeze(-1), + # 'elev_all': torch.tensor(elev_array, dtype=torch.float32, device=self.device).squeeze(-1), + # 'c_nn': torch.tensor(c, dtype=torch.float32, device=self.device), + # 'xc_nn_norm': torch.tensor(xc_nn_norm, dtype=torch.float32, device=self.device), + # 'c_nn_norm': torch.tensor(c_nn_norm, dtype=torch.float32, device=self.device), + # 'x_phy': torch.tensor(x, dtype=torch.float32, device=self.device), + # } + dataset = { + 'ac_all': ac_array.squeeze(-1), + 'elev_all': elev_array.squeeze(-1), + 'c_nn': c, + 'xc_nn_norm': xc_nn_norm, + 'c_nn_norm': c_nn_norm, + 'x_phy': x, + } + return dataset + #=====================================================================# + + def normalize( + self, + x_nn: NDArray[np.float32], + c_nn: NDArray[np.float32] + ) -> NDArray[np.float32]: + """Normalize data for neural network.""" + + self.load_norm_stats() + x_nn_norm = self._to_norm(x_nn, _dynamic_input_vars) + c_nn_norm = self._to_norm(c_nn, _static_input_vars) + + # Remove nans + x_nn_norm[x_nn_norm != x_nn_norm] = 0 + c_nn_norm[c_nn_norm != c_nn_norm] = 0 + + c_nn_norm_repeat = np.repeat( + np.expand_dims(c_nn_norm, 0), + x_nn_norm.shape[0], + axis=0, + ) + + xc_nn_norm = np.concatenate((x_nn_norm, c_nn_norm_repeat), axis=2) + del x_nn_norm, x_nn + + return xc_nn_norm, c_nn_norm + + def _to_norm( + self, + data: NDArray[np.float32], + vars: list[str], + ) -> NDArray[np.float32]: + """Standard data normalization.""" + log_norm_vars = self.config_model['dpl_model']['phy_model']['use_log_norm'] + + data_norm = np.zeros(data.shape) + + for k, var in enumerate(vars): + stat = self.norm_stats[map_to_internal(var[0])] + + if len(data.shape) == 3: + if map_to_internal(var[0]) in log_norm_vars: + data[:, :, k] = np.log10(np.sqrt(data[:, :, k]) + 0.1) + data_norm[:, :, k] = (data[:, :, k] - stat[2]) / stat[3] + elif len(data.shape) == 2: + if var[0] in log_norm_vars: + data[:, k] = np.log10(np.sqrt(data[:, k]) + 0.1) + data_norm[:, k] = (data[:, k] - stat[2]) / stat[3] + else: + raise DataDimensionalityWarning("Data dimension must be 2 or 3.") + return data_norm + + def load_norm_stats(self) -> None: + """Load normalization statistics.""" + path = os.path.join( + self.config_model['model_path'], + '..', + 'normalization_statistics.json', + ) + try: + with open(path) as f: + self.norm_stats = json.load(f) + except ValueError as e: + raise ValueError("Normalization statistics not found.") from e + + def _process_predictions(self, predictions): + """Process model predictions and store them in output variables.""" + for var_name, prediction in predictions.items(): + if var_name in self._output_vars: + self._output_vars[var_name]['value'] = prediction.cpu().numpy() + else: + log.warning(f"Output variable '{var_name}' not recognized. 
Skipping.")
+
+    def _batch_data(
+        self,
+        batch_list: list[dict[str, torch.Tensor]],
+        target_key: str = None,
+    ) -> None:
+        """Merge list of batch data dictionaries into a single dictionary."""
+        data = {}
+        try:
+            if target_key:
+                return torch.cat([x[target_key] for x in batch_list], dim=1).numpy()
+
+            for key in batch_list[0].keys():
+                if len(batch_list[0][key].shape) == 3:
+                    dim = 1
+                else:
+                    dim = 0
+                data[key] = torch.cat([d[key] for d in batch_list], dim=dim).cpu().numpy()
+            return data
+
+        except ValueError as e:
+            raise ValueError(f"Error concatenating batch data: {e}") from e
+
+    @staticmethod
+    def _fill_nan(array_3d):
+        """Linearly interpolate NaNs along the time (second) dimension."""
+        # Define the x-axis for interpolation
+        x = np.arange(array_3d.shape[1])
+
+        # Iterate over the first and third dimensions to interpolate the second dimension
+        for i in range(array_3d.shape[0]):
+            for j in range(array_3d.shape[2]):
+                # Select the 1D slice for interpolation
+                slice_1d = array_3d[i, :, j]
+
+                # Find indices of NaNs and non-NaNs
+                nans = np.isnan(slice_1d)
+                non_nans = ~nans
+
+                # Only interpolate if there are NaNs and at least two non-NaN values for reference
+                if np.any(nans) and np.sum(non_nans) > 1:
+                    # Perform linear interpolation using numpy.interp
+                    array_3d[i, :, j] = np.interp(x, x[non_nans], slice_1d[non_nans], left=None, right=None)
+        return array_3d
+
+    def array_to_tensor(self) -> None:
+        """Convert input values into a Torch tensor object to be read by the model."""
+        raise NotImplementedError("array_to_tensor")
+
+    def tensor_to_array(self) -> None:
+        """
+        Convert a model output Torch tensor into data + gradient arrays to be
+        passed out of BMI for backpropagation, loss, optimizer tuning.
+        """
+        raise NotImplementedError("tensor_to_array")
+
+    def get_tensor_slice(self):
+        """Get tensor of input data for a single timestep."""
+        # sample_dict = take_sample_test(self.bmi_config, self.dataset_dict)
+        # self.input_tensor = torch.Tensor()
+
+        raise NotImplementedError("get_tensor_slice")
+
+    def get_var_type(self, var_name):
+        """
+        Data type of variable.
+
+        Parameters
+        ----------
+        var_name : str
+            Name of variable as CSDMS Standard Name.
+
+        Returns
+        -------
+        str
+            Data type.
+        """
+        return str(self.get_value_ptr(var_name).dtype)
+
+    def get_var_units(self, var_standard_name):
+        """Get units of variable.
+
+        Parameters
+        ----------
+        var_standard_name : str
+            Name of variable as CSDMS Standard Name.
+
+        Returns
+        -------
+        str
+            Variable units.
+        """
+        # Look the variable up in whichever input/output dict holds it.
+        for var_dict in (self._dynamic_var, self._static_var, self._output_vars):
+            if var_standard_name in var_dict:
+                return var_dict[var_standard_name]['units']
+        raise KeyError(f"Unknown variable: {var_standard_name}")
+
+    def get_var_nbytes(self, var_name):
+        """Get size of variable in bytes.
+
+        Parameters
+        ----------
+        var_name : str
+            Name of variable as CSDMS Standard Name.
+
+        Returns
+        -------
+        int
+            Size of data array in bytes.
+        """
+        return self.get_value_ptr(var_name).nbytes
+
+    def get_var_itemsize(self, name):
+        return np.dtype(self.get_var_type(name)).itemsize
+
+    def get_var_location(self, name):
+        # All variables are defined at nodes.
+        return self._var_loc
+
+    def get_var_grid(self, var_name):
+        """Grid id for a variable.
+
+        Parameters
+        ----------
+        var_name : str
+            Name of variable as CSDMS Standard Name.
+
+        Returns
+        -------
+        int
+            Grid id.
+        """
+        # for grid_id, var_name_list in self._grids.items():
+        #     if var_name in var_name_list:
+        #         return grid_id
+        raise NotImplementedError("get_var_grid")
+
+    def get_grid_rank(self, grid_id: int):
+        """Rank of grid.
+
+        Parameters
+        ----------
+        grid_id
+            Identifier of a grid.
+
+        Returns
+        -------
+        int
+            Rank of grid.
+        """
+        if grid_id == 0:
+            return 1
+        raise ValueError(f"Unsupported grid id: {grid_id!s}; only grid 0 is supported.")
+
+    def get_grid_size(self, grid_id):
+        """Size of grid.
+
+        Parameters
+        ----------
+        grid_id : int
+            Identifier of a grid.
+
+        Returns
+        -------
+        int
+            Size of grid.
+        """
+        # return int(np.prod(self._model.shape))
+        raise NotImplementedError("get_grid_size")
+
+    def get_value_ptr(self, var_standard_name: str) -> np.ndarray:
+        """Reference to values."""
+        return self._output_vars[var_standard_name]
+
+    def get_value(self, var_name: str, dest: NDArray):
+        """Return copy of variable values."""
+        # TODO: will need to properly account for multiple basins.
+        try:
+            dest[:] = self.get_value_ptr(var_name)[self._timestep-1,].flatten()
+        except RuntimeError as e:
+            raise e
+        return dest
+
+    def get_value_at_indices(self, var_name, dest, indices):
+        """Get values at particular indices.
+
+        Parameters
+        ----------
+        var_name : str
+            Name of variable as CSDMS Standard Name.
+        dest : ndarray
+            A numpy array into which to place the values.
+        indices : array_like
+            Array of indices.
+
+        Returns
+        -------
+        array_like
+            Values at indices.
+        """
+        dest[:] = self.get_value_ptr(var_name).take(indices)
+        return dest
+
+    def set_value(self, var_name, values: np.ndarray):
+        """Set variable value."""
+        for var_dict in [self._dynamic_var, self._static_var]:
+            if var_name in var_dict.keys():
+                if self.stepwise:
+                    var_dict[var_name]['value'] = values
+                else:
+                    var_dict[var_name]['value'] = np.append(
+                        var_dict[var_name]['value'], values
+                    )
+                break
+
+    def set_value_at_indices(self, name, inds, src):
+        """Set model values at particular indices.
+
+        Parameters
+        ----------
+        name : str
+            Name of variable as CSDMS Standard Name.
+        inds : array_like
+            Array of indices.
+        src : array_like
+            Array of new values.
+        """
+        val = self.get_value_ptr(name)
+        val.flat[inds] = src
+
+    def get_component_name(self):
+        """Name of the component."""
+        return self._att_map['model_name']
+
+    def get_input_item_count(self):
+        """Get number of input variables."""
+        return len(self.get_input_var_names())
+
+    def get_output_item_count(self):
+        """Get number of output variables."""
+        return len(self.get_output_var_names())
+
+    def get_input_var_names(self):
+        """Get names of input variables."""
+        return [name for name, _ in _dynamic_input_vars + _static_input_vars]
+
+    def get_output_var_names(self):
+        """Get names of output variables."""
+        return [name for name, _ in _output_vars]
+
+    def get_grid_shape(self, grid_id, shape):
+        """Number of rows and columns of uniform rectilinear grid."""
+        # var_name = self._grids[grid_id][0]
+        # shape[:] = self.get_value_ptr(var_name).shape
+        # return shape
+        raise NotImplementedError("get_grid_shape")
+
+    def get_grid_spacing(self, grid_id, spacing):
+        """Spacing of rows and columns of uniform rectilinear grid."""
+        # spacing[:] = self._model.spacing
+        # return spacing
+        raise NotImplementedError("get_grid_spacing")
+
+    def get_grid_origin(self, grid_id, origin):
+        """Origin of uniform rectilinear grid."""
+        # origin[:] = self._model.origin
+        # return origin
+        raise NotImplementedError("get_grid_origin")
+
+    def get_grid_type(self, grid_id):
+        """Type of grid."""
+        # return self._grid_type[grid_id]
+        raise NotImplementedError("get_grid_type")
+
+    def get_start_time(self):
+        """Start time of model."""
+        return self._start_time
+
+    def get_end_time(self):
+        """End time of model."""
+        return self._end_time
+
+    def get_current_time(self):
+        # Set in __init__ and advanced by initialize()/update().
+        return self.current_time
+
+    def get_time_step(self):
+        return self._time_step_size
+
+    def get_time_units(self):
+        return self._time_units
+
+    def get_grid_edge_count(self, grid):
+        raise NotImplementedError("get_grid_edge_count")
+
+    def 
get_grid_edge_nodes(self, grid, edge_nodes): + raise NotImplementedError("get_grid_edge_nodes") + + def get_grid_face_count(self, grid): + raise NotImplementedError("get_grid_face_count") + + def get_grid_face_nodes(self, grid, face_nodes): + raise NotImplementedError("get_grid_face_nodes") + + def get_grid_node_count(self, grid): + raise NotImplementedError("get_grid_node_count") + + def get_grid_nodes_per_face(self, grid, nodes_per_face): + raise NotImplementedError("get_grid_nodes_per_face") + + def get_grid_face_edges(self, grid, face_edges): + raise NotImplementedError("get_grid_face_edges") + + def get_grid_x(self, grid, x): + raise NotImplementedError("get_grid_x") + + def get_grid_y(self, grid, y): + raise NotImplementedError("get_grid_y") + + def get_grid_z(self, grid, z): + raise NotImplementedError("get_grid_z") + + def initialize_config(self, config_path: str) -> dict: + """ + Check that config_path is valid path and convert config into a + dictionary object. + """ + config_path = Path(config_path).resolve() + + if not config_path: + raise RuntimeError("No BMI configuration path provided.") + elif not config_path.is_file(): + raise RuntimeError(f"BMI configuration not found at path {config_path}.") + else: + with config_path.open('r') as f: + self.config = yaml.safe_load(f) + + + # USE BELOW FOR HYDRA + OMEGACONF: + # try: + # config_dict: Union[Dict[str, Any], Any] = OmegaConf.to_container( + # cfg, resolve=True + # ) + # config = Config(**config_dict) + # except ValidationError as e: + # log.exception(e) + # raise e + # return config, config_dict + + # def init_var_dicts(self): + # """ + # Create lookup tables for CSDMS variables and init variable arrays. + # """ + # # Make lookup tables for variable name (Peckham et al.). + # self._var_name_map_long_first = { + # long_name:self._var_name_units_map[long_name][0] for \ + # long_name in self._var_name_units_map.keys() + # } + # self._var_name_map_short_first = { + # self._var_name_units_map[long_name][0]:long_name for \ + # long_name in self._var_name_units_map.keys()} + # self._var_units_map = { + # long_name:self._var_name_units_map[long_name][1] for \ + # long_name in self._var_name_units_map.keys() + # } + + # # Initialize inputs and outputs. + # for var in self.config['observations']['var_t_nn'] + self.config['observations']['var_c_nn']: + # standard_name = self._var_name_map_short_first[var] + # self._nn_values[standard_name] = [] + # # setattr(self, var, 0) + + # for var in self.config['observations']['var_t_hydro_model'] + self.config['observations']['var_c_hydro_model']: + # standard_name = self._var_name_map_short_first[var] + # self._pm_values[standard_name] = [] + # # setattr(self, var, 0) + + # def scale_output(self) -> None: + # """ + # Scale and return more meaningful output from wrapped model. + # """ + # models = self.config['hydro_models'][0] + + # # TODO: still have to finish finding and undoing scaling applied before + # # model run. (See some checks used in bmi_lstm.py.) + + # # Strip unnecessary time and variable dims. This gives 1D array of flow + # # at each basin. + # # TODO: setup properly for multiple models later. + # self.streamflow_cms = self.preds[models]['flow_sim'].squeeze() + + # def _get_batch_sample(self, config: Dict, dataset_dictionary: Dict[str, torch.Tensor], + # i_s: int, i_e: int) -> Dict[str, torch.Tensor]: + # """ + # Take sample of data for testing batch. 
+ # """ + # dataset_sample = {} + # for key, value in dataset_dictionary.items(): + # if value.ndim == 3: + # # TODO: I don't think we actually need this. + # # Remove the warmup period for all except airtemp_memory and hydro inputs. + # if key in ['airT_mem_temp_model', 'x_phy', 'inputs_nn_scaled']: + # warm_up = 0 + # else: + # warm_up = config['warm_up'] + # dataset_sample[key] = value[warm_up:, i_s:i_e, :].to(config['device']) + # elif value.ndim == 2: + # dataset_sample[key] = value[i_s:i_e, :].to(config['device']) + # else: + # raise ValueError(f"Incorrect input dimensions. {key} array must have 2 or 3 dimensions.") + # return dataset_sample + + # def _values_to_dict(self) -> None: + # """ + # Take CSDMS Standard Name-mapped forcings + attributes and construct data + # dictionary for NN and physics model. + # """ + # # n_basins = self.config['batch_basins'] + # n_basins = 671 + # rho = self.config['rho'] + + # # Initialize dict arrays. + # # NOTE: used to have rho+1 here but this is no longer necessary? + # x_nn = np.zeros((rho + 1, n_basins, len(self.config['observations']['var_t_nn']))) + # c_nn = np.zeros((rho + 1, n_basins, len(self.config['observations']['var_c_nn']))) + # x_phy = np.zeros((rho + 1, n_basins, len(self.config['observations']['var_t_hydro_model']))) + # c_hydro_model = np.zeros((n_basins, len(self.config['observations']['var_c_hydro_model']))) + + # for i, var in enumerate(self.config['observations']['var_t_nn']): + # standard_name = self._var_name_map_short_first[var] + # # NOTE: Using _values is a bit hacky. Should use get_values I think. + # x_nn[:, :, i] = np.array([self._nn_values[standard_name]]) + + # for i, var in enumerate(self.config['observations']['var_c_nn']): + # standard_name = self._var_name_map_short_first[var] + # c_nn[:, :, i] = np.array([self._nn_values[standard_name]]) + + # for i, var in enumerate(self.config['observations']['var_t_hydro_model']): + # standard_name = self._var_name_map_short_first[var] + # x_phy[:, :, i] = np.array([self._pm_values[standard_name]]) + + # for i, var in enumerate(self.config['observations']['var_c_hydro_model']): + # standard_name = self._var_name_map_short_first[var] + # c_hydro_model[:, i] = np.array([self._pm_values[standard_name]]) + + # self.dataset_dict = { + # 'inputs_nn_scaled': np.concatenate((x_nn, c_nn), axis=2), #[np.newaxis,:,:], + # 'x_phy': x_phy, #[np.newaxis,:,:], + # 'c_hydro_model': c_hydro_model + # } + # print(self.dataset_dict['inputs_nn_scaled'].shape) + + # # Convert to torch tensors: + # for key in self.dataset_dict.keys(): + # if type(self.dataset_dict[key]) == np.ndarray: + # self.dataset_dict[key] = torch.from_numpy(self.dataset_dict[key]).float() #.to(self.config['device']) + + # def get_csdms_name(self, var_name): + # """ + # Get CSDMS Standard Name from variable name. 
+ # """ + # return self._var_name_map_long_first[var_name] + diff --git a/src/dHBV_2_0/bmi_dm.py b/src/dHBV_2_0/bmi_dm.py deleted file mode 100644 index ca8785b..0000000 --- a/src/dHBV_2_0/bmi_dm.py +++ /dev/null @@ -1,883 +0,0 @@ -"""BMI wrapper for interfacing dHBV 2.0 with NOAA-OWP NextGen framework.""" -import sys - - -import logging -import os -import time -from pathlib import Path -from typing import Any, Dict, Optional, Union - -import numpy as np -import torch -import yaml -from bmipy import Bmi -from conf import config -from core.data import take_sample_test -from models.model_handler import ModelHandler -from omegaconf import DictConfig, OmegaConf -from pydantic import ValidationError -from ruamel.yaml import YAML - -log = logging.getLogger(__name__) - - -class BmiDm(Bmi): - def __init__(self, config_filepath: Optional[str] = None, verbose=False): - """ - Create an instance of a differentiable, physics-informed ML model BMI - for dHBV 2.0UH (Song et al., 2024). - - Parameters - ---------- - config_filepath : str, optional - Path to the BMI configuration file. - verbose : bool, optional - Enables debug print statements if True. - """ - super().__init__() - self._model = None - self._initialized = False - self.verbose = verbose - - self._values = {} - self._nn_values = {} - self._pm_values = {} - self._start_time = 0.0 - self._end_time = np.finfo(float).max - self._time_units = 'day' # NOTE: NextGen currently only supports seconds. - self._time_step_size = 1.0 - self._var_array_lengths = 1 - - # Timing BMI computations - t_start = time.time() - self.bmi_process_time = 0 - - # Basic model attributes - _att_map = { - 'model_name': "Differentiable, Physics-informed ML BMI", - 'version': '1.5', - 'author_name': 'MHPI, Leo Lonzarich', - } - - # Input forcing/attribute CSDMS Standard Names - self._input_var_names = [ - ############## Forcings ############## - 'atmosphere_water__liquid_equivalent_precipitation_rate', - 'land_surface_air__temperature', - 'land_surface_air__max_of_temperature', # custom name - 'land_surface_air__min_of_temperature', # custom name - 'day__length', # custom name - 'land_surface_water__potential_evaporation_volume_flux', # check name, - ############## Attributes ############## - # ------------- CAMELS ------------- # - 'atmosphere_water__daily_mean_of_liquid_equivalent_precipitation_rate', - 'land_surface_water__daily_mean_of_potential_evaporation_flux', - 'p_seasonality', # custom name - 'atmosphere_water__precipitation_falling_as_snow_fraction', - 'ratio__mean_potential_evapotranspiration__mean_precipitation', - 'atmosphere_water__frequency_of_high_precipitation_events', - 'atmosphere_water__mean_duration_of_high_precipitation_events', - 'atmosphere_water__precipitation_frequency', - 'atmosphere_water__low_precipitation_duration', - 'basin__mean_of_elevation', - 'basin__mean_of_slope', - 'basin__area', - 'land_vegetation__forest_area_fraction', - 'land_vegetation__max_monthly_mean_of_leaf-area_index', - 'land_vegetation__diff_max_min_monthly_mean_of_leaf-area_index', - 'land_vegetation__max_monthly_mean_of_green_vegetation_fraction', - 'land_vegetation__diff__max_min_monthly_mean_of_green_vegetation_fraction', - 'region_state_land~covered__area_fraction', # custom name - 'region_state_land~covered__area', # custom name - 'root__depth', # custom name - 'soil_bedrock_top__depth__pelletier', - 'soil_bedrock_top__depth__statsgo', - 'soil__porosity', - 'soil__saturated_hydraulic_conductivity', - 'maximum_water_content', - 'soil_sand__volume_fraction', - 
'soil_silt__volume_fraction', - 'soil_clay__volume_fraction', - 'geol_1st_class', # custom name - 'geol_1st_class__fraction', # custom name - 'geol_2nd_class', # custom name - 'geol_2nd_class__fraction', # custom name - 'basin__carbonate_rocks_area_fraction', - 'soil_active-layer__porosity', # check name - 'bedrock__permeability' - # -------------- CONUS -------------- # - # 'land_surface_water__Hargreaves_potential_evaporation_volume_flux', - # 'free_land_surface_water', # check name - # 'soil_clay__attr', # custom name; need to confirm - # 'soil_gravel__attr', # custom name; need to confirm - # 'soil_sand__attr', # custo=m name; need to confirm - # 'soil_silt__attr', # custom name; need to confirm - # 'land_vegetation__normalized_diff_vegitation_index', # custom name - # 'soil_clay__grid', # custom name - # 'soil_sand__grid', # custom name - # 'soil_silt__grid', # custom name - # 'land_surface_water__glacier_fraction', # custom name - # 'atmosphere_water__daily_mean_of_liquid_equivalent_precipitation_rate', - # 'atmosphere_water__daily_mean_of_temperature', # custom name - # 'land_surface_water__potential_evaporation_volume_flux_seasonality', # custom name - # 'land_surface_water__snow_fraction', - ] - - # Output variable names (CSDMS standard names) - self._output_var_names = [ - 'land_surface_water__runoff_volume_flux', - 'srflow', - 'ssflow', - 'gwflow', - 'AET_hydro', - 'PET_hydro', - 'flow_sim_no_rout', - 'srflow_no_rout', - 'ssflow_no_rout', - 'gwflow_no_rout', - 'excs', - 'evapfactor', - 'tosoil', - 'percolation', - 'BFI_sim' - ] - - # Map CSDMS Standard Names to the model's internal variable names (For CAMELS, CONUS). - self._var_name_units_map = { - ############## Forcings ############## - # ------------- CAMELS ------------- # - 'atmosphere_water__liquid_equivalent_precipitation_rate':['prcp(mm/day)', 'mm d-1'], - 'land_surface_air__temperature':['tmean(C)','degC'], - 'land_surface_air__max_of_temperature':['tmax(C)', 'degC'], # custom name - 'land_surface_air__min_of_temperature':['tmin(C)', 'degC'], # custom name - 'day__length':['dayl(s)', 's'], # custom name - 'land_surface_water__potential_evaporation_volume_flux':['PET_hargreaves(mm/day)', 'mm d-1'], # check name - # -------------- CONUS -------------- # - # 'atmosphere_water__liquid_equivalent_precipitation_rate':['P', 'mm d-1'], - # 'land_surface_air__temperature':['Temp','degC'], - # 'land_surface_water__potential_evaporation_volume_flux':['PET', 'mm d-1'], # check name - ############## Attributes ############## - # -------------- CAMELS -------------- # - 'atmosphere_water__daily_mean_of_liquid_equivalent_precipitation_rate':['p_mean','mm d-1'], - 'land_surface_water__daily_mean_of_potential_evaporation_flux':['pet_mean','mm d-1'], - 'p_seasonality':['p_seasonality', '-'], # custom name - 'atmosphere_water__precipitation_falling_as_snow_fraction':['frac_snow','-'], - 'ratio__mean_potential_evapotranspiration__mean_precipitation':['aridity','-'], - 'atmosphere_water__frequency_of_high_precipitation_events':['high_prec_freq','d yr-1'], - 'atmosphere_water__mean_duration_of_high_precipitation_events':['high_prec_dur','d'], - 'atmosphere_water__precipitation_frequency':['low_prec_freq','d yr-1'], - 'atmosphere_water__low_precipitation_duration':['low_prec_dur','d'], - 'basin__mean_of_elevation':['elev_mean','m'], - 'basin__mean_of_slope':['slope_mean','m km-1'], - 'basin__area':['area_gages2','km2'], - 'land_vegetation__forest_area_fraction':['frac_forest','-'], - 
'land_vegetation__max_monthly_mean_of_leaf-area_index':['lai_max','-'], - 'land_vegetation__diff_max_min_monthly_mean_of_leaf-area_index':['lai_diff','-'], - 'land_vegetation__max_monthly_mean_of_green_vegetation_fraction':['gvf_max','-'], - 'land_vegetation__diff__max_min_monthly_mean_of_green_vegetation_fraction':['gvf_diff','-'], - 'region_state_land~covered__area_fraction':['dom_land_cover_frac', 'percent'], # custom name - 'region_state_land~covered__area':['dom_land_cover', '-'], # custom name - 'root__depth':['root_depth_50', '-'], # custom name - 'soil_bedrock_top__depth__pelletier':['soil_depth_pelletier','m'], - 'soil_bedrock_top__depth__statsgo':['soil_depth_statsgo','m'], - 'soil__porosity':['soil_porosity','-'], - 'soil__saturated_hydraulic_conductivity':['soil_conductivity','cm hr-1'], - 'maximum_water_content':['max_water_content','m'], - 'soil_sand__volume_fraction':['sand_frac','percent'], - 'soil_silt__volume_fraction':['silt_frac','percent'], - 'soil_clay__volume_fraction':['clay_frac','percent'], - 'geol_1st_class':['geol_1st_class', '-'], # custom name - 'geol_1st_class__fraction':['glim_1st_class_frac', '-'], # custom name - 'geol_2nd_class':['geol_2nd_class', '-'], # custom name - 'geol_2nd_class__fraction':['glim_2nd_class_frac', '-'], # custom name - 'basin__carbonate_rocks_area_fraction':['carbonate_rocks_frac','-'], - 'soil_active-layer__porosity':['geol_porosity', '-'], # check name - 'bedrock__permeability':['geol_permeability','m2'], - 'drainage__area':['DRAIN_SQKM', 'km2'], # custom name - 'land_surface__latitude':['lat','degrees'], - # --------------- CONUS --------------- # - # 'basin__area':['uparea','km2'], - # 'land_surface_water__Hargreaves_potential_evaporation_volume_flux':['ETPOT_Hargr', 'mm d-1'], # check name - # 'free_land_surface_water':['FW', 'mm d-1'], # check name - # 'soil_clay__attr':['HWSD_clay','percent'], # custom name; need to confirm - # 'soil_gravel__attr':['HWSD_gravel','percent'], # custom name; need to confirm - # 'soil_sand__attr':['HWSD_sand','percent'], # custom name; need to confirm - # 'soil_silt__attr':['HWSD_silt','percent'], # custom name; need to confirm - # 'land_vegetation__normalized_diff_vegitation_index':['NDVI','-'], # custom name - # 'soil_active-layer__porosity':['Porosity', '-'], # check name - # 'soil_clay__grid':['SoilGrids1km_clay','km2'], # custom name - # 'soil_sand__grid':['SoilGrids1km_sand','km2'], # custom name - # 'soil_silt__grid':['SoilGrids1km_silt','km2'], # custom name - # 'soil_clay__volume_fraction':['T_clay','percent'], - # 'soil_gravel__volume_fraction':['T_gravel','percent'], - # 'soil_sand__volume_fraction':['T_sand','percent'], - # 'soil_silt__volume_fraction':['T_silt','percent'], - # # Aridity in camels - # 'land_surface_water__glacier_fraction':['glaciers','percent'], # custom name - # 'atmosphere_water__daily_mean_of_liquid_equivalent_precipitation_rate':['meanP','mm d-1'], - # 'atmosphere_water__daily_mean_of_temperature':['meanTa','mm d-1'], # custom name - # 'basin__mean_of_elevation':['meanelevation','m'], - # 'basin__mean_of_slope':['meanslope','m km-1'], - # 'bedrock__permeability':['permeability','m2'], - # 'p_seasonality':['seasonality_P', '-'], # custom name - # 'land_surface_water__potential_evaporation_volume_flux_seasonality':['seasonality_PET', '-'], # custom name - # 'land_surface_water__snow_fraction':['snow_fraction','percent'], - # 'atmosphere_water__precipitation_falling_as_snow_fraction':['snowfall_fraction','percent'], - ############## Outputs ############## - # 
--------- CAMELS/CONUS ---------- # - 'land_surface_water__runoff_volume_flux':['flow_sim','m3 s-1'], - 'srflow':['srflow','m3 s-1'], - 'ssflow':['ssflow','m3 s-1'], - 'gwflow':['gwflow','m3 s-1'], - 'AET_hydro':['AET_hydro','m3 s-1'], - 'PET_hydro':['PET_hydro','m3 s-1'], - 'flow_sim_no_rout':['flow_sim_no_rout','m3 s-1'], - 'srflow_no_rout':['srflow_no_rout','m3 s-1'], - 'ssflow_no_rout':['ssflow_no_rout','m3 s-1'], - 'gwflow_no_rout':['gwflow_no_rout','m3 s-1'], - 'excs':['excs','-'], - 'evapfactor':['evapfactor','-'], - 'tosoil':['tosoil','m3 s-1'], - 'percolation':['percolation','-'], - 'BFI_sim':['BFI_sim','-'], - } - - if config_filepath: - # Read in model & BMI configurations. - self.initialize_config(config_filepath) - - # Create lookup tables for CSDMS variables + init variable arrays. - self.init_var_dicts() - - # Track total BMI runtime. - self.bmi_process_time += time.time() - t_start - if self.verbose: - log.info(f"BMI init took {time.time() - t_start} s") - - def initialize(self, config_filepath: Optional[str] = None) -> None: - """ - (BMI Control function) Initialize the dPLHydro model. - - The dPL model BMI operates in two modes: - (Necessitated by the fact that the dPL model's pNN is forwarded on all of - a prediction period's data at once. Forwarding on each timestep individually - without saving/loading hidden states would slash LSTM performance. However, - feeding in hidden states day by day leads to large efficiency losses vs - simply feeding all data at once, due to carrying gradients at each step.) - - 1) All attributes/forcings that will be forwarded on are fed to BMI before - 'bmi.initialize()'. The internal model is then forwarded on all data - and generates predictions during '.initialize()'. - - 2) Run '.initialize()', then pass data day by day as normal during - 'bmi.update()'. If the forwarding period is sufficiently small (say, <100 days), - then forwarding the LSTM on individual days with saved states is reasonable. - - To this end, a configuration file can be specified either during - `bmi.__init__()`, or during `.initialize()`. If running BMI as type (1), - the config must be passed in the former; otherwise it is passed in the latter for (2). - - Parameters - ---------- - config_filepath : str, optional - Path to the BMI configuration file. - """ - t_start = time.time() - - if not self.config: - # Read in model & BMI configurations. - self.initialize_config(config_filepath) - - # Create lookup tables for CSDMS variables + init variable arrays. - self.init_var_dicts() - - if not config_filepath: - raise ValueError("No configuration file given. A config path \ - must be passed at time of bmi init or .initialize() call.") - - # Set the simulation start time and get the timestep size. - self.current_time = self._start_time - self._time_step_size = self.config['time_step_delta'] - - # Load a trained model. - self._model = ModelHandler(self.config).to(self.config['device']) - self._initialized = True - - if self.config['forward_init']: - # Forward model on all data in this .initialize() step. - self.run_forward() - - # Track total BMI runtime. - self.bmi_process_time += time.time() - t_start - if self.verbose: - log.info(f"BMI initialize [ctrl fn] took {time.time() - t_start} s | Total runtime: {self.bmi_process_time} s") - - def update(self) -> None: - """ - (BMI Control function) Advance model state by one time step. - - Note: Models should be trained standalone with dPLHydro_PMI first before forward predictions with this BMI.
- """ - t_start = time.time() - self.current_time += self._time_step_size - - if not self.config['forward_init']: - # Conventional forward pass during .update() - self.run_forward() - - # Track total BMI runtime. - self.bmi_process_time += time.time() - t_start - if self.verbose: - log.info(f"BMI update [ctrl fn] took {time.time() - t_start} s | Total runtime: {self.bmi_process_time} s") - - def run_forward(self): - """ - Forward model and save outputs to return on update call. - """ - # Format inputs - self._values_to_dict() - - ngrid = self.dataset_dict['inputs_nn_scaled'].shape[1] - i_start = np.arange(0, ngrid, self.config['batch_basins']) - i_end = np.append(i_start[1:], ngrid) - - batched_preds_list = [] - # Forward through basins in batches. - for i in range(len(i_start)): - dataset_dict_sample = self._get_batch_sample(self.config, self.dataset_dict, - i_start[i], i_end[i]) - - # TODO: Include architecture here for saving/loading states of hydro - # model and pNN for single timestep updates. - - # Forward dPLHydro model - self.preds = self._model.forward(dataset_dict_sample, eval=True) - - # For single hydrology model. - model_name = self.config['hydro_models'][0] - batched_preds_list.append({key: tensor.cpu().detach() for key, - tensor in self.preds[model_name].items()}) - - # TODO: Expand list of supported outputs (e.g., a dict of output vars). - preds = torch.cat([d['flow_sim'] for d in batched_preds_list], dim=1) - preds = preds.numpy() - - # Scale and check output - self.scale_output() - - def update_frac(self, time_frac: float) -> None: - """ - Update model by a fraction of a time step. - - Parameters - ---------- - time_frac : float - Fraction fo a time step. - """ - if self.verbose: - print("Warning: This model is trained to make predictions on one day timesteps.") - time_step = self.get_time_step() - self._time_step_size = self._time_step_size * time_frac - self.update() - self._time_step_size = time_step - - def update_until(self, end_time: float) -> None: - """ - (BMI Control function) Update model until a particular time. - Note: Models should be trained standalone with dPLHydro_PMI first before forward predictions with this BMI. - - Parameters - ---------- - end_time : float - Time to run model until. - """ - t_start = time.time() - - n_steps = (end_time - self.get_current_time()) / self.get_time_step() - - for _ in range(int(n_steps)): - self.update() - self.update_frac(n_steps - int(n_steps)) - - # Keep running total of BMI runtime. - self.bmi_process_time += time.time() - t_start - if self.verbose: - log.info(f"BMI update_until [ctrl fn] took {time.time() - t_start} s | Total runtime: {self.bmi_process_time} s") - - def finalize(self) -> None: - """ - (BMI Control function) Finalize model. - """ - # TODO: Force destruction of ESMF and other objects when testing is done - # to save space. - - torch.cuda.empty_cache() - self._model = None - - def array_to_tensor(self) -> None: - """ - Converts input values into Torch tensor object to be read by model. - """ - raise NotImplementedError("array_to_tensor") - - def tensor_to_array(self) -> None: - """ - Converts model output Torch tensor into date + gradient arrays to be - passed out of BMI for backpropagation, loss, optimizer tuning. - """ - raise NotImplementedError("tensor_to_array") - - def get_tensor_slice(self): - """ - Get tensor of input data for a single timestep. 
- """ - # sample_dict = take_sample_test(self.bmi_config, self.dataset_dict) - # self.input_tensor = torch.Tensor() - - raise NotImplementedError("get_tensor_slice") - - def get_var_type(self, var_name): - """ - Data type of variable. - - Parameters - ---------- - var_name : str - Name of variable as CSDMS Standard Name. - - Returns - ------- - str - Data type. - """ - return str(self.get_value_ptr(var_name).dtype) - - def get_var_units(self, var_standard_name): - """Get units of variable. - - Parameters - ---------- - var_standard_name : str - Name of variable as CSDMS Standard Name. - - Returns - ------- - str - Variable units. - """ - return self._var_units_map[var_standard_name] - - def get_var_nbytes(self, var_name): - """Get units of variable. - - Parameters - ---------- - var_name : str - Name of variable as CSDMS Standard Name. - - Returns - ------- - int - Size of data array in bytes. - """ - return self.get_value_ptr(var_name).nbytes - - def get_var_itemsize(self, name): - return np.dtype(self.get_var_type(name)).itemsize - - def get_var_location(self, name): - return self._var_loc[name] - - def get_var_grid(self, var_name): - """Grid id for a variable. - - Parameters - ---------- - var_name : str - Name of variable as CSDMS Standard Name. - - Returns - ------- - int - Grid id. - """ - # for grid_id, var_name_list in self._grids.items(): - # if var_name in var_name_list: - # return grid_id - raise NotImplementedError("get_var_grid") - - def get_grid_rank(self, grid_id): - """Rank of grid. - - Parameters - ---------- - grid_id : int - Identifier of a grid. - - Returns - ------- - int - Rank of grid. - """ - # return len(self._model.shape) - raise NotImplementedError("get_grid_rank") - - def get_grid_size(self, grid_id): - """Size of grid. - - Parameters - ---------- - grid_id : int - Identifier of a grid. - - Returns - ------- - int - Size of grid. - """ - # return int(np.prod(self._model.shape)) - raise NotImplementedError("get_grid_size") - - def get_value_ptr(self, var_standard_name: str, model:str) -> np.ndarray: - """Reference to values. - - Parameters - ---------- - var_standard_name : str - Name of variable as CSDMS Standard Name. - - Returns - ------- - array_like - Value array. - """ - if model == 'nn': - if var_standard_name not in self._nn_values.keys(): - raise ValueError(f"No known variable in BMI model: {var_standard_name}") - return self._nn_values[var_standard_name] - - elif model == 'pm': - if var_standard_name not in self._pm_values.keys(): - raise ValueError(f"No known variable in BMI model: {var_standard_name}") - return self._pm_values[var_standard_name] - - else: - raise ValueError("Valid model type (nn or pm) must be specified.") - - def get_value(self, var_name, dest): - """Copy of values. - - Parameters - ---------- - var_name : str - Name of variable as CSDMS Standard Name. - dest : ndarray - A numpy array into which to place the values. - - Returns - ------- - array_like - Copy of values. - """ - dest[:] = self.get_value_ptr(var_name).flatten() - return dest - - def get_value_at_indices(self, var_name, dest, indices): - """Get values at particular indices. - - Parameters - ---------- - var_name : str - Name of variable as CSDMS Standard Name. - dest : ndarray - A numpy array into which to place the values. - indices : array_like - Array of indices. - - Returns - ------- - array_like - Values at indices. 
- """ - dest[:] = self.get_value_ptr(var_name).take(indices) - return dest - - def set_value(self, var_name, values: np.ndarray, model:str): - """Set model values. - - Parameters - ---------- - var_name : str - Name of variable as CSDMS Standard Name. - values : array_like - Array of new values. - """ - if not isinstance(values, (np.ndarray, list, tuple)): - values = np.array([values]) - - val = self.get_value_ptr(var_name, model=model) - - # val = values.reshape(val.shape) - val[:] = values - - def set_value_at_indices(self, name, inds, src): - """Set model values at particular indices. - - Parameters - ---------- - var_name : str - Name of variable as CSDMS Standard Name. - src : array_like - Array of new values. - indices : array_like - Array of indices. - """ - val = self.get_value_ptr(name) - val.flat[inds] = src - - def get_component_name(self): - """Name of the component.""" - return self._name - - def get_input_item_count(self): - """Get names of input variables.""" - return len(self._input_var_names) - - def get_output_item_count(self): - """Get names of output variables.""" - return len(self._output_var_names) - - def get_input_var_names(self): - """Get names of input variables.""" - return self._input_var_names - - def get_output_var_names(self): - """Get names of output variables.""" - return self._output_var_names - - def get_grid_shape(self, grid_id, shape): - """Number of rows and columns of uniform rectilinear grid.""" - # var_name = self._grids[grid_id][0] - # shape[:] = self.get_value_ptr(var_name).shape - # return shape - raise NotImplementedError("get_grid_shape") - - def get_grid_spacing(self, grid_id, spacing): - """Spacing of rows and columns of uniform rectilinear grid.""" - # spacing[:] = self._model.spacing - # return spacing - raise NotImplementedError("get_grid_spacing") - - def get_grid_origin(self, grid_id, origin): - """Origin of uniform rectilinear grid.""" - # origin[:] = self._model.origin - # return origin - raise NotImplementedError("get_grid_origin") - - def get_grid_type(self, grid_id): - """Type of grid.""" - # return self._grid_type[grid_id] - raise NotImplementedError("get_grid_type") - - def get_start_time(self): - """Start time of model.""" - return self._start_time - - def get_end_time(self): - """End time of model.""" - return self._end_time - - def get_current_time(self): - return self._current_time - - def get_time_step(self): - return self._time_step_size - - def get_time_units(self): - return self._time_units - - def get_grid_edge_count(self, grid): - raise NotImplementedError("get_grid_edge_count") - - def get_grid_edge_nodes(self, grid, edge_nodes): - raise NotImplementedError("get_grid_edge_nodes") - - def get_grid_face_count(self, grid): - raise NotImplementedError("get_grid_face_count") - - def get_grid_face_nodes(self, grid, face_nodes): - raise NotImplementedError("get_grid_face_nodes") - - def get_grid_node_count(self, grid): - """Number of grid nodes. - - Parameters - ---------- - grid : int - Identifier of a grid. - - Returns - ------- - int - Size of grid. 
- """ - # return self.get_grid_size(grid) - raise NotImplementedError("get_grid_node_count") - - def get_grid_nodes_per_face(self, grid, nodes_per_face): - raise NotImplementedError("get_grid_nodes_per_face") - - def get_grid_face_edges(self, grid, face_edges): - raise NotImplementedError("get_grid_face_edges") - - def get_grid_x(self, grid, x): - raise NotImplementedError("get_grid_x") - - def get_grid_y(self, grid, y): - raise NotImplementedError("get_grid_y") - - def get_grid_z(self, grid, z): - raise NotImplementedError("get_grid_z") - - def initialize_config(self, config_path: str) -> Dict: - """ - Check that config_path is valid path and convert config into a - dictionary object. - """ - config_path = Path(config_path).resolve() - - if not config_path: - raise RuntimeError("No BMI configuration path provided.") - elif not config_path.is_file(): - raise RuntimeError(f"BMI configuration not found at path {config_path}.") - else: - with config_path.open('r') as f: - self.config = yaml.safe_load(f) - - - # USE BELOW FOR HYDRA + OMEGACONF: - # try: - # config_dict: Union[Dict[str, Any], Any] = OmegaConf.to_container( - # cfg, resolve=True - # ) - # config = Config(**config_dict) - # except ValidationError as e: - # log.exception(e) - # raise e - # return config, config_dict - - def init_var_dicts(self): - """ - Create lookup tables for CSDMS variables and init variable arrays. - """ - # Make lookup tables for variable name (Peckham et al.). - self._var_name_map_long_first = { - long_name:self._var_name_units_map[long_name][0] for \ - long_name in self._var_name_units_map.keys() - } - self._var_name_map_short_first = { - self._var_name_units_map[long_name][0]:long_name for \ - long_name in self._var_name_units_map.keys()} - self._var_units_map = { - long_name:self._var_name_units_map[long_name][1] for \ - long_name in self._var_name_units_map.keys() - } - - # Initialize inputs and outputs. - for var in self.config['observations']['var_t_nn'] + self.config['observations']['var_c_nn']: - standard_name = self._var_name_map_short_first[var] - self._nn_values[standard_name] = [] - # setattr(self, var, 0) - - for var in self.config['observations']['var_t_hydro_model'] + self.config['observations']['var_c_hydro_model']: - standard_name = self._var_name_map_short_first[var] - self._pm_values[standard_name] = [] - # setattr(self, var, 0) - - def scale_output(self) -> None: - """ - Scale and return more meaningful output from wrapped model. - """ - models = self.config['hydro_models'][0] - - # TODO: still have to finish finding and undoing scaling applied before - # model run. (See some checks used in bmi_lstm.py.) - - # Strip unnecessary time and variable dims. This gives 1D array of flow - # at each basin. - # TODO: setup properly for multiple models later. - self.streamflow_cms = self.preds[models]['flow_sim'].squeeze() - - def _get_batch_sample(self, config: Dict, dataset_dictionary: Dict[str, torch.Tensor], - i_s: int, i_e: int) -> Dict[str, torch.Tensor]: - """ - Take sample of data for testing batch. - """ - dataset_sample = {} - for key, value in dataset_dictionary.items(): - if value.ndim == 3: - # TODO: I don't think we actually need this. - # Remove the warmup period for all except airtemp_memory and hydro inputs. 
- if key in ['airT_mem_temp_model', 'x_phy', 'inputs_nn_scaled']: - warm_up = 0 - else: - warm_up = config['warm_up'] - dataset_sample[key] = value[warm_up:, i_s:i_e, :].to(config['device']) - elif value.ndim == 2: - dataset_sample[key] = value[i_s:i_e, :].to(config['device']) - else: - raise ValueError(f"Incorrect input dimensions. {key} array must have 2 or 3 dimensions.") - return dataset_sample - - def _values_to_dict(self) -> None: - """ - Take CSDMS Standard Name-mapped forcings + attributes and construct data - dictionary for NN and physics model. - """ - # n_basins = self.config['batch_basins'] - n_basins = 671 - rho = self.config['rho'] - - # Initialize dict arrays. - # NOTE: used to have rho+1 here but this is no longer necessary? - x_nn = np.zeros((rho + 1, n_basins, len(self.config['observations']['var_t_nn']))) - c_nn = np.zeros((rho + 1, n_basins, len(self.config['observations']['var_c_nn']))) - x_phy = np.zeros((rho + 1, n_basins, len(self.config['observations']['var_t_hydro_model']))) - c_hydro_model = np.zeros((n_basins, len(self.config['observations']['var_c_hydro_model']))) - - for i, var in enumerate(self.config['observations']['var_t_nn']): - standard_name = self._var_name_map_short_first[var] - # NOTE: Using _values is a bit hacky. Should use get_values I think. - x_nn[:, :, i] = np.array([self._nn_values[standard_name]]) - - for i, var in enumerate(self.config['observations']['var_c_nn']): - standard_name = self._var_name_map_short_first[var] - c_nn[:, :, i] = np.array([self._nn_values[standard_name]]) - - for i, var in enumerate(self.config['observations']['var_t_hydro_model']): - standard_name = self._var_name_map_short_first[var] - x_phy[:, :, i] = np.array([self._pm_values[standard_name]]) - - for i, var in enumerate(self.config['observations']['var_c_hydro_model']): - standard_name = self._var_name_map_short_first[var] - c_hydro_model[:, i] = np.array([self._pm_values[standard_name]]) - - self.dataset_dict = { - 'inputs_nn_scaled': np.concatenate((x_nn, c_nn), axis=2), #[np.newaxis,:,:], - 'x_phy': x_phy, #[np.newaxis,:,:], - 'c_hydro_model': c_hydro_model - } - print(self.dataset_dict['inputs_nn_scaled'].shape) - - # Convert to torch tensors: - for key in self.dataset_dict.keys(): - if type(self.dataset_dict[key]) == np.ndarray: - self.dataset_dict[key] = torch.from_numpy(self.dataset_dict[key]).float() #.to(self.config['device']) - - def get_csdms_name(self, var_name): - """ - Get CSDMS Standard Name from variable name. 
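_values_to_dict() above assembles time-varying inputs as (rho + 1, basins, variables) arrays and static attributes as (basins, variables), then concatenates the NN forcings and attributes along the variable axis before converting to torch tensors. A shape-only sketch with illustrative sizes (the real variable counts come from the config):

import numpy as np
import torch

# Shape-only sketch of the dictionary built in _values_to_dict(); sizes illustrative.
rho, n_basins = 365, 4
x_nn = np.zeros((rho + 1, n_basins, 3))    # NN forcings over time
c_nn = np.zeros((rho + 1, n_basins, 5))    # NN attributes tiled over time
x_phy = np.zeros((rho + 1, n_basins, 2))   # physics-model forcings
c_phy = np.zeros((n_basins, 5))            # static physics-model attributes

dataset_dict = {
    'inputs_nn_scaled': torch.from_numpy(np.concatenate((x_nn, c_nn), axis=2)).float(),
    'x_phy': torch.from_numpy(x_phy).float(),
    'c_hydro_model': torch.from_numpy(c_phy).float(),
}
assert dataset_dict['inputs_nn_scaled'].shape == (rho + 1, n_basins, 3 + 5)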
- """ - return self._var_name_map_long_first[var_name] - \ No newline at end of file diff --git a/src/dHBV_2_0/example_dev.py b/src/dHBV_2_0/example_dev.py new file mode 100644 index 0000000..57daafa --- /dev/null +++ b/src/dHBV_2_0/example_dev.py @@ -0,0 +1,1011 @@ +# Need these for BMI +import os +import time +from pathlib import Path + +# Here is the LSTM model we want to run +# import nextgen_cuda_lstm +import lstm.nextgen_cuda_lstm as nextgen_cuda_lstm # (SDP) +# Import data_tools +# Basic utilities +import numpy as np +import pandas as pd +# LSTM here is based on PyTorch +import torch +# Configuration file functionality +import yaml +from bmipy import Bmi + +# These are not used (SDP) +### from torch import nn +### import sys + +#------------------------------------------------------------------------ +USE_PATH = True # (SDP) +if not(USE_PATH): + import os + +class bmi_LSTM(Bmi): + + def __init__(self): + """Create a Bmi LSTM model that is ready for initialization.""" + super(bmi_LSTM, self).__init__() + self._name = "LSTM for Next Generation NWM" + self._values = {} + self._var_loc = "node" + self._var_grid_id = 0 + self._var_grid_type = "scalar" + self._start_time = 0 + self._end_time = np.finfo("d").max + self._time_units = "hour" # (SDP) + self._time_step_size = 1.0 # (SDP) + + #---------------------------------------------- + # Required, static attributes of the model + #---------------------------------------------- + # Note: not currently in use + _att_map = { + 'model_name': 'LSTM for Next Generation NWM', + 'version': '1.0', + 'author_name': 'Jonathan Martin Frame' } + + #--------------------------------------------- + # Input variable names (CSDMS standard names) + #--------------------------------------------- + _input_var_names = [ + 'land_surface_radiation~incoming~longwave__energy_flux', + 'land_surface_air__pressure', + 'atmosphere_air_water~vapor__relative_saturation', + 'atmosphere_water__liquid_equivalent_precipitation_rate', ### SDP, 08/30/22 + ##### 'atmosphere_water__time_integral_of_precipitation_mass_flux', #### SDP + 'land_surface_radiation~incoming~shortwave__energy_flux', + 'land_surface_air__temperature', + 'land_surface_wind__x_component_of_velocity', + 'land_surface_wind__y_component_of_velocity'] + # (Next line didn't fix ngen pointer error) + # _input_var_names = [] + + #--------------------------------------------- + # Output variable names (CSDMS standard names) + #--------------------------------------------- + _output_var_names = ['land_surface_water__runoff_depth', + 'land_surface_water__runoff_volume_flux'] + # (Next line didn't fix ngen pointer error) + # _output_var_names = ['land_surface_water__runoff_volume_flux'] + + #------------------------------------------------------ + # Create a Python dictionary that maps CSDMS Standard + # Names to the model's internal variable names. + # This is going to get long, + # since the input variable names could come from any forcing... 
+ #------------------------------------------------------ + #_var_name_map_long_first = { + _var_name_units_map = { + 'land_surface_water__runoff_volume_flux':['streamflow_cms','m3 s-1'], + 'land_surface_water__runoff_depth':['streamflow_m','m'], + #-------------- Dynamic inputs -------------------------------- + #NJF Let the model assume equivalence of `kg m-2` == `mm h-1` since we can't convert + #mass flux automatically from the ngen framework + 'atmosphere_water__liquid_equivalent_precipitation_rate':['APCP_surface','mm h-1'], + 'land_surface_radiation~incoming~longwave__energy_flux':['DLWRF_surface','W m-2'], + 'land_surface_radiation~incoming~shortwave__energy_flux':['DSWRF_surface','W m-2'], + 'atmosphere_air_water~vapor__relative_saturation':['SPFH_2maboveground','kg kg-1'], + 'land_surface_air__pressure':['PRES_surface','Pa'], + 'land_surface_air__temperature':['TMP_2maboveground','degK'], + 'land_surface_wind__x_component_of_velocity':['UGRD_10maboveground','m s-1'], + 'land_surface_wind__y_component_of_velocity':['VGRD_10maboveground','m s-1'], + #-------------- STATIC Attributes ----------------------------- + 'basin__mean_of_elevation':['elev_mean','m'], + 'basin__mean_of_slope':['slope_mean','m km-1'], + } + + _static_attributes_list = ['elev_mean','slope_mean'] + + def __getattribute__(self, item): + """ + Customize instance attribute access. + + For those items that correspond to BMI input or output variables (which should be in numpy arrays) and have + values that are just a single-element array, deviate from the standard behavior and return the single array + element. Fall back to the default behavior in any other case. + + This supports having a BMI variable be backed by a numpy array, while also allowing the attribute to be used as + just a scalar, as it is in many places for this type. + + Parameters + ---------- + item + The name of the attribute item to get. + + Returns + ------- + The value of the named item. + """ + # Have these work explicitly (or else loops) + if item == '_input_var_names' or item == '_output_var_names': + return super(bmi_LSTM, self).__getattribute__(item) + + # By default, for things other than BMI variables, use normal behavior + if item not in super(bmi_LSTM, self).__getattribute__('_input_var_names') and item not in super(bmi_LSTM, self).__getattribute__('_output_var_names'): + return super(bmi_LSTM, self).__getattribute__(item) + + # Return the single scalar value from any ndarray of size 1 + value = super(bmi_LSTM, self).__getattribute__(item) + if isinstance(value, np.ndarray) and value.size == 1: + return value[0] + else: + return value + + def __setattr__(self, key, value): + """ + Customized instance attribute mutator functionality. + + For those attribute with keys indicating they are a BMI input or output variable (which should be in numpy + arrays), wrap any scalar ``value`` as a one-element numpy array and use that in a nested call to the superclass + implementation of this function. In any other cases, just pass the given ``key`` and ``value`` to a nested + call. + + This supports automatically having a BMI variable be backed by a numpy array, even if it is initialized using a + scalar, while otherwise maintaining standard behavior. 
+ + Parameters + ---------- + key + value + + Returns + ------- + + """ + # Have these work explicitly (or else loops) + if key == '_input_var_names' or key == '_output_var_names': + super(bmi_LSTM, self).__setattr__(key, value) + + # Pass thru if value is already an array + if isinstance(value, np.ndarray): + super(bmi_LSTM, self).__setattr__(key, value) + # Override to put scalars into ndarray for BMI input/output variables + elif key in self._input_var_names or key in self._output_var_names: + super(bmi_LSTM, self).__setattr__(key, np.array([value])) + # By default, use normal behavior + else: + super(bmi_LSTM, self).__setattr__(key, value) + + #------------------------------------------------------------ + #------------------------------------------------------------ + # BMI: Model Control Functions + #------------------------------------------------------------ + #------------------------------------------------------------ + + #------------------------------------------------------------------- + def initialize( self, bmi_cfg_file=None ): + #NJF ensure this is a Path type so the following open works as expected + #When used with NGen, the bmi_cfg_file is just a string... + + bmi_cfg_file = Path(bmi_cfg_file) + # ----- Create some lookup tables from the long variable names --------# + self._var_name_map_long_first = {long_name:self._var_name_units_map[long_name][0] for \ + long_name in self._var_name_units_map.keys()} + self._var_name_map_short_first = {self._var_name_units_map[long_name][0]:long_name for \ + long_name in self._var_name_units_map.keys()} + self._var_units_map = {long_name:self._var_name_units_map[long_name][1] for \ + long_name in self._var_name_units_map.keys()} + + # -------------- Initialize all the variables --------------------------# + # -------------- so that they'll be picked up with the get functions --# + for var_name in list(self._var_name_units_map.keys()): + # ---------- All the variables are single values ------------------# + # ---------- so just set to zero for now. ------------------# + self._values[var_name] = 0.0 + setattr( self, var_name, 0.0 ) + + # -------------- Read in the BMI configuration -------------------------# + # This will direct all the next moves. + if bmi_cfg_file is not None: + #---------------------------------------------------------- + # Note: bmi_cfg_file should have type 'str', vs. being a + # Path object. So apply Path in initialize(). (SDP) + #---------------------------------------------------------- + ### with bmi_cfg_file.open('r') as fp: # (orig) + with open(bmi_cfg_file,'r') as fp: # (SDP) + cfg = yaml.safe_load(fp) + self.cfg_bmi = self._parse_config(cfg) + else: + print("Error: No configuration provided, nothing to do...") + + # Number of individual ensemble members + self.N_ENS = len(self.cfg_bmi['train_cfg_file']) + + # Note: these need to be initialized here as scale_output() is called in update() + self.lstm_output = {i_ens:0.0 for i_ens in range(self.N_ENS)} + self.streamflow_cms = {i_ens:0.0 for i_ens in range(self.N_ENS)} + self.streamflow_fms = {i_ens:0.0 for i_ens in range(self.N_ENS)} + self.surface_runoff_mm = {i_ens:0.0 for i_ens in range(self.N_ENS)} + + # Gather verbosity level from bmi-config for stdout printing, etc.
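The __getattribute__/__setattr__ pair above keeps every BMI variable backed by a numpy array while letting the rest of the code treat it as a plain scalar. A self-contained toy class showing the same convention (this is not the class above, just an illustration of the trick):

import numpy as np

class ArrayBacked:
    _bmi_vars = {'land_surface_air__temperature'}

    def __setattr__(self, key, value):
        # Wrap scalar assignments to BMI variables in a 1-element array
        if key in self._bmi_vars and not isinstance(value, np.ndarray):
            value = np.array([value])
        super().__setattr__(key, value)

    def __getattribute__(self, item):
        value = super().__getattribute__(item)
        # Single-element arrays read back as plain scalars
        if isinstance(value, np.ndarray) and value.size == 1:
            return value[0]
        return value

m = ArrayBacked()
m.land_surface_air__temperature = 21.5
assert m.land_surface_air__temperature == 21.5   # scalar view for model code
backing = object.__getattribute__(m, 'land_surface_air__temperature')
assert isinstance(backing, np.ndarray) and backing.shape == (1,)  # BMI view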
+ self.verbose = self.cfg_bmi['verbose'] + if self.verbose == 0: + print("Will not print anything except errors because verbosity set to", self.verbose) + if self.verbose == 1: + print("Will print warnings and errors because verbosity set to", self.verbose) + if self.verbose > 1: + print("Will print warnings, errors and random information because verbosity set to", self.verbose) + print("self.verbose", self.verbose) + + # ------------- Load in the configuration file for the specific LSTM --# + # This will include all the details about how the model was trained + # Inputs, outputs, hyper-parameters, scalers, weights, etc. etc. + self.get_training_configurations() + self.get_scaler_values() + + # ------------- Initialize an ENSEMBLE OF LSTM models ------------------------------# + self.lstm = {} + self.h_t = {} + self.c_t = {} + + for i_ens in range(self.N_ENS): + self.lstm[i_ens] = nextgen_cuda_lstm.Nextgen_CudaLSTM(input_size=self.input_size[i_ens], + hidden_layer_size=self.hidden_layer_size[i_ens], + output_size=self.output_size[i_ens], + batch_size=1, + seq_length=1) + + # ------------ Load in the trained weights ----------------------------# + # Save the default model weights. We need to make sure we have the same keys. + default_state_dict = self.lstm[i_ens].state_dict() + + # Trained model weights from Neuralhydrology. + if (USE_PATH): # (SDP) + + if self.verbose > 0: + print(self.cfg_train[i_ens]['run_dir']) + + trained_model_file = self.cfg_train[i_ens]['run_dir'] / 'model_epoch{}.pt'.format(str(self.cfg_train[i_ens]['epochs']).zfill(3)) + else: + str1 = self.cfg_train[i_ens]['run_dir'] + '/' + 'model_epoch{}.pt' + trained_model_file = str1.format(str(self.cfg_train[i_ens]['epochs']).zfill(3)) + + trained_state_dict = torch.load(trained_model_file, map_location=torch.device('cpu')) + + # Changing the name of the head weights, since different in NH + trained_state_dict['head.weight'] = trained_state_dict.pop('head.net.0.weight') + trained_state_dict['head.bias'] = trained_state_dict.pop('head.net.0.bias') + trained_state_dict = {x:trained_state_dict[x] for x in default_state_dict.keys()} + + # Load in the trained weights. + self.lstm[i_ens].load_state_dict(trained_state_dict) + + # ------------- Initialize the values for the input to the LSTM -----# + # jmframe(jan 27): If we assume all models have the same inputs, this only needs to happen once. + if i_ens == 0: + self.set_static_attributes() + self.initialize_forcings() + + if self.cfg_bmi['initial_state'] == 'zero': + self.h_t[i_ens] = torch.zeros(1, self.batch_size, self.hidden_layer_size[i_ens]).float() + self.c_t[i_ens] = torch.zeros(1, self.batch_size, self.hidden_layer_size[i_ens]).float() + + # ------------- Start a simulation time -----------------------------# + # jmframe: Since the simulation time here doesn't really matter. 
+ # Just use seconds and set the time to zero + # But add some logic maybe, so as to be able to start at some time + self.t = self._start_time + + # ----------- The output is area-normalized; this is needed to un-normalize it + # mm->m km2 -> m2 hour->s + self.output_factor_cms = (1/1000) * (self.cfg_bmi['area_sqkm'] * 1000*1000) * (1/3600) + + #------------------------------------------------------------ + def update(self): + with torch.no_grad(): + + self.create_scaled_input_tensor() + + for i_ens in range(self.N_ENS): + + self.lstm_output[i_ens], self.h_t[i_ens], self.c_t[i_ens] = self.lstm[i_ens].forward(self.input_tensor[i_ens], self.h_t[i_ens], self.c_t[i_ens]) + + self.scale_output(i_ens) + + self.ensemble_output() + + #self.t += self._time_step_size + self.t += self.get_time_step() + + #------------------------------------------------------------ + def update_frac(self, time_frac): + """Update model by a fraction of a time step. + Parameters + ---------- + time_frac : float + Fraction of a time step. + """ + if self.verbose > 0: + print("Warning: This version of the LSTM is designed to make predictions on one-hour timesteps.") + time_step = self.get_time_step() + self._time_step_size = time_frac * self._time_step_size + self.update() + self._time_step_size = time_step + + #------------------------------------------------------------ + def update_until(self, then): + """Update model until a particular time. + Parameters + ---------- + then : float + Time to run model until. + """ + if self.verbose > 0: + print("then", then) + print("self.get_current_time()", self.get_current_time()) + print("self.get_time_step()", self.get_time_step()) + n_steps = (then - self.get_current_time()) / self.get_time_step() + + for _ in range(int(n_steps)): + self.update() + self.update_frac(n_steps - int(n_steps)) + + #------------------------------------------------------------ + def finalize( self ): + """Finalize model.""" + self._model = None + + #------------------------------------------------------------ + #------------------------------------------------------------ + # LSTM: SETUP Functions + #------------------------------------------------------------ + #------------------------------------------------------------ + + #------------------------------------------------------------------- + def get_training_configurations(self): + + self.cfg_train = {} + self.input_size = {} + self.hidden_layer_size = {} + self.output_size = {} + self.all_lstm_inputs = {} + self.train_data_scaler = {} + + for i_ens in range(self.N_ENS): + + if self.cfg_bmi['train_cfg_file'][i_ens] is not None: + if (USE_PATH): # (SDP) + with self.cfg_bmi['train_cfg_file'][i_ens].open('r') as fp: + cfg = yaml.safe_load(fp) + self.cfg_train[i_ens] = self._parse_config(cfg) + else: + with open(self.cfg_bmi['train_cfg_file'][i_ens],'r') as fp: # (SDP) + cfg = yaml.safe_load(fp) + self.cfg_train[i_ens] = self._parse_config(cfg) + + # Including a list of the model input names.
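The output_factor_cms line above converts area-normalized runoff (mm per hour) into a discharge (m3/s): mm to m, km2 to m2, per-hour to per-second. A worked example with an illustrative basin area:

# Worked example of the unit conversion above; the basin area is illustrative.
area_sqkm = 250.0
output_factor_cms = (1 / 1000) * (area_sqkm * 1000 * 1000) * (1 / 3600)

runoff_mm_per_hr = 0.5
streamflow_cms = runoff_mm_per_hr * output_factor_cms
# 0.5 mm/h over 250 km2 -> 0.0005 m/h * 2.5e8 m2 / 3600 s ~= 34.7 m3/s
print(round(streamflow_cms, 1))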
+ if self.verbose > 0: + print("Setting the LSTM architecture based on the last run ensemble configuration") + print(self.cfg_train[i_ens]) + # Collect the LSTM model architecture details from the configuration file + self.input_size[i_ens] = len(self.cfg_train[i_ens]['dynamic_inputs']) + len(self.cfg_train[i_ens]['static_attributes']) + self.hidden_layer_size[i_ens] = self.cfg_train[i_ens]['hidden_size'] + self.output_size[i_ens] = len(self.cfg_train[i_ens]['target_variables']) + + self.all_lstm_inputs[i_ens] = [] + self.all_lstm_inputs[i_ens].extend(self.cfg_train[i_ens]['dynamic_inputs']) + self.all_lstm_inputs[i_ens].extend(self.cfg_train[i_ens]['static_attributes']) + + # WARNING: This implementation of the LSTM can only handle a batch size of 1 + # No need to include different batch sizes + self.batch_size = 1 + + scaler_file = os.path.join(self.cfg_train[i_ens]['run_dir'], 'train_data', 'train_data_scaler.yml') + + with open(scaler_file, 'r') as f: + scaler_data = yaml.safe_load(f) + + self.train_data_scaler[i_ens] = scaler_data + + # Scaler data from the training set. This is used to normalize the data (input and output). + if self.verbose > 1: + print(f"ensemble member {i_ens}") + print(self.cfg_train[i_ens]['run_dir']) + + #------------------------------------------------------------ + def get_scaler_values(self): + + """Mean and standard deviation for the inputs and LSTM outputs""" + + self.input_mean = {} + self.input_std = {} + self.out_mean = {} + self.out_std = {} + + for i_ens in range(self.N_ENS): + + self.out_mean[i_ens] = self.train_data_scaler[i_ens]['xarray_feature_center']['data_vars'][self.cfg_train[i_ens]['target_variables'][0]]['data'] + self.out_std[i_ens] = self.train_data_scaler[i_ens]['xarray_feature_scale']['data_vars'][self.cfg_train[i_ens]['target_variables'][0]]['data'] + + self.input_mean[i_ens] = [] + self.input_mean[i_ens].extend([self.train_data_scaler[i_ens]['xarray_feature_center']['data_vars'][x]['data'] for x in self.cfg_train[i_ens]['dynamic_inputs']]) + self.input_mean[i_ens].extend([self.train_data_scaler[i_ens]['attribute_means'][x] for x in self.cfg_train[i_ens]['static_attributes']]) + self.input_mean[i_ens] = np.array(self.input_mean[i_ens]) + + self.input_std[i_ens] = [] + self.input_std[i_ens].extend([self.train_data_scaler[i_ens]['xarray_feature_scale']['data_vars'][x]['data'] for x in self.cfg_train[i_ens]['dynamic_inputs']]) + self.input_std[i_ens].extend([self.train_data_scaler[i_ens]['attribute_stds'][x] for x in self.cfg_train[i_ens]['static_attributes']]) + self.input_std[i_ens] = np.array(self.input_std[i_ens]) + if self.verbose > 1: + print('###########################') + print('input_mean') + print(self.input_mean[i_ens]) + print('input_std') + print(self.input_std[i_ens]) + print('out_mean') + print(self.out_mean[i_ens]) + print('out_std') + print(self.out_std[i_ens]) + + #------------------------------------------------------------ + def create_scaled_input_tensor(self): + + self.input_list = {} + self.input_array = {} + self.input_array_scaled = {} + self.input_tensor = {} + + #------------------------------------------------------------ + # Note: A BMI-enabled model should not use long var names + # internally (i.e. saved into self); it should just + # use convenient short names. For the BMI functions + # that require a long var name, it should be mapped + # to the model's short name before taking action.
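The scaler values collected above feed the standardization in create_scaled_input_tensor(): each input is centered and scaled with training-period statistics before entering the LSTM. A standalone sketch with illustrative numbers:

import numpy as np
import torch

# Standalone sketch of the (x - mean) / std normalization; values illustrative.
input_array = np.array([2.1, 280.5, 331.0], dtype='float64')  # raw inputs
input_mean = np.array([3.0, 283.0, 300.0])   # training-period means
input_std = np.array([5.0, 10.0, 150.0])     # training-period stds

input_array_scaled = (input_array - input_mean) / input_std
input_tensor = torch.tensor(input_array_scaled)   # what the LSTM consumes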
+ #------------------------------------------------------------ + # TODO: Choose to store values in dictionary or not. + + #-------------------------------------------------------------- + # Note: The code in this block is more verbose, but makes it + # much easier to test and debug and helped find a bug + # in the lines above (long vs. short names.) + #-------------------------------------------------------------- + for i_ens in range(self.N_ENS): + if self.verbose > 1: + print('Creating scaled input tensor...') + n_inputs = len(self.all_lstm_inputs[i_ens]) + self.input_list[i_ens] = [] + for k in range(n_inputs): + short_name = self.all_lstm_inputs[i_ens][k] + long_name = self._var_name_map_short_first[ short_name ] + # vals = self.get_value( self, long_name ) + vals = getattr( self, short_name ) + + self.input_list[i_ens].append( vals ) + if self.verbose > 1: + print(' short_name =', short_name ) + print(' long_name =', long_name ) + array = getattr( self, short_name ) + ## array = self.get_value( long_name ) + print(' type =', type(vals) ) + print(' vals =', vals ) + + #-------------------------------------------------------- + # W/o setting dtype here, it was "object_", and crashed + #-------------------------------------------------------- + ## self.input_array = np.array( self.input_list ) + self.input_array[i_ens] = np.array( self.input_list[i_ens], dtype='float64' ) # SDP + if self.verbose > 0: + print('Normalizing the tensor...') + print(' input_mean =', self.input_mean[i_ens] ) + print(' input_std =', self.input_std[i_ens] ) + print() + # Center and scale the input values for use in torch + self.input_array_scaled[i_ens] = (self.input_array[i_ens] - self.input_mean[i_ens]) / self.input_std[i_ens] + if self.verbose > 1: + print('### input_list =', self.input_list[i_ens]) + print('### input_array =', self.input_array[i_ens]) + print('### dtype(input_array) =', self.input_array[i_ens].dtype ) + print('### type(input_array_scaled) =', type(self.input_array_scaled[i_ens])) + print('### dtype(input_array_scaled) =', self.input_array_scaled[i_ens].dtype ) + print() + self.input_tensor[i_ens] = torch.tensor(self.input_array_scaled[i_ens]) + + #------------------------------------------------------------ + def scale_output(self, i_ens): + + if self.verbose > 1: + print("model output:", self.lstm_output[i_ens][0,0,0].numpy().tolist()) + + if self.cfg_train[i_ens]['target_variables'][0] in ['qobs_mm_per_hour', 'QObs(mm/hr)', 'QObs(mm/h)']: + self.surface_runoff_mm[i_ens] = (self.lstm_output[i_ens][0,0,0].numpy().tolist() * self.out_std[i_ens] + self.out_mean[i_ens]) + + elif self.cfg_train[i_ens]['target_variables'][0] in ['QObs(mm/d)']: + self.surface_runoff_mm[i_ens] = (self.lstm_output[i_ens][0,0,0].numpy().tolist() * self.out_std[i_ens] + self.out_mean[i_ens]) * (1/24) + + self.surface_runoff_mm[i_ens] = max(self.surface_runoff_mm[i_ens],0.0) + + setattr(self, 'land_surface_water__runoff_depth', self.surface_runoff_mm[i_ens]/1000.0) + self.streamflow_cms[i_ens] = self.surface_runoff_mm[i_ens] * self.output_factor_cms + + if self.verbose > 1: + print("streamflow:", self.streamflow_cms[i_ens]) + + + #------------------------------------------------------------------- + def ensemble_output(self): + # Calculate mean surface runoff (mm) across ensemble members + ens_mean_surface_runoff_mm = np.mean([self.surface_runoff_mm[i_ens] for i_ens in range(self.N_ENS)]) + + # Set the land_surface_water__runoff_depth attribute (convert mm to m) +
setattr(self, 'land_surface_water__runoff_depth', ens_mean_surface_runoff_mm / 1000.0) + + # Calculate mean streamflow (cms) across ensemble members + ens_mean_streamflow_cms = np.mean([self.streamflow_cms[i_ens] for i_ens in range(self.N_ENS)]) + + # Set the land_surface_water__runoff_volume_flux attribute + setattr(self, 'land_surface_water__runoff_volume_flux', ens_mean_streamflow_cms) + + #---------------------------------------------------------------------------- + def set_static_attributes(self): + """ Get the static attributes from the configuration file + """ + i_ens = 0 + for attribute in self._static_attributes_list: + if attribute in self.cfg_train[i_ens]['static_attributes']: + #------------------------------------------------------------ + # Note: A BMI-enabled model should not use long var names + # internally (i.e. saved into self); it should just + # use convenient short names. For the BMI functions + # that require a long var name, it should be mapped + # to the model's short name before taking action. + #------------------------------------------------------------ + setattr(self, attribute, self.cfg_bmi[attribute]) # SDP + + #---------------------------------------------------------------------------- + def initialize_forcings(self): + + if self.verbose > 0: + print('Initializing all forcings to 0...') + i_ens = 0 + + for forcing_name in self.cfg_train[i_ens]['dynamic_inputs']: + if self.verbose > 1: + print(' forcing_name =', forcing_name) + #------------------------------------------------------------ + # Note: A BMI-enabled model should not use long var names + # internally (i.e. saved into self); it should just + # use convenient short names. For the BMI functions + # that require a long var name, it should be mapped + # to the model's short name before taking action. + #------------------------------------------------------------ + setattr(self, forcing_name, 0) + + #------------------------------------------------------------------- + #------------------------------------------------------------------- + # BMI: Model Information Functions + #------------------------------------------------------------------- + #------------------------------------------------------------------- + + # Note: not currently using _att_map{} + # def get_attribute(self, att_name): + + # try: + # return self._att_map[ att_name.lower() ] + # except: + # print(' ERROR: Could not find attribute: ' + att_name) + + #-------------------------------------------------------- + # Note: These are currently variables needed from other + # components vs. those read from files or GUI. 
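scale_output() and ensemble_output() above de-normalize each member's prediction (y * std + mean), floor it at zero, and average across the ensemble. A compact numeric sketch with illustrative scaler values:

import numpy as np

# Numeric sketch of de-normalization + ensemble averaging; values illustrative.
out_mean, out_std = 0.8, 1.6
lstm_outputs = {0: -0.2, 1: 0.4, 2: 0.1}   # normalized outputs per member

surface_runoff_mm = {i: max(y * out_std + out_mean, 0.0)
                     for i, y in lstm_outputs.items()}
ens_mean_runoff_mm = np.mean(list(surface_runoff_mm.values()))
runoff_depth_m = ens_mean_runoff_mm / 1000.0   # mm -> m for the BMI variable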
+ #-------------------------------------------------------- + def get_input_var_names(self): + + return self._input_var_names + + def get_output_var_names(self): + + return self._output_var_names + + #------------------------------------------------------------ + def get_component_name(self): + """Name of the component.""" + #return self.get_attribute( 'model_name' ) + return self._name + + #------------------------------------------------------------ + def get_input_item_count(self): + """Get names of input variables.""" + return len(self._input_var_names) + + #------------------------------------------------------------ + def get_output_item_count(self): + """Get names of output variables.""" + return len(self._output_var_names) + + #------------------------------------------------------------ + def get_value(self, var_name: str, dest: np.ndarray) -> np.ndarray: + """ + Copy values for the named variable into the provided destination array. + + Parameters + ---------- + var_name : str + Name of variable as CSDMS Standard Name. + dest : np.ndarray + A numpy array into which to copy the variable values. + Returns + ------- + np.ndarray + Copy of values. + """ + dest[:] = self.get_value_ptr(var_name) + + if self.verbose > 1: + print("self.verbose", self.verbose) + print("get value dest", dest) + + return dest + + #------------------------------------------------------------------- + def get_value_ptr(self, var_name: str) -> np.ndarray: + """ + Get reference to values. + + Get the backing reference - i.e., the backing numpy array - for the given variable. + + Parameters + ---------- + var_name : str + Name of variable as CSDMS Standard Name. + Returns + ------- + np.ndarray + Value array. + """ + # We actually need this function to return the backing array, so bypass override of __getattribute__ (that + # extracts scalar) and use the base implementation + return super(bmi_LSTM, self).__getattribute__(var_name) + + #------------------------------------------------------------------- + #------------------------------------------------------------------- + # BMI: Variable Information Functions + #------------------------------------------------------------------- + #------------------------------------------------------------------- + def get_var_name(self, long_var_name): + + return self._var_name_map_long_first[ long_var_name ] + + #------------------------------------------------------------------- + def get_var_units(self, long_var_name): + + return self._var_units_map[ long_var_name ] + + #------------------------------------------------------------------- + def get_var_type(self, long_var_name): + """Data type of variable. + + Parameters + ---------- + var_name : str + Name of variable as CSDMS Standard Name. + + Returns + ------- + str + Data type. 
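get_value_ptr() above must return the backing array itself, which is why it bypasses the scalar-unwrapping __getattribute__ override with a super() call. A toy illustration of why the bypass matters (not the class above, just the mechanism):

import numpy as np

class Box:
    def __getattribute__(self, item):
        value = super().__getattribute__(item)
        if isinstance(value, np.ndarray) and value.size == 1:
            return value[0]          # normal access unwraps to a scalar
        return value

    def get_value_ptr(self, name):
        return super().__getattribute__(name)   # hand back the mutable array

b = Box()
object.__setattr__(b, 'flow', np.array([3.5]))
assert b.flow == 3.5          # attribute access yields the scalar view
ptr = b.get_value_ptr('flow')
ptr[:] = 7.0                  # a pointer write reaches the model state
assert b.flow == 7.0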
+ """ + + #JG MW 03.01.23 - otherwise Bmi_py_Adaptor.hpp `get_analogous_cxx_type` fails + return self.get_value_ptr(long_var_name).dtype.name + #------------------------------------------------------------ + def get_var_grid(self, name): + + # Note: all vars have grid 0 but check if its in names list first + if name in (self._output_var_names + self._input_var_names): + return self._var_grid_id + + #------------------------------------------------------------ + def get_var_itemsize(self, name): + # JG get_value_ptr is already an np.array + return self.get_value_ptr(name).itemsize + + #------------------------------------------------------------ + def get_var_location(self, name): + + # Note: all vars have location node but check if its in names list first + if name in (self._output_var_names + self._input_var_names): + return self._var_loc + + #------------------------------------------------------------------- + # JG Note: what is this used for? + def get_var_rank(self, long_var_name): + + return np.int16(0) + + #------------------------------------------------------------------- + def get_start_time( self ): + + return self._start_time + + #------------------------------------------------------------------- + def get_end_time( self ): + + return self._end_time + + + #------------------------------------------------------------------- + def get_current_time( self ): + + return self.t + + #------------------------------------------------------------------- + def get_time_step( self ): + + return self._time_step_size + + #------------------------------------------------------------------- + def get_time_units( self ): + + # Note: get_attribute() is not a BMI v2 method + return self._time_units + + #------------------------------------------------------------------- + def set_value(self, var_name: str, values:np.ndarray): + """Set model values. + + Parameters + ---------- + var_name : str + Name of variable as CSDMS Standard Name. + src : array_like + Array of new values. + """ + + internal_array = self.get_value_ptr(var_name) + internal_array[:] = values + + short_name = self._var_name_map_long_first[ var_name ] + + if (internal_array.ndim > 0): + setattr( self, short_name, internal_array[0]) + else: + setattr( self, short_name, internal_array ) + + try: + #NJF From NGEN, `internal_array` is a singleton array + setattr( self, var_name, internal_array[0] ) + + # jmframe: this next line is basically a duplicate. + # I guess we should stick with the attribute names instead of a dictionary approach. + self._values[var_name] = internal_array[0] + # JLG 03242022: this isn't really an "error" block as standalone considers value as scalar? + except TypeError: + setattr( self, var_name, internal_array ) + + # jmframe: this next line is basically a duplicate. + # I guess we should stick with the attribute names instead of a dictionary approach. + self._values[var_name] = internal_array + + #------------------------------------------------------------ + def set_value_at_indices(self, var_name: str, inds: np.ndarray, src: np.ndarray): + """ + Set model values at particular indices. + + Parameters + ---------- + var_name : str + Name of variable as CSDMS Standard Name. + inds : np.ndarray + Array of corresponding indices into which to copy the values within ``src``. + src : np.ndarray + Array of new values. 
+ """ + internal_array = self.get_value_ptr(var_name) + for i in range(inds.shape[0]): + internal_array[inds[i]] = src[i] + + #------------------------------------------------------------ + def get_var_nbytes(self, var_name): + """Get units of variable. + Parameters + ---------- + var_name : str + Name of variable as CSDMS Standard Name. + Returns + ------- + int + Size of data array in bytes. + """ + return self.get_var_itemsize(var_name)*len(self.get_value_ptr(var_name)) + + #------------------------------------------------------------ + def get_value_at_indices(self, var_name: str, dest:np.ndarray, indices:np.ndarray) -> np.ndarray: + """Get values at particular indices. + Parameters + ---------- + var_name : str + Name of variable as CSDMS Standard Name. + dest : ndarray + A numpy array into which to place the values. + indices : array_like + Array of indices. + Returns + ------- + array_like + Values at indices. + """ + #NJF This must copy into dest!!! + #Convert to np.array in case of singleton/non numpy type, then flatten + original: np.ndarray = self.get_value_ptr(var_name) + for i in range(indices.shape[0]): + value_index = indices[i] + dest[i] = original[value_index] + return dest + + # Note: remaining grid funcs do not apply for type 'scalar' + # Yet all functions in the BMI must be implemented + # See https://bmi.readthedocs.io/en/latest/bmi.best_practices.html + #------------------------------------------------------------ + def get_grid_edge_count(self, grid): + raise NotImplementedError("get_grid_edge_count") + + #------------------------------------------------------------ + def get_grid_edge_nodes(self, grid, edge_nodes): + raise NotImplementedError("get_grid_edge_nodes") + + #------------------------------------------------------------ + def get_grid_face_count(self, grid): + raise NotImplementedError("get_grid_face_count") + + #------------------------------------------------------------ + def get_grid_face_edges(self, grid, face_edges): + raise NotImplementedError("get_grid_face_edges") + + #------------------------------------------------------------ + def get_grid_face_nodes(self, grid, face_nodes): + raise NotImplementedError("get_grid_face_nodes") + + #------------------------------------------------------------ + def get_grid_node_count(self, grid): + raise NotImplementedError("get_grid_node_count") + + #------------------------------------------------------------ + def get_grid_nodes_per_face(self, grid, nodes_per_face): + raise NotImplementedError("get_grid_nodes_per_face") + + #------------------------------------------------------------ + def get_grid_origin(self, grid_id, origin): + raise NotImplementedError("get_grid_origin") + + #------------------------------------------------------------ + def get_grid_rank(self, grid_id): + + # 0 is the only id we have + if grid_id == 0: + return 1 + + #------------------------------------------------------------ + def get_grid_shape(self, grid_id, shape): + raise NotImplementedError("get_grid_shape") + + #------------------------------------------------------------ + def get_grid_size(self, grid_id): + + # 0 is the only id we have + if grid_id == 0: + return 1 + + #------------------------------------------------------------ + def get_grid_spacing(self, grid_id, spacing): + raise NotImplementedError("get_grid_spacing") + + #------------------------------------------------------------ + def get_grid_type(self, grid_id=0): + + # 0 is the only id we have + if grid_id == 0: + return 'scalar' + + 
+
+    # Note: the remaining grid functions do not apply for grid type 'scalar',
+    # yet all functions in the BMI spec must be implemented.
+    # See https://bmi.readthedocs.io/en/latest/bmi.best_practices.html
+    #------------------------------------------------------------
+    def get_grid_edge_count(self, grid):
+        raise NotImplementedError("get_grid_edge_count")
+
+    #------------------------------------------------------------
+    def get_grid_edge_nodes(self, grid, edge_nodes):
+        raise NotImplementedError("get_grid_edge_nodes")
+
+    #------------------------------------------------------------
+    def get_grid_face_count(self, grid):
+        raise NotImplementedError("get_grid_face_count")
+
+    #------------------------------------------------------------
+    def get_grid_face_edges(self, grid, face_edges):
+        raise NotImplementedError("get_grid_face_edges")
+
+    #------------------------------------------------------------
+    def get_grid_face_nodes(self, grid, face_nodes):
+        raise NotImplementedError("get_grid_face_nodes")
+
+    #------------------------------------------------------------
+    def get_grid_node_count(self, grid):
+        raise NotImplementedError("get_grid_node_count")
+
+    #------------------------------------------------------------
+    def get_grid_nodes_per_face(self, grid, nodes_per_face):
+        raise NotImplementedError("get_grid_nodes_per_face")
+
+    #------------------------------------------------------------
+    def get_grid_origin(self, grid_id, origin):
+        raise NotImplementedError("get_grid_origin")
+
+    #------------------------------------------------------------
+    def get_grid_rank(self, grid_id):
+
+        # 0 is the only grid id we have
+        if grid_id == 0:
+            return 1
+
+    #------------------------------------------------------------
+    def get_grid_shape(self, grid_id, shape):
+        raise NotImplementedError("get_grid_shape")
+
+    #------------------------------------------------------------
+    def get_grid_size(self, grid_id):
+
+        # 0 is the only grid id we have
+        if grid_id == 0:
+            return 1
+
+    #------------------------------------------------------------
+    def get_grid_spacing(self, grid_id, spacing):
+        raise NotImplementedError("get_grid_spacing")
+
+    #------------------------------------------------------------
+    def get_grid_type(self, grid_id=0):
+
+        # 0 is the only grid id we have
+        if grid_id == 0:
+            return 'scalar'
+
+    #------------------------------------------------------------
+    def get_grid_x(self, grid, x):
+        raise NotImplementedError("get_grid_x")
+
+    #------------------------------------------------------------
+    def get_grid_y(self, grid, y):
+        raise NotImplementedError("get_grid_y")
+
+    #------------------------------------------------------------
+    def get_grid_z(self, grid, z):
+        raise NotImplementedError("get_grid_z")
+
+    #------------------------------------------------------------
+    #------------------------------------------------------------
+    #-- Utility functions
+    #------------------------------------------------------------
+    #------------------------------------------------------------
+
+    def _parse_config(self, cfg):
+        for key, val in cfg.items():
+            # Handle 'train_cfg_file' specifically to ensure it is always a list
+            if key == 'train_cfg_file':
+                if val is not None and val != "None":
+                    if isinstance(val, list):
+                        cfg[key] = [Path(element) if USE_PATH else element for element in val]
+                    else:
+                        cfg[key] = [Path(val)] if USE_PATH else [val]
+                else:
+                    cfg[key] = []
+
+            # Convert all path strings to PosixPath objects for other keys
+            elif any(key.endswith(x) for x in ['_dir', '_path', '_file', '_files']):
+                if val is not None and val != "None":
+                    if isinstance(val, list):
+                        temp_list = []
+                        for element in val:
+                            if USE_PATH:
+                                temp_list.append(Path(element))
+                            else:
+                                temp_list.append(element)  # (SDP)
+                        cfg[key] = temp_list
+                    else:
+                        if USE_PATH:
+                            cfg[key] = Path(val)
+                        else:
+                            cfg[key] = val  # (SDP)
+                else:
+                    cfg[key] = None
+
+            # Convert dates to pandas datetime objects
+            elif key.endswith('_date'):
+                if isinstance(val, list):
+                    cfg[key] = [pd.to_datetime(elem, format='%d/%m/%Y') for elem in val]
+                else:
+                    cfg[key] = pd.to_datetime(val, format='%d/%m/%Y')
+
+            else:
+                pass
+
+        # Add more config parsing here if necessary
+        return cfg
+
+
+def coerce_config(cfg: dict[str, typing.Any]):
+    """Standalone counterpart of `_parse_config` (no USE_PATH switch); coerces
+    raw config values to paths and datetimes in place and returns the dict."""
+    for key, val in cfg.items():
+        # Handle 'train_cfg_file' specifically to ensure it is always a list
+        if key == "train_cfg_file":
+            if val is not None and val != "None":
+                if isinstance(val, list):
+                    cfg[key] = [Path(element) for element in val]
+                else:
+                    cfg[key] = [Path(val)]
+            else:
+                cfg[key] = []
+
+        # Convert all path strings to PosixPath objects for other keys
+        elif any(key.endswith(x) for x in ["_dir", "_path", "_file", "_files"]):
+            if val is not None and val != "None":
+                if isinstance(val, list):
+                    cfg[key] = [Path(element) for element in val]
+                else:
+                    cfg[key] = Path(val)
+            else:
+                cfg[key] = None
+
+        # Convert dates to pandas datetime objects
+        elif key.endswith("_date"):
+            if isinstance(val, list):
+                cfg[key] = [pd.to_datetime(elem, format="%d/%m/%Y") for elem in val]
+            else:
+                cfg[key] = pd.to_datetime(val, format="%d/%m/%Y")
+
+    return cfg
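A quick sanity sketch of what `coerce_config` produces for a raw YAML-loaded dict (the import path and the non-`train_cfg_file` key names are illustrative assumptions, not part of the patch):

    from dHBV_2_0.bmi import coerce_config  # assumed import location

    cfg = {
        'train_cfg_file': 'models/hydrofabric_15yr/config.yaml',
        'forcings_dir': 'data/aorc/juniata_river_basin',  # hypothetical key
        'start_date': '01/10/1999',
        'verbose': 1,
    }
    coerce_config(cfg)  # mutates cfg in place (and now also returns it)

    # cfg['train_cfg_file'] -> [PosixPath('models/hydrofabric_15yr/config.yaml')]
    # cfg['forcings_dir']   -> PosixPath('data/aorc/juniata_river_basin')
    # cfg['start_date']     -> Timestamp('1999-10-01 00:00:00')  (day-first format)
    # cfg['verbose']        -> 1 (left untouched)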
diff --git a/src/dHBV_2_0/hbv_2_0.py b/src/dHBV_2_0/hbv_2_0.py
index d8e3d28..54fb267 100644
--- a/src/dHBV_2_0/hbv_2_0.py
+++ b/src/dHBV_2_0/hbv_2_0.py
@@ -1,7 +1,6 @@
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Optional, Union
 
 import torch
-
 from hydroDL2.core.calc import change_param_range
 from hydroDL2.core.calc.uh_routing import UH_conv, UH_gamma
 
@@ -35,7 +34,7 @@ class HBVUnitBasin(torch.nn.Module):
     """
     def __init__(
         self,
-        config: Optional[Dict[str, Any]] = None,
+        config: Optional[dict[str, Any]] = None,
         device: Optional[torch.device] = None
     ) -> None:
         super().__init__()
@@ -96,12 +95,12 @@ def __init__(
     def set_parameters(self) -> None:
         """Get physical parameters."""
         self.phy_param_names = self.parameter_bounds.keys()
-        if self.routing == True:
+        if self.routing:
             self.routing_param_names = self.routing_parameter_bounds.keys()
         else:
             self.routing_param_names = []
 
-        self.learnable_param_count1 = len(self.dynamic_params) * self.nmul 
+        self.learnable_param_count1 = len(self.dynamic_params) * self.nmul
         self.learnable_param_count2 = (len(self.phy_param_names) - len(self.dynamic_params)) * self.nmul \
             + len(self.routing_param_names)
         self.learnable_param_count = self.learnable_param_count1 + self.learnable_param_count2
@@ -109,7 +108,7 @@ def set_parameters(self) -> None:
     def unpack_parameters(
         self,
         parameters: torch.Tensor,
-    ) -> Dict[str, torch.Tensor]:
+    ) -> dict[str, torch.Tensor]:
         """Extract physical model and routing parameters from NN output.
 
         Parameters
@@ -144,7 +143,7 @@ def unpack_parameters(
         # Routing parameters
         routing_params = None
-        if self.routing == True:
+        if self.routing:
             routing_params = parameters[1][:, dif_count * self.nmul:]
 
         return phy_dy_params, phy_static_params, routing_params
@@ -179,7 +178,7 @@ def descale_phy_dy_parameters(
             staPar = phy_dy_params[-1, :, i,:].unsqueeze(0).repeat([n_steps, 1, 1])
             dynPar = phy_dy_params[:, :, i,:]
-            drmask = torch.bernoulli(pmat).detach_().cuda() 
+            drmask = torch.bernoulli(pmat).detach_().cuda()
             comPar = dynPar * (1 - drmask) + staPar * drmask
             param_dict[name] = change_param_range(
                 param=comPar,
@@ -242,9 +241,9 @@ def descale_rout_parameters(
     def forward(
         self,
-        x_dict: Dict[str, torch.Tensor],
+        x_dict: dict[str, torch.Tensor],
         parameters: torch.Tensor
-    ) -> Union[Tuple, Dict[str, torch.Tensor]]:
+    ) -> Union[tuple, dict[str, torch.Tensor]]:
         """Forward pass for HBV1.1p.
 
         Parameters
@@ -302,7 +301,7 @@ def forward(
         )
 
         # Run the model for the remainder of simulation period.
-        return self.PBM( 
+        return self.PBM(
             x,
             Ac,
             Elevation,
@@ -314,12 +313,12 @@ def forward(
     def PBM(
         self,
         forcing: torch.Tensor,
-        Ac:torch.Tensor,
-        Elevation:torch.Tensor,
-        states: Tuple,
-        phy_dy_params_dict: Dict,
-        phy_static_params_dict: Dict
-    ) -> Union[Tuple, Dict[str, torch.Tensor]]:
+        Ac: torch.Tensor,
+        Elevation: torch.Tensor,
+        states: tuple,
+        phy_dy_params_dict: dict,
+        phy_static_params_dict: dict
+    ) -> Union[tuple, dict[str, torch.Tensor]]:
         """Run the HBV1.1p model forward.
 
         Parameters
@@ -459,7 +458,7 @@ def PBM(
             tosoil_sim[t, :, :] = tosoil
             PERC_sim[t, :, :] = PERC
 
-        # Get the overall average 
+        # Get the overall average
         # or weighted average using learned weights.
         if self.muwts is None:
             Qsimavg = Qsimmu.mean(-1)
@@ -493,7 +492,7 @@ def PBM(
             rf_Q2 = Q2_sim.mean(-1, keepdim=True).permute([1, 2, 0])
             Q2_rout = UH_conv(rf_Q2, UH).permute([2, 0, 1])
 
-        if self.comprout: 
+        if self.comprout:
             # Qs is now shape [time, [gages*num models], vars]
             Qstemp = Qsrout.view(n_steps, n_grid, self.nmul)
             if self.muwts is None:
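The `drmask` line touched above implements a Bernoulli mix of static and dynamic parameter estimates. A standalone sketch of that mixing rule (CPU tensors for illustration; the patched code keeps the hard-coded `.cuda()` call, and the shape of `pmat` is an assumption here):

    import torch

    n_steps, n_grid, nmul = 4, 3, 2
    staPar = torch.full((n_steps, n_grid, nmul), 0.2)  # static estimate, repeated over time
    dynPar = torch.rand(n_steps, n_grid, nmul)         # per-timestep dynamic estimate
    pmat = torch.full((n_steps, n_grid, nmul), 0.5)    # probability of keeping the static value

    drmask = torch.bernoulli(pmat).detach_()  # 1 -> use static, 0 -> use dynamic
    comPar = dynPar * (1 - drmask) + staPar * drmask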
diff --git a/src/dHBV_2_0/run_bmi_aorc.py b/src/dHBV_2_0/run_bmi_aorc.py
index 567657e..c9211b5 100644
--- a/src/dHBV_2_0/run_bmi_aorc.py
+++ b/src/dHBV_2_0/run_bmi_aorc.py
@@ -1,105 +1,83 @@
-## TODO: needs to be updated to latest dHBV 2.0 dMG implementation.
-
 import numpy as np
-from pathlib import Path
 
-import bmi_dm # Load module bmi_dm (bmi_dm.py) from dhbv_2_0 package.
-import os, os.path
-lstm_dir = os.path.expanduser('../dhbv_2_0/')
-os.chdir( lstm_dir )
-import pandas as pd
+from dHBV_2_0.src.dHBV_2_0.bmi import DeltaModelBmi as Bmi
 
-basin_id = "cat-88306"
+### Select a basin from the sample data ###
+basin_id = "cat-88306"
+bmi_config_path = f'C:/Users/LeoLo/Desktop/noaa_owp/dHBV_2_0/bmi_config_files/bmi_config_{basin_id}_5yr.yaml'
+### ----------------------------------- ###
 
-# Load the USGS data
-# REPLACE THIS PATH WITH YOUR LOCAL FILE PATH:
-file_path = f"/Users/jmframe/CAMELS_data_sample/hourly/usgs-streamflow/{basin_id}-usgs-hourly.csv"
-df_runoff = pd.read_csv(file_path)
-df_runoff = df_runoff.set_index("date")
-df_runoff.index = pd.to_datetime(df_runoff.index)
-df_runoff = df_runoff[["QObs(mm/h)"]].rename(columns={"QObs(mm/h)": "usgs_obs"})
-df_runoff["model_pred"] = None
+
+# Load the forcing and attribute data
 # REPLACE THIS PATH WITH YOUR LOCAL FILE PATH:
-forcing_file_path = f"/Users/jmframe/CAMELS_data_sample/hourly/aorc_hourly/{basin_id}_1980_to_2024_agg_rounded.csv"
-df_forcing = pd.read_csv(forcing_file_path)
-df_forcing = df_forcing.set_index("time")
-df_forcing.index = pd.to_datetime(df_forcing.index)
-df_forcing = df_forcing[df_runoff.index[0]:df_runoff.index[-1]]
-
-# Create an instance of the LSTM model with BMI
-model_instance = bmi_lstm.bmi_LSTM()
-
-# Initialize the model with a configuration file
-model_instance.initialize(bmi_cfg_file=Path(f'../bmi_config_files/{basin_id}_nh_AORC_hourly_ensemble.yml'))
-
-# Add ensemble columns to the runoff DataFrame
-for i_ens in range(model_instance.N_ENS):
-    df_runoff[f"ensemble_{i_ens+1}"] = None  # Initialize ensemble columns with None
-
-
-# Iterate through the forcing DataFrame and calculate model predictions
-print('Working, please wait...')
-for i, (idx, row) in enumerate(df_forcing.iterrows()):
-    # Extract forcing data for the current timestep
-    precip = row["APCP_surface"]
-    temp = row["TMP_2maboveground"]
-    dlwrf = row["DLWRF_surface"]
-    dswrf = row["DSWRF_surface"]
-    pres = row["PRES_surface"]
-    spfh = row["SPFH_2maboveground"]
-    ugrd = row["UGRD_10maboveground"]
-    vgrd = row["VGRD_10maboveground"]
-
-    # Check if any of the inputs are NaN
-    if np.isnan([precip, temp, dlwrf, dswrf, pres, spfh, ugrd, vgrd]).any():
-        if model_instance.verbose > 0:
-            print(f"Skipping timestep {idx} due to NaN values in inputs.")
+forc_path = f'C:/Users/LeoLo/Desktop/noaa_owp/dHBV_2_0/data/aorc/juniata_river_basin/forcings_5yr_{basin_id}.npy'
+attr_path = f'C:/Users/LeoLo/Desktop/noaa_owp/dHBV_2_0/data/aorc/juniata_river_basin/attributes_5yr_{basin_id}.npy'
+# obs_path = f'/Users/LeoLo/Desktop/noaa_owp/dHBV_2_0/data/aorc/juniata_river_basin/obs_5yr_{basin_id}.npy'
+
+forc = np.load(forc_path)
+attr = np.load(attr_path)
+# obs = np.load(obs_path)
+
+# Create an instance of the dHBV 2.0 model through its BMI
+model = Bmi(config_path=bmi_config_path)
+
+streamflow_pred = np.zeros(forc.shape[0])
+nan_idx = []
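+
+# Note on ordering: with `stepwise: False` in the BMI config, all forcings are
+# staged via set_value() *before* initialize(); initialize() can then run one
+# batch simulation over the full period, and update() simply steps through the
+# pre-computed predictions (see steps 1 and 2 below).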
+
+# 1) Compile forcing data within the BMI to do a batch run.
+for i in range(forc.shape[0]):
+    # Extract forcing data for the current time step
+    prcp = forc[i, :, 0]
+    temp = forc[i, :, 1]
+    pet = forc[i, :, 2]
+
+    # Check if any of the inputs are NaN
+    if np.isnan([prcp, temp, pet]).any():
+        print(f"Skipping timestep {i} due to NaN values in inputs.")
+        nan_idx.append(i)
         continue
 
-    # Set the model forcings
-    model_instance.set_value('atmosphere_water__liquid_equivalent_precipitation_rate', precip)
-    model_instance.set_value('land_surface_air__temperature', temp)
-    model_instance.set_value('land_surface_radiation~incoming~longwave__energy_flux', dlwrf)
-    model_instance.set_value('land_surface_radiation~incoming~shortwave__energy_flux', dswrf)
-    model_instance.set_value('land_surface_air__pressure', pres)
-    model_instance.set_value('atmosphere_air_water~vapor__relative_saturation', spfh)
-    model_instance.set_value('land_surface_wind__x_component_of_velocity', ugrd)
-    model_instance.set_value('land_surface_wind__y_component_of_velocity', vgrd)
-
-    # Update the model
-    model_instance.update()
-
-    # Retrieve and scale the runoff output
-    dest_array = np.zeros(1)
-    model_instance.get_value('land_surface_water__runoff_depth', dest_array)
-    land_surface_water__runoff_depth = dest_array[0] * 1000  # Convert to mm/hr
+    model.set_value('atmosphere_water__liquid_equivalent_precipitation_rate', prcp)
+    model.set_value('land_surface_air__temperature', temp)
+    model.set_value('land_surface_water__potential_evaporation_volume_flux', pet)
 
-    # Add ensemble member values to the DataFrame
-    for i_ens in range(model_instance.N_ENS):
-        df_runoff.loc[idx, f"ensemble_{i_ens+1}"] = model_instance.surface_runoff_mm[i_ens]  # Add individual ensemble member values
+### BMI initialization ###
+model.initialize()
 
-    # Add the output to the DataFrame
-    df_runoff.loc[idx, "model_pred"] = land_surface_water__runoff_depth
-
-    if i > 10000:
-        break
-
-
-# Ensure "model_pred" is numeric
-df_runoff["model_pred"] = pd.to_numeric(df_runoff["model_pred"], errors="coerce")
-
-# Calculate NSE for the model predictions
-obs = df_runoff["usgs_obs"].dropna()
-sim = df_runoff["model_pred"].dropna()
+# 2) Do a pseudo forward pass, returning the pre-computed prediction at each timestep.
+for i in range(forc.shape[0]):
+    if i in nan_idx:
+        # Skip the update for this timestep
+        continue
 
-# Align indices of observation and simulation for metric calculation
-common_index = obs.index.intersection(sim.index)
-obs = obs.loc[common_index].values
-sim = sim.loc[common_index].values
+    ### BMI update ###
+    model.update()
 
-denominator = ((obs - obs.mean()) ** 2).sum()
-numerator = ((sim - obs) ** 2).sum()
-nse = 1 - numerator / denominator
-print(f"NSE: {nse:.2f}")
\ No newline at end of file
+    # Retrieve the runoff output
+    dest_array = np.zeros(1)
+    model.get_value('land_surface_water__runoff_volume_flux', dest_array)
+
+    streamflow_pred[i] = dest_array[0]  # Streamflow in mm/day; no unit conversion applied here
+
+### BMI finalization ###
+model.finalize()
+
+print("\n=/= -- Streamflow prediction completed -- =/=")
+print(f"    Basin ID: {basin_id}")
+print(f"    Total Process Time: {model.bmi_process_time:.4f} seconds")
+print(f"    Mean streamflow: {streamflow_pred.mean():.4f} mm/day")
+print(f"    Max streamflow: {streamflow_pred.max():.4f} mm/day")
+print(f"    Min streamflow: {streamflow_pred.min():.4f} mm/day")
+print("=/= ------------------------------------ =/=")
+
+
+# # Calculate NSE for the model predictions (NumPy version; requires observations)
+# mask = ~np.isnan(obs) & ~np.isnan(streamflow_pred)
+# obs_v, sim_v = obs[mask], streamflow_pred[mask]
+# denom = ((obs_v - obs_v.mean()) ** 2).sum()
+# num = ((sim_v - obs_v) ** 2).sum()
+# nse = 1 - num / denom
+# print(f"NSE: {nse:.2f}")
diff --git a/src/dHBV_2_0/run _bmi_unit_test.py b/src/dHBV_2_0/run_bmi_unit_test.py
similarity index 99%
rename from src/dHBV_2_0/run _bmi_unit_test.py
rename to src/dHBV_2_0/run_bmi_unit_test.py
index 9b4e5b3..515c609 100644
--- a/src/dHBV_2_0/run _bmi_unit_test.py
+++ b/src/dHBV_2_0/run_bmi_unit_test.py
@@ -6,11 +6,11 @@
 """
 import os
 import sys
-import numpy as np
 from pathlib import Path
-from pathlib import Path
 
-import bmi_dm
+import numpy as np
+
+from dHBV_2_0.src.dHBV_2_0.bmi import BmiDm
 
 # setup a "success counter" for number of passing and failing bmi functions
 # keep track of function def fails (vs function call)
@@ -46,7 +46,7 @@ def bmi_except(fstring):
     print("No configuration file found, exiting...")
     sys.exit()
 
-bmi=bmi_dm.BmiDm(cfg_file)
+bmi = BmiDm(cfg_file)
 
 #-------------------------------------------------------------------
 # initialize()
@@ -385,11 +385,11 @@ def bmi_except(fstring):
     bmi.finalize()
     print (" finalizing...")
     pass_count += 1
-except:
+except Exception:
     bmi_except('finalize()')
 
 # lastly - print test summary
 print ("\n Total BMI function PASS: " + str(pass_count))
 print (" Total BMI function FAIL: " + str(fail_count))
 for ff in fail_list:
-    print (" " + ff)
\ No newline at end of file
+    print (" " + ff)
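For readers adapting run_bmi_unit_test.py, its pass/fail tallying reduces to the following pattern (a simplified sketch; `bmi_check` is a hypothetical helper, not part of the patch):

    pass_count, fail_count, fail_list = 0, 0, []

    def bmi_check(label, fn, *args):
        """Call one BMI function and tally the result."""
        global pass_count, fail_count
        try:
            fn(*args)
            pass_count += 1
        except Exception:
            fail_count += 1
            fail_list.append(label)

    # e.g.: bmi_check('finalize()', bmi.finalize)
    print("\n Total BMI function PASS: " + str(pass_count))
    print(" Total BMI function FAIL: " + str(fail_count))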