RHESSys (Regional Hydro-Ecological Simulation System) is a GIS-based, terrestrial ecohydrologic modeling framework designed to simulate carbon, water and nutrient fluxes at the watershed scale. RHESSys models the temporal and spatial variability of ecosystem processes and interactions at a daily time step over multiple years by combining a set of physically based process models and a methodology for partitioning and parameterizing the landscape. Detailed model algorithms are available in Tague and Band (2004), *Earth Interactions*, 8(19), 1-42.
This notebook demonstrates how to configure an ensemble RHESSys simulation with pyRHESSys, submit it to an HPC resource for execution through the CyberGIS Computing Service, and visualize model outputs with various tools integrated in the CyberGIS-Jupyter for Water (CJW) platform.
The model used here is based off of a pre-built RHESSys model for Coweeta Subbasin 18 (0.124 $km^2$), a subbasin in the Coweeta watershed (16 $km^2$), from the Coweeta Long Term Ecological Research (LTER) Program.
# Create a scratch workspace and unpack the pre-built Coweeta Subbasin 18 model.
import tempfile
import shutil, os
# All intermediate project data lives under ./workspace next to this notebook.
workspace_dir = os.path.join(os.getcwd(), 'workspace')
!mkdir -p {workspace_dir}
proj_folder_name = "CoweetaSub18_30m"
# Unique temp dir per notebook run so repeated executions do not collide.
unzip_dir = tempfile.mkdtemp(dir=workspace_dir)
proj_folder = os.path.join(unzip_dir, proj_folder_name)
model_folder = os.path.join(unzip_dir, proj_folder_name, "model")
!mkdir -p {model_folder}
# Expects CoweetaSub18_30m.zip alongside this notebook; -o overwrites silently.
!unzip -o {proj_folder_name}.zip -d {model_folder}
# RHESSys writes its simulation results here.
output_folder = os.path.join(model_folder, "output")
!mkdir -p {output_folder}
import os, sys
def replace_string(file, current_string, new_string):
    """Replace every occurrence of current_string with new_string in file, in place.

    Prints an error and exits the process (sys.exit(1)) if `file` is not a
    regular file, matching the original notebook behavior.
    """
    if not os.path.isfile(file):
        print("Error on replace_string, not a regular file: " + file)
        sys.exit(1)
    # BUG FIX: the original opened the file twice with bare open() and never
    # closed either handle; with-blocks guarantee both handles are released.
    with open(file, 'r') as src:
        contents = src.read()
    with open(file, 'w') as dst:
        dst.write(contents.replace(current_string, new_string))
# The shipped worldfile header and climate base-station file reference their
# data via a <BASEDIR> placeholder; rewrite it to this notebook's unzipped
# project directory so the local run can find its inputs.
original_path = "<BASEDIR>"
replace_string(os.path.join(model_folder,"worldfiles/worldfile.hdr"), original_path, proj_folder)
replace_string(os.path.join(model_folder,"clim/cwt.base"), original_path, proj_folder)
# import pyrhessys library
import pyrhessys as pr
import os, shutil
PROJECT_NAME = "CoweetaSub18_30m"
CURRENT_PATH = os.getcwd()
#PROJECT_DIR = os.path.join(CURRENT_PATH, PROJECT_NAME)
PROJECT_DIR = proj_folder
# Define RHESSys model input directory
MODEL_DIR = os.path.join(PROJECT_DIR, 'model')
# Get RHESSys executable full path via `which`.
# NOTE(review): if rhessysEC.7.2 is not on PATH this yields an empty string
# and the Simulation object will fail later — verify the kernel environment.
EXECUTABLE = os.popen('which rhessysEC.7.2').read().split("\n")[0]
# create pyRHESSys Simulation Object
r = pr.Simulation(EXECUTABLE, MODEL_DIR)
# Simulation period, given as 'YYYY MM DD HH' strings.
start_date = '2003 01 01 01'
end_date = '2005 12 31 01'
# Initial parameter values for the single local test run; applied to the
# simulation object one key at a time, in this order.
initial_parameter_values = {
    'version': 'rhessysEC.7.2',
    'start_date': start_date,
    'end_date': end_date,
    'gw1': '0.0432482895012945',
    'gw2': '0.137155388656538',
    's1': '1.0',
    's2': '0.4',
    's3': '1.6',
    'snowEs': '1.17543162591755',
    'snowTs': '0.527982610510662',
    'sv1': '3.0',
    'sv2': '90.0',
    'svalt1': '0.928032172983822',
    'svalt2': '0.955452497987305',
    'locationid': '0',
}
for param_name, param_value in initial_parameter_values.items():
    r.parameters[param_name] = param_value
# Create an initial-parameters JSON file and read it back as a sanity check.
import json

init_params_file = os.path.join(model_folder, 'init_parameters.json')
with open(init_params_file, 'w') as outfile:
    json.dump(r.parameters, outfile)
# BUG FIX: the original read the file back with a bare open() that was never
# closed; a with-block releases the handle deterministically.
with open(init_params_file, 'r') as infile:
    data = json.load(infile)
# Bare expression so the notebook displays the parameter dict.
data
# Run RHESSys locally (blocking call); r.status is displayed afterwards so the
# notebook shows whether the run completed.
r.run("local")
r.status
# Collect the RHESSys *.daily output files produced by the local run.
name_list = os.listdir(output_folder)
full_list = [os.path.join(output_folder, i) for i in name_list if i.endswith(".daily")]
sorted_list = sorted(full_list)
# Reformat 'YYYY MM DD HH' -> 'YYYY-MM-DD-HH' so pandas can parse the dates.
sim_start_date = start_date.replace(" ", "-")
sim_end_date = end_date.replace(" ", "-")
import pandas as pd
sim_output = []
name_lists = []
# The daily date axis is the same for every run, so build it once outside the loop.
date_index = pd.date_range(sim_start_date, sim_end_date, freq='1D')
for lists in sorted_list:
    # Run label = file-name prefix before the first underscore.
    run_name = lists.split("/")[-1].split("_")[0]
    plot_data = pd.read_csv(os.path.join(output_folder, run_name + '_run_basin.daily'), delimiter=" ")
    name_lists.append(run_name)
    plot_data.insert(loc=0, column='Date', value=date_index[:-1].values)
    # BUG FIX: removed the original `plot_data.set_index('Date')` — set_index
    # returns a new frame (it is not in-place) and the result was discarded,
    # so the call was a no-op. Downstream plotting reads 'Date' as a column,
    # therefore no index is (or should be) set here.
    sim_output.append(plot_data)
import seaborn as sns
import matplotlib.pyplot as plt
def plot_output(sim_data, sim_date_col_name, name_list, num_config, sim_output_variable,
                pre_trim: int = 0, post_trim: int = -1,
                y_label: str = "Total Stream Outflow (mm)"):
    """Plot one RHESSys output variable for several runs on a single axis.

    Parameters
    ----------
    sim_data : list of pandas DataFrames, one per run.
    sim_date_col_name : name of the date column used for the x axis.
    name_list : legend labels, parallel to sim_data.
    num_config : number of runs to draw (first num_config entries of sim_data).
    sim_output_variable : column to plot (e.g. 'streamflow').
    pre_trim, post_trim : slice bounds applied to each series (e.g. trim the
        initial spin-up period).
    y_label : y-axis caption. Defaults to the streamflow label for backward
        compatibility; pass a different string when plotting other variables.
        (The original hard-coded this label regardless of the variable plotted.)
    """
    # FIX: the original list contained 'dashdot' twice, which made two of the
    # five style slots identical; deduplicated to the four distinct styles.
    LINE_STYLES = ['solid', 'dashed', 'dashdot', 'dotted']
    NUM_STYLES = len(LINE_STYLES)
    sns.reset_orig()  # restore default matplotlib styles
    clrs = sns.color_palette("bright", n_colors=num_config)  # list of RGB tuples
    fig, ax = plt.subplots(1, figsize=(15, 7), linewidth=3.0)
    for i in range(num_config):
        window = slice(pre_trim, post_trim)
        lines = ax.plot(sim_data[i][sim_date_col_name][window].values,
                        sim_data[i][sim_output_variable][window].values)
        lines[0].set_color(clrs[i])
        # Cycle line styles so runs beyond the palette stay distinguishable.
        lines[0].set_linestyle(LINE_STYLES[i % NUM_STYLES])
    ax.set_xlabel('Date', fontsize=15)
    ax.set_ylabel(y_label, fontsize=15)
    ax.tick_params(labelsize=15)
    ax.grid('on')
    fig.legend(labels=name_list, bbox_to_anchor=(0.9, 0.9), loc='upper left')
    plt.show()
# Plot daily streamflow for the local run, skipping the first 100 days (pre_trim=100).
plot_output(sim_output, 'Date', name_lists, len(sim_output), 'streamflow', pre_trim =100, post_trim=-1)
Restore the `<BASEDIR>` placeholder paths (to be localized on HPC)
# remove old output files
! rm -rvf {output_folder}/*
# Swap the absolute local paths back to the <BASEDIR> placeholder so the
# project can be re-localized on the HPC side by installTestCases_local.sh.
new_path = "<BASEDIR>"
replace_string(os.path.join(model_folder,"worldfiles/worldfile.hdr"), proj_folder, new_path)
replace_string(os.path.join(model_folder,"clim/cwt.base"), proj_folder, new_path)
%%writefile installTestCases_local.sh
#!/bin/bash
# Install the test cases on the HPC side: substitute the current working
# directory for the <BASEDIR> placeholder in the model input files, keeping
# backup copies of the originals while the substitution runs.

# Refuse to run twice: leftover *_copy directories mean a previous install
# already modified the inputs and overwriting could lose user changes.
if [ -d model/clim_copy -o -d model/worldfiles_copy ]
then
    echo 'settings, output and/or testCases_data directories already exist.'
    echo 'Please remove or move the settings, output and testCases_data'
    echo 'directories to prevent overwriting'
    exit 1
fi

BASEDIR=`pwd`
for DIR in model/clim model/worldfiles
do
    # Preserve a copy of the originals in case something goes wrong.
    cp -rp ${DIR} ${DIR}_copy
    for file in `grep -l '<BASEDIR>' -R ${DIR}`
    do
        sed "s|<BASEDIR>|${BASEDIR}|" $file > junk
        mv junk $file
    done
done
# BUG FIX: the original ran this cleanup INSIDE the loop (deleting the safety
# copies before the second directory was processed) and included a stray
# comma-suffixed token "model/clim," that silently matched nothing. Clean up
# once, after both directories succeed, and only remove the backup copies.
rm -rf model/clim_copy model/worldfiles_copy
echo "TestCases installed"
# Stage the install script inside the project folder so it travels with the upload.
shutil.copy('installTestCases_local.sh', proj_folder)
# create safe_arange to limit certain length of number
import numpy as np
def safe_arange(start, stop, step):
    """Like numpy.arange, but with each value rounded to 10 decimal places.

    np.arange on floats accumulates representation error (e.g. producing
    0.30000000000000004); rounding keeps the parameter values clean for use
    in file names and JSON.

    Parameters mirror np.arange: half-open interval [start, stop) with step.
    Returns a 1-D float ndarray (empty when the interval is empty; the
    original returned a plain empty list in that corner case).
    """
    # Vectorized round replaces the original O(n^2) np.append-in-a-loop build.
    return np.round(np.arange(start, stop, step), 10)
# set start value, end value, interval value for each parameters
# Each entry maps a RHESSys parameter to its candidate values
# (safe_arange(start, stop, step)); the ensemble is the Cartesian
# product of these four lists.
param_options = {
's1' : safe_arange(0.01, 20.0, 10.0),
's2' : safe_arange(1.0, 100.0, 50.0),
'sv1': safe_arange(0.01, 20.0, 10.0),
'sv2': safe_arange(1.0, 100.0, 50.0),
}
# create ensemble combination using parameter setting as config and check the number of ensemble combination
config = pr.ensemble.parameter_product(param_options)
len(config)
import json
# Persist the ensemble parameter combinations alongside the project so the
# HPC job can read them.
with open(os.path.join(proj_folder, 'rhessys_options.json'), 'w') as outfile:
    json.dump(config, outfile)
# Show how many runs the ensemble contains and preview the configuration.
print(f"Number of ensemble runs: {len(config)}")
print(json.dumps(config, indent=4, sort_keys=True)[:800])
print("...")
# Submit the ensemble to HPC through the CyberGIS job-supervisor service.
from job_supervisor_client import *
# 'rhessys' selects the community RHESSys model on the supervisor side.
communityRHESSysSession = Session('rhessys', isJupyter=True)
communityRHESSysJob = communityRHESSysSession.job() # create new job
# Upload the project folder (model inputs, rhessys_options.json, install script).
communityRHESSysJob.upload(proj_folder)
# Request 16 nodes on the Keeling cluster.
communityRHESSysJob.submit(payload={
"node": 16,
"machine": "keeling"
})
# Stream scheduler/run events until the job finishes.
communityRHESSysJob.events(liveOutput=True)
# Download and unpack the zipped results into workspace/<job id>/output.
job_dir = os.path.join(workspace_dir, "{}".format(communityRHESSysJob.id))
!mkdir -p {job_dir}/output
communityRHESSysJob.download(job_dir)
!cd {job_dir} && unzip -d output *.zip
# Locate the downloaded HPC results.
output_path = os.path.join(job_dir, "output")
# Collect the RHESSys *.daily output files produced by the ensemble.
name_list = os.listdir(output_path)
full_list = [os.path.join(output_path, fname) for fname in name_list if fname.endswith(".daily")]
sorted_list = sorted(full_list)
for daily_file in sorted_list:
    print(daily_file)
print(f"Number of daily output files: {len(sorted_list)}")
When you plot RHESSys daily output, you can use output variables (RHESSys Output Abbreviation) from the table below
RHESSys Output Abbreviation | Description | Units |
---|---|---|
pot_surface_infil | Rain Throughfall | mm |
snow_thr | Snow Throughfall | mm |
sat_def_z | Saturation Deficit with depth | mm of depth |
sat_def | Saturation Deficit - volume | mm of water |
rz_storage | Rooting Zone Storage | mm of water |
unsat_stor | Unsaturated Storage | mm |
rz_drainage | Rooting Zone Drainage | mm |
unsat_drain | Unsaturated Zone Drainage | mm |
cap | Capillary Rise | mm |
evap | Evaporation | mm |
snowpack | Snow Water Equivalent (SWE) | mm |
trans | Transpiration | mm |
baseflow | Baseflow | mm |
return | Return flow | mm |
streamflow | Total Stream Outflow | mm (normalized by basin area) |
psn | Net Photosynthesis | kgC/m2 |
lai | Leaf Area Index | m2/m2 |
gw.Qout | Groundwater Output | mm |
gw.storage | Groundwater Store | mm |
detention_store | Detention Store | mm |
%sat_area | Percent Saturated Area | m2/m2 |
litter_store | Litter intercepted water Store | m2/m2 |
canopy_store | Canopy Intercepted water Store | m2/m2 |
%snow_cover | Percent Snow Cover | m2/m2 |
snow_subl | Snow Sublimation | |
trans_var | Spatial variation in transpiration | |
acc_trans | ||
acctransv_var | ||
pet | Potential Evapotranspiration | mm |
dC13 | ||
precip | Precipitation | mm |
pcp_assim | ||
mortf | Fraction of Basin that have tree mortality | |
tmax | Maximum Temperature | °C |
tmin | Minimum Temperature | °C |
tavg | Average Temperature | °C |
vpd | Vapor Pressure Deficit | Pa |
snowfall | Snowfall | |
recharge | Recharge of water to soil | |
gpsn | Gross Photosynthesis | kgC/m2 |
resp | Respiration | kgC/m2 |
gs | Canopy Conductance | mm/s? |
rootdepth | Rooting depth | |
plantc | Plant Carbon | kgC/m2 |
snowmelt | Snow Melt | |
canopysubl | Canopy Sublimation | |
routedstreamflow | ||
canopy_snow | Snow Intercepted on Canopy | |
height | Canopy height | |
evap_can | Canopy Evaporation? | |
evap_lit | Litter Evaporation | |
evap_soil | Soil Evaporation | |
litrc | Litter Carbon | |
Kdown | Downward (from atmosphere) Direct Shortwave Radiation | |
Ldown | Downward (from atmosphere) Longwave Radiation | |
Kup | Reflected (upward) Shortwave Radiation | |
Lup | Reflected (upward) Longwave Radiation | |
Kstar_can | Absorbed shortwave by canopy | |
Kstar_soil | Absorbed shortwave by soil | |
Kstar_snow | Absorbed shortwave by snow | |
Lstar_can | Absorbed longwave by canopy | |
Lstar_soil | Absorbed longwave by soil | |
Lstar_snow | Absorbed longwave by snow | |
LE_canopy | Latent heat evaporated by canopy | |
LE_soil | Latent heat evaporated by soil | |
LE_snow | ||
Lstar_strat | ||
canopydrip | ||
ga | Aerodynamic Conductance | mm/s |
# Reformat 'YYYY MM DD HH' -> 'YYYY-MM-DD-HH' so pandas can parse the dates.
sim_start_date = start_date.replace(" ", "-")
sim_end_date = end_date.replace(" ", "-")
import pandas as pd
sim_output = []
name_lists = []
# The daily date axis is the same for every ensemble member; build it once.
date_index = pd.date_range(sim_start_date, sim_end_date, freq='1D')
for lists in sorted_list:
    # Run label = file-name prefix before the first underscore.
    run_name = lists.split("/")[-1].split("_")[0]
    plot_data = pd.read_csv(os.path.join(output_path, run_name + '_basin.daily'), delimiter=" ")
    name_lists.append(run_name)
    plot_data.insert(loc=0, column='Date', value=date_index[:-1].values)
    # BUG FIX: removed the original `plot_data.set_index('Date')` — set_index
    # returns a new frame (it is not in-place) and the result was discarded,
    # so the call was a no-op. plot_output reads 'Date' as a column, therefore
    # no index is (or should be) set here.
    sim_output.append(plot_data)
# Display the first ensemble member's dataframe.
sim_output[0]
# Plot ensemble streamflow, skipping the first 100 days (pre_trim=100).
plot_output(sim_output, 'Date', name_lists, len(sim_output), 'streamflow', pre_trim =100, post_trim=-1)