In [1]:
# Pure python modules and jupyter notebook functionality
# first you should import the third-party python modules which you'll use later on
# the first line enables that figures are shown inline, directly in the notebook
%pylab inline
import os
import datetime as dt
import pandas as pd
from os import path
import sys
from matplotlib import pyplot as plt
This next step is highly specific on how and where you have installed Shyft. If you have followed the guidelines at github, and cloned the three shyft repositories: i) shyft, ii) shyft-data, and iii) shyft-doc, then you may need to tell jupyter notebooks where to find shyft. Uncomment the relevant lines below.
If you have a 'system' shyft, or used conda install -s sigbjorn shyft
to install shyft, then you probably will want to make sure you have set the SHYFT_DATA directory correctly, as otherwise, Shyft will assume the above structure and fail. This has to be done before import shyft
. In that case, uncomment the relevant lines below.
note: it is most likely that you'll need to do one or the other.
In [2]:
# try to auto-configure the path, -will work in all cases where doc and data
# are checked out at same level
shyft_data_path = path.abspath("../../../shyft-data")
if path.exists(shyft_data_path) and 'SHYFT_DATA' not in os.environ:
os.environ['SHYFT_DATA']=shyft_data_path
# shyft should be available either by it's install in python
# or by PYTHONPATH set by user prior to starting notebook.
# This is equivalent to the two lines below
# shyft_path=path.abspath('../../../shyft')
# sys.path.insert(0,shyft_path)
In [3]:
# importing the shyft modules needed for running a calibration
from shyft.repository.default_state_repository import DefaultStateRepository
from shyft.orchestration.configuration.yaml_configs import YAMLCalibConfig, YAMLSimConfig
from shyft.orchestration.simulators.config_simulator import ConfigCalibrator, ConfigSimulator
In [4]:
# conduct a configured simulation first.
config_file_path = os.path.abspath("../nea-example/nea-config/neanidelva_simulation.yaml")
cfg = YAMLSimConfig(config_file_path, "neanidelva")
simulator = ConfigSimulator(cfg)
# run the model, and we'll just pull the `api.model` from the `simulator`
simulator.run()
state = simulator.region_model.state
Now that we have the initial state, we'll run the calibration (this is not a strictly required step, but we use it later)
In [5]:
# set up configuration using *.yaml configuration files
config_file_path = os.path.abspath("./nea-config/neanidelva_calibration.yaml") # here is the *.yaml file
cfg = YAMLCalibConfig(config_file_path, "neanidelva")
In [6]:
# initialize an instance of the orchestration's ConfigCalcalibrator class, which has all the functionality needed
# to run a calibration using the above initiated configuration
calib = ConfigCalibrator(cfg)
n_cells = calib.region_model.size()
state_repos = DefaultStateRepository(calib.region_model) # Notice that this repository needs the real model
# so that it's able to generate a precise
# default state-with-id vector for this
# specific model
In [7]:
# once the calibrator is set up, all you need to do is running the calibration...
# the calibrated parameters are stored in a model.yaml.
results = calib.calibrate(cfg.sim_config.time_axis, state_repos.get_state(0).state_vector,
cfg.optimization_method['name'],
cfg.optimization_method['params'])
In [8]:
# Get NSE of calibrated run:
result_params = []
for i in range(results.size()):
result_params.append(results.get(i))
print("Final NSE =", 1-calib.optimizer.calculate_goal_function(result_params))
In [9]:
# Check out the calibrated parameters.
diff = 1.0E-3
print("{0:30s} {1:10s}".format("PARAM-NAME", "CALIB-VALUE"))
for i in range(results.size()):
print("{0:30s} {1:10f}".format(results.get_name(i), results.get(i)))
In [10]:
# get the target vector and discharge statistics from the configured calibrator
target_obs = calib.tv[0]
disch_sim = calib.region_model.statistics.discharge(target_obs.catchment_indexes).average(target_obs.ts.time_axis)
disch_obs = target_obs.ts.values
ts_timestamps = [dt.datetime.utcfromtimestamp(p.start) for p in target_obs.ts.time_axis]
In [11]:
# plot up the results
fig, ax = plt.subplots(1, figsize=(15,10))
ax.plot(ts_timestamps, disch_sim.values, lw=2, label = "sim")
ax.plot(ts_timestamps, disch_obs, lw=2, ls='--', label = "obs")
ax.set_title("observed and simulated discharge")
ax.legend()
ax.set_ylabel("discharge [m3 s-1]")
Out[11]:
Instead of changing model parameters in the yaml-configs, reload the configuration and re-run the model, we can also just change the parameters "on-the-fly", and rerun the model. This makes it easy to investigate the influence of certain model parameters on the simulation results.
In the following we'll investigate the impact of manually manipulating this parameter.
In [12]:
parameters = calib.region_model.get_region_parameter() # fetching parameters from the simulator object
print(u"Calibrated rain/snow threshold temp: {} C".format(parameters.gs.tx)) # print current value of hs.tx
In the following, we first set the hs.tx parameter to a higher, and then to a lower value compared to the value the calibration results suggest. We re-run the simulation, respicetively, and plot the results.
In [13]:
calib.optimizer.calculate_goal_function(result_params) # reset the parameters to the values of the calibration
parameters.gs.tx = 4.0 # setting a higher value for tx
s_init = state.extract_state([])
# type(state)
# s0=state_repos.get_state(0)
# s0.state_vector
# state.apply_state(s0, [])
calib.run(state=s_init) # rerun the model, with new parameter
disch_sim_p_high = calib.region_model.statistics.discharge(target_obs.catchment_indexes).average(target_obs.ts.time_axis) # fetch discharge ts
parameters.gs.tx = -4.0 # setting a higher value for tx
calib.run(state=s_init) # rerun the model, with new parameter
disch_sim_p_low = calib.region_model.statistics.discharge(target_obs.catchment_indexes).average(target_obs.ts.time_axis) # fetch discharge ts
fig, ax = plt.subplots(1, figsize=(15,10))
ax.plot(ts_timestamps, disch_sim.values, lw=2, label = "calib")
ax.plot(ts_timestamps, disch_sim_p_high.values, lw=2, label = "high")
ax.plot(ts_timestamps, disch_sim_p_low.values, lw=2, label = "low")
ax.plot(ts_timestamps, disch_obs, lw=2, ls='--', label = "obs")
ax.set_title("investigating parameter gs.tx")
ax.legend()
ax.set_ylabel("discharge [m3 s-1]")
Out[13]:
In [14]:
s_init = state.extract_state([])
# reset the max water parameter
parameters.gs.max_water = 1.0 # setting a higher value for tx
calib.run(state=s_init) # rerun the model, with new parameter
disch_sim_p_high = calib.region_model.statistics.discharge(target_obs.catchment_indexes).average(target_obs.ts.time_axis) # fetch discharge ts
parameters.gs.max_water = .001 # setting a higher value for tx
calib.run(state=s_init) # rerun the model, with new parameter
disch_sim_p_low = calib.region_model.statistics.discharge(target_obs.catchment_indexes).average(target_obs.ts.time_axis) # fetch discharge ts
# plot the results
fig, ax = plt.subplots(1, figsize=(15,10))
ax.plot(ts_timestamps, disch_sim.values, lw=2, label = "calib")
ax.plot(ts_timestamps, disch_sim_p_high.values, lw=2, label = "high")
ax.plot(ts_timestamps, disch_sim_p_low.values, lw=2, label = "low")
ax.plot(ts_timestamps, disch_obs, lw=2, ls='--', label = "obs")
ax.set_title("investigating parameter gs.max_water")
ax.legend()
ax.set_ylabel("discharge [m3 s-1]")
Out[14]:
In [15]:
# at this point we could look at the time series for every cell. Or plot a spatial map...
# TODO: https://data-dive.com/cologne-bike-rentals-interactive-map-bokeh-dynamic-choropleth
from ipywidgets import interact
import numpy as np
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
from bokeh.palettes import viridis
from bokeh.models.sources import ColumnDataSource
output_notebook()
p = figure(title='Parameters', plot_height=300, plot_width=800)
pallette = viridis(10)
ts_timestamps = [dt.datetime.utcfromtimestamp(ta.start) for ta in target_obs.ts.time_axis]
def plot_simobs(calib):
model = calib.region_model
disch_sim = model.statistics.discharge(calib.tv[0].catchment_indexes).average(calib.tv[0].ts.time_axis)
disch_obs = calib.tv[0].ts
data = {
'time': ts_timestamps,
'sim': model.statistics.discharge(calib.tv[0].catchment_indexes).average(calib.tv[0].ts.time_axis).values,
'obs': calib.tv[0].ts.values
}
source = ColumnDataSource(data)
p.line('time', 'sim', source=source, line_color=pallette[0])
p.line('time', 'obs', source=source, line_color='red')
return p
def update(tx=0):
parameters.gs.tx = tx
calib.run(state=s_init)
plot_simobs(calib)
push_notebook()
model = calib.region_model
p = plot_simobs(calib)
show(p, notebook_handle=True)
interact(update, tx=np.arange(-3.,4.))
Out[15]:
In [ ]: