In [ ]:
%pylab notebook
from __future__ import print_function
import charistools.find_best_model as fbm
import charistools.meltModels as mM
from charistools.modelEnv import ModelEnv
import glob
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import math
import os
import pandas as pd
import pdb
import re
import sys
In [ ]:
# Chooose starting DDFs
ddfs_start = list(mM.RandomNewDDFs(initialize=True))
print("Begin DDFs here:")
print(ddfs_start)
In [ ]:
%cd /work/charis/ti_model/calibrations_correct_cost
%ls
In [ ]:
# Simulated annealing parameters
# Number of cycles
numCycles = 100
# Number of trials per cycle
numTrials = 50
# Number of accepted solutions
na = 0.0
# Probability of accepting worse solution at the start
p1 = 0.7
# Probability of accepting worse solution at the end
p50 = 0.001
# Initial temperature
t1 = -1.0 / math.log(p1)
# Final temperature
t50 = -1.0 / math.log(p50)
# Fraction reduction every cycle
frac = (t50/t1)**(1.0/(numCycles-1.0))
# Initialize DDFs
x = np.zeros((numCycles + 1, 4))
x[0] = ddfs_start
xi = np.zeros(4)
xi = ddfs_start
na = na + 1.0
In [ ]:
# Melt model parameters for a given basin
#drainageid = "IN_Hunza_at_DainyorBridge"
#years = [2001, 2002, 2003]
#drainageid = "AM_Vakhsh_at_Komsomolabad"
#years = [2004, 2005, 2007]
#drainageid = "SY_Naryn_at_NarynTown"
#years = [2003, 2005, 2007]
#drainageid = "GA_Karnali_at_Benighat"
#years = [2001, 2003, 2004]
drainageid = "GA_Narayani_at_Devghat"
years = [2001, 2004, 2005]
#drainageid = "GA_SaptaKosi_at_Chatara"
#years = [2003, 2004, 2005]
nstrikes = 3
ablation_method='grsize_scag'
use_daily_threshold_file=True
rainfall_col='diff_km3'
runoff_col='runoff'
config_file='/projects/brodzik/charis_ti_melt/calibration_modelEnv_config.ini'
myEnv = ModelEnv(tileConfigFile=config_file)
# How far away to look from the previous set of DDFs
neighborhood_mm = 10
# label will be used for plot titles and output filenames
label = "%1dstr DDFnbr=%dmm N%03d M%03d" % ( nstrikes, neighborhood_mm, numCycles, numTrials )
print(label)
In [ ]:
# Calculate the first calibration cost,
# this is "Current best results so far"
xc = np.zeros(4)
xc[:] = x[0]
result = mM.CalibrationStats(myEnv, drainageid, years, nstrikes,
xi[0], xi[1], xi[2], xi[3],
rainfall_col=rainfall_col,
runoff_col=runoff_col,
ablation_method=ablation_method,
use_daily_threshold_file=use_daily_threshold_file)
df, stats_df = mM.CalibrationCost(result)
fc = stats_df.z.iloc[0]
fs = np.zeros(numCycles+1)
fs[0] = fc
# Current temperature
t = t1
# DeltaE Average
DeltaE_avg = 0.0
print("Cost of initial DDFs: %.3f" % fs[0])
In [ ]:
%cd /work/charis/ti_model/calibrations_correct_cost
%ls
In [ ]:
logFile = "%s.%s.stats.txt" % ( drainageid, label.replace(" ", "_") )
logf = open(logFile, "w")
print(logFile)
In [ ]:
mM.SaveCalibrationStats(myEnv, drainageid, nstrikes,result,ablation_method=ablation_method,
use_daily_threshold_file=use_daily_threshold_file, file=sys.stderr,header=True)
mM.SaveCalibrationStats(myEnv, drainageid, nstrikes,result,ablation_method=ablation_method,
use_daily_threshold_file=use_daily_threshold_file, file=logf,header=True)
In [ ]:
for i in range(numCycles):
print("Cycle: %d with Temperature: %f" % (i, t))
for j in range(numTrials):
# Generate new trial DDFs, relative to xc "current best results so far"
ddfs = list(mM.RandomNewDDFs(min_snow_ddf=xc[0], max_snow_ddf=xc[1],
min_ice_ddf=xc[2], max_ice_ddf=xc[3],
neighborhood_mm=neighborhood_mm))
xi[0] = ddfs[0]
xi[1] = ddfs[1]
xi[2] = ddfs[2]
xi[3] = ddfs[3]
# Calculate the cost at the new DDFs
result = mM.CalibrationStats(
myEnv, drainageid, years, nstrikes,
xi[0], xi[1], xi[2], xi[3],
rainfall_col=rainfall_col,
runoff_col=runoff_col,
ablation_method=ablation_method,
use_daily_threshold_file=use_daily_threshold_file)
df, stats_df = mM.CalibrationCost(result)
# Save the calibration stats
mM.SaveCalibrationStats(myEnv, drainageid, nstrikes,result,ablation_method=ablation_method,
use_daily_threshold_file=use_daily_threshold_file, file=logf)
trial_cost = stats_df.z.iloc[0]
print("cycle=%d, trial=%d, ddfs=%.2f, %.2f, %.2f, %.2f, best_cost_so_far=%.3f, trial_cost=%.3f" % (
i, j, xi[0], xi[1], xi[2], xi[3], fc, trial_cost), file=sys.stderr)
DeltaE = abs(trial_cost - fc)
if (trial_cost > fc):
# Initialize DeltaE_avg if a worse solution was found
# on the first iteration
if (i==0 and j==0): DeltaE_avg = DeltaE
# objective function is worse
# generate prob of acceptance
p = math.exp(-DeltaE/(DeltaE_avg * t))
# determine whether to accept worse point
if (random.random() < p):
# accept the worse solution
accept = True
else:
# don't accept the worse solution
accept = False
else:
# objective function is lower, automatically accept
accept = True
if (accept == True):
# update the currently accepted solution
xc[0] = xi[0]
xc[1] = xi[1]
xc[2] = xi[2]
xc[3] = xi[3]
fc = trial_cost
# increment number of accepted solutions
na = na + 1.0
# update DeltaE_avg
DeltaE_avg = (DeltaE_avg * (na - 1.0) + DeltaE) / na
print("==> Updating best_cost_so_far", file=sys.stderr)
# Record the best x values at the end of every cycle
x[i+1][0] = xc[0]
x[i+1][1] = xc[1]
x[i+1][2] = xc[2]
x[i+1][3] = xc[3]
fs[i+1] = fc
# Lower the temperature for the next cycle
t = frac * t
# Close the logfile
logf.close()
# print solution
print("Best solution: %s" % xc, file=sys.stderr)
print("Best cost : %f" % fc, file=sys.stderr)
# Coerce answers to DataFrame
caldf = pd.DataFrame(x, columns=['winter_snow_ddf', 'summer_snow_ddf', 'winter_ice_ddf', 'summer_ice_ddf'])
caldf["min_cycle_z"] = fs
caldf
In [ ]:
datafile = "%s.%s.SA_summary.dat" % (drainageid, label.replace(" ", "_"))
caldf.to_pickle(datafile)
print("caldf pickled to %s" % datafile)
In [ ]:
%pwd
In [ ]:
# summary file is like this: AM_Vakhsh_at_Komsomolabad.3str_DDFnbr=10mm_N050_M050.SA_summary.dat
# filename is parsed for drainageid and label
# output will be written to : drainageid_label.SA_summary.png
def plot_sa_summary(file):
caldf = pd.read_pickle(file)
best_model = "%.2f_%.2f_%.2f_%.2f" % (
caldf.iloc[-1].winter_snow_ddf,
caldf.iloc[-1].summer_snow_ddf,
caldf.iloc[-1].winter_ice_ddf,
caldf.iloc[-1].summer_ice_ddf)
# Parse the filename for drainageid and label
parts = file.split('.')
drainageid = parts[0]
label = parts[1]
# Parse the filename for nCycles and nTrials
p = re.compile(r"_N(\d*)_M(\d*)")
m = p.search(file)
numCycles = int(m.group(1))
numTrials = int(m.group(2))
fig, ax = plt.subplots(2, 1, figsize=(8,6))
fig.suptitle(
"%s Best by SA=%s\n(%s)" % (
drainageid,
best_model,
label),
fontsize=12)
caldf["min_cycle_z"].plot(ax=ax[0], style='k.-')
ax[0].legend(["Cost"])
ax[0].set_ylabel('Cost')
ddfs = caldf.drop(labels='min_cycle_z', axis=1)
ddfs.plot(ax=ax[1], style=['b.-', 'b--', 'r.-', 'r--'])
handles, labels = ax[1].get_legend_handles_labels()
ax[1].legend(handles[::-1], labels[::-1])
ax[1].set_xlabel('Calibration Cycle (%d trials/cycle)' % numTrials)
ax[1].set_ylabel('DDF ($mm$)')
fig.tight_layout()
fig.subplots_adjust(top=0.86)
outfile = "%s.%s.z%.3f_Best%s.SA_summary.png" % (
drainageid, label.replace(" ", "_"), caldf.iloc[-1].min_cycle_z, best_model)
plt.savefig(outfile)
print("Plot saved to %s" % outfile)
outlink = "%s.%s.SA_summary.png" % (drainageid, label.replace(" ", "_"))
if os.path.exists( outlink):
os.remove( outlink )
os.symlink( outfile, outlink )
In [ ]:
plot_sa_summary(datafile)
In [ ]:
#caldf = fbm.get_calibration_stats(logFile, verbose=1)
#caldf["stats"]
%pwd
In [ ]:
#def plot_solution_space(caldf, drainageid, nstrikes, label):
def plot_solution_space(file):
# Parse the filename for drainageid and label
parts = file.split('.')
drainageid = parts[0]
label = parts[1]
# Parse the label for nstrikes
parts = label.split('str')
nstrikes=int(parts[0])
# Read the data
caldf = fbm.get_calibration_stats(file)
# Reverse the order of rows, so smallest-ordered values are plotted last
# Best model is now last in data frame
df = caldf["stats"][::-1]
best_i = -1
fig, ax = plt.subplots(2, 3, figsize=(9,6))
fig.suptitle(
"%s Best=%s \n(%d strikes, RMSE=%.3f $km^3$, Voldiff=%.3f%%, numModels=%d)" % (
drainageid, df.index[-1],
nstrikes,
df["monthly_rmse_km3"].iloc[-1],
df["annual_voldiff_pcent"].iloc[-1],
len(df.index)),
fontsize=12)
surfaces = ["ice", "snow"]
markers = ["^", "o"]
for i, surface in enumerate(surfaces):
row = i
min_ddf = "min_%s_ddf" % surface
max_ddf = "max_%s_ddf" % surface
min_ddf_label = "winter_%s_ddf" % surface
max_ddf_label = "summer_%s_ddf" % surface
# Upper Left: RMSE as function of min/max snow ddf
col = 0
rmse_plot = ax[row, col].scatter(df[min_ddf],
df[max_ddf],
c=df["monthly_rmse_km3"],
cmap="Greens",
edgecolor="",
marker=markers[i])
ax[row, col].plot([df[min_ddf].iloc[best_i]],
[df[max_ddf].iloc[best_i]],
marker="x",
color="red")
ax[row, col].set_title('Monthly RMSE ($km^3$)')
ax[row, col].set_xlabel(min_ddf_label)
ax[row, col].set_ylabel(max_ddf_label)
ax[row, col].axis([0., 60., 0., 60.])
ax[row, col].set_aspect('equal', 'box')
fig.colorbar(rmse_plot, ax=ax[row,col])
# Middle: Annual volume difference
col = 1
vmax = np.max(df["abs_voldiff_pcent"])
vmax = 10.0
vmin = -1.0 * vmax
voldiff_plot = ax[row, col].scatter(df[min_ddf],
df[max_ddf],
c=df["annual_voldiff_pcent"],
cmap="BrBG_r",
vmin=vmin,
vmax=vmax,
edgecolor="",
marker=markers[i])
ax[row, col].plot([df[min_ddf].iloc[best_i]],
[df[max_ddf].iloc[best_i]],
marker="x",
color="red")
ax[row, col].set_title('Annual vol diff (%)')
ax[row, col].set_xlabel(min_ddf_label)
ax[row, col].set_ylabel(max_ddf_label)
ax[row, col].axis([0., 60., 0., 60.])
ax[row, col].set_aspect('equal', 'box')
fig.colorbar(voldiff_plot, ax=ax[row,col])
# Right: Combined Z score
col = 2
vmax = np.max(df["z"])
vmax = 200.0;
vmin = 0.0
z_plot = ax[row, col].scatter(df[min_ddf],
df[max_ddf],
c=df["z"],
cmap="Greys",
vmin=vmin,
vmax=vmax,
edgecolor="",
marker=markers[i])
ax[row, col].plot([df[min_ddf].iloc[best_i]],
[df[max_ddf].iloc[best_i]],
marker="x",
color="red")
ax[row, col].set_title('Z')
ax[row, col].set_xlabel(min_ddf_label)
ax[row, col].set_ylabel(max_ddf_label)
ax[row, col].axis([0., 60., 0., 60.])
ax[row, col].set_aspect('equal', 'box')
fig.colorbar(z_plot, ax=ax[row,col])
fig.tight_layout()
fig.subplots_adjust(top=0.86)
outfile = "%s.%s.z%.3f_Best%s.stats.png" % (
drainageid, label.replace(" ", "_"), df["z"].iloc[best_i], df.index[-1])
plt.savefig(outfile)
print("Image saved to %s" % outfile)
outlink = "%s.%s.stats.png" % (drainageid, label.replace(" ", "_"))
if os.path.exists( outlink):
os.remove( outlink )
os.symlink( outfile, outlink )
In [ ]:
plot_solution_space(logFile)
In [ ]:
%pwd
In [ ]:
list = glob.glob("GA_Sap*2str*SA_summary.dat")
print(list)
for file in list:
plot_sa_summary(file)
In [ ]:
%cd /work/charis/ti_model/calibrations_correct_cost
%ls
In [ ]:
list = glob.glob("GA_Sap*2str*stats.txt")
list
In [ ]:
for file in list:
plot_solution_space(file)
In [ ]: