In [ ]:
from __future__ import print_function
%pylab notebook
# import datetime as dt
import glob
import os
import re
import sys

import matplotlib.dates as md
import matplotlib.pyplot as plt
import pandas as pd

#from nose.tools import set_trace
from charistools.hypsometry import Hypsometry
from charistools.meltModels import CalibrationCost
from charistools.modelEnv import ModelEnv
In [ ]:
#%cd ~/projects/CHARIS/derived_hypsometries/REECv0_ModelRank000/AM_Vakhsh_at_Komsomolabad
#%ls *snow_on_land*
In [ ]:
def get_ddfs_and_surface(hyps):
    '''
    Parse min/max degree-day factors (DDFs) and the surface type from the
    comment lines of a melt hypsometry.

    Parameters
    ----------
    hyps : object with a ``comments`` attribute (list of str)
        Typically a charistools Hypsometry; only ``hyps.comments`` is read.

    Returns
    -------
    tuple (min_ddf, max_ddf, surface)
        min_ddf, max_ddf : float, np.nan when not found in the comments
        surface : str with spaces replaced by underscores,
            e.g. "Snow_on_land"; "unknown" when not found
    '''
    min_ddf = np.nan
    max_ddf = np.nan
    surface = "unknown"
    pmin = re.compile(r"min_DDF_mm_pday_pdegC")
    pmax = re.compile(r"max_DDF_mm_pday_pdegC")
    psurface = re.compile(r"melt for")
    for line in hyps.comments:
        if pmin.search(line):
            # Comment lines look like "# <key> : <value>"; split only on the
            # first separator so extra " : " in the value cannot break unpacking.
            _, value = line.split(" : ", 1)
            min_ddf = float(value)
        if pmax.search(line):
            _, value = line.split(" : ", 1)
            max_ddf = float(value)
        if psurface.search(line):
            # The text before "melt for" names the surface,
            # e.g. "# Snow on land melt for ..."
            surface, _ = psurface.split(line, maxsplit=1)
            # Drop the leading comment character, trim whitespace,
            # and make the name identifier-like
            surface = surface[1:].strip().replace(" ", "_")
    return (min_ddf, max_ddf, surface)
In [ ]:
drainageIDs = ["IN_Hunza_at_DainyorBridge",
               "AM_Vakhsh_at_Komsomolabad",
               "SY_Naryn_at_NarynTown",
               "GA_SaptaKosi_at_Chatara",
               "GA_Karnali_at_Benighat"]
dir = "/Users/brodzik/projects/CHARIS/derived_hypsometries"
for drainageID in drainageIDs:
    # One summary row per (rank, year); concatenated once at the end,
    # since DataFrame.append is deprecated and O(n^2) inside a loop.
    rows = []
    for rank in np.arange(20):
        for year in np.arange(14) + 2001:
            # Melt hypsometry files for this drainageID, rank and year.
            # There should be three, one for each surface type.
            melt_files = sorted(glob.glob("%s/REECv0_ModelRank%03d/%s/*%d*" % (
                dir, rank, drainageID, year)))
            print(rank, year)
            print(melt_files)
            row = {"rank": rank,
                   "drainageID": drainageID}
            for f in melt_files:
                hyps = Hypsometry(f)
                # Total melt volume: sum over all dates and elevations
                total_melt_km3 = hyps.data.sum().sum()
                min_ddf, max_ddf, surface = get_ddfs_and_surface(hyps)
                row["year"] = hyps.data.index[0].year
                row["%s_min_ddf" % (surface)] = min_ddf
                row["%s_max_ddf" % (surface)] = max_ddf
                row["%s_melt_km3" % (surface)] = total_melt_km3
            result = pd.DataFrame.from_dict(row, orient='index').transpose()
            # Fix the column order for the output table
            result = result[["year", "rank", "drainageID",
                             "Snow_on_land_min_ddf", "Snow_on_land_max_ddf", "Snow_on_land_melt_km3",
                             "Snow_on_ice_min_ddf", "Snow_on_ice_max_ddf", "Snow_on_ice_melt_km3",
                             "Exposed_glacier_ice_min_ddf", "Exposed_glacier_ice_max_ddf", "Exposed_glacier_ice_melt_km3"]]
            rows.append(result)
    df = pd.concat(rows) if rows else pd.DataFrame([])
    outfile = "%s/REECv0_ModelRankSummary/%s.annual_melt.best20.dat" % (dir, drainageID)
    print("Output to: %s" % outfile, file=sys.stderr)
    df.to_pickle(outfile)
In [ ]:
for drainageID in drainageIDs:
    # Per-basin best-20 summary written by the harvesting cell
    infile = "%s/REECv0_ModelRankSummary/%s.annual_melt.best20.dat" % (dir, drainageID)
    print("best20 file %s" % infile, file=sys.stderr)
    df = pd.read_pickle(infile)
    # Keep only the melt-volume columns for plotting
    melt = df.drop(['Snow_on_land_min_ddf', 'Snow_on_land_max_ddf',
                    'Snow_on_ice_min_ddf', 'Snow_on_ice_max_ddf',
                    'Exposed_glacier_ice_min_ddf', 'Exposed_glacier_ice_max_ddf'], axis=1)
    fig, axes = plt.subplots(3, 1, figsize=(8, 12))
    melt.boxplot(ax=axes[0],
                 column='Snow_on_ice_melt_km3',
                 by='year')
    axes[0].set_title("Melt from Snow on Ice")
    melt.boxplot(ax=axes[1],
                 column='Exposed_glacier_ice_melt_km3',
                 by='year')
    axes[1].set_title("Melt from Exposed Glacier Ice")
    melt.boxplot(ax=axes[2],
                 column='Snow_on_land_melt_km3',
                 by='year')
    axes[2].set_title("Melt from Snow on Land")
    # Common y-scale across the three panels so magnitudes are comparable;
    # computed once instead of per-axis (loop-invariant).
    ymax = 1.1 * melt[['Snow_on_land_melt_km3', 'Snow_on_ice_melt_km3',
                       'Exposed_glacier_ice_melt_km3']].max().max()
    for ax in axes:
        ax.set_ylim([0., ymax])
        ax.set_ylabel('Melt ($km^3$)')
    fig.suptitle("Best 20 Models, %s" % drainageID)
    fig.tight_layout()
    fig.subplots_adjust(top=0.94)
    outfile = "%s.png" % infile
    # Save this figure explicitly rather than relying on pyplot's
    # "current figure" state inside a loop that creates several figures.
    fig.savefig(outfile)
    print("Saving plot to %s" % outfile)
In [ ]:
dir = "/Users/brodzik/projects/CHARIS/derived_hypsometries"
drainageIDs = ["IN_Hunza_at_DainyorBridge",
               "AM_Vakhsh_at_Komsomolabad",
               "SY_Naryn_at_NarynTown",
               "GA_SaptaKosi_at_Chatara",
               "GA_Karnali_at_Benighat"]
# NOTE: the notebook export had the alldf assignment fused onto the end of
# the list literal above, which is a syntax error; split back onto its own
# statement. Also accumulate with pd.concat instead of deprecated append.
frames = []
for drainageID in drainageIDs:
    infile = "%s/REECv0_CycleSummary/%s.annual_melt.last20.dat" % (dir, drainageID)
    print("last20 file %s" % infile, file=sys.stderr)
    df = pd.read_pickle(infile)
    # Keep only the melt-volume columns
    melt = df.drop(['Snow_on_land_min_ddf', 'Snow_on_land_max_ddf',
                    'Snow_on_ice_min_ddf', 'Snow_on_ice_max_ddf',
                    'Exposed_glacier_ice_min_ddf', 'Exposed_glacier_ice_max_ddf'], axis=1)
    frames.append(melt)
# All basins' last-20-cycle melt summaries in one frame
alldf = pd.concat(frames)
In [ ]:
# Inspect the combined per-basin melt table
alldf
In [ ]:
# Compare the three melt sources across basins in a single figure.
melt_cols = ['Snow_on_ice_melt_km3', 'Snow_on_land_melt_km3', 'Exposed_glacier_ice_melt_km3']
fig, ax = plt.subplots(1, 1, figsize=(10, 5))
alldf.boxplot(ax=ax, column=melt_cols, by='drainageID')
ax.set_title("Melt Title")
ax.set_ylabel('Melt ($km^3$)')
fig.suptitle("Best Models (2001-2014), last 20 cycles")
fig.tight_layout()
In [ ]:
# NOTE(review): `surf` is not defined anywhere in this notebook; this
# scratch cell raises NameError unless `surf` was set interactively.
# Confirm whether this cell is still needed.
surf = surf.replace(" ", "_")
surf
In [ ]:
def find_ddf_variation(drainageID, nstrikes=3):
    '''
    Bracket a basin's best calibrated DDFs by +/- 1 std dev of the
    stable simulated-annealing (SA) cycles.

    Reads the SA summary (DDFs and cost z at the end of each cycle),
    drops the first 61 cycles as burn-in, parses the best model's four
    DDFs from the Best...stats.png filename, and widens each DDF by the
    std dev observed over the stable cycles, with QC to keep each
    winter/summer DDF pair ordered.

    Parameters
    ----------
    drainageID : str
        Basin identifier, e.g. "IN_Hunza_at_DainyorBridge".
    nstrikes : int, default 3
        Calibration strike count encoded in the filenames.

    Returns
    -------
    pd.DataFrame indexed by drainageID, columns
    ['nstrikes', 'Low', 'Best', 'High']; Low/Best/High are "_"-joined
    DDF strings in the order winter_snow, summer_snow, winter_ice,
    summer_ice.

    Raises
    ------
    IOError
        If the SA summary or stats file cannot be found uniquely.
    ValueError
        If the stats filename does not contain parseable DDFs.
    '''
    cal_dir = "/work/charis/ti_model/calibrations_correct_cost"
    params = "DDFnbr=10mm_N100_M050"
    # Read SA summary file, this is DDFs and z at end of each cycle
    matches = glob.glob("%s/%s.%dstr_%s.SA_summary.dat" % (
        cal_dir, drainageID, nstrikes, params))
    if 1 != len(matches):
        # Originally this only printed a message and then crashed with a
        # cryptic IndexError on an empty glob; raise a clear error instead.
        raise IOError("Error looking for SA_summary file for %s" % drainageID)
    SAFile = matches[0]
    print("SA_summary file : %s" % SAFile, file=sys.stderr)
    SAdf = pd.read_pickle(SAFile)
    # Drop first 61 rows of SA output, to limit analysis to stable stuff at end
    num_cycles_to_drop = 61
    stabledf = SAdf.drop(SAdf.index[np.arange(num_cycles_to_drop)])
    summarydf = stabledf.describe()
    # The calibration stats plot filename encodes the best model's DDFs
    matches = glob.glob("%s/%s.%dstr_%s.z*Best*stats.png" % (
        cal_dir, drainageID, nstrikes, params))
    if 1 != len(matches):
        raise IOError("Error looking for stats file for %s" % drainageID)
    statsFile = matches[0]
    print("stats file : %s" % statsFile, file=sys.stderr)
    # Parse the best model ddfs from the filename,
    # e.g. "...Best2.00_4.00_5.00_7.00...stats.png"
    p = re.compile(r'Best(\d+\.\d+)_(\d+\.\d+)_(\d+\.\d+)_(\d+\.\d+)')
    m = p.search(statsFile)
    if m is None:
        raise ValueError("Cannot parse best DDFs from %s" % statsFile)
    best = np.array([float(g) for g in m.groups()])
    # Use the variation from the stable cycles to calculate
    # best_minus1std and best_plus1std
    best_low = best.copy()
    best_high = best.copy()
    ddf = ['winter_snow_ddf', 'summer_snow_ddf', 'winter_ice_ddf', 'summer_ice_ddf']
    for i in np.arange(len(ddf)):
        best_low[i] = best_low[i] - summarydf.loc['std', ddf[i]]
        best_high[i] = best_high[i] + summarydf.loc['std', ddf[i]]
    # QC: within each bracket the winter DDF must not exceed the summer DDF;
    # force the offending member so each pair stays ordered.
    if best_low[0] > best_low[1]:
        print("Warning: QC problem on low snow DDFs, forcing them to lower value")
        best_low[0] = best_low[1]
    if best_low[2] > best_low[3]:
        print("Warning: QC problem on low ice DDFs, forcing them to lower value")
        best_low[2] = best_low[3]
    if best_high[0] > best_high[1]:
        print("Warning: QC problem on high snow DDFs, forcing them to higher value")
        best_high[1] = best_high[0]
    if best_high[2] > best_high[3]:
        print("Warning: QC problem on high ice DDFs, forcing them to higher value")
        best_high[3] = best_high[2]
    # Make model strings and return them
    best_str = "_".join(["%.2f" % v for v in best])
    best_low_str = "_".join(["%.2f" % v for v in best_low])
    best_high_str = "_".join(["%.2f" % v for v in best_high])
    result = {'drainageID': drainageID,
              'nstrikes': nstrikes,
              'Best': best_str,
              'High': best_high_str,
              'Low': best_low_str}
    result = pd.DataFrame.from_dict(result, orient='index').transpose()
    result.set_index('drainageID', inplace=True)
    result = result[['nstrikes', 'Low', 'Best', 'High']]
    return result
In [ ]:
# Spot-check a single basin with the 2-strike calibration
drainageID = "GA_Karnali_at_Benighat"
result = find_ddf_variation(drainageID, nstrikes=2)
In [ ]:
drainageIDs = ['SY_Naryn_at_NarynTown',
               'AM_Vakhsh_at_Komsomolabad',
               'IN_Hunza_at_DainyorBridge',
               'GA_Karnali_at_Benighat',
               'GA_Narayani_at_Devghat',
               'GA_SaptaKosi_at_Chatara']
strikes = [2, 3]
# Collect DDF-variation summaries for every basin / strike combination.
frames = []
for drainageID in drainageIDs:
    for strike in strikes:
        frames.append(find_ddf_variation(drainageID, nstrikes=strike))
# Concatenate once: DataFrame.append is deprecated and O(n^2) in a loop
df = pd.concat(frames)
In [ ]:
#drainageID = "GA_Karnali_at_Benighat"
#nstrikes = 2
def best_models(drainageID, nstrikes=3):
# Read SA summary file, this is DDFs and z at end of each cycle
dir = "/work/charis/ti_model/calibrations_correct_cost"
params = "DDFnbr=10mm_N100_M050"
list = glob.glob("%s/%s.%dstr_%s.SA_summary.dat" % (
dir, drainageID, nstrikes, params))
if 1 != len(list):
print("Error looking for SA_summary file for %s" % drainageID, file=sys.stderr)
SAFile = list[0]
print("SA_summary file : %s" % SAFile, file=sys.stderr)
df = pd.read_pickle(SAFile)
df.loc[:, "model"] = (
df["winter_snow_ddf"].map(str) + "_" +
df["summer_snow_ddf"].map(str) + "_" +
df["winter_ice_ddf"].map(str) + "_" +
df["summer_ice_ddf"].map(str))
df["nstrikes"] = nstrikes
outfile = "%s/%s.%dstr_%s.SA_summary.best20.dat" % (
dir, drainageID, nstrikes, params)
df.to_pickle(outfile)
print("outfile: %s" % outfile, file=sys.stderr)
In [ ]:
# Regenerate the best-20 SA summaries for every basin / strike combination.
drainageIDs = ['SY_Naryn_at_NarynTown',
               'AM_Vakhsh_at_Komsomolabad',
               'IN_Hunza_at_DainyorBridge',
               'GA_Karnali_at_Benighat',
               'GA_Narayani_at_Devghat',
               'GA_SaptaKosi_at_Chatara']
strikes = [2, 3]
for basin in drainageIDs:
    for n_strikes in strikes:
        best_models(basin, nstrikes=n_strikes)
In [ ]:
#newfile = '/work/charis/ti_model/calibrations_correct_cost/IN_Hunza_at_DainyorBridge.2str_DDFnbr=10mm_N100_M050.SA_summary.best20.dat'
# Spot-check: load one basin's best-20 SA summary written by best_models()
newfile = '/work/charis/ti_model/calibrations_correct_cost/AM_Vakhsh_at_Komsomolabad.2str_DDFnbr=10mm_N100_M050.SA_summary.best20.dat'
new = pd.read_pickle(newfile)
In [ ]:
# Inspect the loaded best-20 SA summary
new
To get the best model string for a particular cycle, look it up by row label (here, row 82):
In [ ]:
# Look up the model string for the row labeled 82
new.at[82, "model"]
In [ ]: