Using results from a simulated annealing calibration, determine the best 100 models and create their model strings for batch processing.


In [ ]:
from __future__ import print_function
%pylab notebook
# import datetime as dt
import glob
import matplotlib.pyplot as plt
import matplotlib.dates as md
#from nose.tools import set_trace
from charistools.hypsometry import Hypsometry
from charistools.meltModels import CalibrationCost
from charistools.modelEnv import ModelEnv
import pandas as pd
import re
import os

In [ ]:
%cd /work/charis/ti_model/calibrations_correct_cost
%ls GA_Karn*2str*

In [ ]:
# Name of the SA summary file for GA_Karnali_at_Benighat (the "2str" variant),
# relative to the current working directory.
saFile = "GA_Karnali_at_Benighat.2str_DDFnbr=10mm_N100_M050.SA_summary.dat"
# Reading the file is currently disabled:
#caldf = pd.read_pickle(saFile)

In [ ]:
def find_best_models(drainageID, nstrikes=3, calib_dir=None, params=None):
    """Rank all models tested for one drainage and pickle the first 20 rows.

    Locates the whitespace-delimited stats file
    ``<calib_dir>/<drainageID>.<nstrikes>str_<params>.stats.txt``, passes its
    contents through ``CalibrationCost`` and writes the first 20 rows of the
    resulting stats table to
    ``<calib_dir>/<drainageID>.<nstrikes>str_<params>.stats.best20.dat``
    as a pickle.

    Args:
        drainageID: drainage identifier used as the file-name prefix,
            e.g. "GA_Karnali_at_Benighat".
        nstrikes: integer that appears in the file name as "<n>str".
        calib_dir: directory holding the stats files; defaults to the
            original hard-coded calibration directory.
        params: parameter tag in the file name; defaults to
            "DDFnbr=10mm_N100_M050".

    Returns:
        The ``(df, stats_df)`` pair returned by ``CalibrationCost``, with the
        index of ``stats_df`` moved into a regular column.

    Raises:
        IndexError: if no stats file matches.  The exception type is the one
            the previous implementation raised in this situation, but the
            message now names the pattern that was searched.
    """
    # Imported locally so the function does not rely on `sys` having been
    # pulled into the notebook namespace by some other cell or magic.
    import sys

    if calib_dir is None:
        calib_dir = "/Users/brodzik/projects/CHARIS/calibrations_correct_cost"
    if params is None:
        params = "DDFnbr=10mm_N100_M050"

    # Read the SA stats file, this is all models tested
    pattern = "%s/%s.%dstr_%s.stats.txt" % (
        calib_dir, drainageID, nstrikes, params)
    # Sorted so that the file picked in the ambiguous case is deterministic.
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise IndexError(
            "No stats file found for %s (pattern: %s)" % (drainageID, pattern))
    if len(matches) > 1:
        # Ambiguity is reported but tolerated, as before: the first match is used.
        print("Error looking for stats file for %s" % drainageID, file=sys.stderr)
    stats_file = matches[0]
    print("stats file : %s" % stats_file, file=sys.stderr)
    all_models = pd.read_table(stats_file, sep=r'\s+')

    # Sort all models by z-score
    # NOTE(review): the sorting is presumably done inside CalibrationCost
    # (charistools.meltModels); confirm against that implementation.
    df, stats_df = CalibrationCost(all_models, verbose=True)

    # Move the index to a column of its own
    stats_df.reset_index(inplace=True)

    # Save the top 20 models
    outFile = "%s/%s.%dstr_%s.stats.best20.dat" % (
        calib_dir, drainageID, nstrikes, params)
    stats_df.iloc[:20].to_pickle(outFile)
    print("Best 20 models will be saved in order to: %s" % outFile)

    return df, stats_df

In [ ]:
# Run find_best_models for a single drainage with nstrikes=2 and display the
# first rows of the returned stats table.
drainageID = "GA_Karnali_at_Benighat"
df, stats_df = find_best_models(drainageID, nstrikes=2)
stats_df.head()

In [ ]:
# Read back one of the "stats.best20" pickles (file naming matches what
# find_best_models writes) and display it.
file = "/Users/brodzik/projects/CHARIS/calibrations_correct_cost/IN_Hunza_at_DainyorBridge.2str_DDFnbr=10mm_N100_M050.stats.best20.dat"
newdf = pd.read_pickle(file)
newdf

In [ ]:
# Value of the "model" column in the first row of newdf.
newdf.iloc[0].model

In [ ]:
# Run find_best_models for every drainage listed below, once for each
# nstrikes value; each call writes its own "stats.best20" pickle.
strikes = [2, 3]
drainageIDs = [
    "SY_Naryn_at_NarynTown",
    "AM_Vakhsh_at_Komsomolabad",
    "IN_Hunza_at_DainyorBridge",
    "GA_Karnali_at_Benighat",
    "GA_Narayani_at_Devghat",
    "GA_SaptaKosi_at_Chatara",
]
for drainageID in drainageIDs:
    for strike in strikes:
        # The returned frames are overwritten on each pass; only the last
        # pair remains bound after the loop.
        df, stats_df = find_best_models(drainageID, nstrikes=strike)

In [ ]:
#drainageID = "GA_Karnali_at_Benighat"
#nstrikes = 2
def best_models(drainageID, nstrikes=3, calib_dir=None, params=None):
    """Add model strings to an SA summary table and pickle the result.

    Reads the pickled SA summary
    ``<calib_dir>/<drainageID>.<nstrikes>str_<params>.SA_summary.dat``, adds

    * a ``model`` column: the four DDF columns (winter snow, summer snow,
      winter ice, summer ice) converted to strings and joined with "_", and
    * an ``nstrikes`` column holding the ``nstrikes`` argument,

    then writes the table to
    ``<calib_dir>/<drainageID>.<nstrikes>str_<params>.SA_summary.best20.dat``.

    Args:
        drainageID: drainage identifier used as the file-name prefix.
        nstrikes: integer that appears in the file name as "<n>str"; also
            stored in the ``nstrikes`` column.
        calib_dir: directory holding the SA summary files; defaults to the
            original hard-coded calibration directory.
        params: parameter tag in the file name; defaults to
            "DDFnbr=10mm_N100_M050".

    Returns:
        None.  The result is only written to disk.

    Raises:
        IndexError: if no SA summary file matches.  The exception type is the
            one the previous implementation raised in this situation, but the
            message now names the pattern that was searched.
    """
    # Imported locally so the function does not rely on `sys` having been
    # pulled into the notebook namespace by some other cell or magic.
    import sys

    if calib_dir is None:
        calib_dir = "/work/charis/ti_model/calibrations_correct_cost"
    if params is None:
        params = "DDFnbr=10mm_N100_M050"

    # Read SA summary file, this is DDFs and z at end of each cycle
    pattern = "%s/%s.%dstr_%s.SA_summary.dat" % (
        calib_dir, drainageID, nstrikes, params)
    # Sorted so that the file picked in the ambiguous case is deterministic.
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise IndexError(
            "No SA_summary file found for %s (pattern: %s)" % (
                drainageID, pattern))
    if len(matches) > 1:
        # Ambiguity is reported but tolerated, as before: the first match is used.
        print("Error looking for SA_summary file for %s" % drainageID,
              file=sys.stderr)
    SAFile = matches[0]
    print("SA_summary file : %s" % SAFile, file=sys.stderr)
    # NOTE: read_pickle must only be used on trusted files.
    summary = pd.read_pickle(SAFile)

    # Model string: "<winter_snow>_<summer_snow>_<winter_ice>_<summer_ice>"
    ddf_columns = ("winter_snow_ddf", "summer_snow_ddf",
                   "winter_ice_ddf", "summer_ice_ddf")
    model = summary[ddf_columns[0]].map(str)
    for column in ddf_columns[1:]:
        model = model + "_" + summary[column].map(str)
    summary["model"] = model

    summary["nstrikes"] = nstrikes

    # NOTE(review): the output is named "best20" but every row of the summary
    # is written; no selection is made here.  Confirm whether that is intended.
    outfile = "%s/%s.%dstr_%s.SA_summary.best20.dat" % (
        calib_dir, drainageID, nstrikes, params)

    summary.to_pickle(outfile)
    print("outfile: %s" % outfile, file=sys.stderr)

In [ ]:
# Run best_models for every drainage listed below, once for each nstrikes
# value; each call writes its own "SA_summary.best20" pickle.
strikes = [2, 3]
drainageIDs = [
    "SY_Naryn_at_NarynTown",
    "AM_Vakhsh_at_Komsomolabad",
    "IN_Hunza_at_DainyorBridge",
    "GA_Karnali_at_Benighat",
    "GA_Narayani_at_Devghat",
    "GA_SaptaKosi_at_Chatara",
]
for drainageID in drainageIDs:
    for strike in strikes:
        best_models(drainageID, nstrikes=strike)

In [ ]:
# Load one "SA_summary.best20" pickle (file naming matches what best_models
# writes).  The Hunza path is kept as a disabled alternative to the Vakhsh one.
#newfile = '/work/charis/ti_model/calibrations_correct_cost/IN_Hunza_at_DainyorBridge.2str_DDFnbr=10mm_N100_M050.SA_summary.best20.dat'
newfile = '/work/charis/ti_model/calibrations_correct_cost/AM_Vakhsh_at_Komsomolabad.2str_DDFnbr=10mm_N100_M050.SA_summary.best20.dat'
new = pd.read_pickle(newfile)

In [ ]:
# Display the table loaded from newfile.
new

To get the best model for cycle 95:


In [ ]:
# Look up the "model" value of the row whose index label is 82.
new.at[82, "model"]

In [ ]: