Using the 20 best simulated annealing (SA) models for each basin, explore the variability in the melt data that we generated.


In [ ]:
from __future__ import print_function
%pylab notebook
# import datetime as dt
import glob
import matplotlib.pyplot as plt
import matplotlib.dates as md
#from nose.tools import set_trace
from charistools.hypsometry import Hypsometry
from charistools.meltModels import CalibrationCost
from charistools.modelEnv import ModelEnv
import numpy as np
import pandas as pd
import re
import os
import sys

In [ ]:
#%cd ~/projects/CHARIS/derived_hypsometries/REECv0_ModelRank000/AM_Vakhsh_at_Komsomolabad
#%ls *snow_on_land*

In [ ]:
def get_ddfs_and_surface(hyps):
    '''
    Parses the min/max DDFs and the surface type from hyps.comments.
    Returns a tuple of (min_ddf, max_ddf, surface).
    '''
    min_ddf = np.nan
    max_ddf = np.nan
    surface = "unknown"
    pmin = re.compile(r"min_DDF_mm_pday_pdegC")
    pmax = re.compile(r"max_DDF_mm_pday_pdegC")
    psurface = re.compile(r"melt for")
    for line in hyps.comments:
        if pmin.search(line):
            key, value = line.split(" : ")
            min_ddf = float(value)
        if pmax.search(line):
            key, value = line.split(" : ")
            max_ddf = float(value)
        if psurface.search(line):
            # The surface type is the text before "melt for",
            # e.g. "# Snow on land melt for ..." -> "Snow_on_land"
            surface, dummy = psurface.split(line)
            surface = surface[1:].strip().replace(" ", "_")

    return (min_ddf, max_ddf, surface)
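
A minimal sketch of the comment-header format this parser assumes (the lines below are illustrative; the real headers come from the melt hypsometry files):


In [ ]:
class FakeHyps(object):
    '''Stand-in for a charistools Hypsometry, with only the comments attribute'''
    def __init__(self, comments):
        self.comments = comments

comments = ["# Snow on land melt for IN_Hunza_at_DainyorBridge",
            "# min_DDF_mm_pday_pdegC : 2.00",
            "# max_DDF_mm_pday_pdegC : 4.00"]
get_ddfs_and_surface(FakeHyps(comments))
# expect: (2.0, 4.0, 'Snow_on_land')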

In [ ]:
drainageIDs = ["IN_Hunza_at_DainyorBridge", 
               "AM_Vakhsh_at_Komsomolabad", 
               "SY_Naryn_at_NarynTown", 
               "GA_SaptaKosi_at_Chatara",
               "GA_Karnali_at_Benighat"]

dir = "/Users/brodzik/projects/CHARIS/derived_hypsometries"
for drainageID in drainageIDs:
    df = pd.DataFrame([])
    #for cycle in np.arange(20) + 81:
    for rank in np.arange(20):
        for year in np.arange(14) + 2001:
        
            # Get a list of melt filenames for this drainageID, rank (or cycle) and year.
            # There will be three, one for each surface type.
            #files = sorted(glob.glob("%s/REECv0_Cycle%03d/%s/*%d*" % (dir, cycle, drainageID, year)))
            #print(cycle, year)
            files = sorted(glob.glob("%s/REECv0_ModelRank%03d/%s/*%d*" % (dir, rank, drainageID, year)))
            print(rank, year)
            print(files)
            #row = {"cycle":cycle,
            #       "drainageID":drainageID}
            row = {"rank":rank,
                   "drainageID":drainageID}
            for f in files:
                hyps = Hypsometry(f)
                total_melt_km3 = hyps.data.sum().sum()
                min_ddf, max_ddf, surface = get_ddfs_and_surface(hyps)
                row["year"] = hyps.data.index[0].year
                row["%s_min_ddf" % (surface)] = min_ddf
                row["%s_max_ddf" % (surface)] = max_ddf
                row["%s_melt_km3" % (surface)] = total_melt_km3
            
            result = pd.DataFrame.from_dict(row, orient='index').transpose()
        
            #result = result[["year", "cycle", "drainageID", 
            #                 "Snow_on_land_min_ddf", "Snow_on_land_max_ddf", "Snow_on_land_melt_km3",
            #                 "Snow_on_ice_min_ddf", "Snow_on_ice_max_ddf", "Snow_on_ice_melt_km3",
            #                 "Exposed_glacier_ice_min_ddf", "Exposed_glacier_ice_max_ddf", "Exposed_glacier_ice_melt_km3"]]

            result = result[["year", "rank", "drainageID", 
                             "Snow_on_land_min_ddf", "Snow_on_land_max_ddf", "Snow_on_land_melt_km3",
                             "Snow_on_ice_min_ddf", "Snow_on_ice_max_ddf", "Snow_on_ice_melt_km3",
                             "Exposed_glacier_ice_min_ddf", "Exposed_glacier_ice_max_ddf", "Exposed_glacier_ice_melt_km3"]]
        
            df = df.append(result)
    
    #outfile = "%s/REECv0_CycleSummary/%s.annual_melt.last20.dat" % (dir, drainageID)
    outfile = "%s/REECv0_ModelRankSummary/%s.annual_melt.best20.dat" % (dir, drainageID)
    print("Output to: %s" % outfile, file=sys.stderr)
    df.to_pickle(outfile)
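
As a quick sanity check (a sketch, assuming the pickles written above exist), read one summary back and inspect its first rows:


In [ ]:
check = pd.read_pickle("%s/REECv0_ModelRankSummary/%s.annual_melt.best20.dat" % (dir, drainageIDs[0]))
check.head()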

In [ ]:
for drainageID in drainageIDs:
    #infile = "%s/REECv0_CycleSummary/%s.annual_melt.last20.dat" % (dir, drainageID)
    #print("last20 file %s" % infile, file=sys.stderr)
    infile = "%s/REECv0_ModelRankSummary/%s.annual_melt.best20.dat" % (dir, drainageID)
    print("best20 file %s" % infile, file=sys.stderr)
    df = pd.read_pickle(infile)

    melt = df.copy()
    melt.drop(['Snow_on_land_min_ddf','Snow_on_land_max_ddf',
               'Snow_on_ice_min_ddf','Snow_on_ice_max_ddf',
               'Exposed_glacier_ice_min_ddf','Exposed_glacier_ice_max_ddf'], axis=1, inplace=True)

    fig, axes = plt.subplots(3, 1, figsize=(8,12))

    melt.boxplot(ax=axes[0],
                 column='Snow_on_ice_melt_km3',
                 by='year')
    axes[0].set_title("Melt from Snow on Ice")

    melt.boxplot(ax=axes[1],
                 column='Exposed_glacier_ice_melt_km3', 
                 by='year')
    axes[1].set_title("Melt from Exposed Glacier Ice")

    melt.boxplot(ax=axes[2],
                 column='Snow_on_land_melt_km3',
                 by='year')
    axes[2].set_title("Melt from Snow on Land")

    for ax in axes:
        ax.set_ylim([0., 
                     1.1 * melt[['Snow_on_land_melt_km3', 'Snow_on_ice_melt_km3', 'Exposed_glacier_ice_melt_km3']].max().max()])
        ax.set_ylabel('Melt ($km^3$)')

    fig.suptitle("Best 20 Models, %s" % drainageID)
    fig.tight_layout()
    fig.subplots_adjust(top=0.94)
    outfile = "%s.png" % infile
    plt.savefig(outfile)
    print("Saving plot to %s" % outfile)

Make a plot of overall variability by basin and surface type.


In [ ]:
dir = "/Users/brodzik/projects/CHARIS/derived_hypsometries"
drainageIDs = ["IN_Hunza_at_DainyorBridge", 
               "AM_Vakhsh_at_Komsomolabad", 
               "SY_Naryn_at_NarynTown", 
               "GA_SaptaKosi_at_Chatara",
               "GA_Karnali_at_Benighat"]alldf = pd.DataFrame([])

for drainageID in drainageIDs:
    infile = "%s/REECv0_CycleSummary/%s.annual_melt.last20.dat" % (dir, drainageID)
    print("last20 file %s" % infile, file=sys.stderr)
    df = pd.read_pickle(infile)

    melt = df.copy()
    melt.drop(['Snow_on_land_min_ddf','Snow_on_land_max_ddf',
               'Snow_on_ice_min_ddf','Snow_on_ice_max_ddf',
               'Exposed_glacier_ice_min_ddf','Exposed_glacier_ice_max_ddf'], axis=1, inplace=True)
    alldf = alldf.append(melt)

In [ ]:
alldf

In [ ]:
fig, ax = plt.subplots(1, 1, figsize=(10,5))

alldf.boxplot(ax=ax,
              column=['Snow_on_ice_melt_km3','Snow_on_land_melt_km3','Exposed_glacier_ice_melt_km3'],
              by='drainageID')
ax.set_title("Annual melt by basin and surface type")

#ax.set_ylim([0.,
#             1.1 * melt[['Snow_on_land_melt_km3', 'Snow_on_ice_melt_km3', 'Exposed_glacier_ice_melt_km3']].max().max()])
ax.set_ylabel('Melt ($km^3$)')

fig.suptitle("Best Models (2001-2014), last 20 cycles")
fig.tight_layout()

In [ ]:
# Scratch: check the surface-name to column-name conversion
surf = "Snow on land"
surf = surf.replace(" ", "_")
surf

In [ ]:
def find_ddf_variation(drainageID, nstrikes=3):
    '''
    Reads the SA summary for a basin and returns Low/Best/High DDF model
    strings, where Low/High are the best model -/+ one standard deviation
    of the DDFs over the stable cycles at the end of the run.
    '''
    # Read SA summary file; this holds the DDFs and z at the end of each cycle
    dir = "/work/charis/ti_model/calibrations_correct_cost"
    params = "DDFnbr=10mm_N100_M050"
    files = glob.glob("%s/%s.%dstr_%s.SA_summary.dat" % (
        dir, drainageID, nstrikes, params))
    if 1 != len(files):
        print("Error looking for SA_summary file for %s" % drainageID, file=sys.stderr)
    SAFile = files[0]
    print("SA_summary file : %s" % SAFile, file=sys.stderr)
    SAdf = pd.read_pickle(SAFile)
    
    # Drop the first 61 rows of SA output, limiting the analysis
    # to the stable cycles at the end of the run
    num_cycles_to_drop = 61
    stabledf = SAdf.drop(SAdf.index[np.arange(num_cycles_to_drop)])
    summarydf = stabledf.describe()
    
    # Find the calibration stats file; the best model DDFs are encoded in its filename
    files = glob.glob("%s/%s.%dstr_%s.z*Best*stats.png" % (
        dir, drainageID, nstrikes, params))
    if 1 != len(files):
        print("Error looking for stats file for %s" % drainageID, file=sys.stderr)
    statsFile = files[0]
    print("stats file : %s" % statsFile, file=sys.stderr)
    
    best = np.zeros(4)
    
    # Parse the best model ddfs from the filename
    p = re.compile(r'Best(\d+\.\d+)_(\d+\.\d+)_(\d+\.\d+)_(\d+\.\d+)')
    m = p.search(statsFile)
    for i in np.arange(4):
        best[i] = float(m.group(i+1))
    
    # Use the variation from the stable cycles to calculate best_minus1std and best_plus1std
    best_low = best.copy()
    best_high = best.copy()
    
    ddf = ['winter_snow_ddf', 'summer_snow_ddf', 'winter_ice_ddf', 'summer_ice_ddf']
    for i in np.arange(len(ddf)):
        best_low[i] = best_low[i] - summarydf.loc['std', ddf[i]]
        best_high[i] = best_high[i] + summarydf.loc['std', ddf[i]]
        
    # QC: enforce winter DDF <= summer DDF in both the low and high models
    if best_low[0] > best_low[1]:
        print("Warning: QC problem on low snow DDFs, forcing them to lower value")
        best_low[0] = best_low[1]
    if best_low[2] > best_low[3]:
        print("Warning: QC problem on low ice DDFs, forcing them to lower value")
        best_low[2] = best_low[3]
        
    if best_high[0] > best_high[1]:
        print("Warning: QC problem on high snow DDFs, forcing them to higher value")
        best_high[1] = best_high[0]
    if best_high[2] > best_high[3]:
        print("Warning: QC problem on high ice DDFs, forcing them to higher value")
        best_high[3] = best_high[2]
    
    # Make model strings and return them
    best_str = "_".join(["%.2f" % i for i in best])
    best_low_str = "_".join(["%.2f" % i for i in best_low])
    best_high_str = "_".join(["%.2f" % i for i in best_high])
    
    result = {'drainageID': drainageID,
              'nstrikes': nstrikes,
              'Best': best_str, 
              'High': best_high_str,
              'Low': best_low_str}
    
    result = pd.DataFrame.from_dict(result, orient='index').transpose()
    result.set_index('drainageID', inplace=True)
    result = result[['nstrikes', 'Low', 'Best', 'High']]
    
    return result
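
A minimal sketch of the filename parsing above, using a hypothetical stats filename (illustrative only; real names come from the calibration output and match the z*Best*stats.png glob):


In [ ]:
# Hypothetical stats filename, for illustration only
statsFile = "IN_Hunza_at_DainyorBridge.3str_DDFnbr=10mm_N100_M050.z0.123Best2.00_4.00_6.00_8.00.stats.png"
p = re.compile(r'Best(\d+\.\d+)_(\d+\.\d+)_(\d+\.\d+)_(\d+\.\d+)')
m = p.search(statsFile)
[float(m.group(i+1)) for i in np.arange(4)]
# expect: [2.0, 4.0, 6.0, 8.0]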

In [ ]:
drainageID = "GA_Karnali_at_Benighat"
result = find_ddf_variation(drainageID, nstrikes=2)

In [ ]:
drainageIDs = ['SY_Naryn_at_NarynTown',
               'AM_Vakhsh_at_Komsomolabad',
               'IN_Hunza_at_DainyorBridge',
               'GA_Karnali_at_Benighat',
               'GA_Narayani_at_Devghat',
               'GA_SaptaKosi_at_Chatara']
strikes = [2, 3]
df = pd.DataFrame([])
for drainageID in drainageIDs:
    for strike in strikes:
        result = find_ddf_variation(drainageID, nstrikes=strike)
        df = df.append(result)

In [ ]:
def best_models(drainageID, nstrikes=3):
    '''
    Reads the SA summary for a basin, adds a model-string column and the
    number of strikes, and writes the result to a best20 pickle.
    '''
    # Read SA summary file; this holds the DDFs and z at the end of each cycle
    dir = "/work/charis/ti_model/calibrations_correct_cost"
    params = "DDFnbr=10mm_N100_M050"
    files = glob.glob("%s/%s.%dstr_%s.SA_summary.dat" % (
        dir, drainageID, nstrikes, params))
    if 1 != len(files):
        print("Error looking for SA_summary file for %s" % drainageID, file=sys.stderr)
    SAFile = files[0]
    print("SA_summary file : %s" % SAFile, file=sys.stderr)
    df = pd.read_pickle(SAFile)
    
    df.loc[:, "model"] = (
        df["winter_snow_ddf"].map(str) + "_" +
        df["summer_snow_ddf"].map(str) + "_" +
        df["winter_ice_ddf"].map(str) + "_" +
        df["summer_ice_ddf"].map(str))

    df["nstrikes"] = nstrikes

    outfile = "%s/%s.%dstr_%s.SA_summary.best20.dat" % (
        dir, drainageID, nstrikes, params)

    df.to_pickle(outfile)
    print("outfile: %s" % outfile, file=sys.stderr)

In [ ]:
drainageIDs = ['SY_Naryn_at_NarynTown',
               'AM_Vakhsh_at_Komsomolabad',
               'IN_Hunza_at_DainyorBridge',
               'GA_Karnali_at_Benighat',
               'GA_Narayani_at_Devghat',
               'GA_SaptaKosi_at_Chatara']
strikes = [2, 3]
for drainageID in drainageIDs:
    for strike in strikes:
        best_models(drainageID, nstrikes=strike)

In [ ]:
#newfile = '/work/charis/ti_model/calibrations_correct_cost/IN_Hunza_at_DainyorBridge.2str_DDFnbr=10mm_N100_M050.SA_summary.best20.dat'
newfile = '/work/charis/ti_model/calibrations_correct_cost/AM_Vakhsh_at_Komsomolabad.2str_DDFnbr=10mm_N100_M050.SA_summary.best20.dat'
new = pd.read_pickle(newfile)

In [ ]:
new

To look up the model for a single cycle (e.g., cycle 82), index the model column by that cycle's row label:


In [ ]:
new.at[82, "model"]
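
To pick the single best model overall (a sketch, assuming the SA summary's cost column is named z, per the summary description above):


In [ ]:
# Row label of the cycle with the minimum cost z, and its model string
best_idx = new["z"].idxmin()
new.at[best_idx, "model"]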
