Using the 20 best SA models, plot the overall variability in the melt data that we generated.


In [ ]:
from __future__ import print_function
%pylab notebook
# import datetime as dt
import glob
import matplotlib.pyplot as plt
#import matplotlib.dates as md
#from nose.tools import set_trace
import pandas as pd
import re
import seaborn as sns
import os
import sys
# Apply seaborn's default theme, then select the "darkgrid" axes style.
# set_style() changes the global plotting style; axes_style() only *returns*
# the style parameters (or acts as a context manager), so calling it bare
# applied nothing and merely echoed a dict as the cell output.
sns.set()
sns.set_style("darkgrid")

Make a plot of overall variability by basin and surface type


In [ ]:
# Root directory of the derived hypsometry products.
# NOTE(review): this is an absolute, user-specific path and `dir` shadows the
# builtin; the name is kept because a later cell builds its output path from it.
dir = "/Users/brodzik/projects/CHARIS/derived_hypsometries"
drainageIDs = ["IN_Hunza_at_DainyorBridge",
               "AM_Vakhsh_at_Komsomolabad",
               "SY_Naryn_at_NarynTown",
               "GA_SaptaKosi_at_Chatara",
               "GA_Karnali_at_Benighat"]

# Min/max DDF columns that are not needed for the melt-variability plots.
ddf_columns = ['Snow_on_land_min_ddf', 'Snow_on_land_max_ddf',
               'Snow_on_ice_min_ddf', 'Snow_on_ice_max_ddf',
               'Exposed_glacier_ice_min_ddf', 'Exposed_glacier_ice_max_ddf']

# Melt volume columns, one per surface type.
melt_columns = ['Snow_on_land_melt_km3',
                'Snow_on_ice_melt_km3',
                'Exposed_glacier_ice_melt_km3']

# Collect one frame per basin and concatenate once at the end.  This avoids
# DataFrame.append (removed in pandas 2.0) and the quadratic copying caused by
# growing a frame inside the loop.
basin_frames = []
for drainageID in drainageIDs:
    # Alternative input (per-cycle summary of the last 20 cycles):
    #   "%s/REECv0_CycleSummary/%s.annual_melt.last20.dat" % (dir, drainageID)
    pickle_path = "%s/REECv0_ModelRankSummary/%s.annual_melt.best20.dat" % (dir, drainageID)
    print("best20 file %s" % pickle_path, file=sys.stderr)

    # NOTE(review): read_pickle can execute arbitrary code while loading;
    # only use it on files produced by this project.
    melt = pd.read_pickle(pickle_path).drop(ddf_columns, axis=1)

    # The melt columns must be cast to float for seaborn to work in the plots.
    for column in melt_columns:
        melt[column] = melt[column].astype(float)

    basin_frames.append(melt)

# Row index of each per-basin frame is preserved, exactly as append() did.
alldf = pd.concat(basin_frames)

In [ ]:
# Short basin name: the text between the first "_" and "_at" in the drainage
# ID, e.g. "IN_Hunza_at_DainyorBridge" -> "Hunza".
# expand=False makes str.extract return a Series for the single capture group
# instead of relying on the pandas-version-dependent default return type.
alldf["ID"] = alldf["drainageID"].str.extract(r"_(.+)_at", expand=False)

In [ ]:
# Display the combined melt table (all basins) for inspection.
alldf

In [ ]:
# Three stacked panels, one per surface type, each a pandas boxplot of melt
# grouped by basin ID.
fig, axes = plt.subplots(3, 1, figsize=(7,10))

# (melt column, panel title) in top-to-bottom panel order
panels = [('Snow_on_ice_melt_km3', "Melt from Snow on Ice"),
          ('Exposed_glacier_ice_melt_km3', "Melt from Exposed Glacier Ice"),
          ('Snow_on_land_melt_km3', "Melt from Snow on Land")]

for ax, (column, panel_title) in zip(axes, panels):
    alldf.boxplot(ax=ax, column=column, by='ID', rot=0)
    ax.set_title(panel_title)

# Same y-axis label on every panel
for ax in axes:
    ax.set_ylabel('Melt ($km^3$)')

fig.suptitle("Variability in Melt from 20 Best Models (2001-2014)")

# Lay out the panels first, then leave room at the top for the figure title
fig.tight_layout()
fig.subplots_adjust(top=0.95)

In [ ]:
# Three stacked seaborn boxplot panels, one per surface type, with basins in
# a fixed left-to-right order and one colour per surface.
fig, axes = plt.subplots(3, 1, figsize=(7,10))

# Surface colours as hex strings built from RGB triples
# (snow on ice, exposed glacier ice, snow on land); reused by a later cell.
soi_color, egi_color, sol_color = (
    '#%02x%02x%02x' % rgb
    for rgb in [(141, 160, 203), (252, 141, 98), (102, 194, 165)])

# Left-to-right basin order on the x axis
order=['Naryn','Vakhsh','Hunza','Karnali','SaptaKosi']

# (melt column, box colour, panel title, x-axis label) in top-to-bottom order;
# only the bottom panel carries an x-axis label.
panels = [
    ('Snow_on_ice_melt_km3', soi_color,
     "Melt from Snow on Ice", ""),
    ('Exposed_glacier_ice_melt_km3', egi_color,
     "Melt from Exposed Glacier Ice", ""),
    ('Snow_on_land_melt_km3', sol_color,
     "Melt from Snow on Land", 'Calibration Basin (Used for Major Basin)'),
]

for ax, (column, box_color, panel_title, x_label) in zip(axes, panels):
    sns.boxplot(ax=ax,
                x='ID',
                y=column,
                order=order,
                color=box_color,
                data=alldf)
    ax.set_title(panel_title)
    ax.set_xlabel(x_label)

# Each panel keeps its own automatic y range; a shared limit is not applied.
for ax in axes:
    ax.set_ylabel('Melt ($km^3$)')

fig.suptitle("Variability in Melt from 20 Best Models (2001-2014)")

# Lay out the panels first, then leave room at the top for the figure title
fig.tight_layout()
fig.subplots_adjust(top=0.93)

Combine the three melt columns into a single `Melt` column, with a second column (`Surface`) labelling the surface type


In [ ]:
# Reshape the three per-surface melt columns into long form: one row per
# (basin ID, surface type) with a single 'Melt' value column.

# Source columns, listed in the same order as the labels in `multicol` below.
# Selecting them by name (instead of dropping the other columns and relabelling
# whatever is left by position) guarantees each label lands on the right data
# even if the column order of the input frame changes, and works whether the
# frame carries a 'rank' or a 'cycle' column.
melt_columns = ['Snow_on_land_melt_km3',
                'Snow_on_ice_melt_km3',
                'Exposed_glacier_ice_melt_km3']

multicol = pd.MultiIndex.from_tuples([('Melt', 'Snow on land melt'),
                                      ('Melt', 'Snow on ice melt'),
                                      ('Melt', 'Exposed glacier ice melt')])

# Work on a copy indexed by basin ID so alldf itself is left untouched.
test = alldf.set_index('ID')[melt_columns].copy()
test.columns = multicol

# Stacking the inner column level turns the surface labels into an index
# level, leaving a single 'Melt' column.
test = test.stack()
test = test.reset_index()
test.columns = ['ID', 'Surface', 'Melt']
test

In [ ]:
# Display the current matplotlib rcParams settings for inspection.
plt.rcParams

In [ ]:
# Single-panel figure: melt by basin, with one box per surface type (hue),
# saved as a PDF next to the model-rank summaries.

# Legend appearance (this updates matplotlib's global rcParams)
plt.rcParams.update({'legend.fontsize': 14,
                     'legend.handlelength': 2})

fig, ax = plt.subplots(figsize=(9,6))

# Surface label -> box colour (colours are defined in the earlier seaborn cell)
my_palette = {"Snow on land melt": sol_color,
              "Exposed glacier ice melt": egi_color,
              "Snow on ice melt": soi_color}

# Left-to-right basin order on the x axis
order=['Naryn','Vakhsh','Hunza','Karnali','SaptaKosi']

sns.boxplot(data=test,
            x="ID",
            y="Melt",
            hue="Surface",
            order=order,
            palette=my_palette,
            width=0.6,
            ax=ax)

ax.set_ylabel('Melt ($km^3$)')
ax.set_title("Variability in Melt from 20 Best Models (2001-2014)")
# Tick labels are positional and must stay in the same sequence as `order`
ax.set_xticklabels(['Naryn (SY)','Vakhsh (AM)','Hunza (IN)','Karnali (GA)','SaptaKosi (BR)'])
ax.set_xlabel('Calibration Basin (Used for Major Basin)')

# Uniform 14 pt text for axis labels and tick labels; larger title
label_size = 14
text_items = [ax.xaxis.label, ax.yaxis.label]
text_items.extend(ax.get_xticklabels())
text_items.extend(ax.get_yticklabels())
for text_item in text_items:
    text_item.set_fontsize(label_size)
ax.title.set_fontsize(20)

# Legend with a title sized to match its entries
surface_legend = ax.legend(title="Surface", fontsize=label_size)
surface_legend.get_title().set_fontsize(label_size)

pdf_path = "%s/REECv0_ModelRankSummary/Calibration_basins.model_variability.v2.pdf" % (dir,)
fig.savefig(pdf_path)

In [ ]: