prepare the dataset for the SW Pacific regimes


In [23]:
%matplotlib inline

In [24]:
import sys, os
import numpy as np 
import pandas as pd
from matplotlib import pyplot as plt
from scipy.io.matlab import loadmat
import h5py

# date and time stuff
from datetime import datetime, timedelta
from dateutil import parser, relativedelta

import xray
import seaborn as sns

In [25]:
import sys
sys.path.append('/Users/nicolasf/CODE/paleopy/')

In [26]:
from paleopy import markov

In [27]:
types = ['SW1','SW2', 'SW3', 'SW4', 'SW5', 'SW6']

In [28]:
dict_types = dict(zip(types,range(1,len(types)+14 )))
inv_dict_types = {v: k for k, v in dict_types.items()}

In [29]:
inv_dict_types


Out[29]:
{1: 'SW1', 2: 'SW2', 3: 'SW3', 4: 'SW4', 5: 'SW5', 6: 'SW6'}

In [30]:
# %%writefile 
def select_season(data, season='DJF', complete=True, start = 1948, end = 2014, rm_leap=False):
    from calendar import monthrange
    """
    Select a season from data
    data must be a Pandas Series or DataFrame with a datetime index
    """
        
    seasons_params = {}
    seasons_params['DJF'] = (12,2)
    seasons_params['JFM'] = (1,3)
    seasons_params['FMA'] = (2,4)
    seasons_params['MAM'] = (3,5)
    seasons_params['AMJ'] = (4,6)
    seasons_params['MJJ'] = (5,7)
    seasons_params['JJA'] = (6,8)
    seasons_params['JAS'] = (7,9)
    seasons_params['ASO'] = (8,10)
    seasons_params['SON'] = (9,11)
    seasons_params['OND'] = (10,12)
    seasons_params['NDJ'] = (11,1)
    seasons_params['Warm Season (Dec. - May)'] = (12, 5)
    seasons_params['Cold Season (Jun. - Nov.)'] = (6, 11)
    seasons_params['Year (Jan. - Dec.)'] = (1, 12)
    seasons_params['Hydro. year (Jul. - Jun.)'] = (7, 6)    
        
    ### defines the selector 
    selector = ((data.index.month >= seasons_params[season][0]) | (data.index.month <= seasons_params[season][1]))
    
    ### selects
    data = data[selector]
    
    ### if complete == True, we only select COMPLETE seasons 
    data = data.truncate(before='%s-%s-1' % (start, seasons_params[season][0]),\
                   after='%s-%s-%s' % (end, seasons_params[season][1], monthrange(end,seasons_params[season][1])[1] ))
    
    if rm_leap: 
        data[(data.index.month == 2) & (data.index.day == 29)] = np.nan
        data.dropna(inplace=True)
    
    return data

In [31]:
fname = '/Users/nicolasf/research/NIWA/paleo/Agent_based/data/kmeans_6_class_clusters_TS_Ldomain.csv'

In [32]:
sw_types = pd.read_csv(fname, parse_dates=True, index_col=0, names=['class'], header=None, skiprows=1)

In [33]:
sw_types.head()


Out[33]:
class
1948-01-01 0
1948-01-02 0
1948-01-03 0
1948-01-04 0
1948-01-05 0

In [34]:
# sw_types = sw_types.ix['1981':'2010']

In [35]:
types


Out[35]:
['SW1', 'SW2', 'SW3', 'SW4', 'SW5', 'SW6']

In [36]:
dict_types = dict(zip(types,range(0,len(types)+1)))
inv_dict_types = {v: k for k, v in dict_types.items()}

maptypes = lambda x: inv_dict_types[x]

In [37]:
sw_types['type'] =  sw_types.applymap(maptypes)

In [38]:
sw_types.tail()


Out[38]:
class type
2014-12-27 5 SW6
2014-12-28 5 SW6
2014-12-29 4 SW5
2014-12-30 2 SW3
2014-12-31 5 SW6

In [39]:
sw_types.to_csv('../data/SWPac_Types.csv')

selects the season


In [40]:
lseason = ['AMJ',
 'ASO',
 'DJF',
 'FMA',
 'JAS',
 'JFM',
 'JJA',
 'MAM',
 'MJJ',
 'NDJ',
 'OND',
 'SON',
 'Cold Season (Jun. - Nov.)',
 'Warm Season (Dec. - May)',
 'Hydro. year (Jul. - Jun.)',
 'Year (Jan. - Dec.)']

In [42]:
if os.path.exists('../data/simulations_SWPac_types.hdf5'): 
    os.remove('../data/simulations_SWPac_types.hdf5')

In [53]:
f = h5py.File('../data/simulations_SWPac_types.hdf5', mode='a')

for season in lseason: 
    # calculates the probabilities over the climatological period (1981 - 2010)
    kseason = select_season(sw_types, start=1981, end=2010, season=season, rm_leap=False)
    probs = markov.get_probs(kseason['type'].values, types)
    probs = pd.Series(probs, index=types)
    classes, transition_matrix = markov.get_transition_probs(kseason['type'])
    probs = probs.reindex(classes)
    dict_classes, sim2D = markov.simulate_2D(classes, probs.values, transition_matrix, N=len(kseason), P=1000)
    probs = np.empty((len(classes), sim2D.shape[1]))
    for i in range(sim2D.shape[1]): 
        p = markov.get_probs(sim2D[:,i], np.arange(len(classes)))
        probs[:,i] = p
    f["/{}/probs".format(season)] = probs
    f["/{}/probs".format(season)].attrs['shape'] = '(class, simulation)'
    f["/{}/probs".format(season)].attrs['classes'] = ','.join(list(dict_classes.values()))
    del(probs, p)

In [33]:
f.close()

In [34]:
f.keys()


Out[34]:
KeysViewWithLock(<Closed HDF5 file>)

In [5]:
f = h5py.File('../outputs/simulations_SWPac_types.hdf5', mode='r')

In [6]:
probs = f['DJF']['probs'].value * 100

In [8]:
probs.shape


Out[8]:
(6, 1000)

In [12]:
lseason


Out[12]:
['AMJ',
 'ASO',
 'DJF',
 'FMA',
 'JAS',
 'JFM',
 'JJA',
 'MAM',
 'MJJ',
 'NDJ',
 'OND',
 'SON',
 'Cold Season (Jun. - Nov.)',
 'Warm Season (Dec. - May)',
 'Hydro. year (Jul. - Jun.)',
 'Year (Jan. - Dec.)']

In [21]:
inv_dict_types


Out[21]:
{1: 'SW1', 2: 'SW2', 3: 'SW3', 4: 'SW4', 5: 'SW5', 6: 'SW6'}

In [22]:
sc = ['-',' ','(',')','.']
for season in lseason: 
    print(season)
    season_title = season
    for c in sc: 
        season_title = season_title.replace(c,'_')
    probs = f[season]['probs'].value * 100
    fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(17,11))
    fig.subplots_adjust(wspace=0.3, hspace=0.3)
    axes = axes.flatten()
    for i in range(6): 
        ax = axes[i]
        p = probs[i,:]
        mp = p.mean()
        pu = np.percentile(p, 97.5)
        pl = np.percentile(p, 2.5)

        sns.distplot(p, ax=ax, color='#1B216B', kde_kws={'color':'coral'})
        ax.set_title(inv_dict_types[i+1],fontdict={'weight':'bold'})
        ax.axvline(mp, color='#1B216B')
        ax.axvline(pu, color='#13680D')
        ax.axvline(pl, color='#13680D')
        ax.text(0.01, 0.9, "mean = {:3.1f}\nu. perc. = {:3.1f}\nl. perc. = {:3.1f}".format(mp,pu,pl),\
               transform=ax.transAxes, bbox=dict(facecolor ='w'))
        [l.set_rotation(90) for l in ax.xaxis.get_ticklabels()]
    fig.savefig('/Users/nicolasf/Desktop/distr_SWPac_simus_{}.png'.format(season_title), dpi=200)
    plt.close(fig)


AMJ
ASO
DJF
FMA
JAS
JFM
JJA
MAM
MJJ
NDJ
OND
SON
Cold Season (Jun. - Nov.)
Warm Season (Dec. - May)
Hydro. year (Jul. - Jun.)
Year (Jan. - Dec.)

In [ ]:
f.close()

In [ ]: