In [1]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
from datetime import datetime
import seaborn as sb

In [2]:
# minimum number of days for calculating a monthly mean 
minvals = 20

def calcmonsum(x):
    if pd.isnull(x).sum() <= minvals:
        z = x.sum()
    else: 
        z = np.NaN
    return z

In [3]:
%matplotlib inline

In [4]:
iData = pd.read_csv('../data/table_rain24h.csv', index_col=['lsd'], parse_dates=True)

In [5]:
iData.index.name = 'TimeStamp'

In [6]:
iData.sort_index(inplace=True)

In [7]:
from_date = iData.index[0].strftime("%Y-%m-%d")

In [8]:
to_date = iData.index[-1].strftime("%Y-%m-%d")

In [9]:
daterange = pd.period_range(start = from_date, end = to_date, freq = 'D').to_datetime()
iData = iData.reindex(daterange)

In [10]:
iData.head()


Out[10]:
station_no rain_24h
1968-01-06 10001 0
1968-01-07 10001 0
1968-01-08 10001 0
1968-01-09 10001 0
1968-01-10 10001 0

In [11]:
index = pd.date_range(start = datetime(iData.index.year[0], 1, 1), \
                     end = datetime(iData.index.year[-1], 12, 31), freq='1D')

In [12]:
iData = iData.reindex(index)

In [13]:
iData.head()


Out[13]:
station_no rain_24h
1968-01-01 NaN NaN
1968-01-02 NaN NaN
1968-01-03 NaN NaN
1968-01-04 NaN NaN
1968-01-05 NaN NaN

In [14]:
mData = iData.groupby([iData.index.year, iData.index.month])[['rain_24h']].aggregate(calcmonsum)
mData.index = [datetime(x[0],x[1],1) for x in mData.index.get_values()]

In [15]:
mData.head()


Out[15]:
rain_24h
1968-01-01 NaN
1968-02-01 NaN
1968-03-01 NaN
1968-04-01 NaN
1968-05-01 75

In [16]:
clim = mData.groupby(mData.index.month).mean()

In [17]:
mData['anoms'] =  mData - np.resize(clim.values, mData.shape)

In [18]:
mData.head()


Out[18]:
rain_24h anoms
1968-01-01 NaN NaN
1968-02-01 NaN NaN
1968-03-01 NaN NaN
1968-04-01 NaN NaN
1968-05-01 75 -94.761111

In [19]:
soi = pd.read_table('http://www.cgd.ucar.edu/cas/catalog/climind/SOI.signal.ascii', sep='\s*', index_col=0, header=None, na_values=-99.9)


/Users/nicolasf/anaconda/lib/python2.7/site-packages/pandas/io/parsers.py:624: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators; you can avoid this warning by specifying engine='python'.
  ParserWarning)

In [20]:
soi = soi.stack(dropna=False)

soi.index = [datetime(x[0],x[1],1) for x in soi.index.get_values()]

In [21]:
soi.head()


Out[21]:
1866-01-01   -1.2
1866-02-01   -0.3
1866-03-01   -1.0
1866-04-01   -0.7
1866-05-01    0.1
dtype: float64

In [22]:
mData['soi'] = soi

In [23]:
mData.head()


Out[23]:
rain_24h anoms soi
1968-01-01 NaN NaN 0.6
1968-02-01 NaN NaN 1.8
1968-03-01 NaN NaN -0.8
1968-04-01 NaN NaN -0.4
1968-05-01 75 -94.761111 1.9

In [24]:
seasData = pd.rolling_mean(mData[['anoms','soi']], 3, min_periods=2)

In [25]:
groups = seasData.groupby(seasData.index.month)

from scipy.stats import pearsonr

R = []; P =[] 
for i in np.arange(1,13): 
    ds = groups.get_group(i)
    ds = ds.dropna()
    r, p = pearsonr(ds['anoms'], ds['soi'])
    R.append(r)
    P.append(p)
R = np.array(R); P = np.array(P)

In [27]:
seasons = ['NDJ', 'DJF', 'JFM', 'FMA', 'MAM', 'AMJ', 'MJJ', 'JJA', 'JAS', 'ASO', 'SON', 'OND']

In [42]:
"""
facet plot of the regressions between the SOI and the seasonal rainfall anomalies
"""
f, axes = plt.subplots(nrows=4, ncols=3, figsize=(10,14))

f.subplots_adjust(bottom=0.1)

axes = axes.flatten()

for i, m in enumerate(np.arange(1,13)):
    ax = axes[i]
    ds = groups.get_group(m)
    ds = ds.dropna()
    sb.regplot(ds['soi'], ds['anoms'], ax=ax, color='#000099')
    ax.set_title("{}, R = {:4.2f}".format(seasons[i],ds.corr().ix[1,0]))
    
    if m in [1,4,7,10]:
        ax.set_ylabel('anoms. (mm)')
    else:
        ax.set_ylabel('')
    
    if m in [10,11,12]: 
        ax.set_xlabel('SOI')
    else: 
        ax.set_xlabel('')



In [34]:
f.savefig('./Regplot_SoiSeasonalRainfall_WS.png')

In [33]:
import os

In [43]:
import ggplot2


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-43-d61d381e44c3> in <module>()
----> 1 import ggplot2

ImportError: No module named ggplot2

In [ ]: