drought monitoring tool for CLIDESC


In [1]:
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

import datetime

In [2]:
import matplotlib as mpl

In [3]:
mpl.rcParams['axes.labelsize'] = 16
mpl.rcParams['ytick.labelsize'] = 16
mpl.rcParams['xtick.labelsize'] = 16

In [4]:
%matplotlib inline

defines the window and min periods here


In [5]:
window = 90
min_periods = np.int(np.floor(0.6 * window))
smooth_periods = np.int(np.floor(0.1 * window))

In [10]:
data = pd.read_csv('/Users/nicolasf/Desktop/export_table.csv', header=0, index_col='lsd', parse_dates=True, dayfirst=True)

In [11]:
data.head()


Out[11]:
timestamp station_no rain_24h
lsd
1972-01-01 1972-01-01 76200 2.4
1972-01-02 1972-01-02 76200 5.2
1972-01-03 1972-01-03 76200 0.4
1972-01-04 1972-01-04 76200 7.0
1972-01-05 1972-01-05 76200 30.1

In [7]:
data.sort_index(inplace=True)

In [13]:
data.drop(['station_no','timestamp'], axis=1, inplace=True)

In [14]:
data.head()


Out[14]:
rain_24h
lsd
1972-01-01 2.4
1972-01-02 5.2
1972-01-03 0.4
1972-01-04 7.0
1972-01-05 30.1

In [15]:
tindex = pd.date_range(start=min(data.index), end=max(data.index))

creates a continuous time index for the data


In [16]:
data = data.reindex(tindex)

rolling sum over window


In [17]:
datars = pd.rolling_sum(data, window, min_periods=min_periods)

monthly means from data


In [18]:
datam = data[['rain_24h']].resample('1M', how='sum')

In [19]:
datam.head()


Out[19]:
rain_24h
lsd
1972-01-31 540.1
1972-02-29 204.4
1972-03-31 364.8
1972-04-30 269.7
1972-05-31 70.2

In [20]:
datam.columns = [['monthly means']]

In [21]:
datam.head()


Out[21]:
monthly means
lsd
1972-01-31 540.1
1972-02-29 204.4
1972-03-31 364.8
1972-04-30 269.7
1972-05-31 70.2

In [22]:
normal = ['1972-01-01','2012-12-31']

In [23]:
normals = datars[normal[0]:normal[-1]]

In [24]:
normals['year'] = normals.index.year
normals['month'] = normals.index.month
normals['day'] = normals.index.day

In [25]:
datars['year'] = datars.index.year
datars['month'] = datars.index.month
datars['day'] = datars.index.day

In [26]:
normals.head()


Out[26]:
rain_24h year month day
lsd
1972-01-01 NaN 1972 1 1
1972-01-02 NaN 1972 1 2
1972-01-03 NaN 1972 1 3
1972-01-04 NaN 1972 1 4
1972-01-05 NaN 1972 1 5

calculates a Monthly climatology


In [27]:
mclim = data['rain_24h'].resample('1M', how='sum')

In [28]:
mclim = mclim.groupby(mclim.index.month).mean()

In [29]:
def plot_rainfall_clim(mclim, station_name): 
    """
    plots a rainfall climatology for a station
    
    parameters
    -------
    mclim : Pandas.Series object
        as returned from `x.groupby(x.index.month).mean()`
    station_name : str
        station name
        
    returns
    -------
    f : matplotlib figure object
        the figure handle (to save with `f.savefig(outfile)`)
    """
    f, ax = plt.subplots(figsize=(6,6))
    ax.bar(np.arange(len(mclim)), mclim.values, color='steelblue',width=1, edgecolor='b')
    ax.set_xticks(np.arange(0.5, len(mclim) + 0.5))
    ax.set_xticklabels(list('JFMAMJJASOND'))
    ax.set_ylabel('Rainfall (mm)') 
    #ax.set_xlim(-0.11, 12.1)
    ax.yaxis.grid('on')
    ax.set_title(station_name, fontsize=16)
    return f

In [30]:
f = plot_rainfall_clim(mclim, 'Asau')


Much more simpler: express anomalies in terms of percentage of normal and plot horiz. lines


In [31]:
clim = normals.groupby(['month','day'])[['rain_24h']].mean()
#clim = normals.groupby(['month','day'])['rain_24h'].quantile(0.5)

In [32]:
#clim = pd.DataFrame(clim)

In [33]:
clim.head()


Out[33]:
rain_24h
month day
1 1 851.4800
2 857.9100
3 868.0975
4 877.6175
5 879.6425

In [34]:
clim['month'] = clim.index.get_level_values(0)
clim['day'] = clim.index.get_level_values(1)

In [35]:
merged_df_mean = pd.merge(datars, clim, how='left', on=['month','day'])

In [36]:
merged_df_mean.head()


Out[36]:
rain_24h_x year month day rain_24h_y
0 NaN 1972 1 1 851.48
1 1206.3 1973 1 1 851.48
2 1778.2 1974 1 1 851.48
3 1471.0 1975 1 1 851.48
4 744.2 1976 1 1 851.48

In [37]:
merged_df_mean = merged_df_mean.sort(['year','month','day'], axis=0)

In [38]:
merged_df_mean.columns = ['rain_24h', 'year', 'month','day', 'clim']

calculates the anomalies


In [39]:
merged_df_mean['anomalies'] = (merged_df_mean['rain_24h'] / merged_df_mean['clim']) * 100.

In [40]:
merged_df_mean.head()


Out[40]:
rain_24h year month day clim anomalies
0 NaN 1972 1 1 851.4800 NaN
43 NaN 1972 1 2 857.9100 NaN
86 NaN 1972 1 3 868.0975 NaN
129 NaN 1972 1 4 877.6175 NaN
172 NaN 1972 1 5 879.6425 NaN

In [41]:
merged_df_mean.index = datars.index

In [42]:
merged_df_mean['monthly means'] = datam['monthly means']

In [43]:
merged_df_mean['monthly means'].fillna(method='bfill', inplace=True)

In [44]:
subsetper = merged_df_mean['2012-01-01'::]

In [45]:
subsetper.head()


Out[45]:
rain_24h year month day clim anomalies monthly means
lsd
2012-01-01 1140.4 2012 1 1 851.4800 133.931507 449
2012-01-02 1151.2 2012 1 2 857.9100 134.186570 449
2012-01-03 1220.0 2012 1 3 868.0975 140.537209 449
2012-01-04 1233.0 2012 1 4 877.6175 140.494008 449
2012-01-05 1233.8 2012 1 5 879.6425 140.261527 449

In [50]:
f, ax = plt.subplots(figsize=(12,8))

f.subplots_adjust(bottom=0.15, left=0.08, right=1)

### get the axes position and shrink the width to 80 % (for legend placement)
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.78, box.height])

### first plot: monthtly average rainfall (mm)

ax.plot(subsetper.index, subsetper['monthly means'], color='g', alpha=0.5)

ax.fill_between(subsetper.index, subsetper['monthly means'],color='g', alpha=0.3)

ax.set_ylim(0, max(subsetper['monthly means'] + 0.2 * subsetper['monthly means']))

ax.set_ylabel("monthly (calendar month) rainfall (mm)")

ax.grid('off')

[l.set_rotation(90) for l in ax.xaxis.get_ticklabels()]


### second plot [window] days anomalies (% of normal)

ax2 = ax.twinx()
box = ax2.get_position()
ax2.set_position([box.x0, box.y0, box.width * 0.78, box.height])


ax2.plot(subsetper.index, subsetper['anomalies'], lw=2, color='b', label='90 days rainfall', zorder=12)

ax2.axhline(10, color='r', zorder=10, lw=2)
ax2.axhline(40, color='r', zorder=10)

ax2.fill_between(subsetper.index, subsetper['anomalies'], np.ones(len(subsetper)) * 100., \
                 where=subsetper['anomalies'] > 100., color='b', interpolate=True, alpha=0.05)

ax2.fill_between(subsetper.index, subsetper['anomalies'], np.ones(len(subsetper)) * 10., \
                 where=subsetper['anomalies'] < 10., color='#700000', interpolate=True, alpha=0.9)

ax2.fill_between(subsetper.index, subsetper['anomalies'], np.ones(len(subsetper)) * 40., \
                 where=subsetper['anomalies'] < 40., color='r', interpolate=True, alpha=0.4)

ax2.fill_between(subsetper.index, subsetper['anomalies'], np.ones(len(subsetper)) * 100., \
                 where=subsetper['anomalies'] < 100., color='r', interpolate=True, alpha=0.05)

ax2.set_ylabel('percentage of normal', fontsize=16)

ax2.grid('on')

ax2.set_ylim(0, subsetper['anomalies'].max()+5)

ax2.set_title("{} days cumulative rainfall anomalies (% of normal)\nending {}".format(window, subsetper.index[-1].strftime("%d %B %Y")), fontsize=16)

### legend 
p1 = plt.Rectangle((0, 0), 1, 1, fc="r", alpha=0.05)
p2 = plt.Rectangle((0, 0), 1, 1, fc="r", alpha=0.4)
p3 = plt.Rectangle((0, 0), 1, 1, fc="#700000", alpha=0.9)
leg = plt.legend([p1, p2, p3], ['< mean', '< 40%', '< 10%'],\
          loc='upper left', bbox_to_anchor = (1.035, 1.01), fontsize=19)
leg.draw_frame(False)


f.savefig('./images/drought_monitoring_1972-2012_per_{0}days_try.png'.format(window), dpi=200)



In [ ]: