In [2]:
import numpy as np
from numpy import ma
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime
In [3]:
%matplotlib inline
In [4]:
data = pd.read_csv('/Users/nicolasf/Dropbox/Samoa_data/76216_ext_obs_aws.csv', index_col=['lsd'])
In [7]:
data.columns.tolist()
Out[7]:
In [36]:
format = "%d/%m/%Y %H:%M:%S"
times = pd.to_datetime(data.Date + ' ' + data.Time, format=format)
data.set_index(times, inplace=True)
# cleanup
data = data.drop(['Date','Time'], axis=1)
In [37]:
data = data.sort_index()
In [38]:
data.head()
Out[38]:
One thing we can calculate is the percentage of total monthly rainfall contributed by different classes of 10 mn, 1 hour, 1 day rainfall
We would need first to only keep days with rainfall
The decide on thresholds
In [39]:
data['Rain(TOT)'][data['Rain(TOT)'] >= 100] = np.nan
In [40]:
data['Rain(TOT)'].max()
Out[40]:
In [41]:
data['2011-05-07 21:30:00':'2011-05-08 08:30:00']['Rain(TOT)']
Out[41]:
In [42]:
data['Rain(TOT)']['2010-11-18 22:00:00':'2010-11-19 22:00:00']
Out[42]:
In [43]:
data['Rain(TOT)']['2010-11-18'] # note that there are NO values for this date
Out[43]:
In [44]:
data['Rain(TOT)'][data['Rain(TOT)'] >= 45] = np.nan
In [45]:
#start = data.index[0]
#end = data.index[-1]
#index = pd.date_range(start=start, end=end, freq='10min')
#data = data.reindex(index)
In [46]:
data = data.resample('10min', how='sum')
In [47]:
start = data.index[0]
end = data.index[-1]
index = pd.date_range(start=start, end=end, freq='10min')
data = data.reindex(index)
In [48]:
len(data)
Out[48]:
In [49]:
data['Rain(TOT)'].plot()
Out[49]:
In [50]:
daily_max = data['Rain(TOT)'].resample('D', how='max')
In [51]:
daily_max.head()
Out[51]:
In [52]:
len(daily_max)
Out[52]:
In [53]:
from astroML.plotting import hist
In [54]:
hourly_rain = data['Rain(TOT)'].resample('H', how='sum')
In [55]:
daily_max.hist()
Out[55]:
In [56]:
hourly_rain.max()
Out[56]:
In [57]:
hourly_rain[hourly_rain==hourly_rain.max()]
Out[57]:
In [58]:
hourly_rain.hist()
Out[58]:
In [59]:
f, ax = plt.subplots(figsize=(10,6))
#data['Rain(TOT)']['2012-12-13'].plot(lw=2, ax =ax)
dates = data['Rain(TOT)']['2012-12-13'].index
values = data['Rain(TOT)']['2012-12-13'].values
ax.fill_between(dates, 0, values)
[l.set_fontsize(13) for l in ax.xaxis.get_ticklabels()]
[l.set_fontsize(13) for l in ax.yaxis.get_ticklabels()]
ax.set_title('10 mn rainfall on 2012-12-13',fontsize=14)
ax.set_ylabel('mm', fontsize=12)
ax.set_xlabel('time', fontsize=12)
f.savefig('../figures/Rainfall_Afiamalu_2012-12-13.png', dpi=200)
In [60]:
daily_max.max()
Out[60]:
In [61]:
daily_max[daily_max == daily_max.max()]
Out[61]:
In [84]:
freqs_def = ['10min', '20min', '30min', '1H', '3H', '6H', '12H', '24H', '36H', '48H']
bins = [-1,0.01,10] + np.arange(20, 160, 10).tolist()
In [85]:
freq_table = np.zeros((len(freqs_def), len(bins)))
In [86]:
for i, freq in enumerate(freqs_def):
table = data['Rain(TOT)'].resample(freq, how='sum')
table = table.dropna()
table = pd.cut(table, bins)
freq_values = pd.value_counts(table) / pd.value_counts(table).sum() * 100.
freq_values = freq_values.values
freq_table[i,0:len(freq_values)] = freq_values
In [87]:
freq_table = np.array(freq_table)
In [88]:
#freq_table = ma.masked_values(freq_table, 0)
In [89]:
# build the columns labels
columns = np.array(bins)[1:]
bins = []
for i in xrange(len(columns)-1):
bins.append("(%4.2f - %4.2f]" % (columns[i],columns[i+1]))
bins = ['0'] + bins + ['> 150']
In [90]:
freq_table = pd.DataFrame(freq_table, index=freqs_def, columns=bins)
In [91]:
freq_table
Out[91]:
In [92]:
from matplotlib.colors import LogNorm
#freq_table[freq_table==0] = 10e-10
f, ax = plt.subplots(figsize=(10,8))
plt.subplots_adjust(bottom=0.2)
cmap = plt.get_cmap('Blues')
#im = ax.imshow(freq_table, aspect='auto', norm=LogNorm(vmin=freq_table.min().min(), vmax=freq_table.max().max()), \
# cmap=cmap, interpolation='nearest')
im = ax.imshow(freq_table, aspect='auto', cmap=cmap, interpolation='nearest', vmin=0, vmax=100)
ax.set_yticks(np.arange(freq_table.shape[0]))
ax.set_yticklabels(freqs_def)
ax.set_xticks(np.arange(freq_table.shape[1]))
ax.set_xticklabels(bins, rotation=90)
ax.set_xlabel('rainfall amount')
ax.set_ylabel('duration')
ax.xaxis.grid('off', lw=0)
ax.yaxis.grid('off', lw=0)
[ax.axvline(x, color='0.8') for x in np.arange(-0.5,freq_table.shape[1]-0.5)]
[ax.axhline(x, color='0.8') for x in np.arange(-0.5,freq_table.shape[0]-0.5)]
cb = plt.colorbar(im, boundaries=np.arange(0,100+10,10), drawedges=True)
cb.set_label('%')
f.savefig('../figures/table_freq_Afiamalu.png')
In [64]:
freq_table.to_csv('../outputs/frequency_intensity_table_Afiamalu.csv')
In [67]:
from mpl_toolkits.mplot3d import Axes3D
In [65]:
y = np.arange(0.5,freq_table.shape[0]+0.5)
x = np.arange(0.5,freq_table.shape[1]+0.5)
X,Y = np.meshgrid(x,y)
Z = freq_table.values
In [80]:
f = plt.figure(figsize=(10,10))
ax = f.add_subplot(111, projection='3d')
plt.subplots_adjust(bottom=0.25)
surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=plt.cm.jet,
linewidth=0, antialiased=True, alpha=0.8)
ax.view_init(elev=20., azim=310)
ax.set_yticks(np.arange(0.5,11.5))
ax.set_yticklabels(freqs_def, rotation=90)
ax.set_xticklabels(bins, rotation=90)
#ax.set_ylabel('duration')
#ax.set_xlabel('amount (mm)')
ax.set_zlabel('%')
ax.set_title('rainfall frequency table for Afiamalu')
f.savefig('../figures/essai3D_surface.png', dpi=300)
#ax.bar(y, x, zs=Z, zdir='y', alpha=0.8)
In [106]:
def get_runs(data, varname, thresh = 0.0, event = 'dry', freq = '24H', how = 'sum'):
table = data[varname].resample(freq, how = how)
bits = table.copy()
if event == 'dry':
bits[bits <= thresh] = 1
elif event == 'wet':
bits[bits > thresh] = 1
bits = bits.values == 1
bounded = np.hstack(([0], bits, [0]))
difs = np.diff(bounded)
run_starts, = np.where(difs > 0)
run_ends, = np.where(difs < 0)
duration = run_ends - run_starts
run_ends -= 1
t_run_starts = table.index[run_starts].to_pydatetime()
t_run_ends = table.index[run_ends].to_pydatetime()
persistence = pd.DataFrame({'run_starts':run_starts,'date_starts':t_run_starts, 'run_ends':run_ends,\
'date_ends':t_run_ends, 'duration':duration})
# sorts the table
persistence = persistence[['run_starts','date_starts','run_ends','date_ends','duration']]
#persistence = pd.DataFrame(np.c_[run_starts, t_run_starts, run_ends,t_run_ends, duration], \
# columns = ['run_starts', 'date_starts', 'run_ends', 'date_ends', 'duration'])
return table, persistence
In [107]:
table, persistence = get_runs(data, 'Rain(TOT)', thresh = 0.5, event='wet')
In [108]:
freq_table = persistence['duration'].value_counts().sort_index(1)
In [109]:
freq_table = pd.DataFrame(np.c_[freq_table.index.values, freq_table.values], columns = ['duration', 'occurences'])
In [110]:
freq_table.T.to_csv('../outputs/daily_wet_events_Afiamalu.csv',header=False)
In [105]:
freq_table.T
Out[105]:
In [ ]: