climate risk analysis


In [2]:
import numpy as np
from numpy import ma
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime

In [3]:
%matplotlib inline

In [4]:
data = pd.read_csv('/Users/nicolasf/Dropbox/Samoa_data/76216_ext_obs_aws.csv', index_col=['lsd'])

In [7]:
data.columns.tolist()


Out[7]:
['id',
 'station_no',
 'gmt',
 'lct',
 'insert_datetime',
 'change_datetime',
 'change_user',
 'data_source',
 'qa_flag',
 'measure_period',
 'mn_wind_dir_pt',
 'mn_wind_dir_deg',
 'mn_wind_dir_qa',
 'mn_wind_dir_stddev',
 'mn_wind_dir_stddev_qa',
 'mn_wind_speed',
 'mn_wind_speed_qa',
 'mn_wind_speed_stddev',
 'mn_wind_speed_stddev_qa',
 'mn_gust_speed',
 'mn_gust_speed_qa',
 'mn_gust_time',
 'mn_gust_time_qa',
 'mn_gust_dir_pt',
 'mn_gust_dir_deg',
 'mn_gust_dir_qa',
 'inst_gust_speed',
 'inst_gust_qa',
 'inst_gust_time',
 'inst_gust_time_qa',
 'inst_gust_dir_pt',
 'inst_gust_dir_deg',
 'inst_gust_dir_qa',
 'mn_temp',
 'mn_temp_qa',
 'mn_temp_subaveraging',
 'mn_temp_subaveraging_period',
 'mn_temp_subaveraging_qa',
 'max_temp',
 'max_temp_time',
 'max_temp_time_qa',
 'max_temp_qa',
 'min_temp',
 'min_temp_qa',
 'min_temp_time',
 'min_temp_time_qa',
 'min_grass_temp',
 'min_grass_temp_qa',
 'min_grass_temp_time',
 'min_grass_temp_time_qa',
 'mn_humidity',
 'mn_humidity_qa',
 'max_humidity',
 'max_humidity_qa',
 'max_humidity_time',
 'max_humidity_time_qa',
 'min_humidity',
 'min_humidity_qa',
 'min_humidity_time',
 'min_humidity_time_qa',
 'mn_station_pres',
 'mn_station_pres_qa',
 'mn_sea_level_pres',
 'mn_sea_level_pres_qa',
 'max_pres',
 'max_pres_qa',
 'max_pres_time',
 'max_pres_time_qa',
 'min_pres',
 'min_pres_qa',
 'min_pres_time',
 'min_pres_time_qa',
 'tot_rain',
 'tot_rain_qa',
 'tot_rain_two',
 'tot_rain_two_qa',
 'tot_sun',
 'tot_sun_qa',
 'tot_insolation',
 'tot_insolation_qa',
 'leaf_wetness',
 'leaf_wetness_qa',
 'mn_uv',
 'mn_uv_qa',
 'mn_soil_moisture_10',
 'mn_soil_moisture_10_qa',
 'mn_soil_temp_10',
 'mn_soil_temp_10_qa',
 'mn_soil_moisture_20',
 'mn_soil_moisture_20_qa',
 'mn_soil_temp_20',
 'mn_soil_temp_20_qa',
 'mn_soil_moisture_30',
 'mn_soil_moisture_30_qa',
 'mn_soil_temp_30',
 'mn_soil_temp_30_qa',
 'mn_soil_moisture_50',
 'mn_soil_moisture_50_qa',
 'mn_soil_temp_50',
 'mn_soil_temp_50_qa',
 'mn_soil_moisture_100',
 'mn_soil_moisture_100_qa',
 'mn_soil_temp_100',
 'mn_soil_temp_100_qa']

create a datetime index from the Date and Time columns


In [36]:
format = "%d/%m/%Y %H:%M:%S"
times = pd.to_datetime(data.Date + ' ' + data.Time, format=format)
data.set_index(times, inplace=True)
# cleanup
data = data.drop(['Date','Time'], axis=1)

Sorts in chronological order just in case


In [37]:
data = data.sort_index()

In [38]:
data.head()


Out[38]:
Node ID Logger Name 100cm(AVG) 10cm(AVG) 20cm(AVG) 30cm(AVG) BP(AVG) External Supply(RAW) Gust WD(RAW) Gust WS(RAW) Leaf Wetness(RAW) Mean WD(RAW) Mean WS(RAW) Rain(TOT) RH(AVG) SD WD(RAW) SD WS(RAW) Solar Radiation(AVG) Temperature(AVG) Temperature(MAX)
2010-07-02 10:00:00 NaN Afiamalu AWS 23.3 21.1 20.8 21.6 922.7 13424 173 6.5 619 162 3.3 0.0 98.8 28 1.2 105.8 22.6 23.2 ...
2010-07-02 10:10:00 NaN Afiamalu AWS 24.3 21.3 21.0 22.0 936.6 13397 177 9.1 593 168 3.8 0.2 98.8 26 1.6 109.3 22.4 22.9 ...
2010-07-02 10:20:00 NaN Afiamalu AWS 24.3 21.4 21.1 22.0 936.6 13572 190 7.3 641 165 3.0 0.0 98.8 33 1.2 129.4 22.3 22.7 ...
2010-07-02 10:30:00 NaN Afiamalu AWS 24.3 21.6 21.2 22.1 936.7 13559 178 8.2 545 165 3.5 0.2 98.5 27 1.4 156.4 22.2 22.6 ...
2010-07-02 10:40:00 NaN Afiamalu AWS 24.3 22.3 21.6 22.3 936.7 13908 134 8.3 599 174 3.4 0.4 98.7 26 1.3 309.9 22.3 23.1 ...

5 rows × 22 columns

One thing we can calculate is the percentage of total monthly rainfall contributed by different classes of 10 mn, 1 hour, 1 day rainfall

We would need first to only keep days with rainfall

The decide on thresholds

Get rid of the aberrant values


In [39]:
data['Rain(TOT)'][data['Rain(TOT)'] >= 100] = np.nan

In [40]:
data['Rain(TOT)'].max()


Out[40]:
58.799999999999997

There's also obviously something dead wrong with these values


In [41]:
data['2011-05-07 21:30:00':'2011-05-08 08:30:00']['Rain(TOT)']


Out[41]:
2011-05-07 21:30:36     0.0
2011-05-07 21:40:36     0.0
2011-05-07 21:50:36     0.0
2011-05-07 22:00:36    51.2
2011-05-07 22:10:36    51.2
2011-05-07 22:20:36    51.2
2011-05-07 22:30:36    51.2
2011-05-07 22:40:36    51.2
2011-05-07 22:50:36    51.2
2011-05-07 23:00:36    51.2
2011-05-07 23:10:36    51.2
2011-05-07 23:20:36    51.2
2011-05-07 23:30:36    51.2
2011-05-07 23:40:36    51.2
2011-05-07 23:50:36    51.2
...
2011-05-08 06:00:36    0
2011-05-08 06:10:36    0
2011-05-08 06:20:36    0
2011-05-08 06:30:36    0
2011-05-08 06:40:36    0
2011-05-08 06:50:36    0
2011-05-08 07:00:36    0
2011-05-08 07:10:36    0
2011-05-08 07:20:36    0
2011-05-08 07:30:36    0
2011-05-08 07:40:36    0
2011-05-08 07:50:36    0
2011-05-08 08:00:36    0
2011-05-08 08:10:36    0
2011-05-08 08:20:36    0
Name: Rain(TOT), Length: 66

As well as these ones


In [42]:
data['Rain(TOT)']['2010-11-18 22:00:00':'2010-11-19 22:00:00']


Out[42]:
2010-11-19 01:35:42    47.2
2010-11-19 01:45:42    47.2
2010-11-19 01:55:42    47.2
2010-11-19 02:05:42    47.2
2010-11-19 02:15:42    47.2
2010-11-19 02:25:42    47.0
2010-11-19 02:35:42    47.0
2010-11-19 02:45:42    47.0
2010-11-19 02:55:42    47.0
2010-11-19 03:05:42    47.0
2010-11-19 03:15:42    47.0
2010-11-19 03:25:42    47.0
2010-11-19 03:35:42    47.0
2010-11-19 03:45:42    47.0
2010-11-19 03:55:42    47.0
...
2010-11-19 19:35:42    1.4
2010-11-19 19:45:42    1.2
2010-11-19 19:55:42    1.0
2010-11-19 20:05:42    0.8
2010-11-19 20:15:42    0.6
2010-11-19 20:25:42    0.2
2010-11-19 20:35:42    0.4
2010-11-19 20:45:42    0.2
2010-11-19 20:55:42    0.2
2010-11-19 21:05:42    0.0
2010-11-19 21:15:42    0.0
2010-11-19 21:25:42    0.0
2010-11-19 21:35:42    0.0
2010-11-19 21:45:42    0.0
2010-11-19 21:55:42    0.0
Name: Rain(TOT), Length: 123

In [43]:
data['Rain(TOT)']['2010-11-18'] # note that there are NO values for this date


Out[43]:
Series([], name: Rain(TOT), dtype: float64)

So we just get rid of all instances with more than 45 mm / 10 mn


In [44]:
data['Rain(TOT)'][data['Rain(TOT)'] >= 45] = np.nan

Note that we cannot create a fixed frequency index, as the times at which the station was operated changed (presumably after maintenance)


In [45]:
#start = data.index[0]
#end = data.index[-1]
#index = pd.date_range(start=start, end=end, freq='10min')
#data = data.reindex(index)

In [46]:
data = data.resample('10min', how='sum')

In [47]:
start = data.index[0]
end = data.index[-1]
index = pd.date_range(start=start, end=end, freq='10min')
data = data.reindex(index)

In [48]:
len(data)


Out[48]:
202213

In [49]:
data['Rain(TOT)'].plot()


Out[49]:
<matplotlib.axes.AxesSubplot at 0x9062910>

daily maximum 10 mn rainfall


In [50]:
daily_max = data['Rain(TOT)'].resample('D', how='max')

In [51]:
daily_max.head()


Out[51]:
2010-07-02    0.4
2010-07-03    0.2
2010-07-04    0.0
2010-07-05    1.2
2010-07-06    0.0
Freq: D, Name: Rain(TOT), dtype: float64

In [52]:
len(daily_max)


Out[52]:
1405

resample the raw data to hourly rainfall (the sum of the rainfall is taken, 2010-07-02 10:00:00 means rainfall total from 10AM to 11AM)


In [53]:
from astroML.plotting import hist

In [54]:
hourly_rain = data['Rain(TOT)'].resample('H', how='sum')

In [55]:
daily_max.hist()


Out[55]:
<matplotlib.axes.AxesSubplot at 0xbd0e750>

In [56]:
hourly_rain.max()


Out[56]:
156.79999999999998

To put things in perspective, the Most rainfall ever recorded in one hour: 305 mm (12.0 in) in 42 minutes. Holt, Missouri, United States, 22 June 1947.


In [57]:
hourly_rain[hourly_rain==hourly_rain.max()]


Out[57]:
2012-12-13 15:00:00    156.8
Freq: H, Name: Rain(TOT), dtype: float64

In [58]:
hourly_rain.hist()


Out[58]:
<matplotlib.axes.AxesSubplot at 0x39e9090>

This peak looks legit


In [59]:
f, ax = plt.subplots(figsize=(10,6))
#data['Rain(TOT)']['2012-12-13'].plot(lw=2, ax =ax)
dates = data['Rain(TOT)']['2012-12-13'].index
values = data['Rain(TOT)']['2012-12-13'].values
ax.fill_between(dates, 0, values)
[l.set_fontsize(13) for l in ax.xaxis.get_ticklabels()]
[l.set_fontsize(13) for l in ax.yaxis.get_ticklabels()]
ax.set_title('10 mn rainfall on 2012-12-13',fontsize=14)
ax.set_ylabel('mm', fontsize=12)
ax.set_xlabel('time', fontsize=12)
f.savefig('../figures/Rainfall_Afiamalu_2012-12-13.png', dpi=200)



In [60]:
daily_max.max()


Out[60]:
33.399999999999999

It corresponds to a maximum recorded of 33 mm in 10 mn, during TC EVAN


In [61]:
daily_max[daily_max == daily_max.max()]


Out[61]:
2012-12-13    33.4
Freq: D, Name: Rain(TOT), dtype: float64

Calculate the frequency / amount table


In [84]:
freqs_def = ['10min', '20min', '30min', '1H', '3H', '6H', '12H', '24H', '36H', '48H']
bins = [-1,0.01,10] + np.arange(20, 160, 10).tolist()

In [85]:
freq_table = np.zeros((len(freqs_def), len(bins)))

In [86]:
for i, freq in enumerate(freqs_def):
    table = data['Rain(TOT)'].resample(freq, how='sum')
    table = table.dropna()
    table = pd.cut(table, bins)
    freq_values = pd.value_counts(table) / pd.value_counts(table).sum() * 100.
    freq_values = freq_values.values
    freq_table[i,0:len(freq_values)] = freq_values

In [87]:
freq_table = np.array(freq_table)

In [88]:
#freq_table = ma.masked_values(freq_table, 0)

In [89]:
# build the columns labels
columns = np.array(bins)[1:]
bins = []
for i in xrange(len(columns)-1): 
    bins.append("(%4.2f - %4.2f]" % (columns[i],columns[i+1]))
bins = ['0'] + bins + ['> 150']

In [90]:
freq_table = pd.DataFrame(freq_table, index=freqs_def, columns=bins)

In [91]:
freq_table


Out[91]:
0 (0.01 - 10.00] (10.00 - 20.00] (20.00 - 30.00] (30.00 - 40.00] (40.00 - 50.00] (50.00 - 60.00] (60.00 - 70.00] (70.00 - 80.00] (80.00 - 90.00] (90.00 - 100.00] (100.00 - 110.00] (110.00 - 120.00] (120.00 - 130.00] (130.00 - 140.00] (140.00 - 150.00] > 150
10min 91.401118 8.514717 0.078905 0.003507 0.001753 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0
20min 88.407795 11.261567 0.267548 0.054912 0.004673 0.002337 0.001168 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0
30min 86.127461 13.201598 0.508023 0.119123 0.033284 0.007007 0.001752 0.001752 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0
1H 80.903520 17.440028 1.134652 0.308177 0.143583 0.035020 0.014008 0.014008 0.007004 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0
3H 67.743627 26.885555 3.084024 0.996538 0.587433 0.304206 0.167838 0.104899 0.052449 0.031470 0.031470 0.010490 0.000000 0.000000 0.000000 0.000000 0
6H 56.035205 33.906119 4.903604 2.095557 1.131601 0.712490 0.544845 0.188600 0.146689 0.146689 0.104778 0.041911 0.020956 0.020956 0.000000 0.000000 0
12H 40.903388 40.276035 7.737348 4.098703 2.007528 1.714764 1.296529 0.501882 0.334588 0.292765 0.250941 0.250941 0.125471 0.083647 0.083647 0.041824 0
24H 42.821159 23.845508 12.090680 7.304786 4.366079 3.106633 2.015113 1.511335 0.671704 0.503778 0.503778 0.335852 0.335852 0.251889 0.251889 0.083963 0
36H 40.609137 17.005076 12.690355 7.614213 6.091371 5.964467 3.807107 2.030457 1.142132 0.761421 0.507614 0.507614 0.380711 0.380711 0.380711 0.126904 0
48H 35.144312 13.752122 12.733447 11.035654 7.640068 4.923599 4.753820 2.376910 1.867572 1.358234 1.358234 0.848896 0.679117 0.679117 0.509338 0.339559 0

10 rows × 17 columns


In [92]:
from matplotlib.colors import LogNorm
#freq_table[freq_table==0] = 10e-10
f, ax = plt.subplots(figsize=(10,8))
plt.subplots_adjust(bottom=0.2)
cmap = plt.get_cmap('Blues')
#im = ax.imshow(freq_table, aspect='auto', norm=LogNorm(vmin=freq_table.min().min(), vmax=freq_table.max().max()), \
#          cmap=cmap, interpolation='nearest')
im = ax.imshow(freq_table, aspect='auto', cmap=cmap, interpolation='nearest', vmin=0, vmax=100)
ax.set_yticks(np.arange(freq_table.shape[0]))
ax.set_yticklabels(freqs_def)
ax.set_xticks(np.arange(freq_table.shape[1]))
ax.set_xticklabels(bins, rotation=90)
ax.set_xlabel('rainfall amount') 
ax.set_ylabel('duration')
ax.xaxis.grid('off', lw=0)
ax.yaxis.grid('off', lw=0)
[ax.axvline(x, color='0.8') for x in np.arange(-0.5,freq_table.shape[1]-0.5)]
[ax.axhline(x, color='0.8') for x in np.arange(-0.5,freq_table.shape[0]-0.5)]

cb = plt.colorbar(im, boundaries=np.arange(0,100+10,10), drawedges=True)
cb.set_label('%')

f.savefig('../figures/table_freq_Afiamalu.png')



In [64]:
freq_table.to_csv('../outputs/frequency_intensity_table_Afiamalu.csv')

In [67]:
from mpl_toolkits.mplot3d import Axes3D

In [65]:
y = np.arange(0.5,freq_table.shape[0]+0.5)
x = np.arange(0.5,freq_table.shape[1]+0.5)
X,Y = np.meshgrid(x,y)
Z = freq_table.values

In [80]:
f = plt.figure(figsize=(10,10))
ax = f.add_subplot(111, projection='3d')
plt.subplots_adjust(bottom=0.25)
surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=plt.cm.jet,
        linewidth=0, antialiased=True, alpha=0.8)
ax.view_init(elev=20., azim=310)
ax.set_yticks(np.arange(0.5,11.5))
ax.set_yticklabels(freqs_def, rotation=90)
ax.set_xticklabels(bins, rotation=90)
#ax.set_ylabel('duration')
#ax.set_xlabel('amount (mm)')
ax.set_zlabel('%')
ax.set_title('rainfall frequency table for Afiamalu')
f.savefig('../figures/essai3D_surface.png', dpi=300)
#ax.bar(y, x, zs=Z, zdir='y', alpha=0.8)


Now investigate the runs (consecutive events) of respectively wet spells and dry spells for different durationsm3

One hour rainfall


In [106]:
def get_runs(data, varname, thresh = 0.0, event = 'dry', freq = '24H', how = 'sum'):
    table = data[varname].resample(freq, how = how)
    bits = table.copy()
    if event == 'dry':
        bits[bits <= thresh] = 1
    elif event == 'wet': 
        bits[bits > thresh] = 1
    bits = bits.values == 1
    bounded = np.hstack(([0], bits, [0]))
    difs = np.diff(bounded)
    run_starts, = np.where(difs > 0)
    run_ends, = np.where(difs < 0)
    duration = run_ends - run_starts
    run_ends -= 1
    t_run_starts = table.index[run_starts].to_pydatetime()
    t_run_ends = table.index[run_ends].to_pydatetime()
    
    persistence = pd.DataFrame({'run_starts':run_starts,'date_starts':t_run_starts, 'run_ends':run_ends,\
                                'date_ends':t_run_ends, 'duration':duration})
    # sorts the table
    persistence = persistence[['run_starts','date_starts','run_ends','date_ends','duration']]
    #persistence = pd.DataFrame(np.c_[run_starts, t_run_starts, run_ends,t_run_ends, duration], \
    #                           columns = ['run_starts', 'date_starts', 'run_ends', 'date_ends', 'duration'])
    return table, persistence

In [107]:
table, persistence = get_runs(data, 'Rain(TOT)', thresh = 0.5, event='wet')

In [108]:
freq_table = persistence['duration'].value_counts().sort_index(1)

In [109]:
freq_table = pd.DataFrame(np.c_[freq_table.index.values, freq_table.values], columns = ['duration', 'occurences'])

In [110]:
freq_table.T.to_csv('../outputs/daily_wet_events_Afiamalu.csv',header=False)

In [105]:
freq_table.T


Out[105]:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
duration 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 18 20 21 23 ...
occurences 56 44 22 27 13 9 5 4 3 2 2 2 2 1 1 1 1 2 1 1 ...

2 rows × 21 columns


In [ ]: