In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
%matplotlib inline
plt.style.use('ggplot')
Read in data
In [2]:
data = pd.read_csv('rotterdam_rg_2003-2014.csv', skipinitialspace=True)
Convert the dates to a readable format...
In [3]:
dates = data['Datum']
time = data['Tijd']
dates = dates.map(str)
date_split = dates.str.extract('(.{4})(.{2})(.{2})', expand=True)
time=time.apply(lambda x: '%04d' % x)
time_split = time.str.extract('(.{2})(.{2})', expand=True)
date_split.loc[:,3] = time_split.loc[:,0]
date_split.loc[:,4] = time_split.loc[:,1]
data.loc[:,'dt'] = pd.to_datetime(dict(year=date_split[0], month=date_split[1], day=date_split[2], hour=date_split[3], minute=date_split[4]))
data.index=data['dt']
data.head()
Out[3]:
Plot all data
In [4]:
plt.plot(data['dt'], data["R10m"])
plt.ylabel('mm/10min')
plt.gcf().autofmt_xdate()
Resample the 10 min dataset to hourly accumulated data
In [5]:
data_1h = pd.DataFrame()
data_1h['mean_rain'] = data.R10m.resample('H').mean()
data_1h['accum_rain'] = data.R10m.resample('H').sum()
In [6]:
data_1h.tail()
Out[6]:
In [7]:
plt.plot(data_1h["accum_rain"])
plt.ylabel('mm/1h')
plt.gcf().autofmt_xdate()
In [8]:
plt.plot(data_1h["mean_rain"])
plt.ylabel(r'mm/10min ($\varnothing$ of 1h)')
plt.gcf().autofmt_xdate()
Resample 10 min dataset to 24 accumulated data
In [9]:
data_24h = pd.DataFrame()
data_24h['mean_rain'] = data.R10m.resample('D').mean()
data_24h['accum_rain'] = data.R10m.resample('D').sum()
In [10]:
plt.plot(data_24h["accum_rain"])
plt.ylabel('mm/24h')
plt.gcf().autofmt_xdate()
In [11]:
plt.plot(data_24h["mean_rain"])
plt.ylabel(r'mm/10min ($\varnothing$ of 24h)')
plt.gcf().autofmt_xdate()
Select summer and winter months as separate datasets
In [12]:
data_summer_1h = data_1h.loc[(data_1h.index.month>=4) & (data_1h.index.month<=9)]
mask_start = (data_1h.index.month >= 1) & (data_1h.index.month <= 3)
mask_end = (data_1h.index.month >= 10) & (data_1h.index.month <= 12)
mask = mask_start | mask_end
data_winter_1h = data_1h.loc[mask]
In [13]:
plt.plot(data_summer_1h["accum_rain"])
plt.ylabel('mm/h')
plt.gcf().autofmt_xdate()
In [14]:
plt.plot(data_winter_1h["accum_rain"])
plt.ylabel('mm/h')
plt.gcf().autofmt_xdate()
Resample 10 min rain data to monthly accumulated data
In [15]:
data_monthly = pd.DataFrame()
data_monthly['mean_rain'] = data.R10m.resample('M').mean()
data_monthly['accum_rain'] = data.R10m.resample('M').sum()
In [16]:
plt.plot(data_monthly["accum_rain"])
plt.ylabel('mm/month')
plt.gcf().autofmt_xdate()
In [17]:
plt.plot(data_monthly["mean_rain"])
plt.ylabel(r'mm/10min ($\varnothing$ per month)')
plt.gcf().autofmt_xdate()
In [18]:
print('Mean: %s' % str(data.R10m.mean()))
print('Std: %s' % str(data.R10m.std()))
print('Skew: %s' % str(data.R10m.skew()))
In [19]:
data.R10m.hist(bins = 100)
plt.xlabel('mm/10min')
plt.gca().set_yscale("log")
In [20]:
cur_data = data.R10m.loc[data.R10m>0]
hist_d = plt.hist(cur_data, bins=100)
plt.xlabel('mm/10min')
plt.gca().set_yscale("log")
In [21]:
print('Mean: %s' % str(data_24h.accum_rain.mean()))
print('Std: %s' % str(data_24h.accum_rain.std()))
print('Skew: %s' % str(data_24h.accum_rain.skew()))
In [22]:
data_24h.accum_rain.hist(bins = 100)
plt.gca().set_yscale("log")
plt.xlabel('mm/24h')
Out[22]:
Ignoring zeros:
In [23]:
cur_data = data_24h.accum_rain.loc[data_24h.accum_rain>0]
hist_d = plt.hist(cur_data, bins=100)
plt.gca().set_yscale("log")
plt.xlabel('mm/24h')
Out[23]:
In [24]:
data_24h.mean_rain.hist(bins = 100)
plt.xlabel(r'mm/10min ($\varnothing$ per 24h)')
plt.gca().set_yscale("log")
In [25]:
selected_monthly_data = data_monthly
#selected_monthly_data = data_monthly[(data_monthly.index >= '2004-01-01')]
selected_monthly_data.head()
Out[25]:
In [26]:
pd.options.mode.chained_assignment = None # default='warn'
selected_monthly_data['mon'] = selected_monthly_data.index.month
selected_monthly_data['year'] = selected_monthly_data.index.year
selected_monthly_data.boxplot(column=['accum_rain'], by='mon', sym='+')
plt.ylabel('mm/month')
Out[26]:
Or per year:
In [27]:
selected_monthly_data.boxplot(column=['accum_rain'], by='year', sym='+')
plt.ylabel('mm/month')
plt.gcf().autofmt_xdate()
In [28]:
data_1h['hour'] = data_1h.index.hour
data_1h.boxplot(column=['accum_rain'], by='hour', sym='+')
plt.ylabel('mm/h')
Out[28]:
Neglecting events <1mm/h
In [29]:
cur_df = data_1h.copy()
cur_df.loc[cur_df.accum_rain<1, 'accum_rain'] = np.nan
cur_df.boxplot(column=['accum_rain'], by='hour', sym='+')
plt.ylabel('mm/h')
Out[29]:
Neglecting events < 3mm/h
In [30]:
cur_df = data_1h.copy()
cur_df.loc[cur_df.accum_rain<3, 'accum_rain'] = np.nan
cur_df.boxplot(column=['accum_rain'], by='hour', sym='+')
plt.ylabel('mm/h')
Out[30]:
In [31]:
data_summer_1h['hour'] = data_summer_1h.index.hour
data_summer_1h.boxplot(column=['accum_rain'], by='hour', sym='+')
plt.ylabel('mm/h')
Out[31]:
Neglecting events <1mm/h
In [32]:
cur_df = data_summer_1h.copy()
cur_df.loc[cur_df.accum_rain<1, 'accum_rain'] = np.nan
cur_df.boxplot(column=['accum_rain'], by='hour', sym='+')
plt.ylabel('mm/h')
Out[32]:
Neglecting events <3mm/h
In [33]:
cur_df = data_summer_1h.copy()
cur_df.loc[cur_df.accum_rain<3, 'accum_rain'] = np.nan
cur_df.boxplot(column=['accum_rain'], by='hour', sym='+')
plt.ylabel('mm/h')
Out[33]:
Repeat with winter
In [34]:
data_winter_1h['hour'] = data_winter_1h.index.hour
data_winter_1h.boxplot(column=['accum_rain'], by='hour', sym='+')
plt.ylabel('mm/h')
Out[34]:
Neglecting events <1mm/h
In [35]:
cur_df = data_winter_1h.copy()
cur_df.loc[cur_df.accum_rain<1, 'accum_rain'] = np.nan
cur_df.boxplot(column=['accum_rain'], by='hour', sym='+')
plt.ylabel('mm/h')
Out[35]:
Neglecting events <3mm/h
In [36]:
cur_df = data_winter_1h.copy()
cur_df.loc[cur_df.accum_rain<3, 'accum_rain'] = np.nan
cur_df.boxplot(column=['accum_rain'], by='hour', sym='+')
plt.ylabel('mm/h')
Out[36]:
Show rainfall events > 10mm /h over entire 1h accumulated dataset
In [37]:
rotterdam_1h_exceeds = data_1h.accum_rain[data_1h.accum_rain>5]
In [38]:
y = np.array(rotterdam_1h_exceeds)
N = len(y)
x = range(N)
plt.bar(x, y)
plt.ylabel('mm/h')
Out[38]:
Amount of events
In [39]:
print(len(rotterdam_1h_exceeds))
Events in summer
In [40]:
rotterdam_1h_exceeds_summer = data_summer_1h.accum_rain[data_summer_1h.accum_rain>10]
y = np.array(rotterdam_1h_exceeds_summer)
N = len(y)
x = range(N)
plt.bar(x, y)
plt.ylabel('mm/h')
Out[40]:
Amount of events
In [41]:
print(len(rotterdam_1h_exceeds_summer))
In [42]:
exceed_hist = plt.hist(rotterdam_1h_exceeds, bins=100)
In [43]:
from scipy.stats import genextreme
In [44]:
x = np.linspace(5, 30, 1000)
y = np.array(rotterdam_1h_exceeds[:])
In [45]:
np.seterr(divide='ignore', invalid='ignore')
genextreme.fit(y)
Out[45]:
In [46]:
np.seterr(divide='ignore', invalid='ignore')
pdf = plt.plot(x, genextreme.pdf(x, *genextreme.fit(y)))
pdf_hist = plt.hist(y, bins=50, normed=True, histtype='stepfilled', alpha=0.8)
In [47]:
genextreme.ppf((1-1/1), *genextreme.fit(y))
Out[47]:
In [48]:
genextreme.ppf((1-1/10), *genextreme.fit(y))
Out[48]:
In [49]:
genextreme.ppf((1-1/100), *genextreme.fit(y))
Out[49]:
Lower since we're missing out the heavy rainfall events in 2015 and 2016 (only 10 year dataset)
In [50]:
from scipy.stats import genpareto
In [51]:
temp_monthly = data_1h.groupby(pd.TimeGrouper(freq='M'))
block_max_y = np.array(temp_monthly.accum_rain.max())
print(block_max_y)
In [52]:
print(len(block_max_y))
In [53]:
x = np.linspace(0, 30, 1000)
pdf_bm = plt.plot(x, genextreme.pdf(x, *genextreme.fit(block_max_y)))
pdf_hist_bm = plt.hist(block_max_y, bins=100, normed=True, histtype='stepfilled', alpha=0.8)
GEV and block maxima of monthly maxima of 1h data
In [54]:
genextreme.fit(block_max_y)
Out[54]:
In [55]:
genextreme.ppf((1-1/10), *genextreme.fit(block_max_y))
Out[55]:
In [56]:
pdf_bm = plt.plot(x, genpareto.pdf(x, *genpareto.fit(y)))
pdf_hist_bm = plt.hist(y, bins=100, normed=True, histtype='stepfilled', alpha=0.8)
GPD and POT of data>10mm/h
In [57]:
genpareto.fit(y)
Out[57]:
In [58]:
genpareto.ppf((1-1/10), *genpareto.fit(y))
Out[58]:
In [ ]:
Boxplot of POT values
In [59]:
event_occurences = pd.DataFrame(rotterdam_1h_exceeds)
event_occurences['hour'] = event_occurences.index.hour
event_occurences.boxplot(column=['accum_rain'], by='hour', sym='+')
Out[59]:
In [60]:
event_occurences.hour.value_counts(sort=False)
Out[60]:
In [61]:
cur_hist = plt.hist(event_occurences.hour, bins=24, histtype='stepfilled')
plt.xticks(range(24))
plt.xlabel('hour')
Out[61]:
In [ ]: