In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
%matplotlib inline
plt.style.use('ggplot')
Show the data source locations. Red dots are available gauges, blue cross denotes the selected station. No specific criteria was chosen to select that specific station.
In [2]:
from mpl_toolkits.basemap import Basemap
In [3]:
def get_basemap(_resolution):
return Basemap(projection='merc', llcrnrlat=25, urcrnrlat=38, llcrnrlon=275, urcrnrlon=285, lat_ts=35.,
resolution=_resolution)
In [4]:
positions = pd.read_csv('./Raw_RG_Data/RG_lat_lon.csv', header=None)
positions.columns=['lat', 'lon']
In [5]:
plt.figure(figsize=(24,12))
m = get_basemap('h')
m.drawcoastlines()
m.drawcountries()
m.fillcontinents(color = 'gray')
m.drawmapboundary()
for index, row in positions.iterrows():
x,y = m(row['lon']+360, row['lat'])
m.plot(x, y, 'ro', markersize=6)
x,y = m(positions['lon'][0]+360, positions['lat'][0])
m.plot(x, y, 'bx', markersize=6)
m.drawstates()
m.drawrivers()
plt.show()
In [6]:
charlotte_rainfall = pd.read_csv('./charlotte_rg_2003-2014.csv', header = None)
In [7]:
#charlotte_rainfall = pd.read_csv('./Raw_RG_Data/Charlotte_CRN_gage_2003.csv', header = None)
#for i in range(2004,2014):
# cur_rainfall = pd.read_csv('./Raw_RG_Data/Charlotte_CRN_gage_%d.csv' % i, header = None)
# charlotte_rainfall = charlotte_rainfall.append(cur_rainfall, ignore_index=True)
In [8]:
#charlotte_rainfall = charlotte_rainfall.iloc[:,:6]
charlotte_rainfall.columns = ["year","month","day", "hour", "min", "Rainfall"]
charlotte_rainfall.loc[:,'dt'] = pd.to_datetime(dict(year=charlotte_rainfall['year'], month=charlotte_rainfall['month'], day=charlotte_rainfall['day'], hour=charlotte_rainfall['hour'], minute=charlotte_rainfall['min']))
charlotte_rainfall.index=charlotte_rainfall['dt']
In [9]:
charlotte_rainfall.head()
Out[9]:
In [10]:
plt.plot(charlotte_rainfall['dt'], charlotte_rainfall["Rainfall"])
plt.ylabel('mm/15min')
plt.gcf().autofmt_xdate()
In [11]:
charlotte_rainfall["Rainfall"] = charlotte_rainfall["Rainfall"].replace(-99, np.nan)
In [12]:
plt.plot(charlotte_rainfall['dt'], charlotte_rainfall["Rainfall"])
plt.ylabel('mm/15min')
plt.gcf().autofmt_xdate()
In [13]:
charlotte_rainfall.head()
Out[13]:
In [14]:
charlotte_24h_rainfall = pd.DataFrame()
charlotte_24h_rainfall['mean_rain'] = charlotte_rainfall.Rainfall.resample('D').mean()
charlotte_24h_rainfall['accum_rain'] = charlotte_rainfall.Rainfall.resample('D').sum()
In [15]:
charlotte_24h_rainfall.head()
Out[15]:
In [16]:
plt.plot(charlotte_24h_rainfall["accum_rain"])
plt.ylabel('mm/24h')
plt.gcf().autofmt_xdate()
In [17]:
plt.plot(charlotte_24h_rainfall["mean_rain"])
plt.ylabel(r'mm/15min ($\varnothing$ of 24h)')
plt.gcf().autofmt_xdate()
In [18]:
charlotte_1h_rainfall = pd.DataFrame()
charlotte_1h_rainfall['mean_rain'] = charlotte_rainfall.Rainfall.resample('H').mean()
charlotte_1h_rainfall['accum_rain'] = charlotte_rainfall.Rainfall.resample('H').sum()
In [19]:
charlotte_1h_rainfall.head()
Out[19]:
In [20]:
plt.plot(charlotte_1h_rainfall["accum_rain"])
plt.ylabel('mm/h')
plt.gcf().autofmt_xdate()
In [21]:
plt.plot(charlotte_1h_rainfall["mean_rain"])
plt.ylabel(r'mm/15min ($\varnothing$ of 1h)')
plt.gcf().autofmt_xdate()
In [22]:
charlotte_summer_1h_rainfall = charlotte_1h_rainfall.loc[(charlotte_1h_rainfall.index.month>=4) & (charlotte_1h_rainfall.index.month<=9)]
In [23]:
plt.plot(charlotte_summer_1h_rainfall["accum_rain"])
plt.ylabel('mm/h')
plt.gcf().autofmt_xdate()
In [24]:
mask_start = (charlotte_1h_rainfall.index.month >= 1) & (charlotte_1h_rainfall.index.month <= 3)
mask_end = (charlotte_1h_rainfall.index.month >= 10) & (charlotte_1h_rainfall.index.month <= 12)
mask = mask_start | mask_end
In [25]:
charlotte_winter_1h_rainfall = charlotte_1h_rainfall.loc[mask]
In [26]:
plt.plot(charlotte_winter_1h_rainfall["accum_rain"])
plt.ylabel('mm/h')
plt.gcf().autofmt_xdate()
In [27]:
charlotte_winter_1h_rainfall.head()
Out[27]:
In [28]:
charlotte_monthly_rainfall = pd.DataFrame()
charlotte_monthly_rainfall['mean_rain'] = charlotte_rainfall.Rainfall.resample('M').mean()
charlotte_monthly_rainfall['accum_rain'] = charlotte_rainfall.Rainfall.resample('M').sum()
In [29]:
plt.plot(charlotte_monthly_rainfall["accum_rain"])
plt.ylabel('mm/month')
plt.gcf().autofmt_xdate()
In [30]:
plt.plot(charlotte_monthly_rainfall["mean_rain"])
plt.ylabel(r'mm/15min ($\varnothing$ per month)')
plt.gcf().autofmt_xdate()
Mean, standard deviation and skewness of the 15 min dataset
In [31]:
print('Mean: %s' % str(charlotte_rainfall.Rainfall.mean()))
print('Std: %s' % str(charlotte_rainfall.Rainfall.std()))
print('Skew: %s' % str(charlotte_rainfall.Rainfall.skew()))
Histogram of the data
In [32]:
charlotte_rainfall.Rainfall.hist(bins = 100)
plt.xlabel('mm/15min')
plt.gca().set_yscale("log")
Histogram of the data without zeros
In [33]:
cur_data = charlotte_rainfall.Rainfall.loc[charlotte_rainfall.Rainfall>0]
hist_d = plt.hist(cur_data, bins=100)
plt.xlabel('mm/15min')
plt.gca().set_yscale("log")
Mean, standard deviation and skewness of 24h accumulated dataset
In [34]:
print('Mean: %s' % str(charlotte_24h_rainfall.accum_rain.mean()))
print('Std: %s' % str(charlotte_24h_rainfall.accum_rain.std()))
print('Skew: %s' % str(charlotte_24h_rainfall.accum_rain.skew()))
Histogram of the dataset
In [35]:
charlotte_24h_rainfall.accum_rain.hist(bins = 100)
plt.xlabel('mm/24h')
plt.gca().set_yscale("log")
In [36]:
charlotte_24h_rainfall.mean_rain.hist(bins = 100)
plt.xlabel(r'mm/15min ($\varnothing$ per 24h)')
plt.gca().set_yscale("log")
Histogram without zeros
In [37]:
cur_data = charlotte_24h_rainfall.accum_rain.loc[charlotte_24h_rainfall.accum_rain>0]
hist_d = plt.hist(cur_data, bins=100)
plt.xlabel('mm/24h')
plt.gca().set_yscale("log")
Boxplot of monthly totals
In [38]:
charlotte_monthly_rainfall['mon'] = charlotte_monthly_rainfall.index.month
charlotte_monthly_rainfall['year'] = charlotte_monthly_rainfall.index.year
charlotte_monthly_rainfall.boxplot(column=['accum_rain'], by='mon', sym='+')
plt.ylabel('mm/month')
Out[38]:
Or on a yearly scale:
In [39]:
charlotte_monthly_rainfall.dropna().boxplot(column=['accum_rain'], by='year', sym='+')
plt.ylabel('mm/month')
plt.gcf().autofmt_xdate()
In [40]:
charlotte_1h_rainfall['hour'] = charlotte_1h_rainfall.index.hour
charlotte_1h_rainfall.boxplot(column=['accum_rain'], by='hour', sym='+')
plt.ylabel('mm/h')
Out[40]:
Neglecting events < 1mm/h
In [41]:
cur_df = charlotte_1h_rainfall.copy()
cur_df.loc[cur_df.accum_rain<1, 'accum_rain'] = np.nan
cur_df.boxplot(column=['accum_rain'], by='hour', sym='+')
plt.ylabel('mm/h')
Out[41]:
Neglecting events < 3mm/h
In [42]:
cur_df = charlotte_1h_rainfall.copy()
cur_df.loc[cur_df.accum_rain<3, 'accum_rain'] = np.nan
cur_df.boxplot(column=['accum_rain'], by='hour', sym='+')
plt.ylabel('mm/h')
Out[42]:
Merge summer hourly data
In [43]:
pd.options.mode.chained_assignment = None # default='warn'
charlotte_summer_1h_rainfall['hour'] = charlotte_summer_1h_rainfall.index.hour
charlotte_summer_1h_rainfall.boxplot(column=['accum_rain'], by='hour', sym='+')
Out[43]:
Neglecting events <1mm/hour
In [44]:
cur_df = charlotte_summer_1h_rainfall.copy()
cur_df.loc[cur_df.accum_rain<1, 'accum_rain'] = np.nan
cur_df.boxplot(column=['accum_rain'], by='hour', sym='+')
plt.ylabel('mm/h')
Out[44]:
Neglecting events <3mm/hour
In [45]:
cur_df = charlotte_summer_1h_rainfall.copy()
cur_df.loc[cur_df.accum_rain<3, 'accum_rain'] = np.nan
cur_df.boxplot(column=['accum_rain'], by='hour', sym='+')
plt.ylabel('mm/h')
Out[45]:
Merge hourly winter data
In [46]:
charlotte_winter_1h_rainfall['hour'] = charlotte_winter_1h_rainfall.index.hour
charlotte_winter_1h_rainfall.boxplot(column=['accum_rain'], by='hour', sym='+')
plt.ylabel('mm/h')
Out[46]:
Neglecting events <1mm/h
In [47]:
cur_df = charlotte_winter_1h_rainfall.copy()
cur_df.loc[cur_df.accum_rain<1, 'accum_rain'] = np.nan
cur_df.boxplot(column=['accum_rain'], by='hour', sym='+')
plt.ylabel('mm/h')
Out[47]:
Neglecting events <3mm/h
In [48]:
cur_df = charlotte_winter_1h_rainfall.copy()
cur_df.loc[cur_df.accum_rain<3, 'accum_rain'] = np.nan
cur_df.boxplot(column=['accum_rain'], by='hour', sym='+')
plt.ylabel('mm/h')
Out[48]:
Show rainfall events > 10mm /h over entire 1h accumulated dataset
In [49]:
charlotte_1h_exceeds = charlotte_1h_rainfall.accum_rain[charlotte_1h_rainfall.accum_rain>10]
Amount of hourly events
In [50]:
print(len(charlotte_1h_exceeds))
In [51]:
y = np.array(charlotte_1h_exceeds)
N = len(y)
x = range(N)
width = 1
plt.bar(x, y, width)
plt.ylabel('mm/h')
Out[51]:
10 mm/h events in summer periods
In [52]:
charlotte_1h_exceeds_summer = charlotte_summer_1h_rainfall.accum_rain[charlotte_summer_1h_rainfall.accum_rain>10]
In [53]:
y = np.array(charlotte_1h_exceeds_summer)
N = len(y)
x = range(N)
width = 1
plt.bar(x, y, width)
plt.ylabel('mm/h')
Out[53]:
Amount of hourly events
In [54]:
print(len(charlotte_1h_exceeds_summer))
In [55]:
plt.plot(charlotte_1h_exceeds)
plt.gcf().autofmt_xdate()
In [56]:
charlotte_1h_exceeds.hist(bins=100)
Out[56]:
In [57]:
from scipy.stats import genextreme
In [58]:
x = np.linspace(0, 80, 1000)
y = np.array(charlotte_1h_exceeds[:])
In [59]:
np.seterr(divide='ignore', invalid='ignore')
genextreme.fit(y)
Out[59]:
In [60]:
pdf = plt.plot(x, genextreme.pdf(x, *genextreme.fit(y)))
pdf_hist = plt.hist(y, bins=50, normed=True, histtype='stepfilled', alpha=0.8)
In [61]:
genextreme.ppf((1-1/1), *genextreme.fit(y))
Out[61]:
In [62]:
genextreme.ppf((1-1/10), *genextreme.fit(y))
Out[62]:
In [63]:
genextreme.ppf((1-1/100), *genextreme.fit(y))
Out[63]:
In [64]:
from scipy.stats import genpareto
In [65]:
temp_monthly = charlotte_1h_rainfall.groupby(pd.TimeGrouper(freq='M'))
block_max_y = np.array(temp_monthly.accum_rain.max())
print(block_max_y)
In [66]:
print(len(block_max_y))
In [67]:
x = np.linspace(0, 100, 1000)
In [68]:
pdf = plt.plot(x, genextreme.pdf(x, *genextreme.fit(block_max_y)))
pdf_hist = plt.hist(block_max_y, bins=50, normed=True, histtype='stepfilled', alpha=0.8)
GEV and block maxima of monthly maxima of 1h data
In [69]:
genextreme.fit(block_max_y)
Out[69]:
In [70]:
genextreme.ppf((1-1/10), *genextreme.fit(block_max_y))
Out[70]:
In [71]:
pdf_bm = plt.plot(x, genpareto.pdf(x, *genpareto.fit(y)))
pdf_hist_bm = plt.hist(y, bins=100, normed=True, histtype='stepfilled', alpha=0.8)
GPD and POT of data>10mm/h
In [72]:
genpareto.fit(y)
Out[72]:
In [73]:
genpareto.ppf((1-1/10), *genpareto.fit(y))
Out[73]:
In [ ]:
Boxplot of POT values
In [74]:
event_occurences = pd.DataFrame(charlotte_1h_exceeds)
event_occurences['hour'] = event_occurences.index.hour
event_occurences.boxplot(column=['accum_rain'], by='hour', sym='+')
Out[74]:
Number of occurences per hour
In [75]:
event_occurences.hour.value_counts(sort=False)
Out[75]:
In [76]:
# plt.plot(asd.hour.value_counts(sort=False))
In [77]:
cur_hist = plt.hist(event_occurences.hour, bins=24, histtype='stepfilled')
plt.xticks(range(24))
plt.xlabel('hour')
Out[77]:
In [ ]: