In [ ]:
from __future__ import print_function, division, generators
import numpy as np
import numpy.ma as ma
import pandas as pd
import matplotlib.pyplot as plt
import math
import scipy as sci
import seaborn as sns
import os
import glob
import datetime as dt
import netCDF4
from scipy.stats import norm as scipy_stats_norm
from scipy import stats
%matplotlib inline
Data obtained from http://www.ncdc.noaa.gov/cdo-web/
In [ ]:
def get_date(date_number):
"""
Turn the int64 value from the DATE of GHCN into a pd.datetime
"""
dstring = str(date_number)
return pd.datetime(int(dstring[0:4]),int(dstring[4:6]),int(dstring[6:8]))
def get_df(fnm, var, no_missing = True):
"""
Create a dataframe for a single station, with a time index, for a single
variable of data given as a key word (e.g. PRECIP, TMAX, TMIN).
Requires file path and name (fnm).
no_missing is a Bool that optionally masks out values < -99 from the df.
"""
df = pd.read_csv(fnm)
dt_indx = [get_date(date) for date in df.DATE]
data_vals = df[var].values
    if var == 'PRCP':
data_vals = data_vals / 10. # This is to convert precip data to mm
if no_missing:
tmp_df = pd.DataFrame(data=data_vals,
index=dt_indx,columns=[df.STATION[0][6:]])
mask = tmp_df > -99. # A catchall value for missing data in GHCN
return tmp_df[mask]
else:
return pd.DataFrame(data=data_vals,
index=dt_indx,columns=[df.STATION[0][6:]])
def get_combined_df(fpth, var):
"""
From a given file path, and variable, extract data from all .csv files, and
place in a single dataframe object.
"""
flist = glob.glob(fpth)
df_dic = {}
for f in flist:
df_dic[f[5:]] = get_df(fnm = f, var = var, no_missing=True)
return pd.concat([df_dic[key] for key in df_dic.keys()],axis=1)
Call the get_combined_df() function to create dataframes from all of the data files in a folder.
In [ ]:
%%time
df_tmax = get_combined_df(fpth="Data/zone2/*.csv",var="TMAX")
df_tmin = get_combined_df(fpth="Data/zone2/*.csv",var="TMIN")
df_prcp = get_combined_df(fpth="Data/zone2/*.csv",var="PRCP")
In [ ]:
df_prcp.tail(3)
Data obtained from http://197.254.113.174:8081/repository
In [ ]:
# RCA4_ERAINT
era = netCDF4.Dataset("CORDEX/ECMWF-ERAINT/evaluation/pr30_43_4_13_mean.nc") #zone 1 model data
era2 = netCDF4.Dataset("CORDEX/ECMWF-ERAINT/evaluation/pr28_42_-7_4_mean.nc") #Zone 2 model data
eraint = era2['pr'][:,0,0]
time = era2["time"]
times = netCDF4.num2date(era2['time'][:],units = era2['time'].units)
times_index = pd.to_datetime(times)
#print(times_index)
In [ ]:
#creating a dataframes for RCA4 data
df_era = pd.DataFrame(data=eraint,index=pd.date_range(start='1980-01-01',end='2010-12-31'),columns=['ERAINT'])
In [ ]:
for station in df_prcp:
print(station, np.max(df_prcp[station]))
Plot time series of precipitation for all stations, and also accumulate the data and plot the average rainfall.
Daily mean and SEM values: the uncertainty of each daily mean is given by the standard error of the mean (SEM), where $SEM = \frac{\sigma}{\sqrt{n-1}}$.
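As a quick check of this formula (a hypothetical example, not part of the original analysis): for a sample x, np.std(x)/np.sqrt(len(x) - 1) agrees with scipy.stats.sem(x) at its default ddof=1, since both reduce to $\sqrt{\sum(x-\bar{x})^2/(n(n-1))}$.
In [ ]:
# Hypothetical sanity check of the SEM formula (not part of the original notebook;
# np and scipy.stats are already imported at the top)
x = np.array([2., 4., 4., 4., 5., 5., 7., 9.])    # small example sample
sem_manual = np.std(x) / np.sqrt(len(x) - 1)      # sigma / sqrt(n-1), as used in calc_SEM below
sem_scipy = stats.sem(x)                          # scipy's SEM (default ddof=1) gives the same number
print(sem_manual, sem_scipy)                      # both are ~0.756 for this sample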
In [ ]:
def calc_SEM(data):
"""
Calculate Standard error of the mean. No nan's
should be in the input (numpy) array.
"""
return np.std(data)/np.sqrt(len(data) - 1)
def gather_daily_stats(date, df):
    """
    For a specified day (date), create a short array of observed
    values (obs) from the dataframe df, excluding the NaNs. Return
    the mean and the SEM value.
    Restrictions: more than one observation on the day, no missing
    values, and less than 200 mm per day (larger values are treated
    as erroneous).
    """
    obs = np.array([df[key][date] for key in df.keys()])
    obs = obs[(obs > -1) & (obs < 200)]
    if len(obs) < 2:
        return np.NAN, np.NAN
    return np.mean(obs), calc_SEM(obs)
In [ ]:
# Create an accumulated time series (with SEM uncertainty values)
means = []
sems = []
for day in df_prcp.index:
tmp_mean, tmp_sem = gather_daily_stats(date=day, df=df_prcp)#['1961-01-01':'1990-12-31'])
means.append(tmp_mean)
sems.append(tmp_sem)
means = np.array(means)
sems = np.array(sems)
df_prcp['Accumulated']=pd.Series(means,index=df_prcp.index) #adding columns to the dataframe!
df_prcp['Acc_SEM']=pd.Series(sems,index=df_prcp.index)
In [ ]:
print(np.max(df_prcp['Accumulated']))
In [ ]:
# SEM uncertainty values for RCA4_ERA: the whole-series SEM is repeated for each day (one model value per day, so no daily spread is available)
sems_era = []
for day in df_era.index:
tmp_sem = np.std(eraint)/np.sqrt(len(eraint) - 1)
sems_era.append(tmp_sem)
sems_era = np.array(sems_era)
print(sems_era)
In [ ]:
ts= plt.figure(dpi=300)
ts.set_size_inches(15,5) # Specify the figure size
ax1 = ts.add_subplot(111) # Add an axis frame object to the plot (i.e. a panel)
#ax1.plot(df_prcp.index, df_prcp.Accumulated,'.g',ms=2.0)
ax1.errorbar(df_era.index, eraint,
yerr=sems_era, c='b', alpha=1., fmt='-')
ax1.set_ylim(0,20)
plt.title("RCA4 ERAINT (Zone 1) Mean Daily Precipitation", fontsize=15)
plt.ylabel("Precipitation (mm day$^{-1}$)", fontsize=15)
plt.xlabel("Years", fontsize=15)
plt.grid(True)
#ts.savefig('Zone1_ERAts.pdf',dpi=300)
In [ ]:
daily_ts = plt.figure(dpi=300)
daily_ts.set_size_inches(15,5) # Specify the figure size
ax1 = daily_ts.add_subplot(111) # Add an axis frame object to the plot (i.e. a panel)
ax1.errorbar(df_prcp.index, df_prcp.Accumulated,
yerr=df_prcp.Acc_SEM, c='b', alpha=1.,lw=1.5, fmt='-')
ax1.errorbar(df_era.index, eraint,
yerr=sems_era, c='r', alpha=1., lw=1.5, fmt='-')
leg1=ax1.legend(['Station data','RCA4_ERA'],prop={'size':10},numpoints=1,markerscale=5.,frameon=True,fancybox=True)
ax1.set_ylim(0,150)
plt.xlim('1953-01-01','2015-12-31')
plt.title(" East Africa's Zone 2 Daily Average Precipitation", fontsize=15)
plt.ylabel("Precipitation (mm day$^{-1}$)", fontsize=15)
plt.xlabel("Years", fontsize=15)
plt.grid(True)
plt.show(daily_ts)
#daily_ts.savefig('Zone2_ts+ERA.pdf',dpi=300)
In [ ]:
mask = df_prcp.Accumulated > 0.0
In [ ]:
# N.b. a Gaussian KDE is a poor assumption for precip data (which behaves more
# like log- or power-law distributed data), so a cosine kernel is used below.
daily_dp = plt.figure()
daily_dp.set_size_inches(12,5)
ax = daily_dp.add_subplot(111)
sns.distplot(df_prcp.Accumulated[mask],bins=100,norm_hist = True,kde=False,color = 'r')
sns.kdeplot(df_prcp.Accumulated[mask],shade=True,kernel='cos',cumulative=False,color='b')
leg1=ax.legend(['KDE','Accumulated mean'],prop={'size':11},
numpoints=1,markerscale=5.,frameon=True,fancybox=True)
ax.set_xlim(0,30)
ax.set_title("East Africa Mean(Zone1) Precipitation")
ax.set_xlabel(r'Precip. (mm day$^{-1}$)')
ax.set_ylabel('Density (0-1)')
plt.show(daily_dp)
#daily_dp.savefig('Zone1_Densityplot.pdf',dpi=300)
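As an aside (a hypothetical sketch, not part of the original analysis), daily precipitation is usually better described by a distribution with positive support, such as a gamma distribution, than by a Gaussian; scipy.stats.gamma can be fitted to the wet-day means for comparison with the histogram above.
In [ ]:
# Hypothetical sketch: fit a gamma distribution to the wet-day accumulated means
# (uses np, plt and scipy.stats, all imported at the top of the notebook)
wet = df_prcp.Accumulated[mask].dropna().values            # wet-day mean precip values
shape, loc, scale = stats.gamma.fit(wet, floc=0)           # fix the location at zero for rainfall
xg = np.linspace(0.01, 30, 300)
plt.hist(wet, bins=100, normed=True, alpha=0.5, color='r')
plt.plot(xg, stats.gamma.pdf(xg, shape, loc=loc, scale=scale), 'b-', lw=2)
plt.xlim(0, 30)
plt.xlabel(r'Precip. (mm day$^{-1}$)')
plt.ylabel('Density')
plt.title('Gamma fit to wet-day mean precipitation (sketch)')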
In [ ]:
sns.kdeplot
In [ ]:
#Histogram
#source code from https://github.com/benlaken/Tanzania/blob/master/Precipitation_Tanzania.ipynb
hist_dp = plt.figure()
hist_dp.set_size_inches(5,5) # Specify the output size
ax1 = hist_dp.add_subplot(211) # Add an axis frame object to the plot (i.e. a panel)
ax2 = hist_dp.add_subplot(212) # Add an axis frame object to the plot (i.e. a panel)
# the histogram of the data
ax1.set_title(r'PDFs of Daily Precipitation for Zone 2', fontsize=13)
n, bins, patches = ax1.hist(df_prcp.Accumulated[mask], 100, normed=True, facecolor='blue', alpha=0.75,
histtype='stepfilled')
n, bins, patches = ax1.hist(eraint, 100, normed=True, facecolor='red', alpha=0.75,
histtype='stepfilled')
ax1.grid(True)
ax1.set_ylabel('Density', fontsize =15)
n, bins, patches = ax2.hist(df_prcp.Accumulated[mask], 100, normed=True, facecolor='blue', alpha=0.75,
histtype='stepfilled',cumulative=True)
n, bins, patches = ax2.hist(eraint, 100, normed=True, facecolor='red', alpha=0.75,
histtype='stepfilled',cumulative=True)
leg1=ax1.legend(['Station data','RCA4_ERA'],prop={'size':10},numpoints=1,markerscale=5.,frameon=True,fancybox=True)
leg1=ax2.legend(['Station data','RCA4_ERA'],prop={'size':10}, numpoints=1,markerscale=5.,frameon=True,fancybox=True)
ax1.set_ylim(0,0.5)
plt.xlabel(r'mm day$^{-1}$', fontsize=13)
plt.ylabel('Cumulative density', fontsize =13)
plt.grid(True)
plt.show()
#hist_dp.savefig('Zone2_Density_plots.pdf',dpi=300)
In [ ]:
# define extreme quantiles
percentileZero = min(df_prcp.Accumulated[mask])
percentileHundred = max(df_prcp.Accumulated[mask])
print('Min. precip', percentileZero)
print('Max. precip', percentileHundred)
print("Median", np.percentile(df_prcp.Accumulated[mask],50))
In [ ]:
srtd = sorted(df_prcp.Accumulated[mask])
percent = [val/len(srtd) * 100. for val in range(len(srtd))]
plt.plot(percent,srtd)
plt.grid(True)
In [ ]:
print(np.percentile(df_prcp.Accumulated[mask],90))
print(np.percentile(srtd,10))
Calculate the Day of Year (DOY) mean over the data period (climatology).
In [ ]:
doy_mean=[]
doy_sem =[]
for doy in range(366):
index = df_prcp['1961-01-01':'1990-12-31'].index.dayofyear == doy+1
#print(index)
doy_mean.append(np.nanmean(df_prcp['Accumulated']['1961-01-01':'1990-12-31'][index]))
doy_sem.append(calc_SEM(df_prcp['Accumulated']['1961-01-01':'1990-12-31'][index]))
doy_mean = np.array(doy_mean)
doy_sem = np.array(doy_sem)
In [ ]:
doy_eramean=[]
doy_erasem =[]
for doy in range(366):
index = times_index.dayofyear == doy+1
#print(index)
doy_eramean.append(np.nanmean(eraint[index]))
doy_erasem.append(calc_SEM(eraint[index]))
doy_eramean = np.array(doy_eramean)
doy_erasem = np.array(doy_erasem)
In [ ]:
ssn_rmean = pd.rolling_mean(doy_mean, window=30, min_periods=0, center = True)
ssn_eramean = pd.rolling_mean(doy_eramean, window=30, min_periods=0, center = True)
#ssn_rmean[-30:] = np.nan
In [ ]:
#Plot the seasonal climatology East Africa precip data
mnths= ['Jan','Feb','Mar','Apr','May','June','Jul','Aug','Sep','Oct','Nov','Dec']
#mrange = arange(12)
my_sclim = plt.figure(dpi=300)
my_sclim.set_size_inches(10,5) # Specify the output size
ax1 = my_sclim.add_subplot(111) # Add an axis frame object to the plot (i.e. a panel)
ax1.errorbar(range(366),doy_mean,xerr=None, yerr=doy_sem, c='b', alpha=1., lw=1.5)
ax1.errorbar(range(366),doy_eramean,xerr=None, yerr=doy_erasem, c='r', alpha=1.,lw=1.5)
#ax1.plot(range(366), ssn_rmean,'b-', alpha=1.0)
#ax1.plot(range(366), ssn_eramean,'r-', alpha=1.0)
leg1=ax1.legend(['Station data','RCA4_ERA'],prop={'size':12},numpoints=1,markerscale=5.,frameon=True,fancybox=True)
plt.xlim(0,max(range(366)))
plt.title("Climatology Precipitation for Zone 2 by Day (& 30day smooth)", fontsize=15)
plt.ylabel("Precipitation (mm day$^{-1}$)", fontsize=15)
plt.xlabel("Day of Year (DOY)", fontsize=15)
plt.grid(True)
#my_sclim.savefig('Zone2_SeasonalClimatology_+ERA_plot.pdf',dpi=300)
Anomaly
In [ ]:
# wordy example of how to access/calculate anomaly
for daily_rain in zip(df_prcp.index[5000:5003],df_prcp.Accumulated[5000:5003]):
print('Day {0}, rainfall {1:3.2f}mm'.format(daily_rain[0].date(),daily_rain[1]))
print('DOY is',daily_rain[0].dayofyear)
print("DOY climo value is {0:3.2f}".format(doy_mean[daily_rain[0].dayofyear -1]))
print("Daily anomaly is {0:3.2f}".format(daily_rain[1] - doy_mean[daily_rain[0].dayofyear -1]))
print(np.isnan(daily_rain[1]))
print("")
In [ ]:
#---Create a seasonal deviation from climatology--
#Anomalies = Observation - Climatology
prcp_anom = []
for daily_rain in zip(df_prcp.index,df_prcp.Accumulated):
if np.isnan(daily_rain[1]):
prcp_anom.append(np.NAN)
else:
prcp_anom.append(daily_rain[1] - doy_mean[daily_rain[0].dayofyear -1])
prcp_anom = np.array(prcp_anom)
In [ ]:
#---Create a seasonal deviation from climatology for RCA4
era_anom = []
for daily_rain in zip(df_era.index,df_era.ERAINT):
if np.isnan(daily_rain[1]):
era_anom.append(np.NAN)
else:
era_anom.append(daily_rain[1] - doy_mean[daily_rain[0].dayofyear -1])
era_anom = np.array(era_anom)
In [ ]:
df_prcp['Acc_anomaly'] = prcp_anom #adding columns to the dataframe!
df_era['ERA_anomaly'] = era_anom
In [ ]:
# ---plot the anomalized rainfall data with errors---
my_anom = plt.figure(dpi=300)
my_anom.set_size_inches(10,5) # Specify the output size
ax1 = my_anom.add_subplot(111) # Add an axis frame object to the plot (i.e. a panel)
ax1.errorbar(df_prcp['Acc_anomaly'].index ,df_prcp['Acc_anomaly'],yerr=df_prcp['Acc_SEM'],
color='b', fmt='.',xerr=None,alpha=0.5)
ax1.errorbar(times_index,era_anom,yerr=sems_era,
color='r', fmt='.',xerr=None,alpha=0.5)
ax1.set_ylim(-20,150)
#plt.xlim('1953-01-01','1990-12-31')
ax1.set_title(r'East Africa (Zone 2) Deseasonalized Daily Precipitation Climatology ($\delta$Precip.)')
ax1.set_ylabel(r'mm day$^{-1}$')
ax1.set_xlabel('Years')
ax1.grid(True)
plt.show(my_anom)
#my_anom.savefig('EA Zone1 Deseasonalized Precip.pdf',dpi=300)
In [ ]:
#doy_values = [doy.dayofyear - 1 for doy in df_prcp.index]
figx = plt.figure(dpi=72)
figx.set_size_inches(10,5) # Specify the figure size
ax1 = figx.add_subplot(111)
#---Plot the seasonal climatology East Africa precip data---
ax1.errorbar(range(366),doy_mean,xerr=None, yerr=doy_sem, color='b', alpha=0.8 )
ax1.errorbar(range(366),doy_eramean,xerr=None, yerr=doy_erasem, color='k', alpha=0.8 )
ax1.plot(df_prcp.index.dayofyear -1 ,df_prcp['Accumulated'],'.',ms=2.5,alpha=1.0,color='r')
plt.xlim(0,max(range(366)))
plt.title("")
plt.ylabel("Precipitation (mm)")
plt.xlabel("Day of Year (DOY)")
plt.grid(True)
In [ ]:
#A mask for the df_prcp to identify categories of Extreme rainfall events
"""
The thresholds used here are based on the definitions used by SWFDP-EA.
It should be noted that these hold under natural conditions:
No risk - <5mm
Low risk - 5mm-20mm
Medium risk 20 - 50mm
High risk >=50mm
"""
cond1 = df_prcp.Accumulated < 5
cond2 = df_prcp.Accumulated >= 5   # lower bounds inclusive, to match the definitions above
cond3 = df_prcp.Accumulated < 20
highmed_risk = df_prcp.Accumulated[df_prcp.Accumulated >= 20] #Medium to high risk
high_risk = df_prcp.Accumulated[df_prcp.Accumulated >= 50]
medium_risk = df_prcp.Accumulated[(df_prcp.Accumulated >= 20) & (df_prcp.Accumulated < 50)]
low_risk = df_prcp.Accumulated[cond2 & cond3]
no_risk = df_prcp.Accumulated[cond1]
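The same categories can also be assigned in a single pass with pd.cut (a hypothetical alternative, not part of the original code; it assumes the SWFDP-EA bins listed above, with lower bounds inclusive):
In [ ]:
# Hypothetical alternative: label every day with its SWFDP-EA risk category using pd.cut
risk_bins = [-np.inf, 5, 20, 50, np.inf]
risk_labels = ['no risk', 'low risk', 'medium risk', 'high risk']
risk_cat = pd.cut(df_prcp.Accumulated, bins=risk_bins, labels=risk_labels, right=False)
print(risk_cat.value_counts())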
In [ ]:
#A mask for the df_era to identify if there are similar Extreme rainfall events in RCA4 data
nrisk= df_era.ERAINT[df_era.ERAINT < 5]
yrisk = df_era.ERAINT[(df_era.ERAINT>5) & (df_era.ERAINT<20) ]
print(len(yrisk))
In [ ]:
daily_floodrisk = plt.figure(dpi=72)
daily_floodrisk.set_size_inches(12,7) # Specify the figure size
ax1 = daily_floodrisk.add_subplot(111) #
ax1.plot(high_risk.index, high_risk,'ro',alpha=1.,linewidth = 3, ms=3)
ax1.plot(medium_risk.index, medium_risk,'bo',alpha=0.9,ms=3)
ax1.plot(low_risk.index, low_risk,'co',alpha=0.9,ms=3)
ax1.plot(no_risk.index, no_risk,'go',alpha=0.9,ms=3)
leg1=ax1.legend(['high risk','medium risk','low risk','no risk'],
prop={'size':12},numpoints=1,markerscale=5.,frameon=True,fancybox=True)
#plt.xlim('1953-01-01','1990-12-31')
plt.title(r" Zone 2 Station data Extreme Precipitation Events based on Absolute Threshhold", fontsize=15)
plt.ylabel(r"Amounts (mm day$^{-1}$)", fontsize=15)
plt.xlabel("Years", fontsize=15)
plt.grid(True)
plt.show(daily_floodrisk)
#daily_floodrisk.savefig('Zone1_ExtremeEvent.pdf',dpi=300)
In [ ]:
#low_risk.groupby( [low_risk.index.year, low_risk.index.month, low_risk.index.day] ).count()
lwrisk_freq = low_risk.groupby(low_risk.index.year).count()/365 #To get normalized freq per annum
mdrisk_freq = medium_risk.groupby(medium_risk.index.year).count()/365
mdhgrisk_freq = highmed_risk.groupby(highmed_risk.index.year).count()/365
hgrisk_freq = high_risk.groupby( high_risk.index.year ).count()
#print(hgrisk_freq)
#ERA
yrisk_freq = yrisk.groupby(yrisk.index.year).count()/365
In [ ]:
frq_rmean_low = pd.rolling_mean(lwrisk_freq, window=10, min_periods=0, center = True)
frq_rmean_med = pd.rolling_mean(mdrisk_freq, window=10, min_periods=0, center = True)
frq_rmean_medhig = pd.rolling_mean(mdhgrisk_freq, window=10, min_periods=0, center = True)
frq_rmean_hig = pd.rolling_mean(hgrisk_freq, window=10, min_periods=0, center = True)
#ERA
frq_rmean_yrisk = pd.rolling_mean(yrisk_freq, window=10, min_periods=0, center = True)
In [ ]:
#Frequency plot
freq = plt.figure(dpi=300)
freq.set_size_inches(15,5) # Specify the output size
ax1 = freq.add_subplot(121)
ax2 = freq.add_subplot(122)
ax1.plot(lwrisk_freq.index, lwrisk_freq ,'b-',ms=10.0,alpha=1., lw=1.5)
ax1.plot(yrisk_freq.index, yrisk_freq ,'g-',ms=3.0,alpha=1., lw=1.5)
ax1.plot(lwrisk_freq.index, frq_rmean_low,'r--', linewidth=2, alpha=1.)
ax1.plot(yrisk_freq.index, frq_rmean_yrisk,'r--', linewidth=2, alpha=1.)
ax1.set_title('Zone 2 Frequency of Low risk extreme precip \n based on Absolute Threshold')
ax1.set_ylabel(r' Normalized counts of low risk events', fontsize = 12)
ax1.set_xlabel(r'Years', fontsize = 12)
#ax1.set_xlim(1953, 1990)
leg1=ax1.legend(['Station data','RCA4_ERA'],prop={'size':11},numpoints=1,markerscale=5.,frameon=True,fancybox=True)
ax1.grid(True)
ax2.plot(mdhgrisk_freq.index, mdhgrisk_freq ,'b-',ms=5.0,alpha=1.,lw=1.5)
ax2.plot(mdhgrisk_freq.index, frq_rmean_medhig,'r--', linewidth=2, alpha=1.,lw=1.5)
ax2.set_title('Zone 2 Frequency of Medium-High risk extreme precip \n based on Absolute Threshold')
ax2.set_xlabel(r"Years", fontsize = 12)
#ax2.set_xlim(1953, 1990)
ax2.set_ylabel('Normalized counts of medium risk events', fontsize = 12)
leg1=ax2.legend(['Station data'],prop={'size':11},numpoints=1,markerscale=5.,frameon=True,fancybox=True)
ax2.grid(True)
#freq.savefig('Zone2_freq_plot+ERA.pdf',dpi=300)
In [ ]:
low_risk.index[2], low_risk.index[1]
In [ ]:
test = low_risk.index[2] - low_risk.index[1]
print("diffrence in days between first and second lowrisk:",test.days)
In [ ]:
lowrisk_times = 1
for n, date in enumerate(low_risk.index[lowrisk_times - 1:3]):
print(date.date(), (date - low_risk.index[n -1]).days)
In [ ]:
#Duration
lowrisk_dur = []
mediumrisk_dur = []
risk_time = 1
for n, date in enumerate(low_risk.index[risk_time - 1:]):
lowrisk_d = (date - low_risk.index[n -1]).days
lowrisk_dur.append(lowrisk_d)
lowrisk_dur=np.array(lowrisk_dur)
for n, date in enumerate(highmed_risk.index[risk_time - 1:]):
mediumrisk_d = (date - highmed_risk.index[n -1]).days
mediumrisk_dur.append(mediumrisk_d)
mediumrisk_dur=np.array(mediumrisk_dur)
In [ ]:
#Duration plot
freq = plt.figure(dpi=72)
freq.set_size_inches(20,5) # Specify the output size
ax1 = freq.add_subplot(121)
ax2 = freq.add_subplot(122)
ax1.plot(low_risk.index, lowrisk_dur ,'b-',ms=2.0,alpha=1.)
ax1.set_title('Zone 2 Duration of consecutive Low risk precip \n events based on absolute threshold', fontsize=15)
ax1.set_ylabel(r'Days between low risk events',fontsize = 14)
ax1.set_xlabel(r'Years',fontsize = 14)
#ax1.set_xlim('1953-01-03','1990-12-31')
ax1.set_ylim(0,200)
ax1.grid(True)
ax2.plot(highmed_risk.index, mediumrisk_dur,'b-',ms=3.0,alpha=1.)
ax2.set_title('Zone 2 Duration of consecutive Medium-High risk precip \n based on absolute threshold',fontsize=15)
ax2.set_ylabel('Days between Medium-High risk events', fontsize = 14)
ax2.set_ylim(0,1500)
ax2.set_xlabel(r"Years", fontsize = 14)
#ax2.set_xlim('1953-01-03','1990-12-31')
ax2.grid(True)
#freq.savefig('Zone2_Duration_Swfdp1.pdf',dpi=300)
In [ ]:
flood_threshold = np.percentile(df_prcp['Acc_anomaly'][mask],90)
drought_threshold = np.percentile(df_prcp['Acc_anomaly'][mask],10)
print('90th percentile = ',flood_threshold)
print('10th percentile = ',drought_threshold)
print('50th percentile = ',np.percentile(df_prcp['Acc_anomaly'][mask],50))
print('99th percentile = ',np.percentile(df_prcp['Acc_anomaly'][mask],99))
#sns.distplot(df_prcp['Acc_anomaly'][mask])
In [ ]:
flood_era = np.percentile(df_era['ERA_anomaly'],90)
drought_era = np.percentile(df_era['ERA_anomaly'],10)
print('90th percentile = ',flood_era)
print('10th percentile = ',drought_era)
print('50th percentile = ',np.percentile(df_era['ERA_anomaly'],50))
print('99th percentile = ',np.percentile(df_era['ERA_anomaly'],99))
#sns.distplot(df_prcp['Acc_anomaly'][mask])
In [ ]:
my_dist = plt.figure()
my_dist.set_size_inches(10,5) # Specify the output size
ax1 = my_dist.add_subplot(111) # Add an axis frame object to the plot (i.e. a panel)
#Univariate distribution of observed daily precipitation.
sns.distplot(df_prcp['Acc_anomaly'][mask],bins=100, norm_hist=True, kde=False) # Filled bars
sns.kdeplot(df_prcp['Acc_anomaly'][mask],shade=False,kernel='gau',cumulative=False,color='k',lw=3)
sns.distplot(era_anom,bins=100, norm_hist=True, kde=False) # Filled bars
sns.kdeplot(era_anom,shade=False,kernel='gau',cumulative=False,color='r',lw=2)
ax1.vlines(np.percentile(df_prcp['Acc_anomaly'][mask],90), 0.00, 0.40, colors='b',linestyle='--',lw=2.0)
ax1.vlines(np.percentile(df_prcp['Acc_anomaly'][mask],50), 0.00, 0.40, colors='b',lw=2.0) #Marker line of Median
ax1.vlines(np.percentile(df_prcp['Acc_anomaly'][mask],10), 0.00, 0.40, colors='b',linestyle='-.',lw=3.0)
leg1=ax1.legend(['Station data KDE','90th percentile','50th percentile','10th percentile','observed anomalies'],
prop={'size':11},numpoints=1,markerscale=5.,frameon=True,fancybox=True)
ax1.vlines(np.percentile(df_era['ERA_anomaly'],90), 0.00, 0.40, colors='y',linestyle='-',lw=1.5)
ax1.vlines(np.percentile(df_era['ERA_anomaly'],50), 0.00, 0.40, colors='y',lw=1.5) #Marker line of Median
ax1.vlines(np.percentile(df_era['ERA_anomaly'],10), 0.00, 0.40, colors='y',linestyle='-',lw=1.5)
ax1.set_title(r'Zone 2 Normalized Extreme Precipitation Percentile plot', fontsize=15)
ax1.set_ylabel(r' Probability Density',fontsize=15)
ax1.set_xlabel(r'Anomalies(mm)',fontsize=15)
ax1.set_xlim(-10, 20)
ax1.set_ylim(0.00, 0.40)
ax1.grid(True)
my_dist.text(0.71, 0.64, " __ RCA4_ERA KDE" ,fontsize=10,color='r')
plt.show(my_dist)
#my_dist.savefig("Zone1_Normalized_Percentile.pdf",dpi=300)#transparent=True)
In [ ]:
# Make a mask for the df_prcp to identify the extreme dates (flood and drought)
extremes = ((df_prcp['Acc_anomaly'] > flood_threshold) | (df_prcp['Acc_anomaly'] < drought_threshold))
flood = (df_prcp['Acc_anomaly'] > flood_threshold)
drought = (df_prcp['Acc_anomaly'] < drought_threshold)
wet_xtrm = (df_era['ERA_anomaly']> flood_era)
dry_xtrm = (df_era['ERA_anomaly'] < drought_era)
In [ ]:
fig_threshold = plt.figure(dpi=72)
fig_threshold.set_size_inches(8,5) # Specify the figure size
ax1 = fig_threshold.add_subplot(111)
ax1.scatter(df_prcp['Acc_anomaly'][mask].index, df_prcp['Acc_anomaly'][mask],
alpha=0.1, marker='.')
ax1.scatter(df_prcp['Acc_anomaly'][extremes].index, df_prcp['Acc_anomaly'][extremes],
alpha=0.8, marker='.', color='r')
plt.title("Extreme rainfall based on statistical values of daily anomalies and percentiles")
plt.ylabel("Precipitation anomaly (mm)")
plt.xlabel("Year")
plt.xlim('1953-01-01','1990-12-31')
#plt.ylim(-10,70)
ax1.grid(True)
#fig_threshold.savefig('Zone1 Extreme_Threshhold_plot.pdf',dpi=300)
In [ ]:
#groupby can be used to query the dataset
#OR we can write hacks like this, to pull out data based on the index
#---Splitting the data into groups based on the extreme threshold
for year in range(min(df_prcp.index.year),max(df_prcp.index.year)):
wet_extreme = df_prcp["Acc_anomaly"][flood][df_prcp["Acc_anomaly"][flood].index.year == year]
dry_extreme = df_prcp["Acc_anomaly"][drought][df_prcp["Acc_anomaly"][drought].index.year == year]
print(year,len(wet_extreme), year,len(dry_extreme))
break
In [ ]:
#For station dataset
flood_freq = []
drought_freq = []
yr_day_count = []
years = []
flood_mean=[]
flood_sem =[]
drought_mean=[]
drought_sem =[]
for year in range(min(df_prcp.index.year),max(df_prcp.index.year)):
tmp_yr_data = df_prcp["Acc_anomaly"][df_prcp.index.year == year] # pool data for each year
#print(tmp_yr_data)
yr_day_count.append(tmp_yr_data.count())
years.append(year)
if tmp_yr_data.count() > 1:
flood_freq.append(len(tmp_yr_data[flood]))
#print(tmp_yr_data[flood])
drought_freq.append(len(tmp_yr_data[drought]))
flood_mean.append(np.nanmean(tmp_yr_data[flood]))
flood_sem.append(calc_SEM(tmp_yr_data[flood]))
drought_mean.append(np.nanmean(tmp_yr_data[drought]))
drought_sem.append(calc_SEM(tmp_yr_data[drought]))
else:
flood_freq.append(np.NAN)
drought_freq.append(np.NAN)
flood_mean.append(np.NAN)
flood_sem.append(np.NAN)
drought_mean.append(np.NAN)
drought_sem.append(np.NAN)
flood_freq = np.array(flood_freq)
drought_freq = np.array(drought_freq)
yr_day_count = np.array(yr_day_count)
years = np.array(years)
flood_mean = np.array(flood_mean)
flood_sem = np.array(flood_sem)
drought_mean = np.array(drought_mean)
drought_sem = np.array(drought_sem)
In [ ]:
flood_erafreq = []
drought_erafreq = []
yr_day_eracount = []
years_era = []
flood_eramean=[]
flood_erasem =[]
drought_eramean=[]
drought_erasem =[]
for year in range(min(df_era.index.year),max(df_era.index.year)):
tmp_yr_dat = df_era['ERA_anomaly'][df_era.index.year == year] # pool data for each year
#print(tmp_yr_dat)
yr_day_eracount.append(tmp_yr_dat.count())
years_era.append(year)
#print(years_era)
if tmp_yr_dat.count() > 1:
flood_erafreq.append(len(tmp_yr_dat[wet_xtrm]))
#print(tmp_yr_dat[wet_xtrm])
drought_erafreq.append(len(tmp_yr_dat[dry_xtrm]))
flood_eramean.append(np.nanmean(tmp_yr_dat[wet_xtrm]))
flood_erasem.append(calc_SEM(tmp_yr_dat[wet_xtrm]))
drought_eramean.append(np.nanmean(tmp_yr_dat[dry_xtrm]))
drought_erasem.append(calc_SEM(tmp_yr_dat[dry_xtrm]))
else:
flood_erafreq.append(np.NAN)
drought_erafreq.append(np.NAN)
flood_eramean.append(np.NAN)
flood_erasem.append(np.NAN)
drought_eramean.append(np.NAN)
drought_erasem.append(np.NAN)
flood_erafreq = np.array(flood_erafreq)
drought_erafreq = np.array(drought_erafreq)
yr_day_eracount = np.array(yr_day_eracount)
years_era = np.array(years_era)
flood_eramean = np.array(flood_eramean)
flood_erasem = np.array(flood_erasem)
drought_eramean = np.array(drought_eramean)
drought_erasem = np.array(drought_erasem)
In [ ]:
drought_eramean
In [ ]:
#running_test = pd.rolling_mean(synthetic["vals"], window=10, min_periods=3, center = True)
int_rmean = pd.rolling_mean(flood_mean, window=10, min_periods=0, center = True)
int_drmean = pd.rolling_mean(drought_mean, window=10, min_periods=0, center = True)
#ERA
era_rmean = pd.rolling_mean(flood_eramean, window=10, min_periods=0, center = True)
era_drmean = pd.rolling_mean(drought_eramean, window=10, min_periods=0, center = True)
In [ ]:
#Intensity
my_int = plt.figure(dpi=72)
my_int.set_size_inches(15,5) # Specify the output size
ax1 = my_int.add_subplot(121)
ax2 = my_int.add_subplot(122)
ax1.errorbar(years,flood_mean, #Masking missing values
xerr=None, yerr=flood_sem,color='b', fmt='.', alpha=1.)
ax1.errorbar(years_era, flood_eramean,xerr=None, yerr=flood_erasem,color='k', fmt='.', alpha=1.)
ax1.plot(years, int_rmean,'g--', lw=2)
ax1.plot(years_era, era_rmean,'k--', lw=2)
ax1.set_title('Zone 2 Intensity of R90p events based on \n statistical values of daily anomalies and percentiles', fontsize=14)
ax1.set_ylabel(r'Intensity (mm day$^{-1}$)')
ax1.set_xlabel(r'Year')
leg1=ax1.legend(['Station data','RCA4_ERA'],prop={'size':11},numpoints=1,markerscale=5.,frameon=True,fancybox=True)
#ax1.set_ylim(0,12)
ax1.grid(True)
ax2.errorbar(years,drought_mean,
xerr=None, yerr=drought_sem, color='r', fmt='.', alpha=1.)
ax2.errorbar(years_era, drought_eramean,xerr=None, yerr=drought_erasem,color='k', fmt='.', alpha=1.)
ax2.plot(years, int_drmean,'g--',alpha=1., lw=2)
ax2.plot(years_era, era_drmean,'k--')
ax2.set_title(' Zone 2 Intensity of R10p events based on \n statistical values of daily anomalies and percentiles ', fontsize =14)
ax2.set_xlabel(r"Years")
ax2.set_ylabel('Intensity(mm day$^{-1}$)')
leg1=ax2.legend(['Station data','RCA4_ERA'],prop={'size':11},numpoints=1,markerscale=5.,frameon=True,fancybox=True)
ax2.grid(True)
#my_int.savefig('Zone2_Percentile_intensity+ERA1_plot.pdf',dpi=300)
In [ ]:
## calc the trendline (linear fit of flood frequency against year)
trend = np.polyfit(years[yr_day_count > 350], flood_freq[yr_day_count > 350], 1)
In [ ]:
#running_test = pd.rolling_mean(synthetic["vals"], window=10, min_periods=3, center = True)
fre_rmean = pd.rolling_mean(flood_freq, window=10, min_periods=0, center = True)
fre_drmean = pd.rolling_mean(drought_freq, window=10, min_periods=0, center = True)
#ERA
erafreq_rmean = pd.rolling_mean(flood_erafreq, window=10, min_periods=0, center = True)
erafreq_drmean = pd.rolling_mean(drought_erafreq, window=10, min_periods=0, center = True)
In [ ]:
#pd.timedelta_range(start=None, end=None, periods=None, freq='D', name=None, closed=None)
In [ ]:
#Frequency
my_ = plt.figure(dpi=72)
my_.set_size_inches(15,5) # Specify the output size
ax = my_.add_subplot(121) # Add an axis frame object to the plot (i.e. a panel)
ax1 = my_.add_subplot(122)
ax.plot(years,flood_freq, 'b-', alpha=0.8)
ax.plot(years[yr_day_count > 350],drought_freq[yr_day_count > 350], 'r-',alpha=0.8)
leg=ax.legend(['Floods','Drought',],prop={'size':10},numpoints=1,markerscale=1.,
frameon=True,fancybox=True)
ax.set_ylim(0,100)
ax.set_title('Integer count of Extreme precip events (Frequency) \n based on threshold value detection (Zone 2)')
ax.set_ylabel(r'Number of Extreme events (counts)')
ax.set_xlabel('Years')
ax.grid(True)
ax1.plot(years[yr_day_count > 350], flood_freq[yr_day_count > 350]/ #Normalized frequency
yr_day_count[yr_day_count > 350], 'b-', alpha=1.)
ax1.plot(years[yr_day_count > 350],drought_freq[yr_day_count > 350]/
yr_day_count[yr_day_count > 350],'r-', alpha=1.)
leg=ax1.legend(['Floods','Drought',],prop={'size':10},numpoints=1,markerscale=1.,
frameon=True,fancybox=True)
ax1.set_title('Extreme precip events Frequency \n based on threshold value detection (Zone 2)')
ax1.set_ylabel(r'Normalized count of Extreme events')
ax1.set_xlabel('Years')
ax1.grid(True)
plt.show(my_)
#my_.savefig('Zone2_Percentile_Frequency_plot.pdf',dpi=300)
In [ ]:
#Frequency
my_ = plt.figure(dpi=300)
my_.set_size_inches(10,5) # Specify the output size
ax1 = my_.add_subplot(111)
ax1.plot(years[yr_day_count > 350], fre_rmean[yr_day_count > 350]/yr_day_count[yr_day_count > 350],'b-', lw=2)
ax1.plot(years[yr_day_count > 350], fre_drmean[yr_day_count > 350]/yr_day_count[yr_day_count > 350],'r-', lw=2)
ax1.plot(years_era, erafreq_rmean/365,'g-', lw=2)
ax1.plot(years_era, erafreq_drmean/365,'k-', lw=2)
ax1.plot(years[yr_day_count > 350], flood_freq[yr_day_count > 350]/yr_day_count[yr_day_count > 350], 'b.', alpha=1.)
ax1.plot(years[yr_day_count > 350],drought_freq[yr_day_count > 350]/yr_day_count[yr_day_count > 350],'r.', alpha=1.)
ax1.plot(years_era,flood_erafreq/yr_day_eracount, 'g.', alpha=0.8)
ax1.plot(years_era,drought_erafreq/yr_day_eracount, 'k.', alpha=0.8)
leg=ax1.legend(['Station data R90p','Station data R10p','RCA4_ERA R90p','RCA4_ERA R10p'],prop={'size':10},
numpoints=1,markerscale=1.,frameon=True,fancybox=True)
ax1.set_title('Zone 2 Frequency of Extreme precip Events \n based on statistical values of daily anomalies and percentiles', fontsize=14)
ax1.set_ylabel(r'Normalized count of Extreme events', fontsize=14)
ax1.set_xlabel(r'Years', fontsize=14)
ax1.grid(True)
plt.show(my_)
#my_.savefig('Zone2_Percentile_ERAFrequency_plot.pdf',dpi=300)
Duration
In [ ]:
test = tmp_yr_data[drought].index[1] - tmp_yr_data[drought].index[0]
print("diffrence in days between first and second flood:",test.days)
In [ ]:
def add_days_since(df, truth_array, name_to_add):
"""
This function takes a dataframe (df) as input, and a truth array
related to that dataframe (e.g. df=df_prcp, truth_array=drought)
and will then go through each date in the dataframe, look if the
truth value is True, and if it is, it will see how long since the
    last truth value occurred, and give an integer value (for day count)
which is then placed in an array, and appended to the original
dataframe at the end, with the name_to_add as the column name.
"""
days_since_list = []
    last_day = df.index[0] # Initialise the state of the 'last' day
for day in df.index:
if truth_array[day] == True:
days_since = day - last_day
days_since = int(days_since.days)
last_day = day # update the state of last day to current true day
if days_since > 1000: # Just check the values aren't silly
days_since = np.NAN
else:
days_since = np.NAN
days_since_list.append(days_since)
days_since_list = np.array(days_since_list)
df[name_to_add]=days_since_list #adding column to the df_prcp data frame
return
In [ ]:
add_days_since(df=df_prcp, truth_array=drought, name_to_add = 'DS_Last_D')#Days since last drought
add_days_since(df=df_prcp, truth_array=flood, name_to_add = 'DS_Last_F') #Days since last flood
#add_days_since(df=df_prcp, truth_array=low_risk, name_to_add = 'DS_Last_R')
In [ ]:
my_duration = plt.figure()
my_duration.set_size_inches(10,5)
ax1 = my_duration.add_subplot(111)
#ax2 = my_duration.add_subplot(122)
ax1.plot(df_prcp.DS_Last_F.index, df_prcp.DS_Last_F,'-b',ms=3.0,alpha=1.0, lw=2)
ax1.plot(df_prcp.DS_Last_D.index, df_prcp.DS_Last_D,'-r',ms=3.0,alpha=1.0,lw=2)
#ax1.bar(df_prcp.DS_Last_F.index, df_prcp.DS_Last_F, width=0.8, color = 'b')
#ax1.bar(df_prcp.DS_Last_D.index, df_prcp.DS_Last_D, width=0.8, color = 'r')
leg=ax1.legend(['DS_Last_R90p event','DS_Last_R10p',],prop={'size':10},numpoints=1,markerscale=5.,
frameon=True,fancybox=True)
ax1.set_ylabel(r'Duration since last event (Days) ',fontsize=14)
ax1.set_xlabel('Year',fontsize=14)
ax1.set_title(r'Duration between extreme events in Zone 2',fontsize=14)
ax1.set_ylim(0, 250)
#ax1.set_xlim('1953-01-01','1990-12-31')
ax1.grid(True)
plt.show(my_duration)
#my_duration.savefig('Zone2_Percentile_duration_curve.pdf',dpi=300)
In [ ]:
cdd_ERA = add_days_since(df=df_era, truth_array=df_era['ERAINT']<1 , name_to_add='RCA4_CDD')
cdd_Obs = add_days_since(df=df_prcp, truth_array=df_prcp['Accumulated']<1 , name_to_add='CDD')
cwd_ERA = add_days_since(df=df_era, truth_array=df_era['ERAINT']>=1 , name_to_add='RCA4_CWD')
cwd_Obs = add_days_since(df=df_prcp, truth_array=df_prcp['Accumulated']>=1 , name_to_add='CWD')
In [ ]:
annera_cdd = df_era.RCA4_CDD.resample('A', how='mean')
annobs_cdd = df_prcp.CDD.resample('A', how='mean')
annera_cwd = df_era.RCA4_CWD.resample('A', how='mean')
annobs_cwd = df_prcp.CWD.resample('A', how='mean')
In [ ]:
cddobs_rav = pd.rolling_mean(annobs_cdd, window=10, min_periods=0, center = True)
cddera_rav = pd.rolling_mean(annera_cdd, window=10, min_periods=0, center = True)
cwdobs_rav = pd.rolling_mean(annobs_cwd, window=10, min_periods=0, center = True)
cwdera_rav = pd.rolling_mean(annera_cwd, window=10, min_periods=0, center = True)
In [ ]:
cdd = plt.figure()
cdd.set_size_inches(15,5)
ax1 = cdd.add_subplot(121)
ax2 = cdd.add_subplot(122)
ax1.plot(df_prcp.CDD.index, df_prcp.CDD,'b',ms=3.0,alpha=1.0,lw=2)
ax1.plot(df_prcp.CWD.index, df_prcp.CWD,'r',ms=3.0,alpha=1.0,lw=2)
leg=ax1.legend(['CDD','CWD'],prop={'size':10},numpoints=1,markerscale=5.,
frameon=True,fancybox=True)
ax1.set_ylabel(r'Duration since last event (Days) ')
ax1.set_xlabel('Years')
ax1.set_title(r'CDD & CWD over Zone 2 (Station data)')
ax1.set_ylim(0,200)
#ax1.set_xlim('1953-01-01','1990-12-31')
ax1.grid(True)
ax2.plot(df_era.RCA4_CDD.index, df_era.RCA4_CDD,'b',ms=3.0,alpha=1.0, lw=2)
ax2.plot(df_era.RCA4_CWD.index, df_era.RCA4_CWD,'r',ms=3.0,alpha=1.0, lw=2)
ax2.set_ylabel(r'Duration since last event (Days) ')
ax2.set_xlabel('Years')
ax2.set_title(r'CDD & CWD over Zone 2 (RCA4_ERA)')
ax2.set_ylim(0,200)
ax2.grid(True)
leg=ax2.legend(['CDD','CWD'],prop={'size':10},numpoints=1,markerscale=5.,
frameon=True,fancybox=True)
plt.show(cdd)
#cdd.savefig('Zone1_cdd.pdf',dpi=300)
In [ ]:
#Changes of annual maximum CWD and CDD
cdd_mean1 = plt.figure()
cdd_mean1.set_size_inches(10,5)
ax1 = cdd_mean1.add_subplot(111)
ax1.plot(annobs_cdd.index, annobs_cdd,'b',ms=3.0,alpha=1.0,lw=1.5)
ax1.plot(annobs_cwd.index, annobs_cwd,'r',ms=3.0,alpha=1.0,lw=1.5)
ax1.plot(annera_cdd.index, annera_cdd,'g',ms=3.0,alpha=1.0, lw=1.5)
ax1.plot(annera_cwd.index, annera_cwd,'k',ms=3.0,alpha=1.0, lw=1.5)
ax1.plot(cddobs_rav.index, cddobs_rav,'b--',ms=3.0,alpha=1.0,lw=2.5)
ax1.plot(cwdobs_rav.index, cwdobs_rav,'r--',ms=3.0,alpha=1.0,lw=2.5)
ax1.plot(cddera_rav.index, cddera_rav,'g--',ms=3.0,alpha=1.0, lw=2.5)
ax1.plot(cwdera_rav.index, cwdera_rav,'k--',ms=3.0,alpha=1.0, lw=2.5)
leg=ax1.legend(['Station data CDD','Station data CWD','RCA4_ERA CDD','RCA4_ERA CWD'],
prop={'size':10},numpoints=1,markerscale=5.,frameon=True,fancybox=True)
ax1.set_ylabel(r'Annual Mean Duration since last event (Days)',fontsize=12)
ax1.set_xlabel('Years', fontsize=15)
ax1.set_title(r'CDD & CWD over Zone 2', fontsize=16)
ax1.set_ylim(0,10)
ax1.set_xlim('1954-01-01','2015-12-31')
ax1.grid(True)
#cdd_mean1.savefig('Zone1_cdd_Sum.pdf',dpi=300)
#Duration since the last dry day (CDD) is increasing while that since the last wet day (CWD) is declining, meaning more wet days
In [ ]:
# Type of boxcar function
"""
Interested in the sum of mean precip for the current day plus the next 2 days.
If the sum is greater than 100 mm, this could be a signal of potential
medium-high flood risk; low risk if the sum is greater than 50 mm but
less than 100 mm.
"""
#RX3day: maximum 3-d Precipitation : Highest precipitation amount in 3-d period
running_total = pd.rolling_sum(df_prcp["Accumulated"], window=3, min_periods=3, center = True) #inverse=[::-1]
print(np.max(running_total), np.max(df_prcp["Accumulated"]))
In [ ]:
print(running_total[7000:7005])
In [ ]:
running_total5d = pd.rolling_sum(df_prcp["Accumulated"], window=5, min_periods=5, center = True) #inverse=[::-1]
print(np.max(running_total5d), np.max(df_prcp["Accumulated"]))
In [ ]:
my_total = plt.figure()
my_total.set_size_inches(15,5) # Specify the output size
ax1 = my_total.add_subplot(121)
ax2 = my_total.add_subplot(122)
ax1.plot(running_total.index, running_total,'.g',ms=3.0,alpha=0.75)
ax1.plot(running_total[running_total > 50].index, running_total[running_total > 50],'.r',ms=3.0,alpha=1.0)
leg=ax1.legend(['No risk','flood risk',],prop={'size':10},numpoints=1,markerscale=5.,
frameon=True,fancybox=True)
ax1.set_ylabel(r'Precipitation (mm day$^{-1}$)',fontsize =14)
ax1.set_xlabel('Years',fontsize =14)
ax1.set_title(r'RX3day: Station data maximum 3-d Precipitation in East Africa',fontsize =14)
ax1.set_ylim(0, 120)
ax1.set_xlim('1952-01-01','2015-12-31')
ax1.grid(True)
ax2.plot(running_total5d.index, running_total5d,'.g',ms=3.0,alpha=0.75)
ax2.plot(running_total5d[running_total5d > 50].index, running_total5d[running_total5d > 50],'.r',ms=3.0,alpha=1.0)
leg=ax2.legend(['No risk','flood risk',],prop={'size':10},numpoints=1,markerscale=5.,
frameon=True,fancybox=True)
ax2.set_ylabel(r'Precipitation (mm day$^{-1}$)',fontsize =14)
ax2.set_xlabel('Years', fontsize =14)
ax2.set_title(r'RX5day: Station data maximum 5-d Precipitation in East Africa',fontsize =14)
ax2.set_ylim(0, 120)
ax2.set_xlim('1952-01-01','2015-12-31')
ax2.grid(True)
plt.show(my_total)
#my_total.savefig('Zone1_total_ts.pdf',dpi=300)
In [ ]:
flood_risk = running_total[running_total > 50] #Gives days when precipitation 3-d precip totals exceeded the flood
flood_risk5d = running_total5d[running_total5d > 50] #threshold
floodrisk_freq = flood_risk.groupby( flood_risk.index.year).count()/365 #Gives Normalized frequency
floodrisk_freq5d = flood_risk5d.groupby( flood_risk5d.index.year).count()/365
In [ ]:
r3mean = pd.rolling_mean(flood_risk, window=10, min_periods=0, center = True)
r5mean = pd.rolling_mean(flood_risk5d, window=10, min_periods=0, center = True)
In [ ]:
#Frequency plot
flood_freq = plt.figure()
flood_freq.set_size_inches(15,5)
ax1 = flood_freq.add_subplot(121)
ax2 = flood_freq.add_subplot(122)
ax1.plot(floodrisk_freq.index, floodrisk_freq ,'r.',ms=7.0,alpha=1.)
#ax1.plot(r3mean.index, r3mean ,'r-',ms=3.0,alpha=1.)
ax1.set_title('Flood risk events Frequency RX3day (Zone 2) based on\n cumulative statistical values (boxcar approach)')
ax1.set_ylabel(r'Normalized count of events')
ax1.set_xlabel(r'Year')
ax1.grid(True)
ax2.plot(floodrisk_freq5d.index, floodrisk_freq5d ,'r.',ms=7.0,alpha=1.)
#ax2.plot(floodrisk_freq5d.index, r3mean ,'r-',ms=3.0,alpha=1.)
ax2.set_title('Flood risk events Frequency RX5day (Zone 2) based on\n cumulative statistical values (boxcar approach)')
ax2.set_ylabel(r'Normalized count of events')
ax2.set_xlabel(r'Year')
ax2.grid(True)
#flood_freq.savefig('Zone1_RX3d.pdf',dpi=300)
In [ ]:
#### From the frequency of occurrence it is evident that the duration between flood events can be up to 22 years (1967-1989)
In [ ]:
running_mean = pd.rolling_mean(df_prcp["Accumulated"], window=10, min_periods=3, center = True)
print(np.max(running_mean), np.max(df_prcp["Accumulated"]))
In [ ]:
def date_index(dt):
    """
    Turn an int64 YYYYMM value into a pd.datetime (first of the month).
    """
    dstring = str(dt)
    return pd.datetime(int(dstring[0:4]), int(dstring[4:6]), 1)  # year, month, day=1
def corr_df(fpath, label, clim_index):
    """
    Sketch of a helper to read fixed-width climate-index files (rows of a
    YEAR column followed by 12 monthly values, as in the NOAA files below)
    and flatten them into a single monthly time series DataFrame labelled
    with `label`. Not called below; the SOI/QBO files are flattened
    explicitly instead.
    """
    print(fpath)
    data = []
    dates = []
    for file in glob.glob(fpath):
        data_in = pd.read_fwf(file, index_col='YEAR')
        for year in data_in.index:
            for mon, month in enumerate(data_in.keys()):
                dates.append(pd.datetime(int(year), mon + 1, 1))
                data.append(data_in[month][year])
    return pd.DataFrame(data=data, columns=[label], index=dates)
In [ ]:
"""
This is aimed at finding the relationship between extreme weather events
in EA and climate indices (ENSO, QBO, IOD). Also, what is the correlation
between the data sets (and its statistical significance)?
"""
#iod=pd.read_fwf("Data/dmi.nc")
enso=pd.read_fwf("Data/noaa_mei.txt")
soi = pd.read_fwf("Data/noaa_soi.txt", index_col='YEAR')
qbo30 =pd.read_fwf("Data/noaa_qbo30.txt", index_col='YEAR')
qbo50 = pd.read_fwf("Data/noaa_qbo50.txt", index_col='YEAR')
# Make the year integers the index
enso.index = pd.date_range(start='1950-01-01',end='2015-07-01',freq='M')
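For the correlation-and-significance question above, a compact option (a hypothetical sketch, not part of the original notebook) is scipy's stats.pearsonr, which returns the correlation coefficient together with a two-sided p-value in one call; it can be applied once two monthly series have been aligned and cleaned of NaNs, much as stats.linregress is used further below.
In [ ]:
# Hypothetical sketch: correlation coefficient plus p-value in one call with stats.pearsonr
# (np and stats are imported at the top of the notebook; the series here are synthetic stand-ins)
x_demo = np.random.randn(360)                     # stand-in for a monthly climate index
y_demo = 0.3 * x_demo + np.random.randn(360)      # stand-in for monthly precip anomalies
r, p = stats.pearsonr(x_demo, y_demo)
print('r = {0:.3f}, p = {1:.3g}'.format(r, p))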
In [ ]:
month_list = soi.keys()
tmp_data = []
for year in soi.index:
for month in month_list:
tmp_data.append(soi[month][year])
#print(month,year,soi[month][year])
tmp_data = np.array(tmp_data)
In [ ]:
mon_list = qbo30.keys()
qbo3_data = []
qbo5_data = []
for year in qbo30.index:
for month in mon_list:
qbo3_data.append(qbo30[month][year])
qbo5_data.append(qbo50[month][year])
#print(month,year,qbo30[month][year])
qbo3_data = np.array(qbo3_data)
qbo5_data = np.array(qbo5_data)
In [ ]:
#Creating their data frames
df_soi = pd.DataFrame(data=tmp_data,index=pd.date_range(start='1951-01-01',end='2015-12-31',freq='M'),columns=['SOI'])
df_qbo3=pd.DataFrame(data=qbo3_data,index=pd.date_range(start='1979-01-01',end='2015-12-31',freq='M'),columns=['QBO3'])
df_qbo5=pd.DataFrame(data=qbo5_data,index=pd.date_range(start='1979-01-01',end='2015-12-31',freq='M'),columns=['QBO5'])
In [ ]:
#to convert a DataFrame to a TimeSeries
soii = df_soi.unstack()
qbo3 = df_qbo3.unstack()
qbo5 = df_qbo5.unstack()
print(np.mean(soii), np.mean(qbo3), np.mean(qbo5))
In [ ]:
#Indian Ocean Dipole Index
f = netCDF4.Dataset("Data/dmi.nc") # Assign the netcdf file
#print(f.variables) # Show what is in the netcdf file
dmi = f["DMI"][:] # Call the DMI and dates and assign them to a variable
dmi_date = f["WEDCEN2"][:]
dmi_dates = netCDF4.num2date(f['WEDCEN2'][:],units = f['WEDCEN2'].units)
dmi_index = pd.to_datetime(dmi_dates)
f.close() # close the connection to the netcdf file
In [ ]:
df_iod = pd.DataFrame(data=dmi,index=dmi_index,columns=['IOD'])
iod = df_iod.unstack() #convert a DataFrame to a TimeSeries
dmi_mon = df_iod['IOD'].resample('M', how='mean') #Resampling the weekly IOD data into monthly for correlation.
In [ ]:
acc_monthly = df_prcp.Acc_anomaly.resample('M', how='mean') #monthly Deseasonalized precip
era_monthly = df_era.ERA_anomaly.resample('M', how = 'mean')
extreme_monthly = df_prcp["Acc_anomaly"][extremes].resample('M', how='mean')
In [ ]:
#Plots to explore the data
enso_fig = plt.figure(dpi=72)
enso_fig.set_size_inches(20,5)
ax = enso_fig.add_subplot(111)
ax.plot(acc_monthly['1955-01-01':'1990-12-31'].index,acc_monthly['1955-01-01':'1990-12-31'], 'b',ms=3.0,alpha=0.75)
ax.plot(era_monthly['1981-01-01':'1990-12-31'].index,era_monthly['1981-01-01':'1990-12-31'], 'g',ms=3.0,alpha=0.75)
ax.plot(enso['1955-01-01':'1990-12-31'].index,enso['ANOM.3']['1955-01-01':'1990-12-31'], 'r',ms=3.0,alpha=0.75)
ax.grid(True)
In [ ]:
# A value only equals itself when it is not NaN, so these masks flag the non-missing months
good_dat = acc_monthly['1961-01-01':'1990-12-31'] == acc_monthly['1961-01-01':'1990-12-31']
good_era = era_monthly['1981-01-01':'2010-12-31'] == era_monthly['1981-01-01':'2010-12-31']
enso_stats = stats.linregress(enso['ANOM.3']['1961-01-01':'1990-12-31'][good_dat],
acc_monthly['1961-01-01':'1990-12-31'][good_dat])
enso_stats
#Results show a very low (<10%) correlation of EA monthly precipitation with the ENSO index
In [ ]:
era_stats = stats.linregress(enso['ANOM.3']['1981-01-01':'2010-12-31'][good_era],
era_monthly['1981-01-01':'2010-12-31'][good_era])
era_stats
In [ ]:
corr_enso = plt.figure(dpi=72)
corr_enso.set_size_inches(5,5)
ax1 = corr_enso.add_subplot(111)
ax1.plot(enso['ANOM.3']['1961-01-01':'1990-12-31'],acc_monthly['1961-01-01':'1990-12-31'],'r.')
ax1.plot(enso['ANOM.3']['1981-01-01':'2010-12-31'],era_monthly['1981-01-01':'2010-12-31'],'b.')
fit = np.arange(-3,3,0.1)*enso_stats[0] + enso_stats[1]   # slope*x + intercept
fit1 = np.arange(-3,3,0.1)*era_stats[0] + era_stats[1]
ax1.plot(np.arange(-3,3,0.1), fit, 'r-')
ax1.plot(np.arange(-3,3,0.1), fit1, 'b-')
ax1.set_ylabel(r'Monthly Precip (Anomalies)', fontsize=14)
ax1.set_title('ENSO Scatter plot Zone 2', fontsize=14)
ax1.set_xlabel(r'ENSO(3.4) Index', fontsize=14)
corr_enso.text(0.4, 0.85, r'r = 0.0821 - Station data' ,fontsize=12,color='r')
corr_enso.text(0.4, 0.80, r'r = -0.025 - RCA4_ERA' ,fontsize=12,color='b')
#corr_enso.savefig('Zone2_Index_ENSO+ERA.pdf',dpi=300)
In [ ]:
good_dt = acc_monthly['1997'] == acc_monthly['1997']
enso_stat = stats.linregress(enso['ANOM.3']['1997'][good_dt],
acc_monthly['1997'][good_dt])
enso_stat
#Results show a significant (67%) correlation of EA monthly precipitation during the 97/98
#El Niño episode ###Interesting ???
In [ ]:
rvalue=0.59
print("r-squared:", rvalue**2)
In [ ]:
plt.plot(acc_monthly['1955-01-01':'1990-12-30'].index,acc_monthly['1955-01-01':'1990-12-30'])
plt.plot(df_soi['1955-01-01':'1990-12-30'].index, df_soi['1955-01-01':'1990-12-30'])
In [ ]:
soi_stats = stats.linregress(df_soi['SOI']['1961-01-01':'1990-12-31'][good_dat],
acc_monthly['1961-01-01':'1990-12-31'][good_dat])
soiera_stats = stats.linregress(df_soi['SOI']['1981-01-01':'2010-12-31'][good_era],
era_monthly['1981-01-01':'2010-12-31'][good_era])
print("soi correlation statistics:", soi_stats)
print("RCA4, soi correlation_stats:", soiera_stats)
In [ ]:
df_qbo5['QBO5'].head()
In [ ]:
qbo_gd = acc_monthly['1981-01-01':'1990-12-31'] == acc_monthly['1981-01-01':'1990-12-31']
qbo3_stats = stats.linregress(df_qbo3['QBO3']['1981-01-01':'1990-12-31'][qbo_gd],
acc_monthly['1981-01-01':'1990-12-31'][qbo_gd])
qbo3era_stats = stats.linregress(df_qbo3['QBO3']['1981-01-01':'2010-12-31'][good_era],
era_monthly['1981-01-01':'2010-12-31'][good_era])
print("QBO correlation statistics:",qbo3_stats)
print("RCA4, QBO correlation_stats:",qbo3era_stats)
In [ ]:
stats.linregress(dmi_mon,dmi_mon)
In [ ]:
#Linear Regression of Monthly anomalized data with IOD
gd_data = acc_monthly['1981-11-30':'1990-12-31'] == acc_monthly['1981-11-30':'1990-12-31']
iod_stats = stats.linregress(dmi_mon['1981-11-30':'1990-12-31'][gd_data],
acc_monthly['1981-11-30':'1990-12-31'][gd_data])
iodera_stats = stats.linregress(dmi_mon['1981-11-30':'2010-12-31'][good_era],
era_monthly['1981-11-30':'2010-12-31'][good_era])
print(iod_stats)
print(iodera_stats)
In [ ]:
corr_iod = plt.figure(dpi=72)
corr_iod.set_size_inches(5,5)
ax1 = corr_iod.add_subplot(111)
ax1.plot(dmi_mon['1981-11-30':'1990-08-31'],acc_monthly['1981-11-30':'1990-08-31'],'r.')
fit = np.arange(-3,3,0.1)*iod_stats[0] + iod_stats[1]   # slope*x + intercept
ax1.plot(np.arange(-3,3,0.1), fit, 'r-')
ax1.set_ylabel(r'Monthly Precip (Anomalies)')
ax1.set_title('IOD Scatter plot for Zone 2')
ax1.set_xlabel(r'IOD Index ')
corr_iod.text(0.4, 0.85, r'r = -0.0097' ,fontsize=12,color='b')
#corr_iod.savefig('Zone1_Index_IOD.pdf',dpi=300)
In [ ]:
selector = acc_monthly.index.month
selectors = enso['ANOM.3'].index.month
selectorss = dmi_mon.index.month
selector_era = era_monthly.index.month
In [ ]:
era_monthly[selector_era == 1].head()
In [ ]:
monthly_corr = plt.figure(dpi=300)
monthly_corr.set_size_inches(15,5)
ax = monthly_corr.add_subplot(111)
ax.plot(acc_monthly[selector == 11].index, acc_monthly[selector ==11], 'b', ms=3.0,alpha=0.75)
ax.plot(enso['ANOM.3'][selectors==11].index, enso['ANOM.3'][selectors==11], 'r', ms=3.0,alpha=0.75)
ax.plot(dmi_mon[selectorss==11].index,dmi_mon[selectorss==11], 'g', ms=3.0,alpha=0.75)
leg=ax.legend(['Precip','ENSO3.4 Index','IOD Index'],prop={'size':12},numpoints=1,markerscale=1.,
frameon=True,fancybox=True)
ax.set_ylabel(r'Anomalies')
ax.set_title(r'Relationship of EA precip vs ENSO Index & IOD Index for the Month of November (Zone 2)')
ax.set_xlabel(r'Time (months_Nov)')
ax.grid(True)
#monthly_corr.savefig('Zone1_Correlation_plot.pdf',dpi=300)
In [ ]:
#gd = acc_monthly[selector == 5]['1955-01-01':'1990-12-31'] == acc_monthly[selector == 5]['1955-01-01':'1990-12-31']
#stats = stats.linregress(enso['ANOM.3'][selectors==5]['1955-01-01':'1990-12-31'][gd],
# acc_monthly[selector==5]['1955-01-01':'1990-12-31'][gd])
#stats
#High r values Mar=-30% April=25% May =44% Oct=35%, Nov=34% ENSO
#High r values June=29% May=42% Oct=42%, Nov=70% Dec=20% IOD
In [ ]:
corr_mon = plt.figure(dpi=72)
corr_mon.set_size_inches(5,5)
ax1 = corr_mon.add_subplot(111)
ax1.plot(enso['ANOM.3'][selectors==11]['1955-01-01':'1990-12-31'],
acc_monthly[selector==11]['1955-01-01':'1990-12-31'],'r.')
#fit = np.arange(-3,3,0.1)*stats[0] + stats[0]
ax1.plot(np.arange(-3,3,0.1), fit, 'r-')
ax1.set_ylabel(r'Nov precip (mm)')
ax1.set_title('NOV (ENSO) Scatter plot Zone 2')
ax1.set_xlabel(r' ENSO Index ($^{o}$C)')
corr_mon.text(0.2, 0.85, r'r = -0.20968898413341716' ,fontsize=12,color='b')
#corr_mon.savefig('Zone1_Index_MAY.pdf',dpi=300)
In [ ]:
#Exploring the seasonal correlations here
'''
To do seasonal anomaly correlation
'''
acc_seasonal = acc_monthly.resample('Q', how='mean') #Seasons "JFM, AMJ, JAS, OND"
xtrm_seasonal = extreme_monthly.resample('Q', how='mean') #Seasons "JFM, AMJ, JAS, OND"
era_seasonal = era_monthly.resample('Q', how='mean')
enso_seasonal = enso['ANOM.3'].resample('Q', how='mean')
dmi_seasonal = df_iod['IOD'].resample('Q', how='mean')
soi_seasonal = df_soi['SOI'].resample('Q', how='mean')
qbo_seasonal = df_qbo3['QBO3'].resample('Q', how='mean')
#xtrm_seasonal = extreme_monthly['1954-01-01':'2015-12-31'].resample('4M', how='mean') #Seasons "FMAM, JJAS, ONDJ"
#acc_ssn = acc_monthly.resample('BQ-FEB', how = 'mean') #Seasons "DJF, MAM, JJA, SON"
#enso_ssn = enso['ANOM.3'].resample('BQ-FEB', how='mean') #Seasons "DJF, MAM, JJA, SON"
#dmi_ssn = df_iod['IOD'].resample('BQ-FEB', how='mean')
In [ ]:
x = acc_seasonal.index.month
x1 = enso_seasonal.index.month
x2 = era_seasonal.index.month
x3 = xtrm_seasonal.index.month
x4 = dmi_seasonal.index.month
x5 = soi_seasonal.index.month
x6 = qbo_seasonal.index.month
In [ ]:
OND_Obs = acc_seasonal[x == 12]
OND_ens = enso_seasonal[x1 == 12]
OND_era = era_seasonal[x2 == 12]
OND_xtr = xtrm_seasonal[x3 == 12]
OND_iod = dmi_seasonal[x4 == 12]
OND_soi = soi_seasonal[x5 == 12]
OND_qbo = qbo_seasonal[x6 == 12]
#JJAS season was used for zone 1 while OND used in Zone 2
In [ ]:
clima = plt.figure()
clima.set_size_inches(15,5)
ax = clima.add_subplot(111)
ax.plot(OND_xtr.index, OND_xtr, 'r', ms=3.0,alpha=1.0,lw=2)
ax.plot(OND_ens.index, OND_ens, 'b', ms=3.0,alpha=1.0,lw=2)
ax.plot(OND_iod.index, OND_iod, 'g', ms=3.0,alpha=1.0,lw=2)
ax.set_title('OND Average ENSO/IOD Indices Correlation with Zone 2 Extreme Precipitation', fontsize = 15)
ax.set_xlabel(r'Years', fontsize = 15)
ax.set_ylabel(r'Anomaly ', fontsize = 15)
#ax.set_xlim('1950' '1990')
#ax.set_ylim(-5,10)
leg=ax.legend(['Station data','ENSO 3.4','IOD'],prop={'size':10},
numpoints=1,markerscale=5.,frameon=True,fancybox=True)
ax.grid(True)
#clima.savefig('Zone1_ClimIndices_EXtreme.pdf',dpi=300)
In [ ]:
clim = plt.figure()
clim.set_size_inches(15,5)
ax = clim.add_subplot(111)
ax.plot(OND_Obs.index, OND_Obs, 'r', ms=3.0,alpha=1.0,lw=2)
ax.plot(OND_era.index, OND_era, 'k', ms=3.0,alpha=1.0,lw=3)
ax.plot(OND_ens.index, OND_ens, 'b', ms=3.0,alpha=1.0,lw=2)
ax.plot(OND_iod.index, OND_iod, 'g', ms=3.0,alpha=1.0,lw=2)
#ax.plot(OND_soi.index, OND_soi, 'm', ms=3.0,alpha=1.0,lw=1.5)
#ax.plot(OND_qbo.index, OND_qbo, 'y', ms=3.0,alpha=1.0,lw=1.5)
ax.set_title(' OND Average ENSO/IOD Indices Correlation with Zone 2 Precipitation ', fontsize = 15)
ax.set_xlabel(r'Years', fontsize = 15)
ax.set_ylabel(r'Anomaly ', fontsize = 15)
ax.set_xlim('1952', '2014')
ax.set_ylim(-5,8)
leg=ax.legend(['Station data','RCA4_ERA','ENSO 3.4','IOD','SOI','QBO'],prop={'size':10},
numpoints=1,markerscale=5.,frameon=True,fancybox=True)
ax.grid(True)
#clim.savefig('Zone2_ClimIndices_ENSO+IOD.pdf',dpi=300)
In [ ]:
iod = plt.figure(dpi=72)
iod.set_size_inches(15,5)
ax = iod.add_subplot(111)
#plt.plot(acc_monthly['1981-01-01':'2006-12-31'].index, acc_monthly['1981-01-01':'2006-12-31'])
ax.plot(OND_iod.index, OND_iod, 'r')
ax.plot(OND_Obs.index, OND_Obs, 'b')
#ax.plot(enso['1981-08-01':'2015-12-31'].index, enso['ANOM.3']['1981-08-01':'2015-12-31'], 'b')
ax.set_title('OND-average IOD Index and Station Precipitation Time Series')
ax.set_xlabel(r'Time')
ax.set_ylabel(r'IOD Index ($^{o}$C)')
ax.grid(True)
#iod.savefig('Zone1_IOD.pdf',dpi=300)
In [ ]:
ssn_ensostats = stats.linregress(OND_ens['1953-01-01':'2015-12-31'], OND_Obs['1953-01-01':'2015-12-31'])
ssn_soistats =stats.linregress(OND_soi['1981-01-01':'2010-12-31'], OND_Obs['1981-01-01':'2010-12-31'])
ssn_iodstats =stats.linregress(OND_iod['1981-12-31':'2014-12-31'], OND_Obs['1981-12-31':'2014-12-31'])
ssn_qbostats =stats.linregress(OND_qbo['1981-12-31':'2010-12-31'], OND_Obs['1981-12-31':'2010-12-31'])
print("ENSO Correlation Stats:",ssn_ensostats)
print("SOI Correlation Stats:",ssn_soistats)
print("IOD Correlation Stats:",ssn_iodstats)
print("QBO Correlation Stats:",ssn_qbostats)
In [ ]:
#xerr = np.std(OND_iod['1982-01-01':'2010-12-31'])/np.sqrt(len(OND_iod['1982-01-01':'2010-12-31']) - 1)
xerr = np.std(OND_ens['1953-01-01':'2015-12-31'])
yerr = np.std(OND_Obs['1953-01-01':'2015-12-31'])
In [ ]:
enso_sca = plt.figure()
enso_sca.set_size_inches(5,5)
ax1 = enso_sca.add_subplot(111)
ax1.errorbar(OND_ens['1953-01-01':'2015-12-31'], OND_Obs['1953-01-01':'2015-12-31'],yerr, xerr,'ro')
fit = np.arange(-4,4,0.1)*ssn_ensostats[0] + ssn_ensostats[1]   # slope*x + intercept
ax1.plot(np.arange(-4,4,0.1), fit, 'r-', lw=2)
ax1.set_ylabel(r'Precipitation Anomalies', fontsize=14)
ax1.set_title(' Relationship between obs_rain and ENSO in Zone 2', fontsize=12)
ax1.set_xlabel(r'ENSO Index', fontsize=14)
ax1.grid(True)
enso_sca.text(0.4, 0.85, r'rvalue = 0.29 ' ,fontsize=13,color='b')
enso_sca.text(0.4, 0.80, r'slope = 0.41 ' ,fontsize=13,color='b')
enso_sca.text(0.4, 0.75, r'pvalue = 0.02' ,fontsize=13,color='b')
#enso_sca.savefig('Zone2_Index-ENSO_Obs.pdf',dpi=300)
In [ ]: