In [ ]:
from __future__ import print_function, division, generators
import numpy as np
import numpy.ma as ma
import pandas as pd
import matplotlib.pyplot as plt
import math
import scipy as sci
import seaborn as sns
import os
import glob
import datetime as dt
import netCDF4
from scipy.stats import norm as scipy_stats_norm
from scipy import stats
%matplotlib inline

1. Read the station data downloaded from the GHCN archive

Data obtained from http://www.ncdc.noaa.gov/cdo-web/


In [ ]:
def get_date(date_number):
    """
    Turn the int64 value from the DATE of GHCN into a pd.datetime
    """
    dstring = str(date_number)
    return pd.datetime(int(dstring[0:4]),int(dstring[4:6]),int(dstring[6:8]))

def get_df(fnm, var, no_missing = True):
    """
    Create a dataframe for a single station, with a time index, for a single
    variable of data given as a key word (e.g. PRECIP, TMAX, TMIN).
    Requires file path and name (fnm).
    no_missing is a Bool that optionally masks out values < -99 from the df.
    """
    df = pd.read_csv(fnm)
    dt_indx = [get_date(date) for date in df.DATE]
    data_vals = df[var].values
    if var == 'PRCP':
        data_vals = data_vals / 10.  # This is to convert precip data to mm
    if no_missing:
        tmp_df = pd.DataFrame(data=data_vals,
                              index=dt_indx,columns=[df.STATION[0][6:]])
        mask = tmp_df > -99.  # A catchall value for missing data in GHCN
        return tmp_df[mask]
    else:
        return pd.DataFrame(data=data_vals,
                             index=dt_indx,columns=[df.STATION[0][6:]])

def get_combined_df(fpth, var):
    """
    From a given file path, and variable, extract data from all .csv files, and
    place in a single dataframe object.
    """
    flist = glob.glob(fpth)
    df_dic = {}
    for f in flist:
        df_dic[f[5:]] = get_df(fnm = f, var = var, no_missing=True)
    return pd.concat([df_dic[key] for key in df_dic.keys()],axis=1)

Call the get_combined_df() function to create dataframes from all of the data files in a folder.


In [ ]:
%%time
df_tmax = get_combined_df(fpth="Data/zone2/*.csv",var="TMAX")
df_tmin = get_combined_df(fpth="Data/zone2/*.csv",var="TMIN")
df_prcp = get_combined_df(fpth="Data/zone2/*.csv",var="PRCP")

In [ ]:
df_prcp.tail(3)

Reading the RCA4-CORDEX data downloaded from the ICPAC-Kenya data repository

Data obtained from http://197.254.113.174:8081/repository


In [ ]:
# RCA4_ERAINT
era = netCDF4.Dataset("CORDEX/ECMWF-ERAINT/evaluation/pr30_43_4_13_mean.nc") #zone 1 model data
era2 = netCDF4.Dataset("CORDEX/ECMWF-ERAINT/evaluation/pr28_42_-7_4_mean.nc") #Zone 2 model data
eraint = era2['pr'][:,0,0]

time = era2["time"]
times = netCDF4.num2date(era2['time'][:],units = era2['time'].units)
times_index = pd.to_datetime(times)
#print(times_index)

In [ ]:
#creating a dataframe for the RCA4 data
df_era = pd.DataFrame(data=eraint,index=pd.date_range(start='1980-01-01',end='2010-12-31'),columns=['ERAINT'])

RX1day: maximum 1-day precipitation: the highest precipitation amount in a 1-day period


In [ ]:
for station in df_prcp:
    print(station, np.max(df_prcp[station]))
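
The loop above gives each station's single highest daily total over the full record. A minimal sketch of annual RX1day values per station (one value per year, assuming df_prcp as built above) could be:


In [ ]:
# Sketch: annual RX1day per station (highest 1-day precipitation total in each year)
rx1day_annual = df_prcp.groupby(df_prcp.index.year).max()
rx1day_annual.tail(3)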

Plot time series of precipitation for all stations, and also accumulate the data and plot the average rainfall.

2. Time series plots

Daily Mean and SEM values: Mean uncertainty is given by SEM, where: $SEM = \frac{\sigma}{\sqrt{n-1}}$


In [ ]:
def calc_SEM(data):
    """
    Calculate Standard error of the mean. No nan's 
    should be in the input (numpy) array.
    """
    return np.std(data)/np.sqrt(len(data) - 1)


def gather_daily_stats(date, df):
    """
    For a specified day, given by date, create a short array of
    observed values (obs) excluding the NaNs. Return the mean
    and SEM value.
    Restrictions: more than one observation on the day, no missing
    values, and daily totals below 200 mm (larger values are treated
    as erroneous).
    """
    obs = np.array([df[key][date] for key in df.keys()])  # use the arguments, not globals
    obs = obs[(obs > -1) & (obs < 200)]
    
    if len(obs) < 2:
        return np.NAN, np.NAN
    return np.mean(obs), calc_SEM(obs)
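
As a quick optional cross-check, scipy's stats.sem gives the same value as calc_SEM, since dividing the population standard deviation by √(n−1) equals dividing the sample standard deviation (ddof=1) by √n. A minimal sketch with a hypothetical sample:


In [ ]:
# Optional cross-check: stats.sem matches calc_SEM for the same input
demo = np.array([1.0, 3.0, 2.5, 4.0, 0.5])   # hypothetical sample values
print(calc_SEM(demo), stats.sem(demo))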

In [ ]:
# Create an accumulated time series (with SEM uncertainty values)
means = []
sems = []
for day in df_prcp.index:
    tmp_mean, tmp_sem = gather_daily_stats(date=day, df=df_prcp)#['1961-01-01':'1990-12-31'])
    means.append(tmp_mean)
    sems.append(tmp_sem)
means = np.array(means)
sems = np.array(sems)
df_prcp['Accumulated']=pd.Series(means,index=df_prcp.index)  #adding columns to the dataframe!
df_prcp['Acc_SEM']=pd.Series(sems,index=df_prcp.index)

In [ ]:
print(np.max(df_prcp['Accumulated']))

In [ ]:
# SEM uncertainty for RCA4_ERA: a single value for the whole series, repeated for every day
sem_era_value = np.std(eraint)/np.sqrt(len(eraint) - 1)
sems_era = np.full(len(df_era.index), sem_era_value)
print(sems_era)

In [ ]:
ts= plt.figure(dpi=300)
ts.set_size_inches(15,5)      # Specify the figure size
ax1 = ts.add_subplot(111)     # Add an axis frame object to the plot (i.e. a panel)

#ax1.plot(df_prcp.index, df_prcp.Accumulated,'.g',ms=2.0)

ax1.errorbar(df_era.index, eraint,
             yerr=sems_era, c='b', alpha=1.,  fmt='-')
ax1.set_ylim(0,20)
plt.title("RCA4 ERAINT (Zone 1) Mean Daily Precipitation", fontsize=15)
plt.ylabel("Precipitation (mm day$^{-1}$)", fontsize=15)
plt.xlabel("Years", fontsize=15)
plt.grid(True)
#ts.savefig('Zone1_ERAts.pdf',dpi=300)

SDII (simple precipitation intensity index) = mean daily precipitation amount on wet days (RR ≥ 1 mm) in the period
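
SDII itself is not computed in the cell below (which plots the daily time series), so here is a minimal sketch of it, assuming the Accumulated column created above holds the mean daily series:


In [ ]:
# Sketch: annual SDII = mean precipitation amount on wet days (RR >= 1 mm)
wet_days = df_prcp['Accumulated'][df_prcp['Accumulated'] >= 1.0]
sdii_annual = wet_days.groupby(wet_days.index.year).mean()
sdii_annual.head()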


In [ ]:
daily_ts = plt.figure(dpi=300)
daily_ts.set_size_inches(15,5)      # Specify the figure size
ax1 = daily_ts.add_subplot(111)     # Add an axis frame object to the plot (i.e. a panel)

ax1.errorbar(df_prcp.index, df_prcp.Accumulated,
             yerr=df_prcp.Acc_SEM, c='b', alpha=1.,lw=1.5, fmt='-')
ax1.errorbar(df_era.index, eraint,
             yerr=sems_era, c='r', alpha=1., lw=1.5, fmt='-')
leg1=ax1.legend(['Station data','RCA4_ERA'],prop={'size':10},numpoints=1,markerscale=5.,frameon=True,fancybox=True)
ax1.set_ylim(0,150)
plt.xlim('1953-01-01','2015-12-31')
plt.title(" East Africa's Zone 2 Daily Average Precipitation", fontsize=15)
plt.ylabel("Precipitation (mm day$^{-1}$)", fontsize=15)
plt.xlabel("Years", fontsize=15)
plt.grid(True)
plt.show(daily_ts)
#daily_ts.savefig('Zone2_ts+ERA.pdf',dpi=300)

3. Density plots


In [ ]:
mask = df_prcp.Accumulated > 0.0

In [ ]:
# N.b. the KDE (kernel density estimate) is Gaussian - which is not true
# for precip data (log or power law data)...
daily_dp = plt.figure()
daily_dp.set_size_inches(12,5)
ax = daily_dp.add_subplot(111)

sns.distplot(df_prcp.Accumulated[mask],bins=100,norm_hist = True,kde=False,color = 'r')
sns.kdeplot(df_prcp.Accumulated[mask],shade=True,kernel='cos',cumulative=False,color='b')
leg1=ax.legend(['KDE','Accumulated mean'],prop={'size':11},
                numpoints=1,markerscale=5.,frameon=True,fancybox=True)

ax.set_xlim(0,30)
ax.set_title("East Africa Mean(Zone1) Precipitation")
ax.set_xlabel(r'Precip. (mm day$^{-1}$)')
ax.set_ylabel('Density (0-1)')

plt.show(daily_dp)
#daily_dp.savefig('Zone1_Densityplot.pdf',dpi=300)

In [ ]:
sns.kdeplot

In [ ]:
#Histogram
#source code from https://github.com/benlaken/Tanzania/blob/master/Precipitation_Tanzania.ipynb
hist_dp = plt.figure()
hist_dp.set_size_inches(5,5)          # Specify the output size
ax1 = hist_dp.add_subplot(211)        # Add an axis frame object to the plot (i.e. a panel)
ax2 = hist_dp.add_subplot(212)        # Add an axis frame object to the plot (i.e. a panel)

# the histogram of the data
ax1.set_title(r'PDFs of Daily Precipitation for Zone 2', fontsize=13)
n, bins, patches = ax1.hist(df_prcp.Accumulated[mask], 100, normed=True, facecolor='blue', alpha=0.75,
                            histtype='stepfilled')
n, bins, patches = ax1.hist(eraint, 100, normed=True, facecolor='red', alpha=0.75,
                            histtype='stepfilled')
ax1.grid(True)
ax1.set_ylabel('Density', fontsize =15)
n, bins, patches = ax2.hist(df_prcp.Accumulated[mask], 100, normed=True, facecolor='blue', alpha=0.75,
                            histtype='stepfilled',cumulative=True)
n, bins, patches = ax2.hist(eraint, 100, normed=True, facecolor='red', alpha=0.75,
                            histtype='stepfilled',cumulative=True)
leg1=ax1.legend(['Station data','RCA4_ERA'],prop={'size':10},numpoints=1,markerscale=5.,frameon=True,fancybox=True)
leg1=ax2.legend(['Station data','RCA4_ERA'],prop={'size':10}, numpoints=1,markerscale=5.,frameon=True,fancybox=True)
ax1.set_ylim(0,0.5)
plt.xlabel(r'mm day$^{-1}$', fontsize=13)
plt.ylabel('Cumulative density', fontsize =13)
plt.grid(True)

plt.show()
#hist_dp.savefig('Zone2_Density_plots.pdf',dpi=300)

In [ ]:
# define extreme quantiles
percentileZero    = min(df_prcp.Accumulated[mask])
percentileHundred = max(df_prcp.Accumulated[mask])

print('Min. precip', percentileZero)
print('Max. precip', percentileHundred)
print("Median", np.percentile(df_prcp.Accumulated[mask],50))

In [ ]:
srtd = sorted(df_prcp.Accumulated[mask])
percent = [val/len(srtd) * 100. for val in range(len(srtd))]
plt.plot(percent,srtd)
plt.grid(True)

In [ ]:
print(np.percentile(df_prcp.Accumulated[mask],90))
print(np.percentile(srtd,10))

4. Seasonality

  • Calculate the Day of Year (DOY) mean over the data period (climatology).

In [ ]:
doy_mean=[]
doy_sem =[]

for doy in range(366):
    index = df_prcp['1961-01-01':'1990-12-31'].index.dayofyear == doy+1 
    #print(index)
    doy_mean.append(np.nanmean(df_prcp['Accumulated']['1961-01-01':'1990-12-31'][index]))
    doy_sem.append(calc_SEM(df_prcp['Accumulated']['1961-01-01':'1990-12-31'][index]))

doy_mean = np.array(doy_mean)
doy_sem = np.array(doy_sem)

In [ ]:
doy_eramean=[]
doy_erasem =[]

for doy in range(366):
    index = times_index.dayofyear == doy+1 
    #print(index)
    doy_eramean.append(np.nanmean(eraint[index]))
    doy_erasem.append(calc_SEM(eraint[index]))

doy_eramean = np.array(doy_eramean)
doy_erasem = np.array(doy_erasem)

In [ ]:
ssn_rmean = pd.rolling_mean(doy_mean, window=30, min_periods=0, center = True)
ssn_eramean = pd.rolling_mean(doy_eramean, window=30, min_periods=0, center = True)
#ssn_rmean[-30:] = np.nan

In [ ]:
#Plot the seasonal climatology East Africa precip data
mnths= ['Jan','Feb','Mar','Apr','May','June','Jul','Aug','Sep','Oct','Nov','Dec']
#mrange = arange(12)

my_sclim = plt.figure(dpi=300)
my_sclim.set_size_inches(10,5)        # Specify the output size
ax1 = my_sclim.add_subplot(111)        # Add an axis frame object to the plot (i.e. a panel)

ax1.errorbar(range(366),doy_mean,xerr=None, yerr=doy_sem, c='b', alpha=1., lw=1.5)
ax1.errorbar(range(366),doy_eramean,xerr=None, yerr=doy_erasem, c='r', alpha=1.,lw=1.5)
#ax1.plot(range(366), ssn_rmean,'b-', alpha=1.0)
#ax1.plot(range(366), ssn_eramean,'r-', alpha=1.0)

leg1=ax1.legend(['Station data','RCA4_ERA'],prop={'size':12},numpoints=1,markerscale=5.,frameon=True,fancybox=True)
plt.xlim(0,max(range(366)))
plt.title("Climatology Precipitation for Zone 2 by Day (& 30day smooth)", fontsize=15)
plt.ylabel("Precipitation (mm day$^{-1}$)", fontsize=15)
plt.xlabel("Day of Year (DOY)", fontsize=15)
plt.grid(True)  
#my_sclim.savefig('Zone2_SeasonalClimatology_+ERA_plot.pdf',dpi=300)

Anomaly

  • The seasonal DOY mean was used to calculate deviations (anomaly) from the daily mean

In [ ]:
# wordy example of how to access/calculate anomaly
for daily_rain in zip(df_prcp.index[5000:5003],df_prcp.Accumulated[5000:5003]):
    print('Day {0}, rainfall {1:3.2f}mm'.format(daily_rain[0].date(),daily_rain[1]))
    print('DOY is',daily_rain[0].dayofyear)
    print("DOY climo value is {0:3.2f}".format(doy_mean[daily_rain[0].dayofyear -1]))
    print("Daily anomaly is {0:3.2f}".format(daily_rain[1] - doy_mean[daily_rain[0].dayofyear -1]))
    print(np.isnan(daily_rain[1]))
    print("")

In [ ]:
#---Create a seasonal deviation from climatology--
#Anomalies = Observation - Climatology
prcp_anom = []
for daily_rain in zip(df_prcp.index,df_prcp.Accumulated):
    if np.isnan(daily_rain[1]):
        prcp_anom.append(np.NAN)
    else:
        prcp_anom.append(daily_rain[1] - doy_mean[daily_rain[0].dayofyear -1])
prcp_anom = np.array(prcp_anom)

In [ ]:
#---Create a seasonal deviation from climatology for RCA4
era_anom = []
for daily_rain in zip(df_era.index,df_era.ERAINT):
    if np.isnan(daily_rain[1]):
        era_anom.append(np.NAN)
    else:
        era_anom.append(daily_rain[1] - doy_mean[daily_rain[0].dayofyear -1])
era_anom = np.array(era_anom)

In [ ]:
df_prcp['Acc_anomaly'] = prcp_anom  #adding columns to the dataframe!
df_era['ERA_anomaly'] = era_anom

In [ ]:
# ---plot the anomalized rainfall data with errors---
my_anom = plt.figure(dpi=300)
my_anom.set_size_inches(10,5)        # Specify the output size
ax1 = my_anom.add_subplot(111)        # Add an axis frame object to the plot (i.e. a panel)

ax1.errorbar(df_prcp['Acc_anomaly'].index ,df_prcp['Acc_anomaly'],yerr=df_prcp['Acc_SEM'],
             color='b', fmt='.',xerr=None,alpha=0.5)
             
ax1.errorbar(times_index,era_anom,yerr=sems_era,
             color='r', fmt='.',xerr=None,alpha=0.5)
ax1.set_ylim(-20,150)
#plt.xlim('1953-01-01','1990-12-31')
ax1.set_title(r'East Africa (Zone 1) Deseasonalized  Daily Precipitation Climatology ($\delta$Precip.)')
ax1.set_ylabel(r'mm day$^{-1}$')
ax1.set_xlabel('Years')
ax1.grid(True)
plt.show(my_anom)
#my_anom.savefig('EA Zone1 Deseasonalized Precip.pdf',dpi=300)

In [ ]:
#doy_values = [doy.dayofyear - 1 for doy in df_prcp.index]
figx = plt.figure(dpi=72)
figx.set_size_inches(10,5)      # Specify the figure size
ax1 = figx.add_subplot(111)   

#---Plot the seasonal climatology East Africa precip data---
ax1.errorbar(range(366),doy_mean,xerr=None, yerr=doy_sem, color='b', alpha=0.8 )
ax1.errorbar(range(366),doy_eramean,xerr=None, yerr=doy_erasem, color='k', alpha=0.8 )
ax1.plot(df_prcp.index.dayofyear -1 ,df_prcp['Accumulated'],'.',ms=2.5,alpha=1.0,color='r')
plt.xlim(0,max(range(366)))
plt.title("")
plt.ylabel("Precipitation (mm)")
plt.xlabel("Day of Year (DOY)")
plt.grid(True)

5. Extreme Precip Events

Extreme events have been defined by an absolute threshold set by SWFDP-RSMC-Nairobi (Rnn mm = count of days where RR ≥ a user-defined threshold of nn mm)
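
For reference, a minimal Rnn helper (hypothetical, not part of the original analysis) that counts days per year at or above a user-defined threshold could look like this:


In [ ]:
# Sketch: Rnn = annual count of days with daily precipitation >= nn mm
def rnn_count(series, nn):
    """Hypothetical helper: count days per year where precipitation >= nn mm."""
    days = series[series >= nn]
    return days.groupby(days.index.year).size()

rnn_count(df_prcp['Accumulated'], 20).head()   # e.g. R20mm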


In [ ]:
#A mask for the df_prcp to identify categories of Extreme rainfall events
"""
The thresholds used here are based on the definitions used by
SWFDP-EA. Note that these hold under natural conditions:
No risk  - <5mm
Low risk - 5mm-20mm
Medium risk 20 - 50mm
High risk >=50mm
"""
cond1 = df_prcp.Accumulated < 5
cond2 = df_prcp.Accumulated > 5
cond3 = df_prcp.Accumulated < 20
highmed_risk = df_prcp.Accumulated[df_prcp.Accumulated > 20] #Medium to high risk
high_risk = df_prcp.Accumulated[df_prcp.Accumulated > 50]
medium_risk = df_prcp.Accumulated[(df_prcp.Accumulated > 20) & (df_prcp.Accumulated < 50)]
low_risk = df_prcp.Accumulated[cond2 & cond3]
no_risk = df_prcp.Accumulated[cond1]

In [ ]:
#A mask for the df_era to identify if there are similar Extreme rainfall events in RCA4 data
nrisk= df_era.ERAINT[df_era.ERAINT < 5]
yrisk = df_era.ERAINT[(df_era.ERAINT>5) & (df_era.ERAINT<20) ]
print(len(yrisk))

In [ ]:
daily_floodrisk = plt.figure(dpi=72)
daily_floodrisk.set_size_inches(12,7)      # Specify the figure size
ax1 = daily_floodrisk.add_subplot(111)     #

ax1.plot(high_risk.index, high_risk,'ro',alpha=1.,linewidth = 3, ms=3)
ax1.plot(medium_risk.index, medium_risk,'bo',alpha=0.9,ms=3)
ax1.plot(low_risk.index, low_risk,'co',alpha=0.9,ms=3)
ax1.plot(no_risk.index, no_risk,'go',alpha=0.9,ms=3)
leg1=ax1.legend(['high risk','medium risk','low risk','no risk'],
                prop={'size':12},numpoints=1,markerscale=5.,frameon=True,fancybox=True)
#plt.xlim('1953-01-01','1990-12-31')
plt.title(r" Zone 2 Station data Extreme Precipitation Events based on Absolute Threshhold", fontsize=15)
plt.ylabel(r"Amounts (mm day$^{-1}$)", fontsize=15)
plt.xlabel("Years", fontsize=15)
plt.grid(True)
plt.show(daily_floodrisk)
#daily_floodrisk.savefig('Zone1_ExtremeEvent.pdf',dpi=300)

Summary statistics

Frequency of extreme events based on the absolute threshold set by SWFDP-RSMC-Nairobi

In [ ]:
#low_risk.groupby( [low_risk.index.year, low_risk.index.month, low_risk.index.day] ).count()
lwrisk_freq = low_risk.groupby(low_risk.index.year).count()/365 #To get normalized freq per annum
mdrisk_freq = medium_risk.groupby(medium_risk.index.year).count()/365
mdhgrisk_freq = highmed_risk.groupby(highmed_risk.index.year).count()/365
hgrisk_freq = high_risk.groupby( high_risk.index.year ).count()
#print(hgrisk_freq)
#ERA
yrisk_freq = yrisk.groupby(yrisk.index.year).count()/365

In [ ]:
frq_rmean_low = pd.rolling_mean(lwrisk_freq, window=10, min_periods=0, center = True)
frq_rmean_med = pd.rolling_mean(mdrisk_freq, window=10, min_periods=0, center = True)
frq_rmean_medhig = pd.rolling_mean(mdhgrisk_freq, window=10, min_periods=0, center = True)
frq_rmean_hig = pd.rolling_mean(hgrisk_freq, window=10, min_periods=0, center = True)

#ERA
frq_rmean_yrisk = pd.rolling_mean(yrisk_freq, window=10, min_periods=0, center = True)

In [ ]:
#Frequency plot 
freq = plt.figure(dpi=300)
freq.set_size_inches(15,5)        # Specify the output size
ax1 = freq.add_subplot(121)
ax2 = freq.add_subplot(122)

ax1.plot(lwrisk_freq.index, lwrisk_freq ,'b-',ms=10.0,alpha=1., lw=1.5)
ax1.plot(yrisk_freq.index, yrisk_freq ,'g-',ms=3.0,alpha=1., lw=1.5)
ax1.plot(lwrisk_freq.index, frq_rmean_low,'r--', linewidth=2, alpha=1.)
ax1.plot(yrisk_freq.index, frq_rmean_yrisk,'r--', linewidth=2, alpha=1.)
ax1.set_title('Zone 2 Frequency of Low risk extreme precip \n based on Absolute Threshold')
ax1.set_ylabel(r' Normalized counts of low risk events', fontsize = 12)
ax1.set_xlabel(r'Years', fontsize = 12)
#ax1.set_xlim(1953, 1990)
leg1=ax1.legend(['Station data','RCA4_ERA'],prop={'size':11},numpoints=1,markerscale=5.,frameon=True,fancybox=True)

ax1.grid(True)

ax2.plot(mdhgrisk_freq.index, mdhgrisk_freq ,'b-',ms=5.0,alpha=1.,lw=1.5)
ax2.plot(mdhgrisk_freq.index, frq_rmean_medhig,'r--', linewidth=2, alpha=1.,lw=1.5)
ax2.set_title('Zone 2 Frequency of Medium-High risk extreme precip \n based on Absolute Threshold')
ax2.set_xlabel(r"Years", fontsize = 12)
#ax2.set_xlim(1953, 1990)
ax2.set_ylabel('Normalized counts of medium risk events', fontsize = 12)
leg1=ax2.legend(['Station data'],prop={'size':11},numpoints=1,markerscale=5.,frameon=True,fancybox=True)

ax2.grid(True)

#freq.savefig('Zone2_freq_plot+ERA.pdf',dpi=300)

Duration of extreme events

  • Here I calculate the time between heavy-precipitation (risk) events directly from the observed event dates (a pandas diff-based sketch is also shown below).
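
A pandas-native sketch of the same gap calculation, differencing the event dates directly (assuming the low_risk series defined above):


In [ ]:
# Sketch: gaps (in days) between consecutive low-risk events via diff()
gaps = low_risk.index.to_series().diff().dt.days   # first value is NaN (no previous event)
gaps.head()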

In [ ]:
low_risk.index[2], low_risk.index[1]

In [ ]:
test = low_risk.index[2] - low_risk.index[1]
print("diffrence in days between first and second lowrisk:",test.days)

In [ ]:
# Print the gap (in days) between each of the first few low-risk events and the previous one
for n, date in enumerate(low_risk.index[1:4], start=1):
    print(date.date(), (date - low_risk.index[n - 1]).days)

In [ ]:
#Duration: days between consecutive events (the first event has no predecessor, so use NaN)
lowrisk_dur = [np.nan]
mediumrisk_dur = [np.nan]

for n, date in enumerate(low_risk.index[1:], start=1):
    lowrisk_dur.append((date - low_risk.index[n - 1]).days)

lowrisk_dur = np.array(lowrisk_dur)

for n, date in enumerate(highmed_risk.index[1:], start=1):
    mediumrisk_dur.append((date - highmed_risk.index[n - 1]).days)

mediumrisk_dur = np.array(mediumrisk_dur)

In [ ]:
#Duration plot 
freq = plt.figure(dpi=72)
freq.set_size_inches(20,5)        # Specify the output size
ax1 = freq.add_subplot(121)
ax2 = freq.add_subplot(122)

ax1.plot(low_risk.index, lowrisk_dur ,'b-',ms=2.0,alpha=1.)
ax1.set_title('Zone 2 Duration of consecutive Low risk precip \n events based on absolute threshold', fontsize=15)
ax1.set_ylabel(r'Days between low risk events',fontsize = 14)
ax1.set_xlabel(r'Years',fontsize = 14)
#ax1.set_xlim('1953-01-03','1990-12-31')
ax1.set_ylim(0,200)
ax1.grid(True)

ax2.plot(highmed_risk.index, mediumrisk_dur,'b-',ms=3.0,alpha=1.)
ax2.set_title('Zone 2 Duration of consecutive Medium-High risk precip \n based on absolute threshold',fontsize=15)
ax2.set_ylabel('Days between Medium-High risk events', fontsize = 14)
ax2.set_ylim(0,1500)
ax2.set_xlabel(r"Years", fontsize = 14)
#ax2.set_xlim('1953-01-03','1990-12-31')
ax2.grid(True)
#freq.savefig('Zone2_Duration_Swfdp1.pdf',dpi=300)

Extreme events based on statistical values of daily anomalies and percentiles


In [ ]:
flood_threshold = np.percentile(df_prcp['Acc_anomaly'][mask],90)
drought_threshold = np.percentile(df_prcp['Acc_anomaly'][mask],10)

print('90th percentile = ',flood_threshold)
print('10th percentile = ',drought_threshold)
print('50th percentile = ',np.percentile(df_prcp['Acc_anomaly'][mask],50))
print('99th percentile = ',np.percentile(df_prcp['Acc_anomaly'][mask],99))
#sns.distplot(df_prcp['Acc_anomaly'][mask])

In [ ]:
flood_era = np.percentile(df_era['ERA_anomaly'],90)
drought_era = np.percentile(df_era['ERA_anomaly'],10)

print('90th percentile = ',flood_era)
print('10th percentile = ',drought_era)
print('50th percentile = ',np.percentile(df_era['ERA_anomaly'],50))
print('99th percentile = ',np.percentile(df_era['ERA_anomaly'],99))
#sns.distplot(df_prcp['Acc_anomaly'][mask])

In [ ]:
my_dist = plt.figure()
my_dist.set_size_inches(10,5)               # Specify the output size
ax1 = my_dist.add_subplot(111)              # Add an axis frame object to the plot (i.e. a panel)

#Univariate distribution of observed daily precipitation.
sns.distplot(df_prcp['Acc_anomaly'][mask],bins=100, norm_hist=True, kde=False) # Filled bars  
sns.kdeplot(df_prcp['Acc_anomaly'][mask],shade=False,kernel='gau',cumulative=False,color='k',lw=3)
sns.distplot(era_anom,bins=100, norm_hist=True, kde=False) # Filled bars  
sns.kdeplot(era_anom,shade=False,kernel='gau',cumulative=False,color='r',lw=2)
ax1.vlines(np.percentile(df_prcp['Acc_anomaly'][mask],90), 0.00, 0.40, colors='b',linestyle='--',lw=2.0)
ax1.vlines(np.percentile(df_prcp['Acc_anomaly'][mask],50), 0.00, 0.40, colors='b',lw=2.0) #Marker line of Median
ax1.vlines(np.percentile(df_prcp['Acc_anomaly'][mask],10), 0.00, 0.40, colors='b',linestyle='-.',lw=3.0)
leg1=ax1.legend(['Station data KDE','90th percentile','50th percentile','10th percentile','observed anomalies'],
                prop={'size':11},numpoints=1,markerscale=5.,frameon=True,fancybox=True)
ax1.vlines(np.percentile(df_era['ERA_anomaly'],90), 0.00, 0.40, colors='y',linestyle='-',lw=1.5)
ax1.vlines(np.percentile(df_era['ERA_anomaly'],50), 0.00, 0.40, colors='y',lw=1.5) #Marker line of Median
ax1.vlines(np.percentile(df_era['ERA_anomaly'],10), 0.00, 0.40, colors='y',linestyle='-',lw=1.5)
ax1.set_title(r'Zone 1 Normalized Extreme Precipitation Percentile plot', fontsize=15)
ax1.set_ylabel(r' Probability Density',fontsize=15)
ax1.set_xlabel(r'Anomalies(mm)',fontsize=15)
ax1.set_xlim(-10, 20)
ax1.set_ylim(0.00, 0.40)
ax1.grid(True)
my_dist.text(0.71, 0.64, " __   RCA4_ERA KDE" ,fontsize=10,color='r')
plt.show(my_dist)

#my_dist.savefig("Zone1_Normalized_Percentile.pdf",dpi=300)#transparent=True)

In [ ]:
# Make a mask for the df_prcp to identify the extreme dates (flood and drought)
extremes = ((df_prcp['Acc_anomaly'] > flood_threshold) | (df_prcp['Acc_anomaly'] < drought_threshold))
flood = (df_prcp['Acc_anomaly'] > flood_threshold)
drought = (df_prcp['Acc_anomaly'] < drought_threshold)
wet_xtrm = (df_era['ERA_anomaly']> flood_era)
dry_xtrm = (df_era['ERA_anomaly'] < drought_era)

In [ ]:
fig_threshold = plt.figure(dpi=72)
fig_threshold.set_size_inches(8,5)      # Specify the figure size
ax1 = fig_threshold.add_subplot(111)   
ax1.scatter(df_prcp['Acc_anomaly'][mask].index, df_prcp['Acc_anomaly'][mask],
            alpha=0.1, marker='.')
ax1.scatter(df_prcp['Acc_anomaly'][extremes].index, df_prcp['Acc_anomaly'][extremes],
            alpha=0.8, marker='.', color='r')
plt.title("Extreme rainfall based on statistical values of daily anomalies and percentiles")
plt.ylabel("Precipitation anomaly (mm)")
plt.xlabel("Year")
plt.xlim('1953-01-01','1990-12-31')
#plt.ylim(-10,70)
ax1.grid(True)
#fig_threshold.savefig('Zone1 Extreme_Threshhold_plot.pdf',dpi=300)

Summary statistics

Intensity, Duration and Frequency (IDF) of extreme events based on defined statistical extreme threshold


In [ ]:
#groupby can be used to query the dataset
#OR one can write hacks like this, to pull out data based on the index
#---Splitting the data into groups based on the extreme threshold
for year in range(min(df_prcp.index.year),max(df_prcp.index.year)):
    wet_extreme = df_prcp["Acc_anomaly"][flood][df_prcp["Acc_anomaly"][flood].index.year == year]
    dry_extreme = df_prcp["Acc_anomaly"][drought][df_prcp["Acc_anomaly"][drought].index.year == year]
    
    print(year,len(wet_extreme), year,len(dry_extreme))
    break
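
The comment above mentions groupby as an alternative to this index-based hack; a minimal groupby sketch of the same annual counts (and mean intensities) for the flood-threshold days would be:


In [ ]:
# Sketch: annual frequency and mean intensity of flood-threshold days via groupby
flood_days = df_prcp['Acc_anomaly'][flood]
print(flood_days.groupby(flood_days.index.year).size().head())   # counts per year
print(flood_days.groupby(flood_days.index.year).mean().head())   # mean intensity per year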

In [ ]:
#For station dataset
flood_freq = []
drought_freq = []
yr_day_count = []
years = []
flood_mean=[]
flood_sem =[]
drought_mean=[]
drought_sem =[]

for year in range(min(df_prcp.index.year),max(df_prcp.index.year)):
    tmp_yr_data = df_prcp["Acc_anomaly"][df_prcp.index.year == year]  # pool data for each year
    #print(tmp_yr_data)
    yr_day_count.append(tmp_yr_data.count())
    years.append(year)
    if tmp_yr_data.count() > 1:
        flood_freq.append(len(tmp_yr_data[flood]))
        #print(tmp_yr_data[flood])
        drought_freq.append(len(tmp_yr_data[drought]))
        flood_mean.append(np.nanmean(tmp_yr_data[flood]))
        flood_sem.append(calc_SEM(tmp_yr_data[flood]))
        drought_mean.append(np.nanmean(tmp_yr_data[drought]))
        drought_sem.append(calc_SEM(tmp_yr_data[drought]))
    else:
        flood_freq.append(np.NAN)
        drought_freq.append(np.NAN)
     
        flood_mean.append(np.NAN)
        flood_sem.append(np.NAN)
        drought_mean.append(np.NAN)
        drought_sem.append(np.NAN)
        
    
flood_freq = np.array(flood_freq)
drought_freq = np.array(drought_freq)
yr_day_count = np.array(yr_day_count)
years = np.array(years)
flood_mean = np.array(flood_mean)
flood_sem = np.array(flood_sem)
drought_mean = np.array(drought_mean)
drought_sem = np.array(drought_sem)

In [ ]:
flood_erafreq = []
drought_erafreq = []
yr_day_eracount = []
years_era = []
flood_eramean=[]
flood_erasem =[]
drought_eramean=[]
drought_erasem =[]

for year in range(min(df_era.index.year),max(df_era.index.year)):
    tmp_yr_dat = df_era['ERA_anomaly'][df_era.index.year == year]  # pool data for each year
    #print(tmp_yr_dat)
    yr_day_eracount.append(tmp_yr_dat.count())
    years_era.append(year)
    #print(years_era)
    if tmp_yr_dat.count() > 1:
        flood_erafreq.append(len(tmp_yr_dat[wet_xtrm]))
        #print(tmp_yr_dat[wet_xtrm])
        drought_erafreq.append(len(tmp_yr_dat[dry_xtrm]))
        flood_eramean.append(np.nanmean(tmp_yr_dat[wet_xtrm]))
        flood_erasem.append(calc_SEM(tmp_yr_dat[wet_xtrm]))
        drought_eramean.append(np.nanmean(tmp_yr_dat[dry_xtrm]))
        drought_erasem.append(calc_SEM(tmp_yr_dat[dry_xtrm]))
    else:
        flood_erafreq.append(np.NAN)
        drought_erafreq.append(np.NAN)
     
        flood_eramean.append(np.NAN)
        flood_erasem.append(np.NAN)
        drought_eramean.append(np.NAN)
        drought_erasem.append(np.NAN)
        
flood_erafreq = np.array(flood_erafreq)
drought_erafreq = np.array(drought_erafreq)
yr_day_eracount = np.array(yr_day_eracount)
years_era = np.array(years_era)
flood_eramean = np.array(flood_eramean)
flood_erasem = np.array(flood_erasem)
drought_eramean = np.array(drought_eramean)
drought_erasem = np.array(drought_erasem)

In [ ]:
drought_eramean

In [ ]:
#running_test = pd.rolling_mean(synthetic["vals"], window=10, min_periods=3, center = True) 
int_rmean = pd.rolling_mean(flood_mean, window=10, min_periods=0, center = True)
int_drmean = pd.rolling_mean(drought_mean, window=10, min_periods=0, center = True)
#ERA
era_rmean = pd.rolling_mean(flood_eramean, window=10, min_periods=0, center = True)
era_drmean = pd.rolling_mean(drought_eramean, window=10, min_periods=0, center = True)

Intensity of extreme rainfall events based on defined statistical extreme threshold

In [ ]:
#Intensity
my_int = plt.figure(dpi=72)
my_int.set_size_inches(15,5)        # Specify the output size
ax1 = my_int.add_subplot(121)
ax2 = my_int.add_subplot(122)

ax1.errorbar(years,flood_mean,  #Masking missing values
             xerr=None, yerr=flood_sem,color='b', fmt='.', alpha=1.)
ax1.errorbar(years_era, flood_eramean,xerr=None, yerr=flood_erasem,color='k', fmt='.', alpha=1.)

ax1.plot(years, int_rmean,'g--', lw=2)
ax1.plot(years_era, era_rmean,'k--', lw=2)
ax1.set_title('Zone 2 Intensity of R90p events based on \n statistical values of daily anomalies and percentiles', fontsize=14)
ax1.set_ylabel(r'Intensity (mm day$^{-1}$)')
ax1.set_xlabel(r'Year')
leg1=ax1.legend(['Station data','RCA4_ERA'],prop={'size':11},numpoints=1,markerscale=5.,frameon=True,fancybox=True)
#ax1.set_ylim(0,12)
ax1.grid(True)

ax2.errorbar(years,drought_mean,
             xerr=None, yerr=drought_sem, color='r', fmt='.', alpha=1.)
ax2.errorbar(years_era, drought_eramean,xerr=None, yerr=drought_erasem,color='k', fmt='.', alpha=1.)
ax2.plot(years, int_drmean,'g--',alpha=1., lw=2)
ax2.plot(years_era, era_drmean,'k--')
ax2.set_title(' Zone 2 Intensity of R10p events based on \n statistical values of daily anomalies and percentiles ', fontsize =14)
ax2.set_xlabel(r"Years")
ax2.set_ylabel('Intensity(mm day$^{-1}$)')
leg1=ax2.legend(['Station data','RCA4_ERA'],prop={'size':11},numpoints=1,markerscale=5.,frameon=True,fancybox=True)
ax2.grid(True)
#my_int.savefig('Zone2_Percentile_intensity+ERA1_plot.pdf',dpi=300)

In [ ]:
##calc the trendline (linear fit of flood frequency against year)
trend = np.polyfit(years[yr_day_count > 350], flood_freq[yr_day_count > 350], 1)

In [ ]:
#running_test = pd.rolling_mean(synthetic["vals"], window=10, min_periods=3, center = True) 
fre_rmean = pd.rolling_mean(flood_freq, window=10, min_periods=0, center = True)
fre_drmean = pd.rolling_mean(drought_freq, window=10, min_periods=0, center = True)
#ERA
erafreq_rmean = pd.rolling_mean(flood_erafreq, window=10, min_periods=0, center = True)
erafreq_drmean = pd.rolling_mean(drought_erafreq, window=10, min_periods=0, center = True)

In [ ]:
#pd.timedelta_range(start=None, end=None, periods=None, freq='D', name=None, closed=None)

Frequency of extreme rainfall events based on defined statistical extreme threshold

In [ ]:
#Frequency 
my_ = plt.figure(dpi=72)
my_.set_size_inches(15,5)        # Specify the output size
ax = my_.add_subplot(121)        # Add an axis frame object to the plot (i.e. a panel)
ax1 = my_.add_subplot(122) 


ax.plot(years,flood_freq, 'b-', alpha=0.8)
ax.plot(years[yr_day_count > 350],drought_freq[yr_day_count > 350], 'r-',alpha=0.8)
leg=ax.legend(['Floods','Drought',],prop={'size':10},numpoints=1,markerscale=1.,
                frameon=True,fancybox=True)

ax.set_ylim(0,100)
ax.set_title('Integer count of Extreme precip events(Frequency) \n based on threshold value detection (Zone2)')
ax.set_ylabel(r'Number of Extreme events (counts)')
ax.set_xlabel('Years')
ax.grid(True)


ax1.plot(years[yr_day_count > 350], flood_freq[yr_day_count > 350]/ #Normalized frequency
        yr_day_count[yr_day_count > 350], 'b-', alpha=1.)
ax1.plot(years[yr_day_count > 350],drought_freq[yr_day_count > 350]/
        yr_day_count[yr_day_count > 350],'r-', alpha=1.)
leg=ax1.legend(['Floods','Drought',],prop={'size':10},numpoints=1,markerscale=1.,
                frameon=True,fancybox=True)

ax1.set_title('Extreme precip events Frequency \n based on threshold value detection(Zone2)')
ax1.set_ylabel(r'Normalized count of Extreme events')
ax1.set_xlabel('Years')
ax1.grid(True)
plt.show(my_)
#my_.savefig('Zone2_Percentile_Frequency_plot.pdf',dpi=300)

In [ ]:
#Frequency 
my_ = plt.figure(dpi=300)
my_.set_size_inches(10,5)        # Specify the output size
ax1 = my_.add_subplot(111) 


ax1.plot(years[yr_day_count > 350], fre_rmean[yr_day_count > 350]/yr_day_count[yr_day_count > 350],'b-', lw=2)
ax1.plot(years[yr_day_count > 350], fre_drmean[yr_day_count > 350]/yr_day_count[yr_day_count > 350],'r-', lw=2)
ax1.plot(years_era, erafreq_rmean/365,'g-',  lw=2)
ax1.plot(years_era, erafreq_drmean/365,'k-',  lw=2)

ax1.plot(years[yr_day_count > 350], flood_freq[yr_day_count > 350]/yr_day_count[yr_day_count > 350], 'b.', alpha=1.)
ax1.plot(years[yr_day_count > 350],drought_freq[yr_day_count > 350]/yr_day_count[yr_day_count > 350],'r.', alpha=1.)
ax1.plot(years_era,flood_erafreq/yr_day_eracount, 'g.', alpha=0.8)
ax1.plot(years_era,drought_erafreq/yr_day_eracount, 'k.', alpha=0.8)
leg=ax1.legend(['Station data R90p','Station data R10p','RCA4_ERA R90p','RCA4_ERA R10p'],prop={'size':10},
               numpoints=1,markerscale=1.,frameon=True,fancybox=True)
ax1.set_title('Zone 2 Frequency of Extreme precip Events  \n based on statistical values of daily anomalies and percentiles', fontsize=14)
ax1.set_ylabel(r'Normalized count of Extreme events', fontsize=14)
ax1.set_xlabel(r'Years', fontsize=14)
ax1.grid(True)
plt.show(my_)
#my_.savefig('Zone2_Percentile_ERAFrequency_plot.pdf',dpi=300)

Duration of extreme rainfall events based on defined statistical extreme threshold

Duration

  • Operations on time differences using timedelta in the Pandas / datetime packages.

In [ ]:
test = tmp_yr_data[drought].index[1] - tmp_yr_data[drought].index[0]
print("diffrence in days between first and second flood:",test.days)

In [ ]:
def add_days_since(df, truth_array, name_to_add):
    """
    Take a dataframe (df) and a truth array aligned to that dataframe
    (e.g. df=df_prcp, truth_array=drought). For each date in the
    dataframe, if the truth value is True, record how many days have
    passed since the last True value (as an integer day count). The
    resulting array is appended to the dataframe as a new column,
    with name_to_add as the column name.
    """
    days_since_list = []
    last_day = df.index[0]  # Initialise the state of the 'last' day
    for day in df.index:
        if truth_array[day] == True:
            days_since = day - last_day
            days_since = int(days_since.days)
            last_day = day  # update the state of last day to current true day
            if days_since > 1000: # Just check the values aren't silly
                days_since = np.NAN
        else:
            days_since = np.NAN
        days_since_list.append(days_since)
    days_since_list = np.array(days_since_list)
    df[name_to_add]=days_since_list #adding column to the df_prcp data frame
    return

In [ ]:
add_days_since(df=df_prcp, truth_array=drought, name_to_add = 'DS_Last_D')#Days  since last drought
add_days_since(df=df_prcp, truth_array=flood, name_to_add = 'DS_Last_F') #Days since last flood
#add_days_since(df=df_prcp, truth_array=low_risk, name_to_add = 'DS_Last_R')

In [ ]:
my_duration = plt.figure()
my_duration.set_size_inches(10,5)        
ax1 = my_duration.add_subplot(111)
#ax2 = my_duration.add_subplot(122) 

ax1.plot(df_prcp.DS_Last_F.index, df_prcp.DS_Last_F,'-b',ms=3.0,alpha=1.0, lw=2)
ax1.plot(df_prcp.DS_Last_D.index, df_prcp.DS_Last_D,'-r',ms=3.0,alpha=1.0,lw=2)

#ax1.bar(df_prcp.DS_Last_F.index, df_prcp.DS_Last_F, width=0.8, color = 'b')
#ax1.bar(df_prcp.DS_Last_D.index, df_prcp.DS_Last_D, width=0.8, color = 'r')
leg=ax1.legend(['DS_Last_R90p event','DS_Last_R10p',],prop={'size':10},numpoints=1,markerscale=5.,
                frameon=True,fancybox=True)

ax1.set_ylabel(r'Duration since last event (Days) ',fontsize=14)
ax1.set_xlabel('Year',fontsize=14)
ax1.set_title(r'Duration between extreme events in Zone 2',fontsize=14)
ax1.set_ylim(0, 250)
#ax1.set_xlim('1953-01-01','1990-12-31')
ax1.grid(True)
plt.show(my_duration)
#my_duration.savefig('Zone2_Percentile_duration_curve.pdf',dpi=300)

CDD & CWD

Consecutive dry days & Consecutive wet days
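
The cells below track the number of days since the last dry/wet day using add_days_since(). The ETCCDI indices CDD and CWD are usually defined instead as the longest annual run of consecutive dry (RR < 1 mm) or wet (RR ≥ 1 mm) days; a minimal run-length sketch of that form, assuming the Accumulated series, would be:


In [ ]:
# Sketch: annual maximum run length of consecutive dry/wet days (ETCCDI-style CDD/CWD)
def longest_run(flags):
    """Length of the longest run of consecutive True values in a boolean series."""
    best = current = 0
    for flag in flags:
        current = current + 1 if flag else 0
        best = max(best, current)
    return best

dry = df_prcp['Accumulated'] < 1.0
wet = df_prcp['Accumulated'] >= 1.0
print(dry.groupby(dry.index.year).apply(longest_run).head())   # CDD per year
print(wet.groupby(wet.index.year).apply(longest_run).head())   # CWD per year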


In [ ]:
cdd_ERA = add_days_since(df=df_era, truth_array=df_era['ERAINT']<1 , name_to_add='RCA4_CDD')
cdd_Obs = add_days_since(df=df_prcp, truth_array=df_prcp['Accumulated']<1 , name_to_add='CDD')
cwd_ERA = add_days_since(df=df_era, truth_array=df_era['ERAINT']>=1 , name_to_add='RCA4_CWD')
cwd_Obs = add_days_since(df=df_prcp, truth_array=df_prcp['Accumulated']>=1 , name_to_add='CWD')

In [ ]:
annera_cdd = df_era.RCA4_CDD.resample('A', how='mean')  
annobs_cdd = df_prcp.CDD.resample('A', how='mean') 
annera_cwd = df_era.RCA4_CWD.resample('A', how='mean') 
annobs_cwd = df_prcp.CWD.resample('A', how='mean')

In [ ]:
cddobs_rav = pd.rolling_mean(annobs_cdd, window=10, min_periods=0, center = True)
cddera_rav = pd.rolling_mean(annera_cdd, window=10, min_periods=0, center = True)
cwdobs_rav = pd.rolling_mean(annobs_cwd, window=10, min_periods=0, center = True)
cwdera_rav = pd.rolling_mean(annera_cwd, window=10, min_periods=0, center = True)

In [ ]:
cdd = plt.figure()
cdd.set_size_inches(15,5)        
ax1 = cdd.add_subplot(121)
ax2 = cdd.add_subplot(122)

ax1.plot(df_prcp.CDD.index, df_prcp.CDD,'b',ms=3.0,alpha=1.0,lw=2)
ax1.plot(df_prcp.CWD.index, df_prcp.CWD,'r',ms=3.0,alpha=1.0,lw=2)

leg=ax1.legend(['CDD','CWD'],prop={'size':10},numpoints=1,markerscale=5.,
                frameon=True,fancybox=True)

ax1.set_ylabel(r'Duration since last event (Days) ')
ax1.set_xlabel('Years')
ax1.set_title(r'CDD & CWD over Zone 1 (Station data)')
ax1.set_ylim(0,200)
#ax1.set_xlim('1953-01-01','1990-12-31')
ax1.grid(True)

ax2.plot(df_era.RCA4_CDD.index, df_era.RCA4_CDD,'b',ms=3.0,alpha=1.0, lw=2)
ax2.plot(df_era.RCA4_CWD.index, df_era.RCA4_CWD,'r',ms=3.0,alpha=1.0, lw=2)
ax2.set_ylabel(r'Duration since last event (Days) ')
ax2.set_xlabel('Years')
ax2.set_title(r'CDD & CWD over Zone 1 (RCA4_ERA)')
ax2.set_ylim(0,200)
ax2.grid(True)


leg=ax2.legend(['CDD','CWD'],prop={'size':10},numpoints=1,markerscale=5.,
                frameon=True,fancybox=True)
plt.show(cdd)
#cdd.savefig('Zone1_cdd.pdf',dpi=300)

In [ ]:
#Changes of annual mean CWD and CDD
cdd_mean1 = plt.figure()
cdd_mean1.set_size_inches(10,5)        
ax1 = cdd_mean1.add_subplot(111)

ax1.plot(annobs_cdd.index, annobs_cdd,'b',ms=3.0,alpha=1.0,lw=1.5)
ax1.plot(annobs_cwd.index, annobs_cwd,'r',ms=3.0,alpha=1.0,lw=1.5)
ax1.plot(annera_cdd.index, annera_cdd,'g',ms=3.0,alpha=1.0, lw=1.5)
ax1.plot(annera_cwd.index, annera_cwd,'k',ms=3.0,alpha=1.0, lw=1.5)
ax1.plot(cddobs_rav.index, cddobs_rav,'b--',ms=3.0,alpha=1.0,lw=2.5)
ax1.plot(cwdobs_rav.index, cwdobs_rav,'r--',ms=3.0,alpha=1.0,lw=2.5)
ax1.plot(cddera_rav.index, cddera_rav,'g--',ms=3.0,alpha=1.0, lw=2.5)
ax1.plot(cwdera_rav.index, cwdera_rav,'k--',ms=3.0,alpha=1.0, lw=2.5)

leg=ax1.legend(['Station data CDD','Station data CWD','RCA4_ERA CDD','RCA4_ERA CWD'],
               prop={'size':10},numpoints=1,markerscale=5.,frameon=True,fancybox=True)

ax1.set_ylabel(r'Annual Mean Duration since last event (Days)',fontsize=12)
ax1.set_xlabel('Years', fontsize=15)
ax1.set_title(r'CDD & CWD over Zone 2', fontsize=16)
ax1.set_ylim(0,10)
ax1.set_xlim('1954-01-01','2015-12-31')
ax1.grid(True)

#cdd_mean1.savefig('Zone1_cdd_Sum.pdf',dpi=300)
#Duration between last CDD is increasing while that of CWD is declining, meaning more wet days

Extreme events based on cumulative statistical values (boxcar approach)


In [ ]:
# Type of boxcar function
"""
Interested in the sum of mean precip for current day plus the next 2 days
If the sum is greater than 100mm, then this could be a signal
for potential medium-high risk for flood, low risk if greater than 50mm but
less than 100. 
"""
#RX3day: maximum 3-d Precipitation : Highest precipitation amount in 3-d period
running_total = pd.rolling_sum(df_prcp["Accumulated"], window=3, min_periods=3, center = True) #inverse=[::-1]
print(np.max(running_total), np.max(df_prcp["Accumulated"]))

In [ ]:
print(running_total[7000:7005])

RX5day: maximum 5-day precipitation: the highest precipitation amount in a 5-day period


In [ ]:
running_total5d = pd.rolling_sum(df_prcp["Accumulated"], window=5, min_periods=5, center = True) #inverse=[::-1]
print(np.max(running_total5d), np.max(df_prcp["Accumulated"]))

In [ ]:
my_total = plt.figure()
my_total.set_size_inches(15,5)        # Specify the output size
ax1 = my_total.add_subplot(121)
ax2 = my_total.add_subplot(122)

ax1.plot(running_total.index, running_total,'.g',ms=3.0,alpha=0.75)
ax1.plot(running_total[running_total > 50].index, running_total[running_total > 50],'.r',ms=3.0,alpha=1.0)
leg=ax1.legend(['No risk','flood risk',],prop={'size':10},numpoints=1,markerscale=5.,
                frameon=True,fancybox=True)
ax1.set_ylabel(r'Precipitation (mm day$^{-1}$)',fontsize =14)
ax1.set_xlabel('Years',fontsize =14)
ax1.set_title(r'RX3day:Station data maximum 3-d Precipitation in East Africa',fontsize =14)
ax1.set_ylim(0, 120)
ax1.set_xlim('1952-01-01','2015-12-31')
ax1.grid(True)


ax2.plot(running_total5d.index, running_total5d,'.g',ms=3.0,alpha=0.75)
ax2.plot(running_total5d[running_total5d > 50].index, running_total5d[running_total5d > 50],'.r',ms=3.0,alpha=1.0)
leg=ax2.legend(['No risk','flood risk',],prop={'size':10},numpoints=1,markerscale=5.,
                frameon=True,fancybox=True)
ax2.set_ylabel(r'Precipitation (mm day$^{-1}$)',fontsize =14)
ax2.set_xlabel('Years', fontsize =14)
ax2.set_title(r'RX5day:Station data Maximum 5-d Precipitation in East Africa',fontsize =14)
ax2.set_ylim(0, 120)
ax2.set_xlim('1952-01-01','2015-12-31')
ax2.grid(True)
plt.show(my_total)
#my_total.savefig('Zone1_total_ts.pdf',dpi=300)

Summary statistics

Frequency of flood risk events based on cumulative statistical values (boxcar approach)

In [ ]:
flood_risk = running_total[running_total > 50]        #Days when the 3-day precip totals exceeded the flood threshold
flood_risk5d = running_total5d[running_total5d > 50]  #Same for the 5-day totals

floodrisk_freq = flood_risk.groupby(flood_risk.index.year).count()/365     #Gives normalized frequency
floodrisk_freq5d = flood_risk5d.groupby(flood_risk5d.index.year).count()/365

In [ ]:
r3mean = pd.rolling_mean(flood_risk, window=10, min_periods=0, center = True)
r5mean = pd.rolling_mean(flood_risk5d, window=10, min_periods=0, center = True)

In [ ]:
#Frequency plot 
flood_freq = plt.figure()
flood_freq.set_size_inches(15,5)        
ax1 = flood_freq.add_subplot(121)
ax2 = flood_freq.add_subplot(122)

ax1.plot(floodrisk_freq.index, floodrisk_freq ,'r.',ms=7.0,alpha=1.)
#ax1.plot(r3mean.index, r3mean ,'r-',ms=3.0,alpha=1.)
ax1.set_title('Flood risk events Frequency RX3day (Zone1) based on\n cumulative statistical values (boxcar approach)')
ax1.set_ylabel(r'Normalized count of events')
ax1.set_xlabel(r'Year')
ax1.grid(True)

ax2.plot(floodrisk_freq5d.index, floodrisk_freq5d ,'r.',ms=7.0,alpha=1.)
#ax2.plot(floodrisk_freq5d.index, r3mean ,'r-',ms=3.0,alpha=1.)
ax2.set_title('Flood risk events Frequency RX5day (Zone1) based on\n cumulative statistical values (boxcar approach)')
ax2.set_ylabel(r'Normalized count of events')
ax2.set_xlabel(r'Year')
ax2.grid(True)
#flood_freq.savefig('Zone1_RX3d.pdf',dpi=300)

Duration of flood risk events based on cumulative statistical values (boxcar approach)

In [ ]:
#### From the frequency of occurrence it is evident that the duration between flood events can be up to 22 years (1967-1989)

In [ ]:
running_mean = pd.rolling_mean(df_prcp["Accumulated"], window=10, min_periods=3, center = True)
print(np.max(running_mean), np.max(df_prcp["Accumulated"]))

Teleconnections

Correlation between climate indices & extreme precipitation

In [ ]:
def date_index(date_number):
    """
    Turn an int64 YYYYMM value from the YR, MON columns of an index file
    into a pd.datetime (the day is set to the 1st of the month).
    """
    dstring = str(date_number)
    return pd.datetime(int(dstring[0:4]), int(dstring[4:6]), 1)  # year, month, day

def corr_df(fpath, label, date_col='DATE'):
    """
    Read all fixed-width climate-index files matching fpath and return a
    single DataFrame with a monthly date index and one column named label.
    This assumes each file holds a YYYYMM column (date_col) and a value
    column named label; adjust to the actual layout of the downloaded files.
    """
    data = []
    dates = []
    for file in glob.glob(fpath):
        data_in = pd.read_fwf(file)
        dates.extend(date_index(entry) for entry in data_in[date_col])
        data.extend(data_in[label].values)
    return pd.DataFrame(data=data, columns=[label], index=dates)

Fetching data from web sources using Pandas:

  • IOD data
  • ENSO data
  • SOI data
  • QBO30mb data
  • QBO50mb data


In [ ]:
"""
This is aimed at finding the relationship between extreme weather events
in EA and climate indices (ENSO, QBO, IOD). What is the correlation
between the datasets, and is it statistically significant?
"""
#iod=pd.read_fwf("Data/dmi.nc")
enso=pd.read_fwf("Data/noaa_mei.txt")
soi = pd.read_fwf("Data/noaa_soi.txt", index_col='YEAR')
qbo30 =pd.read_fwf("Data/noaa_qbo30.txt", index_col='YEAR')
qbo50 = pd.read_fwf("Data/noaa_qbo50.txt", index_col='YEAR')
# Give the ENSO table a monthly datetime index (the SOI/QBO tables keep YEAR as their index)
enso.index = pd.date_range(start='1950-01-01',end='2015-07-01',freq='M')

In [ ]:
month_list = soi.keys()
tmp_data = []
for year in soi.index:
    for month in month_list:
        tmp_data.append(soi[month][year])
        #print(month,year,soi[month][year])
tmp_data = np.array(tmp_data)

In [ ]:
mon_list = qbo30.keys()
qbo3_data = []
qbo5_data = []
for year in qbo30.index:
    for month in mon_list:
        qbo3_data.append(qbo30[month][year])
        qbo5_data.append(qbo50[month][year])
        #print(month,year,qbo30[month][year])
qbo3_data = np.array(qbo3_data)
qbo5_data = np.array(qbo5_data)

In [ ]:
#Creating their data frames
df_soi = pd.DataFrame(data=tmp_data,index=pd.date_range(start='1951-01-01',end='2015-12-31',freq='M'),columns=['SOI'])
df_qbo3=pd.DataFrame(data=qbo3_data,index=pd.date_range(start='1979-01-01',end='2015-12-31',freq='M'),columns=['QBO3'])
df_qbo5=pd.DataFrame(data=qbo5_data,index=pd.date_range(start='1979-01-01',end='2015-12-31',freq='M'),columns=['QBO5'])

In [ ]:
#to convert a DataFrame to a TimeSeries
soii = df_soi.unstack()
qbo3 = df_qbo3.unstack()
qbo5 = df_qbo5.unstack()
print(np.mean(soii), np.mean(qbo3), np.mean(qbo5))

In [ ]:
#Indian Ocean Dipole Index 
f = netCDF4.Dataset("Data/dmi.nc") # Assign the netcdf file
#print(f.variables)                # Show what is in the netcdf file
dmi = f["DMI"][:]                  # Call the DMI and dates and assign them to a variable
dmi_date = f["WEDCEN2"][:]
dmi_dates = netCDF4.num2date(f['WEDCEN2'][:],units = f['WEDCEN2'].units)
dmi_index = pd.to_datetime(dmi_dates)
f.close()                        # close the connection to the netcdf file

In [ ]:
df_iod = pd.DataFrame(data=dmi,index=dmi_index,columns=['IOD']) 
iod = df_iod.unstack()    #convert a DataFrame to a TimeSeries
dmi_mon = df_iod['IOD'].resample('M', how='mean') #Resampling the weekly IOD data into monthly for correlation.

Possible dependent variables


In [ ]:
acc_monthly = df_prcp.Acc_anomaly.resample('M', how='mean') #monthly Deseasonalized precip 
era_monthly = df_era.ERA_anomaly.resample('M', how = 'mean')
extreme_monthly = df_prcp["Acc_anomaly"][extremes].resample('M', how='mean')

Correlation with ENSO


In [ ]:
#Plots to explore the data
enso_fig = plt.figure(dpi=72)
enso_fig.set_size_inches(20,5)        
ax = enso_fig.add_subplot(111)

ax.plot(acc_monthly['1955-01-01':'1990-12-31'].index,acc_monthly['1955-01-01':'1990-12-31'], 'b',ms=3.0,alpha=0.75) 
ax.plot(era_monthly['1981-01-01':'1990-12-31'].index,era_monthly['1981-01-01':'1990-12-31'], 'g',ms=3.0,alpha=0.75) 
ax.plot(enso['1955-01-01':'1990-12-31'].index,enso['ANOM.3']['1955-01-01':'1990-12-31'], 'r',ms=3.0,alpha=0.75)
ax.grid(True)

In [ ]:
# Boolean masks of non-NaN months (NaN != NaN, so x == x is False only for missing values)
good_dat = acc_monthly['1961-01-01':'1990-12-31'] == acc_monthly['1961-01-01':'1990-12-31']
good_era = era_monthly['1981-01-01':'2010-12-31'] == era_monthly['1981-01-01':'2010-12-31']
enso_stats = stats.linregress(enso['ANOM.3']['1961-01-01':'1990-12-31'][good_dat],
                             acc_monthly['1961-01-01':'1990-12-31'][good_dat])
enso_stats
#Results show a very low (<10%) correlation of EA monthly precipitation with the ENSO index

In [ ]:
era_stats = stats.linregress(enso['ANOM.3']['1981-01-01':'2010-12-31'][good_era],
                             era_monthly['1981-01-01':'2010-12-31'][good_era])
era_stats

In [ ]:
corr_enso = plt.figure(dpi=72)
corr_enso.set_size_inches(5,5)        
ax1 = corr_enso.add_subplot(111)

ax1.plot(enso['ANOM.3']['1961-01-01':'1990-12-31'],acc_monthly['1961-01-01':'1990-12-31'],'r.')
ax1.plot(enso['ANOM.3']['1981-01-01':'2010-12-31'],era_monthly['1981-01-01':'2010-12-31'],'b.')
fit = np.arange(-3,3,0.1)*enso_stats[0] + enso_stats[1]   # slope*x + intercept
fit1 = np.arange(-3,3,0.1)*era_stats[0] + era_stats[1]
ax1.plot(np.arange(-3,3,0.1), fit, 'r-')
ax1.plot(np.arange(-3,3,0.1), fit1, 'b-')
ax1.set_ylabel(r'Monthly Precip (Anomalies)', fontsize=14)
ax1.set_title(' ENSO Scatter plot Zone 1', fontsize=14)
ax1.set_xlabel(r'ENSO(3.4) Index', fontsize=14)

corr_enso.text(0.4, 0.85, r'r = 0.0821 - Station data' ,fontsize=12,color='r')
corr_enso.text(0.4, 0.80, r'r = -0.025 - RCA4_ERA' ,fontsize=12,color='b')
#corr_enso.savefig('Zone2_Index_ENSO+ERA.pdf',dpi=300)

In [ ]:
good_dt = acc_monthly['1997'] == acc_monthly['1997']
enso_stat = stats.linregress(enso['ANOM.3']['1997'][good_dt],
                             acc_monthly['1997'][good_dt])
enso_stat
#Results show a significant (67%) correlation of EA monthly precipitation during the 97/98
#El Nino episode ### Interesting ???

In [ ]:
rvalue=0.59
print("r-squared:", rvalue**2)

Correlation with SOI


In [ ]:
plt.plot(acc_monthly['1955-01-01':'1990-12-30'].index,acc_monthly['1955-01-01':'1990-12-30'])
plt.plot(df_soi['1955-01-01':'1990-12-30'].index, df_soi['1955-01-01':'1990-12-30'])

In [ ]:
soi_stats = stats.linregress(df_soi['SOI']['1961-01-01':'1990-12-31'][good_dat],
                             acc_monthly['1961-01-01':'1990-12-31'][good_dat])
soiera_stats = stats.linregress(df_soi['SOI']['1981-01-01':'2010-12-31'][good_era],
                             era_monthly['1981-01-01':'2010-12-31'][good_era])
print("soi correlation statistics:", soi_stats)
print("RCA4, soi correlation_stats:", soiera_stats)

Correlation with QBO


In [ ]:
df_qbo5['QBO5'].head()

In [ ]:
qbo_gd = acc_monthly['1981-01-01':'1990-12-31'] == acc_monthly['1981-01-01':'1990-12-31']
qbo3_stats = stats.linregress(df_qbo3['QBO3']['1981-01-01':'1990-12-31'][qbo_gd],
                             acc_monthly['1981-01-01':'1990-12-31'][qbo_gd])
qbo3era_stats = stats.linregress(df_qbo3['QBO3']['1981-01-01':'2010-12-31'][good_era],
                             era_monthly['1981-01-01':'2010-12-31'][good_era])
print("QBO correlation statistics:",qbo3_stats)
print("RCA4,  QBO correlation_stats:",qbo3era_stats)

Correlation with IOD


In [ ]:
stats.linregress(dmi_mon,dmi_mon)

In [ ]:
#Linear Regression of Monthly anomalized data with IOD
gd_data = acc_monthly['1981-11-30':'1990-12-31'] == acc_monthly['1981-11-30':'1990-12-31']
iod_stats = stats.linregress(dmi_mon['1981-11-30':'1990-12-31'][gd_data],
                             acc_monthly['1981-11-30':'1990-12-31'][gd_data])
iodera_stats = stats.linregress(dmi_mon['1981-11-30':'2010-12-31'][good_era],
                             era_monthly['1981-11-30':'2010-12-31'][good_era])
print(iod_stats)
print(iodera_stats)

In [ ]:
corr_iod = plt.figure(dpi=72)
corr_iod.set_size_inches(5,5)        
ax1 = corr_iod.add_subplot(111)

ax1.plot(dmi_mon['1981-11-30':'1990-08-31'],acc_monthly['1981-11-30':'1990-08-31'],'r.')
fit = np.arange(-3,3,0.1)*iod_stats[0] + iod_stats[1]   # slope*x + intercept
ax1.plot(np.arange(-3,3,0.1), fit, 'r-')
ax1.set_ylabel(r'Monthly Precip (Anomalies)')
ax1.set_title('IOD Scatter plot for Zone 1')
ax1.set_xlabel(r'IOD Index ')

corr_iod.text(0.4, 0.85, r'r = -0.0097' ,fontsize=12,color='b')
#corr_iod.savefig('Zone1_Index_IOD.pdf',dpi=300)

Monthly Correlations


In [ ]:
selector = acc_monthly.index.month
selectors = enso['ANOM.3'].index.month
selectorss  = dmi_mon.index.month
selector_era = era_monthly.index.month

In [ ]:
era_monthly[selector_era == 1].head()

In [ ]:
monthly_corr = plt.figure(dpi=300)
monthly_corr.set_size_inches(15,5)        
ax = monthly_corr.add_subplot(111)

ax.plot(acc_monthly[selector == 11].index, acc_monthly[selector ==11], 'b', ms=3.0,alpha=0.75) 
ax.plot(enso['ANOM.3'][selectors==11].index, enso['ANOM.3'][selectors==11], 'r', ms=3.0,alpha=0.75)
ax.plot(dmi_mon[selectorss==11].index,dmi_mon[selectorss==11], 'g', ms=3.0,alpha=0.75)
leg=ax.legend(['Precip','ENSO3.4 Index','IOD Index'],prop={'size':12},numpoints=1,markerscale=1.,
                frameon=True,fancybox=True)

ax.set_ylabel(r'Anomalies')
ax.set_title(r'Relationship of EA precipitation vs ENSO Index & IOD Index for the month of November (Zone 2)')
ax.set_xlabel(r'Time (months_Nov)')
ax.grid(True)
#monthly_corr.savefig('Zone1_Correlation_plot.pdf',dpi=300)

In [ ]:
#gd = acc_monthly[selector == 5]['1955-01-01':'1990-12-31'] == acc_monthly[selector == 5]['1955-01-01':'1990-12-31']
#stats = stats.linregress(enso['ANOM.3'][selectors==5]['1955-01-01':'1990-12-31'][gd],
#                             acc_monthly[selector==5]['1955-01-01':'1990-12-31'][gd])
#stats
#High r values Mar=-30% April=25% May =44% Oct=35%, Nov=34% ENSO
#High r values June=29% May=42% Oct=42%, Nov=70% Dec=20% IOD

In [ ]:
corr_mon = plt.figure(dpi=72)
corr_mon.set_size_inches(5,5)        
ax1 = corr_mon.add_subplot(111)

ax1.plot(enso['ANOM.3'][selectors==11]['1955-01-01':'1990-12-31'], 
         acc_monthly[selector==11]['1955-01-01':'1990-12-31'],'r.')
#fit = np.arange(-3,3,0.1)*stats[0] + stats[0]
ax1.plot(np.arange(-3,3,0.1), fit, 'r-')
ax1.set_ylabel(r'May precip (mm)')
ax1.set_title('MAY (ENSO) Scatter plot Zone 2')
ax1.set_xlabel(r' ENSO Index ($^{o}$C)')

corr_mon.text(0.2, 0.85, r'r = -0.20968898413341716' ,fontsize=12,color='b')
#corr_mon.savefig('Zone1_Index_MAY.pdf',dpi=300)

Seasonal Correlation statistics


In [ ]:
#Exploring the seasonal correlations here
'''
To do seasonal anomaly correlation
'''
acc_seasonal = acc_monthly.resample('Q', how='mean') #Seasons "JFM, AMJ, JAS, OND"
xtrm_seasonal = extreme_monthly.resample('Q', how='mean') #Seasons "JFM, AMJ, JAS, OND"
era_seasonal = era_monthly.resample('Q', how='mean')
enso_seasonal = enso['ANOM.3'].resample('Q', how='mean') 
dmi_seasonal = df_iod['IOD'].resample('Q', how='mean')
soi_seasonal = df_soi['SOI'].resample('Q', how='mean')
qbo_seasonal = df_qbo3['QBO3'].resample('Q', how='mean')

#xtrm_seasonal = extreme_monthly['1954-01-01':'2015-12-31'].resample('4M', how='mean') #Seasons "FMAM, JJAS, ONDJ"

#acc_ssn = acc_monthly.resample('BQ-FEB', how = 'mean') #Seasons "DJF, MAM, JJA, SON"
#enso_ssn = enso['ANOM.3'].resample('BQ-FEB', how='mean') #Seasons "DJF, MAM, JJA, SON"
#dmi_ssn = df_iod['IOD'].resample('BQ-FEB', how='mean')

In [ ]:
x = acc_seasonal.index.month
x1 = enso_seasonal.index.month
x2 = era_seasonal.index.month
x3 = xtrm_seasonal.index.month
x4 = dmi_seasonal.index.month
x5 = soi_seasonal.index.month
x6 = qbo_seasonal.index.month

In [ ]:
OND_Obs = acc_seasonal[x == 12]
OND_ens = enso_seasonal[x1 == 12]
OND_era = era_seasonal[x2 == 12]
OND_xtr = xtrm_seasonal[x3 == 12]
OND_iod = dmi_seasonal[x4 == 12]
OND_soi = soi_seasonal[x5 == 12]
OND_qbo = qbo_seasonal[x6 == 12]
#JJAS season was used for zone 1 while OND used in Zone 2

In [ ]:
clima = plt.figure()
clima.set_size_inches(15,5)        
ax = clima.add_subplot(111)
ax.plot(OND_xtr.index, OND_xtr, 'r', ms=3.0,alpha=1.0,lw=2)
ax.plot(OND_ens.index, OND_ens, 'b', ms=3.0,alpha=1.0,lw=2)
ax.plot(OND_iod.index, OND_iod, 'g', ms=3.0,alpha=1.0,lw=2)
ax.set_title('OND Average ENSO/IOD Indices Correlation with Zone 2 Extreme Precipitation ', fontsize = 15)
ax.set_xlabel(r'Years', fontsize = 15)
ax.set_ylabel(r'Anomaly ', fontsize = 15)
#ax.set_xlim('1950' '1990')
#ax.set_ylim(-5,10)
leg=ax.legend(['Station data','ENSO 3.4','IOD'],prop={'size':10},
              numpoints=1,markerscale=5.,frameon=True,fancybox=True)
ax.grid(True)
#clima.savefig('Zone1_ClimIndices_EXtreme.pdf',dpi=300)

In [ ]:
clim = plt.figure()
clim.set_size_inches(15,5)        
ax = clim.add_subplot(111)
ax.plot(OND_Obs.index, OND_Obs, 'r', ms=3.0,alpha=1.0,lw=2)
ax.plot(OND_era.index, OND_era, 'k', ms=3.0,alpha=1.0,lw=3)
ax.plot(OND_ens.index, OND_ens, 'b', ms=3.0,alpha=1.0,lw=2)
ax.plot(OND_iod.index, OND_iod, 'g', ms=3.0,alpha=1.0,lw=2)
#ax.plot(OND_soi.index, OND_soi, 'm', ms=3.0,alpha=1.0,lw=1.5)
#ax.plot(OND_qbo.index, OND_qbo, 'y', ms=3.0,alpha=1.0,lw=1.5)

ax.set_title(' OND Average ENSO/IOD Indices Correlation with Zone 2 Precipitation ', fontsize = 15)
ax.set_xlabel(r'Years', fontsize = 15)
ax.set_ylabel(r'Anomaly ', fontsize = 15)
ax.set_xlim('1952', '2014')
ax.set_ylim(-5,8)
leg=ax.legend(['Station data','RCA4_ERA','ENSO 3.4','IOD','SOI','QBO'],prop={'size':10},
              numpoints=1,markerscale=5.,frameon=True,fancybox=True)
ax.grid(True)
#clim.savefig('Zone2_ClimIndices_ENSO+IOD.pdf',dpi=300)

In [ ]:
iod = plt.figure(dpi=72)
iod.set_size_inches(15,5)        
ax = iod.add_subplot(111)
#plt.plot(acc_monthly['1981-01-01':'2006-12-31'].index, acc_monthly['1981-01-01':'2006-12-31']) 
ax.plot(OND_iod.index, OND_iod, 'r')
ax.plot(OND_Obs.index, OND_Obs, 'b')
#ax.plot(enso['1981-08-01':'2015-12-31'].index, enso['ANOM.3']['1981-08-01':'2015-12-31'], 'b')

ax.set_title(' Monthly IOD index Time Series ')
ax.set_xlabel(r'Time')
ax.set_ylabel(r'IOD Index ($^{o}$C)')
ax.grid(True)
#iod.savefig('Zone1_IOD.pdf',dpi=300)

In [ ]:
ssn_ensostats = stats.linregress(OND_ens['1953-01-01':'2015-12-31'], OND_Obs['1953-01-01':'2015-12-31'])
ssn_soistats =stats.linregress(OND_soi['1981-01-01':'2010-12-31'], OND_Obs['1981-01-01':'2010-12-31'])
ssn_iodstats =stats.linregress(OND_iod['1981-12-31':'2014-12-31'], OND_Obs['1981-12-31':'2014-12-31'])
ssn_qbostats =stats.linregress(OND_qbo['1981-12-31':'2010-12-31'], OND_Obs['1981-12-31':'2010-12-31'])
print("ENSO Correlation Stats:",ssn_ensostats)
print("SOI Correlation Stats:",ssn_soistats)
print("IOD Correlation Stats:",ssn_iodstats)
print("QBO Correlation Stats:",ssn_qbostats)

In [ ]:
#xerr = np.std(OND_iod['1982-01-01':'2010-12-31'])/np.sqrt(len(OND_iod['1982-01-01':'2010-12-31']) - 1) 
xerr = np.std(OND_ens['1953-01-01':'2015-12-31']) 
yerr = np.std(OND_Obs['1953-01-01':'2015-12-31'])

In [ ]:
enso_sca = plt.figure()
enso_sca.set_size_inches(5,5)        
ax1 = enso_sca.add_subplot(111)

ax1.errorbar(OND_ens['1953-01-01':'2015-12-31'], OND_Obs['1953-01-01':'2015-12-31'],yerr, xerr,'ro')
fit = np.arange(-4,4,0.1)*ssn_ensostats[0] + ssn_ensostats[1]   # slope*x + intercept
ax1.plot(np.arange(-4,4,0.1), fit, 'r-', lw=2)
ax1.set_ylabel(r'Precipitation Anomalies', fontsize=14)
ax1.set_title(' Relationship between obs_rain and ENSO in Zone 2', fontsize=12)
ax1.set_xlabel(r'ENSO Index', fontsize=14)
ax1.grid(True)

enso_sca.text(0.4, 0.85, r'rvalue  =  0.29 ' ,fontsize=13,color='b')
enso_sca.text(0.4, 0.80, r'slope   =  0.41 ' ,fontsize=13,color='b')
enso_sca.text(0.4, 0.75, r'pvalue  =  0.02' ,fontsize=13,color='b')

#enso_sca.savefig('Zone2_Index-ENSO_Obs.pdf',dpi=300)

In [ ]: