In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
In [2]:
%matplotlib inline
In [3]:
charlotte_rainfall = pd.read_csv('./charlotte_2_rg_2011.csv', header = None)
In [4]:
charlotte_rainfall.head()
Out[4]:
In [5]:
charlotte_rainfall.columns = ["year","month","day", "hour", "min", "Rainfall_1", "Rainfall_2"]
charlotte_rainfall.loc[:,'dt'] = pd.to_datetime(dict(year=charlotte_rainfall['year'], month=charlotte_rainfall['month'], day=charlotte_rainfall['day'], hour=charlotte_rainfall['hour'], minute=charlotte_rainfall['min']))
charlotte_rainfall.index=charlotte_rainfall['dt']
In [6]:
charlotte_rainfall.head()
Out[6]:
In [7]:
charlotte_rainfall.drop('year', 1, inplace=True)
charlotte_rainfall.drop('month', 1, inplace=True)
charlotte_rainfall.drop('day', 1, inplace=True)
charlotte_rainfall.drop('hour', 1, inplace=True)
charlotte_rainfall.drop('min', 1, inplace=True)
charlotte_rainfall.drop('dt', 1, inplace=True)
In [8]:
charlotte_rainfall.head()
Out[8]:
In [9]:
charlotte_rainfall.index.name = 'dt'
In [10]:
charlotte_rainfall.head()
Out[10]:
In [11]:
charlotte_rainfall["Rainfall_1"] = charlotte_rainfall["Rainfall_1"].replace(-99, np.nan)
fig = plt.figure()
plt.plot(charlotte_rainfall.index, charlotte_rainfall["Rainfall_1"])
plt.ylabel('mm/15min')
fig.autofmt_xdate()
In [12]:
charlotte_rainfall["Rainfall_2"] = charlotte_rainfall["Rainfall_2"].replace(-99, np.nan)
fig = plt.figure()
plt.plot(charlotte_rainfall.index, charlotte_rainfall["Rainfall_2"])
plt.ylabel('mm/15min')
fig.autofmt_xdate()
In [138]:
#mask = (charlotte_rainfall.Rainfall_1 == 0) | (charlotte_rainfall.Rainfall_2 == 0)
#charlotte_rainfall = charlotte_rainfall.loc[~mask]
#mask = (np.isnan(charlotte_rainfall.Rainfall_1)) | (np.isnan(charlotte_rainfall.Rainfall_2))
#charlotte_rainfall = charlotte_rainfall.loc[~mask]
Resample to 24h accumulation
In [13]:
charlotte_24h_rainfall = pd.DataFrame()
charlotte_24h_rainfall['mean_rain_1'] = charlotte_rainfall.Rainfall_1.resample('D').mean()
charlotte_24h_rainfall['accum_rain_1'] = charlotte_rainfall.Rainfall_1.resample('D').sum()
charlotte_24h_rainfall['mean_rain_2'] = charlotte_rainfall.Rainfall_2.resample('D').mean()
charlotte_24h_rainfall['accum_rain_2'] = charlotte_rainfall.Rainfall_2.resample('D').sum()
In [14]:
mask = (charlotte_24h_rainfall.accum_rain_1 == 0) | (charlotte_24h_rainfall.accum_rain_2 == 0)
charlotte_24h_rainfall = charlotte_24h_rainfall.loc[~mask]
mask = (np.isnan(charlotte_24h_rainfall.accum_rain_1)) | (np.isnan(charlotte_24h_rainfall.accum_rain_2))
charlotte_24h_rainfall = charlotte_24h_rainfall.loc[~mask]
In [15]:
fig = plt.figure()
plt.plot(charlotte_24h_rainfall.index, charlotte_24h_rainfall['accum_rain_1'])
plt.ylabel('mm/24h')
fig.autofmt_xdate()
In [16]:
fig = plt.figure()
plt.plot(charlotte_24h_rainfall.index, charlotte_24h_rainfall['accum_rain_2'])
plt.ylabel('mm/24h')
fig.autofmt_xdate()
Try plotting Semivariogram
In [156]:
#charlotte_24h_rainfall['minutes'] = np.arange(0, 24*60*len(charlotte_24h_rainfall.accum_rain_1), 24*60)
In [80]:
charlotte_24h_rainfall['seconds'] = charlotte_24h_rainfall.index.astype(np.int64) // 10 ** 9 - ( charlotte_24h_rainfall.index.astype(np.int64)[0] // 10 ** 9)
Set seconds from zero to length
In [ ]:
In [18]:
charlotte_24h_rainfall['seconds'] = np.arange(0,len(charlotte_24h_rainfall.accum_rain_1), 1)
charlotte_24h_rainfall['zeros'] = np.zeros((len(charlotte_24h_rainfall.accum_rain_1), 1), dtype=np.int8)
In [19]:
charlotte_24h_rainfall.head()
Out[19]:
New try
In [20]:
from rpy2.robjects.packages import importr
from rpy2.robjects import r
In [21]:
sp = importr('sp')
gstat = importr('gstat')
intamap = importr('intamap')
In [22]:
r('jet.colors <- c("#00007F","blue","#007FFF","cyan","#7FFF7F","yellow","#FF7F00","red","#7F0000")')
r('col.palette <- colorRampPalette(jet.colors)')
Out[22]:
In [88]:
rain1 = charlotte_24h_rainfall[['accum_rain_1', 'seconds', 'zeros']]
rain2 = charlotte_24h_rainfall[['accum_rain_2', 'seconds', 'zeros']]
In [24]:
from rpy2.robjects import pandas2ri
pandas2ri.activate()
In [190]:
r_df = pandas2ri.py2ri(rain1)
r.assign('mydata', r_df)
r_df2 = pandas2ri.py2ri(rain2)
r.assign('mydata2', r_df2)
Out[190]:
In [191]:
r('''
mydata <- data.frame(mydata)
coordinates(mydata) <- ~seconds+zeros
mydata2 <- data.frame(mydata2)
coordinates(mydata2) <- ~seconds+zeros
''')
Out[191]:
In [198]:
p_myiso = r('myiso <- variogram(accum_rain_1~1,mydata,width=5,cutoff=100)')
p_myiso2 = r('myiso2 <- variogram(accum_rain_2~1,mydata2,width=5,cutoff=100)')
In [29]:
import matplotlib.pyplot as plt
%matplotlib inline
In [199]:
plt.plot(p_myiso['dist'], p_myiso['gamma'], '-o')
Out[199]:
In [200]:
plt.plot(p_myiso2['dist'], p_myiso2['gamma'], '-o')
Out[200]:
In [58]:
p_myiso
Out[58]:
In [59]:
p_myiso2
Out[59]:
In [94]:
r('''
variogram_map <- variogram(accum_rain_1~1,mydata,width=1,cutoff=50,map=TRUE)
png("variogram_map_temporal.png",height=600,width=600)
print(plot(variogram_map))
dev.off()
''')
Out[94]:
In [134]:
r('''
initial_vario_sph <- vgm(psill=100,model="Lin",range=8,nugget=350)
fitted_vario_sph <- fit.variogram(myiso,initial_vario_sph)
range <- fitted_vario_sph$range[2] # fitted range
nugget <- fitted_vario_sph$psill[1] # fitted nugget
sill <- sum(fitted_vario_sph$psill) # fitted sill (total sill)
SSErr_sph <- attributes(fitted_vario_sph)$SSErr # sum of squared errors (goodness of fit)
png("fitted_isotropic_variogram_sph_temporal.png",height=600,width=900)
print(plot(myiso,fitted_vario_sph))
dev.off()
''')
Out[134]:
In [135]:
r('range')
Out[135]:
In [136]:
r('nugget')
Out[136]:
In [137]:
r('sill')
Out[137]:
In [138]:
r('SSErr_sph')
Out[138]:
In [ ]:
Rainfall values on 5th august 2011
In [147]:
event_rain = charlotte_rainfall.loc['2011-08-05 00:00:00':'2011-08-05 23:59:59']
event_rain_1 = event_rain.Rainfall_1
event_rain_2 = event_rain.Rainfall_2
In [144]:
import datetime as dt
In [148]:
event_rain_2.index = event_rain_2.index + dt.timedelta(hours=72)
In [173]:
event_rain_1 = pd.Series(event_rain_1.Rainfall_1)
In [175]:
my_event = event_rain_1.append(event_rain_2)
In [177]:
my_event = pd.DataFrame(my_event)
my_event.columns = ['R']
my_event['seconds'] = my_event.index.astype(np.int64) // 10 ** 9 - ( my_event.index.astype(np.int64)[0] // 10 ** 9)
my_event['zeros'] = np.zeros((len(my_event.R), 1), dtype=np.int8)
In [241]:
my_event.tail()
Out[241]:
In [208]:
r_df = pandas2ri.py2ri(my_event)
r.assign('mydata_event', r_df)
Out[208]:
In [209]:
r('''
mydata_event <- data.frame(mydata_event)
coordinates(mydata_event) <- ~seconds+zeros
''')
Out[209]:
In [235]:
p_myiso = r('myiso_event <- variogram(R~1,mydata_event,width=1800,cutoff=250000)')
In [236]:
plt.plot(p_myiso['dist'], p_myiso['gamma'], '-o')
Out[236]:
In [237]:
from pandas.plotting import autocorrelation_plot
In [238]:
autocorrelation_plot(my_event.R)
Out[238]:
In [239]:
autocorrelation_plot(rain1.accum_rain_1)
Out[239]:
In [240]:
autocorrelation_plot(rain2.accum_rain_2)
Out[240]:
In [ ]:
In [ ]:
In [ ]:
In [26]:
from scipy.spatial.distance import pdist, squareform
In [20]:
data = np.array(charlotte_24h_rainfall[['seconds', 'zeros', 'accum_rain_1']])
lags = np.arange(86400, 864000, 86400)
sill = np.var(data[:,2])
In [22]:
print(sill)
print(lags)
In [32]:
distances = squareform(pdist(data[:,:2]))
In [37]:
print(distances)
In [44]:
tail = data[0:-1,2]
head = data[1::,2]
cur_diff = (tail-head)**2
cur_sum = sum(cur_diff)
cur_var = 1/(2*len(tail))*cur_sum
print(cur_var)
In [58]:
tail = data[0:-2,2]
head = data[2::,2]
print(len(head))
cur_diff = (tail-head)**2
cur_sum = sum(cur_diff)
cur_var = 1/(2*len(tail))*cur_sum
print(cur_var)
In [46]:
tail = data[0:-3,2]
head = data[3::,2]
cur_diff = (tail-head)**2
cur_sum = sum(cur_diff)
cur_var = 1/(2*len(tail))*cur_sum
print(cur_var)
In [ ]:
In [47]:
tail = data[0:-4,2]
head = data[4::,2]
cur_diff = (tail-head)**2
cur_sum = sum(cur_diff)
cur_var = 1/(2*len(tail))*cur_sum
print(cur_var)
In [59]:
tail = data[0:-5,2]
head = data[5::,2]
cur_diff = (tail-head)**2
cur_sum = sum(cur_diff)
cur_var = 1/(2*len(tail))*cur_sum
print(cur_var)
In [60]:
tail = data[0:-6,2]
head = data[6::,2]
cur_diff = (tail-head)**2
cur_sum = sum(cur_diff)
cur_var = 1/(2*len(tail))*cur_sum
print(cur_var)
In [61]:
tail = data[0:-7,2]
head = data[7::,2]
cur_diff = (tail-head)**2
cur_sum = sum(cur_diff)
cur_var = 1/(2*len(tail))*cur_sum
print(cur_var)
In [ ]:
In [251]:
data = np.array(charlotte_24h_rainfall[['seconds', 'zeros', 'accum_rain_1']])
npoints, cols = data.shape
In [252]:
npoints
Out[252]:
In [253]:
cols
Out[253]:
In [254]:
from scipy.spatial.distance import pdist, squareform
In [255]:
square_data = squareform(pdist(data[:,:2]))
In [256]:
lags = np.arange(1*7*24*60*60, 6*30*24*60*60, 1*7*24*60*60 )
sill = np.var(data[:,2])
In [257]:
sill
Out[257]:
In [258]:
lags.shape
Out[258]:
In [259]:
def lagindices( pwdist, lag, tol ):
'''
Input: (pwdist) square NumPy array of pairwise distances
(lag) the distance, h, between points
(tol) the tolerance we are comfortable with around (lag)
Output: (ind) list of tuples; the first element is the row of
(data) for one point, the second element is the row
of a point (lag)+/-(tol) away from the first point,
e.g., (3,5) corresponds fo data[3,:], and data[5,:]
'''
# grab the coordinates in a given range: lag +/- tolerance
i, j = np.where( ( pwdist >= lag - tol )&( pwdist < lag + tol ) )
# zip the coordinates into a list
indices = zip( i, j )
# take out the repeated elements,
# since p is a *symmetric* distance matrix
indices = np.array([ i for i in indices if i[1] > i[0] ])
return indices
def semivariance( data, indices ):
'''
Input: (data) NumPy array where the fris t two columns
are the spatial coordinates, x and y, and
the third column is the variable of interest
(indices) indices of paired data points in (data)
Output: (z) semivariance value at lag (h) +/- (tol)
'''
# take the squared difference between
# the values of the variable of interest
z = [ ( data[i,2] - data[j,2] )**2.0 for i,j in indices ]
# the semivariance is half the mean squared difference
return np.mean( z ) / 2.0
In [260]:
index = [ lagindices( square_data, lag, 0.1 ) for lag in lags ]
In [261]:
lags
Out[261]:
In [262]:
v = np.array([ semivariance( data, indices ) for indices in index ])
In [263]:
v
Out[263]:
In [269]:
v.size
Out[269]:
In [264]:
vario = np.array( zip( lags, v ) ).T
In [267]:
h, sv = np.array( zip( lags, v ) ).T
In [265]:
h, sv = lags, v
In [271]:
plt.plot( h, sv, 'ko-' )
Out[271]:
In [224]:
def SVh( P, h, bw ):
'''
Experimental semivariogram for a single lag
'''
pd = squareform( pdist( P[:,:2] ) )
N = pd.shape[0]
Z = list()
for i in range(N):
for j in range(i+1,N):
if( pd[i,j] >= h-bw )and( pd[i,j] <= h+bw ):
Z.append( ( P[i,2] - P[j,2] )**2.0 )
return np.sum( Z ) / ( 2.0 * len( Z ) )
def SV( P, hs, bw ):
'''
Experimental variogram for a collection of lags
'''
sv = list()
for h in hs:
sv.append( SVh( P, h, bw ) )
sv = [ [ hs[i], sv[i] ] for i in range( len( hs ) ) if sv[i] > 0 ]
return np.array( sv ).T
def C( P, h, bw ):
'''
Calculate the sill
'''
c0 = np.var( P[:,2] )
if h == 0:
return c0
return c0 - SVh( P, h, bw )
In [241]:
86400/2.0
Out[241]:
In [238]:
lags
Out[238]:
In [250]:
lags = np.arange(86400, 86400*15, 86400/2.0)
print(lags)
In [249]:
sv = SV( data, lags, 10000 )
plt.plot( sv[0], sv[1], '.-' )
Out[249]:
In [ ]: