In [2]:
# Load Biospytial modules and etc.
%matplotlib inline
import sys
sys.path.append('/apps')
import django
django.setup()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
## Use the ggplot style
plt.style.use('ggplot')
In [2]:
from external_plugins.spystats import tools
%run ../testvariogram.py
In [3]:
%time vg = tools.Variogram(new_data,'residuals1',using_distance_threshold=500000)
In [4]:
### Test creation of chunks
chunks = tools.PartitionDataSet(new_data,namecolumnx='newLon',namecolumny='newLat',n_chunks=3)
In [5]:
sizes = map(lambda c : c.shape[0],chunks)
In [6]:
vg0 = tools.Variogram(chunks[0],response_variable_name='residuals1',using_distance_threshold=500000)
vg1 = tools.Variogram(chunks[1],response_variable_name='residuals1',using_distance_threshold=500000)
vg2 = tools.Variogram(chunks[2],response_variable_name='residuals1',using_distance_threshold=500000)
vg3 = tools.Variogram(chunks[3],response_variable_name='residuals1',using_distance_threshold=500000)
In [ ]:
%time vg0.plot(num_iterations=50,with_envelope=True)
In [ ]:
chunks[0].plot(column='residuals1')
In [ ]:
%time vg1.plot(num_iterations=50,with_envelope=True)
In [ ]:
chunks[1].plot(column='residuals1')
In [ ]:
%time vg2.plot(num_iterations=50,with_envelope=True)
In [ ]:
chunks[2].plot(column='residuals1')
In [ ]:
%time vg3.plot(num_iterations=50,with_envelope=True)
In [ ]:
chunks[3].plot(column='residuals1')
In [ ]:
envelopes = map(lambda c : c.envelope,chunks)
In [ ]:
c = chunks[0]
In [ ]:
variograms = [vg0,vg1,vg2,vg3]
In [ ]:
envelopes = map(lambda v : v.envelope,variograms)
In [ ]:
colors = plt.rcParams['axes.prop_cycle']
colors = ['red','green','grey','orange']
In [ ]:
plt.figure(figsize=(12, 6))
for i,envelope in enumerate(envelopes):
plt.plot(envelope.lags,envelope.envhigh,'k--')
plt.plot(envelope.lags,envelope.envlow,'k--')
plt.fill_between(envelope.lags,envelope.envlow,envelope.envhigh,alpha=0.5,color=colors[i])
plt.plot(envelope.lags,envelope.variogram,'o--',lw=2.0,color=colors[i])
plt.legend(labels=['97.5%','emp. varig','2.5%'])
In [1]:
filename = "../HEC_runs/results/low_q/data_envelope.csv"
envelope_data = pd.read_csv(filename)
In [8]:
def gaussianVariogram(h,sill=0,range_a=0,nugget=0):
g_h = ((sill - nugget)*(1 - np.exp(-(h**2 / range_a**2)))) + nugget
return g_h
In [12]:
hx = np.linspace(0,600000,100)
vg = tools.Variogram(new_data,'residuals1',using_distance_threshold=500000)
vg.envelope = envelope_data
vg.empirical = vg.envelope.variogram
vg.lags = vg.envelope.lags
vdata = vg.envelope.dropna()
In [13]:
from scipy.optimize import curve_fit
s = 0.345
r = 100000.0
nugget = 0.33
init_vals = [0.34, 50000, 0.33] # for [amp, cen, wid]
best_vals, covar = curve_fit(gaussianVariogram, xdata=vdata.lags, ydata=vdata.variogram, p0=init_vals)
s =best_vals[0]
r = best_vals[1]
nugget = best_vals[2]
fitted_gaussianVariogram = lambda x : gaussianVariogram(x,sill=s,range_a=r,nugget=nugget)
gammas = pd.DataFrame(map(fitted_gaussianVariogram,hx))
import functools
fitted_gaussian2 = functools.partial(gaussianVariogram,s,r,nugget)
In [14]:
print(s)
print(r)
print(nugget)
In [ ]:
vg.plot(refresh=False)
plt.plot(hx,gammas,'green',lw=2)
In [ ]:
Mdist = vg.distance_coordinates.flatten()
In [ ]:
## Let's do a small subset
ch = Mdist[0:10000000]
In [ ]:
#%time covariance_matrix = map(fitted_gaussianVariogram,Mdist)
%time vars = np.array(map(fitted_gaussianVariogram,ch))
In [ ]:
plt.imshow(vars.reshape(1000,1000))
In [ ]:
## Save it in redis
import redis
con = redis.StrictRedis(host='redis')
In [ ]:
con.set('small_dist_mat1',vars)
In [ ]:
import multiprocessing as multi
from multiprocessing import Manager
In [ ]:
manager = Manager()
p=multi.Pool(processes=4)
In [ ]:
%time vars = p.map(fitted_gaussian2,ch,chunksize=len(ch)/3)
In [ ]:
%time vars = np.array(map(fitted_gaussianVariogram,ch))
In [17]:
88.36*30
Out[17]:
In [ ]: