Process the global variogram



In [2]:

    
# Load Biospytial modules and etc.
%matplotlib inline
import sys
sys.path.append('/apps')
import django
django.setup()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
## Use the ggplot style
plt.style.use('ggplot')



In [2]:

    
from external_plugins.spystats import tools
%run ../testvariogram.py









    



/opt/conda/envs/biospytial/lib/python2.7/site-packages/IPython/core/pylabtools.py:168: DtypeWarning: Columns (24) have mixed types. Specify dtype option on import or set low_memory=False.
  safe_execfile(fname,*where,**kw)



In [3]:

    
%time vg = tools.Variogram(new_data,'residuals1',using_distance_threshold=500000)









    



CPU times: user 8 ms, sys: 0 ns, total: 8 ms
Wall time: 594 µs



In [4]:

    
### Test creation of chunks
chunks = tools.PartitionDataSet(new_data,namecolumnx='newLon',namecolumny='newLat',n_chunks=3)



In [5]:

    
sizes = map(lambda c : c.shape[0],chunks)



In [6]:

    
vg0 = tools.Variogram(chunks[0],response_variable_name='residuals1',using_distance_threshold=500000)
vg1 = tools.Variogram(chunks[1],response_variable_name='residuals1',using_distance_threshold=500000)
vg2 = tools.Variogram(chunks[2],response_variable_name='residuals1',using_distance_threshold=500000)
vg3 = tools.Variogram(chunks[3],response_variable_name='residuals1',using_distance_threshold=500000)

STOP HERE This will calculate the variogram with chunks



In [ ]:

    
%time vg0.plot(num_iterations=50,with_envelope=True)



In [ ]:

    
chunks[0].plot(column='residuals1')



In [ ]:

    
%time vg1.plot(num_iterations=50,with_envelope=True)



In [ ]:

    
chunks[1].plot(column='residuals1')



In [ ]:

    
%time vg2.plot(num_iterations=50,with_envelope=True)



In [ ]:

    
chunks[2].plot(column='residuals1')



In [ ]:

    
%time vg3.plot(num_iterations=50,with_envelope=True)



In [ ]:

    
chunks[3].plot(column='residuals1')



In [ ]:

    
envelopes = map(lambda c : c.envelope,chunks)



In [ ]:

    
c = chunks[0]



In [ ]:

    
variograms = [vg0,vg1,vg2,vg3]



In [ ]:

    
envelopes = map(lambda v : v.envelope,variograms)



In [ ]:

    
colors = plt.rcParams['axes.prop_cycle']
colors = ['red','green','grey','orange']



In [ ]:

    
plt.figure(figsize=(12, 6))
for i,envelope in enumerate(envelopes):
    plt.plot(envelope.lags,envelope.envhigh,'k--')
    plt.plot(envelope.lags,envelope.envlow,'k--')
    plt.fill_between(envelope.lags,envelope.envlow,envelope.envhigh,alpha=0.5,color=colors[i])
    plt.plot(envelope.lags,envelope.variogram,'o--',lw=2.0,color=colors[i])
    
plt.legend(labels=['97.5%','emp. varig','2.5%'])

Now the global variogram

For doing this I need to take a weighted average. Or.. you can run it in the HEC machine! (as me did)



In [1]:

    
filename = "../HEC_runs/results/low_q/data_envelope.csv"
envelope_data = pd.read_csv(filename)









    




NameErrorTraceback (most recent call last)
<ipython-input-1-f8586b6394dd> in <module>()
      1 filename = "../HEC_runs/results/low_q/data_envelope.csv"
----> 2 envelope_data = pd.read_csv(filename)

NameError: name 'pd' is not defined

Gaussian semivariogram

$\gamma (h)=(s-n)\left(1-\exp \left(-{\frac {h^{2}}{r^{2}a}}\right)\right)+n1_{{(0,\infty )}}(h)$



In [8]:

    
def gaussianVariogram(h,sill=0,range_a=0,nugget=0):
    g_h = ((sill - nugget)*(1 - np.exp(-(h**2 / range_a**2)))) + nugget
    return g_h



In [12]:

    
hx = np.linspace(0,600000,100)
vg = tools.Variogram(new_data,'residuals1',using_distance_threshold=500000)
vg.envelope = envelope_data
vg.empirical = vg.envelope.variogram
vg.lags = vg.envelope.lags
vdata = vg.envelope.dropna()



In [13]:

    
from scipy.optimize import curve_fit
s = 0.345
r = 100000.0
nugget = 0.33
init_vals = [0.34, 50000, 0.33]     # for [amp, cen, wid]
best_vals, covar = curve_fit(gaussianVariogram, xdata=vdata.lags, ydata=vdata.variogram, p0=init_vals)
s =best_vals[0]
r = best_vals[1]
nugget = best_vals[2]
fitted_gaussianVariogram = lambda x : gaussianVariogram(x,sill=s,range_a=r,nugget=nugget)
gammas = pd.DataFrame(map(fitted_gaussianVariogram,hx))
import functools
fitted_gaussian2 = functools.partial(gaussianVariogram,s,r,nugget)



In [14]:

    
print(s)
print(r)
print(nugget)









    



0.345255240992
65857.797111
0.332850902482



In [ ]:

    
vg.plot(refresh=False)
plt.plot(hx,gammas,'green',lw=2)



In [ ]:

    
Mdist = vg.distance_coordinates.flatten()



In [ ]:

    
## Let's do a small subset
ch = Mdist[0:10000000]



In [ ]:

    
#%time covariance_matrix = map(fitted_gaussianVariogram,Mdist)
%time vars = np.array(map(fitted_gaussianVariogram,ch))



In [ ]:

    
plt.imshow(vars.reshape(1000,1000))



In [ ]:

    
## Save it in redis
import redis
con = redis.StrictRedis(host='redis')



In [ ]:

    
con.set('small_dist_mat1',vars)



In [ ]:

    
import multiprocessing as multi
from multiprocessing import Manager



In [ ]:

    
manager = Manager()

p=multi.Pool(processes=4)



In [ ]:

    
%time vars = p.map(fitted_gaussian2,ch,chunksize=len(ch)/3)



In [ ]:

    
%time vars = np.array(map(fitted_gaussianVariogram,ch))



In [17]:

    
88.36*30









    Out[17]:





2650.8



In [ ]: