In [8]:
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import pairwise_distances
from itertools import product
import pandas as pd

%matplotlib inline

1. Test of curve_fit function


In [2]:
def func(x, a, c, d):
    """ self-defined function """
    return c * (1 - np.exp(- (x / a) ** d))

np.random.seed(0)
xdata = np.linspace(0, 4, 1000)
y = func(xdata, 2.1, 1.1, 1.1)
y_noise = 0.2 * np.random.normal(size=xdata.size)
ydata = y + y_noise

popt, pcov = curve_fit(func, xdata, ydata, bounds=(0, [3., 2., 1.]))
popt


Out[2]:
array([ 3.        ,  1.32474288,  0.98855683])

In [3]:
plt.figure()
plt.plot(xdata, ydata, 'b-', label='data')
plt.plot(xdata, func(xdata, *popt), 'r-', label='fit')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()


2. Radiation data simulation

  • Assume variance of y is 0

In [13]:
np.random.seed(10)
n = 200
z = np.random.poisson(70, n)  # random sample 100 points
x = np.random.sample(n) * 100  # x location
y = np.random.sample(n) * 100  # y location
t = np.ones(n)  # time of measurement
loc = np.concatenate((x[:, np.newaxis], y[:, np.newaxis]), axis=1)
mu = np.mean(z * t)

distance = pairwise_distances(loc, metric='l2', n_jobs=8)

In [131]:
space = 10
h = np.arange(0, 140, space)

d = [0]
gamma = [0]

for k in range(1, len(h)):
    low = h[k] - space / 2.0
    upp = h[k] + space / 2.0
    index1, index2 = np.where((distance >= low) & (distance <= upp))
    if len(index1) == 0:
        continue
    gamma.append(np.mean([(z[i] - z[j]) ** 2 for (i, j) in zip(index1, index2)]) / 2.0)      
    d.append(h[k])

plt.figure()
plt.plot(d, gamma, '.-')
plt.show()



In [133]:
# reference code

from pylab import *
import numpy as np
from pandas import DataFrame, Series
from scipy.spatial.distance import pdist, squareform

def SVh( P, h, bw ):
    '''
    Experimental semivariogram for a single lag
    '''
    pd = squareform( pdist( P[:,:2] ) )
    N = pd.shape[0]
    Z = list()
    for i in range(N):
        for j in range(i+1,N):
            if( pd[i,j] >= h-bw / 2.0) and( pd[i,j] <= h+bw / 2.0):
                Z.append( ( P[i,2] - P[j,2] )**2.0 )
    return np.sum( Z ) / ( 2.0 * len( Z ) )
 
def SV( P, hs, bw ):
    '''
    Experimental variogram for a collection of lags
    '''
    sv = list()
    for h in hs:
        sv.append( SVh( P, h, bw ) )
    sv = [ [ hs[i], sv[i] ] for i in range( len( hs ) ) if sv[i] > 0 ]
    return np.array( sv ).T
 
def C( P, h, bw ):
    '''
    Calculate the sill
    '''
    c0 = np.var( P[:,2] )
    if h == 0:
        return c0
    return c0 - SVh( P, h, bw )

# part of our data set recording porosity
P = np.concatenate((loc, z[:, np.newaxis]), axis=1)
# bandwidth, plus or minus 250 meters
bw = 10
# lags in 500 meter increments from zero to 10,000
hs = np.arange(0,140,bw)
sv = SV( P, hs, bw )
sv[1][0] = 0
plt.figure()
plot( sv[0], sv[1], '.-' )
xlabel('Lag [m]')
ylabel('Semivariance')
title('Sample Semivariogram')
plt.show()


  • Assume the variance of y is not zero

In [2]:
np.random.seed(10)
n = 200
y0 = np.random.normal(70, 5, n)
z = np.random.poisson(y0)
x = np.random.sample(n) * 100  # x location
y = np.random.sample(n) * 100  # y location
t = np.ones(n)  # time of measurement
loc = np.concatenate((x[:, np.newaxis], y[:, np.newaxis]), axis=1)
mu = np.mean(z * t)

distance = pairwise_distances(loc, metric='l2', n_jobs=8)

In [10]:
a = np.reshape(distance, (1, -1))

In [19]:
plt.hist(a[0])
plt.show()



In [18]:
space = 10
h = np.arange(0, 90, space)

d = [0]
gamma = [0]

for k in range(1, len(h)):
    low = h[k] - space / 2.0
    upp = h[k] + space / 2.0
    index1, index2 = np.where((distance >= low) & (distance <= upp))
    if len(index1) == 0:
        continue
    gamma.append(np.mean([(z[i] - z[j]) ** 2 for (i, j) in zip(index1, index2)]) / 2.0 - mu)      
    d.append(h[k])

plt.figure()
plt.plot(d, gamma, '.-')
plt.show()


3. Test with ZoneA data


In [134]:
with open('./data/ZoneA.dat', 'r') as f:
    z = f.readlines()

z = [i.strip().split() for i in z[10:] ]
z = np.array(z, dtype=np.float )
z = pd.DataFrame(z, columns=['x','y','thk','por','perm','lperm','lpermp','lpermr'])

n = len(z)
distance = pairwise_distances(z[['x', 'y']].values, metric='l2', n_jobs=8)

t = np.ones(n)  # time of measurement
loc = z[['x', 'y']].values
z = z['por'].values
mu = np.mean(z * t)

In [144]:
space = 1000
h = np.arange(0, 20000, space)

d = [0]
gamma = [0]

for k in range(1, len(h)):
    low = h[k] - space / 2
    upp = h[k] + space / 2
    index1, index2 = np.where((distance >= low) & (distance < upp))
    if len(index1) == 0:
        continue
    gamma.append(np.mean([(z[i] - z[j]) ** 2 for (i, j) in zip(index1, index2)]) / 2.0)      
    d.append(h[k])
    
plt.figure()
plt.plot(d, gamma, '.-')
plt.show()



In [145]:
def func(x, a, c, d):
    """ self-defined function """
    return c * (1 - np.exp(- (x / a) ** d))

popt, pcov = curve_fit(func, d, gamma, p0=[1, 1, 2])
print(popt)

plt.figure()
plt.plot(d, func(d, *popt), '.-')
plt.plot(d, gamma, 'r.-')
plt.show()


[ 1.          0.82186331  2.        ]
//anaconda/lib/python3.5/site-packages/scipy/optimize/minpack.py:715: OptimizeWarning: Covariance of the parameters could not be estimated
  category=OptimizeWarning)

In [146]:
def func(x, a, c):
    """ self-defined function """
    return c * (1 - np.exp(- 3 * (x / a) ** 2))

popt, pcov = curve_fit(func, d, gamma)
print(popt)

plt.figure()
plt.plot(d, func(d, *popt), '.-')
plt.plot(d, gamma, 'r.-')
plt.show()


[ 1.          0.82186331]
//anaconda/lib/python3.5/site-packages/scipy/optimize/minpack.py:715: OptimizeWarning: Covariance of the parameters could not be estimated
  category=OptimizeWarning)

In [147]:
def func(x, a, c):
    """ self-defined function """
    return c * x ** a

popt, pcov = curve_fit(func, d, gamma)
print(popt)

plt.figure()
plt.plot(d, func(d, *popt), '.-')
plt.plot(d, gamma, 'r.-')
plt.show()


[ 0.33244046  0.04025484]

In [148]:
def func(x, c0, c1, a):
    """ self-defined function """
    return c0 + c1 * (1 - np.exp(- x / a))

popt, pcov = curve_fit(func, d, gamma)
print(popt)

plt.figure()
plt.plot(d, func(np.array(d), *popt), '.-')
plt.plot(d, gamma, 'r.-')
plt.show()


[ -2.18292051e-12   8.21863305e-01   1.00000000e+00]
//anaconda/lib/python3.5/site-packages/scipy/optimize/minpack.py:715: OptimizeWarning: Covariance of the parameters could not be estimated
  category=OptimizeWarning)

In [149]:
space = 1000
h = np.arange(0, 20500, space)

d = [0]
gamma = [0]

for k in range(1, len(h)):
    low = h[k] - space / 2
    upp = h[k] + space / 2
    Z = list()
    for i in range(n):
        for j in range(i + 1, n):
            if( distance[i,j] >= low)and( distance[i,j] <= upp ):
                Z.append(0.5 * (z[i] - z[j])**2.0)
                
    gamma.append(np.sum( Z ) / ( len( Z ) ))
    d.append(h[k])

plt.figure()
plt.plot(d, gamma, '.-')
plt.show()


Reference


In [151]:
from pylab import *
import numpy as np
from pandas import DataFrame, Series
from scipy.spatial.distance import pdist, squareform
 
z = open( './data/ZoneA.dat','r' ).readlines()
z = [ i.strip().split() for i in z[10:] ]
z = np.array( z, dtype=np.float )
z = DataFrame( z, columns=['x','y','thk','por','perm','lperm','lpermp','lpermr'] )

def SVh( P, h, bw ):
    '''
    Experimental semivariogram for a single lag
    '''
    pd = squareform( pdist( P[:,:2] ) )
    N = pd.shape[0]
    Z = list()
    for i in range(N):
        for j in range(i+1,N):
            if( pd[i,j] >= h-bw)and( pd[i,j] <= h+bw):
#                 print(h-bw, h+bw)
                Z.append( ( P[i,2] - P[j,2] )**2.0 )
    return np.sum( Z ) / ( 2.0 * len( Z ) )
 
def SV( P, hs, bw ):
    '''
    Experimental variogram for a collection of lags
    '''
    sv = list()
    for h in hs:
        sv.append( SVh( P, h, bw ) )
    sv = [ [ hs[i], sv[i] ] for i in range( len( hs ) ) if sv[i] > 0 ]
    return np.array( sv ).T
 
def C( P, h, bw ):
    '''
    Calculate the sill
    '''
    c0 = np.var( P[:,2] )
    if h == 0:
        return c0
    return c0 - SVh( P, h, bw )

# part of our data set recording porosity
P = np.array( z[['x','y','por']] )
# bandwidth, plus or minus 250 meters
bw = 500
# lags in 500 meter increments from zero to 10,000
hs = np.arange(0,10500,bw)
sv = SV( P, hs, bw )
plt.figure()
plot( sv[0], sv[1], '.-' )
xlabel('Lag [m]')
ylabel('Semivariance')
title('Sample Semivariogram')
plt.show()



In [154]:
np.var(z['por'].values)


Out[154]:
0.77757733618823532

In [ ]:


In [ ]: