In [51]:
import warnings

import numpy as np
import pandas as pd

import pygslib

import gamv
In [52]:
# Load the GSLIB demo dataset (cluster.dat) into a pandas DataFrame
data = pygslib.gslib.read_gslib_file('../pygslib/data/cluster.dat')
In [53]:
# Add a constant elevation and a single domain code so this 2D dataset can
# feed the 3D / BHID-aware variogram routine defined below.
data = data.assign(Zlocation=0.0, Domain=0)
data.head()
Out[53]:
In [54]:
#-----------------------------------------------------------------------------------------------------------------
#
# Variograms GAMV low level functions
#
#-----------------------------------------------------------------------------------------------------------------
def gamv(parameters):
    """Calculate experimental variograms on sparse data.

    This function is similar to the GSLIB gamv function but with some minor
    differences:

    - the indicator automatic transformation is not applied; you may define
      indicator variables externally.
    - a bhid variable is always required; to avoid the downhole variogram use
      ``bhid=None`` or ``bhid = 0``.
    - a Z variable is required; you can use a constant coordinate to reduce
      dimensions, for example use Z=None in the parameter file or
      Z = 0 in the file.
    - the variogram cloud was implemented but it only works for the first
      variogram and first direction. It only works for some variogram types.

    Parameters
    ----------
    parameters : dict
        dictionary with calculation parameters

        The dictionary with parameters may be as follows::

            parameters = {
                'data'   : data,             # Pandas dataframe
                'columns': ['X','Y','Z','BHID'],  # X,Y,Z, and BHID column names
                'vr'     : ['v1','v2'],      # Variables names
                'tmin'   : -1.0e21,          # trimming limit, float; -1.0e21 if None or absent
                'tmax'   : 1.0e21,           # trimming limit, float; 1.0e21 if None or absent
                'nlag'   : 10,               # number of lags, int
                'xlag'   : 4,                # lag separation distance, float
                'xltol'  : 2,                # lag tolerance, float
                'azm'    : [0,0,90],         # azimuth, array('f') with bounds (ndir)
                'atol'   : [90,22.5,22.5],   # azimuth tolerance, array('f') with bounds (ndir)
                'bandwh' : [50,10,10],       # bandwidth 'horizontal', array('f') with bounds (ndir)
                'dip'    : [0,0,0],          # dip, array('f') with bounds (ndir)
                'dtol'   : [10,10,10],       # dip tolerance, array('f') with bounds (ndir)
                'bandwd' : [10,10,10],       # bandwidth 'vertical', array('f') with bounds (ndir)
                'isill'  : 0,                # standardize sills? (0=no, 1=yes), int
                'sills'  : [100, 200],       # variance used to std the sills, array('f') with bounds (nv)
                'ivtail' : 1,                # tail var. index, int
                'ivhead' : 1,                # head var. index, int
                'ivtype' : 1,                # variogram type, int
                'maxclp' : 50000}            # maximum number of variogram cloud points, int

    Warnings
    --------
    bhid must be an array of integers

    Returns
    -------
    vtab : pandas.DataFrame
        Directional variogram table with one row per (direction, lag).
        Columns include the direction ('Dip dir', 'Dip', 'Dir ID'), the
        head/tail variable names, lag geometry ('Lag', 'Lag distance',
        'X', 'Y', 'Z', 'Average distance'), the spatial function value and
        type, head/tail means and variances, and the number of pairs.
        The content is similar to the output generated by the GSLIB
        standalone program **gamv**.
    cloud : pandas.DataFrame
        Variogram cloud with columns 'Index head', 'Index tail',
        'Spatial function value' and 'Distance', one row per pair.

    Note
    ----
    The variogram cloud only works for the first variogram/direction
    and only if the variogram is of type 1, 2, 6, 7 or 8.

    The variogram types are:

    - traditional semivariogram (1)
    - traditional cross semivariogram (2)
    - pairwise relative semivariogram (6)
    - semivariogram of logarithms (7)
    - semimadogram (8)
    """
    # we only use variograms from 1 to 8, gslib uses up to 10
    assert 0 <= parameters['ivtype'] <= 8

    # ------------------------------------------------------------------
    # prepare the parameters expected by the compiled gamv routine
    # ------------------------------------------------------------------
    newpar = {}
    newpar['x'] = parameters['data'][parameters['columns'][0]].values
    newpar['y'] = parameters['data'][parameters['columns'][1]].values
    newpar['z'] = parameters['data'][parameters['columns'][2]].values
    newpar['bhid'] = parameters['data'][parameters['columns'][3]].values
    newpar['vr'] = parameters['data'][parameters['vr']].values

    # trimming limits default to +/- 1.0e21 when absent or None
    tmin = parameters.get('tmin')
    newpar['tmin'] = tmin if tmin is not None else -1.0e21
    tmax = parameters.get('tmax')
    newpar['tmax'] = tmax if tmax is not None else 1.0e21

    # sills default to the sample variance of each variable
    sills = parameters.get('sills')
    if sills is not None:
        newpar['sills'] = sills
    else:
        newpar['sills'] = [parameters['data'][ivr].var()
                           for ivr in parameters['vr']]

    newpar['nlag'] = parameters['nlag']
    newpar['xlag'] = parameters['xlag']
    newpar['xltol'] = parameters['xltol']
    newpar['azm'] = parameters['azm']
    newpar['atol'] = parameters['atol']
    newpar['bandwh'] = parameters['bandwh']
    newpar['dip'] = parameters['dip']
    newpar['dtol'] = parameters['dtol']
    newpar['bandwd'] = parameters['bandwd']
    # the compiled routine expects arrays for these scalar inputs
    newpar['isill'] = [parameters['isill']]
    newpar['ivtail'] = [parameters['ivtail']]
    newpar['ivhead'] = [parameters['ivhead']]
    newpar['ivtype'] = [parameters['ivtype']]
    newpar['maxclp'] = parameters['maxclp']

    npt, dis, gam, hm, tm, hv, tv, cldi, cldj, cldg, cldh, l = \
        pygslib.gslib.__variograms.gamv(**newpar)

    # l == maxclp means the cloud buffer was exhausted and is truncated
    if l == parameters['maxclp']:
        warnings.warn('Warning: l == maxclp; maximum number '
                      'of point clouds reached, increase maxclp '
                      'or modify variogram parameters')

    # remove unused tail from the variogram cloud arrays
    cldi = cldi[:l]
    cldj = cldj[:l]
    cldg = cldg[:l]
    cldh = cldh[:l]

    # get output like in gslib
    ndir = len(parameters['azm'])
    nlag = parameters['nlag']
    pdis, pgam, phm, ptm, phv, ptv, pnump = pygslib.gslib.__variograms.writeout(
        1, ndir, nlag, npt, dis, gam, hm, tm, hv, tv)
    # each output is an array of shape (nvarg, ndir, nlag+2)

    # build per-row metadata matching the raveled (ndir, nlag+2) layout:
    # two leading entries per direction (lag 0 and the half lag), then the
    # regular lags 1..nlag
    lag = []
    azm = []
    dip = []
    vtail = []
    vhead = []
    vtype = []
    dirID = []
    tail_name = parameters['vr'][parameters['ivtail'] - 1]
    head_name = parameters['vr'][parameters['ivhead'] - 1]
    for idr in range(ndir):
        lag.append(0)
        lag.append(0.5)
        for _ in range(2):
            azm.append(parameters['azm'][idr])
            dip.append(parameters['dip'][idr])
            vtail.append(tail_name)
            vhead.append(head_name)
            vtype.append(parameters['ivtype'])
            dirID.append(idr)
        for ilg in range(2, nlag + 2):
            lag.append(ilg - 1)
            azm.append(parameters['azm'][idr])
            dip.append(parameters['dip'][idr])
            vtail.append(tail_name)
            vhead.append(head_name)
            vtype.append(parameters['ivtype'])
            dirID.append(idr)

    vtab = pd.DataFrame(
        {
            'Dip dir': azm,
            'Dip': dip,
            'Head': vhead,
            'Tail': vtail,
            'Lag distance': pdis.ravel() * 0.0,   # filled in below
            'X': pdis.ravel() * 0.0,              # filled in below
            'Y': pdis.ravel() * 0.0,              # filled in below
            'Z': pdis.ravel() * 0.0,              # filled in below
            'Average distance': pdis.ravel(),
            'Spatial function value': pgam.ravel(),
            'Spatial function type': vtype,
            'Mean head': phm.ravel(),
            'Mean tail': ptm.ravel(),
            'Variance head': phv.ravel(),
            'Variance tail': ptv.ravel(),
            'Number of pairs': pnump.ravel(),
            'Dir ID': dirID,
            'Lag': lag,
        }
    )

    # nominal lag distance and its Cartesian offset along each direction
    DEG2RAD = 3.14159265 / 180.0
    vtab['Lag distance'] = vtab['Lag'] * parameters['xlag']
    vtab['X'] = np.sin(DEG2RAD * vtab['Dip dir']) * np.cos(DEG2RAD * vtab['Dip']) * vtab['Lag distance']
    vtab['Y'] = np.cos(DEG2RAD * vtab['Dip dir']) * np.cos(DEG2RAD * vtab['Dip']) * vtab['Lag distance']
    vtab['Z'] = np.sin(DEG2RAD * vtab['Dip']) * vtab['Lag distance']
    # mask meaningless values (zero lag distance or no pairs)
    vtab.loc[vtab['Lag distance'] == 0, 'Spatial function value'] = np.nan
    vtab.loc[vtab['Number of pairs'] == 0, 'Spatial function value'] = np.nan

    cloud = pd.DataFrame(
        {
            'Index head': cldi,
            'Index tail': cldj,
            'Spatial function value': cldg,
            'Distance': cldh
        }
    )

    return vtab, cloud
In [55]:
# Directions: scan (almost) the whole sphere with a regular grid of
# azimuth/dip directions sharing the same tolerances and wide-open bandwidths.
htol = 10       # azimuth step and tolerance (degrees)
dtol = 10       # dip step and tolerance (degrees)
wh = 100000     # 'horizontal' bandwidth (effectively unlimited)
wd = 100000     # 'vertical' bandwidth (effectively unlimited)

dips = np.arange(-85, 90, dtol)
azms = np.arange(-5, 365, htol)
ndip = dips.shape[0]
nazm = azms.shape[0]
ndir = nazm * ndip

# all (azimuth, dip) combinations, dip varying slowest -- the same order as
# a nested dip/azimuth loop
azm_grid, dip_grid = np.meshgrid(azms, dips)
azm = azm_grid.ravel().astype(float)
dip = dip_grid.ravel().astype(float)

atol = np.full(ndir, htol, dtype=float)
dtol = np.full(ndir, dtol, dtype=float)
bandwh = np.full(ndir, wh, dtype=float)
bandwd = np.full(ndir, wd, dtype=float)

parameters = {
    'data': data,                  # Pandas dataframe
    'columns': ['Xlocation', 'Ylocation', 'Zlocation', 'Domain'],  # X,Y,Z, and BHID column names
    'vr': ['Primary'],             # Variables names
    'nlag': 10,                    # number of lags, int
    'xlag': 4,                     # lag separation distance, float
    'xltol': 2,                    # lag tolerance, float
    'azm': azm,                    # azimuth, array('f') with bounds (ndir)
    'atol': atol,                  # azimuth tolerance, array('f') with bounds (ndir)
    'bandwh': bandwh,              # bandwidth 'horizontal', array('f') with bounds (ndir)
    'dip': dip,                    # dip, array('f') with bounds (ndir)
    'dtol': dtol,                  # dip tolerance, array('f') with bounds (ndir)
    'bandwd': bandwd,              # bandwidth 'vertical', array('f') with bounds (ndir)
    'isill': 1,                    # standardize sills? (0=no, 1=yes), int
    'ivtail': 1,                   # tail var. index, int
    'ivhead': 1,                   # head var. index, int
    'ivtype': 4,                   # variogram type, int
    'maxclp': 50000}               # maximum number of variogram cloud points, int
In [56]:
# run the wrapper: vtab = directional variogram table, cloud = variogram cloud
vtab, cloud= gamv(parameters)
In [57]:
# inspect the last rows of the variogram table
vtab.tail()
Out[57]:
In [58]:
# persist the variogram table for use outside the notebook
vtab.to_csv('vtab.csv', index=False)
In [59]:
# Sanity check: the sample variance and the head variance reported at
# lag zero are not equal, but should be close.
print(data['Primary'].var())
vtab.loc[vtab['Lag distance'] == 0, 'Variance head']
Out[59]:
In [60]:
import altair as alt
alt.renderers.enable('default')

# Build one legend label per direction/variogram: "dipdir-->dip|dirID|type"
vtab['tmp'] = '-->'
vtab['tmp2'] = '|'
vtab['test'] = (vtab['Dip dir'].astype(str) + vtab['tmp'] + vtab['Dip'].astype(str)
                + vtab['tmp2'] + vtab['Dir ID'].astype(str)
                + vtab['tmp2'] + vtab['Spatial function type'].astype(str))

variograms = alt.Chart(vtab).mark_line().encode(
    x='Average distance',
    y='Spatial function value',
    color='test',
    tooltip='Number of pairs'
)
reference_line = alt.Chart(pd.DataFrame({'x': [0, 45], 'y': [0.3, .3]})).mark_line().encode(
    x='x',
    y='y'
)
variograms + reference_line
Out[60]:
In [61]:
# Assign each row its index in vtk structured-grid order
# (sorted by Dip, then Lag, then Dip dir).
vtab['ijk'] = 0
grid_order = vtab.sort_values(by=['Dip', 'Lag', 'Dip dir']).index
vtab.loc[grid_order, 'ijk'] = np.arange(vtab.shape[0])

alt.Chart(vtab.reset_index()).mark_point().encode(
    x='X',
    y='Y',
    color='Spatial function value',
    tooltip='ijk:O'
)
Out[61]:
In [62]:
import vtk
from vtk.numpy_interface import dataset_adapter as vtkdsa

properties = ['Spatial function value', 'Number of pairs']

# one VTK double array per exported property, in the same order as `properties`
dataval = []
for prop in properties:
    arr = vtk.vtkDoubleArray()
    arr.SetName(prop)
    dataval.append(arr)

# Iterate rows once in structured-grid ('ijk') order. Sorting up front
# replaces the original per-row boolean lookup (vtab['ijk'] == i), which
# was O(n^2), and avoids reusing `i` for both the row and property loops.
points = vtk.vtkPoints()
for _, row in vtab.sort_values('ijk').iterrows():
    points.InsertNextPoint(row['X'], row['Y'], row['Z'])
    for iprop in range(len(properties)):
        dataval[iprop].InsertNextTuple([row[properties[iprop]]])

# grid dimensions: azimuths fastest, then lags (nlag + 2 entries), then dips
stGrid = vtk.vtkStructuredGrid()
stGrid.SetDimensions(nazm, parameters['nlag'] + 2, ndip)
stGrid.SetPoints(points)
for arr in dataval:
    stGrid.GetPointData().AddArray(arr)

# write the grid as a binary XML structured grid file
writer = vtk.vtkXMLStructuredGridWriter()
writer.SetFileName('grid.vts')
writer.SetDataModeToBinary()
writer.SetInputData(stGrid)
writer.Write()
Out[62]:
In [63]:
# total number of variogram rows; should equal ndir * (nlag + 2)
vtab.shape[0]
Out[63]:
In [64]:
# Removed leftover scratch code: `a` is never defined in this notebook,
# so `a.sort()` raised NameError on Restart & Run All.
# a.sort()
In [ ]:
# Removed leftover scratch code: `a` is never defined in this notebook,
# so `a.flags` raised NameError on Restart & Run All.
# a.flags
In [ ]:
# Removed leftover scratch code: `a` is never defined in this notebook,
# so displaying `a` raised NameError on Restart & Run All.
# a
In [ ]: