In [1]:
# Load Biospytial modules and etc.
%matplotlib inline
import sys
sys.path.append('/apps')
sys.path.append('..')
sys.path.append('../spystats')
import django
django.setup()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
## Use the ggplot style
plt.style.use('ggplot')
import tools
Use this to automate the process. Be carefull it can overwrite current results
run ../HEC_runs/fit_fia_logbiomass_logspp_GLS.py /RawDataCSV/idiv_share/plotsClimateData_11092017.csv /apps/external_plugins/spystats/HEC_runs/results/logbiomas_logsppn_res.csv -85 -80 30 35
We will use the FIA dataset and for exemplary purposes we will take a subsample of this data. Also important. The empirical variogram has been calculated for the entire data set using the residuals of an OLS model.
We will use some auxiliary functions defined in the fit_fia_logbiomass_logspp_GLS
.
You can inspect the functions using the ?? symbol.
In [ ]:
In [2]:
from HEC_runs.fit_fia_logbiomass_logspp_GLS import prepareDataFrame,loadVariogramFromData,buildSpatialStructure, calculateGLS, initAnalysis, fitGLSRobust
In [3]:
section = initAnalysis("/RawDataCSV/idiv_share/FIA_Plots_Biomass_11092017.csv",
"/apps/external_plugins/spystats/HEC_runs/results/variogram/data_envelope.csv",
-130,-60,30,40)
In [4]:
#section = initAnalysis("/RawDataCSV/idiv_share/plotsClimateData_11092017.csv",
# "/apps/external_plugins/spystats/HEC_runs/results/variogram/data_envelope.csv",
# -85,-80,30,35)
# IN HEC
#section = initAnalysis("/home/hpc/28/escamill/csv_data/idiv/FIA_Plots_Biomass_11092017.csv","/home/hpc/28/escamill/spystats/HEC_runs/results/variogram/data_envelope.csv",-85,-80,30,35)
In [5]:
section.shape
Out[5]:
Now we will obtain the data from the calculated empirical variogram.
In [6]:
gvg,tt = loadVariogramFromData("/apps/external_plugins/spystats/HEC_runs/results/variogram/data_envelope.csv",section)
In [7]:
gvg.plot(refresh=False,with_envelope=True)
In [8]:
resum,gvgn,resultspd,results = fitGLSRobust(section,gvg,num_iterations=1,distance_threshold=1000000)
In [9]:
resum.as_text
Out[9]:
restricted w/ all data spatial correlation parameters Log-Likelihood: -16607 AIC: 3.322e+04
restricted w/ restricted spatial correlation parameters Log-Likelihood: -16502. AIC: 3.301e+04
In [ ]:
plt.plot(resultspd.rsq)
plt.title("GLS feedback algorithm")
plt.xlabel("Number of iterations")
plt.ylabel("R-sq fitness estimator")
In [ ]:
resultspd.columns
In [ ]:
a = map(lambda x : x.to_dict(), resultspd['params'])
In [ ]:
paramsd = pd.DataFrame(a)
In [ ]:
paramsd
In [ ]:
plt.plot(paramsd.Intercept.loc[1:])
plt.get_yaxis().get_major_formatter().set_useOffset(False)
In [ ]:
fig = plt.figure(figsize=(10,10))
plt.plot(paramsd.logSppN.iloc[1:])
In [ ]:
variogram_data_path = "/apps/external_plugins/spystats/HEC_runs/results/variogram/data_envelope.csv"
thrs_dist = 100000
emp_var_log_log = pd.read_csv(variogram_data_path)
In [ ]:
gvg = tools.Variogram(section,'logBiomass',using_distance_threshold=thrs_dist)
gvg.envelope = emp_var_log_log
gvg.empirical = emp_var_log_log.variogram
gvg.lags = emp_var_log_log.lags
#emp_var_log_log = emp_var_log_log.dropna()
#vdata = gvg.envelope.dropna()
In [ ]:
matern_model = tools.MaternVariogram(sill=0.34,range_a=100000,nugget=0.33,kappa=4)
whittle_model = tools.WhittleVariogram(sill=0.34,range_a=100000,nugget=0.0,alpha=3)
exp_model = tools.ExponentialVariogram(sill=0.34,range_a=100000,nugget=0.33)
gaussian_model = tools.GaussianVariogram(sill=0.34,range_a=100000,nugget=0.33)
spherical_model = tools.SphericalVariogram(sill=0.34,range_a=100000,nugget=0.33)
In [ ]:
gvg.model = whittle_model
#gvg.model = matern_model
#models = map(lambda model : gvg.fitVariogramModel(model),[matern_model,whittle_model,exp_model,gaussian_model,spherical_model])
gvg.fitVariogramModel(whittle_model)
In [ ]:
import numpy as np
xx = np.linspace(0,1000000,1000)
gvg.plot(refresh=False,with_envelope=True)
plt.plot(xx,whittle_model.f(xx),lw=2.0,c='k')
plt.title("Empirical Variogram with fitted Whittle Model")
In [ ]:
def randomSelection(n,p):
idxs = np.random.choice(n,p,replace=False)
random_sample = new_data.iloc[idxs]
return random_sample
#################
n = len(new_data)
p = 3000 # The amount of samples taken (let's do it without replacement)
In [ ]:
random_sample = randomSelection(n,100)