In [1]:
# Load Biospytial modules and etc.
%matplotlib inline
import sys
sys.path.append('/apps')
import django
django.setup()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
## Use the ggplot style
plt.style.use('ggplot')
sys.path.append('..')
import tools
import geopandas as gpd
from HEC_runs.fit_fia_logbiomass_logspp_GLS import prepareDataFrame, createVariogram, buildSpatialStructure,calculateGLS, bundleToGLS, fitLinearLogLogModel

new_data['residuals1'] = results.resid


In [2]:
new_data = prepareDataFrame("/RawDataCSV/idiv_share/plotsClimateData_11092017.csv")
## En Hec
#new_data = prepareDataFrame("/home/hpc/28/escamill/csv_data/idiv/plotsClimateData_11092017.csv")


/opt/conda/envs/biospytial/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2821: DtypeWarning: Columns (24) have mixed types. Specify dtype option on import or set low_memory=False.
  if self.run_code(code, result):
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Reprojecting to Alberts equal area
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Removing possible duplicates. 
 This avoids problems of Non Positive semidefinite

Subseting the data

Three different methods for subsetting the data.

  1. Using a systematic selection by index modulus
  2. Using a random uniform selection by indices.
  3. A geographic subselection (Clip)

Systematic selection


In [3]:
def systSelection(dataframe,k):
    n = len(dataframe)
    idxs = range(0,n,k)
    systematic_sample = dataframe.iloc[idxs]
    return systematic_sample
##################
k = 10 # The k-th element to take as a sample

In [4]:
systematic_sample = systSelection(new_data,k)

In [5]:
ax= systematic_sample.plot(column='logBiomass',figsize=(16,10),cmap=plt.cm.Blues,edgecolors='')


Random (Uniform) selection


In [234]:
def randomSelection(dataframe,p):
    n = len(dataframe)
    idxs = np.random.choice(n,p,replace=False)
    random_sample = dataframe.iloc[idxs]
    return random_sample
#################
n = len(new_data)
p = 3000 # The amount of samples taken (let's do it without replacement)

In [7]:
random_sample = randomSelection(n,p)

In [8]:
ax= random_sample.plot(column='logBiomass',figsize=(16,10),cmap=plt.cm.Blues,edgecolors='')


Geographic subselection


In [9]:
def subselectDataFrameByCoordinates(dataframe,namecolumnx,namecolumny,minx,maxx,miny,maxy):
    """
    Returns a subselection by coordinates using the dataframe/
    """
    minx = float(minx)
    maxx = float(maxx)
    miny = float(miny)
    maxy = float(maxy)
    section = dataframe[lambda x:  (x[namecolumnx] > minx) & (x[namecolumnx] < maxx) & (x[namecolumny] > miny) & (x[namecolumny] < maxy) ]
    return section

In [10]:
# COnsider the the following subregion
minx = -100
maxx = -85
miny = 30
maxy = 35

section = subselectDataFrameByCoordinates(new_data,'LON','LAT',minx,maxx,miny,maxy)

#section = new_data[lambda x:  (x.LON > minx) & (x.LON < maxx) & (x.LAT > miny) & (x.LAT < maxy) ]
section.plot(column='logBiomass')


Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fccdf058290>

Model Analysis with the empirical variogram


In [11]:
gvg,tt = createVariogram("/apps/external_plugins/spystats/HEC_runs/results/logbiomas_logsppn_res.csv",new_data)
#For HEC
#gvg,tt = createVariogram("/home/hpc/28/escamill/spystats/HEC_runs/results/logbiomas_logsppn_res.csv",new_data)


INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Reading the empirical Variogram file
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Instantiating a Variogram object with the values calculated before
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Dropping possible Nans
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Instantiating Matern Model...
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:fitting Whittle Model with the empirical variogram
../tools.py:549: RuntimeWarning: divide by zero encountered in power
  g_h = ((sill - nugget)*(1 - np.exp(-(h**alpha / range_a**alpha)))) + nugget*Ih
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Model fitted

In [26]:
import numpy as np
xx = np.linspace(0,1000000,1000)

gvg.plot(refresh=False)
plt.plot(xx,gvg.model.f(xx),lw=2.0,c='k')
plt.title("Empirical Variogram with fitted Whittle Model")


Out[26]:
<matplotlib.text.Text at 0x7fccdf238890>

In [24]:
gvg.model


Out[24]:
< Whittle Variogram : sill 0.340274656891, range 41061.6971399, nugget 0.329817414704, alpha1.12113685018 >

In [13]:
%time n_obs,rsq,params,pvals,conf_int = bundleToGLS(systematic_sample,gvg.model)


INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
CPU times: user 2min 38s, sys: 1.31 s, total: 2min 39s
Wall time: 1min 48s

In [235]:
samples = map(lambda i : systSelection(new_data,i), range(20,2,-1))
samples = map(lambda i : randomSelection(new_data,3000),range(100))

In [236]:
plt.plot(map(lambda s : s.shape[0],samples))


Out[236]:
[<matplotlib.lines.Line2D at 0x7fccd801c750>]

Analysis and Results for the systematic sample


In [103]:
### read csv files
conf_ints = pd.read_csv("/outputs/gls_confidence_int.csv")
params = pd.read_csv("/outputs/params_gls.csv")
params2 = pd.read_csv("/outputs/params2_gls.csv")

pvals = pd.read_csv("/outputs/pvalues_gls.csv")
pnobs = pd.read_csv("/outputs/n_obs.csv")
prsqs = pd.read_csv("/outputs/rsqs.csv")

In [57]:
params


Out[57]:
Unnamed: 0 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
0 Intercept 8.458627 8.501959 8.444683 8.504319 8.455233 8.487719 8.514224 8.489710 8.430563 8.492380 8.511993 8.459749 8.439471 8.496132 8.430853 8.464045 8.477898 8.447797
1 logSppN 0.388850 0.374571 0.405324 0.363968 0.394236 0.385002 0.369316 0.380915 0.419709 0.376678 0.361736 0.395713 0.407464 0.379930 0.415325 0.392312 0.388304 0.404760

In [58]:
conf_ints


Out[58]:
Unnamed: 0 0 1 0.1 1.1 0.2 1.2 0.3 1.3 0.4 ... 0.13 1.13 0.14 1.14 0.15 1.15 0.16 1.16 0.17 1.17
0 Intercept 8.382200 8.535054 8.424685 8.579233 8.371694 8.517673 8.430691 8.577947 8.386613 ... 8.449598 8.542666 8.387293 8.474413 8.424108 8.503982 8.442086 8.513711 8.416168 8.479427
1 logSppN 0.341435 0.436265 0.326552 0.422590 0.359728 0.450920 0.318351 0.409585 0.351409 ... 0.351194 0.408666 0.388426 0.442224 0.367768 0.416855 0.366302 0.410307 0.385440 0.424081

2 rows × 37 columns


In [59]:
pvals


Out[59]:
Unnamed: 0 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
0 Intercept 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.0
1 logSppN 1.439457e-54 5.777970e-50 1.444574e-63 2.304296e-52 3.139409e-68 2.182079e-69 2.336946e-70 5.199794e-82 3.306206e-99 3.711292e-88 1.160717e-91 8.572717e-116 2.894346e-137 1.655089e-139 1.054292e-187 1.977681e-202 1.153619e-246 0.0

In [67]:
plt.plot(pnobs.n_obs,prsqs.rsq)
plt.title("$R^2$ statistic for GLS on logBiomass ~ logSppn using Sp.autocor")
plt.xlabel("Number of observations")


Out[67]:
<matplotlib.text.Text at 0x7fccd90985d0>

In [120]:
tt = params.transpose()

In [121]:
tt.columns = tt.iloc[0]

In [123]:
tt = tt.drop(tt.index[0])

In [130]:
plt.plot(pnobs.n_obs,tt.Intercept)
plt.title("Intercept parameter")


Out[130]:
<matplotlib.text.Text at 0x7fccd81a20d0>

In [131]:
plt.plot(pnobs.n_obs,tt.logSppN)
plt.title("logSppn parameter")


Out[131]:
<matplotlib.text.Text at 0x7fccd8101dd0>

Test for analysis


In [237]:
ccs = map(lambda s : bundleToGLS(s,gvg.model),samples)


INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix
INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS

In [207]:
#bundleToGLS(samples[22],gvg.model)
covMat = buildSpatialStructure(samples[8],gvg.model)


INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix

In [208]:
#np.linalg.pinv(covMat)
calculateGLS(samples[8],covMat)
#tt =  covMat.flatten()


INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS

LinAlgErrorTraceback (most recent call last)
<ipython-input-208-b2bd56754290> in <module>()
      1 #np.linalg.pinv(covMat)
----> 2 calculateGLS(samples[8],covMat)
      3 #tt =  covMat.flatten()

/apps/external_plugins/spystats/HEC_runs/fit_fia_logbiomass_logspp_GLS.py in calculateGLS(geodataframe, CovMat)
    135     Stupid wrapper function for calculating spatial covariance matrix
    136     """
--> 137     secvg = tools.Variogram(geodataframe,'logBiomass',model=theoretical_model)
    138     logger.info("Calculating Distance Matrix")
    139     CovMat = secvg.calculateCovarianceMatrix()

/opt/conda/envs/biospytial/lib/python2.7/site-packages/statsmodels/base/model.pyc in from_formula(cls, formula, data, subset, *args, **kwargs)
    148         kwargs.update({'missing_idx': missing_idx,
    149                        'missing': missing})
--> 150         mod = cls(endog, exog, *args, **kwargs)
    151         mod.formula = formula
    152 

/opt/conda/envs/biospytial/lib/python2.7/site-packages/statsmodels/regression/linear_model.pyc in __init__(self, endog, exog, sigma, missing, hasconst, **kwargs)
    438     #TODO: add options igls, for iterative fgls if sigma is None
    439     #TODO: default if sigma is none should be two-step GLS
--> 440         sigma, cholsigmainv = _get_sigma(sigma, len(endog))
    441 
    442         super(GLS, self).__init__(endog, exog, missing=missing,

/opt/conda/envs/biospytial/lib/python2.7/site-packages/statsmodels/regression/linear_model.pyc in _get_sigma(sigma, nobs)
     77             raise ValueError("Sigma must be a scalar, 1d of length %s or a 2d "
     78                              "array of shape %s x %s" % (nobs, nobs, nobs))
---> 79         cholsigmainv = np.linalg.cholesky(np.linalg.pinv(sigma)).T
     80 
     81     return sigma, cholsigmainv

/opt/conda/envs/biospytial/lib/python2.7/site-packages/numpy/linalg/linalg.pyc in pinv(a, rcond)
   1615     _assertNoEmpty2d(a)
   1616     a = a.conjugate()
-> 1617     u, s, vt = svd(a, 0)
   1618     m = u.shape[0]
   1619     n = vt.shape[1]

/opt/conda/envs/biospytial/lib/python2.7/site-packages/numpy/linalg/linalg.pyc in svd(a, full_matrices, compute_uv)
   1357 
   1358         signature = 'D->DdD' if isComplexType(t) else 'd->ddd'
-> 1359         u, s, vt = gufunc(a, signature=signature, extobj=extobj)
   1360         u = u.astype(result_t, copy=False)
   1361         s = s.astype(_realType(result_t), copy=False)

/opt/conda/envs/biospytial/lib/python2.7/site-packages/numpy/linalg/linalg.pyc in _raise_linalgerror_svd_nonconvergence(err, flag)
     97 
     98 def _raise_linalgerror_svd_nonconvergence(err, flag):
---> 99     raise LinAlgError("SVD did not converge")
    100 
    101 def get_linalg_error_extobj(callback):

LinAlgError: SVD did not converge

In [225]:
secvg = tools.Variogram(samples[8],'logBiomass',model=gvg.model)

In [226]:
DM = secvg.distance_coordinates

In [227]:
dm =  DM.flatten()

In [228]:
dm.sort()

In [229]:
pdm = pd.DataFrame(dm)

In [230]:
xxx = pdm.loc[pdm[0] > 0].sort()


/opt/conda/envs/biospytial/lib/python2.7/site-packages/ipykernel/__main__.py:1: FutureWarning: sort(....) is deprecated, use sort_index(.....)
  if __name__ == '__main__':

In [231]:
xxx.shape


Out[231]:
(8996772, 1)

In [232]:
8996780 + 3000 - (3000 * 3000)


Out[232]:
-220

In [190]:
pdm.shape


Out[190]:
(9000000, 1)

In [194]:
dd = samples[22].drop_duplicates(subset=['newLon','newLat'])

In [198]:
secvg2 = tools.Variogram(dd,'logBiomass',model=gvg.model)

In [199]:
covMat = buildSpatialStructure(dd,gvg.model)


INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Calculating Distance Matrix

In [200]:
calculateGLS(dd,covMat)


INFO:HEC_runs.fit_fia_logbiomass_logspp_GLS:Fitting linear model using GLS
Out[200]:
(<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7fccd8132510>,
 <class 'statsmodels.iolib.summary.Summary'>
 """
                             GLS Regression Results                            
 ==============================================================================
 Dep. Variable:             logBiomass   R-squared:                       0.544
 Model:                            GLS   Adj. R-squared:                  0.543
 Method:                 Least Squares   F-statistic:                     3440.
 Date:                Thu, 18 Jan 2018   Prob (F-statistic):               0.00
 Time:                        19:57:44   Log-Likelihood:                -2917.3
 No. Observations:                2891   AIC:                             5839.
 Df Residuals:                    2889   BIC:                             5850.
 Df Model:                           1                                         
 Covariance Type:            nonrobust                                         
 ==============================================================================
                  coef    std err          t      P>|t|      [95.0% Conf. Int.]
 ------------------------------------------------------------------------------
 Intercept      8.4682      0.032    263.667      0.000         8.405     8.531
 logSppN        0.3972      0.020     19.837      0.000         0.358     0.436
 ==============================================================================
 Omnibus:                       83.032   Durbin-Watson:                   1.996
 Prob(Omnibus):                  0.000   Jarque-Bera (JB):              105.565
 Skew:                          -0.338   Prob(JB):                     1.19e-23
 Kurtosis:                       3.647   Cond. No.                         5.07
 ==============================================================================
 
 Warnings:
 [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
 """)

In [ ]:


In [ ]:


In [197]:
samples[22].shape


Out[197]:
(3000, 46)

In [181]:
gvg.model.corr_f(xxx.values())



TypeErrorTraceback (most recent call last)
<ipython-input-181-7653dae2ed47> in <module>()
----> 1 gvg.model.corr_f(xxx.values())

TypeError: 'numpy.ndarray' object is not callable

In [166]:
kk


Out[166]:
array([ 0.32981741,  0.32981741,  0.32981741, ...,  0.48093745,
        0.4972302 ,  0.51122074])

In [139]:
gvg.model.corr_f([100])


Out[139]:
array([ 0.99882585])

In [140]:
gvg.model.corr_f([10])


Out[140]:
array([ 0.99991112])

In [ ]: