In [1]:
# Load Biospytial modules and etc.
%matplotlib inline
import sys
sys.path.append('/apps')
import django
django.setup()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
## Use the ggplot style
plt.style.use('ggplot')
sys.path.append('..')
import tools
import geopandas as gpd
from HEC_runs.fit_fia_logbiomass_logspp_GLS import prepareDataFrame, createVariogram, buildSpatialStructure,calculateGLS, bundleToGLS, fitLinearLogLogModel
new_data['residuals1'] = results.resid
In [2]:
# Load and preprocess the FIA plots + climate dataset (absolute container path).
new_data = prepareDataFrame("/RawDataCSV/idiv_share/plotsClimateData_11092017.csv")
## On HEC (alternative path when running on the HEC cluster):
#new_data = prepareDataFrame("/home/hpc/28/escamill/csv_data/idiv/plotsClimateData_11092017.csv")
In [3]:
def systSelection(dataframe, k):
    """Systematic sample: every k-th row of `dataframe`.

    Takes the rows at positions 0, k, 2k, ... (position-based, via iloc),
    preserving the original row order and index labels.
    """
    return dataframe.iloc[::k]
##################
k = 10 # The k-th element to take as a sample
In [4]:
# Draw the systematic sample: every k-th (k = 10) plot of the full dataset.
systematic_sample = systSelection(new_data,k)
In [5]:
# Map of the systematic sample coloured by logBiomass.
ax= systematic_sample.plot(column='logBiomass',figsize=(16,10),cmap=plt.cm.Blues,edgecolors='')
In [234]:
def randomSelection(dataframe, p):
    """Random sample of `p` rows, drawn uniformly without replacement.

    Uses the global numpy RNG state (np.random), so results are reproducible
    only if the caller seeds numpy beforehand.
    """
    total_rows = len(dataframe)
    chosen = np.random.choice(total_rows, p, replace=False)
    return dataframe.iloc[chosen]
#################
n = len(new_data)  # total number of plots available
p = 3000 # The amount of samples taken (let's do it without replacement)
In [7]:
# BUG FIX: the original call passed `n` (an int) instead of the dataframe, so
# `len(dataframe)` inside randomSelection raised a TypeError. Pass the data.
random_sample = randomSelection(new_data,p)
In [8]:
# Map of the random sample coloured by logBiomass.
ax= random_sample.plot(column='logBiomass',figsize=(16,10),cmap=plt.cm.Blues,edgecolors='')
In [9]:
def subselectDataFrameByCoordinates(dataframe, namecolumnx, namecolumny, minx, maxx, miny, maxy):
    """Return the rows whose coordinates fall strictly inside a bounding box.

    namecolumnx / namecolumny are the column names holding the x (longitude)
    and y (latitude) coordinates. Bounds may be given as strings or numbers;
    they are coerced to float. Boundary values themselves are excluded
    (strict inequalities on every side).
    """
    minx, maxx = float(minx), float(maxx)
    miny, maxy = float(miny), float(maxy)
    xs = dataframe[namecolumnx]
    ys = dataframe[namecolumny]
    inside = (xs > minx) & (xs < maxx) & (ys > miny) & (ys < maxy)
    return dataframe[inside]
In [10]:
# Consider the following subregion (bounding box over the south-central US).
minx = -100
maxx = -85
miny = 30
maxy = 35
section = subselectDataFrameByCoordinates(new_data,'LON','LAT',minx,maxx,miny,maxy)
#section = new_data[lambda x: (x.LON > minx) & (x.LON < maxx) & (x.LAT > miny) & (x.LAT < maxy) ]
section.plot(column='logBiomass')
Out[10]:
In [11]:
# Build the empirical variogram from precomputed GLS residuals (absolute path).
# NOTE(review): second return value `tt` is presumably the residual/lag table —
# confirm against createVariogram; `tt` is later reused for an unrelated object.
gvg,tt = createVariogram("/apps/external_plugins/spystats/HEC_runs/results/logbiomas_logsppn_res.csv",new_data)
#For HEC
#gvg,tt = createVariogram("/home/hpc/28/escamill/spystats/HEC_runs/results/logbiomas_logsppn_res.csv",new_data)
In [26]:
# numpy is already imported in the first cell; the duplicate mid-notebook
# `import numpy as np` was removed (scattered imports hide dependencies).
xx = np.linspace(0,1000000,1000)
# Overlay the fitted model curve (black, lw=2) on the empirical variogram.
gvg.plot(refresh=False)
plt.plot(xx,gvg.model.f(xx),lw=2.0,c='k')
plt.title("Empirical Variogram with fitted Whittle Model")
Out[26]:
In [24]:
# Display the fitted variogram model (rich repr of the last expression).
gvg.model
Out[24]:
In [13]:
# Time a full GLS fit on the systematic sample using the fitted variogram model.
%time n_obs,rsq,params,pvals,conf_int = bundleToGLS(systematic_sample,gvg.model)
In [235]:
# BUG FIX: the first assignment (systematic samples for k = 20..3) was dead
# code — it was immediately overwritten by the random samples — so it was
# removed. list(...) forces evaluation so that later cells can index
# `samples[8]` / `samples[22]` even under Python 3, where map is lazy.
samples = list(map(lambda i : randomSelection(new_data,3000),range(100)))
In [236]:
# Sanity check: every random sample should have exactly 3000 rows.
# A list comprehension replaces map so plt.plot also works under Python 3,
# where a bare map object is a lazy iterator matplotlib cannot consume.
plt.plot([s.shape[0] for s in samples])
Out[236]:
In [103]:
### read csv files
conf_ints = pd.read_csv("/outputs/gls_confidence_int.csv")
params = pd.read_csv("/outputs/params_gls.csv")
params2 = pd.read_csv("/outputs/params2_gls.csv")
pvals = pd.read_csv("/outputs/pvalues_gls.csv")
pnobs = pd.read_csv("/outputs/n_obs.csv")
prsqs = pd.read_csv("/outputs/rsqs.csv")
In [57]:
# Display the GLS parameter estimates table.
params
Out[57]:
In [58]:
# Display the confidence intervals table.
conf_ints
Out[58]:
In [59]:
# Display the p-values table.
pvals
Out[59]:
In [67]:
# R^2 of the GLS fits as a function of sample size.
plt.plot(pnobs.n_obs,prsqs.rsq)
plt.title("$R^2$ statistic for GLS on logBiomass ~ logSppn using Sp.autocor")
plt.xlabel("Number of observations")
Out[67]:
In [120]:
# NOTE(review): `tt` is reused here for a different object — it previously
# held the second return value of createVariogram. Consider a distinct name.
tt = params.transpose()
In [121]:
# Promote the first row (the original parameter names) to be the header.
tt.columns = tt.iloc[0]
In [123]:
# Drop the first row, now duplicated as the column labels.
tt = tt.drop(tt.index[0])
In [130]:
# Intercept estimate vs number of observations.
plt.plot(pnobs.n_obs,tt.Intercept)
plt.title("Intercept parameter")
Out[130]:
In [131]:
# logSppN slope estimate vs number of observations.
plt.plot(pnobs.n_obs,tt.logSppN)
plt.title("logSppn parameter")
Out[131]:
In [237]:
# Fit the GLS on every random sample. list(...) forces evaluation immediately
# (under Python 3, a bare map would be a lazy, single-use iterator); under
# Python 2 it merely copies the list, so behavior is unchanged.
ccs = list(map(lambda s : bundleToGLS(s,gvg.model),samples))
In [207]:
#bundleToGLS(samples[22],gvg.model)
# Build the spatial covariance matrix for sample 8 from the fitted model.
covMat = buildSpatialStructure(samples[8],gvg.model)
In [208]:
#np.linalg.pinv(covMat)
# GLS fit for sample 8 using the covariance matrix built above.
calculateGLS(samples[8],covMat)
#tt = covMat.flatten()
In [225]:
# Variogram object for sample 8, reusing the globally fitted model.
secvg = tools.Variogram(samples[8],'logBiomass',model=gvg.model)
In [226]:
# Presumably the pairwise distance matrix between the sample's coordinates —
# TODO confirm against tools.Variogram.
DM = secvg.distance_coordinates
In [227]:
# Flatten the distance matrix to a 1-D array.
dm = DM.flatten()
In [228]:
# In-place ascending sort of the flattened distances.
dm.sort()
In [229]:
# Wrap the sorted distances in a DataFrame for filtering convenience.
pdm = pd.DataFrame(dm)
In [230]:
# Keep the strictly positive distances (drop the zero self/duplicate pairs).
# FIX: DataFrame.sort() was deprecated and removed in pandas >= 0.20;
# sort_values(by=0) is the modern equivalent. Here it is effectively a no-op
# reorder, since `dm` was already sorted before building pdm.
xxx = pdm.loc[pdm[0] > 0].sort_values(by=0)
In [231]:
# Number of strictly positive distances.
xxx.shape
Out[231]:
In [232]:
# Presumably a sanity check: positive distances (8996780) + diagonal zeros
# (3000) vs the full 3000*3000 matrix; a nonzero result would suggest extra
# zero distances, i.e. duplicated coordinates — TODO confirm interpretation.
8996780 + 3000 - (3000 * 3000)
Out[232]:
In [190]:
# Total number of entries in the flattened distance matrix.
pdm.shape
Out[190]:
In [194]:
# Drop plots sharing identical (newLon, newLat) coordinates — duplicated
# locations can make the spatial covariance matrix singular.
dd = samples[22].drop_duplicates(subset=['newLon','newLat'])
In [198]:
# Variogram object for the de-duplicated sample, reusing the fitted model.
secvg2 = tools.Variogram(dd,'logBiomass',model=gvg.model)
In [199]:
# Rebuild the covariance matrix on the de-duplicated sample (reuses the name
# `covMat` from the earlier cell).
covMat = buildSpatialStructure(dd,gvg.model)
In [200]:
# GLS fit on the de-duplicated sample.
calculateGLS(dd,covMat)
Out[200]:
In [ ]:
In [ ]:
In [197]:
# Dimensions of sample 22 before de-duplication.
samples[22].shape
Out[197]:
In [181]:
# BUG FIX: `.values` is a property, not a method — `xxx.values()` raised
# "'numpy.ndarray' object is not callable". NOTE(review): xxx is a one-column
# DataFrame, so .values is shape (n, 1); confirm corr_f accepts that shape.
gvg.model.corr_f(xxx.values)
In [166]:
# NOTE(review): `kk` is not defined anywhere in this notebook — this cell
# raises NameError on Restart & Run All; remove it or define `kk` first.
kk
Out[166]:
In [139]:
# Model correlation at a lag distance of 100.
gvg.model.corr_f([100])
Out[139]:
In [140]:
# Model correlation at a lag distance of 10.
gvg.model.corr_f([10])
Out[140]:
In [ ]: