In [1]:
# Load Biospytial modules and etc.
%matplotlib inline
import sys
sys.path.append('/apps')
import django
django.setup()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
## Use the ggplot style
plt.style.use('ggplot')
In [2]:
from external_plugins.spystats import tools
In [3]:
# My mac
data = pd.read_csv("/RawDataCSV/plotsClimateData_11092017.csv")
# My Linux desktop
#data = pd.read_csv("/RawDataCSV/idiv_share/plotsClimateData_11092017.csv")
In [4]:
data.SppN.mean()
Out[4]:
In [5]:
%time new_data = tools.toGeoDataFrame(pandas_dataframe=data,xcoord_name='LON',ycoord_name='LAT')
proj string taken from: http://spatialreference.org/
In [6]:
new_data = new_data.to_crs("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs ")
In [7]:
new_data['logBiomass'] = new_data.apply(lambda x : np.log(x.plotBiomass),axis=1)
In [8]:
new_data['newLon'] = new_data.apply(lambda c : c.geometry.x, axis=1)
new_data['newLat'] = new_data.apply(lambda c : c.geometry.y, axis=1)
In [9]:
new_data.plot(column='SppN')
Out[9]:
In [10]:
new_data['logBiomass'] = np.log(new_data.plotBiomass)
In [11]:
new_data.logBiomass.plot.hist()
Out[11]:
In [12]:
### Now with statsmodels.api
#xx = X.SppN.values.reshape(-1,1)
#xx = sm.add_constant(xx)
#model = sm.OLS(Y.values.reshape(-1,1),xx)
import statsmodels.formula.api as smf
model = smf.ols(formula='logBiomass ~ SppN',data=new_data)
results = model.fit()
param_model = results.params
results.summary()
Out[12]:
In [13]:
new_data['residuals1'] = results.resid
In [14]:
# COnsider the the following subregion
section = new_data[lambda x: (x.LON > -90) & (x.LON < -85) & (x.LAT > 30) & (x.LAT < 35) ]
In [15]:
section.plot(column='residuals1')
Out[15]:
In [16]:
section.plot(column='plotBiomass')
Out[16]:
In [17]:
section.shape
Out[17]:
In [18]:
len(data.lon)
#X = data[['AET','StandAge','lon','lat']]
X = section[['SppN','lon','lat']]
X = section[['SppN','newLon','newLat']]
#X = data[['lon','lat']]
Y = section['plotBiomass']
logY = section['logBiomass']
#Y = data[['SppN']]
## First step in spatial autocorrelation
#Y = pd.DataFrame(np.zeros(len(Y)))
## Let´s take a small sample only for the spatial autocorrelation
import numpy as np
sample_size = 2000
randindx = np.random.randint(0,X.shape[0],sample_size)
nX = X.loc[randindx]
nY = Y.loc[randindx]
In [19]:
plt.scatter(X.SppN,Y)
Out[19]:
In [20]:
Y.plot.hist(title="Biomass histogram")
Out[20]:
In [21]:
logY.plot.hist(title="log-Biomass hist")
Out[21]:
In [22]:
## Let´s make a simple linear trend here.
import statsmodels.api as sm
import statsmodels.formula.api as smf
In [23]:
### Now with statsmodels.api
#xx = X.SppN.values.reshape(-1,1)
#xx = sm.add_constant(xx)
#model = sm.OLS(Y.values.reshape(-1,1),xx)
model = smf.ols(formula='logBiomass ~ SppN',data=section)
results = model.fit()
param_model = results.params
results.summary()
Out[23]:
In [24]:
Y_hat = results.predict(section)
In [25]:
ress = (section.logBiomass - Y_hat)
In [26]:
## They are eq. test this ress - results.resid
In [27]:
param_model.Intercept
Out[27]:
In [28]:
conf_int = results.conf_int(alpha=0.05)
plt.scatter(section.SppN,section.plotBiomass)
#plt.plot(section.SppN,param_model.Intercept + param_model.SppN * section.SppN)
plt.plot(section.SppN,Y_hat)
#plt.fill_between(Y_hat,Y_hat + conf_int , Y_hat - conf_int)
conf_int
Out[28]:
In [29]:
plt.scatter(section.newLon,section.newLat,c=results.resid)
plt.colorbar()
Out[29]:
In [30]:
X = section[['newLon','newLat']].as_matrix()
Y = section[['logBiomass']].as_matrix()
In [31]:
dMM = tools._getDistanceMatrix(section)
dYY = tools._getDistResponseVariable(section,response_variable_name='logBiomass')
y = section['logBiomass']
yy = y.as_matrix()
#y2 = section[['logBiomass']]
#yy = y.values.reshape(-1,1)
#y2.shape
#dYY = sp.distance_matrix(yy,yy,p=2.0)
In [32]:
import scipy.spatial as sp
In [33]:
dM = sp.distance_matrix(X,X,p=2.0)
dY = sp.distance_matrix(Y,Y,p=2.0)
In [34]:
plt.imshow(dM)
Out[34]:
In [35]:
plt.imshow(dM)
Out[35]:
In [36]:
plt.imshow(dY)
Out[36]:
In [37]:
#d = pd.DataFrame({ 'dist' : dM.flatten(), 'ydif' : dY.flatten()})
dist = dMM.flatten()
ydif = dYY.flatten()
d = pd.DataFrame({'dist': dist,'ydif':ydif})
In [38]:
vg = tools.Variogram(section,'logBiomass')
In [39]:
vg.calculate_empirical()
plt.scatter(vg.lags,vg.empirical)
Out[39]:
In [ ]:
%time env = vg.calculateEnvelope()
In [40]:
ml = vg.distance_coordinates.flatten()
yl = vg.distance_responses.flatten()
X = np.vstack([ml,yl])
X.shape
n = np.random.shuffle(X)
In [41]:
titi = tools.montecarloEnvelope(ml,yl,num_iterations=99)
In [43]:
titi
Out[43]:
In [ ]:
np.random.shuffle(X)
In [ ]:
plt.plot(X)
In [ ]:
n_bins = 100
distances = dMM.flatten()
mdist = min(distances)
Mdist = max(distances)
partitions = np.linspace(mdist,Mdist,n_bins)
In [ ]:
In [ ]:
cosas = map(lambda (i,x) : 0.5* (d[ ( d.dist < partitions[i+1]) & (d.dist>partitions[i])].ydif.mean()),enumerate(partitions[:len(partitions) - 1]))
In [ ]:
plt.scatter(partitions[:len(partitions) -1] ,cosas)
In [ ]:
c.ydif.mean()
In [ ]:
# Import GPFlow
import GPflow as gf
k = gf.kernels.Matern12(2, active_dims = [0,1] )
In [ ]:
results.resid.plot.hist()
In [ ]:
model = gf.gpr.GPR(section[['newLon','newLat']].as_matrix(),results.resid.values.reshape(-1,1),k)
In [ ]:
%time model.optimize()
In [ ]:
model.get_parameter_dict()
In [ ]:
import numpy as np
Nn = 300
dsc = section
predicted_x = np.linspace(min(dsc.newLon),max(dsc.newLon),Nn)
predicted_y = np.linspace(min(dsc.newLat),max(dsc.newLat),Nn)
Xx, Yy = np.meshgrid(predicted_x,predicted_y)
## Fake richness
fake_sp_rich = np.ones(len(Xx.ravel()))
predicted_coordinates = np.vstack([ Xx.ravel(), Yy.ravel()]).transpose()
#predicted_coordinates = np.vstack([section.SppN, section.newLon,section.newLat]).transpose()
In [ ]:
predicted_coordinates.shape
In [ ]:
means,variances = model.predict_y(predicted_coordinates)
In [ ]:
means = map(lambda x : np.exp(x),means)
variances = map(lambda x : np.exp(x),variances)
In [ ]:
sum(means)
In [ ]:
fig = plt.figure(figsize=(16,10), dpi= 80, facecolor='w', edgecolor='w')
#plt.pcolor(Xx,Yy,np.sqrt(variances.reshape(Nn,Nn))) #,cmap=plt.cm.Greens)
plt.pcolormesh(Xx,Yy,np.sqrt(variances.reshape(Nn,Nn)))
plt.colorbar()
plt.scatter(dsc.newLon,dsc.newLat,c=dsc.SppN,edgecolors='')
plt.title("VAriance Biomass")
plt.colorbar()
In [ ]:
import cartopy
plt.figure(figsize=(17,11))
proj = cartopy.crs.PlateCarree()
ax = plt.subplot(111, projection=proj)
ax = plt.axes(projection=proj)
#algo = new_data.plot(column='SppN',ax=ax,cmap=colormap,edgecolors='')
#ax.set_extent([-93, -70, 30, 50])
#ax.set_extent([-100, -60, 20, 50])
ax.set_extent([-95, -70, 25, 45])
#ax.add_feature(cartopy.feature.LAND)
ax.add_feature(cartopy.feature.OCEAN)
ax.add_feature(cartopy.feature.COASTLINE)
ax.add_feature(cartopy.feature.BORDERS, linestyle=':')
ax.add_feature(cartopy.feature.LAKES, alpha=0.9)
ax.stock_img()
#ax.add_geometries(new_data.geometry,crs=cartopy.crs.PlateCarree())
#ax.add_feature(cartopy.feature.RIVERS)
mm = ax.pcolormesh(Xx,Yy,means.reshape(Nn,Nn),transform=proj )
#cs = plt.contour(Xx,Yy,np.sqrt(variances).reshape(Nn,Nn),linewidths=2,cmap=plt.cm.Greys_r,linestyles='dotted')
cs = plt.contour(Xx,Yy,means.reshape(Nn,Nn),linewidths=2,colors='k',linestyles='dotted',levels=[4.0,5.0,6.0,7.0,8.0])
plt.clabel(cs, fontsize=16,inline=True,fmt='%1.1f')
#ax.scatter(new_data.lon,new_data.lat,c=new_data.error,edgecolors='',transform=proj,cmap=plt.cm.Greys,alpha=0.2)
plt.colorbar(mm)
plt.title("Predicted Species Richness")
#(x.LON > -90) & (x.LON < -80) & (x.LAT > 40) & (x.LAT < 50)
In [ ]:
model.kern
In [ ]:
model.likelihood
In [ ]:
X_ = section[['LON','LAT']]
%time resid_hat, resid_hat_variance = model.predict_y(X_)
In [ ]:
resid_hat.shape
In [ ]:
qr_errors = qr_errors.values.reshape(len(qr_errors),1)
In [ ]:
#new_data= new_data.assign(error=lambda y : (y.SppN - y.pred_y)**2 )
sqr_error_errors = pd.DataFrame((qr_errors - resid_hat)**2)
In [ ]:
print(sqr_error_errors[0].mean())
print(sqr_error_errors[0].std())
In [ ]:
true_error
In [ ]:
sqr_error_errors.mean()
In [ ]:
k.ARD?
In [ ]: