In [13]:
# Load Biospytial modules and other dependencies.
%matplotlib inline
import sys
sys.path.append('/apps')
import django
django.setup()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
## Use the ggplot style
plt.style.use('ggplot')
In [14]:
# My mac
#data = pd.read_csv("/RawDataCSV/plotsClimateData_11092017.csv")
# My Linux desktop
data = pd.read_csv("/RawDataCSV/idiv_share/plotsClimateData_11092017.csv")
In [15]:
data.SppN.mean()
Out[15]:
In [16]:
import geopandas as gpd
In [17]:
from django.contrib.gis import geos
from shapely.geometry import Point
In [18]:
data['geometry'] = data.apply(lambda z: Point(z.LON, z.LAT), axis=1)
new_data = gpd.GeoDataFrame(data)
In [19]:
new_data.crs = {'init':'epsg:4326'}
proj string taken from: http://spatialreference.org/
In [20]:
new_data = new_data.to_crs("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs ")
new_data['newLon'] = new_data.apply(lambda c : c.geometry.x, axis=1)
new_data['newLat'] = new_data.apply(lambda c : c.geometry.y, axis=1)
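To sanity-check the reprojection, the same Albers proj string can be passed to pyproj and a single LON/LAT pair compared against the newLon/newLat columns. A minimal sketch, assuming pyproj >= 2 is available in this environment:
from pyproj import Transformer
aea = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs"
# always_xy=True keeps the (lon, lat) -> (x, y) axis order
to_aea = Transformer.from_crs("EPSG:4326", aea, always_xy=True)
x0, y0 = to_aea.transform(data.LON.iloc[0], data.LAT.iloc[0])
print(x0, y0, new_data.newLon.iloc[0], new_data.newLat.iloc[0])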
In [21]:
new_data['logBiomass'] = np.log(new_data.plotBiomass)
new_data['logSppN'] = np.log(new_data.SppN)
In [22]:
## Let's fit a simple linear trend here.
import statsmodels.api as sm
import statsmodels.formula.api as smf
In [23]:
## All data
### Now with statsmodels.api
#xx = X.SppN.values.reshape(-1,1)
#xx = sm.add_constant(xx)
#model = sm.OLS(Y.values.reshape(-1,1),xx)
model = smf.ols(formula='logBiomass ~ logSppN',data=new_data)
results = model.fit()
param_model = results.params
results.summary()
Out[23]:
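For reference, the pieces of the fitted model used below (params, resid, predict, conf_int) behave the same on any OLS fit; this self-contained sketch on synthetic data (made-up coefficients) shows how they are read off a statsmodels results object:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
rng = np.random.RandomState(0)
toy = pd.DataFrame({'logSppN': rng.uniform(0, 3, 200)})
toy['logBiomass'] = 1.5 + 0.4 * toy.logSppN + rng.normal(scale=0.3, size=200)
toy_fit = smf.ols('logBiomass ~ logSppN', data=toy).fit()
print(toy_fit.params['Intercept'], toy_fit.params['logSppN'])  # fitted coefficients
print(toy_fit.rsquared)                                        # coefficient of determination
print(toy_fit.conf_int(alpha=0.05))                            # 95% confidence intervals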
In [24]:
new_data['residuals1'] = results.resid
In [25]:
# Consider the following subregion
section = new_data[lambda x: (x.LON > -100) & (x.LON < -85) & (x.LAT > 30) & (x.LAT < 35) ]
section.plot(column='SppN')
section.plot(column='plotBiomass')
section.shape
Out[25]:
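GeoPandas also offers the .cx coordinate indexer, which slices on the geometry's own CRS. Because new_data is now in Albers metres, a lon/lat box has to be applied to a geographic-CRS frame; the frame geo_ll below is built only for illustration and should give roughly the same subset:
geo_ll = gpd.GeoDataFrame(data.copy(),
                          geometry=[Point(xy) for xy in zip(data.LON, data.LAT)],
                          crs='EPSG:4326')
section_cx = geo_ll.cx[-100:-85, 30:35]
section_cx.shape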
In [14]:
Y_hat = results.predict(section)
In [15]:
ress = (section.logBiomass - Y_hat)
In [16]:
param_model.Intercept
Out[16]:
In [17]:
conf_int = results.conf_int(alpha=0.05)
plt.scatter(section.SppN,section.plotBiomass)
#plt.plot(section.SppN,param_model.Intercept + param_model.SppN * section.SppN)
plt.plot(section.SppN,Y_hat)
#plt.fill_between(Y_hat,Y_hat + conf_int , Y_hat - conf_int)
conf_int
Out[17]:
In [20]:
plt.scatter(section.logSppN,section.residuals1)
Out[20]:
In [23]:
plt.scatter(section.newLon,section.newLat,c=section.residuals1)
plt.colorbar()
Out[23]:
In [24]:
# Import GPFlow
import GPflow as gf
k = gf.kernels.Matern12(2, lengthscales=0.2, active_dims = [0,1] ) + gf.kernels.Constant(2,active_dims=[0,1])
In [25]:
results.resid.plot.hist()
Out[25]:
In [28]:
model = gf.gpr.GPR(section[['newLon','newLat']].values,section.residuals1.values.reshape(-1,1),k)
In [29]:
%time model.optimize()
Out[29]:
In [30]:
k.get_parameter_dict()
Out[30]:
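The GPflow 0.x API used above (GPflow.gpr.GPR, model.optimize, get_parameter_dict) has since been replaced. A rough, unverified sketch of the same model under the GPflow 2.x API (an assumption about the installed version) would be:
import numpy as np
import gpflow
X = section[['newLon', 'newLat']].values.astype(np.float64)
Y = section.residuals1.values.reshape(-1, 1).astype(np.float64)
k2 = gpflow.kernels.Matern12(lengthscales=0.2, active_dims=[0, 1]) + gpflow.kernels.Constant()
m2 = gpflow.models.GPR(data=(X, Y), kernel=k2)
gpflow.optimizers.Scipy().minimize(m2.training_loss, m2.trainable_variables)
mean2, var2 = m2.predict_y(X)   # posterior mean and variance at the training locations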
In [ ]:
model.get_parameter_dict()
In [ ]:
import numpy as np
Nn = 300
dsc = section
predicted_x = np.linspace(min(dsc.newLon),max(dsc.newLon),Nn)
predicted_y = np.linspace(min(dsc.newLat),max(dsc.newLat),Nn)
Xx, Yy = np.meshgrid(predicted_x,predicted_y)
## Fake richness
fake_sp_rich = np.ones(len(Xx.ravel()))
predicted_coordinates = np.vstack([ Xx.ravel(), Yy.ravel()]).transpose()
#predicted_coordinates = np.vstack([section.SppN, section.newLon,section.newLat]).transpose()
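The meshgrid/ravel/vstack idiom above just expands two coordinate axes into an (Nn*Nn, 2) array of prediction points; a tiny self-contained sketch of the shapes involved:
gx = np.linspace(0.0, 1.0, 3)           # 3 grid positions in x
gy = np.linspace(10.0, 11.0, 2)         # 2 grid positions in y
GX, GY = np.meshgrid(gx, gy)            # each has shape (2, 3)
pts = np.vstack([GX.ravel(), GY.ravel()]).T
print(GX.shape, pts.shape)              # (2, 3) (6, 2): one row per (x, y) pair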
In [ ]:
predicted_coordinates.shape
In [ ]:
means,variances = model.predict_y(predicted_coordinates)
In [ ]:
sum(means)
In [ ]:
fig = plt.figure(figsize=(16,10), dpi= 80, facecolor='w', edgecolor='w')
#plt.pcolor(Xx,Yy,np.sqrt(variances.reshape(Nn,Nn))) #,cmap=plt.cm.Greens)
plt.pcolormesh(Xx,Yy,np.sqrt(variances.reshape(Nn,Nn)))
plt.colorbar()
plt.scatter(dsc.newLon,dsc.newLat,c=dsc.SppN,edgecolors='')
plt.title("VAriance Biomass")
plt.colorbar()
In [ ]:
import cartopy
plt.figure(figsize=(17,11))
proj = cartopy.crs.PlateCarree()
ax = plt.axes(projection=proj)
#algo = new_data.plot(column='SppN',ax=ax,cmap=colormap,edgecolors='')
#ax.set_extent([-93, -70, 30, 50])
#ax.set_extent([-100, -60, 20, 50])
ax.set_extent([-95, -70, 25, 45])
#ax.add_feature(cartopy.feature.LAND)
ax.add_feature(cartopy.feature.OCEAN)
ax.add_feature(cartopy.feature.COASTLINE)
ax.add_feature(cartopy.feature.BORDERS, linestyle=':')
ax.add_feature(cartopy.feature.LAKES, alpha=0.9)
ax.stock_img()
#ax.add_geometries(new_data.geometry,crs=cartopy.crs.PlateCarree())
#ax.add_feature(cartopy.feature.RIVERS)
# the prediction grid (Xx, Yy) is in Albers metres, so pass the matching CRS as transform
albers = cartopy.crs.AlbersEqualArea(central_longitude=-96, central_latitude=37.5,
                                     standard_parallels=(29.5, 45.5))
mm = ax.pcolormesh(Xx,Yy,means.reshape(Nn,Nn),transform=albers)
#cs = plt.contour(Xx,Yy,np.sqrt(variances).reshape(Nn,Nn),linewidths=2,cmap=plt.cm.Greys_r,linestyles='dotted')
cs = plt.contour(Xx,Yy,means.reshape(Nn,Nn),linewidths=2,colors='k',linestyles='dotted',levels=[4.0,5.0,6.0,7.0,8.0],transform=albers)
plt.clabel(cs, fontsize=16,inline=True,fmt='%1.1f')
#ax.scatter(new_data.lon,new_data.lat,c=new_data.error,edgecolors='',transform=proj,cmap=plt.cm.Greys,alpha=0.2)
plt.colorbar(mm)
plt.title("Predicted Species Richness")
#(x.LON > -90) & (x.LON < -80) & (x.LAT > 40) & (x.LAT < 50)
In [48]:
from external_plugins.spystats import tools
In [53]:
filename = "../HEC_runs/results/low_q/data_envelope.csv"
envelope_data = pd.read_csv(filename)
gvg = tools.Variogram(new_data,'logBiomass',using_distance_threshold=600000)
gvg.envelope = envelope_data
gvg.empirical = gvg.envelope.variogram
gvg.lags = gvg.envelope.lags
vdata = gvg.envelope.dropna()
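What the Variogram object exposes as empirical/lags is the empirical semivariogram: half the mean squared difference of the variable over point pairs grouped into distance bins. A generic sketch of that computation on a random subsample (not the spystats implementation):
from scipy.spatial.distance import pdist
sample = new_data.sample(500, random_state=1)
coords = sample[['newLon', 'newLat']].values
z = sample.logBiomass.values
d = pdist(coords)                                    # pairwise distances in metres
dz2 = pdist(z.reshape(-1, 1), metric='sqeuclidean')  # (z_i - z_j)**2 for the same pairs
bins = np.linspace(0, 600000, 30)
which = np.digitize(d, bins)
# bins that contain no pairs come out as NaN
emp_gamma = np.array([0.5 * dz2[which == b].mean() for b in range(1, len(bins))])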
In [54]:
gvg.plot(refresh=False)
In [83]:
from scipy.optimize import curve_fit
s = 0.345
r = 100000.0
nugget = 0.33
init_vals = [0.34, 50000, 0.33] # initial guesses for [sill, range_a, nugget]
best_vals_exp, covar_exp = curve_fit(exponentialVariogram, xdata=vdata.lags.values, ydata=vdata.variogram.values, p0=init_vals)
#best_vals_gaussian, covar_gaussian = curve_fit(exponentialVariogram, xdata=vdata.lags, ydata=vdata.variogram, p0=init_vals)
#best_vals_gaussian, covar_gaussian = curve_fit(sphericalVariogram, xdata=vdata.lags, ydata=vdata.variogram, p0=init_vals)
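curve_fit also returns the parameter covariance matrix, so rough 1-sigma uncertainties for the fitted sill, range and nugget can be read from its diagonal; a short sketch using the arrays returned above:
param_names = ['sill', 'range_a', 'nugget']
stderr = np.sqrt(np.diag(covar_exp))
for name, val, err in zip(param_names, best_vals_exp, stderr):
    print("%s = %.4g +/- %.2g" % (name, val, err))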
In [63]:
# evaluate the fitted Gaussian model over the lag grid (the function is scalar, so map it)
gammas_gau = np.array([gaussianVariogram(x, sill=s, range_a=r, nugget=nugget) for x in hx])
In [79]:
s = best_vals_exp[0]
r = best_vals_exp[1]
nugget = best_vals_exp[2]
fitted_exponentialVariogram = lambda x : exponentialVariogram(x,sill=s,range_a=r,nugget=nugget)
gammas = pd.DataFrame([fitted_exponentialVariogram(x) for x in hx])
import functools
fitted_gaussian2 = functools.partial(gaussianVariogram, sill=s, range_a=r, nugget=nugget)
In [33]:
hx = np.linspace(0,600000,100)
vg = tools.Variogram(section,'residuals1',using_distance_threshold=500000)
## already fitted previously
s = 0.345255240992
r = 65857.797111
nugget = 0.332850902482
In [60]:
def gaussianVariogram(h, sill=0, range_a=0, nugget=0):
    # Gaussian variogram model for a scalar lag h
    Ih = 1.0 if h >= 0 else 0.0
    g_h = ((sill - nugget) * (1 - np.exp(-(h**2 / range_a**2)))) + nugget * Ih
    return g_h
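The scalar definition above fails if a whole array of lags is passed (h >= 0 is ambiguous for arrays). A vectorized variant, sketched here with np.where and a non-zero default range_a, gives the same values element-wise:
def gaussianVariogramVec(h, sill=0.0, range_a=1.0, nugget=0.0):
    # vectorized Gaussian variogram; range_a defaults to 1.0 to avoid dividing by zero
    h = np.asarray(h, dtype=float)
    Ih = np.where(h >= 0, 1.0, 0.0)
    return (sill - nugget) * (1 - np.exp(-(h**2) / range_a**2)) + nugget * Ih
gaussianVariogramVec(hx, sill=s, range_a=r, nugget=nugget)[:5]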
In [81]:
def exponentialVariogram(h, sill=0, range_a=0, nugget=0):
    # exponential variogram model; accepts a scalar lag or an array of lags
    if isinstance(h, np.ndarray):
        Ih = np.where(h >= 0.0, 1.0, 0.0)
    else:
        Ih = 1.0 if h >= 0 else 0.0
    g_h = (sill - nugget) * (1 - np.exp(-h / range_a)) + (nugget * Ih)
    return g_h
In [82]:
# quick check of the indicator term for a scalar lag wrapped in a list
h = 2
[1.0 if hx >= 0.0 else 0.0 for hx in [h]]
In [32]:
def sphericalVariogram(h, sill=0, range_a=0, nugget=0):
    # spherical variogram model for a scalar lag h
    Ih = 1.0 if h >= 0 else 0.0
    I0r = 1.0 if h <= range_a else 0.0
    Irinf = 1.0 if h > range_a else 0.0
    g_h = (sill - nugget) * (((3*h / float(2*range_a)) - (h**3 / float(2*range_a**3))) * I0r + Irinf) + (nugget * Ih)
    return g_h
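To compare the shapes of the three theoretical models they can be evaluated over the lag grid hx with the fitted parameters and plotted together; a quick sketch (the functions are scalar, so they are mapped point by point):
gs_gau = np.array([gaussianVariogram(x, sill=s, range_a=r, nugget=nugget) for x in hx])
gs_exp = np.array([exponentialVariogram(x, sill=s, range_a=r, nugget=nugget) for x in hx])
gs_sph = np.array([sphericalVariogram(x, sill=s, range_a=r, nugget=nugget) for x in hx])
plt.plot(hx, gs_gau, label='gaussian')
plt.plot(hx, gs_exp, label='exponential')
plt.plot(hx, gs_sph, label='spherical')
plt.xlabel('lag distance (m)')
plt.ylabel('semivariance')
plt.legend()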
In [35]:
def theoreticalVariogram(model_function, sill, range_a, nugget):
    return lambda x: model_function(x, sill, range_a, nugget)
In [36]:
tvariogram = theoreticalVariogram(gaussianVariogram,s,r,nugget)
In [37]:
%time gs = np.array([tvariogram(x) for x in hx])
In [38]:
x = vg.plot(with_envelope=True,num_iterations=30,refresh=False)
plt.plot(hx,gs,color='blue')
Out[38]:
In [39]:
import statsmodels.regression.linear_model as lm
In [42]:
Mdist = vg.distance_coordinates.flatten()
%time semivariances = np.array([tvariogram(d) for d in Mdist])
In [43]:
CovMat = semivariances.reshape(len(section),len(section))
X = section.logSppN.values  # note: no intercept column is added here
Y = section.logBiomass.values
In [44]:
plt.imshow(CovMat)
Out[44]:
In [46]:
%time model = lm.GLS(Y,X,sigma=CovMat)
In [ ]:
%time results = model.fit()
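For reference, the way sigma enters lm.GLS can be exercised on a self-contained toy problem with a known error covariance; everything below (names, coefficients, the AR(1) covariance) is made up for illustration:
import statsmodels.api as sm
rng = np.random.RandomState(0)
n = 100
x = rng.uniform(0, 3, n)
Xd = sm.add_constant(x)                       # design matrix with an intercept column
rho = 0.6
Sigma = rho ** np.abs(np.subtract.outer(np.arange(n), np.arange(n)))  # AR(1)-style covariance
y = 1.5 + 0.4 * x + rng.multivariate_normal(np.zeros(n), Sigma)
gls_fit = sm.GLS(y, Xd, sigma=Sigma).fit()
print(gls_fit.params)   # estimated intercept and slope
print(gls_fit.bse)      # standard errors under the assumed error covariance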
In [ ]: