In [2]:
import numpy as np
import pandas as pd
import pysal as ps
from pysal.contrib.gwr.sel_bw import Sel_BW
from pysal.contrib.gwr.gwr import GWR
from pysal.contrib.glm.family import Gaussian
from scipy.stats import pearsonr
In [3]:
#Data
data = '/Users/toshan/dev/pysal/pysal/examples/georgia/GData_utm.csv'
data = pd.read_csv(data)
#shp = gp.read_file('/Users/toshan/dev/pysal/pysal/examples/georgia/G_utm.shp')
#shp.plot()
In [4]:
# Prep data into design matrix and coordinates
#Dependent variable
y = data.PctBach.reshape((-1,1))
#Design matrix - covariates - intercept added automatically
pov = data.PctPov.reshape((-1,1))
rural = data.PctRural.reshape((-1,1))
blk = data.PctBlack.reshape((-1,1))
X = np.hstack([rural, pov, blk])
labels = ['Intercept', 'PctPov', 'PctRural', 'PctBlack']
#Coordinates for calibration points
u = data.X
v = data.Y
coords = np.array(zip(u,v))
In [7]:
index = np.arange(len(y))
train = index[0:-10]
test = index[-10:]
y_train = y[train]
X_train = X[train]
coords_train = list(coords[train])
y_test = y[test]
X_test = X[test]
coords_test = list(coords[test])
bw = Sel_BW(coords, y, X, kernel='bisquare', fixed=False)
bw = bw.search(search='golden_section', criterion='AICc')
print bw
model = GWR(coords, y, X, bw, family=Gaussian(), fixed=False, kernel='bisquare')
results = model.predict(coords_test, X_test)
In [8]:
results.predictions
Out[8]:
In [50]:
bw = Sel_BW(coords, y, X, kernel='bisquare', fixed=False)
bw = bw.search(search='golden_section', criterion='AICc')
print bw
t = GWR(coords, y, X, bw).fit()
model = GWR(coords, y, X, bw, family=Gaussian(), fixed=False, kernel='bisquare')
results = model.predict(coords_test, X_test)
In [51]:
print pearsonr(results.predictions, y_test)
print results.params[0:]
print t.params[-10:]
In [26]:
#Find optimal bandwidth using golden section search to minimize AICc
#Instantiate bandwidth selection class - bisquare NN (adaptive)
bw = Sel_BW(coords, y, X, kernel='bisquare', fixed=False)
#Find optimal bandwidth by minimizing AICc using golden section search algorithm
bw = bw.search(search='golden_section', criterion='AICc')
print bw
model_a = GWR(coords, y, X, bw, family=Gaussian(), fixed=False, kernel='bisquare')
results_a = model_a.fit()
print len(results_a.localR2)
print np.mean(results_a.localR2)
exog_scale = results_a.scale
exog_resid = results_a.resid_response
In [9]:
model_b = GWR(coords, y, X, 93, family=Gaussian(), fixed=False, kernel='bisquare', points=coords_b, exog_scale=exog_scale, exog_resid=exog_resid)
results_b = model_b.fit()
print len(results_b.localR2)
print np.mean(results_b.localR2)
results_b.params
In [9]:
print results_b.bse[0:10, 1]
print results_b.tvalues[0:10, 1]
print results_b.localR2
In [12]:
#Results in a set of mappable results
results.params.shape
In [ ]:
#Map Parameter estimates and T-vals for each covariate
for param in range(results.params.shape[1]):
shp[str(param)] = results.params[:,param]
vmin, vmax = np.min(shp[str(param)]), np.max(shp[str(param)])
ax = shp.plot(str(param), vmin=vmin, vmax=vmax, figsize=(8,8), cmap='YlOrRd')
ax.set_title(labels[param] + ' Estimates')
fig = ax.get_figure()
cax = fig.add_axes([0.9, 0.1, 0.03, 0.8])
sm = plt.cm.ScalarMappable(norm=plt.Normalize(vmin=vmin, vmax=vmax), cmap='YlOrRd')
sm._A = []
fig.colorbar(sm, cax=cax)
shp[str(param)] = results.tvalues[:,param]
vmin, vmax = np.min(shp[str(param)]), np.max(shp[str(param)])
ax = shp.plot(str(param), vmin=vmin, vmax=vmax, figsize=(8,8), cmap='Greys')
ax.set_title(labels[param] + ' T-vals')
fig = ax.get_figure()
cax = fig.add_axes([0.9, 0.1, 0.03, 0.8])
sm = plt.cm.ScalarMappable(norm=plt.Normalize(vmin=vmin, vmax=vmax), cmap='Greys')
sm._A = []
fig.colorbar(sm, cax=cax)
In [121]:
#Map local R-square values which is a weighted R-square at each observation location
shp['localR2'] = results.localR2
vmin, vmax = np.min(shp['localR2']), np.max(shp['localR2'])
ax = shp.plot('localR2', vmin=vmin, vmax=vmax, figsize=(8,8), cmap='PuBuGn')
ax.set_title('Local R-Squared')
fig = ax.get_figure()
cax = fig.add_axes([0.9, 0.1, 0.03, 0.8])
sm = plt.cm.ScalarMappable(norm=plt.Normalize(vmin=vmin, vmax=vmax), cmap='PuBuGn')
sm._A = []
fig.colorbar(sm, cax=cax)
Out[121]:
In [9]:
??GWR
In [11]:
dists = np.zeros((10,159))
for x, point1 in enumerate(coords_b):
for y, point2 in enumerate(coords):
dists[x,y] = np.sqrt((point1[0] - point2[0])**2 + (point1[1] - point2[1])**2)
In [12]:
dists
Out[12]:
In [11]:
from pysal.contrib.gwr.kernels import adapt_bisquare, _Kernel
In [26]:
all_coords = np.vstack([coords, coords_b])
W = adapt_bisquare(all_coords, 93)
i = len(coords_b)
j = len(coords)
W = W[i:, :-j].T
In [10]:
test = _Kernel(coords, fixed=False, function='bisquare', truncate=False, k=92, points=coords_b)
In [11]:
test.kernel
Out[11]:
In [49]:
W[0]
Out[49]:
In [13]:
from pysal.contrib.gwr.kernels import adapt_bisquare, _Kernel
#W = adapt_bisquare(coords, 93, coords_b)
w = _Kernel(coords, fixed=False, k=93-1, function='bisquare', points=coords_b, truncate=False)
#W = _Kernel(coords, fixed=False, function='bisquare', truncate=False, k=92, points=coords_b)
In [16]:
w.bandwidth
Out[16]:
In [10]:
??adapt_bisquare()
In [8]:
results_a.TSS / results_a.RSS
In [10]:
results_a.localR2
Out[10]:
In [47]:
np.sum(results_b.W[0].reshape((-1,1)) * (results_b.y.reshape((-1,1)) - results_a.predy.reshape((-1,1)))**2)
Out[47]:
In [19]:
(results_a.y - results_a.mu)[0:5]
Out[19]:
In [39]:
(results_a.y.reshape((-1,1)) - results_a.predy.reshape((-1,1)))[0:5]
Out[39]:
In [41]:
results_a.resid_response[0:5]
Out[41]:
In [40]:
print len(test)
In [41]:
print(train)
In [42]:
print len(coords)
In [ ]: