In [1]:
from __future__ import division
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
import pysal as ps
import geopandas as gpd
from geopandas import GeoSeries, GeoDataFrame
from shapely.geometry import Point
from sklearn import neighbors
# --- Display configuration -------------------------------------------------
# Seaborn "white" style, a 24x10 figure size requested through the seaborn
# context, and two-decimal formatting for floats shown by pandas.
sns.set(style="white")
sns.set_context({"figure.figsize": (24, 10)})
pd.options.display.float_format = '{:.2f}'.format
# --- Input files (paths are relative to the notebook's working directory) ---
abb_link = './tfg/dbases/development3.csv'
zc_link = './tfg/mapas/barrios_area.shp'
muestra = pd.read_csv(abb_link)
barrios = gpd.read_file(zc_link)
# Build one shapely Point per row from the 'lon'/'lat' columns and attach them
# as the geometry of a GeoDataFrame declared in EPSG:4326.
geometry = [Point(xy) for xy in zip(muestra['lon'], muestra['lat'])]
crs = {'init': 'epsg:4326'}
geo_df = GeoDataFrame(muestra, crs=crs, geometry=geometry)
# Inner spatial join: keeps only the points that intersect a polygon of
# `barrios`, and brings that polygon's attributes onto the row.
db = gpd.sjoin(geo_df, barrios, how="inner", op='intersects')
# Left-join the metro distance table: db['id'] is matched against 'InputID'.
metro = pd.read_csv('./tfg/dbases/distance_matrix_metro.csv')
db = db.join(metro.set_index('InputID'),
on='id', how='left')
# Rename the joined columns; index=str additionally converts every index label
# to a string.
db = db.rename(index=str, columns={"DESBDT": "subdistrict_f", "Distance": "metro_distance", "NUMPOINTS": "metro_number"})
# Continue with a plain pandas DataFrame instead of the GeoDataFrame.
db = pd.DataFrame(db)
# Recode the listed textual floor labels as 0. Because regex=True, each label
# is treated as a regular expression, not as a literal string.
# NOTE(review): 'Floor -2' and 'Floor -1' are also mapped to 0 -- confirm that
# below-ground floors are meant to be merged with the ground floor.
db['floor']=db['floor'].replace(['Ground floor', 'Mezzanine', 'Semi-basement', 'Basement', 'ground', 'Floor -2', 'Floor -1'], 0,regex=True)
# Disabled leftover experiment. If re-enabled as written it would not parse:
# a \x escape needs exactly two hex digits.
#db.replace(u'\xe', 'A')
# Convert 'floor' to a numeric dtype (pd.to_numeric raises on any value that
# still cannot be parsed as a number).
db['floor'] = pd.to_numeric(db['floor'])
In [2]:
# Columns used by the models below. 'pricems' is the price column whose log is
# the dependent variable; the model cells drop it again before building X.
varis = [
    'pricems',
    'rooms',
    'floor',
    'needs_renovating',
    'garden',
    'terrace',
    'new_dev',
    'garage',
]
In [31]:
# Per-cluster containers, keyed by cluster id and filled by the loop below:
#   zona -> data subset, mcl -> fitted model, mscl -> mean squared error.
zona = {}
mcl = {}
mscl = {}
In [33]:
# Fit one spatial-lag model (ps.spreg.GM_Lag) of ln(pricems) per cluster id
# 0..7, keeping the data subset in zona[clu], the fitted model in mcl[clu] and
# its in-sample mean squared error in mscl[clu].
for clu in range(0, 8):
    # Rows of this cluster with share_loc == 1, one row per (lat, lon) pair.
    zona[clu] = db[(db["cl"] == clu) & (db["share_loc"] == 1)].drop_duplicates(subset=["lat", "lon"])
    # Complete cases only. `varis` may already contain 'pricems', in which
    # case the column appears twice here; every copy is dropped again for X.
    yxs = zona[clu].loc[:, varis + ['pricems']].dropna()
    # Take y from the rows that survived dropna() so that y, X and w all
    # describe the same observations in the same order. (Previously y was
    # built from every row of zona[clu] and could be longer than X.)
    y = np.log(zona[clu].loc[yxs.index, 'pricems'])
    xs = yxs.drop('pricems', axis=1)
    # k-nearest-neighbour weights on the coordinates (k left at the library
    # default), then row-standardised.
    w = ps.knnW_from_array(zona[clu].loc[yxs.index, ['lon', 'lat']].values)
    w.transform = 'R'
    mcl[clu] = ps.spreg.GM_Lag(
        y.values[:, None],
        xs.values,
        w=w,
        spat_diag=True,
        name_x=xs.columns.tolist(),
        name_y='ln(pricems)',
        name_ds='zona ' + str([clu]),
    )
    # Mean squared error of the fitted values. Computed with NumPy because no
    # `mse` function is imported or defined anywhere in this notebook.
    mscl[clu] = np.mean((y.values - mcl[clu].predy.flatten()) ** 2)
    print(mcl[clu].summary)
Endogena
In [48]:
# Spatial-lag model (ps.spreg.GM_Lag) of ln(pricems) on every row with
# share_loc == 1, one row per (lat, lon) pair.
# NOTE: this rebinds `zona` from the per-cluster dict to a single DataFrame.
zona = db[(db["share_loc"] == 1)].drop_duplicates(subset=["lat", "lon"])
# Complete cases only. `varis` may already contain 'pricems', in which case the
# column appears twice here; every copy is dropped again for X.
yxs = zona.loc[:, varis + ['pricems']].dropna()
# Take y from the rows that survived dropna() so that y, X and w all describe
# the same observations in the same order. (Previously y was built from every
# row of `zona` and could be longer than X.)
y = np.log(zona.loc[yxs.index, 'pricems'])
xs = yxs.drop('pricems', axis=1)
# k-nearest-neighbour weights on the coordinates (k left at the library
# default), then row-standardised.
w = ps.knnW_from_array(zona.loc[yxs.index, ['lon', 'lat']].values)
w.transform = 'R'
mreg = ps.spreg.GM_Lag(
    y.values[:, None],
    xs.values,
    w=w,
    spat_diag=True,
    name_x=xs.columns.tolist(),
    name_y='ln(pricems)',
    name_ds='madrid',
)
print(mreg.summary)
In [25]:
# Same spatial-lag specification as the previous cell, but with 'garden'
# removed from the regressor list.
varis = ['pricems', 'rooms', 'floor', 'needs_renovating', 'terrace', 'new_dev', 'garage']
zona = db[(db["share_loc"] == 1)].drop_duplicates(subset=["lat", "lon"])
# Complete cases only. 'pricems' is listed twice here (once via `varis`);
# every copy is dropped again for X.
yxs = zona.loc[:, varis + ['pricems']].dropna()
# Take y from the rows that survived dropna() so that y, X and w all describe
# the same observations in the same order. (Previously y was built from every
# row of `zona` and could be longer than X.)
y = np.log(zona.loc[yxs.index, 'pricems'])
xs = yxs.drop('pricems', axis=1)
# k-nearest-neighbour weights on the coordinates (k left at the library
# default), then row-standardised.
w = ps.knnW_from_array(zona.loc[yxs.index, ['lon', 'lat']].values)
w.transform = 'R'
mreg = ps.spreg.GM_Lag(
    y.values[:, None],
    xs.values,
    w=w,
    spat_diag=True,
    name_x=xs.columns.tolist(),
    name_y='ln(pricems)',
    name_ds='madrid',
)
print(mreg.summary)
Regresores
In [6]:
# Build the regression table plus a spatially lagged copy of 'garden'
# (column 'w_garden') for the OLS cell that follows.
varis = ['pricems', 'rooms', 'floor', 'needs_renovating', 'garden', 'terrace', 'new_dev', 'garage']
zona = db[(db["share_loc"] == 1)].drop_duplicates(subset=["lat", "lon"])
# Complete cases only. 'pricems' is listed twice here (once via `varis`);
# every copy is dropped again before the regressors are passed to the model.
yxs = zona.loc[:, varis + ['pricems']].dropna()
# Take y from the rows that survived dropna() so that it lines up row by row
# with yxs / yxs_w. (Previously y was built from every row of `zona` and could
# be longer than the regressor table.)
y = np.log(zona.loc[yxs.index, 'pricems'])
# k-nearest-neighbour weights on the coordinates (k left at the library default).
# NOTE(review): no transform is set on w_garden, unlike the row-standardised
# `w` of the GM_Lag cells, so the lag below is presumably a neighbour sum
# rather than a neighbour average -- confirm this is intended.
w_garden = ps.knnW_from_array(zona.loc[yxs.index, ['lon', 'lat']].values)
yxs_w = yxs.assign(w_garden=ps.lag_spatial(w_garden, yxs['garden'].values))
In [7]:
# OLS of ln(pricems) on the regressors plus the spatial lag of 'garden',
# with spatial diagnostics.
#
# The diagnostics need a weights object that covers exactly the rows of this
# model. The original passed `w`, a leftover from whichever GM_Lag cell ran
# last, possibly built from a different `varis` and therefore a different set
# of rows. Build the weights here from the model's own rows, row-standardised
# like every `w` in this notebook, and without touching `w_garden`.
w_diag = ps.knnW_from_array(zona.loc[yxs_w.index, ['lon', 'lat']].values)
w_diag.transform = 'R'
xs_w = yxs_w.drop('pricems', axis=1)
m2 = ps.spreg.OLS(
    y.values[:, None],
    xs_w.values,
    w=w_diag,
    spat_diag=True,
    name_x=xs_w.columns.tolist(),
    name_y='ln(pricems)',
    name_ds='madrid',
)
print(m2.summary)
In [8]:
# OLS of ln(pricems) with a reduced regressor list ('rooms' and 'new_dev'
# removed) plus the spatial lag of 'garden', with spatial diagnostics.
varis = ['pricems', 'floor', 'needs_renovating', 'garden', 'terrace', 'garage']
zona = db[(db["share_loc"] == 1)].drop_duplicates(subset=["lat", "lon"])
# Complete cases only. 'pricems' is listed twice here (once via `varis`);
# every copy is dropped again for X.
yxs = zona.loc[:, varis + ['pricems']].dropna()
# Take y from the rows that survived dropna() so that y, X and the weights all
# describe the same observations in the same order. (Previously y was built
# from every row of `zona` and could be longer than X.)
y = np.log(zona.loc[yxs.index, 'pricems'])
coords = zona.loc[yxs.index, ['lon', 'lat']].values
# Untransformed k-nearest-neighbour weights (k left at the library default),
# used only to compute the lag of 'garden'.
# NOTE(review): without a transform the lag is presumably a neighbour sum
# rather than a neighbour average -- confirm this is intended.
w_garden = ps.knnW_from_array(coords)
yxs_w = yxs.assign(w_garden=ps.lag_spatial(w_garden, yxs['garden'].values))
# Weights for the diagnostics, built from this model's own rows and
# row-standardised like every `w` in this notebook. The original passed `w`,
# a leftover from an earlier cell fitted with a different `varis`, whose rows
# need not match this model's.
w_diag = ps.knnW_from_array(coords)
w_diag.transform = 'R'
xs_w = yxs_w.drop('pricems', axis=1)
m2 = ps.spreg.OLS(
    y.values[:, None],
    xs_w.values,
    w=w_diag,
    spat_diag=True,
    name_x=xs_w.columns.tolist(),
    name_y='ln(pricems)',
    name_ds='madrid',
)
print(m2.summary)
In [ ]:
In [ ]:
In [ ]:
In [ ]: