In [1]:
from __future__ import division
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
import pysal as ps
import geopandas as gpd
from geopandas import GeoSeries, GeoDataFrame
from shapely.geometry import Point
from sklearn import neighbors

sns.set(style="white")
sns.set_context({"figure.figsize": (24, 10)})

pd.options.display.float_format = '{:.2f}'.format

# Listings sample and neighbourhood polygons
abb_link = './tfg/dbases/development3.csv'
zc_link = './tfg/mapas/barrios_area.shp'

muestra = pd.read_csv(abb_link)
barrios = gpd.read_file(zc_link)

# Build point geometries from lon/lat and spatially join each listing
# to the neighbourhood polygon it falls in
geometry = [Point(xy) for xy in zip(muestra['lon'], muestra['lat'])]
crs = {'init': 'epsg:4326'}
geo_df = GeoDataFrame(muestra, crs=crs, geometry=geometry)

db = gpd.sjoin(geo_df, barrios, how="inner", op='intersects')

# Attach the distance-to-metro features and rename columns
metro = pd.read_csv('./tfg/dbases/distance_matrix_metro.csv')

db = db.join(metro.set_index('InputID'), on='id', how='left')

db = db.rename(index=str, columns={"DESBDT": "subdistrict_f",
                                   "Distance": "metro_distance",
                                   "NUMPOINTS": "metro_number"})

# Recode ground-level and below-ground floors as 0 and cast to numeric
db = pd.DataFrame(db)
db['floor'] = db['floor'].replace(['Ground floor', 'Mezzanine', 'Semi-basement',
                                   'Basement', 'ground', 'Floor -2', 'Floor -1'],
                                  0, regex=True)
db['floor'] = pd.to_numeric(db['floor'])
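
Before modelling it is worth checking how the target is distributed, since the support counts in the classification reports further down show far more existing properties than new developments. A quick look at the class balance (a sketch; it assumes new_dev is coded 0/1 as the later reports suggest):

In [ ]:
# Sketch: inspect the class balance of the target before modelling.
# Assumes new_dev is coded as 0 (existing property) / 1 (new development).
print(db['new_dev'].value_counts())
print(db['new_dev'].value_counts(normalize=True))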

In [68]:
# Assemble one row of predictors per listing (13 features)
inputs = []
for index, row in db.iterrows():
    inputs.append([ row["pricems"],
                    row["rooms"],
                    row["floor"],
                    row["air_conditioning"],
                    row["wardrobes"],
                    row["lift"],
                    row["exterior"],
                    row["garden"],
                    row["terrace"],
                    row["storeroom"],
                    row["garage"],
                    row["cl"],
                    row["metro_number"]])

# Target: whether the listing is a new development
outputs = []
for index, row in db.iterrows():
    outputs.append(row["new_dev"])
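
The iterrows loops above work but are slow on a frame of this size; an equivalent vectorised construction of the same feature matrix and target, using the same column names, would be (a sketch, not the code used for the results below):

In [ ]:
# Sketch: build the same inputs/outputs without a Python-level loop.
feature_cols = ["pricems", "rooms", "floor", "air_conditioning", "wardrobes",
                "lift", "exterior", "garden", "terrace", "storeroom",
                "garage", "cl", "metro_number"]
inputs = db[feature_cols].values.tolist()   # one row of features per listing
outputs = db["new_dev"].tolist()            # target: new development flag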

In [69]:
X = inputs
y = outputs

In [70]:
from sklearn.model_selection import train_test_split
# Random split, default 75% train / 25% test
X_train, X_test, y_train, y_test = train_test_split(X, y)
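
Because new developments are a small minority of the sample, an unseeded, unstratified split can leave few positive cases in the test set and makes successive runs hard to compare. A reproducible, stratified alternative (a sketch; the results shown below were produced with the default call above) would be:

In [ ]:
# Sketch: fix the random seed and stratify on y so both splits keep the same
# share of new developments (not what was used for the reported results).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=0)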

In [71]:
from sklearn.preprocessing import StandardScaler
# Standardise features to zero mean / unit variance, fitting on the training set only
scaler = StandardScaler()
scaler.fit(X_train)


Out[71]:
StandardScaler(copy=True, with_mean=True, with_std=True)

In [72]:
# Apply the training-set scaling to both splits
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [73]:
from sklearn.neural_network import MLPClassifier

In [93]:
# Three hidden layers of 13 units each, matching the 13 input features
mlp = MLPClassifier(hidden_layer_sizes=(13, 13, 13))

In [94]:
mlp.fit(X_train,y_train)


Out[94]:
MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(13, 13, 13), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)
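
The fitted model uses the default adam solver with max_iter=200 (visible in the repr above), so it is worth confirming that training actually converged rather than stopping at the iteration cap. A minimal check (a sketch) using the per-iteration loss that scikit-learn records for this solver:

In [ ]:
# Sketch: inspect convergence of the fitted network.
print(mlp.n_iter_)          # iterations actually run (capped at max_iter)
plt.plot(mlp.loss_curve_)   # training loss per iteration (adam/sgd solvers)
plt.xlabel("Iteration")
plt.ylabel("Training loss")
plt.show()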

In [95]:
predictions = mlp.predict(X_test)

In [96]:
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test, predictions))


[[4608   41]
 [  51   95]]
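
Rows of the matrix are the true classes and columns the predicted ones: 4608 existing properties and 95 new developments are classified correctly, with 41 false positives and 51 false negatives. With classes this unbalanced, overall accuracy is dominated by the majority class; a trivial "always predict 0" baseline already reaches about 0.97 (4649/4795), so the per-class precision and recall reported next are the more informative metrics. The baseline can be checked directly (a sketch):

In [ ]:
# Sketch: accuracy of always predicting the majority class (0 = existing).
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, [0] * len(y_test)))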

In [97]:
print(classification_report(y_test,predictions))


             precision    recall  f1-score   support

          0       0.99      0.99      0.99      4649
          1       0.70      0.65      0.67       146

avg / total       0.98      0.98      0.98      4795


In [78]:
print(classification_report(y_test,predictions))


             precision    recall  f1-score   support

          0       0.99      0.99      0.99      4649
          1       0.72      0.66      0.69       146

avg / total       0.98      0.98      0.98      4795


In [82]:
print(classification_report(y_test,predictions))


             precision    recall  f1-score   support

          0       0.99      0.99      0.99      4649
          1       0.69      0.65      0.67       146

avg / total       0.98      0.98      0.98      4795


In [87]:
print(classification_report(y_test,predictions))


             precision    recall  f1-score   support

          0       0.99      0.99      0.99      4649
          1       0.77      0.58      0.66       146

avg / total       0.98      0.98      0.98      4795
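
Judging by the out-of-order execution counters (In [78], In [82], In [87] after In [97]), the three extra reports above appear to come from separate runs of the same split/train/predict cells: the minority-class F1 moves between roughly 0.66 and 0.69 because each run draws a different random train/test split and weight initialisation. Rather than comparing single runs by eye, the spread can be measured by repeating the whole pipeline (a sketch, not the original procedure):

In [ ]:
# Sketch: repeat the split/scale/train/evaluate cycle to measure the spread
# of the minority-class F1 across runs.
from sklearn.metrics import f1_score

scores = []
for seed in range(10):
    Xtr, Xte, ytr, yte = train_test_split(X, y, random_state=seed)
    sc = StandardScaler().fit(Xtr)
    clf = MLPClassifier(hidden_layer_sizes=(13, 13, 13), random_state=seed)
    clf.fit(sc.transform(Xtr), ytr)
    scores.append(f1_score(yte, clf.predict(sc.transform(Xte))))
print(np.mean(scores), np.std(scores))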


In [ ]: