In [1]:
from __future__ import division
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
import pysal as ps
import geopandas as gpd
from geopandas import GeoSeries, GeoDataFrame
from shapely.geometry import Point
from sklearn import neighbors

sns.set(style="white")
sns.set_context({"figure.figsize": (24, 10)})

pd.options.display.float_format = '{:.2f}'.format

abb_link = './tfg/dbases/development3.csv'
zc_link = './tfg/mapas/barrios_area.shp'

muestra = pd.read_csv(abb_link)
barrios = gpd.read_file(zc_link)

geometry = [Point(xy) for xy in zip(muestra['lon'], muestra['lat'])]
crs = {'init': 'epsg:4326'}
geo_df = GeoDataFrame(muestra, crs=crs, geometry=geometry)

db = gpd.sjoin(geo_df, barrios, how="inner", op='intersects')

metro = pd.read_csv('./tfg/dbases/distance_matrix_metro.csv')

db = db.join(metro.set_index('InputID'),
                            on='id', how='left')

db = db.rename(index=str, columns={"DESBDT": "subdistrict_f", "Distance": "metro_distance", "NUMPOINTS": "metro_number"})

db = pd.DataFrame(db)
db['floor']=db['floor'].replace(['Ground floor', 'Mezzanine', 'Semi-basement', 'Basement', 'ground', 'Floor -2', 'Floor -1'], 0,regex=True)
#db.replace(u'\xe', 'A')
db['floor'] = pd.to_numeric(db['floor'])

In [4]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pysal as ps
import geopandas as gpd
from sklearn import cluster
from sklearn.preprocessing import scale

Agregaci\'on de variables a nivel barrio


In [50]:
varis = ['pricems', 'rooms', 'floor', 'needs_renovating', 'garden', 'terrace', 'new_dev', 'garage']

In [10]:
aves = db.groupby('GEOCODIGO')[varis].mean()
aves.info()


<class 'pandas.core.frame.DataFrame'>
Index: 128 entries, 079011 to 079215
Data columns (total 9 columns):
pricems             128 non-null float64
rooms               128 non-null float64
floor               128 non-null float64
needs_renovating    128 non-null float64
garden              128 non-null float64
terrace             128 non-null float64
rooms               128 non-null float64
new_dev             128 non-null float64
garage              128 non-null float64
dtypes: float64(9)
memory usage: 10.0+ KB

In [12]:
types = pd.get_dummies(db['metro_number'])
prop_types = types.join(db['GEOCODIGO'])\
                    .groupby('GEOCODIGO')\
                    .sum()
prop_types_pct = (prop_types * 100).div(prop_types)
prop_types_pct.info()


<class 'pandas.core.frame.DataFrame'>
Index: 128 entries, 079011 to 079215
Data columns (total 17 columns):
0     19 non-null float64
1     8 non-null float64
2     15 non-null float64
3     15 non-null float64
4     18 non-null float64
5     10 non-null float64
6     7 non-null float64
7     10 non-null float64
8     8 non-null float64
9     1 non-null float64
10    4 non-null float64
11    4 non-null float64
12    4 non-null float64
13    2 non-null float64
14    1 non-null float64
15    1 non-null float64
16    1 non-null float64
dtypes: float64(17)
memory usage: 18.0+ KB

In [13]:
aves_props = aves.join(prop_types_pct)
#eliminar valores nulos
aves_props = aves_props.fillna(value=0)

In [14]:
db1 = pd.DataFrame(\
                 scale(aves_props), \
                 index=aves_props.index, \
                 columns=aves_props.columns)\
       #.rename(lambda x: str(int(x))              )

In [16]:
#zc = gpd.read_file(zc_link)
#zc.plot(color='green')
#sns.plt.show()

In [162]:
db1.info()


<class 'pandas.core.frame.DataFrame'>
Index: 128 entries, 079011 to 079215
Data columns (total 25 columns):
pricems             128 non-null float64
rooms               128 non-null float64
floor               128 non-null float64
needs_renovating    128 non-null float64
terrace             128 non-null float64
rooms               128 non-null float64
new_dev             128 non-null float64
garage              128 non-null float64
0                   128 non-null float64
1                   128 non-null float64
2                   128 non-null float64
3                   128 non-null float64
4                   128 non-null float64
5                   128 non-null float64
6                   128 non-null float64
7                   128 non-null float64
8                   128 non-null float64
9                   128 non-null float64
10                  128 non-null float64
11                  128 non-null float64
12                  128 non-null float64
13                  128 non-null float64
14                  128 non-null float64
15                  128 non-null float64
16                  128 non-null float64
dtypes: float64(25)
memory usage: 26.0+ KB

In [17]:
#zdb = db1.set_index('subdistrict_f').join(zc[['DESBDT', 'geometry']], on='DESBDT').dropna()

zdb = zc[['geometry', 'GEOCODIGO']].join(db1, on='GEOCODIGO')\
                                         .dropna()

In [46]:
km5 = cluster.KMeans(n_clusters=3)
km5cls = km5.fit(zdb.drop(['geometry', 'GEOCODIGO'], axis=1).values)
f, ax = plt.subplots(1, figsize=(9, 9))

zdb.assign(cl=km5cls.labels_)\
   .plot(column='cl', categorical=True, legend=True, \
         linewidth=0.1, edgecolor='white', ax=ax)
ax.set_axis_off()
plt.show()



In [47]:
km5cls.labels_


Out[47]:
array([1, 1, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2,
       2, 2, 2, 1, 1, 2, 2, 0, 1, 2, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 0, 0, 0,
       1, 0, 1, 0, 0, 2, 2, 2, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1,
       1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0], dtype=int32)

In [48]:
cl_pcts = prop_types_pct.reindex(zdb['GEOCODIGO'])\
                          .assign(cl=km5cls.labels_)\
                          .groupby('cl')\
                          .count()

N\'umero de bocas de metro en los barrios que componen la zona


In [49]:
cl_pcts


Out[49]:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
cl
0 11 3 3 4 2 1 2 2 0 0 0 0 0 1 0 0 0
1 8 4 11 11 14 5 4 6 3 1 2 1 2 0 1 0 0
2 0 1 1 0 2 4 1 2 5 0 2 3 2 1 0 1 1

In [44]:
f, ax = plt.subplots(1, figsize=(18, 9))
cl_pcts.plot(kind='bar', stacked=False, ax=ax, \
             cmap='Set2', linewidth=2)
ax.legend(ncol=1, loc="right");

In [45]:
plt.show()



In [176]:
type(cl_pcts)


Out[176]:
pandas.core.frame.DataFrame

In [226]:
zdb.info()


<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 128 entries, 0 to 127
Data columns (total 27 columns):
geometry            128 non-null object
GEOCODIGO           128 non-null object
pricems             128 non-null float64
rooms               128 non-null float64
floor               128 non-null float64
needs_renovating    128 non-null float64
terrace             128 non-null float64
rooms               128 non-null float64
new_dev             128 non-null float64
garage              128 non-null float64
0                   128 non-null float64
1                   128 non-null float64
2                   128 non-null float64
3                   128 non-null float64
4                   128 non-null float64
5                   128 non-null float64
6                   128 non-null float64
7                   128 non-null float64
8                   128 non-null float64
9                   128 non-null float64
10                  128 non-null float64
11                  128 non-null float64
12                  128 non-null float64
13                  128 non-null float64
14                  128 non-null float64
15                  128 non-null float64
16                  128 non-null float64
dtypes: float64(25), object(2)
memory usage: 28.0+ KB

In [51]:
rt_av = db.groupby('GEOCODIGO')[varis]\
           .mean()\
           .rename(lambda x: str(int(x)))
           
#pasar a int para join
#rt_av.index = rt_av.index.astype(int)

In [52]:
rt_av.describe()


Out[52]:
pricems rooms floor needs_renovating garden terrace new_dev garage
count 128.00 128.00 128.00 128.00 128.00 128.00 128.00 128.00
mean 3038.07 2.95 2.97 0.16 0.31 0.42 0.03 0.35
std 1200.85 0.48 0.82 0.10 0.26 0.12 0.08 0.24
min 1177.56 2.03 1.24 0.00 0.00 0.14 0.00 0.00
25% 2008.79 2.62 2.46 0.09 0.10 0.35 0.00 0.16
50% 2884.95 2.81 2.87 0.16 0.21 0.42 0.00 0.28
75% 3740.04 3.36 3.36 0.23 0.49 0.49 0.01 0.47
max 7070.90 4.11 5.69 0.40 0.93 0.88 0.57 0.92

In [53]:
zc['GEOCODIGO'] = pd.to_numeric(zc['GEOCODIGO'])

In [54]:
#pasar a int para join
rt_av.index = rt_av.index.astype(int)


zrt = zc[['geometry', 'GEOCODIGO']].join(rt_av, on='GEOCODIGO')\
                                         .dropna()
zrt.info()


<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 128 entries, 0 to 127
Data columns (total 10 columns):
geometry            128 non-null object
GEOCODIGO           128 non-null int64
pricems             128 non-null float64
rooms               128 non-null float64
floor               128 non-null float64
needs_renovating    128 non-null float64
garden              128 non-null float64
terrace             128 non-null float64
new_dev             128 non-null float64
garage              128 non-null float64
dtypes: float64(8), int64(1), object(1)
memory usage: 11.0+ KB

In [230]:
zrt


Out[230]:
geometry GEOCODIGO pricems rooms floor needs_renovating garden terrace rooms new_dev garage
0 POLYGON ((-3.709387921226222 40.42245753407111... 79011 4303.37 2.61 2.48 0.19 0.04 0.16 2.61 0.00 0.08
1 POLYGON ((-3.702839417757738 40.4139218689474,... 79012 3573.84 2.39 2.28 0.18 0.02 0.17 2.39 0.00 0.05
2 POLYGON ((-3.696964466155972 40.41895010196534... 79013 4479.07 3.44 2.37 0.30 0.01 0.14 3.44 0.05 0.14
3 POLYGON ((-3.699419604899507 40.42842378657503... 79014 5280.17 3.07 2.62 0.20 0.01 0.21 3.07 0.03 0.18
4 POLYGON ((-3.712238928618468 40.43022529948269... 79015 4402.41 2.61 2.94 0.18 0.02 0.22 2.61 0.03 0.08
5 POLYGON ((-3.704164127607747 40.42018917644739... 79016 4489.82 3.36 3.11 0.21 0.00 0.19 3.36 0.00 0.02
6 POLYGON ((-3.714653734661522 40.40017001671615... 79021 3189.81 2.77 2.96 0.19 0.21 0.42 2.77 0.00 0.16
7 POLYGON ((-3.704378357831487 40.39534252885921... 79022 3365.35 2.56 2.95 0.17 0.11 0.33 2.56 0.00 0.14
8 POLYGON ((-3.69777391612309 40.38929634997568,... 79023 2942.19 2.86 2.88 0.30 0.10 0.35 2.86 0.00 0.07
9 POLYGON ((-3.677683491441507 40.39329718871196... 79024 4042.00 2.46 4.28 0.06 0.58 0.29 2.46 0.12 0.48
10 POLYGON ((-3.694275614693738 40.40078721931518... 79025 3285.43 2.70 2.18 0.21 0.29 0.36 2.70 0.00 0.25
11 POLYGON ((-3.692807756170978 40.40702208537189... 79026 3344.95 2.41 2.48 0.18 0.04 0.40 2.41 0.00 0.10
12 POLYGON ((-3.690424029706606 40.40776318694795... 79027 3755.34 2.75 2.25 0.08 0.25 0.25 2.75 0.00 0.33
13 POLYGON ((-3.67842591757978 40.40745446493272,... 79031 3271.86 3.12 3.23 0.22 0.14 0.31 3.12 0.00 0.20
14 POLYGON ((-3.666675997650642 40.40383533575822... 79032 3254.62 2.53 3.55 0.08 0.26 0.50 2.53 0.00 0.28
15 POLYGON ((-3.66396964499304 40.40885539313336,... 79033 3444.14 3.36 4.00 0.39 0.36 0.49 3.36 0.00 0.44
16 POLYGON ((-3.669183986101125 40.41906808848597... 79034 4894.81 3.53 4.17 0.40 0.03 0.38 3.53 0.00 0.25
17 POLYGON ((-3.6790469031701 40.41903505771023, ... 79035 5991.07 3.36 3.34 0.28 0.06 0.31 3.36 0.00 0.29
18 POLYGON ((-3.670135442224579 40.40754708781509... 79036 4297.09 3.63 3.69 0.27 0.35 0.42 3.63 0.00 0.35
19 POLYGON ((-3.692472833840029 40.41947374398502... 79041 7070.90 3.91 2.96 0.25 0.05 0.33 3.91 0.00 0.29
20 POLYGON ((-3.6751941319097 40.42867756449152, ... 79042 4923.48 3.53 3.09 0.27 0.01 0.34 3.53 0.00 0.16
21 POLYGON ((-3.660799141948554 40.43136482449032... 79043 3622.70 2.93 2.62 0.23 0.12 0.39 2.93 0.00 0.12
22 POLYGON ((-3.65997880409425 40.43560665819661,... 79044 3517.20 3.16 3.35 0.29 0.08 0.45 3.16 0.01 0.20
23 POLYGON ((-3.679115394968796 40.43744764274194... 79045 4929.05 3.59 3.36 0.32 0.05 0.38 3.59 0.00 0.26
24 POLYGON ((-3.686119438732546 40.4377817836404,... 79046 6261.33 3.98 3.63 0.37 0.04 0.46 3.98 0.00 0.37
25 POLYGON ((-3.677703128584076 40.45048835861623... 79051 5540.24 3.97 2.97 0.30 0.33 0.55 3.97 0.00 0.61
26 POLYGON ((-3.661477498277177 40.44603650678808... 79052 3470.67 2.91 3.83 0.27 0.16 0.46 2.91 0.00 0.30
27 POLYGON ((-3.665631503894818 40.45233131447322... 79053 3734.94 2.63 2.82 0.19 0.17 0.41 2.63 0.00 0.23
28 POLYGON ((-3.690050189590395 40.4586568128549,... 79054 4557.72 3.52 4.33 0.31 0.20 0.51 3.52 0.01 0.43
29 POLYGON ((-3.675793380847276 40.46756502414711... 79055 4911.24 3.61 3.64 0.26 0.42 0.47 3.61 0.01 0.63
... ... ... ... ... ... ... ... ... ... ... ...
98 POLYGON ((-3.664950691790454 40.46006700232407... 79158 3513.07 3.37 2.15 0.22 0.78 0.48 3.37 0.00 0.67
99 POLYGON ((-3.669289844933735 40.48409887391701... 79159 3611.28 3.46 5.69 0.11 0.81 0.43 3.46 0.01 0.77
100 POLYGON ((-3.605984141060083 40.45308986553049... 79161 4299.32 3.47 2.84 0.02 0.88 0.42 3.47 0.00 0.79
101 POLYGON ((-3.623620426997361 40.46274887432375... 79162 4628.69 3.46 2.55 0.05 0.86 0.51 3.46 0.14 0.90
102 POLYGON ((-3.642589944798869 40.46911932661152... 79163 3036.49 2.79 3.14 0.17 0.42 0.53 2.79 0.00 0.36
103 POLYGON ((-3.639686227276274 40.47784966072265... 79164 2482.38 2.94 2.76 0.19 0.32 0.55 2.94 0.00 0.26
104 POLYGON ((-3.653399948220466 40.47768241550163... 79165 2568.04 3.27 2.76 0.22 0.33 0.40 3.27 0.00 0.36
105 POLYGON ((-3.652393384373118 40.51137809414914... 79166 3360.64 2.62 3.22 0.02 0.75 0.42 2.62 0.10 0.63
106 POLYGON ((-3.702841593152358 40.34752091868198... 79171 1471.80 2.68 2.21 0.14 0.30 0.45 2.68 0.05 0.25
107 POLYGON ((-3.683769739830679 40.34272323668343... 79172 1177.56 2.89 3.55 0.16 0.04 0.21 2.89 0.00 0.00
108 POLYGON ((-3.676819327658604 40.35547918097556... 79173 1822.92 2.67 3.30 0.01 0.70 0.39 2.67 0.40 0.77
109 POLYGON ((-3.681940577856947 40.36114666330185... 79174 1625.14 2.81 2.68 0.09 0.31 0.47 2.81 0.00 0.28
110 POLYGON ((-3.693862471943604 40.36401392689692... 79175 1612.90 2.90 3.74 0.07 0.49 0.37 2.90 0.35 0.47
111 POLYGON ((-3.618383541947141 40.38081503118028... 79181 2188.91 2.30 2.94 0.02 0.50 0.35 2.30 0.05 0.64
112 POLYGON ((-3.606755122201004 40.38635637148857... 79182 1922.32 3.17 3.37 0.04 0.61 0.51 3.17 0.29 0.47
113 POLYGON ((-3.576185329458775 40.41308101476726... 79191 2009.20 2.59 2.51 0.06 0.47 0.40 2.59 0.08 0.54
114 POLYGON ((-3.610558466773488 40.41624716294946... 79192 1573.40 2.64 2.86 0.23 0.14 0.43 2.64 0.00 0.28
115 POLYGON ((-3.613143496061609 40.43692527764004... 79201 2471.70 2.52 2.55 0.09 0.47 0.35 2.52 0.00 0.46
116 POLYGON ((-3.612052603244388 40.43397005346029... 79202 1576.61 2.89 2.39 0.09 0.05 0.23 2.89 0.00 0.02
117 POLYGON ((-3.615256547423901 40.42793900173848... 79203 1560.76 2.45 1.77 0.19 0.00 0.23 2.45 0.00 0.06
118 POLYGON ((-3.611226712999448 40.42754330699804... 79204 2007.54 2.76 3.14 0.14 0.25 0.24 2.76 0.00 0.25
119 POLYGON ((-3.572068193314149 40.43848495743558... 79205 2746.05 2.73 3.34 0.05 0.66 0.38 2.73 0.00 0.64
120 POLYGON ((-3.585405486367261 40.44985903008624... 79206 2678.56 2.17 2.56 0.06 0.76 0.35 2.17 0.00 0.73
121 POLYGON ((-3.608566903982153 40.44958940574202... 79207 1988.91 2.85 2.64 0.20 0.18 0.47 2.85 0.08 0.22
122 POLYGON ((-3.637459220204925 40.44851429370006... 79208 3401.20 3.12 2.50 0.08 0.74 0.58 3.12 0.00 0.62
123 POLYGON ((-3.582688240809708 40.46399803010144... 79211 2766.37 3.10 2.84 0.16 0.79 0.59 3.10 0.00 0.53
124 POLYGON ((-3.542990529201036 40.49490569453524... 79212 2248.37 2.25 1.62 0.19 0.31 0.19 2.25 0.00 0.25
125 POLYGON ((-3.578539551719767 40.47919998516569... 79213 2443.32 2.42 1.27 0.03 0.15 0.28 2.42 0.00 0.25
126 POLYGON ((-3.561754526382883 40.51115685889141... 79214 2839.00 3.11 3.95 0.01 0.64 0.71 3.11 0.57 0.83
127 POLYGON ((-3.586664171274545 40.46389995576114... 79215 3610.02 2.47 1.38 0.02 0.88 0.53 2.47 0.00 0.72

128 rows × 11 columns


In [55]:
zrt.to_file('tmp')
#matriz de pesos espaciales
w = ps.queen_from_shapefile('tmp/tmp.shp', idVariable='GEOCODIGO')

#rm -r tmp
w


Out[55]:
<pysal.weights.Contiguity.Queen at 0x7f1770fcc6d0>

Establecer minimo de viviendas para region


In [56]:
n_rev = db.groupby('GEOCODIGO')\
           .count()\
           ['price']\
           
thr = np.round(0.1 * n_rev.sum())
thr


Out[56]:
1918.0

In [57]:
np.random.seed(1234)
z = zrt.drop(['geometry', 'GEOCODIGO'], axis=1).values
maxp = ps.region.Maxp(w, z, thr, n_rev, initial=1000)

Inferencia para comprobar que los resultados son mejores que definiendo zonas al azar


In [220]:
np.random.seed(1234)
maxp.cinference(nperm=20)


maxp.cpvalue


Out[220]:
0.047619047619047616

In [58]:
lbls = pd.Series(maxp.area2region).reindex(zrt['GEOCODIGO'])

In [59]:
f, ax = plt.subplots(1, figsize=(9, 9))

zrt.assign(cl=lbls.values)\
   .plot(column='cl', categorical=True, legend=True, \
         linewidth=0.1, edgecolor='white', ax=ax)

ax.set_axis_off()

plt.show()



In [217]:
lbls


Out[217]:
GEOCODIGO
79011    1
79012    1
79013    6
79014    7
79015    6
79016    6
79021    0
79022    0
79023    0
79024    0
79025    0
79026    0
79027    0
79031    0
79032    0
79033    0
79034    6
79035    6
79036    6
79041    7
79042    6
79043    0
79044    1
79045    6
79046    7
79051    7
79052    6
79053    6
79054    7
79055    1
        ..
79158    1
79159    1
79161    1
79162    1
79163    5
79164    5
79165    5
79166    5
79171    2
79172    4
79173    2
79174    4
79175    4
79181    2
79182    4
79191    2
79192    3
79201    3
79202    3
79203    3
79204    3
79205    3
79206    5
79207    3
79208    0
79211    5
79212    5
79213    5
79214    5
79215    1
dtype: int64

In [60]:
#ver stats
zrt[varis].groupby(lbls.values).mean().T


Out[60]:
0 1 2 3 4 5 6 7
pricems 3066.53 3856.76 1979.78 2120.01 1587.31 2805.10 4557.58 5809.32
rooms 2.76 3.34 2.56 2.75 2.74 2.80 3.28 3.76
floor 2.85 2.97 2.71 3.00 3.02 2.93 3.23 3.28
needs_renovating 0.16 0.17 0.12 0.15 0.10 0.14 0.27 0.29
garden 0.28 0.48 0.32 0.26 0.23 0.43 0.08 0.11
terrace 0.40 0.48 0.41 0.37 0.46 0.45 0.33 0.42
new_dev 0.02 0.02 0.06 0.00 0.04 0.05 0.01 0.01
garage 0.26 0.55 0.34 0.26 0.27 0.42 0.19 0.39

In [1]:
#ver stats
zrt[varis].groupby(lbls.values).mean().T


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-3de4962859ea> in <module>()
      1 
      2 #ver stats
----> 3 zrt[varis].groupby(lbls.values).mean().T

NameError: name 'zrt' is not defined

In [ ]: