In [12]:
import clusterpy
datos = clusterpy.importArcData('data/distritos_variables')
datos.cluster('arisel', ['viv'], 50, wType='rook', inits=10, dissolve=0)
datos.fieldNames


Loading data/distritos_variables.dbf
Loading data/distritos_variables.shp
Done
Getting variables
Variables successfully extracted
Running original Arisel algorithm
Number of areas:  156
Number of regions:  50
initial Solution:  [25, 25, 29, 21, 38, 2, 34, 38, 30, 28, 27, 12, 24, 42, 30, 39, 29, 44, 10, 15, 38, 16, 21, 0, 48, 30, 5, 1, 49, 6, 16, 15, 20, 18, 24, 6, 16, 8, 6, 15, 33, 49, 14, 30, 15, 35, 30, 20, 14, 25, 26, 39, 7, 43, 24, 49, 17, 38, 4, 29, 14, 25, 47, 0, 11, 36, 25, 29, 39, 25, 21, 16, 34, 31, 44, 25, 32, 23, 33, 3, 27, 42, 21, 29, 9, 40, 46, 25, 25, 29, 38, 48, 20, 25, 49, 21, 45, 25, 21, 25, 16, 49, 15, 45, 29, 35, 22, 13, 37, 41, 21, 30, 25, 30, 40, 39, 46, 30, 7, 13, 19, 21, 25, 36, 30, 25, 30, 33, 21, 21, 29, 49, 22, 49, 41, 15, 38, 28, 16, 13, 25, 15, 30, 4, 15, 38, 21, 25, 47, 27, 25, 45, 47, 15, 20, 47]
initial O.F:  3540007005.0
FINAL SOLUTION:  [25, 25, 29, 21, 38, 2, 34, 38, 30, 23, 40, 12, 24, 35, 30, 39, 29, 44, 10, 15, 38, 16, 21, 0, 48, 30, 5, 1, 49, 6, 16, 15, 20, 18, 24, 6, 21, 8, 6, 15, 25, 49, 14, 30, 15, 35, 30, 20, 14, 25, 26, 39, 7, 43, 24, 49, 17, 38, 4, 29, 14, 25, 47, 25, 11, 36, 25, 29, 39, 25, 21, 21, 34, 31, 44, 25, 28, 23, 33, 3, 27, 35, 21, 29, 9, 0, 46, 15, 25, 29, 38, 48, 20, 25, 49, 32, 45, 25, 21, 25, 16, 49, 15, 39, 29, 35, 42, 13, 37, 41, 21, 30, 25, 30, 0, 39, 46, 30, 7, 13, 19, 21, 25, 36, 30, 15, 30, 33, 21, 31, 29, 49, 22, 49, 49, 15, 38, 23, 16, 13, 25, 15, 30, 4, 15, 38, 4, 25, 47, 40, 38, 39, 47, 15, 20, 24]
FINAL OF:  2383192095.0
Done
Adding variables
Done
Out[12]:
['ID',
 'comercio',
 'cve_dist',
 'entrada',
 'gid',
 'id',
 'loop',
 'ocio',
 'pob',
 'salida',
 'servicios',
 'viv',
 'arisel_20160406114543']

In [13]:
datos.cluster('arisel', ['comercio'], 50, wType='rook', inits=10, dissolve=0)
datos.fieldNames


Getting variables
Variables successfully extracted
Running original Arisel algorithm
Number of areas:  156
Number of regions:  50
initial Solution:  [16, 20, 43, 42, 20, 11, 9, 43, 7, 31, 45, 36, 4, 10, 1, 36, 43, 42, 32, 36, 43, 26, 42, 15, 38, 7, 43, 6, 13, 8, 39, 36, 11, 21, 14, 20, 39, 20, 20, 13, 20, 13, 24, 7, 13, 10, 13, 23, 11, 20, 36, 13, 3, 13, 4, 13, 0, 20, 5, 43, 13, 28, 4, 15, 13, 13, 3, 43, 13, 45, 42, 42, 4, 18, 42, 20, 40, 26, 41, 10, 35, 36, 25, 43, 0, 45, 39, 38, 41, 7, 20, 36, 23, 20, 13, 40, 12, 20, 25, 19, 41, 13, 22, 36, 7, 34, 49, 13, 37, 2, 17, 1, 20, 7, 45, 36, 39, 32, 13, 13, 2, 25, 20, 13, 7, 7, 7, 41, 47, 48, 43, 2, 49, 13, 2, 36, 43, 40, 29, 13, 45, 27, 13, 44, 33, 20, 0, 3, 4, 45, 20, 36, 4, 13, 30, 46]
initial O.F:  24936479.0
FINAL SOLUTION:  [16, 20, 43, 42, 20, 11, 9, 43, 7, 31, 45, 36, 4, 10, 1, 36, 43, 42, 32, 33, 43, 26, 42, 15, 38, 20, 43, 6, 13, 8, 39, 36, 23, 21, 14, 20, 39, 20, 20, 13, 20, 24, 2, 7, 13, 10, 13, 23, 11, 20, 36, 13, 3, 13, 4, 24, 0, 20, 44, 43, 13, 28, 4, 15, 13, 24, 3, 43, 13, 45, 42, 42, 9, 18, 42, 20, 29, 26, 41, 10, 35, 36, 25, 43, 0, 45, 39, 33, 41, 20, 20, 5, 23, 20, 13, 40, 12, 20, 25, 19, 41, 24, 22, 36, 7, 34, 10, 13, 37, 2, 17, 1, 20, 20, 45, 36, 39, 32, 13, 13, 2, 25, 20, 13, 7, 7, 7, 41, 47, 48, 43, 2, 49, 13, 2, 33, 43, 29, 29, 13, 45, 27, 13, 44, 33, 43, 0, 3, 9, 45, 20, 33, 4, 36, 30, 46]
FINAL OF:  20005070.0
Done
Adding variables
Done
Out[13]:
['ID',
 'comercio',
 'cve_dist',
 'entrada',
 'gid',
 'id',
 'loop',
 'ocio',
 'pob',
 'salida',
 'servicios',
 'viv',
 'arisel_20160406114543',
 'arisel_20160406114812']

In [14]:
datos.cluster('arisel', ['entrada'], 50, wType='rook', inits=10, dissolve=0)
datos.fieldNames


Getting variables
Variables successfully extracted
Running original Arisel algorithm
Number of areas:  156
Number of regions:  50
initial Solution:  [20, 4, 15, 23, 2, 0, 39, 37, 21, 2, 49, 31, 46, 8, 5, 48, 18, 27, 29, 18, 15, 37, 34, 49, 18, 21, 24, 8, 26, 16, 23, 48, 0, 8, 7, 10, 23, 10, 10, 48, 4, 47, 47, 21, 38, 18, 22, 0, 42, 20, 31, 47, 11, 49, 13, 47, 25, 16, 18, 15, 42, 19, 39, 6, 49, 43, 20, 15, 48, 12, 9, 23, 39, 27, 23, 10, 37, 28, 30, 31, 49, 18, 36, 17, 9, 6, 33, 18, 30, 15, 2, 31, 49, 3, 47, 27, 48, 10, 40, 32, 28, 47, 32, 48, 15, 18, 8, 0, 39, 39, 40, 32, 14, 10, 49, 48, 33, 29, 35, 39, 48, 40, 20, 47, 21, 41, 21, 28, 27, 9, 15, 39, 8, 45, 39, 48, 2, 37, 33, 0, 19, 48, 43, 40, 48, 10, 18, 1, 39, 49, 44, 18, 39, 22, 0, 46]
initial O.F:  26823685414.0
FINAL SOLUTION:  [20, 4, 17, 23, 2, 0, 39, 37, 21, 2, 49, 31, 7, 8, 5, 48, 18, 23, 29, 18, 15, 37, 34, 49, 18, 21, 24, 8, 26, 16, 23, 18, 0, 8, 7, 10, 23, 10, 10, 18, 28, 47, 47, 21, 38, 18, 22, 0, 42, 44, 31, 47, 11, 49, 13, 47, 25, 44, 18, 15, 42, 19, 39, 6, 49, 43, 20, 15, 47, 12, 9, 23, 39, 27, 23, 10, 37, 28, 30, 31, 49, 18, 36, 17, 9, 6, 33, 18, 2, 17, 2, 31, 49, 3, 47, 27, 48, 10, 40, 43, 28, 43, 38, 8, 17, 18, 8, 0, 39, 39, 36, 32, 14, 10, 49, 48, 33, 29, 35, 39, 48, 36, 20, 47, 21, 41, 21, 2, 27, 9, 17, 39, 8, 45, 39, 18, 2, 37, 33, 0, 19, 48, 43, 36, 18, 10, 18, 1, 39, 49, 44, 18, 39, 22, 0, 46]
FINAL OF:  20259491144.0
Done
Adding variables
Done
Out[14]:
['ID',
 'comercio',
 'cve_dist',
 'entrada',
 'gid',
 'id',
 'loop',
 'ocio',
 'pob',
 'salida',
 'servicios',
 'viv',
 'arisel_20160406114543',
 'arisel_20160406114812',
 'arisel_20160406115125']

In [15]:
datos.exportArcData('data/variables_regiones')


Writing ESRI files
Writing DBF file
Done
ESRI files created

In [16]:
from geopandas import GeoDataFrame
import numpy as np
datos = GeoDataFrame.from_file('data/variables_regiones.shp')
intensidad = datos['comercio'] + datos['viv'] + datos['ocio'] + datos['servicios']
prop_comercio = datos['comercio'] / intensidad
prop_viv = datos['viv'] / intensidad
prop_ocio = datos['ocio'] / intensidad
prop_servicios = datos['servicios'] / intensidad
entropia = (prop_comercio*np.log(prop_comercio) + prop_viv*np.log(prop_viv) + prop_ocio*np.log(prop_ocio)
           + prop_servicios*np.log(prop_servicios))/np.log(4)
entropia.head()


Out[16]:
0   -0.443781
1   -0.250027
2   -0.303138
3   -0.178275
4   -0.235811
dtype: float64

In [17]:
datos['entropia'] = entropia
datos.head()


Out[17]:
ID comercio cve_dist entrada geometry gid id_1 loop ocio pob region_1 region_2 region_3 salida servicios viv entropia
0 1 9614 034 201286 POLYGON ((486947.2187500237 2148842.250066909,... 1 None 27627 95 2125687 25 16 20 200217 579 32618 -0.443781
1 2 2905 083 209264 POLYGON ((478110.2187500393 2134697.250066519,... 2 None 76022 82 3563363 25 20 4 209859 949 42238 -0.250027
2 3 2944 113 65701 POLYGON ((501006.0312499983 2148019.750066887,... 3 None 10611 72 3128496 29 43 17 66706 459 26003 -0.303138
3 4 2425 131 42385 (POLYGON ((507974.718749986 2125105.250066254,... 4 None 8468 60 4842162 21 42 23 43106 256 44080 -0.178275
4 5 2849 063 128608 POLYGON ((487190.7500000236 2140396.250066676,... 5 None 24472 30 4146145 38 20 2 128756 441 35981 -0.235811

In [18]:
datos.to_file('data/variables_regiones.shp')

In [19]:
datos = GeoDataFrame.from_file('data/variables_regiones.shp')
datos.head()


Out[19]:
ID comercio cve_dist entrada entropia geometry gid id_1 loop ocio pob region_1 region_2 region_3 salida servicios viv
0 1 9614 034 201286 -0.443781 POLYGON ((486947.2187500237 2148842.250066909,... 1 None 27627 95 2125687 25 16 20 200217 579 32618
1 2 2905 083 209264 -0.250027 POLYGON ((478110.2187500393 2134697.250066519,... 2 None 76022 82 3563363 25 20 4 209859 949 42238
2 3 2944 113 65701 -0.303138 POLYGON ((501006.0312499983 2148019.750066887,... 3 None 10611 72 3128496 29 43 17 66706 459 26003
3 4 2425 131 42385 -0.178275 (POLYGON ((507974.718749986 2125105.250066254,... 4 None 8468 60 4842162 21 42 23 43106 256 44080
4 5 2849 063 128608 -0.235811 POLYGON ((487190.7500000236 2140396.250066676,... 5 None 24472 30 4146145 38 20 2 128756 441 35981