In [3]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns;
from numpy import nan
from math import sqrt, sin, cos, atan2, log
plt.style.use('ggplot')
from IPython.core.display import HTML
css = open('style-table.css').read()+ open('style-notebook.css').read()
HTML('<style>{}</style>'.format(css))
Out[3]:
In [7]:
#test = pd.read_csv('TR_EXP_INMUEBLE_CONTROL.txt', header=0, sep='|', encoding='latin-1')
/Users/luis/anaconda/lib/python3.4/site-packages/IPython/core/interactiveshell.py:2902: DtypeWarning: Columns (8,15,21,22) have mixed types. Specify dtype option on import or set low_memory=False.
interactivity=interactivity, compiler=compiler, result=result)
In [109]:
infr = pd.read_csv('infra_corregido.csv')
In [63]:
#df = pd.read_excel('INMUEBLES.xlsx')
In [67]:
#df = df[df['P3']<6]
In [68]:
#df.ix[df.P12>2, 'P12'] = 3
#df['P12'].replace({1: 1.5, 2: 1, 3: 0},inplace=True)
In [69]:
#df.ix[df.P20>2,'P20']=0
#df.ix[df.P22>2,'P22']=0
In [70]:
#df.ix[(df.P22==1),'P20']=0 #se ignora p20 si p22=1
In [71]:
#df['P20'].replace({1: 3, 2: 0},inplace=True)
In [99]:
#df.ix[df['P12'].isnull(),'P12']=0
#df.ix[df['P20'].isnull(),'P20']=0
In [109]:
#infr['P20'].unique()
Out[109]:
array([ 0., 3.])
In [107]:
#infr.ix[:,'P20'] = df['P20'].values
#infr.ix[:,'P12'] = df['P12'].values
In [95]:
#infr.to_csv('infra_corregido.csv')
In [114]:
#Init
secciones = [['P3'], ['P11', 'P12', 'P13A', 'P14', 'P15', 'P16'], ['P17A', 'P18A', 'P19', 'P20', 'P21', 'P22'], ['P23', 'P24'], ['P25', 'P26', 'P27', 'P28', 'P29', 'P30', 'P31', 'P32', 'P33', 'P34', 'P35', 'P36', 'P37', 'P38', 'P39', 'P40', 'P41'], ['P42', 'P44', 'P46', 'P47', 'P48', 'P49', 'P52', 'P62', 'P72', 'P82', 'P92', 'P102', 'P103', 'P112', 'P113', 'P117', 'P122', 'P123', 'P125'], ['P126'], ['P133', 'P134', 'P135', 'P136', 'P137', 'P138', 'P139', 'P140', 'P141', 'P142', 'P143'], ['P144', 'P145']]
max_cal = [11, 13, 13, 11, 14, 17, 2, 11, 8]
sec_lst = []
for i in range(1,10):
sec_lst.append('seccion_'+str(i))
In [115]:
i=1
for seccion in secciones:
infr['seccion_'+str(i)]=infr[seccion].sum(axis=1)
i+=1
#dict(zip([1,2,3,4], [a,b,c,d]))
In [116]:
new_columns = ['ENT','MUN','LOC','AGEB','MZA','ID_INM']
new_columns.extend(sec_lst)
infr_secciones = infr[new_columns]
In [119]:
#infr.to_csv('infra_corregido.csv')
infr_secciones.to_csv('infr_secciones.csv')
In [120]:
for j in range(9):
print("Sección",j,":\t",infr[secciones[j]].sum(axis=1).max())
Sección 0 : 11
Sección 1 : 13.0
Sección 2 : 13.0
Sección 3 : 11.0
Sección 4 : 14.0
Sección 5 : 10.0
Sección 6 : 2.0
Sección 7 : 11.0
Sección 8 : 8.0
In [121]:
infr_secciones[sec_lst].max()
Out[121]:
seccion_1 11
seccion_2 13
seccion_3 13
seccion_4 11
seccion_5 14
seccion_6 10
seccion_7 2
seccion_8 11
seccion_9 8
dtype: float64
In [167]:
infr = pd.read_csv('infr_secciones.csv')
infr.head()
Out[167]:
Unnamed: 0
ENT
MUN
LOC
AGEB
MZA
ID_INM
seccion_1
seccion_2
seccion_3
seccion_4
seccion_5
seccion_6
seccion_7
seccion_8
seccion_9
0
0
2
4
1
2876
23
1914
8
8.0
11
11.0
11.75
4.80
0
8.8
4
1
1
2
4
1
2880
103
1915
8
8.5
11
5.5
11.50
2.75
0
4.8
8
2
2
2
4
1
3677
5
1916
8
9.0
11
5.5
11.75
2.75
0
4.8
4
3
3
2
4
1
3677
9
1917
8
8.0
11
11.0
11.75
2.65
0
10.0
0
4
4
2
4
1
3677
23
1918
8
8.0
11
5.5
10.70
5.35
0
8.4
8
In [123]:
loc = pd.read_csv('TR_EXP_INMUEBLE_CONTROL.txt', header=0, sep='|', encoding='latin-1', low_memory=False)
In [124]:
#loc[~loc['DESCRUBIC'].isnull()]['DESCRUBIC']#loc[['NOMVIAL', 'NUMEXT1', 'NEXTALF1', 'NUMINT', 'NUMINTALF', 'ENTRECA', 'YCALLE', 'DESCRUBIC']]
Y=loc['Y'].map(lambda x: float(str(x).replace(",","."))/10000)
X=loc['X'].map(lambda x: float(str(x).replace(",","."))/100000)
XY=pd.DataFrame({'Longitud':Y, 'Latitud':X})
In [125]:
loc_headers=['ENT', 'MUN','LOC', 'AGEB', 'MZA', 'X', 'Y']
common_headers=['ENT', 'MUN','LOC', 'AGEB', 'MZA']
In [126]:
location = loc[loc_headers]
In [127]:
len(infr[common_headers])/len(location)
Out[127]:
1.181369674508792
In [168]:
full_data = pd.merge(infr, loc[loc_headers], how='outer',left_on=common_headers, right_on=common_headers)
inner_data = pd.merge(infr, loc[loc_headers], how='inner',left_on=common_headers, right_on=common_headers)
left_data = pd.merge(infr, loc[loc_headers], how='left',left_on=common_headers, right_on=common_headers)
In [191]:
inner_data
Out[191]:
Unnamed: 0
ENT
MUN
LOC
AGEB
MZA
ID_INM
seccion_1
seccion_2
seccion_3
seccion_4
seccion_5
seccion_6
seccion_7
seccion_8
seccion_9
X
Y
cal_final
0
24
2
4
1
3412
2
1772
11
12.0
11
5.5
11.75
4.30
0
7.2
4
1084889.4391
2344929.5143
0.6675
1
72829
2
4
1
3412
2
2473
8
12.0
11
0.0
11.75
2.90
0
9.6
4
1084889.4391
2344929.5143
0.5925
2
105
2
4
1
436A
14
1886
11
12.0
12
5.5
11.45
3.00
0
8.4
4
1081038.1387
2346568.3414
0.6735
3
72827
2
4
1
436A
14
2471
11
9.0
12
0.0
14.00
5.60
0
8.4
4
1081038.1387
2346568.3414
0.6400
4
122
2
4
1
3041
2
1905
11
9.0
11
5.5
11.75
3.80
0
7.2
4
1077173.8573
2346448.7400
0.6325
5
129
2
4
1
3713
48
1992
11
11.5
9
0.0
8.35
2.70
0
7.2
8
1079930.4474
2343442.2052
0.5775
6
129
2
4
1
3713
48
1992
11
11.5
9
0.0
8.35
2.70
0
7.2
8
1079946.8545
2343442.1063
0.5775
7
129
2
4
1
3713
48
1992
11
11.5
9
0.0
8.35
2.70
0
7.2
8
1079895.2853
2343442.4170
0.5775
8
130
2
4
1
3713
48
1993
11
10.0
12
0.0
11.75
3.80
0
8.4
8
1079930.4474
2343442.2052
0.6495
9
130
2
4
1
3713
48
1993
11
10.0
12
0.0
11.75
3.80
0
8.4
8
1079946.8545
2343442.1063
0.6495
10
130
2
4
1
3713
48
1993
11
10.0
12
0.0
11.75
3.80
0
8.4
8
1079895.2853
2343442.4170
0.6495
11
231
2
4
1
3713
48
1991
11
12.0
12
0.0
7.55
3.20
0
8.4
8
1079930.4474
2343442.2052
0.6215
12
231
2
4
1
3713
48
1991
11
12.0
12
0.0
7.55
3.20
0
8.4
8
1079946.8545
2343442.1063
0.6215
13
231
2
4
1
3713
48
1991
11
12.0
12
0.0
7.55
3.20
0
8.4
8
1079895.2853
2343442.4170
0.6215
14
131
2
4
1
3728
49
1994
11
13.0
12
5.5
9.25
5.40
0
6.0
8
1079628.9259
2343104.6840
0.7015
15
131
2
4
1
3728
49
1994
11
13.0
12
5.5
9.25
5.40
0
6.0
8
1079626.8385
2343062.5743
0.7015
16
132
2
4
1
5635
10
1995
11
8.5
11
5.5
13.75
5.85
2
9.6
4
1081325.9932
2343469.4629
0.7120
17
133
2
4
1
5635
14
1996
11
9.0
12
0.0
10.50
2.45
2
8.9
8
1081077.8720
2343843.3932
0.6385
18
134
2
4
1
3501
48
1997
8
11.5
11
11.0
11.75
1.85
0
7.6
8
1081340.6433
2341368.6145
0.7070
19
135
2
4
1
3732
16
1998
8
12.0
12
5.5
7.65
2.75
0
7.2
8
1080133.1094
2342393.7799
0.6310
20
135
2
4
1
3732
16
1998
8
12.0
12
5.5
7.65
2.75
0
7.2
8
1080184.6203
2342265.3863
0.6310
21
136
2
4
1
3732
16
1999
8
9.0
12
5.5
10.50
1.85
0
8.4
8
1080133.1094
2342393.7799
0.6325
22
136
2
4
1
3732
16
1999
8
9.0
12
5.5
10.50
1.85
0
8.4
8
1080184.6203
2342265.3863
0.6325
23
137
2
4
1
3732
47
2000
8
11.5
12
11.0
10.25
2.65
0
5.2
8
1080491.5883
2342049.7875
0.6860
24
138
2
4
1
3785
2
2001
8
10.5
11
0.0
11.75
4.30
0
8.9
8
1081308.2090
2342556.8061
0.6245
25
138
2
4
1
3785
2
2001
8
10.5
11
0.0
11.75
4.30
0
8.9
8
1081395.5334
2342672.7702
0.6245
26
139
2
4
1
3785
2
2002
8
11.5
12
11.0
11.20
1.85
0
8.9
8
1081308.2090
2342556.8061
0.7245
27
139
2
4
1
3785
2
2002
8
11.5
12
11.0
11.20
1.85
0
8.9
8
1081395.5334
2342672.7702
0.7245
28
140
2
4
1
3785
7
2004
11
9.0
11
5.5
10.50
1.85
0
8.4
8
1080946.8150
2343142.2856
0.6525
29
140
2
4
1
3785
7
2004
11
9.0
11
5.5
10.50
1.85
0
8.4
8
1080953.1424
2343163.5934
0.6525
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
147641
151069
31
13
1
0076
4
190874
11
13.0
9
11.0
11.75
2.45
2
2.2
4
3789117.8790
1062694.1182
0.6640
147642
151071
31
54
1
0030
17
191044
11
12.5
7
5.5
11.75
2.45
0
2.2
0
3809028.0212
1061365.7127
0.5240
147643
151072
31
54
6
0045
5
191045
11
12.0
12
0.0
11.75
0.25
0
1.2
8
3807680.1370
1060112.5966
0.5620
147644
151072
31
54
6
0045
5
191045
11
12.0
12
0.0
11.75
0.25
0
1.2
8
3807684.3397
1060111.2397
0.5620
147645
151073
31
52
1
0182
15
191046
11
13.0
9
5.5
9.50
0.80
0
8.4
8
3813798.7329
1067758.7445
0.6520
147646
151075
31
50
75
6562
13
191048
11
13.0
11
5.5
10.65
2.75
0
3.6
4
3770866.5845
1055275.1014
0.6150
147647
151076
31
50
75
6577
13
191049
11
12.5
12
11.0
10.50
2.70
0
7.2
0
3770354.2840
1055020.5690
0.6690
147648
151077
31
50
81
2042
3
191052
11
12.5
12
0.0
9.50
0.90
0
4.6
8
3770220.3375
1060790.2010
0.5850
147649
151078
31
50
88
2042
13
191053
11
13.0
9
5.5
7.70
2.45
0
10.6
4
3773232.3171
1059105.4721
0.6325
147650
151079
30
91
55
003A
5
189318
11
9.0
10
0.0
10.20
3.00
0
7.0
0
3261980.4628
631619.97252
0.5020
147651
151080
30
91
55
003A
10
189319
11
8.5
8
0.0
6.95
0.25
0
3.6
8
3261982.8639
631604.01021
0.4630
147652
151081
30
91
56
0063
2
189320
11
11.5
9
0.0
7.70
0.25
0
7.7
8
3248444.1915
623213.90977
0.5515
147653
151082
30
91
57
003A
1
189321
11
11.0
10
0.0
7.35
0.25
0
5.3
4
3267154.4442
631875.88116
0.4890
147654
151082
30
91
57
003A
1
189321
11
11.0
10
0.0
7.35
0.25
0
5.3
4
3267147.4561
631804.15645
0.4890
147655
151083
30
91
57
003A
1
189322
11
11.0
7
0.0
8.35
1.05
0
3.6
4
3267154.4442
631875.88116
0.4600
147656
151083
30
91
57
003A
1
189322
11
11.0
7
0.0
8.35
1.05
0
3.6
4
3267147.4561
631804.15645
0.4600
147657
151084
30
91
61
0063
3
189323
11
10.0
6
0.0
5.85
0.00
0
6.0
0
3248526.0565
626649.68515
0.3885
147658
151084
30
91
61
0063
3
189323
11
10.0
6
0.0
5.85
0.00
0
6.0
0
3248538.1343
626559.64459
0.3885
147659
151085
30
91
61
0063
3
189324
11
11.0
7
0.0
3.00
1.15
0
1.7
8
3248526.0565
626649.68515
0.4285
147660
151085
30
91
61
0063
3
189324
11
11.0
7
0.0
3.00
1.15
0
1.7
8
3248538.1343
626559.64459
0.4285
147661
151086
30
91
61
0063
2
189325
11
7.0
6
0.0
4.60
1.15
0
1.2
0
3248574.1356
626695.97516
0.3095
147662
151087
30
91
70
0063
4
189326
11
11.5
6
0.0
6.35
0.25
0
6.0
8
3249248.6607
626583.75220
0.4910
147663
151088
30
91
76
003A
800
189327
11
11.5
8
0.0
9.50
0.00
0
3.6
8
3268469.1765
634215.8402
0.5160
147664
151089
30
91
100
0082
800
189328
11
8.5
5
0.0
4.85
0.25
0
3.6
4
3260885.3823
624631.21009
0.3720
147665
151090
30
3
1
0323
42
188407
11
13.0
11
0.0
10.60
2.20
0
8.9
4
3249442.0328
680735.86987
0.6070
147666
151091
30
3
1
0126
4
188408
8
11.5
11
0.0
4.10
1.95
0
6.0
0
3248445.8473
679870.99807
0.4255
147667
151092
30
3
1
0126
14
188409
8
11.5
11
0.0
8.35
1.95
0
7.6
8
3248678.1322
680385.75222
0.5640
147668
151093
30
3
1
0130
35
188410
8
12.0
11
5.5
11.45
2.75
0
7.2
8
3249117.8092
680078.11555
0.6590
147669
151094
30
3
1
015A
25
188411
11
12.5
11
0.0
11.25
2.45
0
6.0
4
3248718.0635
679487.59219
0.5820
147670
151095
30
3
1
015A
27
188412
11
12.0
12
5.5
11.20
2.75
0
5.3
8
3248877.4810
679621.04745
0.6775
147671 rows × 19 columns
In [218]:
full_data.max()
Out[218]:
Unnamed: 0 151095
ENT 32
MUN 570
LOC 5077
AGEB NaN
MZA 800
ID_INM 223669
seccion_1 11
seccion_2 13
seccion_3 13
seccion_4 11
seccion_5 14
seccion_6 10
seccion_7 2
seccion_8 11
seccion_9 8
X NaN
Y NaN
dtype: float64
In [ ]:
In [64]:
print(len(full_data))
print(len(inner_data))
232236
147671
In [215]:
full_data.to_csv('full_data.csv')
inner_data.to_csv('inner_data.csv')
left_data.to_csv('left_data.csv')
In [214]:
inner_data
Out[214]:
Unnamed: 0
ENT
MUN
LOC
AGEB
MZA
ID_INM
seccion_1
seccion_2
seccion_3
seccion_4
seccion_5
seccion_6
seccion_7
seccion_8
seccion_9
X
Y
cal_final
0
24
2
4
1
3412
2
1772
11
12.0
11
5.5
11.75
4.30
0
7.2
4
1084889.4391
2344929.5143
66.75
1
72829
2
4
1
3412
2
2473
8
12.0
11
0.0
11.75
2.90
0
9.6
4
1084889.4391
2344929.5143
59.25
2
105
2
4
1
436A
14
1886
11
12.0
12
5.5
11.45
3.00
0
8.4
4
1081038.1387
2346568.3414
67.35
3
72827
2
4
1
436A
14
2471
11
9.0
12
0.0
14.00
5.60
0
8.4
4
1081038.1387
2346568.3414
64.00
4
122
2
4
1
3041
2
1905
11
9.0
11
5.5
11.75
3.80
0
7.2
4
1077173.8573
2346448.7400
63.25
5
129
2
4
1
3713
48
1992
11
11.5
9
0.0
8.35
2.70
0
7.2
8
1079930.4474
2343442.2052
57.75
6
129
2
4
1
3713
48
1992
11
11.5
9
0.0
8.35
2.70
0
7.2
8
1079946.8545
2343442.1063
57.75
7
129
2
4
1
3713
48
1992
11
11.5
9
0.0
8.35
2.70
0
7.2
8
1079895.2853
2343442.4170
57.75
8
130
2
4
1
3713
48
1993
11
10.0
12
0.0
11.75
3.80
0
8.4
8
1079930.4474
2343442.2052
64.95
9
130
2
4
1
3713
48
1993
11
10.0
12
0.0
11.75
3.80
0
8.4
8
1079946.8545
2343442.1063
64.95
10
130
2
4
1
3713
48
1993
11
10.0
12
0.0
11.75
3.80
0
8.4
8
1079895.2853
2343442.4170
64.95
11
231
2
4
1
3713
48
1991
11
12.0
12
0.0
7.55
3.20
0
8.4
8
1079930.4474
2343442.2052
62.15
12
231
2
4
1
3713
48
1991
11
12.0
12
0.0
7.55
3.20
0
8.4
8
1079946.8545
2343442.1063
62.15
13
231
2
4
1
3713
48
1991
11
12.0
12
0.0
7.55
3.20
0
8.4
8
1079895.2853
2343442.4170
62.15
14
131
2
4
1
3728
49
1994
11
13.0
12
5.5
9.25
5.40
0
6.0
8
1079628.9259
2343104.6840
70.15
15
131
2
4
1
3728
49
1994
11
13.0
12
5.5
9.25
5.40
0
6.0
8
1079626.8385
2343062.5743
70.15
16
132
2
4
1
5635
10
1995
11
8.5
11
5.5
13.75
5.85
2
9.6
4
1081325.9932
2343469.4629
71.20
17
133
2
4
1
5635
14
1996
11
9.0
12
0.0
10.50
2.45
2
8.9
8
1081077.8720
2343843.3932
63.85
18
134
2
4
1
3501
48
1997
8
11.5
11
11.0
11.75
1.85
0
7.6
8
1081340.6433
2341368.6145
70.70
19
135
2
4
1
3732
16
1998
8
12.0
12
5.5
7.65
2.75
0
7.2
8
1080133.1094
2342393.7799
63.10
20
135
2
4
1
3732
16
1998
8
12.0
12
5.5
7.65
2.75
0
7.2
8
1080184.6203
2342265.3863
63.10
21
136
2
4
1
3732
16
1999
8
9.0
12
5.5
10.50
1.85
0
8.4
8
1080133.1094
2342393.7799
63.25
22
136
2
4
1
3732
16
1999
8
9.0
12
5.5
10.50
1.85
0
8.4
8
1080184.6203
2342265.3863
63.25
23
137
2
4
1
3732
47
2000
8
11.5
12
11.0
10.25
2.65
0
5.2
8
1080491.5883
2342049.7875
68.60
24
138
2
4
1
3785
2
2001
8
10.5
11
0.0
11.75
4.30
0
8.9
8
1081308.2090
2342556.8061
62.45
25
138
2
4
1
3785
2
2001
8
10.5
11
0.0
11.75
4.30
0
8.9
8
1081395.5334
2342672.7702
62.45
26
139
2
4
1
3785
2
2002
8
11.5
12
11.0
11.20
1.85
0
8.9
8
1081308.2090
2342556.8061
72.45
27
139
2
4
1
3785
2
2002
8
11.5
12
11.0
11.20
1.85
0
8.9
8
1081395.5334
2342672.7702
72.45
28
140
2
4
1
3785
7
2004
11
9.0
11
5.5
10.50
1.85
0
8.4
8
1080946.8150
2343142.2856
65.25
29
140
2
4
1
3785
7
2004
11
9.0
11
5.5
10.50
1.85
0
8.4
8
1080953.1424
2343163.5934
65.25
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
147641
151069
31
13
1
0076
4
190874
11
13.0
9
11.0
11.75
2.45
2
2.2
4
3789117.8790
1062694.1182
66.40
147642
151071
31
54
1
0030
17
191044
11
12.5
7
5.5
11.75
2.45
0
2.2
0
3809028.0212
1061365.7127
52.40
147643
151072
31
54
6
0045
5
191045
11
12.0
12
0.0
11.75
0.25
0
1.2
8
3807680.1370
1060112.5966
56.20
147644
151072
31
54
6
0045
5
191045
11
12.0
12
0.0
11.75
0.25
0
1.2
8
3807684.3397
1060111.2397
56.20
147645
151073
31
52
1
0182
15
191046
11
13.0
9
5.5
9.50
0.80
0
8.4
8
3813798.7329
1067758.7445
65.20
147646
151075
31
50
75
6562
13
191048
11
13.0
11
5.5
10.65
2.75
0
3.6
4
3770866.5845
1055275.1014
61.50
147647
151076
31
50
75
6577
13
191049
11
12.5
12
11.0
10.50
2.70
0
7.2
0
3770354.2840
1055020.5690
66.90
147648
151077
31
50
81
2042
3
191052
11
12.5
12
0.0
9.50
0.90
0
4.6
8
3770220.3375
1060790.2010
58.50
147649
151078
31
50
88
2042
13
191053
11
13.0
9
5.5
7.70
2.45
0
10.6
4
3773232.3171
1059105.4721
63.25
147650
151079
30
91
55
003A
5
189318
11
9.0
10
0.0
10.20
3.00
0
7.0
0
3261980.4628
631619.97252
50.20
147651
151080
30
91
55
003A
10
189319
11
8.5
8
0.0
6.95
0.25
0
3.6
8
3261982.8639
631604.01021
46.30
147652
151081
30
91
56
0063
2
189320
11
11.5
9
0.0
7.70
0.25
0
7.7
8
3248444.1915
623213.90977
55.15
147653
151082
30
91
57
003A
1
189321
11
11.0
10
0.0
7.35
0.25
0
5.3
4
3267154.4442
631875.88116
48.90
147654
151082
30
91
57
003A
1
189321
11
11.0
10
0.0
7.35
0.25
0
5.3
4
3267147.4561
631804.15645
48.90
147655
151083
30
91
57
003A
1
189322
11
11.0
7
0.0
8.35
1.05
0
3.6
4
3267154.4442
631875.88116
46.00
147656
151083
30
91
57
003A
1
189322
11
11.0
7
0.0
8.35
1.05
0
3.6
4
3267147.4561
631804.15645
46.00
147657
151084
30
91
61
0063
3
189323
11
10.0
6
0.0
5.85
0.00
0
6.0
0
3248526.0565
626649.68515
38.85
147658
151084
30
91
61
0063
3
189323
11
10.0
6
0.0
5.85
0.00
0
6.0
0
3248538.1343
626559.64459
38.85
147659
151085
30
91
61
0063
3
189324
11
11.0
7
0.0
3.00
1.15
0
1.7
8
3248526.0565
626649.68515
42.85
147660
151085
30
91
61
0063
3
189324
11
11.0
7
0.0
3.00
1.15
0
1.7
8
3248538.1343
626559.64459
42.85
147661
151086
30
91
61
0063
2
189325
11
7.0
6
0.0
4.60
1.15
0
1.2
0
3248574.1356
626695.97516
30.95
147662
151087
30
91
70
0063
4
189326
11
11.5
6
0.0
6.35
0.25
0
6.0
8
3249248.6607
626583.75220
49.10
147663
151088
30
91
76
003A
800
189327
11
11.5
8
0.0
9.50
0.00
0
3.6
8
3268469.1765
634215.8402
51.60
147664
151089
30
91
100
0082
800
189328
11
8.5
5
0.0
4.85
0.25
0
3.6
4
3260885.3823
624631.21009
37.20
147665
151090
30
3
1
0323
42
188407
11
13.0
11
0.0
10.60
2.20
0
8.9
4
3249442.0328
680735.86987
60.70
147666
151091
30
3
1
0126
4
188408
8
11.5
11
0.0
4.10
1.95
0
6.0
0
3248445.8473
679870.99807
42.55
147667
151092
30
3
1
0126
14
188409
8
11.5
11
0.0
8.35
1.95
0
7.6
8
3248678.1322
680385.75222
56.40
147668
151093
30
3
1
0130
35
188410
8
12.0
11
5.5
11.45
2.75
0
7.2
8
3249117.8092
680078.11555
65.90
147669
151094
30
3
1
015A
25
188411
11
12.5
11
0.0
11.25
2.45
0
6.0
4
3248718.0635
679487.59219
58.20
147670
151095
30
3
1
015A
27
188412
11
12.0
12
5.5
11.20
2.75
0
5.3
8
3248877.4810
679621.04745
67.75
147671 rows × 19 columns
In [131]:
#inner_data['X'].apply(lambda x: float(str(x).replace(",","."))/100000)
print(min(inner_data['X']))
print(max(inner_data['X']))
print(min(inner_data['Y']))
print(max(inner_data['Y']))
1076159.9242
4066239.0212
1000010.4931
999994.69286
In [132]:
inner_data['Y'].map(lambda x: -float(str(x).replace(",","."))/10000).hist()
Out[132]:
<matplotlib.axes._subplots.AxesSubplot at 0x129005898>
In [133]:
inner_data['X'].map(lambda x: float(str(x).replace(",","."))/10000).hist()
Out[133]:
<matplotlib.axes._subplots.AxesSubplot at 0x109e4c588>
In [134]:
sns.jointplot(x='Latitud', y='Longitud', data=XY)
Out[134]:
<seaborn.axisgrid.JointGrid at 0x10d369c50>
/Users/luis/anaconda/lib/python3.4/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
if self._edgecolors == str('face'):
In [135]:
sns.jointplot(x='Latitud', y='Longitud', data=XY, size=10, ratio=5,s=1)
Out[135]:
<seaborn.axisgrid.JointGrid at 0x10ff76860>
/Users/luis/anaconda/lib/python3.4/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
if self._edgecolors == str('face'):
In [136]:
XY.head()
Out[136]:
Latitud
Longitud
0
28.034619
85.692449
1
28.026875
85.679924
2
27.992703
84.988437
3
27.992288
85.114371
4
27.994609
84.962842
In [137]:
diferencias = ((loc['LATITUD_CARGA'].map(lambda x: float(str(x).replace(",",".")))- loc['X'].map(lambda x: float(str(x).replace(",","."))))**2+(loc['LONGITUD_CARGA'].map(lambda x: float(str(x).replace(",",".")))-loc['Y'].map(lambda x: float(str(x).replace(",","."))))**2).map(lambda x: sqrt(x))
In [138]:
x1 = loc['LATITUD_CARGA'].map(lambda x: float(str(x).replace(",",".")))
x2 = loc['X'].map(lambda x: float(str(x).replace(",",".")))
y1 = loc['LONGITUD_CARGA'].map(lambda x: float(str(x).replace(",",".")))
y2 = loc['Y'].map(lambda x: float(str(x).replace(",",".")))
In [139]:
plt.figure(figsize=(12,4))
diferencias.hist(bins = int(sqrt(len(diferencias))))
Out[139]:
<matplotlib.axes._subplots.AxesSubplot at 0x10ffbac88>
In [140]:
def heaversine(x1,y1,x2,y2):
a = sin((x2-x1)/2)**2+cos(x1)*cos(x2)*sin((y2-y1)/2)**2
c = 2*atan2(sqrt(a),sqrt(1-a))
return 6371*c
In [141]:
vals=loc[['LATITUD_CARGA', 'LONGITUD_CARGA','X']].groupby(['LATITUD_CARGA', 'LONGITUD_CARGA']).count()['X'].values
In [142]:
vals = pd.DataFrame(vals)
In [143]:
vals.hist(bins=320)
Out[143]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x112a6ef98>]], dtype=object)
In [144]:
data = pd.DataFrame({'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2})
In [145]:
distancias_transformadas = data.apply(lambda s: heaversine(s['x1'],s['y1'],s['x2'],s['y2']),axis=1)
In [146]:
distancias_transformadas.map(lambda x:log(x)).hist(bins=int(sqrt(len(distancias_transformadas))))
Out[146]:
<matplotlib.axes._subplots.AxesSubplot at 0x10a74bf60>
In [147]:
inner_data = pd.read_csv('inner_data.csv')
/Users/luis/anaconda/lib/python3.4/site-packages/IPython/core/interactiveshell.py:2902: DtypeWarning: Columns (16,17) have mixed types. Specify dtype option on import or set low_memory=False.
interactivity=interactivity, compiler=compiler, result=result)
In [7]:
secciones = ['seccion_1', 'seccion_2', 'seccion_3', 'seccion_4', 'seccion_5', 'seccion_6', 'seccion_7', 'seccion_8', 'seccion_9']
resumen = ['ENT']
resumen.extend(secciones)
resumen.extend(['cal_final'])
indicadores = ['seccion_1', 'seccion_2', 'seccion_3', 'seccion_4', 'seccion_5', 'seccion_6', 'seccion_7', 'seccion_8', 'seccion_9', 'cal_final']
In [195]:
inner_data
Out[195]:
Unnamed: 0
ENT
MUN
LOC
AGEB
MZA
ID_INM
seccion_1
seccion_2
seccion_3
seccion_4
seccion_5
seccion_6
seccion_7
seccion_8
seccion_9
X
Y
cal_final
0
24
2
4
1
3412
2
1772
11
12.0
11
5.5
11.75
4.30
0
7.2
4
1084889.4391
2344929.5143
0.6675
1
72829
2
4
1
3412
2
2473
8
12.0
11
0.0
11.75
2.90
0
9.6
4
1084889.4391
2344929.5143
0.5925
2
105
2
4
1
436A
14
1886
11
12.0
12
5.5
11.45
3.00
0
8.4
4
1081038.1387
2346568.3414
0.6735
3
72827
2
4
1
436A
14
2471
11
9.0
12
0.0
14.00
5.60
0
8.4
4
1081038.1387
2346568.3414
0.6400
4
122
2
4
1
3041
2
1905
11
9.0
11
5.5
11.75
3.80
0
7.2
4
1077173.8573
2346448.7400
0.6325
5
129
2
4
1
3713
48
1992
11
11.5
9
0.0
8.35
2.70
0
7.2
8
1079930.4474
2343442.2052
0.5775
6
129
2
4
1
3713
48
1992
11
11.5
9
0.0
8.35
2.70
0
7.2
8
1079946.8545
2343442.1063
0.5775
7
129
2
4
1
3713
48
1992
11
11.5
9
0.0
8.35
2.70
0
7.2
8
1079895.2853
2343442.4170
0.5775
8
130
2
4
1
3713
48
1993
11
10.0
12
0.0
11.75
3.80
0
8.4
8
1079930.4474
2343442.2052
0.6495
9
130
2
4
1
3713
48
1993
11
10.0
12
0.0
11.75
3.80
0
8.4
8
1079946.8545
2343442.1063
0.6495
10
130
2
4
1
3713
48
1993
11
10.0
12
0.0
11.75
3.80
0
8.4
8
1079895.2853
2343442.4170
0.6495
11
231
2
4
1
3713
48
1991
11
12.0
12
0.0
7.55
3.20
0
8.4
8
1079930.4474
2343442.2052
0.6215
12
231
2
4
1
3713
48
1991
11
12.0
12
0.0
7.55
3.20
0
8.4
8
1079946.8545
2343442.1063
0.6215
13
231
2
4
1
3713
48
1991
11
12.0
12
0.0
7.55
3.20
0
8.4
8
1079895.2853
2343442.4170
0.6215
14
131
2
4
1
3728
49
1994
11
13.0
12
5.5
9.25
5.40
0
6.0
8
1079628.9259
2343104.6840
0.7015
15
131
2
4
1
3728
49
1994
11
13.0
12
5.5
9.25
5.40
0
6.0
8
1079626.8385
2343062.5743
0.7015
16
132
2
4
1
5635
10
1995
11
8.5
11
5.5
13.75
5.85
2
9.6
4
1081325.9932
2343469.4629
0.7120
17
133
2
4
1
5635
14
1996
11
9.0
12
0.0
10.50
2.45
2
8.9
8
1081077.8720
2343843.3932
0.6385
18
134
2
4
1
3501
48
1997
8
11.5
11
11.0
11.75
1.85
0
7.6
8
1081340.6433
2341368.6145
0.7070
19
135
2
4
1
3732
16
1998
8
12.0
12
5.5
7.65
2.75
0
7.2
8
1080133.1094
2342393.7799
0.6310
20
135
2
4
1
3732
16
1998
8
12.0
12
5.5
7.65
2.75
0
7.2
8
1080184.6203
2342265.3863
0.6310
21
136
2
4
1
3732
16
1999
8
9.0
12
5.5
10.50
1.85
0
8.4
8
1080133.1094
2342393.7799
0.6325
22
136
2
4
1
3732
16
1999
8
9.0
12
5.5
10.50
1.85
0
8.4
8
1080184.6203
2342265.3863
0.6325
23
137
2
4
1
3732
47
2000
8
11.5
12
11.0
10.25
2.65
0
5.2
8
1080491.5883
2342049.7875
0.6860
24
138
2
4
1
3785
2
2001
8
10.5
11
0.0
11.75
4.30
0
8.9
8
1081308.2090
2342556.8061
0.6245
25
138
2
4
1
3785
2
2001
8
10.5
11
0.0
11.75
4.30
0
8.9
8
1081395.5334
2342672.7702
0.6245
26
139
2
4
1
3785
2
2002
8
11.5
12
11.0
11.20
1.85
0
8.9
8
1081308.2090
2342556.8061
0.7245
27
139
2
4
1
3785
2
2002
8
11.5
12
11.0
11.20
1.85
0
8.9
8
1081395.5334
2342672.7702
0.7245
28
140
2
4
1
3785
7
2004
11
9.0
11
5.5
10.50
1.85
0
8.4
8
1080946.8150
2343142.2856
0.6525
29
140
2
4
1
3785
7
2004
11
9.0
11
5.5
10.50
1.85
0
8.4
8
1080953.1424
2343163.5934
0.6525
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
147641
151069
31
13
1
0076
4
190874
11
13.0
9
11.0
11.75
2.45
2
2.2
4
3789117.8790
1062694.1182
0.6640
147642
151071
31
54
1
0030
17
191044
11
12.5
7
5.5
11.75
2.45
0
2.2
0
3809028.0212
1061365.7127
0.5240
147643
151072
31
54
6
0045
5
191045
11
12.0
12
0.0
11.75
0.25
0
1.2
8
3807680.1370
1060112.5966
0.5620
147644
151072
31
54
6
0045
5
191045
11
12.0
12
0.0
11.75
0.25
0
1.2
8
3807684.3397
1060111.2397
0.5620
147645
151073
31
52
1
0182
15
191046
11
13.0
9
5.5
9.50
0.80
0
8.4
8
3813798.7329
1067758.7445
0.6520
147646
151075
31
50
75
6562
13
191048
11
13.0
11
5.5
10.65
2.75
0
3.6
4
3770866.5845
1055275.1014
0.6150
147647
151076
31
50
75
6577
13
191049
11
12.5
12
11.0
10.50
2.70
0
7.2
0
3770354.2840
1055020.5690
0.6690
147648
151077
31
50
81
2042
3
191052
11
12.5
12
0.0
9.50
0.90
0
4.6
8
3770220.3375
1060790.2010
0.5850
147649
151078
31
50
88
2042
13
191053
11
13.0
9
5.5
7.70
2.45
0
10.6
4
3773232.3171
1059105.4721
0.6325
147650
151079
30
91
55
003A
5
189318
11
9.0
10
0.0
10.20
3.00
0
7.0
0
3261980.4628
631619.97252
0.5020
147651
151080
30
91
55
003A
10
189319
11
8.5
8
0.0
6.95
0.25
0
3.6
8
3261982.8639
631604.01021
0.4630
147652
151081
30
91
56
0063
2
189320
11
11.5
9
0.0
7.70
0.25
0
7.7
8
3248444.1915
623213.90977
0.5515
147653
151082
30
91
57
003A
1
189321
11
11.0
10
0.0
7.35
0.25
0
5.3
4
3267154.4442
631875.88116
0.4890
147654
151082
30
91
57
003A
1
189321
11
11.0
10
0.0
7.35
0.25
0
5.3
4
3267147.4561
631804.15645
0.4890
147655
151083
30
91
57
003A
1
189322
11
11.0
7
0.0
8.35
1.05
0
3.6
4
3267154.4442
631875.88116
0.4600
147656
151083
30
91
57
003A
1
189322
11
11.0
7
0.0
8.35
1.05
0
3.6
4
3267147.4561
631804.15645
0.4600
147657
151084
30
91
61
0063
3
189323
11
10.0
6
0.0
5.85
0.00
0
6.0
0
3248526.0565
626649.68515
0.3885
147658
151084
30
91
61
0063
3
189323
11
10.0
6
0.0
5.85
0.00
0
6.0
0
3248538.1343
626559.64459
0.3885
147659
151085
30
91
61
0063
3
189324
11
11.0
7
0.0
3.00
1.15
0
1.7
8
3248526.0565
626649.68515
0.4285
147660
151085
30
91
61
0063
3
189324
11
11.0
7
0.0
3.00
1.15
0
1.7
8
3248538.1343
626559.64459
0.4285
147661
151086
30
91
61
0063
2
189325
11
7.0
6
0.0
4.60
1.15
0
1.2
0
3248574.1356
626695.97516
0.3095
147662
151087
30
91
70
0063
4
189326
11
11.5
6
0.0
6.35
0.25
0
6.0
8
3249248.6607
626583.75220
0.4910
147663
151088
30
91
76
003A
800
189327
11
11.5
8
0.0
9.50
0.00
0
3.6
8
3268469.1765
634215.8402
0.5160
147664
151089
30
91
100
0082
800
189328
11
8.5
5
0.0
4.85
0.25
0
3.6
4
3260885.3823
624631.21009
0.3720
147665
151090
30
3
1
0323
42
188407
11
13.0
11
0.0
10.60
2.20
0
8.9
4
3249442.0328
680735.86987
0.6070
147666
151091
30
3
1
0126
4
188408
8
11.5
11
0.0
4.10
1.95
0
6.0
0
3248445.8473
679870.99807
0.4255
147667
151092
30
3
1
0126
14
188409
8
11.5
11
0.0
8.35
1.95
0
7.6
8
3248678.1322
680385.75222
0.5640
147668
151093
30
3
1
0130
35
188410
8
12.0
11
5.5
11.45
2.75
0
7.2
8
3249117.8092
680078.11555
0.6590
147669
151094
30
3
1
015A
25
188411
11
12.5
11
0.0
11.25
2.45
0
6.0
4
3248718.0635
679487.59219
0.5820
147670
151095
30
3
1
015A
27
188412
11
12.0
12
5.5
11.20
2.75
0
5.3
8
3248877.4810
679621.04745
0.6775
147671 rows × 19 columns
In [149]:
x=300
tmp_data = inner_data[['ENT', 'seccion_1', 'seccion_2', 'seccion_3', 'seccion_4', 'seccion_5', 'seccion_6', 'seccion_7', 'seccion_8', 'seccion_9']]
In [ ]:
In [203]:
#plt.scatter([1, 2, 3, 4, 5, 6, 7, 8, 9], tmp_data['ENT'].values, s=tmp_data.ix[x:x+9][secciones].apply(lambda x: 10*x))
#tmp_data.groupby(['ENT']).quantile(0.99)
calificaciones = inner_data[secciones].sum(axis=1)
In [211]:
calificaciones.hist(bins=int(sqrt(calificaciones.count())))
Out[211]:
<matplotlib.axes._subplots.AxesSubplot at 0x120d7c2e8>
In [212]:
inner_data['cal_final'] = calificaciones
In [178]:
inner_data['cal_final'].hist(bins = int(sqrt(len(calificaciones))))
Out[178]:
<matplotlib.axes._subplots.AxesSubplot at 0x1345f77b8>
In [238]:
inner_agg = inner_data[resumen].groupby(['ENT']).agg('mean')[secciones]
inner_agg_median = inner_data[resumen].groupby(['ENT']).agg('median')[secciones]
calificaciones = inner_agg[secciones].sum(axis=1)/100
calificaciones_median = inner_agg_median[secciones].sum(axis=1)/100
for i in inner_agg.index.values:
inner_agg.ix[i]=inner_agg.ix[i]/max_vals
inner_agg['cal_final'] = calificaciones
for i in inner_agg_median.index.values:
inner_agg_median.ix[i]=inner_agg_median.ix[i]/max_vals
inner_agg_median['cal_final'] = calificaciones
In [247]:
inner_agg.boxplot()
/Users/luis/anaconda/lib/python3.4/site-packages/ipykernel/__main__.py:1: FutureWarning:
The default value for 'return_type' will change to 'axes' in a future release.
To use the future behavior now, set return_type='axes'.
To keep the previous behavior and silence this warning, set return_type='dict'.
if __name__ == '__main__':
Out[247]:
{'boxes': [<matplotlib.lines.Line2D at 0x120b21cc0>,
<matplotlib.lines.Line2D at 0x120b32ef0>,
<matplotlib.lines.Line2D at 0x120bef860>,
<matplotlib.lines.Line2D at 0x120c00e80>,
<matplotlib.lines.Line2D at 0x120c157f0>,
<matplotlib.lines.Line2D at 0x120c25f98>,
<matplotlib.lines.Line2D at 0x120c3a780>,
<matplotlib.lines.Line2D at 0x120c44e10>,
<matplotlib.lines.Line2D at 0x120c5a5f8>,
<matplotlib.lines.Line2D at 0x120d2bda0>],
'caps': [<matplotlib.lines.Line2D at 0x120b2e668>,
<matplotlib.lines.Line2D at 0x120b2ee48>,
<matplotlib.lines.Line2D at 0x120b3ddd8>,
<matplotlib.lines.Line2D at 0x120bea5f8>,
<matplotlib.lines.Line2D at 0x120bfa5c0>,
<matplotlib.lines.Line2D at 0x120bfada0>,
<matplotlib.lines.Line2D at 0x120c0ad68>,
<matplotlib.lines.Line2D at 0x120c0afd0>,
<matplotlib.lines.Line2D at 0x120c19f98>,
<matplotlib.lines.Line2D at 0x120c1ed30>,
<matplotlib.lines.Line2D at 0x120c2fcf8>,
<matplotlib.lines.Line2D at 0x120c2ff60>,
<matplotlib.lines.Line2D at 0x120c3df28>,
<matplotlib.lines.Line2D at 0x120af4cf8>,
<matplotlib.lines.Line2D at 0x120c4fb70>,
<matplotlib.lines.Line2D at 0x120c4fdd8>,
<matplotlib.lines.Line2D at 0x120c5fda0>,
<matplotlib.lines.Line2D at 0x120c65b38>,
<matplotlib.lines.Line2D at 0x120d35b00>,
<matplotlib.lines.Line2D at 0x120d35d68>],
'fliers': [<matplotlib.lines.Line2D at 0x120b32e10>,
<matplotlib.lines.Line2D at 0x120bef5f8>,
<matplotlib.lines.Line2D at 0x120c00da0>,
<matplotlib.lines.Line2D at 0x120c0ffd0>,
<matplotlib.lines.Line2D at 0x120c25d30>,
<matplotlib.lines.Line2D at 0x120c35f60>,
<matplotlib.lines.Line2D at 0x120c44ba8>,
<matplotlib.lines.Line2D at 0x120c55dd8>,
<matplotlib.lines.Line2D at 0x120d2bb38>,
<matplotlib.lines.Line2D at 0x120d3ad68>],
'means': [],
'medians': [<matplotlib.lines.Line2D at 0x120b320b8>,
<matplotlib.lines.Line2D at 0x120bea860>,
<matplotlib.lines.Line2D at 0x120c00048>,
<matplotlib.lines.Line2D at 0x120c0f7f0>,
<matplotlib.lines.Line2D at 0x120c1ef98>,
<matplotlib.lines.Line2D at 0x120c35780>,
<matplotlib.lines.Line2D at 0x120add5f8>,
<matplotlib.lines.Line2D at 0x120c555f8>,
<matplotlib.lines.Line2D at 0x120c65da0>,
<matplotlib.lines.Line2D at 0x120d3a588>],
'whiskers': [<matplotlib.lines.Line2D at 0x120b28668>,
<matplotlib.lines.Line2D at 0x120b28e48>,
<matplotlib.lines.Line2D at 0x120b37dd8>,
<matplotlib.lines.Line2D at 0x120b3d5f8>,
<matplotlib.lines.Line2D at 0x120bf65c0>,
<matplotlib.lines.Line2D at 0x120bf6da0>,
<matplotlib.lines.Line2D at 0x120c06d68>,
<matplotlib.lines.Line2D at 0x120c06fd0>,
<matplotlib.lines.Line2D at 0x120c15f98>,
<matplotlib.lines.Line2D at 0x120c19d30>,
<matplotlib.lines.Line2D at 0x120c2acf8>,
<matplotlib.lines.Line2D at 0x120c2af60>,
<matplotlib.lines.Line2D at 0x120c3af28>,
<matplotlib.lines.Line2D at 0x120c3dcc0>,
<matplotlib.lines.Line2D at 0x120c4ab70>,
<matplotlib.lines.Line2D at 0x120c4add8>,
<matplotlib.lines.Line2D at 0x120c5ada0>,
<matplotlib.lines.Line2D at 0x120c5fb38>,
<matplotlib.lines.Line2D at 0x120d2eb00>,
<matplotlib.lines.Line2D at 0x120d2ed68>]}
In [202]:
tmp_sum=inner_agg.sum(axis=1)
tmp_sum
Out[202]:
ENT
2 5.462742
3 5.303114
4 4.567306
5 5.925606
6 5.262592
7 3.316895
8 4.484446
9 5.546940
10 4.422440
11 4.542022
12 3.887059
13 3.839614
14 4.649610
15 5.242571
16 4.482073
18 5.550203
19 5.260497
20 3.359407
21 4.678407
23 4.127940
24 4.212829
25 4.519968
26 5.013343
27 4.457016
29 3.644955
30 4.311153
31 4.746783
32 4.288673
dtype: float64
In [9]:
max_vals = np.array([11, 13, 13, 11, 14, 17, 2, 11, 8])
In [ ]:
In [197]:
new_names = dict(zip(secciones,['1', '2', '3', '4', '5', '6', '7', '8', '9']))
inner_agg = inner_agg.rename(columns = new_names)
In [198]:
for i in inner_agg.index.values:
inner_agg.ix[i]=inner_agg.ix[i]/max_vals
In [5]:
inner_data = pd.read_csv('inner_data.csv')
inner_data
/Users/luis/anaconda/lib/python3.4/site-packages/IPython/core/interactiveshell.py:2902: DtypeWarning: Columns (18,19) have mixed types. Specify dtype option on import or set low_memory=False.
interactivity=interactivity, compiler=compiler, result=result)
Out[5]:
Unnamed: 0
Unnamed: 0.1
Unnamed: 0.1
ENT
MUN
LOC
AGEB
MZA
ID_INM
seccion_1
...
seccion_3
seccion_4
seccion_5
seccion_6
seccion_7
seccion_8
seccion_9
X
Y
cal_final
0
0
24
24
2
4
1
3412
2
1772
1.000000
...
0.846154
0.5
0.839286
0.252941
0
0.654545
0.5
1084889.4391
2344929.5143
0.6675
1
1
72829
72829
2
4
1
3412
2
2473
0.727273
...
0.846154
0.0
0.839286
0.170588
0
0.872727
0.5
1084889.4391
2344929.5143
0.5925
2
2
105
105
2
4
1
436A
14
1886
1.000000
...
0.923077
0.5
0.817857
0.176471
0
0.763636
0.5
1081038.1387
2346568.3414
0.6735
3
3
72827
72827
2
4
1
436A
14
2471
1.000000
...
0.923077
0.0
1.000000
0.329412
0
0.763636
0.5
1081038.1387
2346568.3414
0.6400
4
4
122
122
2
4
1
3041
2
1905
1.000000
...
0.846154
0.5
0.839286
0.223529
0
0.654545
0.5
1077173.8573
2346448.7400
0.6325
5
5
129
129
2
4
1
3713
48
1992
1.000000
...
0.692308
0.0
0.596429
0.158824
0
0.654545
1.0
1079930.4474
2343442.2052
0.5775
6
6
129
129
2
4
1
3713
48
1992
1.000000
...
0.692308
0.0
0.596429
0.158824
0
0.654545
1.0
1079946.8545
2343442.1063
0.5775
7
7
129
129
2
4
1
3713
48
1992
1.000000
...
0.692308
0.0
0.596429
0.158824
0
0.654545
1.0
1079895.2853
2343442.4170
0.5775
8
8
130
130
2
4
1
3713
48
1993
1.000000
...
0.923077
0.0
0.839286
0.223529
0
0.763636
1.0
1079930.4474
2343442.2052
0.6495
9
9
130
130
2
4
1
3713
48
1993
1.000000
...
0.923077
0.0
0.839286
0.223529
0
0.763636
1.0
1079946.8545
2343442.1063
0.6495
10
10
130
130
2
4
1
3713
48
1993
1.000000
...
0.923077
0.0
0.839286
0.223529
0
0.763636
1.0
1079895.2853
2343442.4170
0.6495
11
11
231
231
2
4
1
3713
48
1991
1.000000
...
0.923077
0.0
0.539286
0.188235
0
0.763636
1.0
1079930.4474
2343442.2052
0.6215
12
12
231
231
2
4
1
3713
48
1991
1.000000
...
0.923077
0.0
0.539286
0.188235
0
0.763636
1.0
1079946.8545
2343442.1063
0.6215
13
13
231
231
2
4
1
3713
48
1991
1.000000
...
0.923077
0.0
0.539286
0.188235
0
0.763636
1.0
1079895.2853
2343442.4170
0.6215
14
14
131
131
2
4
1
3728
49
1994
1.000000
...
0.923077
0.5
0.660714
0.317647
0
0.545455
1.0
1079628.9259
2343104.6840
0.7015
15
15
131
131
2
4
1
3728
49
1994
1.000000
...
0.923077
0.5
0.660714
0.317647
0
0.545455
1.0
1079626.8385
2343062.5743
0.7015
16
16
132
132
2
4
1
5635
10
1995
1.000000
...
0.846154
0.5
0.982143
0.344118
1
0.872727
0.5
1081325.9932
2343469.4629
0.7120
17
17
133
133
2
4
1
5635
14
1996
1.000000
...
0.923077
0.0
0.750000
0.144118
1
0.809091
1.0
1081077.8720
2343843.3932
0.6385
18
18
134
134
2
4
1
3501
48
1997
0.727273
...
0.846154
1.0
0.839286
0.108824
0
0.690909
1.0
1081340.6433
2341368.6145
0.7070
19
19
135
135
2
4
1
3732
16
1998
0.727273
...
0.923077
0.5
0.546429
0.161765
0
0.654545
1.0
1080133.1094
2342393.7799
0.6310
20
20
135
135
2
4
1
3732
16
1998
0.727273
...
0.923077
0.5
0.546429
0.161765
0
0.654545
1.0
1080184.6203
2342265.3863
0.6310
21
21
136
136
2
4
1
3732
16
1999
0.727273
...
0.923077
0.5
0.750000
0.108824
0
0.763636
1.0
1080133.1094
2342393.7799
0.6325
22
22
136
136
2
4
1
3732
16
1999
0.727273
...
0.923077
0.5
0.750000
0.108824
0
0.763636
1.0
1080184.6203
2342265.3863
0.6325
23
23
137
137
2
4
1
3732
47
2000
0.727273
...
0.923077
1.0
0.732143
0.155882
0
0.472727
1.0
1080491.5883
2342049.7875
0.6860
24
24
138
138
2
4
1
3785
2
2001
0.727273
...
0.846154
0.0
0.839286
0.252941
0
0.809091
1.0
1081308.2090
2342556.8061
0.6245
25
25
138
138
2
4
1
3785
2
2001
0.727273
...
0.846154
0.0
0.839286
0.252941
0
0.809091
1.0
1081395.5334
2342672.7702
0.6245
26
26
139
139
2
4
1
3785
2
2002
0.727273
...
0.923077
1.0
0.800000
0.108824
0
0.809091
1.0
1081308.2090
2342556.8061
0.7245
27
27
139
139
2
4
1
3785
2
2002
0.727273
...
0.923077
1.0
0.800000
0.108824
0
0.809091
1.0
1081395.5334
2342672.7702
0.7245
28
28
140
140
2
4
1
3785
7
2004
1.000000
...
0.846154
0.5
0.750000
0.108824
0
0.763636
1.0
1080946.8150
2343142.2856
0.6525
29
29
140
140
2
4
1
3785
7
2004
1.000000
...
0.846154
0.5
0.750000
0.108824
0
0.763636
1.0
1080953.1424
2343163.5934
0.6525
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
147641
147641
151069
151069
31
13
1
0076
4
190874
1.000000
...
0.692308
1.0
0.839286
0.144118
1
0.200000
0.5
3789117.8790
1062694.1182
0.6640
147642
147642
151071
151071
31
54
1
0030
17
191044
1.000000
...
0.538462
0.5
0.839286
0.144118
0
0.200000
0.0
3809028.0212
1061365.7127
0.5240
147643
147643
151072
151072
31
54
6
0045
5
191045
1.000000
...
0.923077
0.0
0.839286
0.014706
0
0.109091
1.0
3807680.1370
1060112.5966
0.5620
147644
147644
151072
151072
31
54
6
0045
5
191045
1.000000
...
0.923077
0.0
0.839286
0.014706
0
0.109091
1.0
3807684.3397
1060111.2397
0.5620
147645
147645
151073
151073
31
52
1
0182
15
191046
1.000000
...
0.692308
0.5
0.678571
0.047059
0
0.763636
1.0
3813798.7329
1067758.7445
0.6520
147646
147646
151075
151075
31
50
75
6562
13
191048
1.000000
...
0.846154
0.5
0.760714
0.161765
0
0.327273
0.5
3770866.5845
1055275.1014
0.6150
147647
147647
151076
151076
31
50
75
6577
13
191049
1.000000
...
0.923077
1.0
0.750000
0.158824
0
0.654545
0.0
3770354.2840
1055020.5690
0.6690
147648
147648
151077
151077
31
50
81
2042
3
191052
1.000000
...
0.923077
0.0
0.678571
0.052941
0
0.418182
1.0
3770220.3375
1060790.2010
0.5850
147649
147649
151078
151078
31
50
88
2042
13
191053
1.000000
...
0.692308
0.5
0.550000
0.144118
0
0.963636
0.5
3773232.3171
1059105.4721
0.6325
147650
147650
151079
151079
30
91
55
003A
5
189318
1.000000
...
0.769231
0.0
0.728571
0.176471
0
0.636364
0.0
3261980.4628
631619.97252
0.5020
147651
147651
151080
151080
30
91
55
003A
10
189319
1.000000
...
0.615385
0.0
0.496429
0.014706
0
0.327273
1.0
3261982.8639
631604.01021
0.4630
147652
147652
151081
151081
30
91
56
0063
2
189320
1.000000
...
0.692308
0.0
0.550000
0.014706
0
0.700000
1.0
3248444.1915
623213.90977
0.5515
147653
147653
151082
151082
30
91
57
003A
1
189321
1.000000
...
0.769231
0.0
0.525000
0.014706
0
0.481818
0.5
3267154.4442
631875.88116
0.4890
147654
147654
151082
151082
30
91
57
003A
1
189321
1.000000
...
0.769231
0.0
0.525000
0.014706
0
0.481818
0.5
3267147.4561
631804.15645
0.4890
147655
147655
151083
151083
30
91
57
003A
1
189322
1.000000
...
0.538462
0.0
0.596429
0.061765
0
0.327273
0.5
3267154.4442
631875.88116
0.4600
147656
147656
151083
151083
30
91
57
003A
1
189322
1.000000
...
0.538462
0.0
0.596429
0.061765
0
0.327273
0.5
3267147.4561
631804.15645
0.4600
147657
147657
151084
151084
30
91
61
0063
3
189323
1.000000
...
0.461538
0.0
0.417857
0.000000
0
0.545455
0.0
3248526.0565
626649.68515
0.3885
147658
147658
151084
151084
30
91
61
0063
3
189323
1.000000
...
0.461538
0.0
0.417857
0.000000
0
0.545455
0.0
3248538.1343
626559.64459
0.3885
147659
147659
151085
151085
30
91
61
0063
3
189324
1.000000
...
0.538462
0.0
0.214286
0.067647
0
0.154545
1.0
3248526.0565
626649.68515
0.4285
147660
147660
151085
151085
30
91
61
0063
3
189324
1.000000
...
0.538462
0.0
0.214286
0.067647
0
0.154545
1.0
3248538.1343
626559.64459
0.4285
147661
147661
151086
151086
30
91
61
0063
2
189325
1.000000
...
0.461538
0.0
0.328571
0.067647
0
0.109091
0.0
3248574.1356
626695.97516
0.3095
147662
147662
151087
151087
30
91
70
0063
4
189326
1.000000
...
0.461538
0.0
0.453571
0.014706
0
0.545455
1.0
3249248.6607
626583.75220
0.4910
147663
147663
151088
151088
30
91
76
003A
800
189327
1.000000
...
0.615385
0.0
0.678571
0.000000
0
0.327273
1.0
3268469.1765
634215.8402
0.5160
147664
147664
151089
151089
30
91
100
0082
800
189328
1.000000
...
0.384615
0.0
0.346429
0.014706
0
0.327273
0.5
3260885.3823
624631.21009
0.3720
147665
147665
151090
151090
30
3
1
0323
42
188407
1.000000
...
0.846154
0.0
0.757143
0.129412
0
0.809091
0.5
3249442.0328
680735.86987
0.6070
147666
147666
151091
151091
30
3
1
0126
4
188408
0.727273
...
0.846154
0.0
0.292857
0.114706
0
0.545455
0.0
3248445.8473
679870.99807
0.4255
147667
147667
151092
151092
30
3
1
0126
14
188409
0.727273
...
0.846154
0.0
0.596429
0.114706
0
0.690909
1.0
3248678.1322
680385.75222
0.5640
147668
147668
151093
151093
30
3
1
0130
35
188410
0.727273
...
0.846154
0.5
0.817857
0.161765
0
0.654545
1.0
3249117.8092
680078.11555
0.6590
147669
147669
151094
151094
30
3
1
015A
25
188411
1.000000
...
0.846154
0.0
0.803571
0.144118
0
0.545455
0.5
3248718.0635
679487.59219
0.5820
147670
147670
151095
151095
30
3
1
015A
27
188412
1.000000
...
0.923077
0.5
0.800000
0.161765
0
0.481818
1.0
3248877.4810
679621.04745
0.6775
147671 rows × 21 columns
In [184]:
inner_agg['total'] = tmp_sum
In [185]:
inner_agg
Out[185]:
1
2
3
4
5
6
7
8
9
total
ENT
2
0.905273
0.792511
0.788793
0.395282
0.753682
0.200733
0.218067
0.626755
0.781646
0.624107
3
0.911422
0.878856
0.830533
0.457436
0.720168
0.162555
0.291282
0.431375
0.619487
0.604089
4
0.927649
0.932916
0.760286
0.257106
0.581848
0.120410
0.114341
0.384056
0.488695
0.535996
5
0.793792
0.969043
0.911820
0.414634
0.806794
0.179412
0.658537
0.459867
0.731707
0.643183
6
0.933566
0.933925
0.876726
0.333333
0.734249
0.159766
0.282051
0.534615
0.474359
0.607096
7
0.852880
0.622362
0.585957
0.092025
0.412869
0.117713
0.034619
0.290845
0.307625
0.396129
8
0.891406
0.783828
0.738228
0.254687
0.662540
0.153636
0.108175
0.293837
0.598110
0.525146
9
0.903780
0.954153
0.903672
0.290823
0.789570
0.239824
0.184403
0.796426
0.484288
0.654271
10
0.933088
0.829115
0.769643
0.241010
0.655383
0.113312
0.118210
0.178135
0.584545
0.516729
11
0.927961
0.887870
0.801900
0.206186
0.666036
0.111531
0.065207
0.249409
0.625920
0.535445
12
0.904353
0.770769
0.677701
0.059082
0.491643
0.116673
0.041796
0.383866
0.441176
0.461299
13
0.962659
0.820968
0.732837
0.037097
0.515084
0.084244
0.001075
0.201779
0.483871
0.459328
14
0.907455
0.842609
0.841942
0.180758
0.673511
0.134244
0.048929
0.487586
0.532577
0.553027
15
0.918704
0.912617
0.856746
0.245453
0.712556
0.183394
0.079585
0.650293
0.683223
0.616791
16
0.894255
0.890558
0.826322
0.114149
0.639912
0.138128
0.062500
0.331179
0.585069
0.531674
18
1.000000
0.923077
0.923077
0.000000
0.671429
0.223529
1.000000
0.309091
0.500000
0.576000
19
0.910373
0.899570
0.801149
0.376322
0.745900
0.180982
0.245434
0.462030
0.638738
0.604654
20
0.905518
0.741799
0.666412
0.036084
0.492100
0.098757
0.018280
0.099370
0.301088
0.407710
21
0.911968
0.860686
0.777718
0.135310
0.604782
0.120335
0.057260
0.600843
0.609504
0.549318
23
0.912052
0.897394
0.724630
0.263844
0.635284
0.107262
0.078176
0.251969
0.257329
0.497252
24
0.912579
0.869823
0.741285
0.126167
0.580499
0.113492
0.045627
0.244430
0.578926
0.498384
25
0.917216
0.883553
0.777566
0.153712
0.648066
0.128725
0.144141
0.253224
0.613765
0.526199
26
0.929886
0.797429
0.817362
0.288593
0.723329
0.160189
0.248597
0.461150
0.586808
0.575097
27
0.911503
0.877890
0.735384
0.175096
0.590148
0.112030
0.167612
0.344259
0.543094
0.515586
29
0.727273
0.692308
0.846154
0.000000
0.442857
0.000000
0.000000
0.436364
0.500000
0.430000
30
0.923771
0.833389
0.740629
0.086622
0.569393
0.111261
0.047243
0.433832
0.565013
0.508262
31
0.919519
0.948770
0.806081
0.349465
0.686585
0.150235
0.133776
0.364025
0.388327
0.563165
32
0.937749
0.831928
0.802964
0.166247
0.628412
0.106737
0.085011
0.161060
0.568566
0.505001
In [219]:
coef = 1000
#COLORS
colors = ['peru', 'hotpink', 'crimson', 'darkslategray', 'lightsalmon', 'gray', 'firebrick', 'lightgreen', 'olive', 'y']
#colors = [[color[0]]*9, [color[1]]*9, [color[2]]*9 ]
#for i in range(3):
# colors[i].extend('y')
#GRID
fig = plt.figure(figsize=(10,30))
ax = fig.add_subplot(1,1,1)
ax.set_axis_bgcolor('white')
major_yticks = np.arange(0, 33, 5)
ax.set_yticks(major_yticks)
major_xticks = np.arange(0, 11, 5)
ax.set_xticks(major_xticks)
minor_yticks = np.arange(0, 33, 1)
ax.set_yticks(minor_yticks, minor=True)
minor_xticks = np.arange(0, 11, 1)
ax.set_xticks(minor_xticks, minor=True)
#ax.grid(which='major', c = 'white', linestyle='-')
ax.grid(which='minor', c = 'green', linestyle=':')
for i in inner_agg.index.values:
plt.scatter(range(1,11),[i]*10, s=coef, c = colors, alpha=0.1, edgecolor='k', lw=1)
plt.scatter(range(1,11), [i]*10, s=inner_agg.ix[i].map(lambda x: coef*x), c = colors)
plt.savefig('bubble_plot.png', bbox_inches='tight')
plt.show()
/Users/luis/anaconda/lib/python3.4/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
if self._edgecolors == str('face'):
In [220]:
inner_agg['total'].plot()
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-220-2549e82e1b14> in <module>()
----> 1 inner_agg['total'].plot()
/Users/luis/anaconda/lib/python3.4/site-packages/pandas/core/frame.py in __getitem__(self, key)
1912 return self._getitem_multilevel(key)
1913 else:
-> 1914 return self._getitem_column(key)
1915
1916 def _getitem_column(self, key):
/Users/luis/anaconda/lib/python3.4/site-packages/pandas/core/frame.py in _getitem_column(self, key)
1919 # get column
1920 if self.columns.is_unique:
-> 1921 return self._get_item_cache(key)
1922
1923 # duplicate columns & possible reduce dimensionaility
/Users/luis/anaconda/lib/python3.4/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
1088 res = cache.get(item)
1089 if res is None:
-> 1090 values = self._data.get(item)
1091 res = self._box_item_values(item, values)
1092 cache[item] = res
/Users/luis/anaconda/lib/python3.4/site-packages/pandas/core/internals.py in get(self, item, fastpath)
3100
3101 if not isnull(item):
-> 3102 loc = self.items.get_loc(item)
3103 else:
3104 indexer = np.arange(len(self.items))[isnull(self.items)]
/Users/luis/anaconda/lib/python3.4/site-packages/pandas/core/index.py in get_loc(self, key, method, tolerance)
1690 raise ValueError('tolerance argument only valid if using pad, '
1691 'backfill or nearest lookups')
-> 1692 return self._engine.get_loc(_values_from_object(key))
1693
1694 indexer = self.get_indexer([key], method=method,
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3979)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3843)()
pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12265)()
pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12216)()
KeyError: 'total'
In [221]:
inner_agg.plot(figsize=(20,8), kind='bar', stacked=True)
Out[221]:
<matplotlib.axes._subplots.AxesSubplot at 0x121afbe48>
In [164]:
fig=plt.figure()
<matplotlib.figure.Figure at 0x1200e1080>
In [126]:
In [127]:
ax
Out[127]:
<matplotlib.axes._subplots.AxesSubplot at 0x117678b00>
In [150]:
minor_xticks
Out[150]:
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
In [2]:
inner_data = pd.read_csv('inner_data.csv')
/Users/luis/anaconda/lib/python3.4/site-packages/IPython/core/interactiveshell.py:2902: DtypeWarning: Columns (16,17) have mixed types. Specify dtype option on import or set low_memory=False.
interactivity=interactivity, compiler=compiler, result=result)
In [222]:
inner_agg
Out[222]:
1
2
3
4
5
6
7
8
9
ENT
2
0.905273
0.792511
0.788793
0.395282
0.753682
0.200733
0.218067
0.626755
0.781646
3
0.911422
0.878856
0.830533
0.457436
0.720168
0.162555
0.291282
0.431375
0.619487
4
0.927649
0.932916
0.760286
0.257106
0.581848
0.120410
0.114341
0.384056
0.488695
5
0.793792
0.969043
0.911820
0.414634
0.806794
0.179412
0.658537
0.459867
0.731707
6
0.933566
0.933925
0.876726
0.333333
0.734249
0.159766
0.282051
0.534615
0.474359
7
0.852880
0.622362
0.585957
0.092025
0.412869
0.117713
0.034619
0.290845
0.307625
8
0.891406
0.783828
0.738228
0.254687
0.662540
0.153636
0.108175
0.293837
0.598110
9
0.903780
0.954153
0.903672
0.290823
0.789570
0.239824
0.184403
0.796426
0.484288
10
0.933088
0.829115
0.769643
0.241010
0.655383
0.113312
0.118210
0.178135
0.584545
11
0.927961
0.887870
0.801900
0.206186
0.666036
0.111531
0.065207
0.249409
0.625920
12
0.904353
0.770769
0.677701
0.059082
0.491643
0.116673
0.041796
0.383866
0.441176
13
0.962659
0.820968
0.732837
0.037097
0.515084
0.084244
0.001075
0.201779
0.483871
14
0.907455
0.842609
0.841942
0.180758
0.673511
0.134244
0.048929
0.487586
0.532577
15
0.918704
0.912617
0.856746
0.245453
0.712556
0.183394
0.079585
0.650293
0.683223
16
0.894255
0.890558
0.826322
0.114149
0.639912
0.138128
0.062500
0.331179
0.585069
18
1.000000
0.923077
0.923077
0.000000
0.671429
0.223529
1.000000
0.309091
0.500000
19
0.910373
0.899570
0.801149
0.376322
0.745900
0.180982
0.245434
0.462030
0.638738
20
0.905518
0.741799
0.666412
0.036084
0.492100
0.098757
0.018280
0.099370
0.301088
21
0.911968
0.860686
0.777718
0.135310
0.604782
0.120335
0.057260
0.600843
0.609504
23
0.912052
0.897394
0.724630
0.263844
0.635284
0.107262
0.078176
0.251969
0.257329
24
0.912579
0.869823
0.741285
0.126167
0.580499
0.113492
0.045627
0.244430
0.578926
25
0.917216
0.883553
0.777566
0.153712
0.648066
0.128725
0.144141
0.253224
0.613765
26
0.929886
0.797429
0.817362
0.288593
0.723329
0.160189
0.248597
0.461150
0.586808
27
0.911503
0.877890
0.735384
0.175096
0.590148
0.112030
0.167612
0.344259
0.543094
29
0.727273
0.692308
0.846154
0.000000
0.442857
0.000000
0.000000
0.436364
0.500000
30
0.923771
0.833389
0.740629
0.086622
0.569393
0.111261
0.047243
0.433832
0.565013
31
0.919519
0.948770
0.806081
0.349465
0.686585
0.150235
0.133776
0.364025
0.388327
32
0.937749
0.831928
0.802964
0.166247
0.628412
0.106737
0.085011
0.161060
0.568566
In [41]:
#inner_data['seccion_1'].replace({1:11, 2:8, 3:3, 4:0, 5:0},inplace=True)
In [227]:
inner_agg['cal_final'] = inner_data['cal_final']/100
In [228]:
inner_agg
Out[228]:
1
2
3
4
5
6
7
8
9
cal_final
ENT
2
0.905273
0.792511
0.788793
0.395282
0.753682
0.200733
0.218067
0.626755
0.781646
0.6735
3
0.911422
0.878856
0.830533
0.457436
0.720168
0.162555
0.291282
0.431375
0.619487
0.6400
4
0.927649
0.932916
0.760286
0.257106
0.581848
0.120410
0.114341
0.384056
0.488695
0.6325
5
0.793792
0.969043
0.911820
0.414634
0.806794
0.179412
0.658537
0.459867
0.731707
0.5775
6
0.933566
0.933925
0.876726
0.333333
0.734249
0.159766
0.282051
0.534615
0.474359
0.5775
7
0.852880
0.622362
0.585957
0.092025
0.412869
0.117713
0.034619
0.290845
0.307625
0.5775
8
0.891406
0.783828
0.738228
0.254687
0.662540
0.153636
0.108175
0.293837
0.598110
0.6495
9
0.903780
0.954153
0.903672
0.290823
0.789570
0.239824
0.184403
0.796426
0.484288
0.6495
10
0.933088
0.829115
0.769643
0.241010
0.655383
0.113312
0.118210
0.178135
0.584545
0.6495
11
0.927961
0.887870
0.801900
0.206186
0.666036
0.111531
0.065207
0.249409
0.625920
0.6215
12
0.904353
0.770769
0.677701
0.059082
0.491643
0.116673
0.041796
0.383866
0.441176
0.6215
13
0.962659
0.820968
0.732837
0.037097
0.515084
0.084244
0.001075
0.201779
0.483871
0.6215
14
0.907455
0.842609
0.841942
0.180758
0.673511
0.134244
0.048929
0.487586
0.532577
0.7015
15
0.918704
0.912617
0.856746
0.245453
0.712556
0.183394
0.079585
0.650293
0.683223
0.7015
16
0.894255
0.890558
0.826322
0.114149
0.639912
0.138128
0.062500
0.331179
0.585069
0.7120
18
1.000000
0.923077
0.923077
0.000000
0.671429
0.223529
1.000000
0.309091
0.500000
0.7070
19
0.910373
0.899570
0.801149
0.376322
0.745900
0.180982
0.245434
0.462030
0.638738
0.6310
20
0.905518
0.741799
0.666412
0.036084
0.492100
0.098757
0.018280
0.099370
0.301088
0.6310
21
0.911968
0.860686
0.777718
0.135310
0.604782
0.120335
0.057260
0.600843
0.609504
0.6325
23
0.912052
0.897394
0.724630
0.263844
0.635284
0.107262
0.078176
0.251969
0.257329
0.6860
24
0.912579
0.869823
0.741285
0.126167
0.580499
0.113492
0.045627
0.244430
0.578926
0.6245
25
0.917216
0.883553
0.777566
0.153712
0.648066
0.128725
0.144141
0.253224
0.613765
0.6245
26
0.929886
0.797429
0.817362
0.288593
0.723329
0.160189
0.248597
0.461150
0.586808
0.7245
27
0.911503
0.877890
0.735384
0.175096
0.590148
0.112030
0.167612
0.344259
0.543094
0.7245
29
0.727273
0.692308
0.846154
0.000000
0.442857
0.000000
0.000000
0.436364
0.500000
0.6525
30
0.923771
0.833389
0.740629
0.086622
0.569393
0.111261
0.047243
0.433832
0.565013
0.6200
31
0.919519
0.948770
0.806081
0.349465
0.686585
0.150235
0.133776
0.364025
0.388327
0.6200
32
0.937749
0.831928
0.802964
0.166247
0.628412
0.106737
0.085011
0.161060
0.568566
0.5740
In [11]:
inner_normalized = inner_data[resumen]
#inner_normalized[secciones] = inner_normalized[secciones]/max_vals
#inner_normalized['cal_final'] = inner_normalized['cal_final']/100
inner_normalized
Out[11]:
ENT
seccion_1
seccion_2
seccion_3
seccion_4
seccion_5
seccion_6
seccion_7
seccion_8
seccion_9
cal_final
0
2
1.000000
0.923077
0.846154
0.5
0.839286
0.252941
0
0.654545
0.5
0.6675
1
2
0.727273
0.923077
0.846154
0.0
0.839286
0.170588
0
0.872727
0.5
0.5925
2
2
1.000000
0.923077
0.923077
0.5
0.817857
0.176471
0
0.763636
0.5
0.6735
3
2
1.000000
0.692308
0.923077
0.0
1.000000
0.329412
0
0.763636
0.5
0.6400
4
2
1.000000
0.692308
0.846154
0.5
0.839286
0.223529
0
0.654545
0.5
0.6325
5
2
1.000000
0.884615
0.692308
0.0
0.596429
0.158824
0
0.654545
1.0
0.5775
6
2
1.000000
0.884615
0.692308
0.0
0.596429
0.158824
0
0.654545
1.0
0.5775
7
2
1.000000
0.884615
0.692308
0.0
0.596429
0.158824
0
0.654545
1.0
0.5775
8
2
1.000000
0.769231
0.923077
0.0
0.839286
0.223529
0
0.763636
1.0
0.6495
9
2
1.000000
0.769231
0.923077
0.0
0.839286
0.223529
0
0.763636
1.0
0.6495
10
2
1.000000
0.769231
0.923077
0.0
0.839286
0.223529
0
0.763636
1.0
0.6495
11
2
1.000000
0.923077
0.923077
0.0
0.539286
0.188235
0
0.763636
1.0
0.6215
12
2
1.000000
0.923077
0.923077
0.0
0.539286
0.188235
0
0.763636
1.0
0.6215
13
2
1.000000
0.923077
0.923077
0.0
0.539286
0.188235
0
0.763636
1.0
0.6215
14
2
1.000000
1.000000
0.923077
0.5
0.660714
0.317647
0
0.545455
1.0
0.7015
15
2
1.000000
1.000000
0.923077
0.5
0.660714
0.317647
0
0.545455
1.0
0.7015
16
2
1.000000
0.653846
0.846154
0.5
0.982143
0.344118
1
0.872727
0.5
0.7120
17
2
1.000000
0.692308
0.923077
0.0
0.750000
0.144118
1
0.809091
1.0
0.6385
18
2
0.727273
0.884615
0.846154
1.0
0.839286
0.108824
0
0.690909
1.0
0.7070
19
2
0.727273
0.923077
0.923077
0.5
0.546429
0.161765
0
0.654545
1.0
0.6310
20
2
0.727273
0.923077
0.923077
0.5
0.546429
0.161765
0
0.654545
1.0
0.6310
21
2
0.727273
0.692308
0.923077
0.5
0.750000
0.108824
0
0.763636
1.0
0.6325
22
2
0.727273
0.692308
0.923077
0.5
0.750000
0.108824
0
0.763636
1.0
0.6325
23
2
0.727273
0.884615
0.923077
1.0
0.732143
0.155882
0
0.472727
1.0
0.6860
24
2
0.727273
0.807692
0.846154
0.0
0.839286
0.252941
0
0.809091
1.0
0.6245
25
2
0.727273
0.807692
0.846154
0.0
0.839286
0.252941
0
0.809091
1.0
0.6245
26
2
0.727273
0.884615
0.923077
1.0
0.800000
0.108824
0
0.809091
1.0
0.7245
27
2
0.727273
0.884615
0.923077
1.0
0.800000
0.108824
0
0.809091
1.0
0.7245
28
2
1.000000
0.692308
0.846154
0.5
0.750000
0.108824
0
0.763636
1.0
0.6525
29
2
1.000000
0.692308
0.846154
0.5
0.750000
0.108824
0
0.763636
1.0
0.6525
...
...
...
...
...
...
...
...
...
...
...
...
147641
31
1.000000
1.000000
0.692308
1.0
0.839286
0.144118
1
0.200000
0.5
0.6640
147642
31
1.000000
0.961538
0.538462
0.5
0.839286
0.144118
0
0.200000
0.0
0.5240
147643
31
1.000000
0.923077
0.923077
0.0
0.839286
0.014706
0
0.109091
1.0
0.5620
147644
31
1.000000
0.923077
0.923077
0.0
0.839286
0.014706
0
0.109091
1.0
0.5620
147645
31
1.000000
1.000000
0.692308
0.5
0.678571
0.047059
0
0.763636
1.0
0.6520
147646
31
1.000000
1.000000
0.846154
0.5
0.760714
0.161765
0
0.327273
0.5
0.6150
147647
31
1.000000
0.961538
0.923077
1.0
0.750000
0.158824
0
0.654545
0.0
0.6690
147648
31
1.000000
0.961538
0.923077
0.0
0.678571
0.052941
0
0.418182
1.0
0.5850
147649
31
1.000000
1.000000
0.692308
0.5
0.550000
0.144118
0
0.963636
0.5
0.6325
147650
30
1.000000
0.692308
0.769231
0.0
0.728571
0.176471
0
0.636364
0.0
0.5020
147651
30
1.000000
0.653846
0.615385
0.0
0.496429
0.014706
0
0.327273
1.0
0.4630
147652
30
1.000000
0.884615
0.692308
0.0
0.550000
0.014706
0
0.700000
1.0
0.5515
147653
30
1.000000
0.846154
0.769231
0.0
0.525000
0.014706
0
0.481818
0.5
0.4890
147654
30
1.000000
0.846154
0.769231
0.0
0.525000
0.014706
0
0.481818
0.5
0.4890
147655
30
1.000000
0.846154
0.538462
0.0
0.596429
0.061765
0
0.327273
0.5
0.4600
147656
30
1.000000
0.846154
0.538462
0.0
0.596429
0.061765
0
0.327273
0.5
0.4600
147657
30
1.000000
0.769231
0.461538
0.0
0.417857
0.000000
0
0.545455
0.0
0.3885
147658
30
1.000000
0.769231
0.461538
0.0
0.417857
0.000000
0
0.545455
0.0
0.3885
147659
30
1.000000
0.846154
0.538462
0.0
0.214286
0.067647
0
0.154545
1.0
0.4285
147660
30
1.000000
0.846154
0.538462
0.0
0.214286
0.067647
0
0.154545
1.0
0.4285
147661
30
1.000000
0.538462
0.461538
0.0
0.328571
0.067647
0
0.109091
0.0
0.3095
147662
30
1.000000
0.884615
0.461538
0.0
0.453571
0.014706
0
0.545455
1.0
0.4910
147663
30
1.000000
0.884615
0.615385
0.0
0.678571
0.000000
0
0.327273
1.0
0.5160
147664
30
1.000000
0.653846
0.384615
0.0
0.346429
0.014706
0
0.327273
0.5
0.3720
147665
30
1.000000
1.000000
0.846154
0.0
0.757143
0.129412
0
0.809091
0.5
0.6070
147666
30
0.727273
0.884615
0.846154
0.0
0.292857
0.114706
0
0.545455
0.0
0.4255
147667
30
0.727273
0.884615
0.846154
0.0
0.596429
0.114706
0
0.690909
1.0
0.5640
147668
30
0.727273
0.923077
0.846154
0.5
0.817857
0.161765
0
0.654545
1.0
0.6590
147669
30
1.000000
0.961538
0.846154
0.0
0.803571
0.144118
0
0.545455
0.5
0.5820
147670
30
1.000000
0.923077
0.923077
0.5
0.800000
0.161765
0
0.481818
1.0
0.6775
147671 rows × 11 columns
In [15]:
inner_normalized[inner_normalized['ENT']==25][indicadores].hist()
Out[15]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x10c4a0ba8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10c52e198>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10c5ac978>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x10cea6c50>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10cef3b00>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10cf327b8>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x10cf79da0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10f94f198>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10f995a90>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x10fe0d748>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10fe4c240>,
<matplotlib.axes._subplots.AxesSubplot object at 0x112f72400>]], dtype=object)
In [16]:
inner_normalized.groupby('ENT').agg('std').hist()
Out[16]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x112fadc18>,
<matplotlib.axes._subplots.AxesSubplot object at 0x11356ab00>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1141a0128>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x1141d9a58>,
<matplotlib.axes._subplots.AxesSubplot object at 0x114225828>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1142645f8>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x1142aabe0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1142e5f98>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1143358d0>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x114383588>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1143c1080>,
<matplotlib.axes._subplots.AxesSubplot object at 0x11451e240>]], dtype=object)
In [17]:
plt.matshow(inner_normalized[secciones].corr())
Out[17]:
<matplotlib.image.AxesImage at 0x114a316a0>
In [18]:
import itertools
In [283]:
def scatterplot_matrix(data, names, **kwargs):
"""Plots a scatterplot matrix of subplots. Each row of "data" is plotted
against other rows, resulting in a nrows by nrows grid of subplots with the
diagonal subplots labeled with "names". Additional keyword arguments are
passed on to matplotlib's "plot" command. Returns the matplotlib figure
object containg the subplot grid."""
numvars, numdata = data.shape
fig, axes = plt.subplots(nrows=numvars, ncols=numvars, figsize=(8,8))
fig.subplots_adjust(hspace=0.05, wspace=0.05)
for ax in axes.flat:
# Hide all ticks and labels
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
# Set up ticks only on one side for the "edge" subplots...
if ax.is_first_col():
ax.yaxis.set_ticks_position('left')
if ax.is_last_col():
ax.yaxis.set_ticks_position('right')
if ax.is_first_row():
ax.xaxis.set_ticks_position('top')
if ax.is_last_row():
ax.xaxis.set_ticks_position('bottom')
# Plot the data.
for i, j in zip(*np.triu_indices_from(axes, k=1)):
for x, y in [(i,j), (j,i)]:
axes[x,y].plot(data[x], data[y], **kwargs)
# Label the diagonal subplots...
for i, label in enumerate(names):
axes[i,i].annotate(label, (0.5, 0.5), xycoords='axes fraction',
ha='center', va='center')
# Turn on the proper x or y axes ticks.
for i, j in zip(range(numvars), itertools.cycle((-1, 0))):
axes[j,i].xaxis.set_visible(True)
axes[i,j].yaxis.set_visible(True)
return fig
In [21]:
pd.scatter_matrix(inner_normalized[indicadores], alpha=0.2, figsize=(20, 20), diagonal='kde')
Out[21]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x10c7c3b38>,
<matplotlib.axes._subplots.AxesSubplot object at 0x114c60b38>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10c90e7f0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10c955898>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10c992b38>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10c9cf208>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10ca0f8d0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10ca47ba8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10ca964e0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10cae3198>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x10cc2ec50>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10cc7be10>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10ccbb668>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10cd05828>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10cd3fef0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10cd8c438>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10cdc68d0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10ce18208>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10ce5ed68>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10db4b828>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x10db988d0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10dbd5358>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10df33518>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10df6dcf8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10dfb8438>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10dff26d8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10e03bfd0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10e089b70>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10e0c9668>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10e215710>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x10e253198>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10e2a0358>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10e2d7b38>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10e326278>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10e360518>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10e3abe10>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10e3f99b0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x10e4374a8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1103ad550>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1103e5f98>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x110550198>,
<matplotlib.axes._subplots.AxesSubplot object at 0x110588978>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1105d50b8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x110713358>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1107234a8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x110770080>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1107a8e48>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1107f6c18>,
<matplotlib.axes._subplots.AxesSubplot object at 0x110a34b00>,
<matplotlib.axes._subplots.AxesSubplot object at 0x110a80da0>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x110abf4e0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x110b04d68>,
<matplotlib.axes._subplots.AxesSubplot object at 0x110b2c5f8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1226517b8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x12269c470>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1226d5c88>,
<matplotlib.axes._subplots.AxesSubplot object at 0x122723a58>,
<matplotlib.axes._subplots.AxesSubplot object at 0x122761940>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1227adbe0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1227ee320>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x122833ba8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x122859438>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1228bd5f8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x122a0a2b0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x122a45ac8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x122a8f898>,
<matplotlib.axes._subplots.AxesSubplot object at 0x122acf780>,
<matplotlib.axes._subplots.AxesSubplot object at 0x122c1ba20>,
<matplotlib.axes._subplots.AxesSubplot object at 0x122c59160>,
<matplotlib.axes._subplots.AxesSubplot object at 0x122c9f9e8>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x122cc6278>,
<matplotlib.axes._subplots.AxesSubplot object at 0x122d2b438>,
<matplotlib.axes._subplots.AxesSubplot object at 0x122d790f0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x122db0908>,
<matplotlib.axes._subplots.AxesSubplot object at 0x122dfd6d8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x122f3b5c0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x122f88860>,
<matplotlib.axes._subplots.AxesSubplot object at 0x122fbbf60>,
<matplotlib.axes._subplots.AxesSubplot object at 0x12380f828>,
<matplotlib.axes._subplots.AxesSubplot object at 0x12382feb8>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x123899278>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1238dfef0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x123a1f748>,
<matplotlib.axes._subplots.AxesSubplot object at 0x123a6b518>,
<matplotlib.axes._subplots.AxesSubplot object at 0x123aa9400>,
<matplotlib.axes._subplots.AxesSubplot object at 0x123af56a0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x123c29da0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x123c79668>,
<matplotlib.axes._subplots.AxesSubplot object at 0x123c9deb8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x123d060b8>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x123d4cd30>,
<matplotlib.axes._subplots.AxesSubplot object at 0x123d8b588>,
<matplotlib.axes._subplots.AxesSubplot object at 0x123dd8358>,
<matplotlib.axes._subplots.AxesSubplot object at 0x123e17240>,
<matplotlib.axes._subplots.AxesSubplot object at 0x123e644e0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x123e9dbe0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x123ee84a8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x12400bcf8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x12406deb8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1240bab70>]], dtype=object)
/Users/luis/anaconda/lib/python3.4/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
if self._edgecolors == str('face'):
In [ ]:
pd.scatter_matrix(inner_normalized[indicadores], alpha=0.2, figsize=(6, 6), diagonal='kde')
Content source: pablocastelo/mexprimero
Similar notebooks: