In [3]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns;
from numpy import nan
from math import sqrt, sin, cos, atan2, log
plt.style.use('ggplot')
from IPython.core.display import HTML
css = open('style-table.css').read()+ open('style-notebook.css').read()
HTML('<style>{}</style>'.format(css))


Out[3]:

In [7]:
#test = pd.read_csv('TR_EXP_INMUEBLE_CONTROL.txt', header=0, sep='|', encoding='latin-1')


/Users/luis/anaconda/lib/python3.4/site-packages/IPython/core/interactiveshell.py:2902: DtypeWarning: Columns (8,15,21,22) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

In [109]:
infr = pd.read_csv('infra_corregido.csv')

In [63]:
#df = pd.read_excel('INMUEBLES.xlsx')

In [67]:
#df = df[df['P3']<6]

In [68]:
#df.ix[df.P12>2, 'P12'] = 3
#df['P12'].replace({1: 1.5, 2: 1, 3: 0},inplace=True)

In [69]:
#df.ix[df.P20>2,'P20']=0
#df.ix[df.P22>2,'P22']=0

In [70]:
#df.ix[(df.P22==1),'P20']=0 #se ignora p20 si p22=1

In [71]:
#df['P20'].replace({1: 3, 2: 0},inplace=True)

In [99]:
#df.ix[df['P12'].isnull(),'P12']=0
#df.ix[df['P20'].isnull(),'P20']=0

In [109]:
#infr['P20'].unique()


Out[109]:
array([ 0.,  3.])

In [107]:
#infr.ix[:,'P20'] = df['P20'].values
#infr.ix[:,'P12'] = df['P12'].values

In [95]:
#infr.to_csv('infra_corregido.csv')

In [114]:
#Init
secciones = [['P3'], ['P11', 'P12', 'P13A', 'P14', 'P15', 'P16'], ['P17A', 'P18A', 'P19', 'P20', 'P21', 'P22'], ['P23', 'P24'], ['P25', 'P26', 'P27', 'P28', 'P29', 'P30', 'P31', 'P32', 'P33', 'P34', 'P35', 'P36', 'P37', 'P38', 'P39', 'P40', 'P41'], ['P42', 'P44', 'P46', 'P47', 'P48', 'P49', 'P52', 'P62', 'P72', 'P82', 'P92', 'P102', 'P103', 'P112', 'P113', 'P117', 'P122', 'P123', 'P125'], ['P126'], ['P133', 'P134', 'P135', 'P136', 'P137', 'P138', 'P139', 'P140', 'P141', 'P142', 'P143'], ['P144', 'P145']]
max_cal = [11, 13, 13, 11, 14, 17, 2, 11, 8]
sec_lst = []
for i in range(1,10):
    sec_lst.append('seccion_'+str(i))

In [115]:
i=1
for seccion in secciones:
    infr['seccion_'+str(i)]=infr[seccion].sum(axis=1)
    i+=1

 
#dict(zip([1,2,3,4], [a,b,c,d]))

In [116]:
new_columns = ['ENT','MUN','LOC','AGEB','MZA','ID_INM']
new_columns.extend(sec_lst)
infr_secciones = infr[new_columns]

In [119]:
#infr.to_csv('infra_corregido.csv')
infr_secciones.to_csv('infr_secciones.csv')

In [120]:
for j in range(9):
    print("Sección",j,":\t",infr[secciones[j]].sum(axis=1).max())


Sección 0 :	 11
Sección 1 :	 13.0
Sección 2 :	 13.0
Sección 3 :	 11.0
Sección 4 :	 14.0
Sección 5 :	 10.0
Sección 6 :	 2.0
Sección 7 :	 11.0
Sección 8 :	 8.0

In [121]:
infr_secciones[sec_lst].max()


Out[121]:
seccion_1    11
seccion_2    13
seccion_3    13
seccion_4    11
seccion_5    14
seccion_6    10
seccion_7     2
seccion_8    11
seccion_9     8
dtype: float64

In [167]:
infr = pd.read_csv('infr_secciones.csv')
infr.head()


Out[167]:
Unnamed: 0 ENT MUN LOC AGEB MZA ID_INM seccion_1 seccion_2 seccion_3 seccion_4 seccion_5 seccion_6 seccion_7 seccion_8 seccion_9
0 0 2 4 1 2876 23 1914 8 8.0 11 11.0 11.75 4.80 0 8.8 4
1 1 2 4 1 2880 103 1915 8 8.5 11 5.5 11.50 2.75 0 4.8 8
2 2 2 4 1 3677 5 1916 8 9.0 11 5.5 11.75 2.75 0 4.8 4
3 3 2 4 1 3677 9 1917 8 8.0 11 11.0 11.75 2.65 0 10.0 0
4 4 2 4 1 3677 23 1918 8 8.0 11 5.5 10.70 5.35 0 8.4 8

In [123]:
loc = pd.read_csv('TR_EXP_INMUEBLE_CONTROL.txt', header=0, sep='|', encoding='latin-1', low_memory=False)

In [124]:
#loc[~loc['DESCRUBIC'].isnull()]['DESCRUBIC']#loc[['NOMVIAL', 'NUMEXT1', 'NEXTALF1', 'NUMINT', 'NUMINTALF', 'ENTRECA', 'YCALLE', 'DESCRUBIC']]
Y=loc['Y'].map(lambda x: float(str(x).replace(",","."))/10000)
X=loc['X'].map(lambda x: float(str(x).replace(",","."))/100000)
XY=pd.DataFrame({'Longitud':Y, 'Latitud':X})

In [125]:
loc_headers=['ENT', 'MUN','LOC', 'AGEB', 'MZA', 'X', 'Y']
common_headers=['ENT', 'MUN','LOC', 'AGEB', 'MZA']

In [126]:
location = loc[loc_headers]

In [127]:
len(infr[common_headers])/len(location)


Out[127]:
1.181369674508792

In [168]:
full_data = pd.merge(infr, loc[loc_headers], how='outer',left_on=common_headers, right_on=common_headers)
inner_data = pd.merge(infr, loc[loc_headers], how='inner',left_on=common_headers, right_on=common_headers)
left_data = pd.merge(infr, loc[loc_headers], how='left',left_on=common_headers, right_on=common_headers)

In [191]:
inner_data


Out[191]:
Unnamed: 0 ENT MUN LOC AGEB MZA ID_INM seccion_1 seccion_2 seccion_3 seccion_4 seccion_5 seccion_6 seccion_7 seccion_8 seccion_9 X Y cal_final
0 24 2 4 1 3412 2 1772 11 12.0 11 5.5 11.75 4.30 0 7.2 4 1084889.4391 2344929.5143 0.6675
1 72829 2 4 1 3412 2 2473 8 12.0 11 0.0 11.75 2.90 0 9.6 4 1084889.4391 2344929.5143 0.5925
2 105 2 4 1 436A 14 1886 11 12.0 12 5.5 11.45 3.00 0 8.4 4 1081038.1387 2346568.3414 0.6735
3 72827 2 4 1 436A 14 2471 11 9.0 12 0.0 14.00 5.60 0 8.4 4 1081038.1387 2346568.3414 0.6400
4 122 2 4 1 3041 2 1905 11 9.0 11 5.5 11.75 3.80 0 7.2 4 1077173.8573 2346448.7400 0.6325
5 129 2 4 1 3713 48 1992 11 11.5 9 0.0 8.35 2.70 0 7.2 8 1079930.4474 2343442.2052 0.5775
6 129 2 4 1 3713 48 1992 11 11.5 9 0.0 8.35 2.70 0 7.2 8 1079946.8545 2343442.1063 0.5775
7 129 2 4 1 3713 48 1992 11 11.5 9 0.0 8.35 2.70 0 7.2 8 1079895.2853 2343442.4170 0.5775
8 130 2 4 1 3713 48 1993 11 10.0 12 0.0 11.75 3.80 0 8.4 8 1079930.4474 2343442.2052 0.6495
9 130 2 4 1 3713 48 1993 11 10.0 12 0.0 11.75 3.80 0 8.4 8 1079946.8545 2343442.1063 0.6495
10 130 2 4 1 3713 48 1993 11 10.0 12 0.0 11.75 3.80 0 8.4 8 1079895.2853 2343442.4170 0.6495
11 231 2 4 1 3713 48 1991 11 12.0 12 0.0 7.55 3.20 0 8.4 8 1079930.4474 2343442.2052 0.6215
12 231 2 4 1 3713 48 1991 11 12.0 12 0.0 7.55 3.20 0 8.4 8 1079946.8545 2343442.1063 0.6215
13 231 2 4 1 3713 48 1991 11 12.0 12 0.0 7.55 3.20 0 8.4 8 1079895.2853 2343442.4170 0.6215
14 131 2 4 1 3728 49 1994 11 13.0 12 5.5 9.25 5.40 0 6.0 8 1079628.9259 2343104.6840 0.7015
15 131 2 4 1 3728 49 1994 11 13.0 12 5.5 9.25 5.40 0 6.0 8 1079626.8385 2343062.5743 0.7015
16 132 2 4 1 5635 10 1995 11 8.5 11 5.5 13.75 5.85 2 9.6 4 1081325.9932 2343469.4629 0.7120
17 133 2 4 1 5635 14 1996 11 9.0 12 0.0 10.50 2.45 2 8.9 8 1081077.8720 2343843.3932 0.6385
18 134 2 4 1 3501 48 1997 8 11.5 11 11.0 11.75 1.85 0 7.6 8 1081340.6433 2341368.6145 0.7070
19 135 2 4 1 3732 16 1998 8 12.0 12 5.5 7.65 2.75 0 7.2 8 1080133.1094 2342393.7799 0.6310
20 135 2 4 1 3732 16 1998 8 12.0 12 5.5 7.65 2.75 0 7.2 8 1080184.6203 2342265.3863 0.6310
21 136 2 4 1 3732 16 1999 8 9.0 12 5.5 10.50 1.85 0 8.4 8 1080133.1094 2342393.7799 0.6325
22 136 2 4 1 3732 16 1999 8 9.0 12 5.5 10.50 1.85 0 8.4 8 1080184.6203 2342265.3863 0.6325
23 137 2 4 1 3732 47 2000 8 11.5 12 11.0 10.25 2.65 0 5.2 8 1080491.5883 2342049.7875 0.6860
24 138 2 4 1 3785 2 2001 8 10.5 11 0.0 11.75 4.30 0 8.9 8 1081308.2090 2342556.8061 0.6245
25 138 2 4 1 3785 2 2001 8 10.5 11 0.0 11.75 4.30 0 8.9 8 1081395.5334 2342672.7702 0.6245
26 139 2 4 1 3785 2 2002 8 11.5 12 11.0 11.20 1.85 0 8.9 8 1081308.2090 2342556.8061 0.7245
27 139 2 4 1 3785 2 2002 8 11.5 12 11.0 11.20 1.85 0 8.9 8 1081395.5334 2342672.7702 0.7245
28 140 2 4 1 3785 7 2004 11 9.0 11 5.5 10.50 1.85 0 8.4 8 1080946.8150 2343142.2856 0.6525
29 140 2 4 1 3785 7 2004 11 9.0 11 5.5 10.50 1.85 0 8.4 8 1080953.1424 2343163.5934 0.6525
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
147641 151069 31 13 1 0076 4 190874 11 13.0 9 11.0 11.75 2.45 2 2.2 4 3789117.8790 1062694.1182 0.6640
147642 151071 31 54 1 0030 17 191044 11 12.5 7 5.5 11.75 2.45 0 2.2 0 3809028.0212 1061365.7127 0.5240
147643 151072 31 54 6 0045 5 191045 11 12.0 12 0.0 11.75 0.25 0 1.2 8 3807680.1370 1060112.5966 0.5620
147644 151072 31 54 6 0045 5 191045 11 12.0 12 0.0 11.75 0.25 0 1.2 8 3807684.3397 1060111.2397 0.5620
147645 151073 31 52 1 0182 15 191046 11 13.0 9 5.5 9.50 0.80 0 8.4 8 3813798.7329 1067758.7445 0.6520
147646 151075 31 50 75 6562 13 191048 11 13.0 11 5.5 10.65 2.75 0 3.6 4 3770866.5845 1055275.1014 0.6150
147647 151076 31 50 75 6577 13 191049 11 12.5 12 11.0 10.50 2.70 0 7.2 0 3770354.2840 1055020.5690 0.6690
147648 151077 31 50 81 2042 3 191052 11 12.5 12 0.0 9.50 0.90 0 4.6 8 3770220.3375 1060790.2010 0.5850
147649 151078 31 50 88 2042 13 191053 11 13.0 9 5.5 7.70 2.45 0 10.6 4 3773232.3171 1059105.4721 0.6325
147650 151079 30 91 55 003A 5 189318 11 9.0 10 0.0 10.20 3.00 0 7.0 0 3261980.4628 631619.97252 0.5020
147651 151080 30 91 55 003A 10 189319 11 8.5 8 0.0 6.95 0.25 0 3.6 8 3261982.8639 631604.01021 0.4630
147652 151081 30 91 56 0063 2 189320 11 11.5 9 0.0 7.70 0.25 0 7.7 8 3248444.1915 623213.90977 0.5515
147653 151082 30 91 57 003A 1 189321 11 11.0 10 0.0 7.35 0.25 0 5.3 4 3267154.4442 631875.88116 0.4890
147654 151082 30 91 57 003A 1 189321 11 11.0 10 0.0 7.35 0.25 0 5.3 4 3267147.4561 631804.15645 0.4890
147655 151083 30 91 57 003A 1 189322 11 11.0 7 0.0 8.35 1.05 0 3.6 4 3267154.4442 631875.88116 0.4600
147656 151083 30 91 57 003A 1 189322 11 11.0 7 0.0 8.35 1.05 0 3.6 4 3267147.4561 631804.15645 0.4600
147657 151084 30 91 61 0063 3 189323 11 10.0 6 0.0 5.85 0.00 0 6.0 0 3248526.0565 626649.68515 0.3885
147658 151084 30 91 61 0063 3 189323 11 10.0 6 0.0 5.85 0.00 0 6.0 0 3248538.1343 626559.64459 0.3885
147659 151085 30 91 61 0063 3 189324 11 11.0 7 0.0 3.00 1.15 0 1.7 8 3248526.0565 626649.68515 0.4285
147660 151085 30 91 61 0063 3 189324 11 11.0 7 0.0 3.00 1.15 0 1.7 8 3248538.1343 626559.64459 0.4285
147661 151086 30 91 61 0063 2 189325 11 7.0 6 0.0 4.60 1.15 0 1.2 0 3248574.1356 626695.97516 0.3095
147662 151087 30 91 70 0063 4 189326 11 11.5 6 0.0 6.35 0.25 0 6.0 8 3249248.6607 626583.75220 0.4910
147663 151088 30 91 76 003A 800 189327 11 11.5 8 0.0 9.50 0.00 0 3.6 8 3268469.1765 634215.8402 0.5160
147664 151089 30 91 100 0082 800 189328 11 8.5 5 0.0 4.85 0.25 0 3.6 4 3260885.3823 624631.21009 0.3720
147665 151090 30 3 1 0323 42 188407 11 13.0 11 0.0 10.60 2.20 0 8.9 4 3249442.0328 680735.86987 0.6070
147666 151091 30 3 1 0126 4 188408 8 11.5 11 0.0 4.10 1.95 0 6.0 0 3248445.8473 679870.99807 0.4255
147667 151092 30 3 1 0126 14 188409 8 11.5 11 0.0 8.35 1.95 0 7.6 8 3248678.1322 680385.75222 0.5640
147668 151093 30 3 1 0130 35 188410 8 12.0 11 5.5 11.45 2.75 0 7.2 8 3249117.8092 680078.11555 0.6590
147669 151094 30 3 1 015A 25 188411 11 12.5 11 0.0 11.25 2.45 0 6.0 4 3248718.0635 679487.59219 0.5820
147670 151095 30 3 1 015A 27 188412 11 12.0 12 5.5 11.20 2.75 0 5.3 8 3248877.4810 679621.04745 0.6775

147671 rows × 19 columns


In [218]:
full_data.max()


Out[218]:
Unnamed: 0    151095
ENT               32
MUN              570
LOC             5077
AGEB             NaN
MZA              800
ID_INM        223669
seccion_1         11
seccion_2         13
seccion_3         13
seccion_4         11
seccion_5         14
seccion_6         10
seccion_7          2
seccion_8         11
seccion_9          8
X                NaN
Y                NaN
dtype: float64

In [ ]:


In [64]:
print(len(full_data))
print(len(inner_data))


232236
147671

In [215]:
full_data.to_csv('full_data.csv')
inner_data.to_csv('inner_data.csv')
left_data.to_csv('left_data.csv')

In [214]:
inner_data


Out[214]:
Unnamed: 0 ENT MUN LOC AGEB MZA ID_INM seccion_1 seccion_2 seccion_3 seccion_4 seccion_5 seccion_6 seccion_7 seccion_8 seccion_9 X Y cal_final
0 24 2 4 1 3412 2 1772 11 12.0 11 5.5 11.75 4.30 0 7.2 4 1084889.4391 2344929.5143 66.75
1 72829 2 4 1 3412 2 2473 8 12.0 11 0.0 11.75 2.90 0 9.6 4 1084889.4391 2344929.5143 59.25
2 105 2 4 1 436A 14 1886 11 12.0 12 5.5 11.45 3.00 0 8.4 4 1081038.1387 2346568.3414 67.35
3 72827 2 4 1 436A 14 2471 11 9.0 12 0.0 14.00 5.60 0 8.4 4 1081038.1387 2346568.3414 64.00
4 122 2 4 1 3041 2 1905 11 9.0 11 5.5 11.75 3.80 0 7.2 4 1077173.8573 2346448.7400 63.25
5 129 2 4 1 3713 48 1992 11 11.5 9 0.0 8.35 2.70 0 7.2 8 1079930.4474 2343442.2052 57.75
6 129 2 4 1 3713 48 1992 11 11.5 9 0.0 8.35 2.70 0 7.2 8 1079946.8545 2343442.1063 57.75
7 129 2 4 1 3713 48 1992 11 11.5 9 0.0 8.35 2.70 0 7.2 8 1079895.2853 2343442.4170 57.75
8 130 2 4 1 3713 48 1993 11 10.0 12 0.0 11.75 3.80 0 8.4 8 1079930.4474 2343442.2052 64.95
9 130 2 4 1 3713 48 1993 11 10.0 12 0.0 11.75 3.80 0 8.4 8 1079946.8545 2343442.1063 64.95
10 130 2 4 1 3713 48 1993 11 10.0 12 0.0 11.75 3.80 0 8.4 8 1079895.2853 2343442.4170 64.95
11 231 2 4 1 3713 48 1991 11 12.0 12 0.0 7.55 3.20 0 8.4 8 1079930.4474 2343442.2052 62.15
12 231 2 4 1 3713 48 1991 11 12.0 12 0.0 7.55 3.20 0 8.4 8 1079946.8545 2343442.1063 62.15
13 231 2 4 1 3713 48 1991 11 12.0 12 0.0 7.55 3.20 0 8.4 8 1079895.2853 2343442.4170 62.15
14 131 2 4 1 3728 49 1994 11 13.0 12 5.5 9.25 5.40 0 6.0 8 1079628.9259 2343104.6840 70.15
15 131 2 4 1 3728 49 1994 11 13.0 12 5.5 9.25 5.40 0 6.0 8 1079626.8385 2343062.5743 70.15
16 132 2 4 1 5635 10 1995 11 8.5 11 5.5 13.75 5.85 2 9.6 4 1081325.9932 2343469.4629 71.20
17 133 2 4 1 5635 14 1996 11 9.0 12 0.0 10.50 2.45 2 8.9 8 1081077.8720 2343843.3932 63.85
18 134 2 4 1 3501 48 1997 8 11.5 11 11.0 11.75 1.85 0 7.6 8 1081340.6433 2341368.6145 70.70
19 135 2 4 1 3732 16 1998 8 12.0 12 5.5 7.65 2.75 0 7.2 8 1080133.1094 2342393.7799 63.10
20 135 2 4 1 3732 16 1998 8 12.0 12 5.5 7.65 2.75 0 7.2 8 1080184.6203 2342265.3863 63.10
21 136 2 4 1 3732 16 1999 8 9.0 12 5.5 10.50 1.85 0 8.4 8 1080133.1094 2342393.7799 63.25
22 136 2 4 1 3732 16 1999 8 9.0 12 5.5 10.50 1.85 0 8.4 8 1080184.6203 2342265.3863 63.25
23 137 2 4 1 3732 47 2000 8 11.5 12 11.0 10.25 2.65 0 5.2 8 1080491.5883 2342049.7875 68.60
24 138 2 4 1 3785 2 2001 8 10.5 11 0.0 11.75 4.30 0 8.9 8 1081308.2090 2342556.8061 62.45
25 138 2 4 1 3785 2 2001 8 10.5 11 0.0 11.75 4.30 0 8.9 8 1081395.5334 2342672.7702 62.45
26 139 2 4 1 3785 2 2002 8 11.5 12 11.0 11.20 1.85 0 8.9 8 1081308.2090 2342556.8061 72.45
27 139 2 4 1 3785 2 2002 8 11.5 12 11.0 11.20 1.85 0 8.9 8 1081395.5334 2342672.7702 72.45
28 140 2 4 1 3785 7 2004 11 9.0 11 5.5 10.50 1.85 0 8.4 8 1080946.8150 2343142.2856 65.25
29 140 2 4 1 3785 7 2004 11 9.0 11 5.5 10.50 1.85 0 8.4 8 1080953.1424 2343163.5934 65.25
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
147641 151069 31 13 1 0076 4 190874 11 13.0 9 11.0 11.75 2.45 2 2.2 4 3789117.8790 1062694.1182 66.40
147642 151071 31 54 1 0030 17 191044 11 12.5 7 5.5 11.75 2.45 0 2.2 0 3809028.0212 1061365.7127 52.40
147643 151072 31 54 6 0045 5 191045 11 12.0 12 0.0 11.75 0.25 0 1.2 8 3807680.1370 1060112.5966 56.20
147644 151072 31 54 6 0045 5 191045 11 12.0 12 0.0 11.75 0.25 0 1.2 8 3807684.3397 1060111.2397 56.20
147645 151073 31 52 1 0182 15 191046 11 13.0 9 5.5 9.50 0.80 0 8.4 8 3813798.7329 1067758.7445 65.20
147646 151075 31 50 75 6562 13 191048 11 13.0 11 5.5 10.65 2.75 0 3.6 4 3770866.5845 1055275.1014 61.50
147647 151076 31 50 75 6577 13 191049 11 12.5 12 11.0 10.50 2.70 0 7.2 0 3770354.2840 1055020.5690 66.90
147648 151077 31 50 81 2042 3 191052 11 12.5 12 0.0 9.50 0.90 0 4.6 8 3770220.3375 1060790.2010 58.50
147649 151078 31 50 88 2042 13 191053 11 13.0 9 5.5 7.70 2.45 0 10.6 4 3773232.3171 1059105.4721 63.25
147650 151079 30 91 55 003A 5 189318 11 9.0 10 0.0 10.20 3.00 0 7.0 0 3261980.4628 631619.97252 50.20
147651 151080 30 91 55 003A 10 189319 11 8.5 8 0.0 6.95 0.25 0 3.6 8 3261982.8639 631604.01021 46.30
147652 151081 30 91 56 0063 2 189320 11 11.5 9 0.0 7.70 0.25 0 7.7 8 3248444.1915 623213.90977 55.15
147653 151082 30 91 57 003A 1 189321 11 11.0 10 0.0 7.35 0.25 0 5.3 4 3267154.4442 631875.88116 48.90
147654 151082 30 91 57 003A 1 189321 11 11.0 10 0.0 7.35 0.25 0 5.3 4 3267147.4561 631804.15645 48.90
147655 151083 30 91 57 003A 1 189322 11 11.0 7 0.0 8.35 1.05 0 3.6 4 3267154.4442 631875.88116 46.00
147656 151083 30 91 57 003A 1 189322 11 11.0 7 0.0 8.35 1.05 0 3.6 4 3267147.4561 631804.15645 46.00
147657 151084 30 91 61 0063 3 189323 11 10.0 6 0.0 5.85 0.00 0 6.0 0 3248526.0565 626649.68515 38.85
147658 151084 30 91 61 0063 3 189323 11 10.0 6 0.0 5.85 0.00 0 6.0 0 3248538.1343 626559.64459 38.85
147659 151085 30 91 61 0063 3 189324 11 11.0 7 0.0 3.00 1.15 0 1.7 8 3248526.0565 626649.68515 42.85
147660 151085 30 91 61 0063 3 189324 11 11.0 7 0.0 3.00 1.15 0 1.7 8 3248538.1343 626559.64459 42.85
147661 151086 30 91 61 0063 2 189325 11 7.0 6 0.0 4.60 1.15 0 1.2 0 3248574.1356 626695.97516 30.95
147662 151087 30 91 70 0063 4 189326 11 11.5 6 0.0 6.35 0.25 0 6.0 8 3249248.6607 626583.75220 49.10
147663 151088 30 91 76 003A 800 189327 11 11.5 8 0.0 9.50 0.00 0 3.6 8 3268469.1765 634215.8402 51.60
147664 151089 30 91 100 0082 800 189328 11 8.5 5 0.0 4.85 0.25 0 3.6 4 3260885.3823 624631.21009 37.20
147665 151090 30 3 1 0323 42 188407 11 13.0 11 0.0 10.60 2.20 0 8.9 4 3249442.0328 680735.86987 60.70
147666 151091 30 3 1 0126 4 188408 8 11.5 11 0.0 4.10 1.95 0 6.0 0 3248445.8473 679870.99807 42.55
147667 151092 30 3 1 0126 14 188409 8 11.5 11 0.0 8.35 1.95 0 7.6 8 3248678.1322 680385.75222 56.40
147668 151093 30 3 1 0130 35 188410 8 12.0 11 5.5 11.45 2.75 0 7.2 8 3249117.8092 680078.11555 65.90
147669 151094 30 3 1 015A 25 188411 11 12.5 11 0.0 11.25 2.45 0 6.0 4 3248718.0635 679487.59219 58.20
147670 151095 30 3 1 015A 27 188412 11 12.0 12 5.5 11.20 2.75 0 5.3 8 3248877.4810 679621.04745 67.75

147671 rows × 19 columns


In [131]:
#inner_data['X'].apply(lambda x: float(str(x).replace(",","."))/100000)
print(min(inner_data['X']))
print(max(inner_data['X']))
print(min(inner_data['Y']))
print(max(inner_data['Y']))


1076159.9242
4066239.0212
1000010.4931
999994.69286

In [132]:
inner_data['Y'].map(lambda x: -float(str(x).replace(",","."))/10000).hist()


Out[132]:
<matplotlib.axes._subplots.AxesSubplot at 0x129005898>

In [133]:
inner_data['X'].map(lambda x: float(str(x).replace(",","."))/10000).hist()


Out[133]:
<matplotlib.axes._subplots.AxesSubplot at 0x109e4c588>

In [134]:
sns.jointplot(x='Latitud', y='Longitud', data=XY)


Out[134]:
<seaborn.axisgrid.JointGrid at 0x10d369c50>
/Users/luis/anaconda/lib/python3.4/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == str('face'):

In [135]:
sns.jointplot(x='Latitud', y='Longitud', data=XY, size=10, ratio=5,s=1)


Out[135]:
<seaborn.axisgrid.JointGrid at 0x10ff76860>
/Users/luis/anaconda/lib/python3.4/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == str('face'):

In [136]:
XY.head()


Out[136]:
Latitud Longitud
0 28.034619 85.692449
1 28.026875 85.679924
2 27.992703 84.988437
3 27.992288 85.114371
4 27.994609 84.962842

In [137]:
diferencias = ((loc['LATITUD_CARGA'].map(lambda x: float(str(x).replace(",",".")))- loc['X'].map(lambda x: float(str(x).replace(",","."))))**2+(loc['LONGITUD_CARGA'].map(lambda x: float(str(x).replace(",",".")))-loc['Y'].map(lambda x: float(str(x).replace(",","."))))**2).map(lambda x: sqrt(x))

In [138]:
x1 = loc['LATITUD_CARGA'].map(lambda x: float(str(x).replace(",",".")))
x2 = loc['X'].map(lambda x: float(str(x).replace(",",".")))
y1 = loc['LONGITUD_CARGA'].map(lambda x: float(str(x).replace(",",".")))
y2 = loc['Y'].map(lambda x: float(str(x).replace(",",".")))

In [139]:
plt.figure(figsize=(12,4))
diferencias.hist(bins = int(sqrt(len(diferencias))))


Out[139]:
<matplotlib.axes._subplots.AxesSubplot at 0x10ffbac88>

In [140]:
def heaversine(x1,y1,x2,y2):
    a = sin((x2-x1)/2)**2+cos(x1)*cos(x2)*sin((y2-y1)/2)**2
    c = 2*atan2(sqrt(a),sqrt(1-a))
    return 6371*c

In [141]:
vals=loc[['LATITUD_CARGA', 'LONGITUD_CARGA','X']].groupby(['LATITUD_CARGA', 'LONGITUD_CARGA']).count()['X'].values

In [142]:
vals = pd.DataFrame(vals)

In [143]:
vals.hist(bins=320)


Out[143]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x112a6ef98>]], dtype=object)

In [144]:
data = pd.DataFrame({'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2})

In [145]:
distancias_transformadas = data.apply(lambda s: heaversine(s['x1'],s['y1'],s['x2'],s['y2']),axis=1)

In [146]:
distancias_transformadas.map(lambda x:log(x)).hist(bins=int(sqrt(len(distancias_transformadas))))


Out[146]:
<matplotlib.axes._subplots.AxesSubplot at 0x10a74bf60>

In [147]:
inner_data = pd.read_csv('inner_data.csv')


/Users/luis/anaconda/lib/python3.4/site-packages/IPython/core/interactiveshell.py:2902: DtypeWarning: Columns (16,17) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

In [7]:
secciones = ['seccion_1', 'seccion_2', 'seccion_3', 'seccion_4', 'seccion_5', 'seccion_6', 'seccion_7', 'seccion_8', 'seccion_9']
resumen = ['ENT']
resumen.extend(secciones)
resumen.extend(['cal_final'])
indicadores = ['seccion_1', 'seccion_2', 'seccion_3', 'seccion_4', 'seccion_5', 'seccion_6', 'seccion_7', 'seccion_8', 'seccion_9', 'cal_final']

In [195]:
inner_data


Out[195]:
Unnamed: 0 ENT MUN LOC AGEB MZA ID_INM seccion_1 seccion_2 seccion_3 seccion_4 seccion_5 seccion_6 seccion_7 seccion_8 seccion_9 X Y cal_final
0 24 2 4 1 3412 2 1772 11 12.0 11 5.5 11.75 4.30 0 7.2 4 1084889.4391 2344929.5143 0.6675
1 72829 2 4 1 3412 2 2473 8 12.0 11 0.0 11.75 2.90 0 9.6 4 1084889.4391 2344929.5143 0.5925
2 105 2 4 1 436A 14 1886 11 12.0 12 5.5 11.45 3.00 0 8.4 4 1081038.1387 2346568.3414 0.6735
3 72827 2 4 1 436A 14 2471 11 9.0 12 0.0 14.00 5.60 0 8.4 4 1081038.1387 2346568.3414 0.6400
4 122 2 4 1 3041 2 1905 11 9.0 11 5.5 11.75 3.80 0 7.2 4 1077173.8573 2346448.7400 0.6325
5 129 2 4 1 3713 48 1992 11 11.5 9 0.0 8.35 2.70 0 7.2 8 1079930.4474 2343442.2052 0.5775
6 129 2 4 1 3713 48 1992 11 11.5 9 0.0 8.35 2.70 0 7.2 8 1079946.8545 2343442.1063 0.5775
7 129 2 4 1 3713 48 1992 11 11.5 9 0.0 8.35 2.70 0 7.2 8 1079895.2853 2343442.4170 0.5775
8 130 2 4 1 3713 48 1993 11 10.0 12 0.0 11.75 3.80 0 8.4 8 1079930.4474 2343442.2052 0.6495
9 130 2 4 1 3713 48 1993 11 10.0 12 0.0 11.75 3.80 0 8.4 8 1079946.8545 2343442.1063 0.6495
10 130 2 4 1 3713 48 1993 11 10.0 12 0.0 11.75 3.80 0 8.4 8 1079895.2853 2343442.4170 0.6495
11 231 2 4 1 3713 48 1991 11 12.0 12 0.0 7.55 3.20 0 8.4 8 1079930.4474 2343442.2052 0.6215
12 231 2 4 1 3713 48 1991 11 12.0 12 0.0 7.55 3.20 0 8.4 8 1079946.8545 2343442.1063 0.6215
13 231 2 4 1 3713 48 1991 11 12.0 12 0.0 7.55 3.20 0 8.4 8 1079895.2853 2343442.4170 0.6215
14 131 2 4 1 3728 49 1994 11 13.0 12 5.5 9.25 5.40 0 6.0 8 1079628.9259 2343104.6840 0.7015
15 131 2 4 1 3728 49 1994 11 13.0 12 5.5 9.25 5.40 0 6.0 8 1079626.8385 2343062.5743 0.7015
16 132 2 4 1 5635 10 1995 11 8.5 11 5.5 13.75 5.85 2 9.6 4 1081325.9932 2343469.4629 0.7120
17 133 2 4 1 5635 14 1996 11 9.0 12 0.0 10.50 2.45 2 8.9 8 1081077.8720 2343843.3932 0.6385
18 134 2 4 1 3501 48 1997 8 11.5 11 11.0 11.75 1.85 0 7.6 8 1081340.6433 2341368.6145 0.7070
19 135 2 4 1 3732 16 1998 8 12.0 12 5.5 7.65 2.75 0 7.2 8 1080133.1094 2342393.7799 0.6310
20 135 2 4 1 3732 16 1998 8 12.0 12 5.5 7.65 2.75 0 7.2 8 1080184.6203 2342265.3863 0.6310
21 136 2 4 1 3732 16 1999 8 9.0 12 5.5 10.50 1.85 0 8.4 8 1080133.1094 2342393.7799 0.6325
22 136 2 4 1 3732 16 1999 8 9.0 12 5.5 10.50 1.85 0 8.4 8 1080184.6203 2342265.3863 0.6325
23 137 2 4 1 3732 47 2000 8 11.5 12 11.0 10.25 2.65 0 5.2 8 1080491.5883 2342049.7875 0.6860
24 138 2 4 1 3785 2 2001 8 10.5 11 0.0 11.75 4.30 0 8.9 8 1081308.2090 2342556.8061 0.6245
25 138 2 4 1 3785 2 2001 8 10.5 11 0.0 11.75 4.30 0 8.9 8 1081395.5334 2342672.7702 0.6245
26 139 2 4 1 3785 2 2002 8 11.5 12 11.0 11.20 1.85 0 8.9 8 1081308.2090 2342556.8061 0.7245
27 139 2 4 1 3785 2 2002 8 11.5 12 11.0 11.20 1.85 0 8.9 8 1081395.5334 2342672.7702 0.7245
28 140 2 4 1 3785 7 2004 11 9.0 11 5.5 10.50 1.85 0 8.4 8 1080946.8150 2343142.2856 0.6525
29 140 2 4 1 3785 7 2004 11 9.0 11 5.5 10.50 1.85 0 8.4 8 1080953.1424 2343163.5934 0.6525
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
147641 151069 31 13 1 0076 4 190874 11 13.0 9 11.0 11.75 2.45 2 2.2 4 3789117.8790 1062694.1182 0.6640
147642 151071 31 54 1 0030 17 191044 11 12.5 7 5.5 11.75 2.45 0 2.2 0 3809028.0212 1061365.7127 0.5240
147643 151072 31 54 6 0045 5 191045 11 12.0 12 0.0 11.75 0.25 0 1.2 8 3807680.1370 1060112.5966 0.5620
147644 151072 31 54 6 0045 5 191045 11 12.0 12 0.0 11.75 0.25 0 1.2 8 3807684.3397 1060111.2397 0.5620
147645 151073 31 52 1 0182 15 191046 11 13.0 9 5.5 9.50 0.80 0 8.4 8 3813798.7329 1067758.7445 0.6520
147646 151075 31 50 75 6562 13 191048 11 13.0 11 5.5 10.65 2.75 0 3.6 4 3770866.5845 1055275.1014 0.6150
147647 151076 31 50 75 6577 13 191049 11 12.5 12 11.0 10.50 2.70 0 7.2 0 3770354.2840 1055020.5690 0.6690
147648 151077 31 50 81 2042 3 191052 11 12.5 12 0.0 9.50 0.90 0 4.6 8 3770220.3375 1060790.2010 0.5850
147649 151078 31 50 88 2042 13 191053 11 13.0 9 5.5 7.70 2.45 0 10.6 4 3773232.3171 1059105.4721 0.6325
147650 151079 30 91 55 003A 5 189318 11 9.0 10 0.0 10.20 3.00 0 7.0 0 3261980.4628 631619.97252 0.5020
147651 151080 30 91 55 003A 10 189319 11 8.5 8 0.0 6.95 0.25 0 3.6 8 3261982.8639 631604.01021 0.4630
147652 151081 30 91 56 0063 2 189320 11 11.5 9 0.0 7.70 0.25 0 7.7 8 3248444.1915 623213.90977 0.5515
147653 151082 30 91 57 003A 1 189321 11 11.0 10 0.0 7.35 0.25 0 5.3 4 3267154.4442 631875.88116 0.4890
147654 151082 30 91 57 003A 1 189321 11 11.0 10 0.0 7.35 0.25 0 5.3 4 3267147.4561 631804.15645 0.4890
147655 151083 30 91 57 003A 1 189322 11 11.0 7 0.0 8.35 1.05 0 3.6 4 3267154.4442 631875.88116 0.4600
147656 151083 30 91 57 003A 1 189322 11 11.0 7 0.0 8.35 1.05 0 3.6 4 3267147.4561 631804.15645 0.4600
147657 151084 30 91 61 0063 3 189323 11 10.0 6 0.0 5.85 0.00 0 6.0 0 3248526.0565 626649.68515 0.3885
147658 151084 30 91 61 0063 3 189323 11 10.0 6 0.0 5.85 0.00 0 6.0 0 3248538.1343 626559.64459 0.3885
147659 151085 30 91 61 0063 3 189324 11 11.0 7 0.0 3.00 1.15 0 1.7 8 3248526.0565 626649.68515 0.4285
147660 151085 30 91 61 0063 3 189324 11 11.0 7 0.0 3.00 1.15 0 1.7 8 3248538.1343 626559.64459 0.4285
147661 151086 30 91 61 0063 2 189325 11 7.0 6 0.0 4.60 1.15 0 1.2 0 3248574.1356 626695.97516 0.3095
147662 151087 30 91 70 0063 4 189326 11 11.5 6 0.0 6.35 0.25 0 6.0 8 3249248.6607 626583.75220 0.4910
147663 151088 30 91 76 003A 800 189327 11 11.5 8 0.0 9.50 0.00 0 3.6 8 3268469.1765 634215.8402 0.5160
147664 151089 30 91 100 0082 800 189328 11 8.5 5 0.0 4.85 0.25 0 3.6 4 3260885.3823 624631.21009 0.3720
147665 151090 30 3 1 0323 42 188407 11 13.0 11 0.0 10.60 2.20 0 8.9 4 3249442.0328 680735.86987 0.6070
147666 151091 30 3 1 0126 4 188408 8 11.5 11 0.0 4.10 1.95 0 6.0 0 3248445.8473 679870.99807 0.4255
147667 151092 30 3 1 0126 14 188409 8 11.5 11 0.0 8.35 1.95 0 7.6 8 3248678.1322 680385.75222 0.5640
147668 151093 30 3 1 0130 35 188410 8 12.0 11 5.5 11.45 2.75 0 7.2 8 3249117.8092 680078.11555 0.6590
147669 151094 30 3 1 015A 25 188411 11 12.5 11 0.0 11.25 2.45 0 6.0 4 3248718.0635 679487.59219 0.5820
147670 151095 30 3 1 015A 27 188412 11 12.0 12 5.5 11.20 2.75 0 5.3 8 3248877.4810 679621.04745 0.6775

147671 rows × 19 columns


In [149]:
x=300
tmp_data = inner_data[['ENT', 'seccion_1', 'seccion_2', 'seccion_3', 'seccion_4', 'seccion_5', 'seccion_6', 'seccion_7', 'seccion_8', 'seccion_9']]

In [ ]:


In [203]:
#plt.scatter([1, 2, 3, 4, 5, 6, 7, 8, 9], tmp_data['ENT'].values, s=tmp_data.ix[x:x+9][secciones].apply(lambda x: 10*x))
#tmp_data.groupby(['ENT']).quantile(0.99)
calificaciones = inner_data[secciones].sum(axis=1)

In [211]:
calificaciones.hist(bins=int(sqrt(calificaciones.count())))


Out[211]:
<matplotlib.axes._subplots.AxesSubplot at 0x120d7c2e8>

In [212]:
inner_data['cal_final'] = calificaciones

In [178]:
inner_data['cal_final'].hist(bins = int(sqrt(len(calificaciones))))


Out[178]:
<matplotlib.axes._subplots.AxesSubplot at 0x1345f77b8>

In [238]:
inner_agg = inner_data[resumen].groupby(['ENT']).agg('mean')[secciones]
inner_agg_median = inner_data[resumen].groupby(['ENT']).agg('median')[secciones]

calificaciones = inner_agg[secciones].sum(axis=1)/100
calificaciones_median = inner_agg_median[secciones].sum(axis=1)/100

for i in inner_agg.index.values:
    inner_agg.ix[i]=inner_agg.ix[i]/max_vals
inner_agg['cal_final'] = calificaciones

for i in inner_agg_median.index.values:
    inner_agg_median.ix[i]=inner_agg_median.ix[i]/max_vals
inner_agg_median['cal_final'] = calificaciones

In [247]:
inner_agg.boxplot()


/Users/luis/anaconda/lib/python3.4/site-packages/ipykernel/__main__.py:1: FutureWarning: 
The default value for 'return_type' will change to 'axes' in a future release.
 To use the future behavior now, set return_type='axes'.
 To keep the previous behavior and silence this warning, set return_type='dict'.
  if __name__ == '__main__':
Out[247]:
{'boxes': [<matplotlib.lines.Line2D at 0x120b21cc0>,
  <matplotlib.lines.Line2D at 0x120b32ef0>,
  <matplotlib.lines.Line2D at 0x120bef860>,
  <matplotlib.lines.Line2D at 0x120c00e80>,
  <matplotlib.lines.Line2D at 0x120c157f0>,
  <matplotlib.lines.Line2D at 0x120c25f98>,
  <matplotlib.lines.Line2D at 0x120c3a780>,
  <matplotlib.lines.Line2D at 0x120c44e10>,
  <matplotlib.lines.Line2D at 0x120c5a5f8>,
  <matplotlib.lines.Line2D at 0x120d2bda0>],
 'caps': [<matplotlib.lines.Line2D at 0x120b2e668>,
  <matplotlib.lines.Line2D at 0x120b2ee48>,
  <matplotlib.lines.Line2D at 0x120b3ddd8>,
  <matplotlib.lines.Line2D at 0x120bea5f8>,
  <matplotlib.lines.Line2D at 0x120bfa5c0>,
  <matplotlib.lines.Line2D at 0x120bfada0>,
  <matplotlib.lines.Line2D at 0x120c0ad68>,
  <matplotlib.lines.Line2D at 0x120c0afd0>,
  <matplotlib.lines.Line2D at 0x120c19f98>,
  <matplotlib.lines.Line2D at 0x120c1ed30>,
  <matplotlib.lines.Line2D at 0x120c2fcf8>,
  <matplotlib.lines.Line2D at 0x120c2ff60>,
  <matplotlib.lines.Line2D at 0x120c3df28>,
  <matplotlib.lines.Line2D at 0x120af4cf8>,
  <matplotlib.lines.Line2D at 0x120c4fb70>,
  <matplotlib.lines.Line2D at 0x120c4fdd8>,
  <matplotlib.lines.Line2D at 0x120c5fda0>,
  <matplotlib.lines.Line2D at 0x120c65b38>,
  <matplotlib.lines.Line2D at 0x120d35b00>,
  <matplotlib.lines.Line2D at 0x120d35d68>],
 'fliers': [<matplotlib.lines.Line2D at 0x120b32e10>,
  <matplotlib.lines.Line2D at 0x120bef5f8>,
  <matplotlib.lines.Line2D at 0x120c00da0>,
  <matplotlib.lines.Line2D at 0x120c0ffd0>,
  <matplotlib.lines.Line2D at 0x120c25d30>,
  <matplotlib.lines.Line2D at 0x120c35f60>,
  <matplotlib.lines.Line2D at 0x120c44ba8>,
  <matplotlib.lines.Line2D at 0x120c55dd8>,
  <matplotlib.lines.Line2D at 0x120d2bb38>,
  <matplotlib.lines.Line2D at 0x120d3ad68>],
 'means': [],
 'medians': [<matplotlib.lines.Line2D at 0x120b320b8>,
  <matplotlib.lines.Line2D at 0x120bea860>,
  <matplotlib.lines.Line2D at 0x120c00048>,
  <matplotlib.lines.Line2D at 0x120c0f7f0>,
  <matplotlib.lines.Line2D at 0x120c1ef98>,
  <matplotlib.lines.Line2D at 0x120c35780>,
  <matplotlib.lines.Line2D at 0x120add5f8>,
  <matplotlib.lines.Line2D at 0x120c555f8>,
  <matplotlib.lines.Line2D at 0x120c65da0>,
  <matplotlib.lines.Line2D at 0x120d3a588>],
 'whiskers': [<matplotlib.lines.Line2D at 0x120b28668>,
  <matplotlib.lines.Line2D at 0x120b28e48>,
  <matplotlib.lines.Line2D at 0x120b37dd8>,
  <matplotlib.lines.Line2D at 0x120b3d5f8>,
  <matplotlib.lines.Line2D at 0x120bf65c0>,
  <matplotlib.lines.Line2D at 0x120bf6da0>,
  <matplotlib.lines.Line2D at 0x120c06d68>,
  <matplotlib.lines.Line2D at 0x120c06fd0>,
  <matplotlib.lines.Line2D at 0x120c15f98>,
  <matplotlib.lines.Line2D at 0x120c19d30>,
  <matplotlib.lines.Line2D at 0x120c2acf8>,
  <matplotlib.lines.Line2D at 0x120c2af60>,
  <matplotlib.lines.Line2D at 0x120c3af28>,
  <matplotlib.lines.Line2D at 0x120c3dcc0>,
  <matplotlib.lines.Line2D at 0x120c4ab70>,
  <matplotlib.lines.Line2D at 0x120c4add8>,
  <matplotlib.lines.Line2D at 0x120c5ada0>,
  <matplotlib.lines.Line2D at 0x120c5fb38>,
  <matplotlib.lines.Line2D at 0x120d2eb00>,
  <matplotlib.lines.Line2D at 0x120d2ed68>]}

In [202]:
tmp_sum=inner_agg.sum(axis=1)
tmp_sum


Out[202]:
ENT
2     5.462742
3     5.303114
4     4.567306
5     5.925606
6     5.262592
7     3.316895
8     4.484446
9     5.546940
10    4.422440
11    4.542022
12    3.887059
13    3.839614
14    4.649610
15    5.242571
16    4.482073
18    5.550203
19    5.260497
20    3.359407
21    4.678407
23    4.127940
24    4.212829
25    4.519968
26    5.013343
27    4.457016
29    3.644955
30    4.311153
31    4.746783
32    4.288673
dtype: float64

In [9]:
max_vals = np.array([11, 13, 13, 11, 14, 17, 2, 11, 8])

In [ ]:


In [197]:
new_names = dict(zip(secciones,['1', '2', '3', '4', '5', '6', '7', '8', '9']))
inner_agg = inner_agg.rename(columns = new_names)

In [198]:
for i in inner_agg.index.values:
    inner_agg.ix[i]=inner_agg.ix[i]/max_vals

In [5]:
inner_data = pd.read_csv('inner_data.csv')
inner_data


/Users/luis/anaconda/lib/python3.4/site-packages/IPython/core/interactiveshell.py:2902: DtypeWarning: Columns (18,19) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
Out[5]:
Unnamed: 0 Unnamed: 0.1 Unnamed: 0.1 ENT MUN LOC AGEB MZA ID_INM seccion_1 ... seccion_3 seccion_4 seccion_5 seccion_6 seccion_7 seccion_8 seccion_9 X Y cal_final
0 0 24 24 2 4 1 3412 2 1772 1.000000 ... 0.846154 0.5 0.839286 0.252941 0 0.654545 0.5 1084889.4391 2344929.5143 0.6675
1 1 72829 72829 2 4 1 3412 2 2473 0.727273 ... 0.846154 0.0 0.839286 0.170588 0 0.872727 0.5 1084889.4391 2344929.5143 0.5925
2 2 105 105 2 4 1 436A 14 1886 1.000000 ... 0.923077 0.5 0.817857 0.176471 0 0.763636 0.5 1081038.1387 2346568.3414 0.6735
3 3 72827 72827 2 4 1 436A 14 2471 1.000000 ... 0.923077 0.0 1.000000 0.329412 0 0.763636 0.5 1081038.1387 2346568.3414 0.6400
4 4 122 122 2 4 1 3041 2 1905 1.000000 ... 0.846154 0.5 0.839286 0.223529 0 0.654545 0.5 1077173.8573 2346448.7400 0.6325
5 5 129 129 2 4 1 3713 48 1992 1.000000 ... 0.692308 0.0 0.596429 0.158824 0 0.654545 1.0 1079930.4474 2343442.2052 0.5775
6 6 129 129 2 4 1 3713 48 1992 1.000000 ... 0.692308 0.0 0.596429 0.158824 0 0.654545 1.0 1079946.8545 2343442.1063 0.5775
7 7 129 129 2 4 1 3713 48 1992 1.000000 ... 0.692308 0.0 0.596429 0.158824 0 0.654545 1.0 1079895.2853 2343442.4170 0.5775
8 8 130 130 2 4 1 3713 48 1993 1.000000 ... 0.923077 0.0 0.839286 0.223529 0 0.763636 1.0 1079930.4474 2343442.2052 0.6495
9 9 130 130 2 4 1 3713 48 1993 1.000000 ... 0.923077 0.0 0.839286 0.223529 0 0.763636 1.0 1079946.8545 2343442.1063 0.6495
10 10 130 130 2 4 1 3713 48 1993 1.000000 ... 0.923077 0.0 0.839286 0.223529 0 0.763636 1.0 1079895.2853 2343442.4170 0.6495
11 11 231 231 2 4 1 3713 48 1991 1.000000 ... 0.923077 0.0 0.539286 0.188235 0 0.763636 1.0 1079930.4474 2343442.2052 0.6215
12 12 231 231 2 4 1 3713 48 1991 1.000000 ... 0.923077 0.0 0.539286 0.188235 0 0.763636 1.0 1079946.8545 2343442.1063 0.6215
13 13 231 231 2 4 1 3713 48 1991 1.000000 ... 0.923077 0.0 0.539286 0.188235 0 0.763636 1.0 1079895.2853 2343442.4170 0.6215
14 14 131 131 2 4 1 3728 49 1994 1.000000 ... 0.923077 0.5 0.660714 0.317647 0 0.545455 1.0 1079628.9259 2343104.6840 0.7015
15 15 131 131 2 4 1 3728 49 1994 1.000000 ... 0.923077 0.5 0.660714 0.317647 0 0.545455 1.0 1079626.8385 2343062.5743 0.7015
16 16 132 132 2 4 1 5635 10 1995 1.000000 ... 0.846154 0.5 0.982143 0.344118 1 0.872727 0.5 1081325.9932 2343469.4629 0.7120
17 17 133 133 2 4 1 5635 14 1996 1.000000 ... 0.923077 0.0 0.750000 0.144118 1 0.809091 1.0 1081077.8720 2343843.3932 0.6385
18 18 134 134 2 4 1 3501 48 1997 0.727273 ... 0.846154 1.0 0.839286 0.108824 0 0.690909 1.0 1081340.6433 2341368.6145 0.7070
19 19 135 135 2 4 1 3732 16 1998 0.727273 ... 0.923077 0.5 0.546429 0.161765 0 0.654545 1.0 1080133.1094 2342393.7799 0.6310
20 20 135 135 2 4 1 3732 16 1998 0.727273 ... 0.923077 0.5 0.546429 0.161765 0 0.654545 1.0 1080184.6203 2342265.3863 0.6310
21 21 136 136 2 4 1 3732 16 1999 0.727273 ... 0.923077 0.5 0.750000 0.108824 0 0.763636 1.0 1080133.1094 2342393.7799 0.6325
22 22 136 136 2 4 1 3732 16 1999 0.727273 ... 0.923077 0.5 0.750000 0.108824 0 0.763636 1.0 1080184.6203 2342265.3863 0.6325
23 23 137 137 2 4 1 3732 47 2000 0.727273 ... 0.923077 1.0 0.732143 0.155882 0 0.472727 1.0 1080491.5883 2342049.7875 0.6860
24 24 138 138 2 4 1 3785 2 2001 0.727273 ... 0.846154 0.0 0.839286 0.252941 0 0.809091 1.0 1081308.2090 2342556.8061 0.6245
25 25 138 138 2 4 1 3785 2 2001 0.727273 ... 0.846154 0.0 0.839286 0.252941 0 0.809091 1.0 1081395.5334 2342672.7702 0.6245
26 26 139 139 2 4 1 3785 2 2002 0.727273 ... 0.923077 1.0 0.800000 0.108824 0 0.809091 1.0 1081308.2090 2342556.8061 0.7245
27 27 139 139 2 4 1 3785 2 2002 0.727273 ... 0.923077 1.0 0.800000 0.108824 0 0.809091 1.0 1081395.5334 2342672.7702 0.7245
28 28 140 140 2 4 1 3785 7 2004 1.000000 ... 0.846154 0.5 0.750000 0.108824 0 0.763636 1.0 1080946.8150 2343142.2856 0.6525
29 29 140 140 2 4 1 3785 7 2004 1.000000 ... 0.846154 0.5 0.750000 0.108824 0 0.763636 1.0 1080953.1424 2343163.5934 0.6525
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
147641 147641 151069 151069 31 13 1 0076 4 190874 1.000000 ... 0.692308 1.0 0.839286 0.144118 1 0.200000 0.5 3789117.8790 1062694.1182 0.6640
147642 147642 151071 151071 31 54 1 0030 17 191044 1.000000 ... 0.538462 0.5 0.839286 0.144118 0 0.200000 0.0 3809028.0212 1061365.7127 0.5240
147643 147643 151072 151072 31 54 6 0045 5 191045 1.000000 ... 0.923077 0.0 0.839286 0.014706 0 0.109091 1.0 3807680.1370 1060112.5966 0.5620
147644 147644 151072 151072 31 54 6 0045 5 191045 1.000000 ... 0.923077 0.0 0.839286 0.014706 0 0.109091 1.0 3807684.3397 1060111.2397 0.5620
147645 147645 151073 151073 31 52 1 0182 15 191046 1.000000 ... 0.692308 0.5 0.678571 0.047059 0 0.763636 1.0 3813798.7329 1067758.7445 0.6520
147646 147646 151075 151075 31 50 75 6562 13 191048 1.000000 ... 0.846154 0.5 0.760714 0.161765 0 0.327273 0.5 3770866.5845 1055275.1014 0.6150
147647 147647 151076 151076 31 50 75 6577 13 191049 1.000000 ... 0.923077 1.0 0.750000 0.158824 0 0.654545 0.0 3770354.2840 1055020.5690 0.6690
147648 147648 151077 151077 31 50 81 2042 3 191052 1.000000 ... 0.923077 0.0 0.678571 0.052941 0 0.418182 1.0 3770220.3375 1060790.2010 0.5850
147649 147649 151078 151078 31 50 88 2042 13 191053 1.000000 ... 0.692308 0.5 0.550000 0.144118 0 0.963636 0.5 3773232.3171 1059105.4721 0.6325
147650 147650 151079 151079 30 91 55 003A 5 189318 1.000000 ... 0.769231 0.0 0.728571 0.176471 0 0.636364 0.0 3261980.4628 631619.97252 0.5020
147651 147651 151080 151080 30 91 55 003A 10 189319 1.000000 ... 0.615385 0.0 0.496429 0.014706 0 0.327273 1.0 3261982.8639 631604.01021 0.4630
147652 147652 151081 151081 30 91 56 0063 2 189320 1.000000 ... 0.692308 0.0 0.550000 0.014706 0 0.700000 1.0 3248444.1915 623213.90977 0.5515
147653 147653 151082 151082 30 91 57 003A 1 189321 1.000000 ... 0.769231 0.0 0.525000 0.014706 0 0.481818 0.5 3267154.4442 631875.88116 0.4890
147654 147654 151082 151082 30 91 57 003A 1 189321 1.000000 ... 0.769231 0.0 0.525000 0.014706 0 0.481818 0.5 3267147.4561 631804.15645 0.4890
147655 147655 151083 151083 30 91 57 003A 1 189322 1.000000 ... 0.538462 0.0 0.596429 0.061765 0 0.327273 0.5 3267154.4442 631875.88116 0.4600
147656 147656 151083 151083 30 91 57 003A 1 189322 1.000000 ... 0.538462 0.0 0.596429 0.061765 0 0.327273 0.5 3267147.4561 631804.15645 0.4600
147657 147657 151084 151084 30 91 61 0063 3 189323 1.000000 ... 0.461538 0.0 0.417857 0.000000 0 0.545455 0.0 3248526.0565 626649.68515 0.3885
147658 147658 151084 151084 30 91 61 0063 3 189323 1.000000 ... 0.461538 0.0 0.417857 0.000000 0 0.545455 0.0 3248538.1343 626559.64459 0.3885
147659 147659 151085 151085 30 91 61 0063 3 189324 1.000000 ... 0.538462 0.0 0.214286 0.067647 0 0.154545 1.0 3248526.0565 626649.68515 0.4285
147660 147660 151085 151085 30 91 61 0063 3 189324 1.000000 ... 0.538462 0.0 0.214286 0.067647 0 0.154545 1.0 3248538.1343 626559.64459 0.4285
147661 147661 151086 151086 30 91 61 0063 2 189325 1.000000 ... 0.461538 0.0 0.328571 0.067647 0 0.109091 0.0 3248574.1356 626695.97516 0.3095
147662 147662 151087 151087 30 91 70 0063 4 189326 1.000000 ... 0.461538 0.0 0.453571 0.014706 0 0.545455 1.0 3249248.6607 626583.75220 0.4910
147663 147663 151088 151088 30 91 76 003A 800 189327 1.000000 ... 0.615385 0.0 0.678571 0.000000 0 0.327273 1.0 3268469.1765 634215.8402 0.5160
147664 147664 151089 151089 30 91 100 0082 800 189328 1.000000 ... 0.384615 0.0 0.346429 0.014706 0 0.327273 0.5 3260885.3823 624631.21009 0.3720
147665 147665 151090 151090 30 3 1 0323 42 188407 1.000000 ... 0.846154 0.0 0.757143 0.129412 0 0.809091 0.5 3249442.0328 680735.86987 0.6070
147666 147666 151091 151091 30 3 1 0126 4 188408 0.727273 ... 0.846154 0.0 0.292857 0.114706 0 0.545455 0.0 3248445.8473 679870.99807 0.4255
147667 147667 151092 151092 30 3 1 0126 14 188409 0.727273 ... 0.846154 0.0 0.596429 0.114706 0 0.690909 1.0 3248678.1322 680385.75222 0.5640
147668 147668 151093 151093 30 3 1 0130 35 188410 0.727273 ... 0.846154 0.5 0.817857 0.161765 0 0.654545 1.0 3249117.8092 680078.11555 0.6590
147669 147669 151094 151094 30 3 1 015A 25 188411 1.000000 ... 0.846154 0.0 0.803571 0.144118 0 0.545455 0.5 3248718.0635 679487.59219 0.5820
147670 147670 151095 151095 30 3 1 015A 27 188412 1.000000 ... 0.923077 0.5 0.800000 0.161765 0 0.481818 1.0 3248877.4810 679621.04745 0.6775

147671 rows × 21 columns


In [184]:
inner_agg['total'] = tmp_sum

In [185]:
inner_agg


Out[185]:
1 2 3 4 5 6 7 8 9 total
ENT
2 0.905273 0.792511 0.788793 0.395282 0.753682 0.200733 0.218067 0.626755 0.781646 0.624107
3 0.911422 0.878856 0.830533 0.457436 0.720168 0.162555 0.291282 0.431375 0.619487 0.604089
4 0.927649 0.932916 0.760286 0.257106 0.581848 0.120410 0.114341 0.384056 0.488695 0.535996
5 0.793792 0.969043 0.911820 0.414634 0.806794 0.179412 0.658537 0.459867 0.731707 0.643183
6 0.933566 0.933925 0.876726 0.333333 0.734249 0.159766 0.282051 0.534615 0.474359 0.607096
7 0.852880 0.622362 0.585957 0.092025 0.412869 0.117713 0.034619 0.290845 0.307625 0.396129
8 0.891406 0.783828 0.738228 0.254687 0.662540 0.153636 0.108175 0.293837 0.598110 0.525146
9 0.903780 0.954153 0.903672 0.290823 0.789570 0.239824 0.184403 0.796426 0.484288 0.654271
10 0.933088 0.829115 0.769643 0.241010 0.655383 0.113312 0.118210 0.178135 0.584545 0.516729
11 0.927961 0.887870 0.801900 0.206186 0.666036 0.111531 0.065207 0.249409 0.625920 0.535445
12 0.904353 0.770769 0.677701 0.059082 0.491643 0.116673 0.041796 0.383866 0.441176 0.461299
13 0.962659 0.820968 0.732837 0.037097 0.515084 0.084244 0.001075 0.201779 0.483871 0.459328
14 0.907455 0.842609 0.841942 0.180758 0.673511 0.134244 0.048929 0.487586 0.532577 0.553027
15 0.918704 0.912617 0.856746 0.245453 0.712556 0.183394 0.079585 0.650293 0.683223 0.616791
16 0.894255 0.890558 0.826322 0.114149 0.639912 0.138128 0.062500 0.331179 0.585069 0.531674
18 1.000000 0.923077 0.923077 0.000000 0.671429 0.223529 1.000000 0.309091 0.500000 0.576000
19 0.910373 0.899570 0.801149 0.376322 0.745900 0.180982 0.245434 0.462030 0.638738 0.604654
20 0.905518 0.741799 0.666412 0.036084 0.492100 0.098757 0.018280 0.099370 0.301088 0.407710
21 0.911968 0.860686 0.777718 0.135310 0.604782 0.120335 0.057260 0.600843 0.609504 0.549318
23 0.912052 0.897394 0.724630 0.263844 0.635284 0.107262 0.078176 0.251969 0.257329 0.497252
24 0.912579 0.869823 0.741285 0.126167 0.580499 0.113492 0.045627 0.244430 0.578926 0.498384
25 0.917216 0.883553 0.777566 0.153712 0.648066 0.128725 0.144141 0.253224 0.613765 0.526199
26 0.929886 0.797429 0.817362 0.288593 0.723329 0.160189 0.248597 0.461150 0.586808 0.575097
27 0.911503 0.877890 0.735384 0.175096 0.590148 0.112030 0.167612 0.344259 0.543094 0.515586
29 0.727273 0.692308 0.846154 0.000000 0.442857 0.000000 0.000000 0.436364 0.500000 0.430000
30 0.923771 0.833389 0.740629 0.086622 0.569393 0.111261 0.047243 0.433832 0.565013 0.508262
31 0.919519 0.948770 0.806081 0.349465 0.686585 0.150235 0.133776 0.364025 0.388327 0.563165
32 0.937749 0.831928 0.802964 0.166247 0.628412 0.106737 0.085011 0.161060 0.568566 0.505001

In [219]:
coef = 1000
#COLORS
colors = ['peru', 'hotpink', 'crimson', 'darkslategray', 'lightsalmon', 'gray', 'firebrick', 'lightgreen', 'olive', 'y']
#colors = [[color[0]]*9, [color[1]]*9, [color[2]]*9 ]
#for i in range(3):
#    colors[i].extend('y')
#GRID
fig = plt.figure(figsize=(10,30))
ax = fig.add_subplot(1,1,1)
ax.set_axis_bgcolor('white')

major_yticks = np.arange(0, 33, 5)
ax.set_yticks(major_yticks)                                           
major_xticks = np.arange(0, 11, 5)
ax.set_xticks(major_xticks)


minor_yticks = np.arange(0, 33, 1)
ax.set_yticks(minor_yticks, minor=True)                                           
minor_xticks = np.arange(0, 11, 1)
ax.set_xticks(minor_xticks, minor=True)



#ax.grid(which='major', c = 'white', linestyle='-')
ax.grid(which='minor', c = 'green', linestyle=':')


for i in inner_agg.index.values:
    plt.scatter(range(1,11),[i]*10, s=coef, c = colors, alpha=0.1, edgecolor='k', lw=1)
    plt.scatter(range(1,11), [i]*10, s=inner_agg.ix[i].map(lambda x: coef*x), c = colors)
plt.savefig('bubble_plot.png', bbox_inches='tight')
plt.show()


/Users/luis/anaconda/lib/python3.4/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == str('face'):

In [220]:
inner_agg['total'].plot()


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-220-2549e82e1b14> in <module>()
----> 1 inner_agg['total'].plot()

/Users/luis/anaconda/lib/python3.4/site-packages/pandas/core/frame.py in __getitem__(self, key)
   1912             return self._getitem_multilevel(key)
   1913         else:
-> 1914             return self._getitem_column(key)
   1915 
   1916     def _getitem_column(self, key):

/Users/luis/anaconda/lib/python3.4/site-packages/pandas/core/frame.py in _getitem_column(self, key)
   1919         # get column
   1920         if self.columns.is_unique:
-> 1921             return self._get_item_cache(key)
   1922 
   1923         # duplicate columns & possible reduce dimensionaility

/Users/luis/anaconda/lib/python3.4/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
   1088         res = cache.get(item)
   1089         if res is None:
-> 1090             values = self._data.get(item)
   1091             res = self._box_item_values(item, values)
   1092             cache[item] = res

/Users/luis/anaconda/lib/python3.4/site-packages/pandas/core/internals.py in get(self, item, fastpath)
   3100 
   3101             if not isnull(item):
-> 3102                 loc = self.items.get_loc(item)
   3103             else:
   3104                 indexer = np.arange(len(self.items))[isnull(self.items)]

/Users/luis/anaconda/lib/python3.4/site-packages/pandas/core/index.py in get_loc(self, key, method, tolerance)
   1690                 raise ValueError('tolerance argument only valid if using pad, '
   1691                                  'backfill or nearest lookups')
-> 1692             return self._engine.get_loc(_values_from_object(key))
   1693 
   1694         indexer = self.get_indexer([key], method=method,

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3979)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3843)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12265)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12216)()

KeyError: 'total'

In [221]:
inner_agg.plot(figsize=(20,8), kind='bar', stacked=True)


Out[221]:
<matplotlib.axes._subplots.AxesSubplot at 0x121afbe48>

In [164]:
fig=plt.figure()


<matplotlib.figure.Figure at 0x1200e1080>

In [126]:


In [127]:
ax


Out[127]:
<matplotlib.axes._subplots.AxesSubplot at 0x117678b00>

In [150]:
minor_xticks


Out[150]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [2]:
inner_data = pd.read_csv('inner_data.csv')


/Users/luis/anaconda/lib/python3.4/site-packages/IPython/core/interactiveshell.py:2902: DtypeWarning: Columns (16,17) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

In [222]:
inner_agg


Out[222]:
1 2 3 4 5 6 7 8 9
ENT
2 0.905273 0.792511 0.788793 0.395282 0.753682 0.200733 0.218067 0.626755 0.781646
3 0.911422 0.878856 0.830533 0.457436 0.720168 0.162555 0.291282 0.431375 0.619487
4 0.927649 0.932916 0.760286 0.257106 0.581848 0.120410 0.114341 0.384056 0.488695
5 0.793792 0.969043 0.911820 0.414634 0.806794 0.179412 0.658537 0.459867 0.731707
6 0.933566 0.933925 0.876726 0.333333 0.734249 0.159766 0.282051 0.534615 0.474359
7 0.852880 0.622362 0.585957 0.092025 0.412869 0.117713 0.034619 0.290845 0.307625
8 0.891406 0.783828 0.738228 0.254687 0.662540 0.153636 0.108175 0.293837 0.598110
9 0.903780 0.954153 0.903672 0.290823 0.789570 0.239824 0.184403 0.796426 0.484288
10 0.933088 0.829115 0.769643 0.241010 0.655383 0.113312 0.118210 0.178135 0.584545
11 0.927961 0.887870 0.801900 0.206186 0.666036 0.111531 0.065207 0.249409 0.625920
12 0.904353 0.770769 0.677701 0.059082 0.491643 0.116673 0.041796 0.383866 0.441176
13 0.962659 0.820968 0.732837 0.037097 0.515084 0.084244 0.001075 0.201779 0.483871
14 0.907455 0.842609 0.841942 0.180758 0.673511 0.134244 0.048929 0.487586 0.532577
15 0.918704 0.912617 0.856746 0.245453 0.712556 0.183394 0.079585 0.650293 0.683223
16 0.894255 0.890558 0.826322 0.114149 0.639912 0.138128 0.062500 0.331179 0.585069
18 1.000000 0.923077 0.923077 0.000000 0.671429 0.223529 1.000000 0.309091 0.500000
19 0.910373 0.899570 0.801149 0.376322 0.745900 0.180982 0.245434 0.462030 0.638738
20 0.905518 0.741799 0.666412 0.036084 0.492100 0.098757 0.018280 0.099370 0.301088
21 0.911968 0.860686 0.777718 0.135310 0.604782 0.120335 0.057260 0.600843 0.609504
23 0.912052 0.897394 0.724630 0.263844 0.635284 0.107262 0.078176 0.251969 0.257329
24 0.912579 0.869823 0.741285 0.126167 0.580499 0.113492 0.045627 0.244430 0.578926
25 0.917216 0.883553 0.777566 0.153712 0.648066 0.128725 0.144141 0.253224 0.613765
26 0.929886 0.797429 0.817362 0.288593 0.723329 0.160189 0.248597 0.461150 0.586808
27 0.911503 0.877890 0.735384 0.175096 0.590148 0.112030 0.167612 0.344259 0.543094
29 0.727273 0.692308 0.846154 0.000000 0.442857 0.000000 0.000000 0.436364 0.500000
30 0.923771 0.833389 0.740629 0.086622 0.569393 0.111261 0.047243 0.433832 0.565013
31 0.919519 0.948770 0.806081 0.349465 0.686585 0.150235 0.133776 0.364025 0.388327
32 0.937749 0.831928 0.802964 0.166247 0.628412 0.106737 0.085011 0.161060 0.568566

In [41]:
#inner_data['seccion_1'].replace({1:11, 2:8, 3:3, 4:0, 5:0},inplace=True)

In [227]:
inner_agg['cal_final'] = inner_data['cal_final']/100

In [228]:
inner_agg


Out[228]:
1 2 3 4 5 6 7 8 9 cal_final
ENT
2 0.905273 0.792511 0.788793 0.395282 0.753682 0.200733 0.218067 0.626755 0.781646 0.6735
3 0.911422 0.878856 0.830533 0.457436 0.720168 0.162555 0.291282 0.431375 0.619487 0.6400
4 0.927649 0.932916 0.760286 0.257106 0.581848 0.120410 0.114341 0.384056 0.488695 0.6325
5 0.793792 0.969043 0.911820 0.414634 0.806794 0.179412 0.658537 0.459867 0.731707 0.5775
6 0.933566 0.933925 0.876726 0.333333 0.734249 0.159766 0.282051 0.534615 0.474359 0.5775
7 0.852880 0.622362 0.585957 0.092025 0.412869 0.117713 0.034619 0.290845 0.307625 0.5775
8 0.891406 0.783828 0.738228 0.254687 0.662540 0.153636 0.108175 0.293837 0.598110 0.6495
9 0.903780 0.954153 0.903672 0.290823 0.789570 0.239824 0.184403 0.796426 0.484288 0.6495
10 0.933088 0.829115 0.769643 0.241010 0.655383 0.113312 0.118210 0.178135 0.584545 0.6495
11 0.927961 0.887870 0.801900 0.206186 0.666036 0.111531 0.065207 0.249409 0.625920 0.6215
12 0.904353 0.770769 0.677701 0.059082 0.491643 0.116673 0.041796 0.383866 0.441176 0.6215
13 0.962659 0.820968 0.732837 0.037097 0.515084 0.084244 0.001075 0.201779 0.483871 0.6215
14 0.907455 0.842609 0.841942 0.180758 0.673511 0.134244 0.048929 0.487586 0.532577 0.7015
15 0.918704 0.912617 0.856746 0.245453 0.712556 0.183394 0.079585 0.650293 0.683223 0.7015
16 0.894255 0.890558 0.826322 0.114149 0.639912 0.138128 0.062500 0.331179 0.585069 0.7120
18 1.000000 0.923077 0.923077 0.000000 0.671429 0.223529 1.000000 0.309091 0.500000 0.7070
19 0.910373 0.899570 0.801149 0.376322 0.745900 0.180982 0.245434 0.462030 0.638738 0.6310
20 0.905518 0.741799 0.666412 0.036084 0.492100 0.098757 0.018280 0.099370 0.301088 0.6310
21 0.911968 0.860686 0.777718 0.135310 0.604782 0.120335 0.057260 0.600843 0.609504 0.6325
23 0.912052 0.897394 0.724630 0.263844 0.635284 0.107262 0.078176 0.251969 0.257329 0.6860
24 0.912579 0.869823 0.741285 0.126167 0.580499 0.113492 0.045627 0.244430 0.578926 0.6245
25 0.917216 0.883553 0.777566 0.153712 0.648066 0.128725 0.144141 0.253224 0.613765 0.6245
26 0.929886 0.797429 0.817362 0.288593 0.723329 0.160189 0.248597 0.461150 0.586808 0.7245
27 0.911503 0.877890 0.735384 0.175096 0.590148 0.112030 0.167612 0.344259 0.543094 0.7245
29 0.727273 0.692308 0.846154 0.000000 0.442857 0.000000 0.000000 0.436364 0.500000 0.6525
30 0.923771 0.833389 0.740629 0.086622 0.569393 0.111261 0.047243 0.433832 0.565013 0.6200
31 0.919519 0.948770 0.806081 0.349465 0.686585 0.150235 0.133776 0.364025 0.388327 0.6200
32 0.937749 0.831928 0.802964 0.166247 0.628412 0.106737 0.085011 0.161060 0.568566 0.5740

In [11]:
inner_normalized = inner_data[resumen]
#inner_normalized[secciones] = inner_normalized[secciones]/max_vals
#inner_normalized['cal_final'] = inner_normalized['cal_final']/100
inner_normalized


Out[11]:
ENT seccion_1 seccion_2 seccion_3 seccion_4 seccion_5 seccion_6 seccion_7 seccion_8 seccion_9 cal_final
0 2 1.000000 0.923077 0.846154 0.5 0.839286 0.252941 0 0.654545 0.5 0.6675
1 2 0.727273 0.923077 0.846154 0.0 0.839286 0.170588 0 0.872727 0.5 0.5925
2 2 1.000000 0.923077 0.923077 0.5 0.817857 0.176471 0 0.763636 0.5 0.6735
3 2 1.000000 0.692308 0.923077 0.0 1.000000 0.329412 0 0.763636 0.5 0.6400
4 2 1.000000 0.692308 0.846154 0.5 0.839286 0.223529 0 0.654545 0.5 0.6325
5 2 1.000000 0.884615 0.692308 0.0 0.596429 0.158824 0 0.654545 1.0 0.5775
6 2 1.000000 0.884615 0.692308 0.0 0.596429 0.158824 0 0.654545 1.0 0.5775
7 2 1.000000 0.884615 0.692308 0.0 0.596429 0.158824 0 0.654545 1.0 0.5775
8 2 1.000000 0.769231 0.923077 0.0 0.839286 0.223529 0 0.763636 1.0 0.6495
9 2 1.000000 0.769231 0.923077 0.0 0.839286 0.223529 0 0.763636 1.0 0.6495
10 2 1.000000 0.769231 0.923077 0.0 0.839286 0.223529 0 0.763636 1.0 0.6495
11 2 1.000000 0.923077 0.923077 0.0 0.539286 0.188235 0 0.763636 1.0 0.6215
12 2 1.000000 0.923077 0.923077 0.0 0.539286 0.188235 0 0.763636 1.0 0.6215
13 2 1.000000 0.923077 0.923077 0.0 0.539286 0.188235 0 0.763636 1.0 0.6215
14 2 1.000000 1.000000 0.923077 0.5 0.660714 0.317647 0 0.545455 1.0 0.7015
15 2 1.000000 1.000000 0.923077 0.5 0.660714 0.317647 0 0.545455 1.0 0.7015
16 2 1.000000 0.653846 0.846154 0.5 0.982143 0.344118 1 0.872727 0.5 0.7120
17 2 1.000000 0.692308 0.923077 0.0 0.750000 0.144118 1 0.809091 1.0 0.6385
18 2 0.727273 0.884615 0.846154 1.0 0.839286 0.108824 0 0.690909 1.0 0.7070
19 2 0.727273 0.923077 0.923077 0.5 0.546429 0.161765 0 0.654545 1.0 0.6310
20 2 0.727273 0.923077 0.923077 0.5 0.546429 0.161765 0 0.654545 1.0 0.6310
21 2 0.727273 0.692308 0.923077 0.5 0.750000 0.108824 0 0.763636 1.0 0.6325
22 2 0.727273 0.692308 0.923077 0.5 0.750000 0.108824 0 0.763636 1.0 0.6325
23 2 0.727273 0.884615 0.923077 1.0 0.732143 0.155882 0 0.472727 1.0 0.6860
24 2 0.727273 0.807692 0.846154 0.0 0.839286 0.252941 0 0.809091 1.0 0.6245
25 2 0.727273 0.807692 0.846154 0.0 0.839286 0.252941 0 0.809091 1.0 0.6245
26 2 0.727273 0.884615 0.923077 1.0 0.800000 0.108824 0 0.809091 1.0 0.7245
27 2 0.727273 0.884615 0.923077 1.0 0.800000 0.108824 0 0.809091 1.0 0.7245
28 2 1.000000 0.692308 0.846154 0.5 0.750000 0.108824 0 0.763636 1.0 0.6525
29 2 1.000000 0.692308 0.846154 0.5 0.750000 0.108824 0 0.763636 1.0 0.6525
... ... ... ... ... ... ... ... ... ... ... ...
147641 31 1.000000 1.000000 0.692308 1.0 0.839286 0.144118 1 0.200000 0.5 0.6640
147642 31 1.000000 0.961538 0.538462 0.5 0.839286 0.144118 0 0.200000 0.0 0.5240
147643 31 1.000000 0.923077 0.923077 0.0 0.839286 0.014706 0 0.109091 1.0 0.5620
147644 31 1.000000 0.923077 0.923077 0.0 0.839286 0.014706 0 0.109091 1.0 0.5620
147645 31 1.000000 1.000000 0.692308 0.5 0.678571 0.047059 0 0.763636 1.0 0.6520
147646 31 1.000000 1.000000 0.846154 0.5 0.760714 0.161765 0 0.327273 0.5 0.6150
147647 31 1.000000 0.961538 0.923077 1.0 0.750000 0.158824 0 0.654545 0.0 0.6690
147648 31 1.000000 0.961538 0.923077 0.0 0.678571 0.052941 0 0.418182 1.0 0.5850
147649 31 1.000000 1.000000 0.692308 0.5 0.550000 0.144118 0 0.963636 0.5 0.6325
147650 30 1.000000 0.692308 0.769231 0.0 0.728571 0.176471 0 0.636364 0.0 0.5020
147651 30 1.000000 0.653846 0.615385 0.0 0.496429 0.014706 0 0.327273 1.0 0.4630
147652 30 1.000000 0.884615 0.692308 0.0 0.550000 0.014706 0 0.700000 1.0 0.5515
147653 30 1.000000 0.846154 0.769231 0.0 0.525000 0.014706 0 0.481818 0.5 0.4890
147654 30 1.000000 0.846154 0.769231 0.0 0.525000 0.014706 0 0.481818 0.5 0.4890
147655 30 1.000000 0.846154 0.538462 0.0 0.596429 0.061765 0 0.327273 0.5 0.4600
147656 30 1.000000 0.846154 0.538462 0.0 0.596429 0.061765 0 0.327273 0.5 0.4600
147657 30 1.000000 0.769231 0.461538 0.0 0.417857 0.000000 0 0.545455 0.0 0.3885
147658 30 1.000000 0.769231 0.461538 0.0 0.417857 0.000000 0 0.545455 0.0 0.3885
147659 30 1.000000 0.846154 0.538462 0.0 0.214286 0.067647 0 0.154545 1.0 0.4285
147660 30 1.000000 0.846154 0.538462 0.0 0.214286 0.067647 0 0.154545 1.0 0.4285
147661 30 1.000000 0.538462 0.461538 0.0 0.328571 0.067647 0 0.109091 0.0 0.3095
147662 30 1.000000 0.884615 0.461538 0.0 0.453571 0.014706 0 0.545455 1.0 0.4910
147663 30 1.000000 0.884615 0.615385 0.0 0.678571 0.000000 0 0.327273 1.0 0.5160
147664 30 1.000000 0.653846 0.384615 0.0 0.346429 0.014706 0 0.327273 0.5 0.3720
147665 30 1.000000 1.000000 0.846154 0.0 0.757143 0.129412 0 0.809091 0.5 0.6070
147666 30 0.727273 0.884615 0.846154 0.0 0.292857 0.114706 0 0.545455 0.0 0.4255
147667 30 0.727273 0.884615 0.846154 0.0 0.596429 0.114706 0 0.690909 1.0 0.5640
147668 30 0.727273 0.923077 0.846154 0.5 0.817857 0.161765 0 0.654545 1.0 0.6590
147669 30 1.000000 0.961538 0.846154 0.0 0.803571 0.144118 0 0.545455 0.5 0.5820
147670 30 1.000000 0.923077 0.923077 0.5 0.800000 0.161765 0 0.481818 1.0 0.6775

147671 rows × 11 columns


In [15]:
inner_normalized[inner_normalized['ENT']==25][indicadores].hist()


Out[15]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x10c4a0ba8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10c52e198>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10c5ac978>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x10cea6c50>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10cef3b00>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10cf327b8>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x10cf79da0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10f94f198>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10f995a90>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x10fe0d748>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10fe4c240>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x112f72400>]], dtype=object)

In [16]:
inner_normalized.groupby('ENT').agg('std').hist()


Out[16]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x112fadc18>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11356ab00>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1141a0128>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1141d9a58>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x114225828>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1142645f8>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1142aabe0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1142e5f98>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1143358d0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x114383588>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1143c1080>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11451e240>]], dtype=object)

In [17]:
plt.matshow(inner_normalized[secciones].corr())


Out[17]:
<matplotlib.image.AxesImage at 0x114a316a0>

In [18]:
import itertools

In [283]:
def scatterplot_matrix(data, names, **kwargs):
    """Plots a scatterplot matrix of subplots.  Each row of "data" is plotted
    against other rows, resulting in a nrows by nrows grid of subplots with the
    diagonal subplots labeled with "names".  Additional keyword arguments are
    passed on to matplotlib's "plot" command. Returns the matplotlib figure
    object containg the subplot grid."""
    numvars, numdata = data.shape
    fig, axes = plt.subplots(nrows=numvars, ncols=numvars, figsize=(8,8))
    fig.subplots_adjust(hspace=0.05, wspace=0.05)

    for ax in axes.flat:
        # Hide all ticks and labels
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)

        # Set up ticks only on one side for the "edge" subplots...
        if ax.is_first_col():
            ax.yaxis.set_ticks_position('left')
        if ax.is_last_col():
            ax.yaxis.set_ticks_position('right')
        if ax.is_first_row():
            ax.xaxis.set_ticks_position('top')
        if ax.is_last_row():
            ax.xaxis.set_ticks_position('bottom')

    # Plot the data.
    for i, j in zip(*np.triu_indices_from(axes, k=1)):
        for x, y in [(i,j), (j,i)]:
            axes[x,y].plot(data[x], data[y], **kwargs)

    # Label the diagonal subplots...
    for i, label in enumerate(names):
        axes[i,i].annotate(label, (0.5, 0.5), xycoords='axes fraction',
                ha='center', va='center')

    # Turn on the proper x or y axes ticks.
    for i, j in zip(range(numvars), itertools.cycle((-1, 0))):
        axes[j,i].xaxis.set_visible(True)
        axes[i,j].yaxis.set_visible(True)

    return fig

In [21]:
pd.scatter_matrix(inner_normalized[indicadores], alpha=0.2, figsize=(20, 20), diagonal='kde')


Out[21]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x10c7c3b38>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x114c60b38>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10c90e7f0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10c955898>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10c992b38>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10c9cf208>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10ca0f8d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10ca47ba8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10ca964e0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10cae3198>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x10cc2ec50>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10cc7be10>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10ccbb668>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10cd05828>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10cd3fef0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10cd8c438>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10cdc68d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10ce18208>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10ce5ed68>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10db4b828>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x10db988d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10dbd5358>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10df33518>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10df6dcf8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10dfb8438>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10dff26d8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10e03bfd0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10e089b70>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10e0c9668>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10e215710>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x10e253198>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10e2a0358>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10e2d7b38>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10e326278>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10e360518>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10e3abe10>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10e3f99b0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x10e4374a8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1103ad550>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1103e5f98>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x110550198>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x110588978>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1105d50b8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x110713358>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1107234a8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x110770080>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1107a8e48>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1107f6c18>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x110a34b00>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x110a80da0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x110abf4e0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x110b04d68>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x110b2c5f8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1226517b8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x12269c470>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1226d5c88>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122723a58>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122761940>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1227adbe0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1227ee320>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x122833ba8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122859438>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1228bd5f8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122a0a2b0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122a45ac8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122a8f898>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122acf780>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122c1ba20>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122c59160>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122c9f9e8>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x122cc6278>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122d2b438>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122d790f0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122db0908>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122dfd6d8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122f3b5c0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122f88860>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x122fbbf60>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x12380f828>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x12382feb8>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x123899278>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1238dfef0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123a1f748>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123a6b518>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123aa9400>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123af56a0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123c29da0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123c79668>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123c9deb8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123d060b8>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x123d4cd30>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123d8b588>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123dd8358>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123e17240>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123e644e0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123e9dbe0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x123ee84a8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x12400bcf8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x12406deb8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1240bab70>]], dtype=object)
/Users/luis/anaconda/lib/python3.4/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == str('face'):

In [ ]:
pd.scatter_matrix(inner_normalized[indicadores], alpha=0.2, figsize=(6, 6), diagonal='kde')