In [1]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
from numpy import nan
plt.style.use('ggplot')
In [2]:
from IPython.core.display import HTML
css = open('style-table.css').read() + open('style-notebook.css').read()
HTML('<style>{}</style>'.format(css))
Out[2]:
In [3]:
infr = pd.read_csv("infraestructura.csv")
In [23]:
secciones = [['P3'], ['P11', 'P12', 'P13A', 'P14', 'P15', 'P16'], ['P17A', 'P18A', 'P19', 'P20', 'P21', 'P22'], ['P23', 'P24'], ['P25', 'P26', 'P27', 'P28', 'P29', 'P30', 'P31', 'P32', 'P33', 'P34', 'P35', 'P36', 'P37', 'P38', 'P39', 'P40', 'P41'], ['P42', 'P44', 'P46', 'P47', 'P48', 'P49', 'P52', 'P62', 'P72', 'P82', 'P92', 'P102', 'P103', 'P112', 'P113', 'P117', 'P122', 'P123', 'P125'], ['P126'], ['P133', 'P134', 'P135', 'P136', 'P137', 'P138', 'P139', 'P140', 'P141', 'P142', 'P143'], ['P144', 'P145']]
max_cal = [11, 13, 13, 11, 14, 17, 2, 11, 8]
In [5]:
i=1
for seccion in secciones:
infr['seccion_'+str(i)]=infr[seccion].sum(axis=1)
i+=1
In [7]:
sec_lst = []
for i in range(1,10):
sec_lst.append('seccion_'+str(i))
#dict(zip([1,2,3,4], [a,b,c,d]))
In [8]:
infr[sec_lst]
Out[8]:
seccion_1
seccion_2
seccion_3
seccion_4
seccion_5
seccion_6
seccion_7
seccion_8
seccion_9
0
2
7.5
11
11.0
11.75
4.80
0
8.8
4
1
2
8.0
11
5.5
11.50
2.75
0
4.8
8
2
2
8.5
11
5.5
11.75
2.75
0
4.8
4
3
2
7.5
11
11.0
11.75
2.65
0
10.0
0
4
2
7.5
11
5.5
10.70
5.35
0
8.4
8
5
2
11.0
11
0.0
11.45
2.75
0
6.0
8
6
2
11.5
11
0.0
8.00
4.85
0
8.2
8
7
2
5.5
11
0.0
11.75
3.80
0
7.2
8
8
1
12.5
11
11.0
11.50
1.40
0
1.2
4
9
1
11.0
11
5.5
10.50
4.05
0
6.0
8
10
1
11.5
12
0.0
12.75
3.80
0
4.8
4
11
1
11.5
11
0.0
11.45
3.50
0
8.4
8
12
1
8.5
11
11.0
9.50
3.75
2
8.4
8
13
1
11.5
11
11.0
9.20
3.50
0
3.6
8
14
2
11.5
11
0.0
11.75
3.00
0
9.6
4
15
1
12.0
11
5.5
8.95
4.60
0
4.8
8
16
1
11.5
11
11.0
11.75
4.30
0
9.6
8
17
1
9.5
11
5.5
11.75
5.35
0
9.6
8
18
1
8.5
12
0.0
11.45
5.35
0
9.6
8
19
1
11.5
11
5.5
11.75
5.10
0
8.4
4
20
1
11.5
12
5.5
11.75
4.80
0
0.4
4
21
2
11.5
11
0.0
11.75
4.05
0
9.6
4
22
1
12.5
11
0.0
9.50
2.95
0
9.6
0
23
1
11.5
12
5.5
11.75
3.75
0
7.2
0
24
1
11.5
11
5.5
11.75
4.30
0
7.2
4
25
2
11.5
11
5.5
9.50
3.25
0
8.4
4
26
1
12.5
11
0.0
9.00
1.85
0
9.6
0
27
2
11.5
12
5.5
11.75
4.05
0
8.4
8
28
1
11.5
11
5.5
11.75
4.05
0
7.2
0
29
2
11.0
12
5.5
9.00
0.80
0
0.0
0
...
...
...
...
...
...
...
...
...
...
151066
1
11.5
8
0.0
9.50
2.20
0
1.7
0
151067
1
12.5
8
0.0
9.25
3.80
0
1.0
8
151068
1
12.5
8
5.5
8.00
3.80
0
1.7
8
151069
1
12.5
9
11.0
11.75
2.45
2
2.2
4
151070
1
13.0
9
11.0
11.75
2.20
0
3.4
8
151071
1
12.0
7
5.5
11.75
2.45
0
2.2
0
151072
1
11.5
12
0.0
11.75
0.25
0
1.2
8
151073
1
12.5
9
5.5
9.50
0.80
0
8.4
8
151074
1
12.0
11
11.0
10.20
3.00
0
2.9
0
151075
1
12.5
11
5.5
10.65
2.75
0
3.6
4
151076
1
12.0
12
11.0
10.50
2.70
0
7.2
0
151077
1
13.5
12
0.0
9.50
0.90
0
4.6
8
151078
1
12.5
9
5.5
7.70
2.45
0
10.6
4
151079
1
8.5
10
0.0
10.20
3.00
0
7.0
0
151080
1
8.0
8
0.0
6.95
0.25
0
3.6
8
151081
1
11.0
9
0.0
7.70
0.25
0
7.7
8
151082
1
12.0
10
0.0
7.35
0.25
0
5.3
4
151083
1
12.0
7
0.0
8.35
1.05
0
3.6
4
151084
1
11.0
6
0.0
5.85
0.00
0
6.0
0
151085
1
12.0
7
0.0
3.00
1.15
0
1.7
8
151086
1
8.0
6
0.0
4.60
1.15
0
1.2
0
151087
1
11.0
6
0.0
6.35
0.25
0
6.0
8
151088
1
11.0
8
0.0
9.50
0.00
0
3.6
8
151089
1
8.0
5
0.0
4.85
0.25
0
3.6
4
151090
1
12.5
11
0.0
10.60
2.20
0
8.9
4
151091
2
12.5
11
0.0
4.10
1.95
0
6.0
0
151092
2
12.5
11
0.0
8.35
1.95
0
7.6
8
151093
2
11.5
11
5.5
11.45
2.75
0
7.2
8
151094
1
13.5
11
0.0
11.25
2.45
0
6.0
4
151095
1
11.5
12
5.5
11.20
2.75
0
5.3
8
151096 rows × 9 columns
In [16]:
infr[infr['P12']==2]['P12'].count()
Out[16]:
28892
In [67]:
new_columns = ['ENT','MUN','LOC','AGEB','MZA','ID_INM']
new_columns.extend(sec_lst)
In [68]:
infr_secciones = infr[new_columns]
In [44]:
infr.to_csv('infraestuctura.csv')
infr_secciones.to_csv('infr_secciones.csv')
In [39]:
def densidad_inversa(j):
#-column+max_cal
#infr_secciones[sec_lst[j]] = max_cal[j]-infr_secciones[sec_lst[j]]
return max_cal[j]-infr_secciones[sec_lst[j]]
#infr_secciones[col]
In [45]:
lst2= [11, 13, 13, 11, 14, 17, 2, 11, 8]
for i in range(9):
print("Sección ", i+1, ":\t", max(infr_secciones[sec_lst[i]])<=lst2[i])
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-45-400b7f55768b> in <module>()
1 lst2= [11, 13, 13, 11, 14, 17, 2, 11, 8]
2 for i in range(9):
----> 3 print("Sección ", i+1, ":\t", max(infr_secciones[sec_lst[i]])<=lst2[i])
NameError: name 'infr_secciones' is not defined
In [58]:
#j=1
#print(min(-infrtmp.ix[:,sec_lst[j]]+max_cal[j]))
#print(max(-infrtmp.ix[:,sec_lst[j]]+max_cal[j]))
infr[sec_lst].max()
Out[58]:
seccion_1 5.0
seccion_2 13.5
seccion_3 16.0
seccion_4 11.0
seccion_5 14.0
seccion_6 10.0
seccion_7 2.0
seccion_8 11.0
seccion_9 8.0
dtype: float64
In [59]:
infr[infr[sec_lst[1]]>13][sec_lst[1]]
Out[59]:
61 13.5
64 13.5
72 13.5
239 13.5
335 13.5
492 13.5
540 13.5
544 13.5
603 13.5
613 13.5
621 13.5
650 13.5
679 13.5
681 13.5
700 13.5
738 13.5
756 13.5
814 13.5
819 13.5
987 13.5
1029 13.5
1104 13.5
1213 13.5
1214 13.5
1359 13.5
1428 13.5
1716 13.5
1773 13.5
1774 13.5
1775 13.5
...
149785 13.5
149806 13.5
149860 13.5
149861 13.5
149978 13.5
150084 13.5
150085 13.5
150088 13.5
150201 13.5
150246 13.5
150268 13.5
150296 13.5
150337 13.5
150405 13.5
150406 13.5
150408 13.5
150410 13.5
150439 13.5
150544 13.5
150564 13.5
150567 13.5
150593 13.5
150626 13.5
150711 13.5
150735 13.5
150774 13.5
150815 13.5
151044 13.5
151077 13.5
151094 13.5
Name: seccion_2, dtype: float64
In [46]:
print("nan:\t",len(infr[infr['P12'].isnull()]))
print("0:\t",len(infr[infr['P12']==0]))
print("1:\t",len(infr[infr['P12']==1]))
print("2:\t",len(infr[infr['P12']==2]))
print("total:\t", len(infr['P12']))
nan: 217
0: 11799
1: 110188
2: 28892
total: 151096
In [60]:
pd.Series(infr['P12'].values.ravel()).unique()
Out[60]:
array([ 1., 2., 0., nan])
In [137]:
11059-11799
Out[137]:
-740
In [138]:
#nan: 217 -> nan: 217 0
#1: 110794 -> 1: 110188 606
#2: 29026 -> 2: 28892 134
#3: 11059 -> 0: 11799 -740
#total: 151096 -> total: 151096 0
len(infr[infr['P12'].isnull()])
Out[138]:
217
In [11]:
infr
Out[11]:
Unnamed: 0
ENT
MUN
LOC
AGEB
MZA
ID_INM
P2A
P2B
P3
...
P145
seccion_1
seccion_2
seccion_3
seccion_4
seccion_5
seccion_6
seccion_7
seccion_8
seccion_9
0
0
2
4
1
2876
23
1914
1
NaN
2
...
0
2
7.5
11
11.0
11.75
4.80
0
8.8
4
1
1
2
4
1
2880
103
1915
1
NaN
2
...
4
2
8.0
11
5.5
11.50
2.75
0
4.8
8
2
2
2
4
1
3677
5
1916
1
NaN
2
...
0
2
8.5
11
5.5
11.75
2.75
0
4.8
4
3
3
2
4
1
3677
9
1917
1
NaN
2
...
0
2
7.5
11
11.0
11.75
2.65
0
10.0
0
4
4
2
4
1
3677
23
1918
7
NaN
2
...
4
2
7.5
11
5.5
10.70
5.35
0
8.4
8
5
5
2
4
1
3681
17
1919
1
NaN
2
...
4
2
11.0
11
0.0
11.45
2.75
0
6.0
8
6
6
2
4
1
3681
25
1920
1
NaN
2
...
4
2
11.5
11
0.0
8.00
4.85
0
8.2
8
7
7
2
4
1
3817
9
1921
1
NaN
2
...
4
2
5.5
11
0.0
11.75
3.80
0
7.2
8
8
8
2
4
1
1191
39
1922
2
NaN
1
...
4
1
12.5
11
11.0
11.50
1.40
0
1.2
4
9
9
2
4
1
1191
39
1923
1
NaN
1
...
4
1
11.0
11
5.5
10.50
4.05
0
6.0
8
10
10
2
4
1
1191
39
1924
1
NaN
1
...
4
1
11.5
12
0.0
12.75
3.80
0
4.8
4
11
11
2
4
1
2467
18
1925
1
NaN
1
...
4
1
11.5
11
0.0
11.45
3.50
0
8.4
8
12
12
2
4
1
1806
57
1926
2
NaN
1
...
4
1
8.5
11
11.0
9.50
3.75
2
8.4
8
13
13
2
4
1
1806
57
1927
1
NaN
1
...
4
1
11.5
11
11.0
9.20
3.50
0
3.6
8
14
14
2
4
1
1806
50
1928
1
NaN
2
...
0
2
11.5
11
0.0
11.75
3.00
0
9.6
4
15
15
2
4
1
1806
65
1929
1
NaN
1
...
4
1
12.0
11
5.5
8.95
4.60
0
4.8
8
16
16
2
4
1
6525
19
1763
1
NaN
1
...
4
1
11.5
11
11.0
11.75
4.30
0
9.6
8
17
17
2
4
1
3821
59
1765
1
NaN
1
...
4
1
9.5
11
5.5
11.75
5.35
0
9.6
8
18
18
2
4
1
4266
1
1766
5
NaN
1
...
4
1
8.5
12
0.0
11.45
5.35
0
9.6
8
19
19
2
4
1
4463
15
1767
1
NaN
1
...
0
1
11.5
11
5.5
11.75
5.10
0
8.4
4
20
20
2
4
1
2927
29
1768
1
NaN
1
...
0
1
11.5
12
5.5
11.75
4.80
0
0.4
4
21
21
2
4
1
5601
9
1769
1
NaN
2
...
0
2
11.5
11
0.0
11.75
4.05
0
9.6
4
22
22
2
4
1
0210
25
1770
2
NaN
1
...
0
1
12.5
11
0.0
9.50
2.95
0
9.6
0
23
23
2
4
1
0475
52
1771
1
NaN
1
...
0
1
11.5
12
5.5
11.75
3.75
0
7.2
0
24
24
2
4
1
3412
2
1772
1
NaN
1
...
0
1
11.5
11
5.5
11.75
4.30
0
7.2
4
25
25
2
4
1
0441
9
1773
1
NaN
2
...
0
2
11.5
11
5.5
9.50
3.25
0
8.4
4
26
26
2
4
1
6582
65
1775
1
NaN
1
...
0
1
12.5
11
0.0
9.00
1.85
0
9.6
0
27
27
2
4
1
3501
25
1776
1
NaN
2
...
4
2
11.5
12
5.5
11.75
4.05
0
8.4
8
28
28
2
4
1
4054
34
1778
1
NaN
1
...
0
1
11.5
11
5.5
11.75
4.05
0
7.2
0
29
29
1
5
1
0497
34
1509
8
RESPONSABLE DEL SERVICIO
2
...
0
2
11.0
12
5.5
9.00
0.80
0
0.0
0
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
151066
166106
31
13
1
0061
11
190871
1
NaN
1
...
0
1
11.5
8
0.0
9.50
2.20
0
1.7
0
151067
166107
31
13
1
0061
11
190872
1
NaN
1
...
4
1
12.5
8
0.0
9.25
3.80
0
1.0
8
151068
166108
31
13
1
0061
11
190873
2
NaN
1
...
4
1
12.5
8
5.5
8.00
3.80
0
1.7
8
151069
166109
31
13
1
0076
4
190874
1
NaN
1
...
4
1
12.5
9
11.0
11.75
2.45
2
2.2
4
151070
166110
31
13
1
0095
9
190875
2
NaN
1
...
4
1
13.0
9
11.0
11.75
2.20
0
3.4
8
151071
166111
31
54
1
0030
17
191044
2
NaN
1
...
0
1
12.0
7
5.5
11.75
2.45
0
2.2
0
151072
166112
31
54
6
0045
5
191045
8
MAESTRA DE GRUPO NO SE ENCONTRO EL DIRECTOR
1
...
4
1
11.5
12
0.0
11.75
0.25
0
1.2
8
151073
166113
31
52
1
0182
15
191046
2
NaN
1
...
4
1
12.5
9
5.5
9.50
0.80
0
8.4
8
151074
166114
31
50
1
5687
11
191047
2
NaN
1
...
0
1
12.0
11
11.0
10.20
3.00
0
2.9
0
151075
166115
31
50
75
6562
13
191048
1
NaN
1
...
0
1
12.5
11
5.5
10.65
2.75
0
3.6
4
151076
166116
31
50
75
6577
13
191049
2
NaN
1
...
0
1
12.0
12
11.0
10.50
2.70
0
7.2
0
151077
166118
31
50
81
2042
3
191052
1
NaN
1
...
4
1
13.5
12
0.0
9.50
0.90
0
4.6
8
151078
166119
31
50
88
2042
13
191053
1
NaN
1
...
0
1
12.5
9
5.5
7.70
2.45
0
10.6
4
151079
166120
30
91
55
003A
5
189318
1
NaN
1
...
0
1
8.5
10
0.0
10.20
3.00
0
7.0
0
151080
166121
30
91
55
003A
10
189319
1
NaN
1
...
4
1
8.0
8
0.0
6.95
0.25
0
3.6
8
151081
166122
30
91
56
0063
2
189320
1
NaN
1
...
4
1
11.0
9
0.0
7.70
0.25
0
7.7
8
151082
166123
30
91
57
003A
1
189321
1
NaN
1
...
0
1
12.0
10
0.0
7.35
0.25
0
5.3
4
151083
166124
30
91
57
003A
1
189322
1
NaN
1
...
0
1
12.0
7
0.0
8.35
1.05
0
3.6
4
151084
166125
30
91
61
0063
3
189323
1
NaN
1
...
0
1
11.0
6
0.0
5.85
0.00
0
6.0
0
151085
166126
30
91
61
0063
3
189324
1
NaN
1
...
4
1
12.0
7
0.0
3.00
1.15
0
1.7
8
151086
166127
30
91
61
0063
2
189325
1
NaN
1
...
0
1
8.0
6
0.0
4.60
1.15
0
1.2
0
151087
166128
30
91
70
0063
4
189326
1
NaN
1
...
4
1
11.0
6
0.0
6.35
0.25
0
6.0
8
151088
166129
30
91
76
003A
800
189327
1
NaN
1
...
4
1
11.0
8
0.0
9.50
0.00
0
3.6
8
151089
166130
30
91
100
0082
800
189328
1
NaN
1
...
0
1
8.0
5
0.0
4.85
0.25
0
3.6
4
151090
166132
30
3
1
0323
42
188407
1
NaN
1
...
4
1
12.5
11
0.0
10.60
2.20
0
8.9
4
151091
166133
30
3
1
0126
4
188408
1
NaN
2
...
0
2
12.5
11
0.0
4.10
1.95
0
6.0
0
151092
166134
30
3
1
0126
14
188409
1
NaN
2
...
4
2
12.5
11
0.0
8.35
1.95
0
7.6
8
151093
166135
30
3
1
0130
35
188410
1
NaN
2
...
4
2
11.5
11
5.5
11.45
2.75
0
7.2
8
151094
166136
30
3
1
015A
25
188411
1
NaN
1
...
0
1
13.5
11
0.0
11.25
2.45
0
6.0
4
151095
166137
30
3
1
015A
27
188412
1
NaN
1
...
4
1
11.5
12
5.5
11.20
2.75
0
5.3
8
151096 rows × 169 columns
In [41]:
infr.ix[(infr.P22==3)& (infr.P20==3),'P20']=0
In [42]:
infr.to_csv("infra_corregido.csv")
In [44]:
df = pd.read_excel('INMUEBLES.xlsx')
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-44-cd9dfe73e5d1> in <module>()
----> 1 df = pd.read_excel('INMUEBLES.xlsx')
/Users/luis/anaconda/lib/python3.4/site-packages/pandas/io/excel.py in read_excel(io, sheetname, header, skiprows, skip_footer, index_col, parse_cols, parse_dates, date_parser, na_values, thousands, convert_float, has_index_names, converters, engine, **kwds)
161
162 if not isinstance(io, ExcelFile):
--> 163 io = ExcelFile(io, engine=engine)
164
165 return io._parse_excel(
/Users/luis/anaconda/lib/python3.4/site-packages/pandas/io/excel.py in __init__(self, io, **kwds)
204 self.book = xlrd.open_workbook(file_contents=data)
205 else:
--> 206 self.book = xlrd.open_workbook(io)
207 elif engine == 'xlrd' and isinstance(io, xlrd.Book):
208 self.book = io
/Users/luis/anaconda/lib/python3.4/site-packages/xlrd/__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
420 formatting_info=formatting_info,
421 on_demand=on_demand,
--> 422 ragged_rows=ragged_rows,
423 )
424 return bk
/Users/luis/anaconda/lib/python3.4/site-packages/xlrd/xlsx.py in open_workbook_2007_xml(zf, component_names, logfile, verbosity, use_mmap, formatting_info, on_demand, ragged_rows)
792 x12sheet = X12Sheet(sheet, logfile, verbosity)
793 heading = "Sheet %r (sheetx=%d) from %r" % (sheet.name, sheetx, fname)
--> 794 x12sheet.process_stream(zflo, heading)
795 del zflo
796 comments_fname = 'xl/comments%d.xml' % (sheetx + 1)
/Users/luis/anaconda/lib/python3.4/site-packages/xlrd/xlsx.py in own_process_stream(self, stream, heading)
529 for event, elem in ET.iterparse(stream):
530 if elem.tag == row_tag:
--> 531 self_do_row(elem)
532 elem.clear() # destroy all child elements (cells)
533 elif elem.tag == U_SSML12 + "dimension":
/Users/luis/anaconda/lib/python3.4/site-packages/xlrd/xlsx.py in do_row(self, row_elem)
641 child_tag = child.tag
642 if child_tag == V_TAG:
--> 643 tvalue = child.text
644 elif child_tag == F_TAG:
645 formula = cooked_text(self, child)
KeyboardInterrupt:
In [ ]:
df = df[df['P3']<6]
df.ix[df.P12>2, 'P12'] = 3
df['P12'].replace({1: 1.5, 2: 1, 3: 0},inplace=True)
len(df['P12'])
In [49]:
max(infr[sec_lst[2]])
Out[49]:
16.0
In [ ]: