In [1]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
from numpy import nan
plt.style.use('ggplot')

In [2]:
from IPython.core.display import HTML
css = open('style-table.css').read() + open('style-notebook.css').read()
HTML('<style>{}</style>'.format(css))


Out[2]:

In [3]:
infr = pd.read_csv("infraestructura.csv")

In [23]:
secciones = [['P3'], ['P11', 'P12', 'P13A', 'P14', 'P15', 'P16'], ['P17A', 'P18A', 'P19', 'P20', 'P21', 'P22'], ['P23', 'P24'], ['P25', 'P26', 'P27', 'P28', 'P29', 'P30', 'P31', 'P32', 'P33', 'P34', 'P35', 'P36', 'P37', 'P38', 'P39', 'P40', 'P41'], ['P42', 'P44', 'P46', 'P47', 'P48', 'P49', 'P52', 'P62', 'P72', 'P82', 'P92', 'P102', 'P103', 'P112', 'P113', 'P117', 'P122', 'P123', 'P125'], ['P126'], ['P133', 'P134', 'P135', 'P136', 'P137', 'P138', 'P139', 'P140', 'P141', 'P142', 'P143'], ['P144', 'P145']]
max_cal = [11, 13, 13, 11, 14, 17, 2, 11, 8]

In [5]:
i=1
for seccion in secciones:
    infr['seccion_'+str(i)]=infr[seccion].sum(axis=1)
    i+=1

In [7]:
sec_lst = []
for i in range(1,10):
    sec_lst.append('seccion_'+str(i)) 
#dict(zip([1,2,3,4], [a,b,c,d]))

In [8]:
infr[sec_lst]


Out[8]:
seccion_1 seccion_2 seccion_3 seccion_4 seccion_5 seccion_6 seccion_7 seccion_8 seccion_9
0 2 7.5 11 11.0 11.75 4.80 0 8.8 4
1 2 8.0 11 5.5 11.50 2.75 0 4.8 8
2 2 8.5 11 5.5 11.75 2.75 0 4.8 4
3 2 7.5 11 11.0 11.75 2.65 0 10.0 0
4 2 7.5 11 5.5 10.70 5.35 0 8.4 8
5 2 11.0 11 0.0 11.45 2.75 0 6.0 8
6 2 11.5 11 0.0 8.00 4.85 0 8.2 8
7 2 5.5 11 0.0 11.75 3.80 0 7.2 8
8 1 12.5 11 11.0 11.50 1.40 0 1.2 4
9 1 11.0 11 5.5 10.50 4.05 0 6.0 8
10 1 11.5 12 0.0 12.75 3.80 0 4.8 4
11 1 11.5 11 0.0 11.45 3.50 0 8.4 8
12 1 8.5 11 11.0 9.50 3.75 2 8.4 8
13 1 11.5 11 11.0 9.20 3.50 0 3.6 8
14 2 11.5 11 0.0 11.75 3.00 0 9.6 4
15 1 12.0 11 5.5 8.95 4.60 0 4.8 8
16 1 11.5 11 11.0 11.75 4.30 0 9.6 8
17 1 9.5 11 5.5 11.75 5.35 0 9.6 8
18 1 8.5 12 0.0 11.45 5.35 0 9.6 8
19 1 11.5 11 5.5 11.75 5.10 0 8.4 4
20 1 11.5 12 5.5 11.75 4.80 0 0.4 4
21 2 11.5 11 0.0 11.75 4.05 0 9.6 4
22 1 12.5 11 0.0 9.50 2.95 0 9.6 0
23 1 11.5 12 5.5 11.75 3.75 0 7.2 0
24 1 11.5 11 5.5 11.75 4.30 0 7.2 4
25 2 11.5 11 5.5 9.50 3.25 0 8.4 4
26 1 12.5 11 0.0 9.00 1.85 0 9.6 0
27 2 11.5 12 5.5 11.75 4.05 0 8.4 8
28 1 11.5 11 5.5 11.75 4.05 0 7.2 0
29 2 11.0 12 5.5 9.00 0.80 0 0.0 0
... ... ... ... ... ... ... ... ... ...
151066 1 11.5 8 0.0 9.50 2.20 0 1.7 0
151067 1 12.5 8 0.0 9.25 3.80 0 1.0 8
151068 1 12.5 8 5.5 8.00 3.80 0 1.7 8
151069 1 12.5 9 11.0 11.75 2.45 2 2.2 4
151070 1 13.0 9 11.0 11.75 2.20 0 3.4 8
151071 1 12.0 7 5.5 11.75 2.45 0 2.2 0
151072 1 11.5 12 0.0 11.75 0.25 0 1.2 8
151073 1 12.5 9 5.5 9.50 0.80 0 8.4 8
151074 1 12.0 11 11.0 10.20 3.00 0 2.9 0
151075 1 12.5 11 5.5 10.65 2.75 0 3.6 4
151076 1 12.0 12 11.0 10.50 2.70 0 7.2 0
151077 1 13.5 12 0.0 9.50 0.90 0 4.6 8
151078 1 12.5 9 5.5 7.70 2.45 0 10.6 4
151079 1 8.5 10 0.0 10.20 3.00 0 7.0 0
151080 1 8.0 8 0.0 6.95 0.25 0 3.6 8
151081 1 11.0 9 0.0 7.70 0.25 0 7.7 8
151082 1 12.0 10 0.0 7.35 0.25 0 5.3 4
151083 1 12.0 7 0.0 8.35 1.05 0 3.6 4
151084 1 11.0 6 0.0 5.85 0.00 0 6.0 0
151085 1 12.0 7 0.0 3.00 1.15 0 1.7 8
151086 1 8.0 6 0.0 4.60 1.15 0 1.2 0
151087 1 11.0 6 0.0 6.35 0.25 0 6.0 8
151088 1 11.0 8 0.0 9.50 0.00 0 3.6 8
151089 1 8.0 5 0.0 4.85 0.25 0 3.6 4
151090 1 12.5 11 0.0 10.60 2.20 0 8.9 4
151091 2 12.5 11 0.0 4.10 1.95 0 6.0 0
151092 2 12.5 11 0.0 8.35 1.95 0 7.6 8
151093 2 11.5 11 5.5 11.45 2.75 0 7.2 8
151094 1 13.5 11 0.0 11.25 2.45 0 6.0 4
151095 1 11.5 12 5.5 11.20 2.75 0 5.3 8

151096 rows × 9 columns


In [16]:
infr[infr['P12']==2]['P12'].count()


Out[16]:
28892

In [67]:
new_columns = ['ENT','MUN','LOC','AGEB','MZA','ID_INM']
new_columns.extend(sec_lst)

In [68]:
infr_secciones = infr[new_columns]

In [44]:
infr.to_csv('infraestuctura.csv')
infr_secciones.to_csv('infr_secciones.csv')

In [39]:
def densidad_inversa(j):
    #-column+max_cal
    #infr_secciones[sec_lst[j]] = max_cal[j]-infr_secciones[sec_lst[j]]
    return max_cal[j]-infr_secciones[sec_lst[j]]

    #infr_secciones[col]

In [45]:
lst2= [11, 13, 13, 11, 14, 17, 2, 11, 8]
for i in range(9):
    print("Sección ", i+1, ":\t", max(infr_secciones[sec_lst[i]])<=lst2[i])


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-45-400b7f55768b> in <module>()
      1 lst2= [11, 13, 13, 11, 14, 17, 2, 11, 8]
      2 for i in range(9):
----> 3     print("Sección ", i+1, ":\t", max(infr_secciones[sec_lst[i]])<=lst2[i])

NameError: name 'infr_secciones' is not defined
2,3,5 11, 13, 13, 11, 14, 17, 2, 11, 8

In [58]:
#j=1
#print(min(-infrtmp.ix[:,sec_lst[j]]+max_cal[j]))
#print(max(-infrtmp.ix[:,sec_lst[j]]+max_cal[j]))
infr[sec_lst].max()


Out[58]:
seccion_1     5.0
seccion_2    13.5
seccion_3    16.0
seccion_4    11.0
seccion_5    14.0
seccion_6    10.0
seccion_7     2.0
seccion_8    11.0
seccion_9     8.0
dtype: float64

In [59]:
infr[infr[sec_lst[1]]>13][sec_lst[1]]


Out[59]:
61        13.5
64        13.5
72        13.5
239       13.5
335       13.5
492       13.5
540       13.5
544       13.5
603       13.5
613       13.5
621       13.5
650       13.5
679       13.5
681       13.5
700       13.5
738       13.5
756       13.5
814       13.5
819       13.5
987       13.5
1029      13.5
1104      13.5
1213      13.5
1214      13.5
1359      13.5
1428      13.5
1716      13.5
1773      13.5
1774      13.5
1775      13.5
          ... 
149785    13.5
149806    13.5
149860    13.5
149861    13.5
149978    13.5
150084    13.5
150085    13.5
150088    13.5
150201    13.5
150246    13.5
150268    13.5
150296    13.5
150337    13.5
150405    13.5
150406    13.5
150408    13.5
150410    13.5
150439    13.5
150544    13.5
150564    13.5
150567    13.5
150593    13.5
150626    13.5
150711    13.5
150735    13.5
150774    13.5
150815    13.5
151044    13.5
151077    13.5
151094    13.5
Name: seccion_2, dtype: float64

In [46]:
print("nan:\t",len(infr[infr['P12'].isnull()]))
print("0:\t",len(infr[infr['P12']==0]))
print("1:\t",len(infr[infr['P12']==1]))
print("2:\t",len(infr[infr['P12']==2]))
print("total:\t", len(infr['P12']))


nan:	 217
0:	 11799
1:	 110188
2:	 28892
total:	 151096

In [60]:
pd.Series(infr['P12'].values.ravel()).unique()


Out[60]:
array([  1.,   2.,   0.,  nan])

In [137]:
11059-11799


Out[137]:
-740

In [138]:
#nan:	 217    ->  nan:	 217     0
#1:	 110794     ->  1:	 110188      606
#2:	 29026      ->  2:	 28892       134
#3:	 11059      ->  0:	 11799       -740
#total:	 151096 ->  total:	 151096  0
len(infr[infr['P12'].isnull()])


Out[138]:
217

In [11]:
infr


Out[11]:
Unnamed: 0 ENT MUN LOC AGEB MZA ID_INM P2A P2B P3 ... P145 seccion_1 seccion_2 seccion_3 seccion_4 seccion_5 seccion_6 seccion_7 seccion_8 seccion_9
0 0 2 4 1 2876 23 1914 1 NaN 2 ... 0 2 7.5 11 11.0 11.75 4.80 0 8.8 4
1 1 2 4 1 2880 103 1915 1 NaN 2 ... 4 2 8.0 11 5.5 11.50 2.75 0 4.8 8
2 2 2 4 1 3677 5 1916 1 NaN 2 ... 0 2 8.5 11 5.5 11.75 2.75 0 4.8 4
3 3 2 4 1 3677 9 1917 1 NaN 2 ... 0 2 7.5 11 11.0 11.75 2.65 0 10.0 0
4 4 2 4 1 3677 23 1918 7 NaN 2 ... 4 2 7.5 11 5.5 10.70 5.35 0 8.4 8
5 5 2 4 1 3681 17 1919 1 NaN 2 ... 4 2 11.0 11 0.0 11.45 2.75 0 6.0 8
6 6 2 4 1 3681 25 1920 1 NaN 2 ... 4 2 11.5 11 0.0 8.00 4.85 0 8.2 8
7 7 2 4 1 3817 9 1921 1 NaN 2 ... 4 2 5.5 11 0.0 11.75 3.80 0 7.2 8
8 8 2 4 1 1191 39 1922 2 NaN 1 ... 4 1 12.5 11 11.0 11.50 1.40 0 1.2 4
9 9 2 4 1 1191 39 1923 1 NaN 1 ... 4 1 11.0 11 5.5 10.50 4.05 0 6.0 8
10 10 2 4 1 1191 39 1924 1 NaN 1 ... 4 1 11.5 12 0.0 12.75 3.80 0 4.8 4
11 11 2 4 1 2467 18 1925 1 NaN 1 ... 4 1 11.5 11 0.0 11.45 3.50 0 8.4 8
12 12 2 4 1 1806 57 1926 2 NaN 1 ... 4 1 8.5 11 11.0 9.50 3.75 2 8.4 8
13 13 2 4 1 1806 57 1927 1 NaN 1 ... 4 1 11.5 11 11.0 9.20 3.50 0 3.6 8
14 14 2 4 1 1806 50 1928 1 NaN 2 ... 0 2 11.5 11 0.0 11.75 3.00 0 9.6 4
15 15 2 4 1 1806 65 1929 1 NaN 1 ... 4 1 12.0 11 5.5 8.95 4.60 0 4.8 8
16 16 2 4 1 6525 19 1763 1 NaN 1 ... 4 1 11.5 11 11.0 11.75 4.30 0 9.6 8
17 17 2 4 1 3821 59 1765 1 NaN 1 ... 4 1 9.5 11 5.5 11.75 5.35 0 9.6 8
18 18 2 4 1 4266 1 1766 5 NaN 1 ... 4 1 8.5 12 0.0 11.45 5.35 0 9.6 8
19 19 2 4 1 4463 15 1767 1 NaN 1 ... 0 1 11.5 11 5.5 11.75 5.10 0 8.4 4
20 20 2 4 1 2927 29 1768 1 NaN 1 ... 0 1 11.5 12 5.5 11.75 4.80 0 0.4 4
21 21 2 4 1 5601 9 1769 1 NaN 2 ... 0 2 11.5 11 0.0 11.75 4.05 0 9.6 4
22 22 2 4 1 0210 25 1770 2 NaN 1 ... 0 1 12.5 11 0.0 9.50 2.95 0 9.6 0
23 23 2 4 1 0475 52 1771 1 NaN 1 ... 0 1 11.5 12 5.5 11.75 3.75 0 7.2 0
24 24 2 4 1 3412 2 1772 1 NaN 1 ... 0 1 11.5 11 5.5 11.75 4.30 0 7.2 4
25 25 2 4 1 0441 9 1773 1 NaN 2 ... 0 2 11.5 11 5.5 9.50 3.25 0 8.4 4
26 26 2 4 1 6582 65 1775 1 NaN 1 ... 0 1 12.5 11 0.0 9.00 1.85 0 9.6 0
27 27 2 4 1 3501 25 1776 1 NaN 2 ... 4 2 11.5 12 5.5 11.75 4.05 0 8.4 8
28 28 2 4 1 4054 34 1778 1 NaN 1 ... 0 1 11.5 11 5.5 11.75 4.05 0 7.2 0
29 29 1 5 1 0497 34 1509 8 RESPONSABLE DEL SERVICIO 2 ... 0 2 11.0 12 5.5 9.00 0.80 0 0.0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
151066 166106 31 13 1 0061 11 190871 1 NaN 1 ... 0 1 11.5 8 0.0 9.50 2.20 0 1.7 0
151067 166107 31 13 1 0061 11 190872 1 NaN 1 ... 4 1 12.5 8 0.0 9.25 3.80 0 1.0 8
151068 166108 31 13 1 0061 11 190873 2 NaN 1 ... 4 1 12.5 8 5.5 8.00 3.80 0 1.7 8
151069 166109 31 13 1 0076 4 190874 1 NaN 1 ... 4 1 12.5 9 11.0 11.75 2.45 2 2.2 4
151070 166110 31 13 1 0095 9 190875 2 NaN 1 ... 4 1 13.0 9 11.0 11.75 2.20 0 3.4 8
151071 166111 31 54 1 0030 17 191044 2 NaN 1 ... 0 1 12.0 7 5.5 11.75 2.45 0 2.2 0
151072 166112 31 54 6 0045 5 191045 8 MAESTRA DE GRUPO NO SE ENCONTRO EL DIRECTOR 1 ... 4 1 11.5 12 0.0 11.75 0.25 0 1.2 8
151073 166113 31 52 1 0182 15 191046 2 NaN 1 ... 4 1 12.5 9 5.5 9.50 0.80 0 8.4 8
151074 166114 31 50 1 5687 11 191047 2 NaN 1 ... 0 1 12.0 11 11.0 10.20 3.00 0 2.9 0
151075 166115 31 50 75 6562 13 191048 1 NaN 1 ... 0 1 12.5 11 5.5 10.65 2.75 0 3.6 4
151076 166116 31 50 75 6577 13 191049 2 NaN 1 ... 0 1 12.0 12 11.0 10.50 2.70 0 7.2 0
151077 166118 31 50 81 2042 3 191052 1 NaN 1 ... 4 1 13.5 12 0.0 9.50 0.90 0 4.6 8
151078 166119 31 50 88 2042 13 191053 1 NaN 1 ... 0 1 12.5 9 5.5 7.70 2.45 0 10.6 4
151079 166120 30 91 55 003A 5 189318 1 NaN 1 ... 0 1 8.5 10 0.0 10.20 3.00 0 7.0 0
151080 166121 30 91 55 003A 10 189319 1 NaN 1 ... 4 1 8.0 8 0.0 6.95 0.25 0 3.6 8
151081 166122 30 91 56 0063 2 189320 1 NaN 1 ... 4 1 11.0 9 0.0 7.70 0.25 0 7.7 8
151082 166123 30 91 57 003A 1 189321 1 NaN 1 ... 0 1 12.0 10 0.0 7.35 0.25 0 5.3 4
151083 166124 30 91 57 003A 1 189322 1 NaN 1 ... 0 1 12.0 7 0.0 8.35 1.05 0 3.6 4
151084 166125 30 91 61 0063 3 189323 1 NaN 1 ... 0 1 11.0 6 0.0 5.85 0.00 0 6.0 0
151085 166126 30 91 61 0063 3 189324 1 NaN 1 ... 4 1 12.0 7 0.0 3.00 1.15 0 1.7 8
151086 166127 30 91 61 0063 2 189325 1 NaN 1 ... 0 1 8.0 6 0.0 4.60 1.15 0 1.2 0
151087 166128 30 91 70 0063 4 189326 1 NaN 1 ... 4 1 11.0 6 0.0 6.35 0.25 0 6.0 8
151088 166129 30 91 76 003A 800 189327 1 NaN 1 ... 4 1 11.0 8 0.0 9.50 0.00 0 3.6 8
151089 166130 30 91 100 0082 800 189328 1 NaN 1 ... 0 1 8.0 5 0.0 4.85 0.25 0 3.6 4
151090 166132 30 3 1 0323 42 188407 1 NaN 1 ... 4 1 12.5 11 0.0 10.60 2.20 0 8.9 4
151091 166133 30 3 1 0126 4 188408 1 NaN 2 ... 0 2 12.5 11 0.0 4.10 1.95 0 6.0 0
151092 166134 30 3 1 0126 14 188409 1 NaN 2 ... 4 2 12.5 11 0.0 8.35 1.95 0 7.6 8
151093 166135 30 3 1 0130 35 188410 1 NaN 2 ... 4 2 11.5 11 5.5 11.45 2.75 0 7.2 8
151094 166136 30 3 1 015A 25 188411 1 NaN 1 ... 0 1 13.5 11 0.0 11.25 2.45 0 6.0 4
151095 166137 30 3 1 015A 27 188412 1 NaN 1 ... 4 1 11.5 12 5.5 11.20 2.75 0 5.3 8

151096 rows × 169 columns


In [41]:
infr.ix[(infr.P22==3)& (infr.P20==3),'P20']=0

In [42]:
infr.to_csv("infra_corregido.csv")

In [44]:
df = pd.read_excel('INMUEBLES.xlsx')


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-44-cd9dfe73e5d1> in <module>()
----> 1 df = pd.read_excel('INMUEBLES.xlsx')

/Users/luis/anaconda/lib/python3.4/site-packages/pandas/io/excel.py in read_excel(io, sheetname, header, skiprows, skip_footer, index_col, parse_cols, parse_dates, date_parser, na_values, thousands, convert_float, has_index_names, converters, engine, **kwds)
    161 
    162     if not isinstance(io, ExcelFile):
--> 163         io = ExcelFile(io, engine=engine)
    164 
    165     return io._parse_excel(

/Users/luis/anaconda/lib/python3.4/site-packages/pandas/io/excel.py in __init__(self, io, **kwds)
    204                 self.book = xlrd.open_workbook(file_contents=data)
    205             else:
--> 206                 self.book = xlrd.open_workbook(io)
    207         elif engine == 'xlrd' and isinstance(io, xlrd.Book):
    208             self.book = io

/Users/luis/anaconda/lib/python3.4/site-packages/xlrd/__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
    420                 formatting_info=formatting_info,
    421                 on_demand=on_demand,
--> 422                 ragged_rows=ragged_rows,
    423                 )
    424             return bk

/Users/luis/anaconda/lib/python3.4/site-packages/xlrd/xlsx.py in open_workbook_2007_xml(zf, component_names, logfile, verbosity, use_mmap, formatting_info, on_demand, ragged_rows)
    792         x12sheet = X12Sheet(sheet, logfile, verbosity)
    793         heading = "Sheet %r (sheetx=%d) from %r" % (sheet.name, sheetx, fname)
--> 794         x12sheet.process_stream(zflo, heading)
    795         del zflo
    796         comments_fname = 'xl/comments%d.xml' % (sheetx + 1)

/Users/luis/anaconda/lib/python3.4/site-packages/xlrd/xlsx.py in own_process_stream(self, stream, heading)
    529         for event, elem in ET.iterparse(stream):
    530             if elem.tag == row_tag:
--> 531                 self_do_row(elem)
    532                 elem.clear() # destroy all child elements (cells)
    533             elif elem.tag == U_SSML12 + "dimension":

/Users/luis/anaconda/lib/python3.4/site-packages/xlrd/xlsx.py in do_row(self, row_elem)
    641                     child_tag = child.tag
    642                     if child_tag == V_TAG:
--> 643                         tvalue = child.text
    644                     elif child_tag == F_TAG:
    645                         formula = cooked_text(self, child)

KeyboardInterrupt: 

In [ ]:
df = df[df['P3']<6]
df.ix[df.P12>2, 'P12'] = 3
df['P12'].replace({1: 1.5, 2: 1, 3: 0},inplace=True)
len(df['P12'])

In [49]:
max(infr[sec_lst[2]])


Out[49]:
16.0

In [ ]: