In [36]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
from numpy import nan
plt.style.use('ggplot')
import itertools

In [37]:
from IPython.core.display import HTML
css = open('style-table.css').read() + open('style-notebook.css').read()
HTML('<style>{}</style>'.format(css))


Out[37]:

In [3]:
infr = pd.read_csv("infraestructura.csv")

In [29]:
secciones = [['P3'], ['P11', 'P12', 'P13A', 'P14', 'P15', 'P16'], ['P17A', 'P18A', 'P19', 'P20', 'P21', 'P22'], ['P23', 'P24'], ['P25', 'P26', 'P27', 'P28', 'P29', 'P30', 'P31', 'P32', 'P33', 'P34', 'P35', 'P36', 'P37', 'P38', 'P39', 'P40', 'P41'], ['P42', 'P44', 'P46', 'P47', 'P48', 'P49', 'P52', 'P62', 'P72', 'P82', 'P92', 'P102', 'P103', 'P112', 'P113', 'P117', 'P122', 'P123', 'P125'], ['P126'], ['P133', 'P134', 'P135', 'P136', 'P137', 'P138', 'P139', 'P140', 'P141', 'P142', 'P143'], ['P144', 'P145']]
max_cal = [11, 13, 13, 11, 14, 17, 2, 11, 8]

In [30]:
i=1
for seccion in secciones:
    infr['seccion_'+str(i)]=infr[seccion].sum(axis=1)
    i+=1

In [31]:
sec_lst = []
for i in range(1,10):
    sec_lst.append('seccion_'+str(i)) 
#dict(zip([1,2,3,4], [a,b,c,d]))

In [32]:
infr[sec_lst]


Out[32]:
seccion_1 seccion_2 seccion_3 seccion_4 seccion_5 seccion_6 seccion_7 seccion_8 seccion_9
0 8 8.0 11 11.0 11.75 4.80 0 8.8 4
1 8 8.5 11 5.5 11.50 2.75 0 4.8 8
2 8 9.0 11 5.5 11.75 2.75 0 4.8 4
3 8 8.0 11 11.0 11.75 2.65 0 10.0 0
4 8 8.0 11 5.5 10.70 5.35 0 8.4 8
5 8 11.5 11 0.0 11.45 2.75 0 6.0 8
6 8 12.0 11 0.0 8.00 4.85 0 8.2 8
7 8 6.0 11 0.0 11.75 3.80 0 7.2 8
8 11 11.5 11 11.0 11.50 1.40 0 1.2 4
9 11 11.5 11 5.5 10.50 4.05 0 6.0 8
10 11 12.0 12 0.0 12.75 3.80 0 4.8 4
11 11 12.0 11 0.0 11.45 3.50 0 8.4 8
12 11 9.0 11 11.0 9.50 3.75 2 8.4 8
13 11 12.0 11 11.0 9.20 3.50 0 3.6 8
14 8 12.0 11 0.0 11.75 3.00 0 9.6 4
15 11 11.0 11 5.5 8.95 4.60 0 4.8 8
16 11 12.0 11 11.0 11.75 4.30 0 9.6 8
17 11 10.0 11 5.5 11.75 5.35 0 9.6 8
18 11 9.0 12 0.0 11.45 5.35 0 9.6 8
19 11 12.0 11 5.5 11.75 5.10 0 8.4 4
20 11 12.0 12 5.5 11.75 4.80 0 0.4 4
21 8 12.0 11 0.0 11.75 4.05 0 9.6 4
22 11 13.0 11 0.0 9.50 2.95 0 9.6 0
23 11 12.0 12 5.5 11.75 3.75 0 7.2 0
24 11 12.0 11 5.5 11.75 4.30 0 7.2 4
25 8 12.0 11 5.5 9.50 3.25 0 8.4 4
26 11 13.0 11 0.0 9.00 1.85 0 9.6 0
27 8 12.0 12 5.5 11.75 4.05 0 8.4 8
28 11 12.0 11 5.5 11.75 4.05 0 7.2 0
29 8 11.5 12 5.5 9.00 0.80 0 0.0 0
... ... ... ... ... ... ... ... ... ...
151066 11 12.0 8 0.0 9.50 2.20 0 1.7 0
151067 11 13.0 8 0.0 9.25 3.80 0 1.0 8
151068 11 13.0 8 5.5 8.00 3.80 0 1.7 8
151069 11 13.0 9 11.0 11.75 2.45 2 2.2 4
151070 11 12.0 9 11.0 11.75 2.20 0 3.4 8
151071 11 12.5 7 5.5 11.75 2.45 0 2.2 0
151072 11 12.0 12 0.0 11.75 0.25 0 1.2 8
151073 11 13.0 9 5.5 9.50 0.80 0 8.4 8
151074 11 12.5 11 11.0 10.20 3.00 0 2.9 0
151075 11 13.0 11 5.5 10.65 2.75 0 3.6 4
151076 11 12.5 12 11.0 10.50 2.70 0 7.2 0
151077 11 12.5 12 0.0 9.50 0.90 0 4.6 8
151078 11 13.0 9 5.5 7.70 2.45 0 10.6 4
151079 11 9.0 10 0.0 10.20 3.00 0 7.0 0
151080 11 8.5 8 0.0 6.95 0.25 0 3.6 8
151081 11 11.5 9 0.0 7.70 0.25 0 7.7 8
151082 11 11.0 10 0.0 7.35 0.25 0 5.3 4
151083 11 11.0 7 0.0 8.35 1.05 0 3.6 4
151084 11 10.0 6 0.0 5.85 0.00 0 6.0 0
151085 11 11.0 7 0.0 3.00 1.15 0 1.7 8
151086 11 7.0 6 0.0 4.60 1.15 0 1.2 0
151087 11 11.5 6 0.0 6.35 0.25 0 6.0 8
151088 11 11.5 8 0.0 9.50 0.00 0 3.6 8
151089 11 8.5 5 0.0 4.85 0.25 0 3.6 4
151090 11 13.0 11 0.0 10.60 2.20 0 8.9 4
151091 8 11.5 11 0.0 4.10 1.95 0 6.0 0
151092 8 11.5 11 0.0 8.35 1.95 0 7.6 8
151093 8 12.0 11 5.5 11.45 2.75 0 7.2 8
151094 11 12.5 11 0.0 11.25 2.45 0 6.0 4
151095 11 12.0 12 5.5 11.20 2.75 0 5.3 8

151096 rows × 9 columns


In [33]:
infr[infr['P12']==2]['P12'].count()


Out[33]:
0

In [34]:
new_columns = ['ENT','MUN','LOC','AGEB','MZA','ID_INM']
new_columns.extend(sec_lst)

In [35]:
infr_secciones = infr[new_columns]

In [44]:
infr.to_csv('infraestuctura.csv')
infr_secciones.to_csv('infr_secciones.csv')

In [39]:
def densidad_inversa(j):
    #-column+max_cal
    #infr_secciones[sec_lst[j]] = max_cal[j]-infr_secciones[sec_lst[j]]
    return max_cal[j]-infr_secciones[sec_lst[j]]

    #infr_secciones[col]

In [45]:
lst2= [11, 13, 13, 11, 14, 17, 2, 11, 8]
for i in range(9):
    print("Sección ", i+1, ":\t", max(infr_secciones[sec_lst[i]])<=lst2[i])


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-45-400b7f55768b> in <module>()
      1 lst2= [11, 13, 13, 11, 14, 17, 2, 11, 8]
      2 for i in range(9):
----> 3     print("Sección ", i+1, ":\t", max(infr_secciones[sec_lst[i]])<=lst2[i])

NameError: name 'infr_secciones' is not defined
2,3,5 11, 13, 13, 11, 14, 17, 2, 11, 8

In [58]:
#j=1
#print(min(-infrtmp.ix[:,sec_lst[j]]+max_cal[j]))
#print(max(-infrtmp.ix[:,sec_lst[j]]+max_cal[j]))
infr[sec_lst].max()


Out[58]:
seccion_1     5.0
seccion_2    13.5
seccion_3    16.0
seccion_4    11.0
seccion_5    14.0
seccion_6    10.0
seccion_7     2.0
seccion_8    11.0
seccion_9     8.0
dtype: float64

In [59]:
infr[infr[sec_lst[1]]>13][sec_lst[1]]


Out[59]:
61        13.5
64        13.5
72        13.5
239       13.5
335       13.5
492       13.5
540       13.5
544       13.5
603       13.5
613       13.5
621       13.5
650       13.5
679       13.5
681       13.5
700       13.5
738       13.5
756       13.5
814       13.5
819       13.5
987       13.5
1029      13.5
1104      13.5
1213      13.5
1214      13.5
1359      13.5
1428      13.5
1716      13.5
1773      13.5
1774      13.5
1775      13.5
          ... 
149785    13.5
149806    13.5
149860    13.5
149861    13.5
149978    13.5
150084    13.5
150085    13.5
150088    13.5
150201    13.5
150246    13.5
150268    13.5
150296    13.5
150337    13.5
150405    13.5
150406    13.5
150408    13.5
150410    13.5
150439    13.5
150544    13.5
150564    13.5
150567    13.5
150593    13.5
150626    13.5
150711    13.5
150735    13.5
150774    13.5
150815    13.5
151044    13.5
151077    13.5
151094    13.5
Name: seccion_2, dtype: float64

In [46]:
print("nan:\t",len(infr[infr['P12'].isnull()]))
print("0:\t",len(infr[infr['P12']==0]))
print("1:\t",len(infr[infr['P12']==1]))
print("2:\t",len(infr[infr['P12']==2]))
print("total:\t", len(infr['P12']))


nan:	 217
0:	 11799
1:	 110188
2:	 28892
total:	 151096

In [60]:
pd.Series(infr['P12'].values.ravel()).unique()


Out[60]:
array([  1.,   2.,   0.,  nan])

In [137]:
11059-11799


Out[137]:
-740

In [138]:
#nan:	 217    ->  nan:	 217     0
#1:	 110794     ->  1:	 110188      606
#2:	 29026      ->  2:	 28892       134
#3:	 11059      ->  0:	 11799       -740
#total:	 151096 ->  total:	 151096  0
len(infr[infr['P12'].isnull()])


Out[138]:
217

In [18]:
infr['P3'].replace({1:11, 2:8, 3:3, 4:0, 5:0},inplace=True)

In [41]:
infr.ix[(infr.P22==3)& (infr.P20==3),'P20']=0

In [28]:
infr.to_csv("infra_corregido.csv")

In [44]:
df = pd.read_excel('INMUEBLES.xlsx')


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-44-cd9dfe73e5d1> in <module>()
----> 1 df = pd.read_excel('INMUEBLES.xlsx')

/Users/luis/anaconda/lib/python3.4/site-packages/pandas/io/excel.py in read_excel(io, sheetname, header, skiprows, skip_footer, index_col, parse_cols, parse_dates, date_parser, na_values, thousands, convert_float, has_index_names, converters, engine, **kwds)
    161 
    162     if not isinstance(io, ExcelFile):
--> 163         io = ExcelFile(io, engine=engine)
    164 
    165     return io._parse_excel(

/Users/luis/anaconda/lib/python3.4/site-packages/pandas/io/excel.py in __init__(self, io, **kwds)
    204                 self.book = xlrd.open_workbook(file_contents=data)
    205             else:
--> 206                 self.book = xlrd.open_workbook(io)
    207         elif engine == 'xlrd' and isinstance(io, xlrd.Book):
    208             self.book = io

/Users/luis/anaconda/lib/python3.4/site-packages/xlrd/__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
    420                 formatting_info=formatting_info,
    421                 on_demand=on_demand,
--> 422                 ragged_rows=ragged_rows,
    423                 )
    424             return bk

/Users/luis/anaconda/lib/python3.4/site-packages/xlrd/xlsx.py in open_workbook_2007_xml(zf, component_names, logfile, verbosity, use_mmap, formatting_info, on_demand, ragged_rows)
    792         x12sheet = X12Sheet(sheet, logfile, verbosity)
    793         heading = "Sheet %r (sheetx=%d) from %r" % (sheet.name, sheetx, fname)
--> 794         x12sheet.process_stream(zflo, heading)
    795         del zflo
    796         comments_fname = 'xl/comments%d.xml' % (sheetx + 1)

/Users/luis/anaconda/lib/python3.4/site-packages/xlrd/xlsx.py in own_process_stream(self, stream, heading)
    529         for event, elem in ET.iterparse(stream):
    530             if elem.tag == row_tag:
--> 531                 self_do_row(elem)
    532                 elem.clear() # destroy all child elements (cells)
    533             elif elem.tag == U_SSML12 + "dimension":

/Users/luis/anaconda/lib/python3.4/site-packages/xlrd/xlsx.py in do_row(self, row_elem)
    641                     child_tag = child.tag
    642                     if child_tag == V_TAG:
--> 643                         tvalue = child.text
    644                     elif child_tag == F_TAG:
    645                         formula = cooked_text(self, child)

KeyboardInterrupt: 

In [ ]:
df = df[df['P3']<6]
df.ix[df.P12>2, 'P12'] = 3
df['P12'].replace({1: 1.5, 2: 1, 3: 0},inplace=True)
len(df['P12'])

In [49]:
max(infr[sec_lst[2]])


Out[49]:
16.0