notebook.community

Edit and run



In [36]:

    
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
from numpy import nan
plt.style.use('ggplot')
import itertools



In [37]:

    
from IPython.core.display import HTML
css = open('style-table.css').read() + open('style-notebook.css').read()
HTML('<style>{}</style>'.format(css))









    Out[37]:



In [3]:

    
infr = pd.read_csv("infraestructura.csv")



In [29]:

    
secciones = [['P3'], ['P11', 'P12', 'P13A', 'P14', 'P15', 'P16'], ['P17A', 'P18A', 'P19', 'P20', 'P21', 'P22'], ['P23', 'P24'], ['P25', 'P26', 'P27', 'P28', 'P29', 'P30', 'P31', 'P32', 'P33', 'P34', 'P35', 'P36', 'P37', 'P38', 'P39', 'P40', 'P41'], ['P42', 'P44', 'P46', 'P47', 'P48', 'P49', 'P52', 'P62', 'P72', 'P82', 'P92', 'P102', 'P103', 'P112', 'P113', 'P117', 'P122', 'P123', 'P125'], ['P126'], ['P133', 'P134', 'P135', 'P136', 'P137', 'P138', 'P139', 'P140', 'P141', 'P142', 'P143'], ['P144', 'P145']]
max_cal = [11, 13, 13, 11, 14, 17, 2, 11, 8]



In [30]:

    
i=1
for seccion in secciones:
    infr['seccion_'+str(i)]=infr[seccion].sum(axis=1)
    i+=1



In [31]:

    
sec_lst = []
for i in range(1,10):
    sec_lst.append('seccion_'+str(i)) 
#dict(zip([1,2,3,4], [a,b,c,d]))



In [32]:

    
infr[sec_lst]









    Out[32]:






  
    
      
      seccion_1
      seccion_2
      seccion_3
      seccion_4
      seccion_5
      seccion_6
      seccion_7
      seccion_8
      seccion_9
    
  
  
    
      0
      8
      8.0
      11
      11.0
      11.75
      4.80
      0
      8.8
      4
    
    
      1
      8
      8.5
      11
      5.5
      11.50
      2.75
      0
      4.8
      8
    
    
      2
      8
      9.0
      11
      5.5
      11.75
      2.75
      0
      4.8
      4
    
    
      3
      8
      8.0
      11
      11.0
      11.75
      2.65
      0
      10.0
      0
    
    
      4
      8
      8.0
      11
      5.5
      10.70
      5.35
      0
      8.4
      8
    
    
      5
      8
      11.5
      11
      0.0
      11.45
      2.75
      0
      6.0
      8
    
    
      6
      8
      12.0
      11
      0.0
      8.00
      4.85
      0
      8.2
      8
    
    
      7
      8
      6.0
      11
      0.0
      11.75
      3.80
      0
      7.2
      8
    
    
      8
      11
      11.5
      11
      11.0
      11.50
      1.40
      0
      1.2
      4
    
    
      9
      11
      11.5
      11
      5.5
      10.50
      4.05
      0
      6.0
      8
    
    
      10
      11
      12.0
      12
      0.0
      12.75
      3.80
      0
      4.8
      4
    
    
      11
      11
      12.0
      11
      0.0
      11.45
      3.50
      0
      8.4
      8
    
    
      12
      11
      9.0
      11
      11.0
      9.50
      3.75
      2
      8.4
      8
    
    
      13
      11
      12.0
      11
      11.0
      9.20
      3.50
      0
      3.6
      8
    
    
      14
      8
      12.0
      11
      0.0
      11.75
      3.00
      0
      9.6
      4
    
    
      15
      11
      11.0
      11
      5.5
      8.95
      4.60
      0
      4.8
      8
    
    
      16
      11
      12.0
      11
      11.0
      11.75
      4.30
      0
      9.6
      8
    
    
      17
      11
      10.0
      11
      5.5
      11.75
      5.35
      0
      9.6
      8
    
    
      18
      11
      9.0
      12
      0.0
      11.45
      5.35
      0
      9.6
      8
    
    
      19
      11
      12.0
      11
      5.5
      11.75
      5.10
      0
      8.4
      4
    
    
      20
      11
      12.0
      12
      5.5
      11.75
      4.80
      0
      0.4
      4
    
    
      21
      8
      12.0
      11
      0.0
      11.75
      4.05
      0
      9.6
      4
    
    
      22
      11
      13.0
      11
      0.0
      9.50
      2.95
      0
      9.6
      0
    
    
      23
      11
      12.0
      12
      5.5
      11.75
      3.75
      0
      7.2
      0
    
    
      24
      11
      12.0
      11
      5.5
      11.75
      4.30
      0
      7.2
      4
    
    
      25
      8
      12.0
      11
      5.5
      9.50
      3.25
      0
      8.4
      4
    
    
      26
      11
      13.0
      11
      0.0
      9.00
      1.85
      0
      9.6
      0
    
    
      27
      8
      12.0
      12
      5.5
      11.75
      4.05
      0
      8.4
      8
    
    
      28
      11
      12.0
      11
      5.5
      11.75
      4.05
      0
      7.2
      0
    
    
      29
      8
      11.5
      12
      5.5
      9.00
      0.80
      0
      0.0
      0
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      151066
      11
      12.0
      8
      0.0
      9.50
      2.20
      0
      1.7
      0
    
    
      151067
      11
      13.0
      8
      0.0
      9.25
      3.80
      0
      1.0
      8
    
    
      151068
      11
      13.0
      8
      5.5
      8.00
      3.80
      0
      1.7
      8
    
    
      151069
      11
      13.0
      9
      11.0
      11.75
      2.45
      2
      2.2
      4
    
    
      151070
      11
      12.0
      9
      11.0
      11.75
      2.20
      0
      3.4
      8
    
    
      151071
      11
      12.5
      7
      5.5
      11.75
      2.45
      0
      2.2
      0
    
    
      151072
      11
      12.0
      12
      0.0
      11.75
      0.25
      0
      1.2
      8
    
    
      151073
      11
      13.0
      9
      5.5
      9.50
      0.80
      0
      8.4
      8
    
    
      151074
      11
      12.5
      11
      11.0
      10.20
      3.00
      0
      2.9
      0
    
    
      151075
      11
      13.0
      11
      5.5
      10.65
      2.75
      0
      3.6
      4
    
    
      151076
      11
      12.5
      12
      11.0
      10.50
      2.70
      0
      7.2
      0
    
    
      151077
      11
      12.5
      12
      0.0
      9.50
      0.90
      0
      4.6
      8
    
    
      151078
      11
      13.0
      9
      5.5
      7.70
      2.45
      0
      10.6
      4
    
    
      151079
      11
      9.0
      10
      0.0
      10.20
      3.00
      0
      7.0
      0
    
    
      151080
      11
      8.5
      8
      0.0
      6.95
      0.25
      0
      3.6
      8
    
    
      151081
      11
      11.5
      9
      0.0
      7.70
      0.25
      0
      7.7
      8
    
    
      151082
      11
      11.0
      10
      0.0
      7.35
      0.25
      0
      5.3
      4
    
    
      151083
      11
      11.0
      7
      0.0
      8.35
      1.05
      0
      3.6
      4
    
    
      151084
      11
      10.0
      6
      0.0
      5.85
      0.00
      0
      6.0
      0
    
    
      151085
      11
      11.0
      7
      0.0
      3.00
      1.15
      0
      1.7
      8
    
    
      151086
      11
      7.0
      6
      0.0
      4.60
      1.15
      0
      1.2
      0
    
    
      151087
      11
      11.5
      6
      0.0
      6.35
      0.25
      0
      6.0
      8
    
    
      151088
      11
      11.5
      8
      0.0
      9.50
      0.00
      0
      3.6
      8
    
    
      151089
      11
      8.5
      5
      0.0
      4.85
      0.25
      0
      3.6
      4
    
    
      151090
      11
      13.0
      11
      0.0
      10.60
      2.20
      0
      8.9
      4
    
    
      151091
      8
      11.5
      11
      0.0
      4.10
      1.95
      0
      6.0
      0
    
    
      151092
      8
      11.5
      11
      0.0
      8.35
      1.95
      0
      7.6
      8
    
    
      151093
      8
      12.0
      11
      5.5
      11.45
      2.75
      0
      7.2
      8
    
    
      151094
      11
      12.5
      11
      0.0
      11.25
      2.45
      0
      6.0
      4
    
    
      151095
      11
      12.0
      12
      5.5
      11.20
      2.75
      0
      5.3
      8
    
  

151096 rows × 9 columns



In [33]:

    
infr[infr['P12']==2]['P12'].count()









    Out[33]:





0



In [34]:

    
new_columns = ['ENT','MUN','LOC','AGEB','MZA','ID_INM']
new_columns.extend(sec_lst)



In [35]:

    
infr_secciones = infr[new_columns]



In [44]:

    
infr.to_csv('infraestuctura.csv')
infr_secciones.to_csv('infr_secciones.csv')



In [39]:

    
def densidad_inversa(j):
    #-column+max_cal
    #infr_secciones[sec_lst[j]] = max_cal[j]-infr_secciones[sec_lst[j]]
    return max_cal[j]-infr_secciones[sec_lst[j]]

    #infr_secciones[col]



In [45]:

    
lst2= [11, 13, 13, 11, 14, 17, 2, 11, 8]
for i in range(9):
    print("Sección ", i+1, ":\t", max(infr_secciones[sec_lst[i]])<=lst2[i])









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-45-400b7f55768b> in <module>()
      1 lst2= [11, 13, 13, 11, 14, 17, 2, 11, 8]
      2 for i in range(9):
----> 3     print("Sección ", i+1, ":\t", max(infr_secciones[sec_lst[i]])<=lst2[i])

NameError: name 'infr_secciones' is not defined

2,3,5 11, 13, 13, 11, 14, 17, 2, 11, 8



In [58]:

    
#j=1
#print(min(-infrtmp.ix[:,sec_lst[j]]+max_cal[j]))
#print(max(-infrtmp.ix[:,sec_lst[j]]+max_cal[j]))
infr[sec_lst].max()









    Out[58]:





seccion_1     5.0
seccion_2    13.5
seccion_3    16.0
seccion_4    11.0
seccion_5    14.0
seccion_6    10.0
seccion_7     2.0
seccion_8    11.0
seccion_9     8.0
dtype: float64



In [59]:

    
infr[infr[sec_lst[1]]>13][sec_lst[1]]









    Out[59]:





61        13.5
64        13.5
72        13.5
239       13.5
335       13.5
492       13.5
540       13.5
544       13.5
603       13.5
613       13.5
621       13.5
650       13.5
679       13.5
681       13.5
700       13.5
738       13.5
756       13.5
814       13.5
819       13.5
987       13.5
1029      13.5
1104      13.5
1213      13.5
1214      13.5
1359      13.5
1428      13.5
1716      13.5
1773      13.5
1774      13.5
1775      13.5
          ... 
149785    13.5
149806    13.5
149860    13.5
149861    13.5
149978    13.5
150084    13.5
150085    13.5
150088    13.5
150201    13.5
150246    13.5
150268    13.5
150296    13.5
150337    13.5
150405    13.5
150406    13.5
150408    13.5
150410    13.5
150439    13.5
150544    13.5
150564    13.5
150567    13.5
150593    13.5
150626    13.5
150711    13.5
150735    13.5
150774    13.5
150815    13.5
151044    13.5
151077    13.5
151094    13.5
Name: seccion_2, dtype: float64



In [46]:

    
print("nan:\t",len(infr[infr['P12'].isnull()]))
print("0:\t",len(infr[infr['P12']==0]))
print("1:\t",len(infr[infr['P12']==1]))
print("2:\t",len(infr[infr['P12']==2]))
print("total:\t", len(infr['P12']))









    



nan:	 217
0:	 11799
1:	 110188
2:	 28892
total:	 151096



In [60]:

    
pd.Series(infr['P12'].values.ravel()).unique()









    Out[60]:





array([  1.,   2.,   0.,  nan])



In [137]:

    
11059-11799









    Out[137]:





-740



In [138]:

    
#nan:	 217    ->  nan:	 217     0
#1:	 110794     ->  1:	 110188      606
#2:	 29026      ->  2:	 28892       134
#3:	 11059      ->  0:	 11799       -740
#total:	 151096 ->  total:	 151096  0
len(infr[infr['P12'].isnull()])









    Out[138]:





217



In [18]:

    
infr['P3'].replace({1:11, 2:8, 3:3, 4:0, 5:0},inplace=True)



In [41]:

    
infr.ix[(infr.P22==3)& (infr.P20==3),'P20']=0



In [28]:

    
infr.to_csv("infra_corregido.csv")



In [44]:

    
df = pd.read_excel('INMUEBLES.xlsx')









    



---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-44-cd9dfe73e5d1> in <module>()
----> 1 df = pd.read_excel('INMUEBLES.xlsx')

/Users/luis/anaconda/lib/python3.4/site-packages/pandas/io/excel.py in read_excel(io, sheetname, header, skiprows, skip_footer, index_col, parse_cols, parse_dates, date_parser, na_values, thousands, convert_float, has_index_names, converters, engine, **kwds)
    161 
    162     if not isinstance(io, ExcelFile):
--> 163         io = ExcelFile(io, engine=engine)
    164 
    165     return io._parse_excel(

/Users/luis/anaconda/lib/python3.4/site-packages/pandas/io/excel.py in __init__(self, io, **kwds)
    204                 self.book = xlrd.open_workbook(file_contents=data)
    205             else:
--> 206                 self.book = xlrd.open_workbook(io)
    207         elif engine == 'xlrd' and isinstance(io, xlrd.Book):
    208             self.book = io

/Users/luis/anaconda/lib/python3.4/site-packages/xlrd/__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
    420                 formatting_info=formatting_info,
    421                 on_demand=on_demand,
--> 422                 ragged_rows=ragged_rows,
    423                 )
    424             return bk

/Users/luis/anaconda/lib/python3.4/site-packages/xlrd/xlsx.py in open_workbook_2007_xml(zf, component_names, logfile, verbosity, use_mmap, formatting_info, on_demand, ragged_rows)
    792         x12sheet = X12Sheet(sheet, logfile, verbosity)
    793         heading = "Sheet %r (sheetx=%d) from %r" % (sheet.name, sheetx, fname)
--> 794         x12sheet.process_stream(zflo, heading)
    795         del zflo
    796         comments_fname = 'xl/comments%d.xml' % (sheetx + 1)

/Users/luis/anaconda/lib/python3.4/site-packages/xlrd/xlsx.py in own_process_stream(self, stream, heading)
    529         for event, elem in ET.iterparse(stream):
    530             if elem.tag == row_tag:
--> 531                 self_do_row(elem)
    532                 elem.clear() # destroy all child elements (cells)
    533             elif elem.tag == U_SSML12 + "dimension":

/Users/luis/anaconda/lib/python3.4/site-packages/xlrd/xlsx.py in do_row(self, row_elem)
    641                     child_tag = child.tag
    642                     if child_tag == V_TAG:
--> 643                         tvalue = child.text
    644                     elif child_tag == F_TAG:
    645                         formula = cooked_text(self, child)

KeyboardInterrupt:



In [ ]:

    
df = df[df['P3']<6]
df.ix[df.P12>2, 'P12'] = 3
df['P12'].replace({1: 1.5, 2: 1, 3: 0},inplace=True)
len(df['P12'])



In [49]:

    
max(infr[sec_lst[2]])









    Out[49]:





16.0

	seccion_1	seccion_2	seccion_3	seccion_4	seccion_5	seccion_6	seccion_7	seccion_8	seccion_9
0	8	8.0	11	11.0	11.75	4.80	0	8.8	4
1	8	8.5	11	5.5	11.50	2.75	0	4.8	8
2	8	9.0	11	5.5	11.75	2.75	0	4.8	4
3	8	8.0	11	11.0	11.75	2.65	0	10.0	0
4	8	8.0	11	5.5	10.70	5.35	0	8.4	8
5	8	11.5	11	0.0	11.45	2.75	0	6.0	8
6	8	12.0	11	0.0	8.00	4.85	0	8.2	8
7	8	6.0	11	0.0	11.75	3.80	0	7.2	8
8	11	11.5	11	11.0	11.50	1.40	0	1.2	4
9	11	11.5	11	5.5	10.50	4.05	0	6.0	8
10	11	12.0	12	0.0	12.75	3.80	0	4.8	4
11	11	12.0	11	0.0	11.45	3.50	0	8.4	8
12	11	9.0	11	11.0	9.50	3.75	2	8.4	8
13	11	12.0	11	11.0	9.20	3.50	0	3.6	8
14	8	12.0	11	0.0	11.75	3.00	0	9.6	4
15	11	11.0	11	5.5	8.95	4.60	0	4.8	8
16	11	12.0	11	11.0	11.75	4.30	0	9.6	8
17	11	10.0	11	5.5	11.75	5.35	0	9.6	8
18	11	9.0	12	0.0	11.45	5.35	0	9.6	8
19	11	12.0	11	5.5	11.75	5.10	0	8.4	4
20	11	12.0	12	5.5	11.75	4.80	0	0.4	4
21	8	12.0	11	0.0	11.75	4.05	0	9.6	4
22	11	13.0	11	0.0	9.50	2.95	0	9.6	0
23	11	12.0	12	5.5	11.75	3.75	0	7.2	0
24	11	12.0	11	5.5	11.75	4.30	0	7.2	4
25	8	12.0	11	5.5	9.50	3.25	0	8.4	4
26	11	13.0	11	0.0	9.00	1.85	0	9.6	0
27	8	12.0	12	5.5	11.75	4.05	0	8.4	8
28	11	12.0	11	5.5	11.75	4.05	0	7.2	0
29	8	11.5	12	5.5	9.00	0.80	0	0.0	0
...	...	...	...	...	...	...	...	...	...
151066	11	12.0	8	0.0	9.50	2.20	0	1.7	0
151067	11	13.0	8	0.0	9.25	3.80	0	1.0	8
151068	11	13.0	8	5.5	8.00	3.80	0	1.7	8
151069	11	13.0	9	11.0	11.75	2.45	2	2.2	4
151070	11	12.0	9	11.0	11.75	2.20	0	3.4	8
151071	11	12.5	7	5.5	11.75	2.45	0	2.2	0
151072	11	12.0	12	0.0	11.75	0.25	0	1.2	8
151073	11	13.0	9	5.5	9.50	0.80	0	8.4	8
151074	11	12.5	11	11.0	10.20	3.00	0	2.9	0
151075	11	13.0	11	5.5	10.65	2.75	0	3.6	4
151076	11	12.5	12	11.0	10.50	2.70	0	7.2	0
151077	11	12.5	12	0.0	9.50	0.90	0	4.6	8
151078	11	13.0	9	5.5	7.70	2.45	0	10.6	4
151079	11	9.0	10	0.0	10.20	3.00	0	7.0	0
151080	11	8.5	8	0.0	6.95	0.25	0	3.6	8
151081	11	11.5	9	0.0	7.70	0.25	0	7.7	8
151082	11	11.0	10	0.0	7.35	0.25	0	5.3	4
151083	11	11.0	7	0.0	8.35	1.05	0	3.6	4
151084	11	10.0	6	0.0	5.85	0.00	0	6.0	0
151085	11	11.0	7	0.0	3.00	1.15	0	1.7	8
151086	11	7.0	6	0.0	4.60	1.15	0	1.2	0
151087	11	11.5	6	0.0	6.35	0.25	0	6.0	8
151088	11	11.5	8	0.0	9.50	0.00	0	3.6	8
151089	11	8.5	5	0.0	4.85	0.25	0	3.6	4
151090	11	13.0	11	0.0	10.60	2.20	0	8.9	4
151091	8	11.5	11	0.0	4.10	1.95	0	6.0	0
151092	8	11.5	11	0.0	8.35	1.95	0	7.6	8
151093	8	12.0	11	5.5	11.45	2.75	0	7.2	8
151094	11	12.5	11	0.0	11.25	2.45	0	6.0	4
151095	11	12.0	12	5.5	11.20	2.75	0	5.3	8