In [1]:
import pandas as pd
import numpy as np
import os
import sys
import simpledbf
In [2]:
# Tag of the trimestre (quarter) to analyse; it selects the input file name.
trimestre = 't310'
# Open the individual-level DBF table, decoding its text fields as latin1.
dbf = simpledbf.Dbf5(
    'data/Individual_' + trimestre + '.dbf',
    codec='latin1',
)
# Load the whole table into a DataFrame.
indRaw = dbf.to_dataframe()
In [3]:
# Preview the first five rows of the raw individual-level table.
indRaw.head(n=5)
Out[3]:
In [4]:
# Keep only the rows whose NRO_HOGAR is greater than 1.
# NOTE(review): presumably these are households beyond the first one in a
# dwelling -- confirm against the survey codebook.
mask = indRaw.NRO_HOGAR > 1
hog2 = indRaw.loc[mask, :]
# print() is called as a function: the bare `print x` statement is a
# SyntaxError on Python 3, while this form prints the same tuple on both
# Python 2 and Python 3.
print(hog2.shape)
hog2.head()
Out[4]:
In [8]:
# Frequency of each PP04B1 value among the NRO_HOGAR > 1 rows.
hog2['PP04B1'].value_counts()
Out[8]:
In [11]:
# Frequency of each CH03 value over the full raw table, as a baseline.
indRaw['CH03'].value_counts()
Out[11]:
In [9]:
# Frequency of each CH03 value restricted to the NRO_HOGAR > 1 rows.
hog2['CH03'].value_counts()
Out[9]:
In [27]:
# PP04B1 frequencies for the subset of hog2 rows where ESTADO equals 1.
(
    hog2
    .query('ESTADO == 1')['PP04B1']
    .value_counts()
)
Out[27]:
In [13]:
# Load the cleaned household-level table for the same trimestre as the
# individual-level data. The file name is built from `trimestre` rather than
# a hard-coded 't310', so changing the quarter in one place keeps both
# inputs in sync.
hog = pd.read_csv('data/cleanDataHousehold' + trimestre + '.csv')
In [14]:
# Preview the first five rows of the cleaned household table.
hog.head(n=5)
Out[14]:
In [35]:
# Preview the first five rows of the NRO_HOGAR > 1 subset.
hog2.head(n=5)
Out[35]:
In [57]:
# Count the hog2 rows that share each CODUSU value; the result is a Series
# indexed by CODUSU.
cantPersonas = hog2['CODUSU'].groupby(hog2['CODUSU']).count()
In [58]:
# Turn the per-CODUSU counts into a flat frame with the columns
# ['CODUSU', 'cantPers'], ready to be merged on 'CODUSU'.
cantPersonas = pd.DataFrame(cantPersonas)
cantPersonas.columns = ['cantPers']
# Move the group key out of the index with reset_index() instead of copying
# it into a new column: leaving 'CODUSU' as both the index name and a column
# label makes merge(on='CODUSU') fail on current pandas ("both an index
# level and a column label, which is ambiguous").
cantPersonas = cantPersonas.rename_axis('CODUSU').reset_index()
cantPersonas.columns
Out[58]:
In [48]:
# Look at the first five rows of hog2 again before merging.
hog2.head(n=5)
Out[48]:
In [59]:
# Attach the per-CODUSU row count to every hog2 row. A left join keeps all
# hog2 rows, matched on CODUSU.
mergeData = hog2.merge(cantPersonas, how='left', on='CODUSU')
In [61]:
# Cross-tabulate NRO_HOGAR (rows) against the per-CODUSU row count (columns).
pd.crosstab(
    index=mergeData['NRO_HOGAR'],
    columns=mergeData['cantPers'],
)
Out[61]:
In [62]:
# Cross-tabulate NRO_HOGAR (rows) against CH03 (columns).
pd.crosstab(
    index=mergeData['NRO_HOGAR'],
    columns=mergeData['CH03'],
)
Out[62]:
In [ ]: