In [1]:
import pandas as pd
import numpy as np
import os
import sys
import simpledbf
from getEPH import getEPHdbf

In [2]:
# Select the EPH quarter ("trimestre") to download, e.g. 't311' or 't310'.
# NOTE(review): the code presumably encodes quarter + two-digit year
# (t311 -> Q3 2011) -- confirm against getEPH.
# The later cells reuse `trimestre` to build the CSV file names they read.
trimestre = 't311'
# Per the recorded output of this cell, the call prints download progress and
# reports two CSV files created in data/ (a household file and an individual file).
getEPHdbf(trimestre)


('Downloading', 't311')
file in place, creating CSV file
csv file cleanDataHousehold t311 .csv successfully created in folder data/
csv file cleanData t311 .csv successfully created in folder data/

In [3]:
# Build the path of the 'cleanData' CSV for the selected trimestre, load it
# into a DataFrame, and preview the first rows.
# NOTE(review): judging by the previewed columns (COMPONENTE, age, female, ...)
# this looks like the person-level table -- confirm against getEPH.
ind_csv_path = 'data/cleanData' + trimestre + '.csv'
ind = pd.read_csv(ind_csv_path)
ind.head()


Out[3]:
CODUSU NRO_HOGAR COMPONENTE AGLOMERADO PONDERA familyRelation female age schoolLevel finishedYear lastYear activity empCond unempCond ITF IPCF P47T
0 300641 1 1 33 1481 1 2 51 2 2 3.0 3 0 4 1200 300.00 1200
1 300641 1 2 33 1481 10 1 12 3 2 6.0 3 0 3 1200 300.00 0
2 300641 1 3 33 1481 10 2 9 3 2 3.0 4 0 3 1200 300.00 0
3 300641 1 4 33 1481 10 2 14 3 2 7.0 3 0 3 1200 300.00 0
4 300647 1 1 33 2452 1 1 55 2 1 NaN 1 3 0 13502 4500.67 3500

In [4]:
# Build the path of the household-level CSV for the selected trimestre, load it
# into a DataFrame, and preview the first rows.
# NOTE(review): the download step's log message spells this file
# 'cleanDataHousehold' (lower-case 'h'), while this path uses 'HouseHold'.
# On a case-sensitive filesystem only one spelling can match -- confirm which
# name getEPHdbf actually writes.
household_csv_path = 'data/cleanDataHouseHold' + trimestre + '.csv'
ind2 = pd.read_csv(household_csv_path)
ind2.head()


Out[4]:
CODUSU NRO_HOGAR REGION PONDERA HomeType HomeTypeesp RoomsNumber FloorMaterial FloorMaterialesp RoofMaterial ... HouseMembers Memberless10 Membermore10 TotalHouseHoldIncome DomesticService1 DomesticService2 DomesticService3 DomesticService4 DomesticService5 DomesticService6
0 303878 1 1 1365 2 NaN 3 1 NaN 1 ... 1 0 1 7700 1 0 98 0 0 0
1 305757 1 1 1481 2 NaN 3 1 NaN 9 ... 1 0 1 650 96 0 98 0 0 0
2 307861 1 1 1627 2 NaN 2 1 NaN 1 ... 4 1 3 4400 1 2 98 0 0 0
3 308762 1 1 1549 2 NaN 4 1 NaN 9 ... 1 0 1 5200 1 0 98 0 0 0
4 307029 1 1 1585 1 NaN 4 1 NaN 9 ... 1 0 1 700 96 0 98 0 0 0

5 rows × 44 columns


In [ ]: