In [2]:
import os
import sys
import urllib.request
import zipfile

import numpy as np
import pandas as pd
import simpledbf
In [3]:
# Download the zipped DBF files from INDEC and unpack them under data/ephDBF.
# The data/ folder is created when it is missing, so no manual setup is needed.
url = 'http://www.indec.gob.ar/ftp/cuadros/menusuperior/eph/t310_dbf.zip'
zipPath = os.path.join('data', 't310_dbf.zip')
extractDir = os.path.join('data', 'ephDBF')

os.makedirs('data', exist_ok=True)

# Skip the network round-trip when the archive is already in place, so that
# re-running the notebook does not fetch the file again.
if not os.path.exists(zipPath):
    # Download to a temporary name first: an interrupted transfer must not
    # leave a truncated file that later runs would mistake for the archive.
    partialPath = zipPath + '.part'
    urllib.request.urlretrieve(url, partialPath)
    os.replace(partialPath, zipPath)

# Unlike shelling out, any failure here (missing or corrupt archive) raises
# instead of being silently ignored; existing files are overwritten without
# an interactive prompt.
with zipfile.ZipFile(zipPath) as archive:
    archive.extractall(extractDir)
Out[3]:
In [17]:
# Read the individual-level DBF table (decoded as latin1) into a DataFrame
# and preview its first rows.
dbf = simpledbf.Dbf5(
    'data/ephDBF/Individual_t310.dbf',
    codec='latin1',
)
indRaw = dbf.to_dataframe()
indRaw.head()
Out[17]:
In [18]:
# Keep only the rows whose REGION equals 1, restricted to the columns that
# the rest of the notebook works with.
selectedColumns = [
    'CODUSU', 'NRO_HOGAR', 'COMPONENTE', 'AGLOMERADO', 'PONDERA',
    'CH03', 'CH04', 'CH06', 'CH10', 'CH12', 'CH13', 'CH14',
    'ESTADO', 'NIVEL_ED', 'CAT_OCUP', 'CAT_INAC',
    'ITF', 'IPCF', 'P47T',
]
inRegionOne = indRaw['REGION'] == 1
indNoW = indRaw.loc[inRegionOne, selectedColumns]
indNoW.head()
Out[18]:
In [19]:
# Give the coded survey columns readable names and renumber the rows from 0.
# The mapping is keyed by the original column name, so each label stays
# attached to the right column even if the selection order changes; columns
# not listed here (CODUSU, NRO_HOGAR, COMPONENTE, AGLOMERADO, PONDERA, ITF,
# IPCF, P47T) keep their names.
# NOTE(review): only the labels change, the values are not recoded -- confirm
# the coding of CH04 before treating 'female' as a 0/1 flag.
columnNames = {
    'CH03': 'familyRelation',
    'CH04': 'female',
    'CH06': 'age',
    'CH10': 'schooled',
    'CH12': 'schoolYear',
    'CH13': 'finishedYear',
    'CH14': 'lastYear',
    'ESTADO': 'activity',
    'NIVEL_ED': 'educLevel',
    'CAT_OCUP': 'empCond',
    'CAT_INAC': 'unempCond',
}
# rename/reset_index return new frames, so the filtered slice is not mutated
# in place and re-running this cell yields the same result.
indNoW = indNoW.rename(columns=columnNames).reset_index(drop=True)
indNoW.head()
Out[19]:
In [20]:
# Persist the cleaned table as CSV, leaving the row index out of the file.
indNoW.to_csv(path_or_buf='data/cleanData.csv', index=False)
In [ ]: