In [1]:
import getEPH
import functionsForModels
import make_dummy
import schoolYears
import categorize
import functionsForModels
import createVariables

import pandas as pd
#http://statsmodels.sourceforge.net/devel/examples/generated/example_wls.html
import numpy as np
from scipy import stats
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from statsmodels.iolib.table import (SimpleTable, default_txt_fmt)
np.random.seed(1024)
%matplotlib inline


/home/pipe/anaconda2/lib/python2.7/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)

In [2]:
getEPH.getEPHdbf('t310')


('Downloading', 't310')
file in place, creating CSV file
csv file cleanDataHousehold t310 .csv successfully created in folder data/
csv file cleanData t310 .csv successfully created in folder data/

In [2]:
# Load the individual-level cleaned dataset produced by getEPH above.
data1 = pd.read_csv('data/cleanDatat310.csv')
data1.columns


Out[2]:
Index([u'CODUSU', u'NRO_HOGAR', u'COMPONENTE', u'AGLOMERADO', u'PONDERA',
       u'familyRelation', u'female', u'age', u'schoolLevel', u'finishedYear',
       u'lastYear', u'activity', u'empCond', u'unempCond', u'ITF', u'IPCF',
       u'P47T', u'P21', u'DECCFR', u'DECIFR', u'maritalStatus', u'reading',
       u'placeOfBirth'],
      dtype='object')

In [3]:
data2 = categorize.categorize(data1)

In [4]:
# Add years-of-schooling variables, then derived model variables
# (primary/secondary/university, education, age2, id, lnIncome, ... — see below).
data3 = schoolYears.schoolYears(data2)
data4 = createVariables.createVariables(data3)

In [5]:
data4.columns


Out[5]:
Index([u'CODUSU', u'NRO_HOGAR', u'COMPONENTE', u'AGLOMERADO', u'PONDERA',
       u'familyRelation', u'female', u'age', u'schoolLevel', u'finishedYear',
       u'lastYear', u'activity', u'empCond', u'unempCond', u'ITF', u'IPCF',
       u'P47T', u'P21', u'DECCFR', u'DECIFR', u'maritalStatus', u'reading',
       u'placeOfBirth', u'primary', u'secondary', u'university', u'education',
       u'education2', u'age2', u'id', u'lnIncome', u'lnIncomeT'],
      dtype='object')

Modelo para predecir el ingreso individual


In [6]:
# Modeling subsample: employed individuals (activity == 1) reporting
# labor income above 1.
jobsAndIncome = (data4['activity'] == 1) & (data4['P21'] > 1)
# Household head (1) or spouse (2) — used further below for the pivot.
headAndSpouse = data4['familyRelation'].isin([1, 2])
dataParaModelo = data4.loc[jobsAndIncome].copy()

In [7]:
# Regressors for the individual income model (Mincer-style specification).
variablesOfInterest = ['age', 'age2', 'female', 'education', 'education2']

In [8]:
model = functionsForModels.runModel(dataset = dataParaModelo, income = 'lnIncome', variables = variablesOfInterest)


                            WLS Regression Results                            
==============================================================================
Dep. Variable:                      y   R-squared:                       0.287
Model:                            WLS   Adj. R-squared:                  0.286
Method:                 Least Squares   F-statistic:                     289.9
Date:                Wed, 07 Dec 2016   Prob (F-statistic):          2.70e-261
Time:                        13:55:52   Log-Likelihood:                -4091.5
No. Observations:                3608   AIC:                             8195.
Df Residuals:                    3602   BIC:                             8232.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
const          5.5938      0.138     40.537      0.000         5.323     5.864
x1             0.0659      0.005     12.328      0.000         0.055     0.076
x2            -0.0007   6.16e-05    -11.488      0.000        -0.001    -0.001
x3            -0.5528      0.025    -22.012      0.000        -0.602    -0.504
x4             0.0277      0.016      1.779      0.075        -0.003     0.058
x5             0.0032      0.001      4.525      0.000         0.002     0.005
==============================================================================
Omnibus:                      641.219   Durbin-Watson:                   1.858
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1458.019
Skew:                          -1.012   Prob(JB):                         0.00
Kurtosis:                       5.367   Cond. No.                     2.45e+04
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.45e+04. This might indicate that there are
strong multicollinearity or other numerical problems.
x1: age
x2: age2
x3: female
x4: education
x5: education2
IS R-squared for 1000 times is 0.298124864616
OS R-squared for 1000 times is 0.293081804822

In [9]:
X = sm.add_constant(dataParaModelo.copy().loc[:,variablesOfInterest].values)

In [10]:
dataParaModelo['predictedLnIncome'] = model.predict(X)

In [11]:
#dataParaModelo.loc[:,['id','predictedLnIncome']]

Merge con la base individual


In [12]:
paraMerge = dataParaModelo.loc[:,['CODUSU', 'NRO_HOGAR', 'COMPONENTE','predictedLnIncome']]
print paraMerge.shape


(3608, 4)

In [13]:
paraMerge.head()


Out[13]:
CODUSU NRO_HOGAR COMPONENTE predictedLnIncome
2 307861 1 1 7.478150
3 307861 1 2 7.991038
7 308278 1 1 8.485189
8 308278 1 2 7.750667
9 311937 1 1 8.336136

In [14]:
data4.shape


Out[14]:
(8360, 32)

In [15]:
data = pd.merge(left = data4 , right = paraMerge, on = ['CODUSU', 'NRO_HOGAR', 'COMPONENTE'], how = 'left')

In [16]:
data.shape


Out[16]:
(8360, 33)

In [17]:
data.predictedLnIncome[data.predictedLnIncome.isnull()] = 0


/home/pipe/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':

Crear nuevas variables individuales


In [18]:
# Employment indicator, its complement, and the years-of-schooling
# "put to work" interaction (education counted only for the employed).
data['job'] = data['activity'].eq(1).astype(int)
data['noJob'] = data['activity'].ne(1).astype(int)
data['schoolAndJob'] = data['job'] * data['education']

In [19]:
# Three new columns added (job, noJob, schoolAndJob).
print data.shape
data.head()


(8360, 36)
Out[19]:
CODUSU NRO_HOGAR COMPONENTE AGLOMERADO PONDERA familyRelation female age schoolLevel finishedYear ... education education2 age2 id lnIncome lnIncomeT predictedLnIncome job noJob schoolAndJob
0 302468 1 1 32 1287 1 1 20 7.0 2 ... 13.0 169.0 400 3024681 0.000000 7.600902 0.000000 0 1 0.0
1 302468 1 2 32 1287 10 1 20 6.0 2 ... 13.0 169.0 400 3024681 0.000000 7.600902 0.000000 0 1 0.0
2 307861 1 1 32 1674 1 0 42 2.0 1 ... 7.0 49.0 1764 3078611 8.006368 8.006368 7.478150 1 0 7.0
3 307861 1 2 32 1674 2 1 44 7.0 1 ... 17.0 289.0 1936 3078611 7.937375 7.937375 7.991038 1 0 17.0
4 307861 1 3 32 1674 3 0 13 4.0 2 ... 7.0 49.0 169 3078611 0.000000 0.000000 0.000000 0 1 0.0

5 rows × 36 columns


In [20]:
# Per-household (id) aggregates:
# - number of employed members
# - number of non-employed members
# - total years of schooling among the employed
grouped = data.groupby('id')
cantidadActivos = grouped['job'].sum()
cantidadInactivos = grouped['noJob'].sum()
schoolAndJob = grouped['schoolAndJob'].sum()

In [21]:
dfJobsAndEduc = pd.merge(left = schoolAndJob.to_frame() ,
                         right = cantidadInactivos.to_frame(),
                         left_index = True,
                         right_index = True)

dfJobsAndEduc = pd.merge(left = dfJobsAndEduc ,
                         right = cantidadActivos.to_frame(),
                         left_index = True,
                         right_index = True)


print dfJobsAndEduc.shape
dfJobsAndEduc.head()


(2703, 3)
Out[21]:
schoolAndJob noJob job
id
1250971 40.0 1 3
1253451 12.0 1 1
1254461 0.0 1 0
1256691 14.0 2 2
1256892 24.0 1 2

Crear el dataset pivoteado para hacer merge con la base de hogares


In [24]:
# Head/spouse rows only, keeping the columns needed for the household pivot.
pivotColumns = ['id',
                'AGLOMERADO',
                'familyRelation',
                'age',
                'age2',
                'female',
                'education',
                'education2',
                'primary',
                'secondary',
                'university',
                'P21',
                'P47T',
                'lnIncome',
                u'lnIncomeT',
                'predictedLnIncome',
                'job',
                'DECCFR',
                'DECIFR',
                'maritalStatus',
                'reading',
                'placeOfBirth']
cleanData = data.loc[headAndSpouse, pivotColumns].copy()

In [25]:
cleanData.head()


Out[25]:
id AGLOMERADO familyRelation age age2 female education education2 primary secondary ... P47T lnIncome lnIncomeT predictedLnIncome job DECCFR DECIFR maritalStatus reading placeOfBirth
0 3024681 32 1 20 400 1 13.0 169.0 7.0 5.0 ... 2000 0.000000 7.600902 0.000000 0 8 6 5 1 1
2 3078611 32 1 42 1764 0 7.0 49.0 7.0 0.0 ... 3000 8.006368 8.006368 7.478150 1 6 8 2 1 2
3 3078611 32 2 44 1936 1 17.0 289.0 7.0 5.0 ... 2800 7.937375 7.937375 7.991038 1 6 8 2 1 2
6 3087621 32 1 68 4624 1 17.0 289.0 7.0 5.0 ... 3200 0.000000 8.070906 0.000000 0 9 5 4 1 3
7 3082781 32 1 38 1444 0 17.0 289.0 7.0 5.0 ... 6000 8.517193 8.699515 8.485189 1 10 10 2 1 1

5 rows × 22 columns


In [26]:
# Reshape to one row per household: columns become (variable, familyRelation)
# pairs, i.e. head (1) and spouse (2) values side by side. Spouse columns are
# NaN for single-headed households.
pivot = cleanData.pivot(index='id', columns='familyRelation')
pivot.head()


Out[26]:
AGLOMERADO age age2 female education ... DECCFR DECIFR maritalStatus reading placeOfBirth
familyRelation 1 2 1 2 1 2 1 2 1 2 ... 1 2 1 2 1 2 1 2 1 2
id
1250971 33.0 NaN 57.0 NaN 3249.0 NaN 1.0 NaN 14.0 NaN ... 8.0 NaN 9.0 NaN 4.0 NaN 1.0 NaN 1.0 NaN
1253451 32.0 NaN 66.0 NaN 4356.0 NaN 1.0 NaN 7.0 NaN ... 8.0 NaN 6.0 NaN 3.0 NaN 1.0 NaN 1.0 NaN
1254461 32.0 NaN 79.0 NaN 6241.0 NaN 1.0 NaN 12.0 NaN ... 7.0 NaN 2.0 NaN 4.0 NaN 1.0 NaN 1.0 NaN
1256691 33.0 NaN 61.0 NaN 3721.0 NaN 0.0 NaN 7.0 NaN ... 8.0 NaN 9.0 NaN 4.0 NaN 1.0 NaN 1.0 NaN
1256892 32.0 32.0 31.0 23.0 961.0 529.0 0.0 1.0 12.0 12.0 ... 5.0 5.0 5.0 5.0 1.0 1.0 1.0 1.0 4.0 4.0

5 rows × 40 columns


In [27]:
# 20 variables x 2 family relations = 40 MultiIndex columns.
print pivot.shape
pivot.columns


(2703, 40)
Out[27]:
MultiIndex(levels=[[u'AGLOMERADO', u'age', u'age2', u'female', u'education', u'education2', u'primary', u'secondary', u'university', u'P21', u'P47T', u'lnIncome', u'lnIncomeT', u'predictedLnIncome', u'job', u'DECCFR', u'DECIFR', u'maritalStatus', u'reading', u'placeOfBirth'], [1, 2]],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19], [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]],
           names=[None, u'familyRelation'])

In [28]:
pivot.to_csv('data/pivotInd.csv')

In [29]:
# To be read from another file: re-read the pivoted CSV with flat, readable
# column names (head*/spouse* pairs matching the MultiIndex order above).
# NOTE(review): skiprows=3 must stay in sync with the header rows that
# pivot.to_csv writes (two MultiIndex header rows plus the index-name row)
# and the `names` list must match the pivot's column order — verify if the
# pivoted column set ever changes.
dataN = pd.read_csv('data/pivotInd.csv', names = ['id','AGLO1','AGLO2','headAge','spouseAge','headAge2','spouseAge2',
                                                  'headFemale','spouseFemale','headEduc','spouseEduc',
                                                  'headEduc2','spouseEduc2','headPrimary','spousePrimary',
                                                  'headSecondary','spouseSecondary','headUniversity','spouseUniversity',
                                                  'headP21','spouseP21','headP47T','spouseP47T',
                                                  'headLnIncome','spouseLnIncome','headLnIncomeT','spouseLnIncomeT',
                                                  'headPredictedLnIncome','spousePredictedLnIncome','headJob','spouseJob',
                                                  'headDECCFR','spouseDECCFR','headDECIFR','spouseDECIFR',
                                                  'headMaritalStatus','spouseMaritalStatus',
                                                  'headReading','spouseReading','headPlaceOfBirth','spouseplaceOfBirth',
                                                 ],skiprows = 3)
dataN.head()


Out[29]:
id AGLO1 AGLO2 headAge spouseAge headAge2 spouseAge2 headFemale spouseFemale headEduc ... headDECCFR spouseDECCFR headDECIFR spouseDECIFR headMaritalStatus spouseMaritalStatus headReading spouseReading headPlaceOfBirth spouseplaceOfBirth
0 1250971 33.0 NaN 57.0 NaN 3249.0 NaN 1.0 NaN 14.0 ... 8.0 NaN 9.0 NaN 4.0 NaN 1.0 NaN 1.0 NaN
1 1253451 32.0 NaN 66.0 NaN 4356.0 NaN 1.0 NaN 7.0 ... 8.0 NaN 6.0 NaN 3.0 NaN 1.0 NaN 1.0 NaN
2 1254461 32.0 NaN 79.0 NaN 6241.0 NaN 1.0 NaN 12.0 ... 7.0 NaN 2.0 NaN 4.0 NaN 1.0 NaN 1.0 NaN
3 1256691 33.0 NaN 61.0 NaN 3721.0 NaN 0.0 NaN 7.0 ... 8.0 NaN 9.0 NaN 4.0 NaN 1.0 NaN 1.0 NaN
4 1256892 32.0 32.0 31.0 23.0 961.0 529.0 0.0 1.0 12.0 ... 5.0 5.0 5.0 5.0 1.0 1.0 1.0 1.0 4.0 4.0

5 rows × 41 columns


In [30]:
# Expose the household id (currently the index) as an integer column
# so it can be used as an explicit merge key.
dfJobsAndEduc['id'] = dfJobsAndEduc.index.astype(int)

In [31]:
dataN.head()


Out[31]:
id AGLO1 AGLO2 headAge spouseAge headAge2 spouseAge2 headFemale spouseFemale headEduc ... headDECCFR spouseDECCFR headDECIFR spouseDECIFR headMaritalStatus spouseMaritalStatus headReading spouseReading headPlaceOfBirth spouseplaceOfBirth
0 1250971 33.0 NaN 57.0 NaN 3249.0 NaN 1.0 NaN 14.0 ... 8.0 NaN 9.0 NaN 4.0 NaN 1.0 NaN 1.0 NaN
1 1253451 32.0 NaN 66.0 NaN 4356.0 NaN 1.0 NaN 7.0 ... 8.0 NaN 6.0 NaN 3.0 NaN 1.0 NaN 1.0 NaN
2 1254461 32.0 NaN 79.0 NaN 6241.0 NaN 1.0 NaN 12.0 ... 7.0 NaN 2.0 NaN 4.0 NaN 1.0 NaN 1.0 NaN
3 1256691 33.0 NaN 61.0 NaN 3721.0 NaN 0.0 NaN 7.0 ... 8.0 NaN 9.0 NaN 4.0 NaN 1.0 NaN 1.0 NaN
4 1256892 32.0 32.0 31.0 23.0 961.0 529.0 0.0 1.0 12.0 ... 5.0 5.0 5.0 5.0 1.0 1.0 1.0 1.0 4.0 4.0

5 rows × 41 columns


In [32]:
print type(dataN['id'][0]),type(dfJobsAndEduc['id'][0])


<type 'numpy.int64'> <type 'numpy.int64'>

In [33]:
dataFinalCSV = pd.merge(left = dfJobsAndEduc ,
                         right = dataN,
                         left_on = 'id',
                         right_on = 'id')
print dataFinalCSV.shape
dataFinalCSV.head()


(2703, 44)
Out[33]:
schoolAndJob noJob job id AGLO1 AGLO2 headAge spouseAge headAge2 spouseAge2 ... headDECCFR spouseDECCFR headDECIFR spouseDECIFR headMaritalStatus spouseMaritalStatus headReading spouseReading headPlaceOfBirth spouseplaceOfBirth
0 40.0 1 3 1250971 33.0 NaN 57.0 NaN 3249.0 NaN ... 8.0 NaN 9.0 NaN 4.0 NaN 1.0 NaN 1.0 NaN
1 12.0 1 1 1253451 32.0 NaN 66.0 NaN 4356.0 NaN ... 8.0 NaN 6.0 NaN 3.0 NaN 1.0 NaN 1.0 NaN
2 0.0 1 0 1254461 32.0 NaN 79.0 NaN 6241.0 NaN ... 7.0 NaN 2.0 NaN 4.0 NaN 1.0 NaN 1.0 NaN
3 14.0 2 2 1256691 33.0 NaN 61.0 NaN 3721.0 NaN ... 8.0 NaN 9.0 NaN 4.0 NaN 1.0 NaN 1.0 NaN
4 24.0 1 2 1256892 32.0 32.0 31.0 23.0 961.0 529.0 ... 5.0 5.0 5.0 5.0 1.0 1.0 1.0 1.0 4.0 4.0

5 rows × 44 columns


In [34]:
dataFinalCSV.to_csv('data/pivotInd.csv',index=False)

In [35]:
# Round-trip check: re-read the file just written and eyeball the head.
finalCheck = pd.read_csv('data/pivotInd.csv')
finalCheck.head()


Out[35]:
schoolAndJob noJob job id AGLO1 AGLO2 headAge spouseAge headAge2 spouseAge2 ... headDECCFR spouseDECCFR headDECIFR spouseDECIFR headMaritalStatus spouseMaritalStatus headReading spouseReading headPlaceOfBirth spouseplaceOfBirth
0 40.0 1 3 1250971 33.0 NaN 57.0 NaN 3249.0 NaN ... 8.0 NaN 9.0 NaN 4.0 NaN 1.0 NaN 1.0 NaN
1 12.0 1 1 1253451 32.0 NaN 66.0 NaN 4356.0 NaN ... 8.0 NaN 6.0 NaN 3.0 NaN 1.0 NaN 1.0 NaN
2 0.0 1 0 1254461 32.0 NaN 79.0 NaN 6241.0 NaN ... 7.0 NaN 2.0 NaN 4.0 NaN 1.0 NaN 1.0 NaN
3 14.0 2 2 1256691 33.0 NaN 61.0 NaN 3721.0 NaN ... 8.0 NaN 9.0 NaN 4.0 NaN 1.0 NaN 1.0 NaN
4 24.0 1 2 1256892 32.0 32.0 31.0 23.0 961.0 529.0 ... 5.0 5.0 5.0 5.0 1.0 1.0 1.0 1.0 4.0 4.0

5 rows × 44 columns


In [ ]:
# (Fixed: the original first line computed a NaN-filled copy of pivot.values
# and discarded the result — a no-op. Kept here as a comment in case the
# intent was to zero-fill the pivot; if so, assign the result instead.)
# np.where(np.isnan(pivot.values), 0, pivot.values)

# Drop households with any missing head/spouse value.
dataN = dataN.dropna(axis = 0)
dataN.head()

In [41]:
dataN.shape


Out[41]:
(2703, 39)

In [ ]: