In [1]:
import pandas as pd
import numpy as np
from getEPH import *
import os
In [2]:
# Fetch the 't310' dataset. Per this cell's printed output, the helper downloads
# the file and writes a cleaned CSV into the data/ folder.
# NOTE(review): getEPHdbf is presumably provided by the `from getEPH import *` above — confirm.
getEPHdbf('t310')
('Downloading', 't310')
file in place, creating CSV file
csv file cleanData t310 .csv successfully created in folder data/
In [30]:
# Load the cleaned CSV from the data/ folder into a DataFrame and preview
# its first rows.
data = pd.read_csv('data/cleanDatat310.csv')
data.head()
Out[30]:
CODUSU
NRO_HOGAR
COMPONENTE
AGLOMERADO
PONDERA
familyRelation
female
age
schoolLevel
finishedYear
lastYear
activity
empCond
unempCond
ITF
IPCF
P47T
0
302468
1
1
32
1287
1
2
20
7
2
1.0
3
0
3
4000
2000.0
2000
1
302468
1
2
32
1287
10
2
20
6
2
1.0
3
0
3
4000
2000.0
2000
2
307861
1
1
32
1674
1
1
42
2
1
NaN
1
3
0
5800
1450.0
3000
3
307861
1
2
32
1674
2
2
44
7
1
NaN
1
3
0
5800
1450.0
2800
4
307861
1
3
32
1674
3
1
13
4
2
0.0
3
0
3
5800
1450.0
0
In [29]:
def categorize(df):
    """Recode the survey columns of ``df`` in place and return the same frame.

    Transformations applied:

    * ``female`` becomes an integer indicator: 1 where the raw value equals 2,
      0 otherwise.
    * Sentinel codes are replaced with ``NaN``:
      ``schoolLevel`` 99; ``lastYear`` 98 and 99; ``activity``, ``empCond``
      and ``unempCond`` 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns ``female``, ``schoolLevel``,
        ``lastYear``, ``activity``, ``empCond`` and ``unempCond``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in (mutated), so existing callers
        that rely on the in-place effect keep working.

    Raises
    ------
    KeyError
        If any of the required columns is missing.

    Notes
    -----
    The ``female`` recoding is not idempotent: calling this function a second
    time on an already-recoded frame turns every ``female`` value into 0,
    because no value equals 2 any more. Re-load the raw data before re-running.
    """
    # Column -> list of codes that stand for "missing" in that column.
    missing_codes = {
        'schoolLevel': [99],
        'lastYear': [98, 99],
        'activity': [0],
        'empCond': [0],
        'unempCond': [0],
    }

    df['female'] = (df['female'] == 2).astype(int)

    # Assign the result back with bracket indexing instead of calling
    # `df.col.replace(..., inplace=True, axis=None)`: `axis` is not an accepted
    # keyword of Series.replace in current pandas, and an in-place call on a
    # column selected from the frame is a chained assignment that does not
    # reliably write through to the frame.
    for column, codes in missing_codes.items():
        df[column] = df[column].replace(codes, np.nan)

    return df
In [32]:
# Recode `data` and display the result. categorize() assigns to the frame's
# columns and returns the same frame, so `data` itself is changed by this cell.
# NOTE(review): not safe to re-run without reloading `data` first — a second
# call recomputes `female == 2` on already-recoded values and yields all zeros.
categorize(data)
Out[32]:
CODUSU
NRO_HOGAR
COMPONENTE
AGLOMERADO
PONDERA
familyRelation
female
age
schoolLevel
finishedYear
lastYear
activity
empCond
unempCond
ITF
IPCF
P47T
0
302468
1
1
32
1287
1
True
20
7.0
2
1.0
3.0
NaN
3.0
4000
2000.00
2000
1
302468
1
2
32
1287
10
True
20
6.0
2
1.0
3.0
NaN
3.0
4000
2000.00
2000
2
307861
1
1
32
1674
1
False
42
2.0
1
NaN
1.0
3.0
NaN
5800
1450.00
3000
3
307861
1
2
32
1674
2
True
44
7.0
1
NaN
1.0
3.0
NaN
5800
1450.00
2800
4
307861
1
3
32
1674
3
False
13
4.0
2
0.0
3.0
NaN
3.0
5800
1450.00
0
5
307861
1
4
32
1674
3
False
8
2.0
2
2.0
4.0
NaN
3.0
5800
1450.00
0
6
308762
1
1
32
1522
1
True
68
7.0
1
NaN
3.0
NaN
1.0
3200
3200.00
3200
7
308278
1
1
32
1320
1
False
38
8.0
1
NaN
1.0
1.0
NaN
10000
5000.00
6000
8
308278
1
2
32
1320
2
True
28
7.0
1
NaN
1.0
3.0
NaN
10000
5000.00
4000
9
311937
1
1
32
1281
1
False
63
7.0
1
NaN
1.0
3.0
NaN
11000
2750.00
3800
10
311937
1
2
32
1281
2
True
63
7.0
1
NaN
3.0
NaN
1.0
11000
2750.00
2500
11
311937
1
3
32
1281
3
False
39
7.0
2
3.0
1.0
3.0
NaN
11000
2750.00
4700
12
311937
1
4
32
1281
4
True
30
7.0
2
4.0
3.0
NaN
3.0
11000
2750.00
0
13
311356
1
1
32
1348
1
False
46
4.0
1
NaN
1.0
3.0
NaN
7000
1750.00
6300
14
311356
1
2
32
1348
2
True
48
2.0
1
NaN
1.0
3.0
NaN
7000
1750.00
700
15
311356
1
3
32
1348
3
False
17
4.0
2
4.0
3.0
NaN
3.0
7000
1750.00
0
16
311356
1
4
32
1348
3
True
13
4.0
2
0.0
3.0
NaN
3.0
7000
1750.00
0
17
156300
1
1
32
1174
1
False
49
7.0
1
NaN
1.0
3.0
NaN
11850
2962.50
8400
18
156300
1
2
32
1174
2
True
48
7.0
1
NaN
3.0
NaN
4.0
11850
2962.50
0
19
156300
1
3
32
1174
3
False
24
7.0
1
NaN
1.0
3.0
NaN
11850
2962.50
1450
20
156300
1
4
32
1174
3
False
20
7.0
2
NaN
1.0
3.0
NaN
11850
2962.50
2000
21
255481
1
1
32
1553
1
False
72
7.0
1
NaN
3.0
NaN
1.0
8835
4417.50
1335
22
255481
1
2
32
1553
2
True
65
7.0
1
NaN
1.0
3.0
NaN
8835
4417.50
7500
23
228731
1
1
32
1553
1
True
84
7.0
1
NaN
3.0
NaN
1.0
2300
2300.00
2300
24
259897
1
1
32
1259
1
True
57
7.0
1
NaN
3.0
NaN
2.0
6700
2233.33
3000
25
259897
1
3
32
1259
3
False
26
7.0
2
4.0
1.0
3.0
NaN
6700
2233.33
3700
26
259897
1
4
32
1259
3
True
21
7.0
2
1.0
3.0
NaN
3.0
6700
2233.33
0
27
259897
2
51
32
1259
1
True
45
6.0
1
NaN
1.0
3.0
NaN
0
0.00
0
28
285841
2
51
32
1311
1
True
45
2.0
1
NaN
1.0
3.0
NaN
1800
1800.00
1800
29
178057
1
1
32
1662
1
False
68
7.0
1
NaN
3.0
NaN
1.0
1940
970.00
1400
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
8330
131543
1
1
33
2562
1
True
64
2.0
2
NaN
1.0
3.0
NaN
5950
743.75
1400
8331
131543
1
2
33
2562
3
False
41
2.0
1
NaN
1.0
2.0
NaN
5950
743.75
1300
8332
131543
1
3
33
2562
5
False
18
3.0
2
NaN
2.0
3.0
NaN
5950
743.75
2000
8333
131543
1
4
33
2562
9
True
19
3.0
2
NaN
1.0
3.0
NaN
5950
743.75
0
8334
131543
1
5
33
2562
9
False
2
0.0
0
NaN
4.0
NaN
5.0
5950
743.75
0
8335
131543
1
6
33
2562
10
True
37
2.0
1
NaN
1.0
3.0
NaN
5950
743.75
650
8336
131543
1
7
33
2562
9
False
25
4.0
2
2.0
1.0
3.0
NaN
5950
743.75
600
8337
131543
1
8
33
2562
9
False
1
0.0
0
NaN
4.0
NaN
5.0
5950
743.75
0
8338
209368
1
1
33
2161
1
True
55
0.0
0
NaN
1.0
2.0
NaN
4544
757.33
2000
8339
209368
1
3
33
2161
3
False
23
4.0
1
NaN
2.0
2.0
NaN
4544
757.33
400
8340
209368
1
4
33
2161
3
False
20
5.0
2
0.0
1.0
3.0
NaN
4544
757.33
2000
8341
209368
1
5
33
2161
3
False
18
3.0
2
8.0
2.0
3.0
NaN
4544
757.33
0
8342
209368
1
6
33
2161
3
True
15
3.0
2
8.0
3.0
NaN
3.0
4544
757.33
144
8343
209368
1
7
33
2161
3
True
13
3.0
2
6.0
3.0
NaN
3.0
4544
757.33
0
8344
292460
1
1
33
2161
1
False
31
4.0
2
3.0
1.0
2.0
NaN
2488
497.60
1200
8345
292460
1
2
33
2161
2
True
31
2.0
1
NaN
1.0
3.0
NaN
2488
497.60
1288
8346
292460
1
3
33
2161
3
False
11
2.0
2
6.0
3.0
NaN
3.0
2488
497.60
0
8347
292460
1
4
33
2161
3
True
6
2.0
2
0.0
4.0
NaN
3.0
2488
497.60
0
8348
292460
1
5
33
2161
3
True
2
0.0
0
NaN
4.0
NaN
5.0
2488
497.60
0
8349
153032
1
1
33
2183
1
False
44
2.0
1
NaN
1.0
3.0
NaN
4800
960.00
2500
8350
153032
1
2
33
2183
2
True
44
4.0
2
4.0
3.0
NaN
4.0
4800
960.00
300
8351
153032
1
3
33
2183
3
True
26
6.0
1
NaN
1.0
3.0
NaN
4800
960.00
2000
8352
153032
1
4
33
2183
3
True
13
3.0
2
7.0
3.0
NaN
3.0
4800
960.00
0
8353
153032
1
5
33
2183
3
True
16
5.0
2
1.0
3.0
NaN
3.0
4800
960.00
0
8354
288994
1
1
33
2183
1
False
27
4.0
1
NaN
1.0
3.0
NaN
3765
1255.00
2500
8355
288994
1
2
33
2183
2
True
24
4.0
1
NaN
2.0
3.0
NaN
3765
1255.00
1265
8356
288994
1
3
33
2183
3
True
1
0.0
0
NaN
4.0
NaN
5.0
3765
1255.00
0
8357
279097
1
1
33
2403
1
False
58
2.0
2
2.0
1.0
3.0
NaN
780
390.00
280
8358
279097
1
2
33
2403
10
False
64
2.0
2
NaN
1.0
3.0
NaN
780
390.00
500
8359
174584
1
1
33
2369
1
False
61
2.0
1
NaN
3.0
NaN
1.0
890
890.00
890
8360 rows × 17 columns
In [ ]:
In [ ]:
Content source: alephcero/adsProject
Similar notebooks: