In [8]:
import pandas as pd
In [20]:
surveys_df = pd.read_csv('./data/surveys.csv')
In [22]:
type(surveys_df)
Out[22]:
pandas.core.frame.DataFrame
In [27]:
surveys_df.dtypes
Out[27]:
record_id int64
month int64
day int64
year int64
plot_id int64
species_id object
sex object
hindfoot_length float64
weight float64
dtype: object
In [14]:
surveys_df.tail()
Out[14]:
record_id
month
day
year
plot_id
species_id
sex
hindfoot_length
weight
35544
35545
12
31
2002
15
AH
NaN
NaN
NaN
35545
35546
12
31
2002
15
AH
NaN
NaN
NaN
35546
35547
12
31
2002
10
RM
F
15.0
14.0
35547
35548
12
31
2002
7
DO
M
36.0
51.0
35548
35549
12
31
2002
5
NaN
NaN
NaN
NaN
In [32]:
surveys_df.columns
Out[32]:
Index(['record_id', 'month', 'day', 'year', 'plot_id', 'species_id', 'sex',
'hindfoot_length', 'weight'],
dtype='object')
In [31]:
surveys_df.shape
Out[31]:
(35549, 9)
In [39]:
surveys_df['species_id'].unique()
Out[39]:
array(['NL', 'DM', 'PF', 'PE', 'DS', 'PP', 'SH', 'OT', 'DO', 'OX', 'SS',
'OL', 'RM', nan, 'SA', 'PM', 'AH', 'DX', 'AB', 'CB', 'CM', 'CQ',
'RF', 'PC', 'PG', 'PH', 'PU', 'CV', 'UR', 'UP', 'ZL', 'UL', 'CS',
'SC', 'BA', 'SF', 'RO', 'AS', 'SO', 'PI', 'ST', 'CU', 'SU', 'RX',
'PB', 'PL', 'PX', 'CT', 'US'], dtype=object)
In [41]:
surveys_df['species_id'].describe()
Out[41]:
count 34786
unique 48
top DM
freq 10596
Name: species_id, dtype: object
In [48]:
sorted_data = surveys_df.groupby('sex')
type(sorted_data)
Out[48]:
pandas.core.groupby.DataFrameGroupBy
In [51]:
sorted_data.describe().stack()
Out[51]:
day
hindfoot_length
month
plot_id
record_id
weight
year
sex
F
count
15690.000000
14894.000000
15690.000000
15690.000000
15690.000000
15303.000000
15690.000000
mean
16.007138
28.836780
6.583047
11.440854
18036.412046
42.170555
1990.644997
std
8.271144
9.463789
3.367350
6.870684
10423.089000
36.847958
7.598725
min
1.000000
7.000000
1.000000
1.000000
3.000000
4.000000
1977.000000
25%
9.000000
21.000000
4.000000
5.000000
8917.500000
20.000000
1984.000000
50%
16.000000
27.000000
7.000000
12.000000
18075.500000
34.000000
1990.000000
75%
23.000000
36.000000
10.000000
17.000000
27250.000000
46.000000
1997.000000
max
31.000000
64.000000
12.000000
24.000000
35547.000000
274.000000
2002.000000
M
count
17348.000000
16476.000000
17348.000000
17348.000000
17348.000000
16879.000000
17348.000000
mean
16.184286
29.709578
6.392668
11.098282
17754.835601
42.995379
1990.480401
std
8.199274
9.629246
3.420806
6.728713
10132.203323
36.184981
7.403655
min
1.000000
2.000000
1.000000
1.000000
1.000000
4.000000
1977.000000
25%
9.000000
21.000000
3.000000
5.000000
8969.750000
20.000000
1984.000000
50%
16.000000
34.000000
6.000000
11.000000
17727.500000
39.000000
1990.000000
75%
23.000000
36.000000
9.000000
17.000000
26454.250000
49.000000
1997.000000
max
31.000000
58.000000
12.000000
24.000000
35548.000000
280.000000
2002.000000
In [52]:
sorted_data.mean()
Out[52]:
record_id
month
day
year
plot_id
hindfoot_length
weight
sex
F
18036.412046
6.583047
16.007138
1990.644997
11.440854
28.836780
42.170555
M
17754.835601
6.392668
16.184286
1990.480401
11.098282
29.709578
42.995379
In [54]:
surveys_df.groupby(['plot_id', 'sex']).describe().stack()
Out[54]:
day
hindfoot_length
month
record_id
weight
year
plot_id
sex
1
F
count
848.000000
808.000000
848.000000
848.000000
826.000000
848.000000
mean
15.338443
31.733911
6.597877
18390.384434
46.311138
1990.933962
std
8.325993
8.894939
3.366246
10469.790852
33.240958
7.678171
min
1.000000
14.000000
1.000000
9.000000
5.000000
1977.000000
25%
9.000000
22.000000
4.000000
8783.500000
26.000000
1983.000000
50%
15.000000
34.000000
7.000000
19182.500000
40.000000
1991.000000
75%
22.000000
36.000000
10.000000
27691.750000
50.000000
1998.000000
max
31.000000
64.000000
12.000000
35393.000000
196.000000
2002.000000
M
count
1095.000000
1047.000000
1095.000000
1095.000000
1072.000000
1095.000000
mean
15.905936
34.302770
6.121461
17197.740639
55.950560
1990.091324
std
8.053257
8.979955
3.418795
9911.570595
41.035686
7.265208
min
1.000000
12.000000
1.000000
6.000000
4.000000
1977.000000
25%
9.000000
32.000000
3.000000
8638.000000
37.000000
1983.000000
50%
16.000000
36.000000
6.000000
17043.000000
46.000000
1990.000000
75%
23.000000
37.000000
9.000000
25251.500000
54.000000
1997.000000
max
31.000000
57.000000
12.000000
35390.000000
231.000000
2002.000000
2
F
count
970.000000
918.000000
970.000000
970.000000
954.000000
970.000000
mean
16.288660
30.161220
6.426804
17714.753608
52.561845
1990.449485
std
8.046509
8.677937
3.537694
10300.015076
45.547697
7.519910
min
1.000000
14.000000
1.000000
3.000000
5.000000
1977.000000
25%
10.000000
21.000000
3.000000
9580.250000
25.000000
1984.000000
50%
16.000000
33.000000
6.000000
18104.500000
40.000000
1990.000000
75%
23.000000
36.000000
10.000000
26586.500000
51.000000
1997.000000
max
31.000000
57.000000
12.000000
35405.000000
274.000000
2002.000000
M
count
1144.000000
1077.000000
1144.000000
1144.000000
1114.000000
1144.000000
mean
15.440559
30.353760
6.340035
18085.458042
51.391382
1990.756119
std
8.414667
9.016312
3.623430
10555.331260
46.690887
7.714444
min
1.000000
13.000000
1.000000
1.000000
5.000000
1977.000000
25%
9.000000
21.000000
3.000000
8653.000000
24.000000
1983.000000
50%
15.000000
33.000000
6.000000
19024.500000
42.000000
1991.000000
...
...
...
...
...
...
...
...
...
23
F
std
8.776973
6.455268
3.353006
8854.378716
18.933945
6.454297
min
1.000000
14.000000
1.000000
41.000000
8.000000
1977.000000
25%
9.000000
16.000000
4.000000
9536.000000
12.000000
1984.000000
50%
15.000000
18.000000
7.000000
14692.000000
16.000000
1988.000000
75%
24.000000
20.000000
10.000000
21169.000000
23.000000
1993.000000
max
30.000000
52.000000
12.000000
35489.000000
199.000000
2002.000000
M
count
207.000000
200.000000
207.000000
207.000000
205.000000
207.000000
mean
16.077295
19.850000
6.391304
17091.338164
18.941463
1989.961353
std
7.955203
5.980496
3.543971
8852.413083
17.979740
6.509027
min
1.000000
14.000000
1.000000
55.000000
4.000000
1977.000000
25%
10.000000
16.000000
3.000000
11274.000000
10.000000
1986.000000
50%
16.000000
18.000000
6.000000
15693.000000
12.000000
1989.000000
75%
23.000000
20.250000
10.000000
24740.500000
22.000000
1996.000000
max
31.000000
50.000000
12.000000
35282.000000
131.000000
2002.000000
24
F
count
486.000000
453.000000
486.000000
486.000000
479.000000
486.000000
mean
16.393004
26.993377
6.596708
13702.224280
47.914405
1987.485597
std
8.367578
8.561462
3.327782
8692.118528
49.112574
6.340412
min
1.000000
12.000000
1.000000
1963.000000
6.000000
1979.000000
25%
9.000000
20.000000
4.000000
7024.750000
21.000000
1982.000000
50%
16.000000
22.000000
7.000000
11560.500000
33.000000
1986.000000
75%
24.000000
35.000000
10.000000
19442.000000
44.000000
1991.000000
max
31.000000
52.000000
12.000000
35283.000000
251.000000
2002.000000
M
count
485.000000
446.000000
485.000000
485.000000
479.000000
485.000000
mean
16.971134
25.786996
6.360825
15208.136082
39.321503
1988.641237
std
8.431738
8.350303
3.452708
9395.610252
42.003947
6.825992
min
1.000000
12.000000
1.000000
2063.000000
4.000000
1979.000000
25%
10.000000
19.000000
3.000000
6992.000000
17.000000
1982.000000
50%
17.000000
21.000000
6.000000
12918.000000
24.000000
1987.000000
75%
24.000000
35.000000
10.000000
22841.000000
45.000000
1995.000000
max
31.000000
51.000000
12.000000
35479.000000
230.000000
2002.000000
384 rows × 6 columns
In [62]:
surveys_df.groupby('sex').sum().stack()
Out[62]:
sex
F record_id 282991305.0
month 103288.0
day 251152.0
year 31233220.0
plot_id 179507.0
hindfoot_length 429495.0
weight 645336.0
M record_id 308010888.0
month 110900.0
day 280765.0
year 34530854.0
plot_id 192533.0
hindfoot_length 489495.0
weight 725719.0
dtype: float64
In [68]:
surveys_df.groupby('species_id').count()['record_id']['DO']
Out[68]:
3027
In [74]:
surveys_df['weight'] * 100
Out[74]:
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
5 NaN
6 NaN
7 NaN
8 NaN
9 NaN
10 NaN
11 NaN
12 NaN
13 NaN
14 NaN
15 NaN
16 NaN
17 NaN
18 NaN
19 NaN
20 NaN
21 NaN
22 NaN
23 NaN
24 NaN
25 NaN
26 NaN
27 NaN
28 NaN
29 NaN
...
35519 3600.0
35520 4800.0
35521 4500.0
35522 4400.0
35523 2700.0
35524 2600.0
35525 2400.0
35526 4300.0
35527 NaN
35528 2500.0
35529 NaN
35530 NaN
35531 4300.0
35532 4800.0
35533 5600.0
35534 5300.0
35535 4200.0
35536 4600.0
35537 3100.0
35538 6800.0
35539 2300.0
35540 3100.0
35541 2900.0
35542 3400.0
35543 NaN
35544 NaN
35545 NaN
35546 1400.0
35547 5100.0
35548 NaN
Name: weight, Length: 35549, dtype: float64
In [81]:
surveys_df[['sex', 'species_id']]
Out[81]:
sex
species_id
0
M
NL
1
M
NL
2
F
DM
3
M
DM
4
M
DM
5
M
PF
6
F
PE
7
M
DM
8
F
DM
9
F
PF
10
F
DS
11
M
DM
12
M
DM
13
NaN
DM
14
F
DM
15
F
DM
16
F
DS
17
M
PP
18
NaN
PF
19
F
DS
20
F
DM
21
F
NL
22
M
DM
23
M
SH
24
M
DM
25
M
DM
26
M
DM
27
M
DM
28
M
PP
29
F
DS
...
...
...
35519
NaN
SF
35520
M
DM
35521
F
DM
35522
F
DM
35523
F
PB
35524
M
OL
35525
F
OT
35526
F
DO
35527
NaN
US
35528
F
PB
35529
F
OT
35530
F
PB
35531
F
DM
35532
F
DM
35533
M
DM
35534
M
DM
35535
F
DM
35536
F
DM
35537
F
PB
35538
M
SF
35539
F
PB
35540
F
PB
35541
F
PB
35542
F
PB
35543
NaN
US
35544
NaN
AH
35545
NaN
AH
35546
F
RM
35547
M
DO
35548
NaN
NaN
35549 rows × 2 columns
In [84]:
surveys_df[['plot_id', 'sex', 'species_id']]
Out[84]:
plot_id
sex
species_id
0
2
M
NL
1
3
M
NL
2
2
F
DM
3
7
M
DM
4
3
M
DM
5
1
M
PF
6
2
F
PE
7
1
M
DM
8
1
F
DM
9
6
F
PF
10
5
F
DS
11
7
M
DM
12
3
M
DM
13
8
NaN
DM
14
6
F
DM
15
4
F
DM
16
3
F
DS
17
2
M
PP
18
4
NaN
PF
19
11
F
DS
20
14
F
DM
21
15
F
NL
22
13
M
DM
23
13
M
SH
24
9
M
DM
25
15
M
DM
26
15
M
DM
27
11
M
DM
28
11
M
PP
29
10
F
DS
...
...
...
...
35519
9
NaN
SF
35520
9
M
DM
35521
9
F
DM
35522
9
F
DM
35523
9
F
PB
35524
9
M
OL
35525
8
F
OT
35526
13
F
DO
35527
13
NaN
US
35528
13
F
PB
35529
13
F
OT
35530
13
F
PB
35531
14
F
DM
35532
14
F
DM
35533
14
M
DM
35534
14
M
DM
35535
14
F
DM
35536
14
F
DM
35537
15
F
PB
35538
15
M
SF
35539
15
F
PB
35540
15
F
PB
35541
15
F
PB
35542
15
F
PB
35543
15
NaN
US
35544
15
NaN
AH
35545
15
NaN
AH
35546
10
F
RM
35547
7
M
DO
35548
5
NaN
NaN
35549 rows × 3 columns
In [93]:
surveys_df[0:41:5]
Out[93]:
record_id
month
day
year
plot_id
species_id
sex
hindfoot_length
weight
0
1
7
16
1977
2
NL
M
32.0
NaN
5
6
7
16
1977
1
PF
M
14.0
NaN
10
11
7
16
1977
5
DS
F
53.0
NaN
15
16
7
16
1977
4
DM
F
36.0
NaN
20
21
7
17
1977
14
DM
F
34.0
NaN
25
26
7
17
1977
15
DM
M
31.0
NaN
30
31
7
17
1977
15
DM
F
37.0
NaN
35
36
7
17
1977
16
OT
F
22.0
NaN
40
41
7
18
1977
23
DM
F
34.0
NaN
In [99]:
surveys_df.iloc[0:3, 1:4]
Out[99]:
month
day
year
0
7
16
1977
1
7
16
1977
2
7
16
1977
In [100]:
surveys_df.loc[[0, 10], :]
Out[100]:
record_id
month
day
year
plot_id
species_id
sex
hindfoot_length
weight
0
1
7
16
1977
2
NL
M
32.0
NaN
10
11
7
16
1977
5
DS
F
53.0
NaN
In [102]:
surveys_df.loc[100, ['plot_id', 'species_id', 'weight']]
Out[102]:
plot_id 5
species_id DM
weight 46
Name: 100, dtype: object
In [108]:
surveys_df.loc[100, 'day':'month']
Out[108]:
Series([], Name: 100, dtype: object)
In [119]:
surveys_df[(surveys_df.year == 2002) & (surveys_df.month == 2)]
Out[119]:
record_id
month
day
year
plot_id
species_id
sex
hindfoot_length
weight
33396
33397
2
9
2002
1
RO
M
17.0
8.0
33397
33398
2
9
2002
1
DO
F
35.0
35.0
33398
33399
2
9
2002
1
PB
M
28.0
48.0
33399
33400
2
9
2002
1
DO
M
37.0
60.0
33400
33401
2
9
2002
1
DM
M
37.0
47.0
33401
33402
2
9
2002
1
DO
M
35.0
40.0
33402
33403
2
9
2002
1
OT
M
20.0
26.0
33403
33404
2
9
2002
2
PB
M
27.0
54.0
33404
33405
2
9
2002
2
RM
M
19.0
8.0
33405
33406
2
9
2002
2
DO
F
36.0
49.0
33406
33407
2
9
2002
2
DO
M
35.0
52.0
33407
33408
2
9
2002
2
PB
M
28.0
41.0
33408
33409
2
9
2002
2
PB
M
27.0
49.0
33409
33410
2
9
2002
2
DM
F
35.0
46.0
33410
33411
2
9
2002
2
PE
M
19.0
25.0
33411
33412
2
9
2002
2
PP
M
22.0
17.0
33412
33413
2
9
2002
2
SH
F
30.0
57.0
33413
33414
2
9
2002
2
PB
F
27.0
31.0
33414
33415
2
9
2002
2
NL
M
33.0
120.0
33415
33416
2
9
2002
3
PB
M
26.0
44.0
33416
33417
2
9
2002
3
DM
M
36.0
48.0
33417
33418
2
9
2002
3
PB
F
26.0
26.0
33418
33419
2
9
2002
3
DM
M
37.0
54.0
33419
33420
2
9
2002
3
OT
F
21.0
24.0
33420
33421
2
9
2002
3
PB
F
26.0
28.0
33421
33422
2
9
2002
3
PB
M
30.0
55.0
33422
33423
2
9
2002
3
DM
M
36.0
49.0
33423
33424
2
9
2002
3
DM
F
36.0
47.0
33424
33425
2
9
2002
3
DO
F
34.0
51.0
33425
33426
2
9
2002
3
DM
M
37.0
52.0
...
...
...
...
...
...
...
...
...
...
33540
33541
2
10
2002
13
PB
M
29.0
49.0
33541
33542
2
10
2002
13
PP
F
21.0
15.0
33542
33543
2
10
2002
14
DM
M
36.0
48.0
33543
33544
2
10
2002
14
AH
NaN
NaN
NaN
33544
33545
2
10
2002
14
DM
F
35.0
39.0
33545
33546
2
10
2002
14
OT
F
21.0
21.0
33546
33547
2
10
2002
14
DM
F
36.0
44.0
33547
33548
2
10
2002
14
DM
F
36.0
46.0
33548
33549
2
10
2002
14
NL
M
33.0
222.0
33549
33550
2
10
2002
15
PB
F
25.0
31.0
33550
33551
2
10
2002
15
RM
F
17.0
7.0
33551
33552
2
10
2002
15
AH
NaN
NaN
NaN
33552
33553
2
10
2002
15
AH
NaN
NaN
NaN
33553
33554
2
10
2002
15
RM
F
17.0
10.0
33554
33555
2
10
2002
15
PB
M
27.0
45.0
33555
33556
2
10
2002
5
RO
M
15.0
9.0
33556
33557
2
10
2002
5
RM
F
17.0
9.0
33557
33558
2
10
2002
16
PB
F
26.0
25.0
33558
33559
2
10
2002
16
DM
M
36.0
38.0
33559
33560
2
10
2002
16
DO
F
36.0
51.0
33560
33561
2
10
2002
10
RM
F
17.0
8.0
33561
33562
2
10
2002
10
DO
F
35.0
50.0
33562
33563
2
10
2002
10
DO
M
34.0
52.0
33563
33564
2
10
2002
10
DO
M
38.0
51.0
33564
33565
2
10
2002
10
RO
F
16.0
8.0
33565
33566
2
10
2002
7
DM
M
35.0
42.0
33566
33567
2
10
2002
7
DO
M
36.0
62.0
33567
33568
2
10
2002
7
DO
F
37.0
55.0
33568
33569
2
10
2002
7
DO
F
38.0
47.0
33569
33570
2
10
2002
7
DO
F
35.0
54.0
174 rows × 9 columns
In [127]:
surveys_df[(surveys_df.year >= 2000) & (surveys_df.year <= 2004)].head()
Out[127]:
record_id
month
day
year
plot_id
species_id
sex
hindfoot_length
weight
30158
30159
1
8
2000
1
PP
F
22.0
17.0
30159
30160
1
8
2000
1
DO
M
35.0
53.0
30160
30161
1
8
2000
1
PP
F
21.0
17.0
30161
30162
1
8
2000
1
DM
M
36.0
50.0
30162
30163
1
8
2000
1
PP
M
20.0
16.0
In [132]:
surveys_df[surveys_df['species_id'].isin(['NL', 'DM', 'PF', 'PE'])].head()
Out[132]:
record_id
month
day
year
plot_id
species_id
sex
hindfoot_length
weight
0
1
7
16
1977
2
NL
M
32.0
NaN
1
2
7
16
1977
3
NL
M
33.0
NaN
2
3
7
16
1977
2
DM
F
37.0
NaN
3
4
7
16
1977
7
DM
M
36.0
NaN
4
5
7
16
1977
3
DM
M
35.0
NaN
In [137]:
surveys_df[~pd.isnull(surveys_df).any(axis=1)].head()
Out[137]:
record_id
month
day
year
plot_id
species_id
sex
hindfoot_length
weight
62
63
8
19
1977
3
DM
M
35.0
40.0
63
64
8
19
1977
7
DM
M
37.0
48.0
64
65
8
19
1977
4
DM
F
34.0
29.0
65
66
8
19
1977
4
DM
F
35.0
46.0
66
67
8
19
1977
7
DM
M
35.0
36.0
In [166]:
idx = pd.IndexSlice
def clean_date(df):
return pd.to_datetime(df[['year', 'month', 'day']],
errors='cerror')
In [167]:
weights = (surveys_df.
assign(date = clean_date).
set_index(['date', 'species_id', 'sex']))['weight']
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-167-d33e73fe7373> in <module>()
1 weights = (surveys_df.
----> 2 assign(date = clean_date).
3 set_index(['date', 'species_id', 'sex']))['weight']
/usr/local/lib/python3.6/site-packages/pandas/core/frame.py in assign(self, **kwargs)
2495 results = {}
2496 for k, v in kwargs.items():
-> 2497 results[k] = com._apply_if_callable(v, data)
2498
2499 # ... and then assign
/usr/local/lib/python3.6/site-packages/pandas/core/common.py in _apply_if_callable(maybe_callable, obj, **kwargs)
439 """
440 if callable(maybe_callable):
--> 441 return maybe_callable(obj, **kwargs)
442 return maybe_callable
443
<ipython-input-166-5817482470a1> in clean_date(df)
3 def clean_date(df):
4 return pd.to_datetime(df[['year', 'month', 'day']],
----> 5 errors='cerror')
/usr/local/lib/python3.6/site-packages/pandas/core/tools/datetimes.py in to_datetime(arg, errors, dayfirst, yearfirst, utc, box, format, exact, unit, infer_datetime_format, origin)
510 result = Series(values, index=arg.index, name=arg.name)
511 elif isinstance(arg, (ABCDataFrame, MutableMapping)):
--> 512 result = _assemble_from_unit_mappings(arg, errors=errors)
513 elif isinstance(arg, ABCIndexClass):
514 result = _convert_listlike(arg, box, format, name=arg.name)
/usr/local/lib/python3.6/site-packages/pandas/core/tools/datetimes.py in _assemble_from_unit_mappings(arg, errors)
611 coerce(arg[unit_rev['day']]))
612 try:
--> 613 values = to_datetime(values, format='%Y%m%d', errors=errors)
614 except (TypeError, ValueError) as e:
615 raise ValueError("cannot assemble the "
/usr/local/lib/python3.6/site-packages/pandas/core/tools/datetimes.py in to_datetime(arg, errors, dayfirst, yearfirst, utc, box, format, exact, unit, infer_datetime_format, origin)
507 elif isinstance(arg, ABCSeries):
508 from pandas import Series
--> 509 values = _convert_listlike(arg._values, False, format)
510 result = Series(values, index=arg.index, name=arg.name)
511 elif isinstance(arg, (ABCDataFrame, MutableMapping)):
/usr/local/lib/python3.6/site-packages/pandas/core/tools/datetimes.py in _convert_listlike(arg, box, format, name, tz)
412 try:
413 result = tslib.array_strptime(arg, format, exact=exact,
--> 414 errors=errors)
415 except tslib.OutOfBoundsDatetime:
416 if errors == 'raise':
pandas/_libs/tslib.pyx in pandas._libs.tslib.array_strptime (pandas/_libs/tslib.c:62494)()
AssertionError:
In [158]:
surveys_df[['hindfoot_length', 'weight']].apply(lambda x: x.mean())
Out[158]:
hindfoot_length 29.287932
weight 42.672428
dtype: float64
In [162]:
def standardize(x):
return (x - x.mean() / x.std())
surveys_df[['hindfoot_length', 'weight']].apply(standardize).tail()
Out[162]:
hindfoot_length
weight
35544
NaN
NaN
35545
NaN
NaN
35546
11.937933
12.835082
35547
32.937933
49.835082
35548
NaN
NaN
In [165]:
(surveys_df[['hindfoot_length', 'weight']].
apply(standardize).
applymap(lambda x: '{:03.2f}'.format(x)).tail())
Out[165]:
hindfoot_length
weight
35544
nan
nan
35545
nan
nan
35546
11.94
12.84
35547
32.94
49.84
35548
nan
nan
In [ ]:
Content source: myles/notebooks
Similar notebooks: