In [1]:
# pandas provides the DataFrame type used by every cell in this notebook
import pandas as pd
# Load the survey records from "surveys.csv" (path is relative to the working directory)
surveys_df = pd.read_csv("surveys.csv")
In [2]:
# Method 1: select a 'subset' of the data using the column name
# (this value is not displayed: a notebook cell only shows its last expression)
surveys_df['species_id']
# Method 2: use the column name as an 'attribute'; gives the same output
surveys_df.species_id
Out[2]:
0 NL
1 NL
2 DM
3 DM
4 DM
5 PF
6 PE
7 DM
8 DM
9 PF
10 DS
11 DM
12 DM
13 DM
14 DM
15 DM
16 DS
17 PP
18 PF
19 DS
20 DM
21 NL
22 DM
23 SH
24 DM
25 DM
26 DM
27 DM
28 PP
29 DS
...
35519 SF
35520 DM
35521 DM
35522 DM
35523 PB
35524 OL
35525 OT
35526 DO
35527 US
35528 PB
35529 OT
35530 PB
35531 DM
35532 DM
35533 DM
35534 DM
35535 DM
35536 DM
35537 PB
35538 SF
35539 PB
35540 PB
35541 PB
35542 PB
35543 US
35544 AH
35545 AH
35546 RM
35547 DO
35548 NaN
Name: species_id, Length: 35549, dtype: object
In [3]:
# Bind the name `surveys_species` to just the `species_id` column of the DataFrame.
# Nothing is displayed because an assignment has no output value.
surveys_species = surveys_df['species_id']
In [5]:
# select the species and plot columns from the DataFrame
# (the inner list holds the column names; the result is a two-column DataFrame)
surveys_df[['species_id', 'plot_id']]
Out[5]:
species_id
plot_id
0
NL
2
1
NL
3
2
DM
2
3
DM
7
4
DM
3
5
PF
1
6
PE
2
7
DM
1
8
DM
1
9
PF
6
10
DS
5
11
DM
7
12
DM
3
13
DM
8
14
DM
6
15
DM
4
16
DS
3
17
PP
2
18
PF
4
19
DS
11
20
DM
14
21
NL
15
22
DM
13
23
SH
13
24
DM
9
25
DM
15
26
DM
15
27
DM
11
28
PP
11
29
DS
10
...
...
...
35519
SF
9
35520
DM
9
35521
DM
9
35522
DM
9
35523
PB
9
35524
OL
9
35525
OT
8
35526
DO
13
35527
US
13
35528
PB
13
35529
OT
13
35530
PB
13
35531
DM
14
35532
DM
14
35533
DM
14
35534
DM
14
35535
DM
14
35536
DM
14
35537
PB
15
35538
SF
15
35539
PB
15
35540
PB
15
35541
PB
15
35542
PB
15
35543
US
15
35544
AH
15
35545
AH
15
35546
RM
10
35547
DO
7
35548
NaN
5
35549 rows × 2 columns
In [6]:
# what happens when you flip the order?
# (the columns come back in the order they are listed: plot_id, then species_id)
surveys_df[['plot_id', 'species_id']]
Out[6]:
plot_id
species_id
0
2
NL
1
3
NL
2
2
DM
3
7
DM
4
3
DM
5
1
PF
6
2
PE
7
1
DM
8
1
DM
9
6
PF
10
5
DS
11
7
DM
12
3
DM
13
8
DM
14
6
DM
15
4
DM
16
3
DS
17
2
PP
18
4
PF
19
11
DS
20
14
DM
21
15
NL
22
13
DM
23
13
SH
24
9
DM
25
15
DM
26
15
DM
27
11
DM
28
11
PP
29
10
DS
...
...
...
35519
9
SF
35520
9
DM
35521
9
DM
35522
9
DM
35523
9
PB
35524
9
OL
35525
8
OT
35526
13
DO
35527
13
US
35528
13
PB
35529
13
OT
35530
13
PB
35531
14
DM
35532
14
DM
35533
14
DM
35534
14
DM
35535
14
DM
35536
14
DM
35537
15
PB
35538
15
SF
35539
15
PB
35540
15
PB
35541
15
PB
35542
15
PB
35543
15
US
35544
15
AH
35545
15
AH
35546
10
RM
35547
7
DO
35548
5
NaN
35549 rows × 2 columns
In [7]:
# what happens if you ask for a column that doesn't exist?
# ('speciess' is not a column of surveys_df, so this lookup raises KeyError)
surveys_df['speciess']
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/Users/C.Yu/anaconda/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2392 try:
-> 2393 return self._engine.get_loc(key)
2394 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5239)()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5085)()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20405)()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20359)()
KeyError: 'speciess'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-7-974caf6e9862> in <module>()
1 #what happens if you ask for a column that doesn't exist?
----> 2 surveys_df['speciess']
/Users/C.Yu/anaconda/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
2060 return self._getitem_multilevel(key)
2061 else:
-> 2062 return self._getitem_column(key)
2063
2064 def _getitem_column(self, key):
/Users/C.Yu/anaconda/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
2067 # get column
2068 if self.columns.is_unique:
-> 2069 return self._get_item_cache(key)
2070
2071 # duplicate columns & possible reduce dimensionality
/Users/C.Yu/anaconda/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
1532 res = cache.get(item)
1533 if res is None:
-> 1534 values = self._data.get(item)
1535 res = self._box_item_values(item, values)
1536 cache[item] = res
/Users/C.Yu/anaconda/lib/python3.6/site-packages/pandas/core/internals.py in get(self, item, fastpath)
3588
3589 if not isnull(item):
-> 3590 loc = self.items.get_loc(item)
3591 else:
3592 indexer = np.arange(len(self.items))[isnull(self.items)]
/Users/C.Yu/anaconda/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2393 return self._engine.get_loc(key)
2394 except KeyError:
-> 2395 return self._engine.get_loc(self._maybe_cast_indexer(key))
2396
2397 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5239)()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5085)()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20405)()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20359)()
KeyError: 'speciess'
In [8]:
# Create a list of numbers:
# (a plain Python list; it is not referenced again in the cells shown here)
a = [1, 2, 3, 4, 5]
In [9]:
# select rows 0, 1, 2 (row 3 is not selected)
# A slice inside [] picks rows; the stop value is excluded, as with Python lists.
surveys_df[0:3]
Out[9]:
record_id
month
day
year
plot_id
species_id
sex
hindfoot_length
weight
0
1
7
16
1977
2
NL
M
32.0
NaN
1
2
7
16
1977
3
NL
M
33.0
NaN
2
3
7
16
1977
2
DM
F
37.0
NaN
In [10]:
# select the first 5 rows (rows 0, 1, 2, 3, 4)
# (not displayed: only the cell's last expression is shown)
surveys_df[:5]
# select the last row of the DataFrame
# (the slice starts at the last row,
# and ends at the end of the DataFrame)
surveys_df[-1:]
Out[10]:
record_id
month
day
year
plot_id
species_id
sex
hindfoot_length
weight
35548
35549
12
31
2002
5
NaN
NaN
NaN
NaN
In [11]:
# using the 'copy()' method: creates a separate DataFrame object
true_copy_surveys_df = surveys_df.copy()
# using the '=' operator: binds a second name to the same DataFrame object
ref_surveys_df = surveys_df
In [13]:
# Assign the value `0` to the first three rows of data in the DataFrame.
# ref_surveys_df refers to the same object as surveys_df, so the original is modified too.
ref_surveys_df[0:3] = 0
In [15]:
# ref_surveys_df was created using the '=' operator
# (its first three rows now hold the zeros assigned above)
ref_surveys_df.head()
Out[15]:
record_id
month
day
year
plot_id
species_id
sex
hindfoot_length
weight
0
0
0
0
0
0
0
0
0.0
0.0
1
0
0
0
0
0
0
0
0.0
0.0
2
0
0
0
0
0
0
0
0.0
0.0
3
4
7
16
1977
7
DM
M
36.0
NaN
4
5
7
16
1977
3
DM
M
35.0
NaN
In [16]:
# surveys_df is the original dataframe
# (it shows the same zeros, because ref_surveys_df is another name for this object)
surveys_df.head()
Out[16]:
record_id
month
day
year
plot_id
species_id
sex
hindfoot_length
weight
0
0
0
0
0
0
0
0
0.0
0.0
1
0
0
0
0
0
0
0
0.0
0.0
2
0
0
0
0
0
0
0
0.0
0.0
3
4
7
16
1977
7
DM
M
36.0
NaN
4
5
7
16
1977
3
DM
M
35.0
NaN
In [17]:
# Make a separate copy of surveys_df in its current state.
# NOTE(review): the zeros assigned to rows 0-2 earlier are still present at this point (the CSV
# is only re-read in a later cell), so this copy does not hold the pristine data -- confirm intent.
true_copy_surveys_df = surveys_df.copy()
In [18]:
# Bind ref_surveys_df to the same object as surveys_df (plain assignment copies no data).
ref_surveys_df = surveys_df
In [19]:
# Re-read the CSV so surveys_df holds the file's data again instead of the rows zeroed earlier.
surveys_df = pd.read_csv("surveys.csv")
In [20]:
# iloc[row slicing, column slicing]
# Positions are 0-based and the stop is excluded: rows 0-2, columns 1-3 (month, day, year).
surveys_df.iloc[0:3, 1:4]
Out[20]:
month
day
year
0
7
16
1977
1
7
16
1977
2
7
16
1977
In [22]:
# select all columns for rows of index values 0 and 10
# (loc selects by label; the bare `:` keeps every column)
surveys_df.loc[[0, 10], :]
Out[22]:
record_id
month
day
year
plot_id
species_id
sex
hindfoot_length
weight
0
1
7
16
1977
2
NL
M
32.0
NaN
10
11
7
16
1977
5
DS
F
53.0
NaN
In [23]:
# what does this do?
# (one row label plus a list of column labels: the three values come back as a Series)
surveys_df.loc[0, ['species_id', 'plot_id', 'weight']]
Out[23]:
species_id NL
plot_id 2
weight NaN
Name: 0, dtype: object
In [24]:
# What happens when you type the code below?
# Label 35549 is not in the index (the last label is 35548).
# NOTE(review): the output recorded in this notebook shows a row of NaN for the missing label;
# pandas 1.0+ raises KeyError when a list passed to .loc contains missing labels (use .reindex
# instead) -- confirm against the pandas version in use.
surveys_df.loc[[0, 10, 35549], :]
Out[24]:
record_id
month
day
year
plot_id
species_id
sex
hindfoot_length
weight
0
1.0
7.0
16.0
1977.0
2.0
NL
M
32.0
NaN
10
11.0
7.0
16.0
1977.0
5.0
DS
F
53.0
NaN
35549
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
In [26]:
# Syntax for iloc indexing to find a specific data element:
# iloc[row position, column position] -- here row 2, column 6 (`sex`)
surveys_df.iloc[2, 6]
Out[26]:
'F'
In [27]:
# Boolean mask: keep only the rows recorded in 2002
surveys_df[surveys_df['year'] == 2002]
Out[27]:
record_id
month
day
year
plot_id
species_id
sex
hindfoot_length
weight
33320
33321
1
12
2002
1
DM
M
38.0
44.0
33321
33322
1
12
2002
1
DO
M
37.0
58.0
33322
33323
1
12
2002
1
PB
M
28.0
45.0
33323
33324
1
12
2002
1
AB
NaN
NaN
NaN
33324
33325
1
12
2002
1
DO
M
35.0
29.0
33325
33326
1
12
2002
2
OT
F
20.0
26.0
33326
33327
1
12
2002
2
OT
M
20.0
24.0
33327
33328
1
12
2002
2
OT
F
21.0
22.0
33328
33329
1
12
2002
2
DM
M
37.0
47.0
33329
33330
1
12
2002
2
DO
M
35.0
51.0
33330
33331
1
12
2002
2
PE
F
21.0
23.0
33331
33332
1
12
2002
2
OT
F
20.0
18.0
33332
33333
1
12
2002
2
OT
M
20.0
25.0
33333
33334
1
12
2002
2
OT
F
20.0
22.0
33334
33335
1
12
2002
2
DO
F
36.0
46.0
33335
33336
1
12
2002
2
DM
F
35.0
45.0
33336
33337
1
12
2002
2
PB
M
28.0
47.0
33337
33338
1
12
2002
2
PB
F
26.0
30.0
33338
33339
1
12
2002
2
NL
NaN
NaN
NaN
33339
33340
1
12
2002
12
DO
M
34.0
24.0
33340
33341
1
12
2002
12
PE
F
20.0
15.0
33341
33342
1
12
2002
12
DO
F
36.0
26.0
33342
33343
1
12
2002
12
DO
F
37.0
47.0
33343
33344
1
12
2002
12
DM
M
36.0
40.0
33344
33345
1
12
2002
12
DO
M
37.0
55.0
33345
33346
1
12
2002
12
PE
M
21.0
23.0
33346
33347
1
12
2002
12
DM
F
37.0
45.0
33347
33348
1
12
2002
19
PB
M
29.0
51.0
33348
33349
1
12
2002
19
PB
M
27.0
46.0
33349
33350
1
12
2002
19
PP
F
20.0
13.0
...
...
...
...
...
...
...
...
...
...
35519
35520
12
31
2002
9
SF
NaN
24.0
36.0
35520
35521
12
31
2002
9
DM
M
37.0
48.0
35521
35522
12
31
2002
9
DM
F
35.0
45.0
35522
35523
12
31
2002
9
DM
F
36.0
44.0
35523
35524
12
31
2002
9
PB
F
25.0
27.0
35524
35525
12
31
2002
9
OL
M
21.0
26.0
35525
35526
12
31
2002
8
OT
F
20.0
24.0
35526
35527
12
31
2002
13
DO
F
33.0
43.0
35527
35528
12
31
2002
13
US
NaN
NaN
NaN
35528
35529
12
31
2002
13
PB
F
25.0
25.0
35529
35530
12
31
2002
13
OT
F
20.0
NaN
35530
35531
12
31
2002
13
PB
F
27.0
NaN
35531
35532
12
31
2002
14
DM
F
34.0
43.0
35532
35533
12
31
2002
14
DM
F
36.0
48.0
35533
35534
12
31
2002
14
DM
M
37.0
56.0
35534
35535
12
31
2002
14
DM
M
37.0
53.0
35535
35536
12
31
2002
14
DM
F
35.0
42.0
35536
35537
12
31
2002
14
DM
F
36.0
46.0
35537
35538
12
31
2002
15
PB
F
26.0
31.0
35538
35539
12
31
2002
15
SF
M
26.0
68.0
35539
35540
12
31
2002
15
PB
F
26.0
23.0
35540
35541
12
31
2002
15
PB
F
24.0
31.0
35541
35542
12
31
2002
15
PB
F
26.0
29.0
35542
35543
12
31
2002
15
PB
F
27.0
34.0
35543
35544
12
31
2002
15
US
NaN
NaN
NaN
35544
35545
12
31
2002
15
AH
NaN
NaN
NaN
35545
35546
12
31
2002
15
AH
NaN
NaN
NaN
35546
35547
12
31
2002
10
RM
F
15.0
14.0
35547
35548
12
31
2002
7
DO
M
36.0
51.0
35548
35549
12
31
2002
5
NaN
NaN
NaN
NaN
2229 rows × 9 columns
In [28]:
# Boolean mask: keep every row whose year is not 2002
surveys_df[surveys_df['year'] != 2002]
Out[28]:
record_id
month
day
year
plot_id
species_id
sex
hindfoot_length
weight
0
1
7
16
1977
2
NL
M
32.0
NaN
1
2
7
16
1977
3
NL
M
33.0
NaN
2
3
7
16
1977
2
DM
F
37.0
NaN
3
4
7
16
1977
7
DM
M
36.0
NaN
4
5
7
16
1977
3
DM
M
35.0
NaN
5
6
7
16
1977
1
PF
M
14.0
NaN
6
7
7
16
1977
2
PE
F
NaN
NaN
7
8
7
16
1977
1
DM
M
37.0
NaN
8
9
7
16
1977
1
DM
F
34.0
NaN
9
10
7
16
1977
6
PF
F
20.0
NaN
10
11
7
16
1977
5
DS
F
53.0
NaN
11
12
7
16
1977
7
DM
M
38.0
NaN
12
13
7
16
1977
3
DM
M
35.0
NaN
13
14
7
16
1977
8
DM
NaN
NaN
NaN
14
15
7
16
1977
6
DM
F
36.0
NaN
15
16
7
16
1977
4
DM
F
36.0
NaN
16
17
7
16
1977
3
DS
F
48.0
NaN
17
18
7
16
1977
2
PP
M
22.0
NaN
18
19
7
16
1977
4
PF
NaN
NaN
NaN
19
20
7
17
1977
11
DS
F
48.0
NaN
20
21
7
17
1977
14
DM
F
34.0
NaN
21
22
7
17
1977
15
NL
F
31.0
NaN
22
23
7
17
1977
13
DM
M
36.0
NaN
23
24
7
17
1977
13
SH
M
21.0
NaN
24
25
7
17
1977
9
DM
M
35.0
NaN
25
26
7
17
1977
15
DM
M
31.0
NaN
26
27
7
17
1977
15
DM
M
36.0
NaN
27
28
7
17
1977
11
DM
M
38.0
NaN
28
29
7
17
1977
11
PP
M
NaN
NaN
29
30
7
17
1977
10
DS
F
52.0
NaN
...
...
...
...
...
...
...
...
...
...
33290
33291
12
15
2001
23
PE
M
20.0
18.0
33291
33292
12
15
2001
23
RM
F
16.0
8.0
33292
33293
12
15
2001
20
PE
F
20.0
22.0
33293
33294
12
15
2001
20
SH
M
25.0
43.0
33294
33295
12
15
2001
20
PB
F
27.0
33.0
33295
33296
12
15
2001
20
PB
M
25.0
35.0
33296
33297
12
15
2001
20
RM
M
16.0
11.0
33297
33298
12
15
2001
20
RM
F
16.0
8.0
33298
33299
12
15
2001
20
PB
F
25.0
28.0
33299
33300
12
15
2001
20
PB
F
26.0
30.0
33300
33301
12
15
2001
20
PB
F
27.0
31.0
33301
33302
12
15
2001
24
PE
M
20.0
24.0
33302
33303
12
15
2001
24
PE
M
20.0
23.0
33303
33304
12
15
2001
24
RM
M
16.0
10.0
33304
33305
12
15
2001
7
PB
M
29.0
44.0
33305
33306
12
15
2001
7
OT
M
19.0
21.0
33306
33307
12
15
2001
7
OT
M
20.0
19.0
33307
33308
12
15
2001
7
PP
M
24.0
16.0
33308
33309
12
16
2001
3
NaN
NaN
NaN
NaN
33309
33310
12
16
2001
4
NaN
NaN
NaN
NaN
33310
33311
12
16
2001
5
NaN
NaN
NaN
NaN
33311
33312
12
16
2001
6
NaN
NaN
NaN
NaN
33312
33313
12
16
2001
8
NaN
NaN
NaN
NaN
33313
33314
12
16
2001
9
NaN
NaN
NaN
NaN
33314
33315
12
16
2001
10
NaN
NaN
NaN
NaN
33315
33316
12
16
2001
11
NaN
NaN
NaN
NaN
33316
33317
12
16
2001
13
NaN
NaN
NaN
NaN
33317
33318
12
16
2001
14
NaN
NaN
NaN
NaN
33318
33319
12
16
2001
15
NaN
NaN
NaN
NaN
33319
33320
12
16
2001
16
NaN
NaN
NaN
NaN
33320 rows × 9 columns
In [29]:
# Combine two conditions with `&` (each one wrapped in parentheses):
# keep the rows with 1980 <= year <= 1985
surveys_df[(surveys_df['year'] >= 1980) & (surveys_df['year'] <= 1985)]
Out[29]:
record_id
month
day
year
plot_id
species_id
sex
hindfoot_length
weight
2270
2271
1
15
1980
8
DO
M
35.0
53.0
2271
2272
1
15
1980
11
PF
F
16.0
10.0
2272
2273
1
15
1980
18
DM
F
34.0
33.0
2273
2274
1
15
1980
11
DM
M
38.0
37.0
2274
2275
1
15
1980
8
DO
F
33.0
29.0
2275
2276
1
15
1980
11
DS
M
47.0
132.0
2276
2277
1
15
1980
8
PF
M
15.0
8.0
2277
2278
1
15
1980
9
OT
M
21.0
23.0
2278
2279
1
15
1980
11
DM
F
36.0
36.0
2279
2280
1
15
1980
21
OT
F
20.0
21.0
2280
2281
1
15
1980
11
OL
M
20.0
29.0
2281
2282
1
15
1980
17
DM
F
36.0
49.0
2282
2283
1
15
1980
11
OL
M
21.0
23.0
2283
2284
1
15
1980
9
OL
M
20.0
32.0
2284
2285
1
15
1980
10
OL
F
20.0
24.0
2285
2286
1
15
1980
11
DM
M
38.0
47.0
2286
2287
1
15
1980
21
OT
M
19.0
22.0
2287
2288
1
15
1980
19
RM
F
17.0
12.0
2288
2289
1
15
1980
20
DS
F
52.0
150.0
2289
2290
1
15
1980
11
DM
M
37.0
49.0
2290
2291
1
15
1980
9
OL
F
21.0
34.0
2291
2292
1
15
1980
12
DM
F
35.0
40.0
2292
2293
1
15
1980
18
DS
F
51.0
132.0
2293
2294
1
15
1980
22
DM
F
34.0
25.0
2294
2295
1
15
1980
9
OL
M
21.0
36.0
2295
2296
1
15
1980
8
DO
F
34.0
50.0
2296
2297
1
15
1980
11
DM
M
37.0
45.0
2297
2298
1
15
1980
17
DM
M
35.0
47.0
2298
2299
1
15
1980
9
DM
M
38.0
46.0
2299
2300
1
15
1980
18
DM
F
32.0
29.0
...
...
...
...
...
...
...
...
...
...
11197
11198
12
8
1985
4
DS
M
45.0
129.0
11198
11199
12
8
1985
8
DM
F
38.0
42.0
11199
11200
12
8
1985
7
AB
NaN
NaN
NaN
11200
11201
12
8
1985
5
OL
M
21.0
29.0
11201
11202
12
8
1985
9
DM
F
35.0
39.0
11202
11203
12
8
1985
7
PE
F
17.0
19.0
11203
11204
12
8
1985
3
PP
F
22.0
16.0
11204
11205
12
8
1985
5
DO
M
37.0
56.0
11205
11206
12
8
1985
11
DM
F
38.0
38.0
11206
11207
12
8
1985
2
PE
M
18.0
19.0
11207
11208
12
8
1985
8
DS
F
50.0
120.0
11208
11209
12
8
1985
2
DO
F
37.0
52.0
11209
11210
12
8
1985
2
DM
F
35.0
40.0
11210
11211
12
8
1985
13
DM
M
37.0
45.0
11211
11212
12
8
1985
4
DS
NaN
NaN
121.0
11212
11213
12
8
1985
13
AH
NaN
NaN
NaN
11213
11214
12
8
1985
1
DM
F
37.0
44.0
11214
11215
12
8
1985
2
NL
F
32.0
160.0
11215
11216
12
8
1985
3
RM
M
17.0
9.0
11216
11217
12
8
1985
4
OL
M
24.0
34.0
11217
11218
12
8
1985
9
DM
F
36.0
39.0
11218
11219
12
8
1985
8
DM
F
38.0
41.0
11219
11220
12
8
1985
5
DO
F
37.0
56.0
11220
11221
12
8
1985
13
AH
NaN
NaN
NaN
11221
11222
12
8
1985
7
AB
NaN
NaN
NaN
11222
11223
12
8
1985
4
DM
M
36.0
40.0
11223
11224
12
8
1985
11
DM
M
37.0
49.0
11224
11225
12
8
1985
7
PE
M
20.0
18.0
11225
11226
12
8
1985
1
DM
M
38.0
47.0
11226
11227
12
8
1985
15
NaN
NaN
NaN
NaN
8957 rows × 9 columns
In [30]:
# Element-wise test for missing values: True where a cell is NaN, False otherwise
surveys_df.isnull()
Out[30]:
record_id
month
day
year
plot_id
species_id
sex
hindfoot_length
weight
0
False
False
False
False
False
False
False
False
True
1
False
False
False
False
False
False
False
False
True
2
False
False
False
False
False
False
False
False
True
3
False
False
False
False
False
False
False
False
True
4
False
False
False
False
False
False
False
False
True
5
False
False
False
False
False
False
False
False
True
6
False
False
False
False
False
False
False
True
True
7
False
False
False
False
False
False
False
False
True
8
False
False
False
False
False
False
False
False
True
9
False
False
False
False
False
False
False
False
True
10
False
False
False
False
False
False
False
False
True
11
False
False
False
False
False
False
False
False
True
12
False
False
False
False
False
False
False
False
True
13
False
False
False
False
False
False
True
True
True
14
False
False
False
False
False
False
False
False
True
15
False
False
False
False
False
False
False
False
True
16
False
False
False
False
False
False
False
False
True
17
False
False
False
False
False
False
False
False
True
18
False
False
False
False
False
False
True
True
True
19
False
False
False
False
False
False
False
False
True
20
False
False
False
False
False
False
False
False
True
21
False
False
False
False
False
False
False
False
True
22
False
False
False
False
False
False
False
False
True
23
False
False
False
False
False
False
False
False
True
24
False
False
False
False
False
False
False
False
True
25
False
False
False
False
False
False
False
False
True
26
False
False
False
False
False
False
False
False
True
27
False
False
False
False
False
False
False
False
True
28
False
False
False
False
False
False
False
True
True
29
False
False
False
False
False
False
False
False
True
...
...
...
...
...
...
...
...
...
...
35519
False
False
False
False
False
False
True
False
False
35520
False
False
False
False
False
False
False
False
False
35521
False
False
False
False
False
False
False
False
False
35522
False
False
False
False
False
False
False
False
False
35523
False
False
False
False
False
False
False
False
False
35524
False
False
False
False
False
False
False
False
False
35525
False
False
False
False
False
False
False
False
False
35526
False
False
False
False
False
False
False
False
False
35527
False
False
False
False
False
False
True
True
True
35528
False
False
False
False
False
False
False
False
False
35529
False
False
False
False
False
False
False
False
True
35530
False
False
False
False
False
False
False
False
True
35531
False
False
False
False
False
False
False
False
False
35532
False
False
False
False
False
False
False
False
False
35533
False
False
False
False
False
False
False
False
False
35534
False
False
False
False
False
False
False
False
False
35535
False
False
False
False
False
False
False
False
False
35536
False
False
False
False
False
False
False
False
False
35537
False
False
False
False
False
False
False
False
False
35538
False
False
False
False
False
False
False
False
False
35539
False
False
False
False
False
False
False
False
False
35540
False
False
False
False
False
False
False
False
False
35541
False
False
False
False
False
False
False
False
False
35542
False
False
False
False
False
False
False
False
False
35543
False
False
False
False
False
False
True
True
True
35544
False
False
False
False
False
False
True
True
True
35545
False
False
False
False
False
False
True
True
True
35546
False
False
False
False
False
False
False
False
False
35547
False
False
False
False
False
False
False
False
False
35548
False
False
False
False
False
True
True
True
True
35549 rows × 9 columns
In [31]:
# Keep only the rows that have a missing value in at least one column:
# `any(axis=1)` reduces the boolean frame across each row
surveys_df[surveys_df.isnull().any(axis=1)]
Out[31]:
record_id
month
day
year
plot_id
species_id
sex
hindfoot_length
weight
0
1
7
16
1977
2
NL
M
32.0
NaN
1
2
7
16
1977
3
NL
M
33.0
NaN
2
3
7
16
1977
2
DM
F
37.0
NaN
3
4
7
16
1977
7
DM
M
36.0
NaN
4
5
7
16
1977
3
DM
M
35.0
NaN
5
6
7
16
1977
1
PF
M
14.0
NaN
6
7
7
16
1977
2
PE
F
NaN
NaN
7
8
7
16
1977
1
DM
M
37.0
NaN
8
9
7
16
1977
1
DM
F
34.0
NaN
9
10
7
16
1977
6
PF
F
20.0
NaN
10
11
7
16
1977
5
DS
F
53.0
NaN
11
12
7
16
1977
7
DM
M
38.0
NaN
12
13
7
16
1977
3
DM
M
35.0
NaN
13
14
7
16
1977
8
DM
NaN
NaN
NaN
14
15
7
16
1977
6
DM
F
36.0
NaN
15
16
7
16
1977
4
DM
F
36.0
NaN
16
17
7
16
1977
3
DS
F
48.0
NaN
17
18
7
16
1977
2
PP
M
22.0
NaN
18
19
7
16
1977
4
PF
NaN
NaN
NaN
19
20
7
17
1977
11
DS
F
48.0
NaN
20
21
7
17
1977
14
DM
F
34.0
NaN
21
22
7
17
1977
15
NL
F
31.0
NaN
22
23
7
17
1977
13
DM
M
36.0
NaN
23
24
7
17
1977
13
SH
M
21.0
NaN
24
25
7
17
1977
9
DM
M
35.0
NaN
25
26
7
17
1977
15
DM
M
31.0
NaN
26
27
7
17
1977
15
DM
M
36.0
NaN
27
28
7
17
1977
11
DM
M
38.0
NaN
28
29
7
17
1977
11
PP
M
NaN
NaN
29
30
7
17
1977
10
DS
F
52.0
NaN
...
...
...
...
...
...
...
...
...
...
35187
35188
11
10
2002
10
NaN
NaN
NaN
NaN
35256
35257
12
7
2002
22
PB
M
26.0
NaN
35259
35260
12
7
2002
21
PB
F
24.0
NaN
35277
35278
12
7
2002
20
AH
NaN
NaN
NaN
35279
35280
12
7
2002
16
PB
M
28.0
NaN
35322
35323
12
8
2002
11
AH
NaN
NaN
NaN
35328
35329
12
8
2002
11
PP
M
NaN
16.0
35370
35371
12
8
2002
14
AH
NaN
NaN
NaN
35378
35379
12
8
2002
15
PB
F
26.0
NaN
35384
35385
12
8
2002
10
NaN
NaN
NaN
NaN
35387
35388
12
29
2002
1
DO
M
35.0
NaN
35403
35404
12
29
2002
2
NL
F
30.0
NaN
35448
35449
12
29
2002
20
OT
F
20.0
NaN
35452
35453
12
29
2002
20
PB
M
28.0
NaN
35457
35458
12
29
2002
20
AH
NaN
NaN
NaN
35477
35478
12
29
2002
24
AH
NaN
NaN
NaN
35485
35486
12
29
2002
16
DO
M
37.0
NaN
35495
35496
12
31
2002
4
PB
NaN
NaN
NaN
35510
35511
12
31
2002
11
DX
NaN
NaN
NaN
35511
35512
12
31
2002
11
US
NaN
NaN
NaN
35512
35513
12
31
2002
11
US
NaN
NaN
NaN
35514
35515
12
31
2002
11
SF
F
27.0
NaN
35519
35520
12
31
2002
9
SF
NaN
24.0
36.0
35527
35528
12
31
2002
13
US
NaN
NaN
NaN
35529
35530
12
31
2002
13
OT
F
20.0
NaN
35530
35531
12
31
2002
13
PB
F
27.0
NaN
35543
35544
12
31
2002
15
US
NaN
NaN
NaN
35544
35545
12
31
2002
15
AH
NaN
NaN
NaN
35545
35546
12
31
2002
15
AH
NaN
NaN
NaN
35548
35549
12
31
2002
5
NaN
NaN
NaN
NaN
4873 rows × 9 columns
In [32]:
# Select the `weight` values of the rows where weight is missing.
# A single .loc[row_mask, column] lookup replaces the chained form
# surveys_df[mask]['weight'], which first builds a filtered copy of every column
# before picking one of them.
empty_weights = surveys_df.loc[pd.isnull(surveys_df['weight']), 'weight']
# print() is kept so the cell still emits the plain-text Series shown beneath it
print(empty_weights)
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
5 NaN
6 NaN
7 NaN
8 NaN
9 NaN
10 NaN
11 NaN
12 NaN
13 NaN
14 NaN
15 NaN
16 NaN
17 NaN
18 NaN
19 NaN
20 NaN
21 NaN
22 NaN
23 NaN
24 NaN
25 NaN
26 NaN
27 NaN
28 NaN
29 NaN
..
35138 NaN
35168 NaN
35187 NaN
35256 NaN
35259 NaN
35277 NaN
35279 NaN
35322 NaN
35370 NaN
35378 NaN
35384 NaN
35387 NaN
35403 NaN
35448 NaN
35452 NaN
35457 NaN
35477 NaN
35485 NaN
35495 NaN
35510 NaN
35511 NaN
35512 NaN
35514 NaN
35527 NaN
35529 NaN
35530 NaN
35543 NaN
35544 NaN
35545 NaN
35548 NaN
Name: weight, Length: 3266, dtype: float64
In [ ]:
Content source: GT-IDEaS/SkillsWorkshop2017
Similar notebooks: