6.29 - Pandas missing data


In [1]:
import numpy as np
import pandas as pd

In [2]:
# Toy column data: 'a' has one missing entry, 'b' has two, 'c' has none.
d = {
    'a': [1, 2, np.nan],
    'b': [5, np.nan, np.nan],
    'c': [1, 2, 3],
}

In [4]:
# Build the demo frame from the dict of columns; each key becomes a column.
df = pd.DataFrame(data=d)
# Bare last expression so the notebook renders the frame.
df


Out[4]:
a b c
0 1.0 5.0 1
1 2.0 NaN 2
2 NaN NaN 3

In [6]:
# Keep only the rows that have no missing value in any column
# (axis=0 / how='any' are the defaults, spelled out here for clarity).
df.dropna(axis=0, how='any')


Out[6]:
a b c
0 1.0 5.0 1

In [7]:
# Drop every column that contains at least one missing value.
df.dropna(axis='columns')


Out[7]:
c
0 1
1 2
2 3

In [8]:
# Replace every missing entry with a placeholder string.
# The result is a new frame; df itself is not modified.
df.fillna(value='WOOSH')


Out[8]:
a b c
0 1 5 1
1 2 WOOSH 2
2 WOOSH WOOSH 3

In [11]:
# Impute the gap in column 'a' with the mean of its observed values
# (mean() skips NaN, so the fill value is (1 + 2) / 2 = 1.5).
df['a'].fillna(value=df['a'].mean())


Out[11]:
0    1.0
1    2.0
2    1.5
Name: a, dtype: float64

In [ ]: