In [1]:
import pandas as pd
import numpy as np
from plydata import call
In [2]:
# Example frame whose columns hold increasing amounts of missing data:
# w has one NaN, x has two, y has four, and z is NaN throughout.
df = pd.DataFrame(dict(
    w=[1, 2, np.nan, 4, 5],
    x=[np.nan, 2, np.nan, 4, 5],
    y=4 * [np.nan] + [5],
    z=5 * [np.nan],
))
df
Out[2]:
Drop rows with any NaN values
In [3]:
# The function object itself is handed to call(); no further arguments are
# given, so dropna runs with its default settings.
df >> call(pd.DataFrame.dropna)
Out[3]:
In [4]:
# You can also call a method of the dataframe by passing its name as a
# string with a leading dot: '.method_name'.
df >> call('.dropna')
Out[4]:
Drop rows with all NaN values
In [5]:
# how='all': a row is removed only when every value in it is NaN.
df >> call('.dropna', how='all')
Out[5]:
Drop rows with NaN values in the x column.
In [6]:
# subset=['x'] restricts the NaN check to column x.
df >> call('.dropna', subset=['x'])
Out[6]:
Keep only rows with at least 3 non-NaN values (drop the rest)
In [7]:
# thresh=3: rows with fewer than 3 non-NaN values are dropped.
df >> call('.dropna', thresh=3)
Out[7]:
Drop columns with all NaN values
In [8]:
# axis=1 makes dropna work on columns instead of rows.
df >> call('.dropna', axis=1, how='all')
Out[8]:
Drop columns with any NaN values in row 3.
In [9]:
# With axis=1, subset names the row labels to inspect (here the row labelled 3)
# rather than column names.
df >> call('.dropna', axis=1, subset=[3])
Out[9]:
Replace all NaN values with -1.
In [10]:
# Arguments after the function (-1 here) are passed along to fillna as the
# replacement value.
df >> call(pd.DataFrame.fillna, -1)
Out[10]:
In [11]:
# Same replacement, written with the '.method_name' string form.
df >> call('.fillna', -1)
Out[11]:
Replace all NaN values with the nearest non-NaN value above them in the same column
In [12]:
# Forward fill down each column. DataFrame.ffill is used because the `method`
# keyword of fillna is deprecated in recent pandas releases.
df >> call('.ffill')
Out[12]:
Replace all NaN values with the nearest non-NaN value below them in the same column
In [13]:
# Backward fill up each column. DataFrame.bfill is used because the `method`
# keyword of fillna is deprecated in recent pandas releases.
df >> call('.bfill')
Out[13]:
Replace at most 2 consecutive NaN values with the nearest non-NaN value below them in the same column
In [14]:
# Backward fill, but fill no more than 2 consecutive NaN values per gap.
# DataFrame.bfill replaces the deprecated fillna(method='bfill', ...) spelling.
df >> call('.bfill', limit=2)
Out[14]:
Replace all NaN values with the first non-NaN value to the left in the row
In [15]:
# Forward fill along each row (axis=1), i.e. from left to right.
# DataFrame.ffill replaces the deprecated fillna(method='ffill', ...) spelling.
df >> call('.ffill', axis=1)
Out[15]:
Replace all NaN values with the first non-NaN value to the right in the row
In [16]:
# Backward fill along each row (axis=1), i.e. from right to left.
# DataFrame.bfill replaces the deprecated fillna(method='bfill', ...) spelling.
df >> call('.bfill', axis=1)
Out[16]: