In [1]:
import pandas as pd
import numpy as np
import math
In [2]:
# Load the sample CSV into the frame that every later cell works on.
csv_path = 'data/src/sample_pandas_normal_nan.csv'
df = pd.read_csv(csv_path)
print(df)
In [3]:
# Show the dtype pandas inferred for each column.
column_dtypes = df.dtypes
print(column_dtypes)
In [4]:
# Inspect one cell (row label 1, column 'name') and the Python type it holds.
name_value = df.at[1, 'name']
print(name_value)
print(type(name_value))
In [5]:
# Inspect one cell (row label 0, column 'point') and the Python type it holds.
point_value = df.at[0, 'point']
print(point_value)
print(type(point_value))
In [6]:
# Run the same scalar through the pandas, NumPy and stdlib missing-value
# checks, in that order, printing each result on its own line.
point_value = df.at[0, 'point']
for is_missing in (pd.isnull, np.isnan, math.isnan):
    print(is_missing(point_value))
In [7]:
# Comparing with `== np.nan` is always False, even for a NaN value, because
# NaN never compares equal to anything; use isnull()/isnan() to detect it.
point_value = df.at[0, 'point']
equals_nan = point_value == np.nan
print(equals_nan)
In [8]:
# Drop only the rows in which every value is missing.
print(df.dropna(axis=0, how='all'))
In [9]:
# Drop only the columns in which every value is missing.
print(df.dropna(axis='columns', how='all'))
In [10]:
# Remove all-missing rows first, then all-missing columns from the result.
without_empty_rows = df.dropna(how='all')
print(without_empty_rows.dropna(how='all', axis=1))
In [11]:
# Keep the frame stripped of all-missing rows and columns as `df2`;
# later cells use it as their starting point.
df2 = (
    df
    .dropna(how='all')          # rows where every value is missing
    .dropna(how='all', axis=1)  # columns where every value is missing
)
print(df2)
In [12]:
# Drop every row that contains at least one missing value.
print(df2.dropna(axis=0, how='any'))
In [13]:
# dropna() with no arguments: the default is how='any' on rows.
rows_without_nan = df2.dropna()
print(rows_without_nan)
In [14]:
# Drop every column that contains at least one missing value.
print(df2.dropna(axis='columns', how='any'))
In [15]:
# Keep only the rows that have at least 3 non-missing values.
print(df.dropna(axis=0, thresh=3))
In [16]:
# Keep only the columns that have at least 3 non-missing values.
print(df.dropna(axis='columns', thresh=3))
In [17]:
# Drop rows whose 'age' is missing; other columns are not considered.
required_columns = ['age']
print(df.dropna(subset=required_columns))
In [18]:
# Drop rows where 'age' or 'state' is missing (default how='any').
required_columns = ['age', 'state']
print(df.dropna(subset=required_columns))
In [19]:
# Drop rows only when both 'age' and 'state' are missing.
checked_columns = ['age', 'state']
print(df.dropna(how='all', subset=checked_columns))
In [20]:
# With axis=1, `subset` names row labels: drop every column that has a
# missing value in row 0 or row 4.
checked_rows = [0, 4]
print(df.dropna(axis='columns', subset=checked_rows))
In [21]:
# With axis=1, `subset` names row labels: drop a column only when it is
# missing in both row 0 and row 4.
checked_rows = [0, 4]
print(df.dropna(axis='columns', how='all', subset=checked_rows))
In [22]:
# Pull the 'age' column out as a Series for the Series-level examples.
s = df.loc[:, 'age']
print(s)
In [23]:
# Series.dropna() removes the missing entries from the Series.
non_missing = s.dropna()
print(non_missing)
In [24]:
# Replace every missing value in the frame with the scalar 0.
print(df.fillna(value=0))
In [25]:
# Per-column fill values; columns not listed in the dict keep their NaN.
fill_by_column = {'name': 'XXX', 'age': 20, 'point': 0}
print(df.fillna(value=fill_by_column))
In [26]:
# Build a Series whose index labels are column names and whose values are
# the replacements to use for those columns.
fill_labels = ['name', 'age']
fill_values = ['ZZZ', 100]
s_for_fill = pd.Series(data=fill_values, index=fill_labels)
print(s_for_fill)
In [27]:
# Fill from a Series: its index labels are matched against column names.
print(df.fillna(value=s_for_fill))
In [28]:
# Column-wise mean (NaN is skipped by default). numeric_only=True restricts
# the calculation to numeric columns: on pandas >= 2.0 a bare mean() raises
# TypeError if the frame contains non-numeric (object) columns, whereas older
# versions silently dropped them.
print(df.mean(numeric_only=True))
In [29]:
# Fill the NaN in each numeric column with that column's mean. fillna()
# matches the Series of means to columns by label, so columns without a
# mean (non-numeric ones) are left as they are.
# numeric_only=True is needed on pandas >= 2.0, where a bare mean() raises
# TypeError if the frame contains non-numeric (object) columns.
column_means = df.mean(numeric_only=True)
print(df.fillna(column_means))
In [30]:
# Fill the NaN in each numeric column with that column's median. fillna()
# matches the Series of medians to columns by label, so columns without a
# median (non-numeric ones) are left as they are.
# numeric_only=True is needed on pandas >= 2.0, where a bare median() raises
# TypeError if the frame contains non-numeric (object) columns.
column_medians = df.median(numeric_only=True)
print(df.fillna(column_medians))
In [31]:
# mode() returns a frame (a column can have several modes); its first row
# holds one mode per column and is used as the per-column fill value.
first_modes = df.mode().iloc[0]
print(df.fillna(first_modes))
In [32]:
# Forward fill: propagate the last valid value downward into following NaN.
# ffill() replaces fillna(method='ffill'), whose `method` keyword is
# deprecated since pandas 2.1; the result is the same.
print(df.ffill())
In [33]:
# Backward fill: propagate the next valid value upward into preceding NaN.
# bfill() replaces fillna(method='bfill'), whose `method` keyword is
# deprecated since pandas 2.1; the result is the same.
print(df.bfill())
In [34]:
# Backward fill, but fill at most 1 consecutive NaN per gap (limit=1).
# bfill(limit=...) replaces fillna(method='bfill', limit=...), whose `method`
# keyword is deprecated since pandas 2.1; the result is the same.
print(df.bfill(limit=1))
In [35]:
# Take the 'age' column as a Series again for the Series.fillna examples.
s = df.loc[:, 'age']
print(s)
In [36]:
# Replace every missing entry of the Series with the scalar 100.
print(s.fillna(value=100))
In [37]:
# For a Series the dict keys are index labels: a missing value at label 1
# becomes 100 and one at label 4 becomes 0; other NaN are left alone.
fill_by_label = {1: 100, 4: 0}
print(s.fillna(value=fill_by_label))
In [38]:
# Backward-fill the Series, filling at most 1 consecutive NaN per gap.
# bfill(limit=...) replaces fillna(method='bfill', limit=...), whose `method`
# keyword is deprecated since pandas 2.1; the result is the same.
print(s.bfill(limit=1))
In [39]:
# Show the frame again as a reference point. `df` is assigned only once (when
# the CSV is read) and none of the visible dropna/fillna calls use inplace=True,
# so this is still the frame as loaded.
print(df)
In [40]:
# Boolean Series: True where 'point' is missing (isna is an alias of isnull).
print(df['point'].isna())
In [41]:
# Select the rows whose 'point' value is missing, using a boolean mask.
point_is_missing = df['point'].isna()
print(df.loc[point_is_missing])
In [42]:
# Take the row at position 2 and flag which of its columns are missing.
row_at_2 = df.iloc[2]
print(row_at_2.isna())
In [43]:
# Select the columns that are missing in the row at position 2.
missing_in_row_2 = df.iloc[2].isna()
print(df.loc[:, missing_in_row_2])
In [44]:
# Rebuild `df2`: the frame with all-missing rows and all-missing columns
# removed, used by the extraction examples below.
df2 = (
    df
    .dropna(how='all')          # rows where every value is missing
    .dropna(how='all', axis=1)  # columns where every value is missing
)
print(df2)
In [45]:
# Element-wise missing-value mask with the same shape as df2.
print(df2.isna())
In [46]:
# One flag per row: True when the row has at least one missing value.
print(df2.isna().any(axis='columns'))
In [47]:
# Select the rows that contain at least one missing value.
row_has_missing = df2.isna().any(axis=1)
print(df2.loc[row_has_missing])
In [48]:
# One flag per column: True when the column has at least one missing value.
print(df2.isna().any(axis=0))
In [49]:
# Select the columns that contain at least one missing value.
column_has_missing = df2.isna().any()
print(df2.loc[:, column_has_missing])