Missing Values in a Pandas DataFrame


In [1]:
# import
import pandas as pd

In [2]:
# Load the training data from CSV into a DataFrame.
# The path is relative, so it is resolved against the current working directory.
data = pd.read_csv("data/train.csv")

In [5]:
# Preview the first 6 rows to get a feel for the columns;
# missing entries are rendered as NaN in the displayed table.
data.head(6)


Out[5]:
Loan_ID Gender Married Dependents Education Self_Employed ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term Credit_History Property_Area Loan_Status
0 LP001002 Male No 0 Graduate No 5849 0.0 NaN 360.0 1.0 Urban Y
1 LP001003 Male Yes 1 Graduate No 4583 1508.0 128.0 360.0 1.0 Rural N
2 LP001005 Male Yes 0 Graduate Yes 3000 0.0 66.0 360.0 1.0 Urban Y
3 LP001006 Male Yes 0 Not Graduate No 2583 2358.0 120.0 360.0 1.0 Urban Y
4 LP001008 Male No 0 Graduate No 6000 0.0 141.0 360.0 1.0 Urban Y
5 LP001011 Male Yes 2 Graduate Yes 5417 4196.0 267.0 360.0 1.0 Urban Y

In [8]:
# isnull() returns a boolean DataFrame of the same shape as `data`:
# True where a value is missing (NaN/None), False otherwise.
# Only the first 6 rows of the mask are shown.
data.isnull().head(6)


Out[8]:
Loan_ID Gender Married Dependents Education Self_Employed ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term Credit_History Property_Area Loan_Status
0 False False False False False False False False True False False False False
1 False False False False False False False False False False False False False
2 False False False False False False False False False False False False False
3 False False False False False False False False False False False False False
4 False False False False False False False False False False False False False
5 False False False False False False False False False False False False False

In [9]:
# notnull() is the element-wise inverse of isnull():
# True where a value is present, False where it is missing.
data.notnull().head(6)


Out[9]:
Loan_ID Gender Married Dependents Education Self_Employed ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term Credit_History Property_Area Loan_Status
0 True True True True True True True True False True True True True
1 True True True True True True True True True True True True True
2 True True True True True True True True True True True True True
3 True True True True True True True True True True True True True
4 True True True True True True True True True True True True True
5 True True True True True True True True True True True True True

In [10]:
# Is there at least one missing value anywhere in the frame?
# `.values` exposes the boolean mask as a NumPy array, so `.any()` reduces over
# every cell to a single bool (DataFrame.any() alone would reduce per column).
data.isnull().values.any()


Out[10]:
True

In [11]:
# Is every single value in the frame missing?
# As above, `.values` turns the mask into a NumPy array so `.all()` reduces
# over all cells to one bool instead of one result per column.
data.isnull().values.all()


Out[11]:
False

In [12]:
# Count the missing values (Null/NaN) in each column of the DataFrame.
# Summing the boolean mask counts each True as 1, giving one total per column.
data.isnull().sum()


Out[12]:
Loan_ID               0
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

In [13]:
# Total number of missing values in the whole DataFrame:
# the first sum() gives per-column counts, the second adds those counts together.
data.isnull().sum().sum()


Out[13]:
149

In [14]:
# To count missing values in one particular column, select that column first
# and sum its boolean mask.
data['Dependents'].isnull().sum()


Out[14]:
15

In [ ]: