We're going to check the quality of a new data set...
Download the data file from /home/data/kelleher/MotorInsuranceFraudClaimABTFull.csv
In [2]:
import os, sys
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
In [3]:
# Read the motor insurance fraud claim table from its CSV file into a DataFrame.
abt_csv_path = '/home/data/kelleher/MotorInsuranceFraudClaimABTFull.csv'
df = pd.read_csv(abt_csv_path)

# Preview the first few rows as the cell's output.
df.head()
Out[3]:
In [4]:
# Size of the loaded table, displayed as a (rows, columns) tuple.
n_rows, n_cols = df.shape
(n_rows, n_cols)
Out[4]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [4]:
# Summary statistics for every column, not just the numeric ones.
# With its default arguments, describe() reports only numeric columns, so
# non-numeric features would be missing from this data-quality overview.
# include='all' adds count / unique / top / freq for those columns; entries
# that do not apply to a column's dtype are shown as NaN.
df.describe(include='all')
Out[4]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [5]:
# Box plot of 'Claim Amount' grouped by 'Injury Type'.
# Draw on an explicit Axes so the figure can be labelled and stands on its
# own when the notebook is skimmed.
fig, ax = plt.subplots()
df.boxplot(column='Claim Amount', by='Injury Type', ax=ax)

ax.set_title('Claim Amount by Injury Type')
ax.set_xlabel('Injury Type')
ax.set_ylabel('Claim Amount')
# Grouped pandas box plots add an automatic "Boxplot grouped by ..." figure
# super-title; clear it so it does not duplicate the Axes title.
fig.suptitle('')

# Render the figure without leaving the Axes repr as the cell output.
plt.show()
Out[5]: