notebook.community

Edit and run



In [1]:

    
import os

import pandas as pd

# Location of the credit-card transactions CSV. Set the CREDITCARD_CSV environment
# variable to point at a copy on another machine; when it is unset the original
# hard-coded path is used, so existing runs behave exactly as before.
DATA_PATH = os.environ.get(
    'CREDITCARD_CSV',
    '/home/dinesh/PycharmProjects/creditCardFraudDetection/creditcard.csv',
)

# Read the whole file into a DataFrame; later cells select feature columns and the
# `Class` label column from it.
df = pd.read_csv(DATA_PATH)



In [2]:

    
# Names of the DataFrame columns that are fed to the models as features.
feature_cols = [
    'Time',
    'V1',
    'V2',
]



In [3]:

    
# Feature matrix: every row of df, restricted to the columns listed in feature_cols.
X = df[feature_cols]



In [4]:

    
# Sanity check: (number of rows, number of selected feature columns).
X.shape









    Out[4]:





(284807, 3)



In [5]:

    
# Target vector: the `Class` column of df (item access instead of attribute access).
y = df['Class']



In [6]:

    
# Sanity check: the label vector should have one entry per row of X.
y.shape









    Out[6]:





(284807,)



In [7]:

    
from sklearn.model_selection import train_test_split
# Hold out 33% of the rows as a test set; random_state is fixed so the same rows
# land in each split on every run.
# NOTE(review): the split is not stratified. With so few non-zero labels in y_train
# (see the count_nonzero check that follows) consider passing stratify=y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)



In [8]:

    
import numpy as np
# Number of training rows whose label is non-zero.
int((y_train != 0).sum())









    Out[8]:





343



In [9]:

    
# Total number of training labels, for comparison with the non-zero count.
y_train.size









    Out[9]:





190820



In [10]:

    
# Shape of the held-out feature frame returned by train_test_split.
X_test.shape









    Out[10]:





(93987, 3)



In [11]:

    
from imblearn.over_sampling import SMOTE



In [12]:

    
# Plain ("regular") SMOTE is the sampler's default behaviour, so the `kind` argument
# is omitted: it was deprecated and later removed from imbalanced-learn, and passing
# it fails on current releases.
# random_state is pinned so the synthetic minority rows are the same on every run.
sm = SMOTE(random_state=42)



In [13]:

    
# Oversample the minority class using the TRAINING split only, so the test split
# stays free of synthetic rows.
# `fit_resample` is the current name of the former `fit_sample`, which was deprecated
# and then removed from imbalanced-learn.
X_res, y_res = sm.fit_resample(X_train, y_train)



In [14]:

    
# Number of non-zero labels after resampling.
int((y_res != 0).sum())









    Out[14]:





190477



In [15]:

    
# Total number of labels after resampling, for comparison with the non-zero count.
y_res.size









    Out[15]:





380954



In [16]:

    
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier



In [17]:

    
# Fit a random forest on the resampled training data and score it on the untouched
# test split. random_state is pinned so the forest, and therefore the reported
# score, is the same on every Restart & Run All.
# NOTE(review): `score` is mean accuracy, which is dominated by the majority class
# on data this imbalanced (343 non-zero labels in 190820 training rows). Consider
# also reporting precision/recall for the positive class.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_res, y_res)
scores = clf.score(X_test, y_test)



In [18]:

    
# Show the random forest's test-split score computed in the previous cell.
scores









    Out[18]:





0.9730494642876143



In [19]:

    
# Gaussian naive Bayes trained on the same resampled data. `fit` returns the
# estimator itself, so construction and training are chained into one expression.
clf = GaussianNB().fit(X_res, y_res)

# Score on the untouched test split, stored under the same name as before.
scores = clf.score(X_test, y_test)



In [20]:

    
# Show the Gaussian naive Bayes test-split score computed in the previous cell.
scores









    Out[20]:





0.96701671507761711



In [21]:

    
# AdaBoost over deep entropy trees with random split selection, trained on the
# resampled data and scored on the untouched test split.
# The explicit algorithm="SAMME.R" argument is dropped: it was the default in the
# scikit-learn releases that supported it, and it is rejected by the releases that
# removed it, so omitting it keeps old behaviour and runs on current versions.
# random_state is pinned because splitter='random' makes the base trees stochastic.
clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=20, criterion='entropy', splitter='random'),
    random_state=42,
)
clf.fit(X_res, y_res)
scores = clf.score(X_test, y_test)



In [22]:

    
# Show the AdaBoost test-split score computed in the previous cell.
scores









    Out[22]:





0.97450711268579693