In [1]:
import os

import pandas as pd

# Location of the credit-card transactions CSV.  The default is the path the
# notebook was originally run with; set the CREDITCARD_CSV environment variable
# to point at the file on another machine without editing this cell.
csv_path = os.environ.get(
    'CREDITCARD_CSV',
    '/home/dinesh/PycharmProjects/creditCardFraudDetection/creditcard.csv',
)
df = pd.read_csv(csv_path)

In [2]:
# Names of the df columns that are used as model inputs.
feature_cols = [
    'Time',
    'V1',
    'V2',
]

In [3]:
# Feature matrix: every row of df, restricted to the columns in feature_cols.
X = df[feature_cols]

In [4]:
# Dimensions of the feature matrix as (rows, columns).
X.shape


Out[4]:
(284807, 3)

In [5]:
# Target vector: the 'Class' column of df.
y = df['Class']

In [6]:
# Length of the target vector; it should equal the number of rows in X.
y.shape


Out[6]:
(284807,)

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
# NOTE(review): the split is not stratified on y -- with labels this imbalanced
# consider passing stratify=y (this would change every figure recorded below).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [8]:
import numpy as np
# Number of non-zero labels in the training split
# (presumably the fraud rows -- confirm the encoding of 'Class').
np.count_nonzero(y_train)


Out[8]:
343

In [9]:
# Total number of labels in the training split, for comparison with the
# non-zero count above.
np.size(y_train)


Out[9]:
190820

In [10]:
# Dimensions of the held-out test features as (rows, columns).
X_test.shape


Out[10]:
(93987, 3)

In [11]:
from imblearn.over_sampling import SMOTE

In [12]:
# Over-sampler used to balance the training labels.
# The former `kind='regular'` argument selected SMOTE's default variant and has
# since been removed from imbalanced-learn, so it is omitted here; the algorithm
# is unchanged.  A fixed seed makes the generated synthetic rows repeatable.
sm = SMOTE(random_state=42)

In [13]:
# Over-sample the training split only; the test split is left untouched so the
# evaluation is done on real rows.
# `fit_sample` was renamed to `fit_resample` in imbalanced-learn and the old
# name was later removed, so prefer the new name and fall back to the old one
# on installations that predate the rename.
_resample = getattr(sm, 'fit_resample', None) or sm.fit_sample
X_res, y_res = _resample(X_train, y_train)

In [14]:
# Number of non-zero labels after resampling.
np.count_nonzero(y_res)


Out[14]:
190477

In [15]:
# Total number of labels after resampling; compare with the non-zero count to
# check how balanced the resampled classes are.
np.size(y_res)


Out[15]:
380954

In [16]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

In [17]:
# Random forest trained on the over-sampled training data and scored on the
# untouched test split.  The seed makes the recorded score repeatable.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_res, y_res)
# `score` is plain accuracy.  Only 343 of the 190820 training labels are
# non-zero, so accuracy is dominated by the majority class.
# NOTE(review): consider also reporting recall / precision for the rare class.
scores = clf.score(X_test, y_test)

In [18]:
# Display the test-set accuracy computed in the previous cell.
scores


Out[18]:
0.9730494642876143

In [19]:
# Gaussian naive Bayes baseline: train on the over-sampled data, then measure
# accuracy on the held-out split.  `fit` returns the estimator itself, so the
# construction and training can be chained.
clf = GaussianNB().fit(X_res, y_res)
scores = clf.score(X_test, y_test)

In [20]:
# Display the test-set accuracy computed in the previous cell.
scores


Out[20]:
0.96701671507761711

In [21]:
# AdaBoost over deep, randomly-split entropy trees, trained on the over-sampled
# data and scored on the untouched test split.
# The explicit algorithm="SAMME.R" argument is dropped: it was the default on
# the scikit-learn versions that support it and is rejected by recent releases.
# Both the boosting and the random tree splits are stochastic, so the ensemble
# is seeded to make the recorded score repeatable.
clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=20, criterion='entropy', splitter='random'),
    random_state=42,
)
clf.fit(X_res, y_res)
scores = clf.score(X_test, y_test)

In [22]:
# Display the test-set accuracy computed in the previous cell.
scores


Out[22]:
0.97450711268579693