In [1]:
import os

import pandas as pd

# Location of the credit-card transactions CSV.
# The path can be overridden with the CREDITCARD_CSV environment variable so the
# notebook can run on machines other than the one it was written on; the
# original absolute path is kept as the fallback so existing setups still work.
DATA_PATH = os.environ.get(
    'CREDITCARD_CSV',
    '/home/dinesh/PycharmProjects/creditCardFraudDetection/creditcard.csv',
)

# Load the whole CSV into a DataFrame; later cells read columns from `df`.
df = pd.read_csv(DATA_PATH)
In [2]:
# Names of the DataFrame columns used as model inputs.
feature_cols = [
    'Time',
    'V1',
    'V2',
]
In [3]:
# Feature matrix: every row of `df`, restricted to the columns in `feature_cols`.
X = df.loc[:, feature_cols]
In [4]:
# Show the (rows, columns) shape of the feature matrix.
X.shape
Out[4]:
In [5]:
# Target vector: the `Class` column of the loaded frame.
y = df['Class']
In [6]:
# Show the shape of the target vector.
y.shape
Out[6]:
In [7]:
from sklearn.model_selection import train_test_split
# Hold out 33% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)
In [8]:
import numpy as np
# Count the non-zero labels in the training split
# (presumably the positive/fraud rows -- confirm the encoding of `Class`).
np.count_nonzero(y_train)
Out[8]:
In [9]:
# Total number of labels in the training split, for comparison with the non-zero count.
np.size(y_train)
Out[9]:
In [10]:
# Show the (rows, columns) shape of the held-out test features.
X_test.shape
Out[10]:
In [11]:
from imblearn.over_sampling import SMOTE
In [12]:
# Build the SMOTE over-sampler.
# The former `kind='regular'` argument is omitted: it was the default variant,
# and the `kind` parameter has since been removed from imbalanced-learn, so
# passing it fails on current releases. `random_state` is fixed so the
# synthetic samples are the same on every run.
sm = SMOTE(random_state=42)
In [13]:
# Over-sample the training split only; the test split is left untouched.
# `fit_resample` is the supported name of this method -- the older
# `fit_sample` alias was deprecated and later removed from imbalanced-learn.
X_res, y_res = sm.fit_resample(X_train, y_train)
In [14]:
# Count the non-zero labels after resampling.
np.count_nonzero(y_res)
Out[14]:
In [15]:
# Total number of labels after resampling, for comparison with the non-zero count.
np.size(y_res)
Out[15]:
In [16]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
In [17]:
# Train a random forest on the resampled training data.
# `random_state` is fixed (same seed as the train/test split) so the fitted
# forest, and therefore the reported score, is reproducible across runs.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_res, y_res)

# Evaluate on the held-out test split; for classifiers `score` is mean accuracy.
# NOTE(review): the test split is not resampled, so if the classes are heavily
# imbalanced accuracy alone can look high for a poor model -- consider also
# reporting precision/recall.
scores = clf.score(X_test, y_test)
In [18]:
# Display the test-set score computed in the preceding cell.
scores
Out[18]:
In [19]:
# Gaussian naive Bayes: fit on the resampled training data (`fit` returns the
# estimator itself, so construction and fitting are chained), then score on
# the held-out test split.
clf = GaussianNB().fit(X_res, y_res)
scores = clf.score(X_test, y_test)
In [20]:
# Display the test-set score computed in the preceding cell.
scores
Out[20]:
In [21]:
# AdaBoost over deep, randomly-split entropy trees.
# The explicit `algorithm="SAMME.R"` argument is dropped: it was the default on
# older scikit-learn releases (so behaviour there is unchanged), and it has
# since been deprecated and is rejected by recent releases.
# `random_state` is fixed because the base trees use `splitter='random'`;
# without a seed every run yields a different ensemble and score.
clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=20, criterion='entropy', splitter='random'),
    random_state=42,
)
clf.fit(X_res, y_res)

# Mean accuracy on the held-out (non-resampled) test split.
scores = clf.score(X_test, y_test)
In [22]:
# Display the test-set score computed in the preceding cell.
scores
Out[22]: