In [10]:
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

In [11]:
# Load the dataset; the 'Class' column holds the label for each row.
credit_cards = pd.read_csv('creditcard.csv')

# Select the feature columns by dropping the label *by name* rather than by
# position, so the label cannot leak into the features if the CSV's column
# order ever changes (a missing 'Class' column raises KeyError here).
features_columns = credit_cards.columns.drop('Class')

features = credit_cards[features_columns]
labels = credit_cards['Class']

In [12]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): `feature_test` (singular) is inconsistent with
# `features_train`; the name is kept as-is because later cells may use it.
(features_train, feature_test,
 labels_train, labels_test) = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=0,
)

In [13]:
# Oversample the training split only; the held-out test split is not passed
# to SMOTE, so it contains no synthetic rows.
oversample = SMOTE(random_state=0)
# `fit_sample` is the legacy method name and is no longer available in recent
# imbalanced-learn releases; `fit_resample` is the supported equivalent and
# takes the same (X, y) arguments and returns the same (X_res, y_res) pair.
oversample_features, oversample_labels = oversample.fit_resample(features_train, labels_train)

In [14]:
# Number of rows labelled 1 after oversampling (True values sum as 1).
int((oversample_labels == 1).sum())


Out[14]:
227454

In [ ]: