In [1]:
import h2o
# Start (or attach to) an H2O instance for this notebook.
# NOTE(review): an integer max_mem_size is presumably interpreted as gigabytes
# by the installed h2o version — confirm against its h2o.init documentation.
h2o.init(max_mem_size = 2)
In [2]:
# Start from a clean slate by removing the objects held by the H2O cluster.
# NOTE(review): if h2o.init() attached to an already-running shared cluster,
# this presumably deletes other sessions' frames/models too — confirm intended.
h2o.remove_all()
In [3]:
# Render matplotlib figures inline in the notebook output.
# run_line_magic() is the supported replacement for the deprecated
# InteractiveShell.magic() call that older nbconvert exports emitted.
get_ipython().run_line_magic('matplotlib', 'inline')
In [4]:
import numpy as np
import pandas as pd
import os
from h2o.estimators.deeplearning import H2OAutoEncoderEstimator, H2ODeepLearningEstimator
In [5]:
# Load the credit-card CSV into H2O; the relative path is resolved against
# the notebook's current working directory before being handed to H2O.
credit_card_data = h2o.import_file(
    path=os.path.realpath("../data/fraud/creditcard.csv"),
)
In [6]:
# Preview the first rows of the frame imported into H2O.
credit_card_data.head()
Out[6]:
In [7]:
# Pull the H2O frame into a local pandas DataFrame for exploration/plotting.
credit_card_data_df = credit_card_data.as_data_frame(use_pandas = True)
In [8]:
# Summary statistics of the 'Class' column.
credit_card_data_df['Class'].describe()
Out[8]:
In [9]:
import matplotlib.pyplot as plt

# Count rows per class directly. value_counts() avoids adding a constant
# helper column to credit_card_data_df, which would otherwise be carried into
# anything built from this DataFrame later on. sort_index() keeps the output
# ordered by class label, like a groupby on 'Class' would.
print(credit_card_data_df['Class'].value_counts().sort_index())

# Histogram of the class labels, drawn with the 'ggplot' matplotlib style.
plt.style.use('ggplot')
credit_card_data_df['Class'].hist()
Out[9]:
In [12]:
# 'Time' is expressed in seconds:
#   1 hour = 60 * 60   = 3,600 s
#   1 day  = 24 * 3600 = 86,400 s
time_in_seconds = credit_card_data_df['Time']
print(time_in_seconds.describe())

# Overall distribution first, then one finer-grained histogram per class.
time_in_seconds.hist(bins=20)
time_in_seconds.hist(by=credit_card_data_df['Class'], bins=100)
# Alternative: credit_card_data_df['Time'].plot(kind='hist', alpha=0.5)
Out[12]:
In [39]:
# Number of rows for each (Time, Class) combination, shown as a flat table
# with the count in a 'Counter' column; only the first rows are displayed.
(
    credit_card_data_df
    .groupby(['Time', 'Class'])
    .size()
    .reset_index(name='Counter')
    .head()
)
Out[39]:
In [ ]:
# import matplotlib.cm as cm
# gg = credit_card_data_df.groupby(['Time', 'Class']).sum().unstack()
In [ ]:
# gg.columns = gg.columns.droplevel()
# gg.plot(kind = 'bar', colormap = cm.Accent, width = 1)
In [14]:
# Visualize the class distribution ...
from ggplot import *
# Scatter of class label against time, coloured by class.
(
    ggplot(
        aes(x='Time', y='Class', color='Class'),
        data=credit_card_data_df,
    )
    + geom_point()
)
Out[14]:
In [20]:
# Summary statistics of 'Amount', reported separately for each class label.
for class_label, heading in ((0, "Class 0"), (1, "Class 1")):
    print(heading)
    is_in_class = credit_card_data_df['Class'] == class_label
    print(credit_card_data_df.loc[is_in_class, 'Amount'].describe())
In [32]:
# Peek at every column except the 'Class' label.
# Index.difference returns the remaining labels sorted, so the preview's
# column order may differ from the DataFrame's own order.
credit_card_data_df.loc[
    :, credit_card_data_df.columns.difference(['Class'])
].head(2)
Out[32]:
In [35]:
# Push the pandas DataFrame back into H2O so it can be used for modelling.
credit_card_h2o_frame = h2o.H2OFrame(credit_card_data_df)

# Two ratios yield three frames: train, valid, and the remainder as test.
# The fixed seed keeps the split repeatable between runs.
train, valid, test = credit_card_h2o_frame.split_frame(
    ratios=[0.6, 0.2],
    seed=0,
)
In [34]:
# Autoencoder configuration: three hidden layers of 50 units with Tanh
# activation, trained for 100 epochs; constant columns are not dropped.
model = H2OAutoEncoderEstimator(
    hidden=[50, 50, 50],
    activation="Tanh",
    epochs=100,
    ignore_const_cols=False,
)
In [ ]:
# Fit the autoencoder on the training split only, using the validation split
# for monitoring, so the test split stays unseen for later evaluation.
# The inputs exclude columns that are not real features: the 'Class' label
# (the autoencoder must not be shown the label) and the constant
# 'Counter_on_Class' helper column, if it is present in the frame.
non_feature_cols = {'Class', 'Counter_on_Class'}
feature_cols = [
    col for col in credit_card_h2o_frame.col_names
    if col not in non_feature_cols
]
model.train(x=feature_cols, training_frame=train, validation_frame=valid)