In [17]:
# Core scientific stack and scikit-learn components used throughout the
# notebook (classification, grid search, one-hot encoding).
import os
import sys
import time

import numpy as np
import pandas as pd
import scipy
from scipy import stats
import matplotlib.pyplot as plt

from sklearn.ensemble import IsolationForest, RandomForestClassifier
# sklearn.grid_search was deprecated in 0.18 and removed in 0.20;
# GridSearchCV lives in sklearn.model_selection.
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.feature_extraction import DictVectorizer
In [18]:
# Location of the labeled training data.
# NOTE(review): absolute local path — consider an env var or a relative
# Path for portability.
# Earlier dataset (kept for provenance):
#   /Users/Gabriel/Dropbox/Research/DICE/Anomaly Detection/Usecases/POSIDONIA/cep_man_labeled.csv
dataDir = '/Users/Gabriel/Documents/workspaces/diceWorkspace/dmon-adp/data'
data = os.path.join(dataDir, 'CEP_Complete_Labeled_Extended.csv')
In [19]:
# Load the labeled dataset and use the 'key' column as the index.
df = pd.read_csv(data)
df.set_index('key', inplace=True)
# 'host' is an identifier column with no predictive value.
dropList = ['host']
# print() with a single argument behaves identically on Python 2 and 3;
# the original py2-only print statements are a syntax error on Python 3.
print("Dropped columns are: %s" % dropList)
df = df.drop(dropList, axis=1)
print("Index Name: %s" % df.index.name)
In [20]:
# Quick sanity check on dataset dimensions after dropping columns.
print("Dataframe shape (row, col): %s" % str(df.shape))
In [21]:
# Detect categorical (object-dtype) columns and one-hot encode them.
col = []
for el, v in df.dtypes.items():  # .iteritems() was removed in pandas 2.0
    if v == 'object':
        col.append(el)


def ohEncoding(data, cols, replace=False):
    """One-hot encode the given columns of a dataframe.

    Parameters
    ----------
    data : pd.DataFrame
        Input frame.
    cols : list of str
        Names of the categorical columns to encode.
    replace : bool
        If True, drop the original columns and join the encoded ones.

    Returns
    -------
    tuple
        (data, vecData, vec): the (possibly modified) frame, the encoded
        columns as a frame, and the fitted DictVectorizer.
    """
    vec = DictVectorizer()
    # Use a distinct loop variable so the lambda does not shadow the
    # module-level `col` list.
    mkdict = lambda row: dict((c, row[c]) for c in cols)
    vecData = pd.DataFrame(vec.fit_transform(data[cols].apply(mkdict, axis=1)).toarray())
    # NOTE(review): get_feature_names() is removed in scikit-learn >= 1.2;
    # switch to get_feature_names_out() when upgrading.
    vecData.columns = vec.get_feature_names()
    vecData.index = data.index
    if replace is True:
        data = data.drop(cols, axis=1)
        data = data.join(vecData)
    return data, vecData, vec


# DictVectorizer is already imported at the top of the notebook.
df, t, v = ohEncoding(df, col, replace=True)
print(df.shape)
In [22]:
# Split into features (all but the last column) and target (last column).
features = df.columns[:-1]
print("Detected Features are: %s" % features)
X = df[features]
# Target is always the last column of the dataframe
y = df.iloc[:, -1].values
print(y)
In [24]:
# Grid-search Random Forest hyperparameters with 5-fold cross-validation.
rfc = RandomForestClassifier(n_jobs=-1, max_features='sqrt', n_estimators=50, oob_score=True)
param_grid = {
    'n_estimators': [200, 700],
    # NOTE(review): 'auto' for max_features was removed in scikit-learn 1.3;
    # drop it when upgrading.
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth': [5, 15, 25]
}
start_time_g = time.time()
CV_rfc = GridSearchCV(estimator=rfc, param_grid=param_grid, cv=5)
CV_rfc.fit(X, y)
bestParam = CV_rfc.best_params_
# (the original printed best_params_ twice — once raw, once formatted)
print("Best params for Random Forest: %s" % str(bestParam))
elapsed_time_g = time.time() - start_time_g
print("Grid Search for Random Forest took: %s" % str(elapsed_time_g))
In [26]:
# 10-fold cross-validation of the tuned Random Forest.
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
# fix random seed for reproducibility
seed = 7
start_time = time.time()
clfKV = RandomForestClassifier(max_depth=bestParam['max_depth'],
                               n_estimators=bestParam['n_estimators'],
                               max_features=bestParam['max_features'],
                               n_jobs=-1)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(clfKV, X, y, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))
elapsed_time = time.time() - start_time
print("Cross validation took: %s" % str(elapsed_time))
In [29]:
# Train the final Random Forest with the grid-search winners, then rank
# feature importances and persist them as CSV.
start_time = time.time()
clf = RandomForestClassifier(max_depth=bestParam['max_depth'],
                             n_estimators=bestParam['n_estimators'],
                             max_features=bestParam['max_features'],
                             n_jobs=-1)
clf.fit(X, y)
predict = clf.predict(X)
predProb = clf.predict_proba(X)
# NOTE(review): this scores on the training set — it is a training score,
# not an unbiased estimate of generalization.
score = clf.score(X, y)
print("Training Score Random Forest: %s" % score)
expDir = '/Users/Gabriel/Documents/workspaces/diceWorkspace/dmon-adp/experiments'
# zip(X, ...) iterates the column names of the feature frame.
fimp = list(zip(X, clf.feature_importances_))
print("Feature importance Random Forest Training: ")
print(fimp)
elapsed_time = time.time() - start_time
print("Training Random Forest Took: %s" % str(elapsed_time))
dfimp = dict(fimp)
# list() is required on Python 3, where dict.items() returns a view.
dfimp = pd.DataFrame(list(dfimp.items()), columns=['Metric', 'Importance'])
# DataFrame.sort() was removed in pandas 0.20; sort_values replaces it.
sdfimp = dfimp.sort_values('Importance', ascending=False)
dfimpCsv = 'Feature_Importance_RF_%s.csv' % 'CEP'
sdfimp.to_csv(os.path.join(expDir, dfimpCsv))
In [16]:
# Grid-search Decision Tree hyperparameters with 5-fold cross-validation.
dt_g = DecisionTreeClassifier()
param_grid_dt = {
    # BUG FIX: 'random' is a splitter value, not a criterion; valid
    # criteria are 'gini' and 'entropy'. The original grid would fail.
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth': [5, 15, 25, 50, 100],
    # BUG FIX: the parameter is named min_samples_split; the misspelled
    # 'min_sample_split' key makes GridSearchCV raise an invalid-parameter
    # error.
    'min_samples_split': [2, 5, 10]
}
start_time_d = time.time()
CV_dt = GridSearchCV(estimator=dt_g, param_grid=param_grid_dt, cv=5)
CV_dt.fit(X, y)
bestParam_dt = CV_dt.best_params_
print("Best params for Decision Tree: %s" % str(bestParam_dt))
elapsed_time_dt = time.time() - start_time_d
print("Grid Search for Decision Tree took: %s" % str(elapsed_time_dt))
In [ ]:
# Train a Decision Tree with the grid-search winners.
# NOTE(review): the original cell referenced undefined names (settings,
# max_features, max_depth, mname, self.modelDir) copied from class code
# and could not run; rewired to use bestParam_dt / expDir defined in
# earlier cells of this notebook.
dt = DecisionTreeClassifier(criterion=bestParam_dt['criterion'],
                            splitter=bestParam_dt['splitter'],
                            max_features=bestParam_dt['max_features'],
                            max_depth=bestParam_dt['max_depth'],
                            min_samples_split=bestParam_dt['min_samples_split'])
dt.fit(X, y)
predict = dt.predict(X)
print("Prediction for Decision Tree Training:")
print(predict)
predProb = dt.predict_proba(X)
print("Prediction probabilities for Decision Tree Training:")
print(predProb)
score = dt.score(X, y)
print("Decision Tree Training Score: %s" % str(score))
fimp = list(zip(X, dt.feature_importances_))
# BUG FIX: the original message said "Random Forest" in the Decision Tree cell.
print("Feature importance Decision Tree Training: ")
print(fimp)
dfimp = dict(fimp)
dfimp = pd.DataFrame(list(dfimp.items()), columns=['Metric', 'Importance'])
# DataFrame.sort() was removed in pandas 0.20; sort_values replaces it.
sdfimp = dfimp.sort_values('Importance', ascending=False)
dfimpCsv = 'Feature_Importance_%s.csv' % 'DT_CEP'
sdfimp.to_csv(os.path.join(expDir, dfimpCsv))
In [45]:
# Load the unlabeled CEP data and apply the same preprocessing as training.
dataDir2 = '/Users/Gabriel/Documents/workspaces/diceWorkspace/dmon-adp/data'
# BUG FIX: the original joined against dataDir instead of dataDir2 (the two
# happen to hold the same path today, but the latent bug would bite as soon
# as they diverge).
cep = os.path.join(dataDir2, 'cep.csv')
dfCep = pd.read_csv(cep)
dfCep = dfCep.drop(dropList, axis=1)
dfCep.set_index('key', inplace=True)
# Re-detect categorical columns; reuse the ohEncoding helper defined earlier
# instead of redefining it (the original duplicated the whole function).
col = []
for el, v in dfCep.dtypes.items():  # .iteritems() was removed in pandas 2.0
    if v == 'object':
        col.append(el)
dfCep, t, v = ohEncoding(dfCep, col, replace=True)
# Show only the first rows instead of dumping the entire frame.
dfCep.head()
Out[45]:
In [47]:
# Apply the trained Random Forest to the unlabeled CEP data.
# NOTE(review): assumes dfCep's encoded columns match the training features
# in name and order — verify, since the DictVectorizer was refit on dfCep.
cep_predict = clf.predict(dfCep)
cep_predict
# print predict
# View the predicted probabilities of the first 10 observations
# print predProb
Out[47]:
In [48]:
# Attach predicted labels to the frame, persist the results as CSV, and
# dump the serialized model.
dfCep['Target'] = cep_predict
dfCep.to_csv(os.path.join(expDir, 'CEP_Anomaly_exp.csv'))
# cPickle is Python 2 only; on Python 3 the plain pickle module already
# uses the C implementation.
try:
    import cPickle as pickle
except ImportError:
    import pickle
fname = os.path.join(expDir, 'CEP_Model_EXP')
# BUG FIX: use a context manager so the file handle is closed even on
# error (the original left the file object open).
with open(fname, "wb") as f:
    pickle.dump(clf, f)
In [ ]: