In [2]:
%matplotlib inline
from matplotlib import pyplot as plt
import joblib
import numpy as np
from sklearn import metrics
from sklearn import cross_validation
import pymongo

In [18]:
# Connect to the MongoDB instance that stores the malware samples.
# NOTE(review): host and credentials are hard-coded in the notebook; consider
# reading them from environment variables before sharing or committing it.
cm = pymongo.MongoClient("afruizc-office.cs.unm.edu")
cm.malware.authenticate("populator", "malware_challenge")
db = cm.malware

# Build (text, label) pairs: each sample's IDA comments joined into a single
# newline-separated string, paired with the sample's class.
data_train = [('\n'.join(x['ida_comments']), x['class'])
              for x in db.samples.find({
                  "id": {"$exists": True},  # the id field exists
                  # The key used to be "ida_comments " (trailing space), which
                  # is not the field read above, so the non-empty condition
                  # was never applied to it.
                  # NOTE(review): if ida_comments is stored as an array, $ne ""
                  # excludes documents whose array contains an empty string;
                  # confirm that this is the intended "non-empty" test.
                  "ida_comments": {"$ne": ""}})]
assert data_train, "query returned no samples; check the filter and the database"

# joblib.load unpickles the file, so only load model files from a trusted source.
# NOTE(review): whether this model was fitted on some of these same samples is
# not visible here; if it was, the held-out split below is not truly unseen.
model = joblib.load('../model.pkl')

# Separate the texts from their class labels.
data, train = zip(*data_train)

# Hold out 40% of the samples for evaluation. The seed is fixed so that
# re-running the cell produces the same split and the same matrix.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    data, train, test_size=0.4, random_state=0)

y_pred = model.predict(X_test)

# Rows of the confusion matrix are true classes, columns are predicted classes.
conf_mat = metrics.confusion_matrix(y_test, y_pred)

plt.matshow(conf_mat)
plt.title("Confusion matrix: ../model.pkl")
plt.xlabel("Predicted class")
plt.ylabel("True class")
plt.colorbar()


Out[18]:
<matplotlib.colorbar.Colorbar at 0x7fe6363d3dd8>

In [23]:
# Evaluate the baseline model on the samples already loaded into `data_train`
# by the earlier data-loading cell (that cell must be run first). This cell
# does not query the database, so it does not open a new MongoDB connection
# or repeat the credentials.

# joblib.load unpickles the file, so only load model files from a trusted source.
model = joblib.load('../../first_model/model_vc_tfid_sgd_baseline/model.pkl')

# Separate the texts from their class labels.
data, train = zip(*data_train)

# Hold out 40% of the samples for evaluation. The seed is fixed so that
# re-running the cell produces the same split and the same matrix.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    data, train, test_size=0.4, random_state=0)

y_pred = model.predict(X_test)

# Rows of the confusion matrix are true classes, columns are predicted classes.
conf_mat = metrics.confusion_matrix(y_test, y_pred)

plt.matshow(conf_mat)
plt.title("Confusion matrix: model_vc_tfid_sgd_baseline")
plt.xlabel("Predicted class")
plt.ylabel("True class")
plt.colorbar()


Out[23]:
<matplotlib.colorbar.Colorbar at 0x7fe611aa5780>