In [1]:
from keras.models import load_model
# Restore the saved Keras model from its HDF5 file (path is relative to the
# notebook's working directory).
best_model_path = "bigtfifd_best.hdf5"
model_best = load_model(best_model_path)
In [2]:
from scipy.io import mmread
import numpy as np
In [3]:
# Read the Matrix Market feature file.
# NOTE(review): the variable name suggests training and test rows are stored
# together in this one matrix -- confirm against the feature-extraction step.
features_mtx_path = "../data/features/tfifd_4gram_hashed_full_features.mtx"
sparse_mat_train_test = mmread(features_mtx_path)
In [4]:
# load training classes
classes = np.load("../data/features/train_classes.npy")
# Densify the matrix returned by mmread (a COO sparse matrix for
# coordinate-format files, not CSR). The dense copy is kept under the name
# `sparse` because later cells read it.
sparse = sparse_mat_train_test.toarray()
# Everything after the first len(classes) rows is taken as the test set.
# NOTE(review): assumes training rows come first in the matrix -- confirm.
n_train = classes.shape[0]
X_test = sparse[n_train:, :]
# print(...) with a single argument behaves the same on Python 2 and 3.
print(X_test.shape)
In [5]:
# Predicted class index for every test row.
# `Sequential.predict_classes` has been removed from recent Keras/TensorFlow
# releases; taking the argmax over the predicted class scores is the documented
# replacement for multi-class models and also works on old Keras versions.
# NOTE(review): assumes the model emits one score per class (e.g. softmax).
class_probs = model_best.predict(X_test)
class_preds = np.argmax(class_probs, axis=-1)
In [6]:
# Class names; a class's position in this list is its integer label.
malware_classes = [
    "Agent",      # 0
    "AutoRun",    # 1
    "FraudLoad",  # 2
    "FraudPack",  # 3
    "Hupigon",    # 4
    "Krap",       # 5
    "Lipler",     # 6
    "Magania",    # 7
    "None",       # 8
    "Poison",     # 9
    "Swizzor",    # 10
    "Tdss",       # 11
    "VB",         # 12
    "Virut",      # 13
    "Zbot",       # 14
]
# a function for writing predictions in the required format
def write_predictions(predictions, ids, outfile):
    """
    Write an "Id,Prediction" CSV with one row per executable.

    predictions -- indexable collection; predictions[i] is the integer index
                   (within the malware_classes list) of the class predicted
                   for the executable ids[i].
    ids         -- iterable of executable identifiers, same length as
                   predictions.
    outfile     -- path of the CSV file to create; outfile will be
                   overwritten.

    Raises ValueError if predictions and ids differ in length. The check runs
    before outfile is opened, so a mismatch never truncates an existing file
    or leaves a partially written one behind.
    """
    # Materialise ids so that its length can be checked even for iterators.
    ids = list(ids)
    if len(predictions) != len(ids):
        raise ValueError(
            "predictions and ids must have the same length (got %d and %d)"
            % (len(predictions), len(ids)))
    with open(outfile, "w+") as f:
        # write header
        f.write("Id,Prediction\n")
        for i, history_id in enumerate(ids):
            f.write("%s,%d\n" % (history_id, predictions[i]))
def classes_to_Y(classes):
    """
    Translate class names into integer labels.

    Each name in `classes` is replaced by its position in the module-level
    malware_classes list; the labels are returned as a numpy array in the
    same order. A name missing from malware_classes raises ValueError
    (from list.index).
    """
    return np.array([malware_classes.index(name) for name in classes])
In [9]:
# Load the ids of the test executables, show them next to the predicted class
# indices, and write the predictions CSV.
test_ids = np.load("../data/features/test_ids.npy")
# print(...) with a single argument behaves the same on Python 2 and 3.
print(test_ids)
print(class_preds)
write_predictions(class_preds, test_ids, "../predictions/tfidf_deepnet.csv")
In [10]:
# Variance of every column of the dense feature matrix (one value per column).
variance = sparse.var(axis=0)
In [11]:
# Show the per-column variances (single-argument print works on Python 2 and 3).
print(variance)
In [12]:
# Mean of the per-column variances.
print(variance.mean())
In [13]:
# Show the dense feature matrix (numpy abbreviates large arrays when printing).
print(sparse)
In [14]:
# Largest per-column variance.
print(variance.max())
In [15]:
# Smallest per-column variance.
print(variance.min())
In [24]:
# Variance of a vector with one entry per row of `sparse` that is zero
# everywhere except for a single 0.0005.
# NOTE(review): presumably a reference point for how small the variance of a
# nearly constant column is -- confirm intent.
test = np.zeros(len(sparse))
test[0] = .0005
# print(...) with a single argument behaves the same on Python 2 and 3.
print(np.var(test))
In [82]:
import matplotlib.pyplot as plt
%matplotlib inline
# Histogram of the per-column variances on a logarithmic x axis. The explicit
# Axes interface is used and the figure is labelled so it can be read on its
# own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.hist(variance, bins=10000)
ax.set_xscale('log')
ax.set_title("Distribution of per-column variance")
ax.set_xlabel("variance (log scale)")
ax.set_ylabel("number of columns");
In [ ]:
# Keep only the columns whose variance exceeds the threshold.
# NOTE(review): the 1e-9 cut-off is a magic number -- confirm how it was chosen.
min_variance = 1 * 10 ** -9
informative_columns = variance > min_variance
filtered = sparse[:, informative_columns]
In [31]:
# Start from an empty mapping; entries are added in later cells.
class_w = dict()
In [27]:
# Number of entries in the training-class array.
n = classes.shape[0]
In [71]:
# NOTE(review): 0.0130 looks like the fraction of training samples in some
# class -- confirm which one; the constant is not explained in the notebook.
r = n * .0130
In [34]:
# NOTE(review): 0.5214 looks like the fraction of training samples in some
# class -- confirm which one; the constant is not explained in the notebook.
t = n * .5214
In [72]:
# Ratio of t to r. Both are the same n scaled by a constant, so up to float
# rounding this is .5214 / .0130. Left as a bare expression so the notebook
# displays the value.
t / r
Out[72]:
In [74]:
# Store the value 1 under key 15.
# NOTE(review): malware_classes has 15 entries (indices 0..14), so key 15
# does not correspond to any class index -- confirm the intended key.
class_w[15] = 1
In [75]:
# Bare expression so the notebook displays the current contents of class_w.
class_w
Out[75]:
In [ ]:
# Scratch output; single-argument print(...) works on Python 2 and 3.
print("blah")
In [ ]: