In [ ]:
import pandas as pd
# Load the query set, show its (rows, columns) shape and per-column dtypes,
# and start with an empty `preds` list.
preds = []
df = pd.read_csv('queryset_CNN.csv')
for summary in (df.shape, df.dtypes):
    print(summary)
In [ ]:
# For every row of `df`: ask UpdateDB whether a result already exists for
# this (doc_id, candidate, samples, test) combination; if not, train and
# test the model through StyloML and store the scores through UpdateDB.
# Every database call is made while an SSH tunnel to the remote host is open.
import ast
import time

from IPython.display import clear_output
from sshtunnel import SSHTunnelForwarder

pred = []

# Run configuration; identical for every row, so it is set once.
algo = "tfidf_svc"
test = algo  # change before run
# The next four values are not read anywhere in this cell; they are kept
# in case other cells rely on them.
level = "word"
iterations = 30
dropout = 0.5
dimensions = 200

# Local end of the SSH tunnel; the same number is passed to UpdateDB as
# its `port` argument.
local_port = 3300


def open_db_tunnel():
    """Return an unstarted SSH tunnel from localhost:local_port to the remote port 3306.

    Intended to be used as ``with open_db_tunnel(): ...`` so the tunnel is
    started on entry and stopped on exit around each database call.
    """
    # NOTE(review): the SSH credentials are hard-coded in the notebook.
    # They should be moved out of source control (environment variable,
    # key-based auth, ...) and rotated.
    return SSHTunnelForwarder(
        ('144.214.121.15', 22),
        ssh_username='ninadt',
        ssh_password='Ninad123',
        remote_bind_address=('localhost', 3306),
        local_bind_address=('localhost', local_port),
    )


for _index, row in df.iterrows():
    doc_id = row.doc_id
    author_id = row.author_id
    # `authorList` is stored as the text of a Python literal; parse it.
    authorList = ast.literal_eval(row.authorList)
    candidate = len(authorList)
    # Reset on every row: Stylo.getResults() below rebinds `samples`.
    samples = 3200
    # Raises ValueError when the row's author is not among its candidates.
    # The value itself is replaced by `testY` further down.
    loc = authorList.index(author_id)

    printstate = "doc_id = %s, candidate = %s, samples = %s, test = %s" % (
        doc_id, candidate, samples, test)
    print("Current test: %s" % printstate)

    with open_db_tunnel():
        # Imported while the tunnel is open, as in the original cell.
        # Repeated imports are served from the module cache.
        import UpdateDB as db
        case = db.checkOldML(doc_id=doc_id, candidate=candidate,
                             samples=samples, test=test, port=local_port)

    # Compared with `== False` on purpose: the return type of checkOldML is
    # not visible here, and e.g. None must keep taking the "Skipped" branch.
    if case == False:  # noqa: E712
        print("Running: %12s" % printstate)

        # Imported lazily so the module is only loaded when a row actually
        # has to be computed.
        import StyloML as Stylo

        # Fresh copies of authorList are passed on each call, as before.
        (labels_index, train_acc, val_acc, samples) = Stylo.getResults(
            algo,
            doc_id=doc_id, authorList=authorList[:],
            samples=samples)
        (labels_index, testY, predY, samples) = Stylo.getTestResults(
            algo, labels_index=labels_index,
            doc_id=doc_id, authorList=authorList[:],
            samples=samples)

        loc = testY
        # Score the model assigned to the true label.
        test_acc = predY[loc]
        # 1 when the highest-scoring label is the true label, else 0.
        predicted = predY.tolist().index(max(predY))
        test_bin = 1 if predicted == testY else 0

        with open_db_tunnel():
            import UpdateDB as db
            case = db.updateresultOldML(
                doc_id=doc_id, candidate=candidate, samples=samples,
                train_acc=train_acc, val_acc=val_acc,
                test_acc=test_acc, test_bin=test_bin,
                test=test, port=local_port)

        # NOTE(review): the reason for the 10 s pause is not evident from
        # this cell; it is kept unchanged.
        time.sleep(10)
        clear_output()
    else:
        print("Skipped: %12s" % printstate)
In [ ]:
# import matplotlib.pyplot as plt
# # summarize history for accuracy
# plt.plot(history.history['acc'])
# plt.plot(history.history['val_acc'])
# plt.title('model accuracy')
# plt.ylabel('accuracy')
# plt.xlabel('epoch')
# plt.legend(['train', 'test'], loc='upper left')
# plt.show()
In [ ]:
# # summarize history for loss
# plt.plot(history.history['loss'])
# plt.plot(history.history['val_loss'])
# plt.title('model loss')
# plt.ylabel('loss')
# plt.xlabel('epoch')
# plt.legend(['train', 'test'], loc='upper left')
# plt.show()
In [ ]:
# IPython magic: print the traceback of the most recent exception.
%tb
In [ ]: