In [1]:
import pandas as pd
# Read the CNN query set from the working directory, then report its
# dimensions (rows, columns) and the dtype pandas inferred for each column.
df = pd.read_csv('queryset_CNN.csv')
for summary in (df.shape, df.dtypes):
    print(summary)
In [2]:
# Parameter sweep for the CNN experiment on a single, hard-coded document.
#
# For every value of the swept parameter (currently only 'samples') this cell:
#   1. asks UpdateDB.checkOldCNN whether the configuration was already handled,
#   2. if it returns False, calls StyloNeural.getResults and stores the returned
#      train/validation accuracies through UpdateDB.updateresultOldCNN,
#   3. clears the Keras session, sleeps 10 seconds and wipes the cell output
#      before moving on to the next value.
#
# NOTE: doc_id and authorList are fixed here. An earlier draft looped over
# df.iterrows() and parsed row.authorList with ast.literal_eval; re-introduce
# that outer loop to sweep every document in the query set.
import time

from IPython.display import clear_output

import UpdateDB as db

authorList = [11, 18, 80, 88, 64, 44, 91, 19, 97]
doc_id = 1

# Candidate values per tunable parameter. Only the entry whose key is selected
# in the loop below is actually swept; the other lists are unused in this cell.
parameters = {
    'candidate': [2, 3, 4, 5, 6, 7, 8, 9],
    'samples': [320, 1600, 3200],
    'dropout': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
    'dimensions': [50, 100, 200],
    'iterations': [10, 20, 40, 80, 120, 240],
    'cv': [320, 1600],  # 3200 deliberately left out
}

# Settings held constant while one parameter is swept.
level = "word"
candidate = 4
iterations = 30
dropout = 0.5
dimensions = 200

# dict.items() exists on both Python 2 and Python 3; dict.iteritems() was
# removed in Python 3 and raises AttributeError there.
for idxp, paralist in parameters.items():
    if idxp != 'samples':
        continue

    test = idxp  # change before run

    for val in paralist:
        samples = val

        printstate = ("doc_id = %s, candidate = %s, " % (str(doc_id), str(candidate)))
        printstate += ("dimensions = %s, samples = %s, " % (str(dimensions), str(samples)))
        printstate += ("\niterations = %s, dropout = %s, test = %s" % (str(iterations), str(dropout), str(test)))
        print("Current test: %s" % (str(printstate)))

        case = db.checkOldCNN(doc_id = doc_id, candidate = candidate, dimensions = dimensions,
                              samples = samples, iterations = iterations, dropout = dropout,
                              test = test)

        # Kept as an explicit comparison with False: the return type of
        # checkOldCNN is not visible here, so `not case` could behave differently.
        if case == False:
            print("Running: %12s" % (str(printstate)))

            # Imported only when a run is actually needed, and dropped again below.
            import StyloNeural as Stylo

            # NOTE(review): getResults returns its own `samples`, which replaces
            # the requested value before the result is stored. If the two can
            # differ, checkOldCNN (queried with the requested value) may not
            # recognise the stored row on a later run -- confirm.
            (labels_index, history, train_acc, val_acc, samples) = Stylo.getResults(
                doc_id = doc_id, authorList = authorList[:candidate],
                level = level, glove = '../../glove/', dimensions = dimensions,
                samples = samples, nb_epoch = iterations, dropout = dropout, batch_size = 10)

            case = db.updateresultOldCNN(doc_id = doc_id, candidate = candidate, dimensions = dimensions,
                                         samples = samples, iterations = iterations, dropout = dropout,
                                         train_acc = train_acc, val_acc = val_acc,
                                         test = str(test))

            del Stylo

            # Reset the Keras backend session before the next configuration.
            from keras import backend as K
            K.clear_session()

            time.sleep(10)
            clear_output()
        else:
            print("Skipped: %12s" % (str(printstate)))
In [3]:
# Leftover debugging aid: the %tb magic re-prints the traceback of the most recent exception in this kernel.
%tb
In [ ]: