notebook.community

Edit and run



In [ ]:

    
import pandas as pd

# Path of the query-set CSV, resolved relative to the notebook's working directory.
QUERYSET_CSV = 'queryset_CNN.csv'

# Each row is later consumed by the experiment loop through its
# doc_id, author_id and authorList columns.
df = pd.read_csv(QUERYSET_CSV)

# Sanity check: print the frame's dimensions, then its per-column dtypes.
for summary in (df.shape, df.dtypes):
    print(summary)

# Empty accumulator; nothing in the cells visible here appends to it.
preds = []









    



(32, 3)
doc_id         int64
author_id      int64
authorList    object
dtype: object



In [ ]:

    
import ast
import getpass
import os
import time

from IPython.display import clear_output
from sshtunnel import SSHTunnelForwarder

# ---------------------------------------------------------------------------
# Run configuration (identical for every row of the query set).
# ---------------------------------------------------------------------------
algo = "tfidf_svc"
test = algo  # change before run
DEFAULT_SAMPLES = 3200

# These settings are assigned but never read by this cell; they are kept so
# that any other cell relying on the names still finds them.
level = "word"
iterations = 30
dropout = 0.5
dimensions = 200

# ---------------------------------------------------------------------------
# Database tunnel configuration.
# The SSH password used to be hard-coded here in plain text. It is now read
# from the environment (STYLO_SSH_PASSWORD) or, failing that, prompted for
# once per cell run. NOTE(review): the previously committed password should
# be treated as leaked and rotated.
# ---------------------------------------------------------------------------
SSH_HOST = os.environ.get("STYLO_SSH_HOST", "144.214.121.15")
SSH_PORT = 22
SSH_USER = os.environ.get("STYLO_SSH_USER", "ninadt")
SSH_PASSWORD = os.environ.get("STYLO_SSH_PASSWORD") or getpass.getpass(
    "SSH password for %s@%s: " % (SSH_USER, SSH_HOST))
REMOTE_DB_PORT = 3306
LOCAL_DB_PORT = 3300


def open_db_tunnel():
    """Build the SSH tunnel that forwards localhost:LOCAL_DB_PORT to the
    remote host's localhost:REMOTE_DB_PORT.

    Returns:
        An SSHTunnelForwarder meant to be used as a context manager
        (``with open_db_tunnel(): ...``) so the tunnel is torn down afterwards.
    """
    return SSHTunnelForwarder((SSH_HOST, SSH_PORT),
                              ssh_username=SSH_USER,
                              ssh_password=SSH_PASSWORD,
                              remote_bind_address=('localhost', REMOTE_DB_PORT),
                              local_bind_address=('localhost', LOCAL_DB_PORT))


# Empty accumulator; nothing in this cell appends to it.
pred = []

for index, row in df.iterrows():
    doc_id = row.doc_id
    author_id = row.author_id

    # authorList is stored in the CSV as the text of a Python list literal.
    authorList = ast.literal_eval(row.authorList)
    candidate = len(authorList)

    # Reset on every row: the Stylo calls below rebind `samples` to the value
    # they return, which must not leak into the next row's lookup.
    samples = DEFAULT_SAMPLES

    # Raises ValueError if the row's author is not among its own candidates.
    # The resulting position is overwritten by `testY` before it is ever used.
    loc = authorList.index(author_id)

    printstate = "doc_id = %s, candidate = %s, samples = %s, test = %s" % (
        doc_id, candidate, samples, test)

    print("Current test: %s" % (str(printstate)))

    with open_db_tunnel():
        # NOTE(review): left inside the tunnel as in the original, in case
        # UpdateDB touches the database at import time - confirm before hoisting.
        import UpdateDB as db
        case = db.checkOldML(doc_id = doc_id, candidate = candidate, samples = samples,
                             test = test, port = LOCAL_DB_PORT)

    if case == False:

        print("Running: %12s" % (str(printstate)))

        # Imported lazily so that a run in which every row is skipped never
        # has to load StyloML.
        import StyloML as Stylo
        (labels_index, train_acc, val_acc, samples) = Stylo.getResults(
            algo,
            doc_id = doc_id, authorList = authorList[:],
            samples = samples)

        (labels_index, testY, predY, samples) = Stylo.getTestResults(
            algo, labels_index = labels_index,
            doc_id = doc_id, authorList = authorList[:],
            samples = samples)

        # presumably testY is the index of the true author within predY - verify
        # against StyloML.getTestResults.
        loc = testY

        # Value predicted at the true author's position.
        test_acc = predY[loc]

        # 1 when the highest-valued prediction sits at the true author's position.
        test_bin = 1 if predY.tolist().index(max(predY)) == testY else 0

        with open_db_tunnel():
            import UpdateDB as db
            case = db.updateresultOldML(doc_id = doc_id, candidate = candidate, samples = samples,
                                        train_acc = train_acc, val_acc = val_acc,
                                        test_acc = test_acc, test_bin = test_bin,
                                        test = test, port = LOCAL_DB_PORT)

        # Only unbinds the name; the module itself stays cached in sys.modules.
        del Stylo

        # Pause between runs, then wipe this cell's accumulated output.
        time.sleep(10)
        clear_output()

    else:
        print("Skipped: %12s" % (str(printstate)))









    



Current test: doc_id = 85, candidate = 3, samples = 3200, test = tfidf_svc
Execution completed
Running: doc_id = 85, candidate = 3, samples = 3200, test = tfidf_svc
Algo: tfidf_svc
Execution completed
Read completed
Number of rows: 124
author_id       int64
doc_content    object
dtype: object
Data Frame created: Shape: (11578, 2)
Author:    44  Size:  4746
Author:    64  Size:  5106
Author:    82  Size:  1726
Min: 1726
Max: 5106
Authors [44, 64, 82].
Found 5178 texts.
Found 5178 labels.



In [ ]:

    
# import matplotlib.pyplot as plt
# # summarize history for accuracy
# plt.plot(history.history['acc'])
# plt.plot(history.history['val_acc'])
# plt.title('model accuracy')
# plt.ylabel('accuracy')
# plt.xlabel('epoch')
# plt.legend(['train', 'test'], loc='upper left')
# plt.show()



In [ ]:

    
# # summarize history for loss
# plt.plot(history.history['loss'])
# plt.plot(history.history['val_loss'])
# plt.title('model loss')
# plt.ylabel('loss')
# plt.xlabel('epoch')
# plt.legend(['train', 'test'], loc='upper left')
# plt.show()



In [ ]:

    
%tb



In [ ]: