In [1]:
import pandas as pd

# Load the query set (one row per test document) and sanity-check its
# dimensions and column dtypes before running the experiment loop below.
df = pd.read_csv('queryset_CNN_SVC.csv')
print(df.shape)
print(df.dtypes)


(28, 3)
doc_id         int64
author_id      int64
authorList    object
dtype: object

In [2]:
# For every row of the query set, run one CNN authorship-attribution
# experiment (train + test via StyloNeural) unless the database already
# holds a result for this exact parameter combination (UpdateDB.checkCNN).
# NOTE(review): imports are scattered throughout the loop body; hoisting
# them into a single top-of-notebook cell would make dependencies explicit
# (re-importing is harmless at runtime since modules are cached).
for index, row in df.iterrows():
    doc_id = row.doc_id

    author_id = row.author_id

    import ast
    # authorList is stored in the CSV as a stringified Python list
    # (dtype object, see cell 1); literal_eval parses it back safely.
    authorList = ast.literal_eval(row.authorList)
    
    # Number of candidate authors for this document.
    candidate = len(authorList)

    # --- experiment configuration (re-assigned every iteration) ---
    test = "batch10" # change before run

    level = "word"

    iterations = 30

    dropout = 0.5

    samples = 3200

    dimensions = 200

    # Index of the true author inside the candidate list.
    # NOTE(review): dead assignment — `loc` is unconditionally overwritten
    # by `loc = testY` further down before any use.
    loc = authorList.index(author_id)

    # Human-readable summary of the current parameter combination.
    printstate = (("doc_id = %s, candidate = %s, ") % (str(doc_id), str(candidate)))
    printstate += (("dimensions = %s, samples = %s, ") % (str(dimensions), str(samples)))
    printstate += (("\niterations = %s, dropout = %s, test = %s") % (str(iterations), str(dropout), str(test)))

    print("Current test: %s" % (str(printstate)))
    
    import UpdateDB as db
    # Skip-check: has this (doc_id, candidate, dimensions, samples=3200,
    # iterations, dropout, test) combination already been recorded?
    case = db.checkCNN(doc_id = doc_id, candidate = candidate, dimensions = dimensions,
                       samples = samples, iterations = iterations, dropout = dropout,
                       test = test)

    # NOTE(review): `if not case:` would be the idiomatic form.
    if case == False:

        print("Running: %12s" % (str(printstate)))

        import StyloNeural as Stylo
        # NOTE(review): getResults re-binds `samples`, clobbering the
        # configured 3200. The getTestResults and updateresultCNN calls
        # below therefore receive the returned value, while checkCNN above
        # was queried with 3200 — if the two ever differ, the skip-check
        # can never match the stored row. Confirm this is intentional.
        # authorList[:] passes a copy so callees cannot mutate our list.
        (labels_index, history, train_acc, val_acc, samples) = Stylo.getResults(
            doc_id = doc_id, authorList = authorList[:], 
            level = level, glove = '../../glove/', dimensions = dimensions, 
            samples = samples, nb_epoch = iterations, dropout = dropout, batch_size = 10 )

        # Evaluate the trained model on the held-out query document.
        (predYList, predY, testY) = Stylo.getTestResults(
            doc_id = doc_id, authorList = authorList[:], labels_index = labels_index,
            level = level, glove = '../../glove/', dimensions = dimensions, 
            samples = samples, nb_epoch = iterations, dropout = dropout, batch_size = 10 )

        # Index of the true author in the prediction vector
        # (presumably testY is an integer label — TODO confirm).
        loc = testY
        
        # Probability/score assigned to the true author.
        test_acc = predY[loc]

        # Binary hit indicator: 1 iff the argmax prediction equals the
        # true label.
        test_bin = 0

        if(predY.tolist().index(max(predY)) == testY):
            test_bin = 1
        
        import UpdateDB as db
        # Persist the run's metrics so the skip-check above finds it next
        # time. NOTE(review): `samples` here is the value returned by
        # getResults, not necessarily the 3200 used for checkCNN (see note
        # above).
        case = db.updateresultCNN(doc_id = doc_id, candidate = candidate, dimensions = dimensions,
                                  samples = samples, iterations = iterations, dropout = dropout,
                                  train_acc = train_acc, val_acc = val_acc,
                                  test_acc = test_acc, test_bin = test_bin,
                                  test = test)
                                     
        del Stylo

        # Release the TensorFlow graph/session so successive model builds
        # in one kernel do not accumulate GPU/host memory.
        from keras import backend as K
        K.clear_session()

        import time
        time.sleep(10)
        
        # Clear the (very verbose) Keras training log between runs.
        from IPython.display import clear_output

        clear_output()

    else:
        print("Skipped: %12s" % (str(printstate)))


# import pandas as pd
# df = pd.DataFrame(output)
# df.to_csv("styloout.csv", index = False, encoding='utf-8')

import time
time.sleep(10)


Current test: doc_id = 2171, candidate = 2, dimensions = 200, samples = 3200, 
iterations = 30, dropout = 0.5, test = batch10
Execution completed
Running: doc_id = 2171, candidate = 2, dimensions = 200, samples = 3200, 
iterations = 30, dropout = 0.5, test = batch10
Level = Word
File used: glove.6B.200d.txt
Found 400000 word vectors.
Execution completed
Read completed
Number of rows: 106
author_id       int64
doc_content    object
dtype: object
Data Frame created: Shape: (13564, 2)
Author:    21  Size:  7772
Author:    88  Size:  5792
Min: 5792
Max: 7772
Authors [21, 88].
Found 6400 texts.
Found 6400 labels.
Found 53717 unique tokens.
Shape of data tensor: (6400, 1000)
Shape of label tensor: (6400, 2)
Done compiling.
Train on 5120 samples, validate on 1280 samples
Epoch 1/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.4981 - acc: 0.7411  Epoch 00000: val_acc improved from -inf to 0.88359, saving model to author-cnn-ngrams-word.hdf5
5120/5120 [==============================] - 54s - loss: 0.4973 - acc: 0.7416 - val_loss: 0.2873 - val_acc: 0.8836
Epoch 2/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.2571 - acc: 0.8975 Epoch 00001: val_acc did not improve
5120/5120 [==============================] - 52s - loss: 0.2571 - acc: 0.8977 - val_loss: 0.3048 - val_acc: 0.8703
Epoch 3/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.1824 - acc: 0.9303 Epoch 00002: val_acc improved from 0.88359 to 0.92187, saving model to author-cnn-ngrams-word.hdf5
5120/5120 [==============================] - 54s - loss: 0.1823 - acc: 0.9303 - val_loss: 0.2197 - val_acc: 0.9219
Epoch 4/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.1401 - acc: 0.9452 Epoch 00003: val_acc did not improve
5120/5120 [==============================] - 53s - loss: 0.1403 - acc: 0.9449 - val_loss: 0.2458 - val_acc: 0.9008
Epoch 5/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.1040 - acc: 0.9603 Epoch 00004: val_acc did not improve
5120/5120 [==============================] - 53s - loss: 0.1039 - acc: 0.9604 - val_loss: 0.2577 - val_acc: 0.9047
Epoch 6/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0936 - acc: 0.9681 Epoch 00005: val_acc improved from 0.92187 to 0.92344, saving model to author-cnn-ngrams-word.hdf5
5120/5120 [==============================] - 54s - loss: 0.0934 - acc: 0.9682 - val_loss: 0.2594 - val_acc: 0.9234
Epoch 7/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0752 - acc: 0.9710     Epoch 00006: val_acc did not improve
5120/5120 [==============================] - 53s - loss: 0.0751 - acc: 0.9711 - val_loss: 0.2856 - val_acc: 0.9172
Epoch 8/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0696 - acc: 0.9750 Epoch 00007: val_acc did not improve
5120/5120 [==============================] - 53s - loss: 0.0695 - acc: 0.9750 - val_loss: 0.3039 - val_acc: 0.9008
Epoch 9/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0576 - acc: 0.9791 Epoch 00008: val_acc improved from 0.92344 to 0.93828, saving model to author-cnn-ngrams-word.hdf5
5120/5120 [==============================] - 54s - loss: 0.0575 - acc: 0.9791 - val_loss: 0.1671 - val_acc: 0.9383
Epoch 10/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0456 - acc: 0.9845 Epoch 00009: val_acc did not improve
5120/5120 [==============================] - 52s - loss: 0.0455 - acc: 0.9846 - val_loss: 0.2057 - val_acc: 0.9383
Epoch 11/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0366 - acc: 0.9883 Epoch 00010: val_acc did not improve
5120/5120 [==============================] - 52s - loss: 0.0366 - acc: 0.9883 - val_loss: 0.3231 - val_acc: 0.9156
Epoch 12/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0339 - acc: 0.9881 Epoch 00011: val_acc did not improve
5120/5120 [==============================] - 53s - loss: 0.0338 - acc: 0.9881 - val_loss: 0.2386 - val_acc: 0.9320
Epoch 13/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0374 - acc: 0.9853 Epoch 00012: val_acc did not improve
5120/5120 [==============================] - 53s - loss: 0.0374 - acc: 0.9854 - val_loss: 0.2483 - val_acc: 0.9227
Epoch 14/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0303 - acc: 0.9912     Epoch 00013: val_acc did not improve
5120/5120 [==============================] - 52s - loss: 0.0303 - acc: 0.9912 - val_loss: 0.2763 - val_acc: 0.9258
Epoch 15/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0258 - acc: 0.9908     Epoch 00014: val_acc did not improve
5120/5120 [==============================] - 52s - loss: 0.0258 - acc: 0.9908 - val_loss: 0.3112 - val_acc: 0.9172
Epoch 16/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0220 - acc: 0.9945 Epoch 00015: val_acc did not improve
5120/5120 [==============================] - 52s - loss: 0.0220 - acc: 0.9945 - val_loss: 0.3365 - val_acc: 0.9242
Epoch 17/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0179 - acc: 0.9947     Epoch 00016: val_acc did not improve
5120/5120 [==============================] - 52s - loss: 0.0179 - acc: 0.9947 - val_loss: 0.3766 - val_acc: 0.9164
Epoch 18/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0237 - acc: 0.9924     Epoch 00017: val_acc did not improve
5120/5120 [==============================] - 52s - loss: 0.0237 - acc: 0.9924 - val_loss: 0.3143 - val_acc: 0.9195
Epoch 19/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0219 - acc: 0.9933     Epoch 00018: val_acc improved from 0.93828 to 0.93984, saving model to author-cnn-ngrams-word.hdf5
5120/5120 [==============================] - 54s - loss: 0.0218 - acc: 0.9934 - val_loss: 0.2447 - val_acc: 0.9398
Epoch 20/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0128 - acc: 0.9963     Epoch 00019: val_acc did not improve
5120/5120 [==============================] - 52s - loss: 0.0128 - acc: 0.9963 - val_loss: 0.2782 - val_acc: 0.9367
Epoch 21/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0185 - acc: 0.9951     Epoch 00020: val_acc did not improve
5120/5120 [==============================] - 52s - loss: 0.0185 - acc: 0.9951 - val_loss: 0.4668 - val_acc: 0.9016
Epoch 22/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0158 - acc: 0.9951     Epoch 00021: val_acc improved from 0.93984 to 0.94297, saving model to author-cnn-ngrams-word.hdf5
5120/5120 [==============================] - 54s - loss: 0.0158 - acc: 0.9951 - val_loss: 0.2104 - val_acc: 0.9430
Epoch 23/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0152 - acc: 0.9953     Epoch 00022: val_acc did not improve
5120/5120 [==============================] - 52s - loss: 0.0152 - acc: 0.9953 - val_loss: 0.2854 - val_acc: 0.9320
Epoch 24/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0165 - acc: 0.9955     Epoch 00023: val_acc did not improve
5120/5120 [==============================] - 52s - loss: 0.0164 - acc: 0.9955 - val_loss: 0.2930 - val_acc: 0.9187
Epoch 25/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0153 - acc: 0.9949     Epoch 00024: val_acc did not improve
5120/5120 [==============================] - 52s - loss: 0.0155 - acc: 0.9947 - val_loss: 0.2495 - val_acc: 0.9422
Epoch 26/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0183 - acc: 0.9949 Epoch 00025: val_acc did not improve
5120/5120 [==============================] - 52s - loss: 0.0182 - acc: 0.9949 - val_loss: 0.2333 - val_acc: 0.9414
Epoch 27/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0155 - acc: 0.9957     Epoch 00026: val_acc did not improve
5120/5120 [==============================] - 52s - loss: 0.0155 - acc: 0.9957 - val_loss: 0.2146 - val_acc: 0.9336
Epoch 28/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0138 - acc: 0.9967     Epoch 00027: val_acc did not improve
5120/5120 [==============================] - 52s - loss: 0.0138 - acc: 0.9967 - val_loss: 0.3140 - val_acc: 0.9242
Epoch 29/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0118 - acc: 0.9969     Epoch 00028: val_acc did not improve
5120/5120 [==============================] - 52s - loss: 0.0118 - acc: 0.9969 - val_loss: 0.2418 - val_acc: 0.9336
Epoch 30/30
5110/5120 [============================>.] - ETA: 0s - loss: 0.0088 - acc: 0.9971     Epoch 00029: val_acc did not improve
5120/5120 [==============================] - 52s - loss: 0.0088 - acc: 0.9971 - val_loss: 0.4674 - val_acc: 0.9102
5088/5120 [============================>.] - ETA: 0s 

Final Train Accuracy: 99.98
1248/1280 [============================>.] - ETA: 0s
Final Test Accuracy: 94.30
Level = Word
File used: glove.6B.200d.txt
Found 400000 word vectors.
Found 53717 unique tokens.
2017-03-16 05:22:22,815| ERROR   | Could not resolve IP address for srn02.cs.cityu.edu.hk, aborting!
---------------------------------------------------------------------------
BaseSSHTunnelForwarderError               Traceback (most recent call last)
<ipython-input-2-31163daad0a5> in <module>()
     48             doc_id = doc_id, authorList = authorList[:], labels_index = labels_index,
     49             level = level, glove = '../../glove/', dimensions = dimensions,
---> 50             samples = samples, nb_epoch = iterations, dropout = dropout, batch_size = 10 )
     51 
     52         loc = testY

/home/ninadt/Data/deepstylometry-python/fyp/ipynb/StyloNeural.pyc in getTestResults(authorList, doc_id, labels_index, chunk_size, nb_epoch, level, glove, samples, dimensions, dropout, batch_size)
     57         md.makeTokenizer()
     58 
---> 59         (testX, testY) = md.loadDocData(authorList, doc_id, chunk_size = chunk_size)
     60 
     61         (testX, testY) = md.preProcessTest(testX, labels_index, testY, chunk_size = chunk_size)

/home/ninadt/Data/deepstylometry-python/fyp/ipynb/CNNModelCreatorWord.pyc in loadDocData(authorList, doc_id, chunk_size)
     95                             ssh_password='stylometry',
     96                             remote_bind_address=('localhost', 5432),
---> 97                             local_bind_address=('localhost', 5400)):
     98         textToUse = DatabaseQuery.getWordDocData(5400, doc_id, documentTable = documentTable,
     99                                                  chunk_size = chunk_size)

/home/ninadt/anaconda2/lib/python2.7/site-packages/sshtunnel.pyc in __enter__(self)
   1480     def __enter__(self):
   1481         try:
-> 1482             self.start()
   1483             return self
   1484         except KeyboardInterrupt:

/home/ninadt/anaconda2/lib/python2.7/site-packages/sshtunnel.pyc in start(self)
   1222         if not self.is_active:
   1223             self._raise(BaseSSHTunnelForwarderError,
-> 1224                         reason='Could not establish session to SSH gateway')
   1225         for _srv in self._server_list:
   1226             thread = threading.Thread(

/home/ninadt/anaconda2/lib/python2.7/site-packages/sshtunnel.pyc in _raise(self, exception, reason)
   1034     def _raise(self, exception=BaseSSHTunnelForwarderError, reason=None):
   1035         if self._raise_fwd_exc:
-> 1036             raise exception(reason)
   1037         else:
   1038             self.logger.error(repr(exception(reason)))

BaseSSHTunnelForwarderError: Could not establish session to SSH gateway

In [ ]:
%tb

In [ ]: