In [8]:
# Read CSV & model-selection utilities
import csv
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
# Train a DL model with Keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.utils import plot_model
from keras.utils import np_utils

# Plot the evaluation results
import matplotlib.pyplot as plt

# Load training & test data
input_test_data = []
input_train_data = []

def convert(data_array):
    # Split each CSV row into its numeric features and the trailing label column
    feature_array = []
    label_array = []
    for d in data_array:
        features = [float(x) for x in d[:-1]]
        label = d[-1]
        feature_array.append(features)
        label_array.append(label)
    return feature_array, label_array
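
# Worked example with illustrative values:
# convert([['0.5', '1.2', '1']]) -> ([[0.5, 1.2]], ['1'])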

# Replace these with your local file paths (header rows removed)
# Load testing data (human-labelled)
with open('/Users/yjiang/Dropbox/DLData/humanlabelled.csv', newline='') as csvfile:
    data_iter = csv.reader(csvfile, delimiter=',', quotechar='|')
    input_test_data = [data for data in data_iter]
# Load training data (user-generated)
with open('/Users/yjiang/Dropbox/DLData/backup/training_auto_new_only_query.csv', newline='') as csvfile:
    data_iter = csv.reader(csvfile, delimiter=',', quotechar='|')
    input_train_data = [data for data in data_iter]
        
# train, test = train_test_split(input_data, test_size = 0.3)
# test_data, test_labels = convert(test)
test_data_x, test_labels = convert(input_test_data)
train_data, train_labels = convert(input_train_data)

# Fit the scaler on the training data only, then apply the same
# transform to both sets
scaler = preprocessing.StandardScaler().fit(train_data)
train_data = scaler.transform(train_data)
test_data = scaler.transform(test_data_x)
# One-hot encode the binary labels
train_labels = np_utils.to_categorical(train_labels, num_classes=2)
test_labels = np_utils.to_categorical(test_labels, num_classes=2)
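
# The fitted scaler can be reused at inference time; a minimal sketch of
# persisting it with joblib (the file name is illustrative, not part of
# the original pipeline):
# import joblib
# joblib.dump(scaler, 'scaler.pkl')   # save after fitting on training data
# scaler = joblib.load('scaler.pkl')  # reload before transforming new rows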


# Build the NN: 10 input features -> 8 -> 6 -> 4 -> softmax over 2 classes
model = Sequential()
model.add(Dense(8, input_dim=10, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(6, activation='relu'))
# model.add(Dense(5, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])
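
# model.summary() is a quick sanity check that the 10 input features flow
# through the 8 -> 6 -> 4 -> 2 stack with the expected parameter counts:
# model.summary()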

# Batch processing
#==============================================================================
# model.fit(train_data, train_labels, epochs = 50, batch_size = 128)
# loss_and_metrics = model.evaluate(test_data, test_labels, batch_size = 128)
# print(loss_and_metrics)
# # Plot out the model shape
# plot_model(model, to_file='model.png', show_shapes=True)
#==============================================================================

# Online learning: feed the training data chunk by chunk
increment = 32
chunks_train_data = [train_data[x:x+increment] for x in range(0, len(train_data), increment)]
chunks_train_labels = [train_labels[x:x+increment] for x in range(0, len(train_labels), increment)]
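# Note: the last chunk may hold fewer than `increment` rows;
# train_on_batch accepts variable-sized batches, so no padding is needed.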

test_accuracy = []
test_loss = []
train_accuracy = []
train_loss = []
for i, el in enumerate(chunks_train_data):
    print(i)
    # Single gradient update on this chunk of training data
    train_loss_and_metrics = model.train_on_batch(el, chunks_train_labels[i])
    print(train_loss_and_metrics)
    train_loss.append(train_loss_and_metrics[0])
    train_accuracy.append(train_loss_and_metrics[1])
    # Evaluate on the full held-out test set after every update
    test_loss_and_metrics = model.evaluate(test_data, test_labels, batch_size=128)
    print(test_loss_and_metrics)
    test_loss.append(test_loss_and_metrics[0])
    test_accuracy.append(test_loss_and_metrics[1])

fig = plt.figure()

ax1 = fig.add_subplot(211)
ax1.plot(train_loss)
ax1.plot(test_loss)
ax1.set_ylabel('Loss')
ax1.set_xlabel('Iteration')
ax1.legend(['training', 'testing'], loc='upper left')

ax2 = fig.add_subplot(212)
ax2.plot(train_accuracy)
ax2.plot(test_accuracy)
ax2.set_ylabel('Accuracy')
ax2.set_xlabel('Iteration')
ax2.legend(['training', 'testing'], loc='upper left')
plt.show()
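
# The curves can also be written to disk to compare runs; the file name
# below is illustrative:
# fig.savefig('online_learning_curves.png', dpi=150)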

#==============================================================================
# sample = np.array([[0,0,0,0,0,0,0,0,0,0]])
# prediction = model.predict(sample)
# # model.predict_classes(sample)
# print(prediction)
#==============================================================================


# Predict class indices for the test set and save the unscaled features,
# one-hot labels, and predictions side by side
prediction = model.predict_classes(test_data)
output = np.column_stack((test_data_x, test_labels, prediction))
np.savetxt("/Users/yjiang/Dropbox/DLData/test_result.csv", output, delimiter=",")
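
# A sketch (not part of the original pipeline) of summarizing the test
# predictions with sklearn.metrics; test_labels is one-hot here, so argmax
# recovers the integer class indices:
# from sklearn.metrics import confusion_matrix, classification_report
# true_classes = np.argmax(test_labels, axis=1)
# print(confusion_matrix(true_classes, prediction))
# print(classification_report(true_classes, prediction))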


iter   train loss  train acc   test loss   test acc
0      0.698033    0.546875    0.662775    0.636015
1      0.706019    0.875000    0.647932    0.660930
2      0.653325    0.750000    0.633918    0.687913
3      0.713461    0.968750    0.624319    0.704056
4      0.777069    0.937500    0.614585    0.724701
5      0.613208    0.937500    0.606829    0.740177
6      0.600359    0.906250    0.599505    0.752818
7      0.672231    0.875000    0.590467    0.769128
8      0.591964    0.937500    0.584549    0.778500
9      0.620916    0.843750    0.579010    0.788606
10     0.642545    0.875000    0.573660    0.800914
11     0.629459    0.937500    0.569070    0.809119
12     0.585040    0.937500    0.561945    0.819658
13     0.613675    0.875000    0.555958    0.829531
14     0.631258    0.968750    0.551964    0.836435
15     0.656763    0.875000    0.546635    0.845441
16     0.636388    0.875000    0.541182    0.855513
17     0.620547    0.937500    0.536861    0.860083
18     0.637368    0.906250    0.531647    0.867821
19     0.644109    0.843750    0.526328    0.878294
20     0.567169    0.968750    0.521784    0.882329
21     0.623090    0.906250    0.515792    0.890301
22     0.686404    0.875000    0.509488    0.898806
23     0.580934    0.937500    0.505171    0.901274
24     0.522261    0.958333    0.501283    0.902908
