In [10]:
import tensorflow as tf
import sys
import math
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from getvector import getvector
from tensorflow.python.platform import gfile
import time
#from progress.bar import Bar
In [11]:
# Directory holding the training JPEGs. The default is the original local
# path; set the CAPSTONE_IMGDIR environment variable to point somewhere else
# without editing the notebook.
IMGDIR = os.environ.get(
    "CAPSTONE_IMGDIR",
    "/Users/zhouyu/Documents/Zhou_Yu/DS/Galvanize/Capstone_data/test_data",
)
In [68]:
# Start from two empty, independent accumulators: one for the feature vectors
# and one for the labels that go with them.
data_inputs, data_labels = [], []
In [69]:
# JPG images --> Inception-V3 --> 2048-dimensional vector, sequential method
MAX_IMAGES = 300  # upper bound on how many images are vectorised in one run

image_dir = IMGDIR
file_glob = os.path.join(image_dir, '*.jpg')
file_list = list(gfile.Glob(file_glob))[:MAX_IMAGES]

# Rebuild the accumulators here so that re-running this cell does not append a
# second copy of every image to lists left over from an earlier run.
data_inputs = []
data_labels = []

start_time = time.time()
for file_name in file_list:
    data_inputs.append(getvector(file_name))
    # One-hot label: [1, 0] when 'basset' occurs in the file name, [0, 1]
    # otherwise. Only the base name is inspected so that a directory whose
    # path happens to contain 'basset' cannot mislabel every image.
    if 'basset' in os.path.basename(file_name):
        data_labels.append([1, 0])
    else:
        data_labels.append([0, 1])

# An empty glob would otherwise divide by zero when averaging.
if file_list:
    print(" on average takes {}s to conver one image".format((time.time() - start_time)/len(file_list)))
else:
    print("no .jpg files found in {}".format(image_dir))
In [70]:
# Write the feature vectors and their labels to plain-text files in the
# working directory, one file per array.
for out_path, matrix in (('data_inputs.txt', data_inputs),
                         ('data_labels.txt', data_labels)):
    np.savetxt(out_path, matrix)
In [12]:
# Reload previously saved features and labels, but only when both text files
# are present in the working directory; otherwise the in-memory values are
# left untouched.
inputs_cache, labels_cache = 'data_inputs.txt', 'data_labels.txt'
cache_present = all(os.path.isfile('./' + name) for name in (inputs_cache, labels_cache))
if cache_present:
    data_inputs = np.loadtxt(inputs_cache)
    data_labels = np.loadtxt(labels_cache)
In [13]:
# build models
# Two-stage split. Stage one holds out 30% of the samples, stratified on the
# labels; stage two divides that hold-out into validation (60%) and test (40%).
# Both stages use fixed random_state values so the partition is repeatable.
first_split = train_test_split(
    data_inputs,
    data_labels,
    test_size=0.3,
    random_state=42,
    stratify=data_labels
)
train_inputs, valtest_inputs, train_labels, valtest_labels = first_split

second_split = train_test_split(
    valtest_inputs,
    valtest_labels,
    test_size=0.4,
    random_state=43
)
val_inputs, test_inputs, val_labels, test_labels = second_split
In [14]:
# --- Hyperparameters -------------------------------------------------------
learning_rate = 0.01   # step size handed to the Adam optimiser
batch_size = 16        # samples per optimisation step
epochs = 10            # full passes over the training split
log_batch_step = 50    # defined here; not read by the graph code below

# --- Dimensions read off the training split --------------------------------
n_features = np.size(train_inputs, axis=1)
n_labels = np.size(train_labels, axis=1)

# --- Feed points; the leading None leaves the batch dimension open ---------
inputs = tf.placeholder(tf.float32, shape=(None, n_features))
labels = tf.placeholder(tf.float32, shape=(None, n_labels))

# --- Trainable parameters of the single fully connected layer --------------
initial_weights = tf.truncated_normal((n_features, n_labels), stddev=0.1)
weights = tf.Variable(initial_weights, name='weights')
bias = tf.Variable(tf.zeros(n_labels), name='bias')
for variable in (weights, bias):
    tf.add_to_collection('vars', variable)

# --- Forward pass: affine map followed by softmax --------------------------
logits = tf.matmul(inputs, weights) + bias
prediction = tf.nn.softmax(logits)
tf.add_to_collection('pred', prediction)

# --- Loss: per-sample softmax cross-entropy, summed over the fed batch -----
difference = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
loss = tf.reduce_sum(difference)

# --- Optimiser --------------------------------------------------------------
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

# --- Accuracy: share of rows whose arg-max class matches the label ---------
predicted_class = tf.argmax(prediction, 1)
true_class = tf.argmax(labels, 1)
is_correct_prediction = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(is_correct_prediction, tf.float32))

# --- Checkpointing limited to the two trainable variables ------------------
saver = tf.train.Saver((weights, bias))
In [15]:
# Checkpoint helper that saves and restores only the layer's weights and bias.
saver = tf.train.Saver(var_list=(weights, bias))
In [16]:
# training
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)

    # Mini-batches per epoch, rounded up so a final partial batch is included.
    # The divisor is cast to float because under Python 2 `int / int`
    # truncates before math.ceil ever sees the value, which silently dropped
    # the trailing samples from every epoch.
    batch_count = int(math.ceil(len(train_inputs) / float(batch_size)))

    for epoch_i in range(epochs):
        batches_pbar = tqdm(range(batch_count), desc='Epoch {:>2}/{}'.format(epoch_i+1, epochs), unit='batches')

        # The training cycle
        for batch_i in batches_pbar:
            # Slice out this step's features and labels; the last slice may be
            # shorter than batch_size.
            batch_start = batch_i*batch_size
            batch_inputs = train_inputs[batch_start:batch_start + batch_size]
            batch_labels = train_labels[batch_start:batch_start + batch_size]

            # Run optimizer
            _ = sess.run(optimizer, feed_dict={inputs: batch_inputs, labels: batch_labels})

        # Once per epoch: evaluate on the whole validation split. `loss` is a
        # sum over the fed rows, so this figure scales with the split size.
        val_accuracy, val_loss = sess.run([accuracy, loss], feed_dict={inputs: val_inputs, labels: val_labels})
        print("After epoch {}, Loss: {}, Accuracy: {}".format(epoch_i+1, val_loss, val_accuracy))

    g = tf.get_default_graph()
    # Persist the trained weights and bias while the session is still open.
    saver.save(sess, 'testsave')
In [17]:
# to make one test prediction
# Directory holding the images used for the single-picture demo. The default
# is the original local path; set the CAPSTONE_TESTDIR environment variable to
# point somewhere else without editing the notebook.
TESTDIR = os.environ.get(
    "CAPSTONE_TESTDIR",
    "/Users/zhouyu/Documents/Zhou_Yu/DS/Galvanize/Capstone_data/test_of_test",
)
test_glob = os.path.join(TESTDIR, '*.jpg')
test_list = list(gfile.Glob(test_glob))
In [18]:
# Use the entry at index 5 of the globbed list as the sample to classify and
# echo its path.
test_file_name = test_list[5]
print(test_file_name)
In [19]:
# Feature vector for the chosen image, reshaped to a one-row batch. The width
# is inferred (-1) rather than hard-coded; the `inputs` placeholder enforces
# that it matches the width the model was built with.
image_input = getvector(test_file_name).reshape((1, -1))

start_time = time.time()
with tf.Session() as sess:
    # A new session starts with uninitialised variables, so the trained weights
    # and bias are loaded from the checkpoint written by the training cell.
    # The explicit './' prefix keeps the relative checkpoint path unambiguous.
    saver.restore(sess, './testsave')
    predict_res = sess.run(prediction, feed_dict={inputs: image_input})

# Column 0 is the 'basset' class and column 1 the other class, matching the
# one-hot layout used when the labels were built.
print('It\'s a Basset: {:.3f}, It\'s a Pit Bull: {:.3f}'.format(predict_res[0][0], predict_res[0][1]))
print("time spent on predicting one picture is: {:.2f}s".format(time.time() - start_time))
In [ ]: