In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from scipy import misc  # imread / imresize were removed in newer scipy; this targets scipy < 1.2
import tensorflow as tf
import time
dic_val = {'violence': 0, 'fearful': 1, 'no eye contact': 2, 'not alone': 3, 'depression': 4}
# inverse mapping so column indices can be turned back into tag names;
# dict key order is arbitrary in Python 2, so dic_val.keys()[i] is not reliable
inv_dic = {v: k for k, v in dic_val.items()}
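Each image gets a multi-hot vector over these five tags. Assuming a label.txt line such as `violence, depression` (the format implied by the parser in the next cell), the encoding works like this:

In [ ]:
# hypothetical label line, encoded the same way getdata() does below
line = 'violence, depression'
tags = [t.strip() for t in line.lower().split(',') if t.strip()]
label = np.zeros((1, len(dic_val)), dtype = np.float32)
for t in tags:
    label[0, dic_val[t]] = 1.0
print label  # [[ 1.  0.  0.  0.  1.]]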
In [2]:
def getdata(folders = ['data1', 'data2']):
    images, labels = [], []
    for i in folders:
        # collect every numbered jpg in the folder, sorted numerically
        list_images = os.listdir(os.getcwd() + '/' + i)
        list_images.remove('label.txt')
        list_images = [k[:-4] for k in list_images]
        list_images.sort(key = int)
        images += [os.getcwd() + '/' + i + '/' + k + '.jpg' for k in list_images]
        # one label line per image: comma-separated tag names
        with open(os.getcwd() + '/' + i + '/label.txt') as fopen:
            data = filter(None, fopen.read().split('\n'))
        for k in data:
            k = [n.strip() for n in filter(None, k.lower().split(','))]
            # multi-hot encoding over dic_val
            label = np.zeros((1, len(dic_val)), dtype = np.float32)
            for n in k:
                label[0, dic_val[n]] = 1.0
            labels.append(label)
    return images, labels
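getdata assumes each folder holds numbered jpgs (1.jpg, 2.jpg, ...) next to a label.txt with one comma-separated line per image. A quick sanity check once the folders are in place; the counts are whatever data1 / data2 actually contain:

In [ ]:
# sanity check: one label vector per image path
imgs, labs = getdata()
assert len(imgs) == len(labs)
print len(imgs), 'images,', labs[0].shape[1], 'tags per label'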
In [3]:
class Model:
    def __init__(self, learning_rate, beta, picture_size, label_size):
        self.X = tf.placeholder(tf.float32, (None, picture_size, picture_size, 3))
        self.Y = tf.placeholder(tf.float32, (None, label_size))
        # fully connected weights; the biases must be tf.Variable,
        # otherwise they are constants and never train
        fully_connected1 = tf.Variable(tf.random_normal([7 * 7 * 16, 512], stddev = 0.5))
        fully_b1 = tf.Variable(tf.zeros([512]))
        fully_connected2 = tf.Variable(tf.random_normal([512, 32], stddev = 0.5))
        fully_b2 = tf.Variable(tf.zeros([32]))
        fully_connected3 = tf.Variable(tf.random_normal([32, label_size], stddev = 0.5))
        fully_b3 = tf.Variable(tf.zeros([label_size]))
        # five conv + pool blocks; each 'same' pool halves the spatial size, 200 -> 7
        conv1 = tf.layers.conv2d(self.X, 64, (3, 3), padding = 'same', activation = tf.nn.tanh)
        maxpool1 = tf.layers.max_pooling2d(conv1, (2, 2), (2, 2), padding = 'same')
        maxpool1 = tf.layers.batch_normalization(maxpool1)
        conv2 = tf.layers.conv2d(maxpool1, 32, (3, 3), padding = 'same', activation = tf.nn.tanh)
        maxpool2 = tf.layers.max_pooling2d(conv2, (2, 2), (2, 2), padding = 'same')
        maxpool2 = tf.layers.batch_normalization(maxpool2)
        conv3 = tf.layers.conv2d(maxpool2, 16, (3, 3), padding = 'same', activation = tf.nn.tanh)
        maxpool3 = tf.layers.max_pooling2d(conv3, (2, 2), (2, 2), padding = 'same')
        maxpool3 = tf.layers.batch_normalization(maxpool3)
        conv4 = tf.layers.conv2d(maxpool3, 16, (3, 3), padding = 'same', activation = tf.nn.tanh)
        maxpool4 = tf.layers.max_pooling2d(conv4, (2, 2), (2, 2), padding = 'same')
        maxpool4 = tf.layers.batch_normalization(maxpool4)
        conv5 = tf.layers.conv2d(maxpool4, 16, (3, 3), padding = 'same', activation = tf.nn.tanh)
        maxpool5 = tf.layers.max_pooling2d(conv5, (2, 2), (2, 2), padding = 'same')
        maxpool5 = tf.reshape(maxpool5, [-1, 7 * 7 * 16])
        maxpool5 = tf.layers.batch_normalization(maxpool5)
        linear = tf.nn.tanh(tf.matmul(maxpool5, fully_connected1) + fully_b1)
        linear = tf.layers.batch_normalization(linear)
        linear = tf.nn.tanh(tf.matmul(linear, fully_connected2) + fully_b2)
        linear = tf.layers.batch_normalization(linear)
        self.logits = tf.matmul(linear, fully_connected3) + fully_b3
        # multi-label objective: independent sigmoid cross-entropy per tag,
        # plus L2 regularization on the fully connected weights
        self.cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels = self.Y, logits = self.logits))
        self.cost += beta * (tf.nn.l2_loss(fully_connected1) + tf.nn.l2_loss(fully_connected2) + tf.nn.l2_loss(fully_connected3))
        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
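With 'same' padding and stride-2 pooling, each block maps spatial size s to ceil(s / 2), so a 200 px input shrinks 200 -> 100 -> 50 -> 25 -> 13 -> 7; that is where the 7 * 7 * 16 flatten size comes from. A quick check, separate from the model itself:

In [ ]:
# trace the spatial size through the five pooling layers
s = 200
for _ in range(5):
    s = int(np.ceil(s / 2.0))  # 'same' pooling rounds up
print s, '->', s * s * 16  # 7 -> 784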
In [4]:
image_size = 200
learning_rate = 0.0001
beta = 0.00005
epoch = 20
batch_size = 2
images, labels = getdata()
In [5]:
image = misc.imread(images[0])
plt.imshow(image)
plt.show()
for i in xrange(labels[0].shape[1]):
    print inv_dic[i] + ': ' + str(labels[0][0, i])
In [6]:
sess = tf.InteractiveSession()
model = Model(learning_rate, beta, image_size, labels[0].shape[1])
# build the sigmoid op once; creating it inside the loops below would keep growing the graph
probs = tf.nn.sigmoid(model.logits)
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver(tf.global_variables())
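With the saver in hand, weights can be checkpointed between epochs and restored later; a minimal sketch, with a placeholder checkpoint path:

In [ ]:
# save the current weights (path is a placeholder)
checkpoint = os.getcwd() + '/model.ckpt'
saver.save(sess, checkpoint)
# in a later session, after rebuilding the same graph:
# saver.restore(sess, checkpoint)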
In [7]:
for i in xrange(epoch):
    last = time.time()
    total_loss, total_acc = 0.0, 0.0
    for x in xrange(0, (len(images) // batch_size) * batch_size, batch_size):
        emb_data = np.zeros((batch_size, image_size, image_size, 3), dtype = np.float32)
        labels_data = np.zeros((batch_size, labels[0].shape[1]), dtype = np.float32)
        for k in xrange(batch_size):
            # mode = 'RGB' guards against grayscale files breaking the 3-channel slot
            image = misc.imread(images[x + k], mode = 'RGB')
            emb_data[k, :, :, :] = misc.imresize(image, (image_size, image_size)) / 255.0
            labels_data[k, :] = labels[x + k]
        # fetch loss and probabilities in the same run as the parameter update,
        # instead of building a fresh sigmoid op every iteration
        _, loss, out = sess.run([model.optimizer, model.cost, probs],
                                feed_dict = {model.X : emb_data, model.Y : labels_data})
        total_loss += loss
        # multi-label accuracy: fraction of tags predicted correctly at a 0.5 threshold
        total_acc += np.mean((out > 0.5).astype(np.float32) == labels_data)
    total_loss /= (len(images) // batch_size); total_acc /= (len(images) // batch_size)
    print 'epoch: ' + str(i + 1) + ', loss: ' + str(total_loss) + ', accuracy: ' + str(total_acc) + ', s / epoch: ' + str(time.time() - last)
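The loop above visits images in the same fixed order every epoch. Shuffling once per epoch is a common tweak; a minimal sketch to drop in at the top of the epoch loop:

In [ ]:
# reshuffle image/label pairs together (sketch, not run above)
order = np.random.permutation(len(images))
images = [images[j] for j in order]
labels = [labels[j] for j in order]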
In [8]:
for i in xrange(len(images)):
    emb_data = np.zeros((1, image_size, image_size, 3), dtype = np.float32)
    image = misc.imread(images[i], mode = 'RGB')
    # same preprocessing as training: resize, then scale to [0, 1]
    emb_data[0, :, :, :] = misc.imresize(image, (image_size, image_size)) / 255.0
    out = sess.run(probs, feed_dict = {model.X : emb_data})
    plt.imshow(image)
    plt.show()
    # labels[i], not labels[0]: each image has its own ground truth
    true = [inv_dic[k] + ': ' + str(labels[i][0, k]) for k in xrange(labels[i].shape[1])]
    predict = [inv_dic[k] + ': ' + str(out[0, k]) for k in xrange(labels[i].shape[1])]
    print '[ACTUAL] ' + ' '.join(true)
    print '[PREDICT] ' + ' '.join(predict)
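The sigmoid outputs are per-tag probabilities; to turn one into a list of tag names, threshold at 0.5 (a conventional cutoff, not tuned here). Reusing the last emb_data from the loop above:

In [ ]:
# decode one probability vector into predicted tag names
out = sess.run(probs, feed_dict = {model.X : emb_data})
print [inv_dic[k] for k in xrange(out.shape[1]) if out[0, k] > 0.5]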
In [9]:
emb_data = np.zeros((1, image_size, image_size, 3), dtype = np.float32)
image = misc.imread('test.jpg', mode = 'RGB')
# same preprocessing as training: resize, then scale to [0, 1]
emb_data[0, :, :, :] = misc.imresize(image, (image_size, image_size)) / 255.0
out = sess.run(probs, feed_dict = {model.X : emb_data})
predict = [inv_dic[k] + ': ' + str(out[0, k]) for k in xrange(len(dic_val))]
print '[PREDICT] ' + ' '.join(predict)
In [ ]: