Import
In [1]:
import tensorflow as tf
# from PIL import Image
import numpy as np
from scipy.misc import imread, imresize
from imagenet_classes import class_names
import os
file_path_info
In [2]:
# --- File locations and dataset size ---------------------------------------
# CSV file(s) with the encoded descriptions, wrapped in a filename queue for
# the TextLineReader that is set up further down.
description_csv_files = ['./data/description/v2/coded_data.csv']
filename_queue_description = tf.train.string_input_producer(description_csv_files)

# Checkpoint written by the training Saver and restored in the test section.
filepath_ckpt = "./ckpt/model_weight_v2_2.ckpt"

# Intended destination for predicted values.
# NOTE(review): not referenced by any cell visible here -- confirm it is still needed.
filepath_pred = "./output/predicted_v2.csv"

# Number of CSV records read; also reused as the LSTM batch size.
num_record = 50
Hyper Params - IMG
In [3]:
# --- Image hyper-parameters -------------------------------------------------
# Side length for the (currently commented-out) bilinear imresize path.
bilinear_size = 28
# Element count of one bilinear_size x bilinear_size image with 3 channels.
resized_size = 3 * bilinear_size ** 2
# Number of image label classes.
img_label_size = 5
Hyper Params - LSTM
In [4]:
# --- LSTM hyper-parameters --------------------------------------------------
# One-hot width of the word tokens (inputs and labels share it).
input_vec_size = label_vec_size = 32
# Number of time steps per sequence (the CSV has 19 columns, giving 18
# input/label pairs after the one-step shift).
hidden = 18
# Hidden state sizes of the first and second LSTM layers.
state_size_1 = 20
state_size_2 = 100
# The whole data set is processed as a single batch.
batch_size = num_record
# Step size for the Adam optimizer.
learning_rate = 0.001
vgg16_class
In [5]:
class vgg16:
    """VGG-16 network expressed as a TensorFlow 1.x graph.

    Constructing an instance adds the whole network to the default graph:
    per-channel mean subtraction, thirteen 3x3 convolutions arranged in five
    groups (each group followed by 2x2 max pooling) and three fully connected
    layers.

    Attributes:
        imgs: the input tensor handed to the constructor.
        conv1_1 ... conv5_3: ReLU output of each convolution.
        pool1 ... pool5: output of each max-pooling step.
        fc1, fc2: ReLU outputs of the first two fully connected layers.
        fc3l: pre-softmax output of the last fully connected layer (1000 units).
        probs: softmax over ``fc3l``.
        parameters: ``[weights, biases, weights, biases, ...]`` in layer
            order; ``load_weights`` relies on this ordering.
    """

    def __init__(self, imgs, weights=None, sess=None):
        """Build the graph and optionally load pretrained weights.

        Args:
            imgs: image tensor; the mean constant has shape [1, 1, 1, 3], so
                the last axis is expected to hold 3 channels.
            weights: path of an ``.npz`` weight file, or None to skip loading.
            sess: session used to run the assign ops; loading only happens
                when both ``weights`` and ``sess`` are given.
        """
        self.imgs = imgs
        self.convlayers()
        self.fc_layers()
        self.probs = tf.nn.softmax(self.fc3l)
        if weights is not None and sess is not None:
            self.load_weights(weights, sess)

    def _conv_relu(self, name, inputs, in_channels, out_channels):
        """Add one 3x3, stride-1, SAME-padded convolution + bias + ReLU."""
        with tf.name_scope(name) as scope:
            kernel = tf.Variable(
                tf.truncated_normal([3, 3, in_channels, out_channels],
                                    dtype=tf.float32, stddev=1e-1),
                name='weights')
            conv = tf.nn.conv2d(inputs, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.Variable(
                tf.constant(0.0, shape=[out_channels], dtype=tf.float32),
                trainable=True, name='biases')
            out = tf.nn.bias_add(conv, biases)
            activation = tf.nn.relu(out, name=scope)
            # Order matters: load_weights pairs parameters with sorted npz keys.
            self.parameters += [kernel, biases]
        return activation

    @staticmethod
    def _max_pool(name, inputs):
        """Add one 2x2, stride-2, SAME-padded max-pooling op."""
        return tf.nn.max_pool(inputs,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME',
                              name=name)

    def _dense(self, name, inputs, in_dim, out_dim, activate):
        """Add one fully connected layer; apply ReLU when ``activate`` is true."""
        with tf.name_scope(name) as scope:
            weights = tf.Variable(
                tf.truncated_normal([in_dim, out_dim],
                                    dtype=tf.float32, stddev=1e-1),
                name='weights')
            biases = tf.Variable(
                tf.constant(1.0, shape=[out_dim], dtype=tf.float32),
                trainable=True, name='biases')
            out = tf.nn.bias_add(tf.matmul(inputs, weights), biases)
            if activate:
                out = tf.nn.relu(out)
            self.parameters += [weights, biases]
        return out

    def convlayers(self):
        """Build preprocessing, the convolution stack and the pooling layers."""
        self.parameters = []

        # zero-mean input
        with tf.name_scope('preprocess') as scope:
            mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32,
                               shape=[1, 1, 1, 3], name='img_mean')
            images = self.imgs - mean

        # group 1
        self.conv1_1 = self._conv_relu('conv1_1', images, 3, 64)
        self.conv1_2 = self._conv_relu('conv1_2', self.conv1_1, 64, 64)
        self.pool1 = self._max_pool('pool1', self.conv1_2)

        # group 2
        self.conv2_1 = self._conv_relu('conv2_1', self.pool1, 64, 128)
        self.conv2_2 = self._conv_relu('conv2_2', self.conv2_1, 128, 128)
        self.pool2 = self._max_pool('pool2', self.conv2_2)

        # group 3
        self.conv3_1 = self._conv_relu('conv3_1', self.pool2, 128, 256)
        self.conv3_2 = self._conv_relu('conv3_2', self.conv3_1, 256, 256)
        self.conv3_3 = self._conv_relu('conv3_3', self.conv3_2, 256, 256)
        self.pool3 = self._max_pool('pool3', self.conv3_3)

        # group 4
        self.conv4_1 = self._conv_relu('conv4_1', self.pool3, 256, 512)
        self.conv4_2 = self._conv_relu('conv4_2', self.conv4_1, 512, 512)
        self.conv4_3 = self._conv_relu('conv4_3', self.conv4_2, 512, 512)
        self.pool4 = self._max_pool('pool4', self.conv4_3)

        # group 5
        self.conv5_1 = self._conv_relu('conv5_1', self.pool4, 512, 512)
        self.conv5_2 = self._conv_relu('conv5_2', self.conv5_1, 512, 512)
        self.conv5_3 = self._conv_relu('conv5_3', self.conv5_2, 512, 512)
        # The op used to be created with the copy-pasted name 'pool4'.
        self.pool5 = self._max_pool('pool5', self.conv5_3)

    def fc_layers(self):
        """Build the three fully connected layers on top of ``pool5``."""
        # Flattened feature length per example (all non-batch dims of pool5).
        shape = int(np.prod(self.pool5.get_shape()[1:]))
        pool5_flat = tf.reshape(self.pool5, [-1, shape])

        self.fc1 = self._dense('fc1', pool5_flat, shape, 4096, activate=True)
        self.fc2 = self._dense('fc2', self.fc1, 4096, 4096, activate=True)
        # Last layer stays linear; softmax is applied in __init__.
        self.fc3l = self._dense('fc3', self.fc2, 4096, 1000, activate=False)

    def load_weights(self, weight_file, sess):
        """Assign arrays from an ``.npz`` file to ``self.parameters``.

        The archive keys are sorted and the i-th key is assigned to the i-th
        entry of ``self.parameters``, so the sorted key order has to match the
        order in which the layers were built.

        Args:
            weight_file: path (or file object) accepted by ``np.load``.
            sess: session in which the assign ops are run.

        Raises:
            IndexError: if the archive holds more arrays than the network has
                parameters.
        """
        weights = np.load(weight_file)
        keys = sorted(weights.keys())
        for i, k in enumerate(keys):
            # Archive members are loaded lazily on access, so read each once.
            value = weights[k]
            print(i, k, np.shape(value))
            sess.run(self.parameters[i].assign(value))
Train
load_img_vgg16
In [ ]:
# Extract, for every training image, the VGG fc1 feature vector and a coarse
# class label derived from the VGG top-1 prediction.
with tf.Session() as sess_vgg:
    imgs = tf.placeholder(tf.float32, [None, 200, 200, 3])
    vgg = vgg16(imgs, 'vgg16_weights.npz', sess_vgg)

    # os.listdir returns names in arbitrary order. Sort them so that row i of
    # the features is reproducible across runs.
    # NOTE(review): assumes the CSV description rows follow the sorted
    # file-name order -- confirm against the data set.
    img_files = ['./data/img/cropped/' + name
                 for name in sorted(os.listdir('./data/img/cropped'))]
    raw_images = [imread(file, mode='RGB') for file in img_files]

    # One forward pass per image yields both the fc1 feature and the class
    # probabilities (previously the network was run twice per image).
    fc1_rows = []
    prob = []
    for img in raw_images:
        fc1_out, prob_out = sess_vgg.run([vgg.fc1, vgg.probs],
                                         feed_dict={vgg.imgs: [img]})
        fc1_rows.append(fc1_out[0])
        prob.append(prob_out[0])

    # bilinear feature: one flattened fc1 vector per image
    imgs_bi = np.reshape(fc1_rows, [len(fc1_rows), -1])

    # label: top-1 VGG class index, remapped to 0..4 for the five classes of
    # interest. Any other index is kept as is and therefore becomes an
    # all-zero row in the one-hot encoding below.
    imagenet_to_label = {430: 0, 429: 1, 805: 2, 768: 3, 574: 4}
    top1 = [int(np.argmax(p)) for p in prob]
    preds = [imagenet_to_label.get(idx, idx) for idx in top1]

    img_label_onehot = tf.one_hot(indices=preds, depth=len(imagenet_to_label))
    print(preds)

    # clear references to the large intermediates
    raw_images = None
    imgs = None
    vgg = None
    img_files = None
    prob = None
# The session is closed by the `with` block; no explicit close() is needed.
In [ ]:
# Sanity check: (number of images, flattened fc1 feature length).
print(np.shape(imgs_bi))
Text_Reader_Setting
In [ ]:
# Read the description CSV one line at a time from the filename queue.
reader = tf.TextLineReader()
key, value = reader.read(filename_queue_description)

# Every record has 19 integer columns; -2 is the default for each column.
record_defaults = [[-2] for _ in range(19)]
csv_columns = tf.decode_csv(value, record_defaults)
(w1, w2, w3, w4, w5, w6, w7, w8, w9, w10,
 w11, w12, w13, w14, w15, w16, w17, w18, w19) = csv_columns

# Next-token setup: inputs are columns 1..18, labels are the same sequence
# shifted by one position (columns 2..19).
feature_label = tf.stack(csv_columns[1:])
feature_word = tf.stack(csv_columns[:-1])
In [ ]:
# Pull `num_record` CSV records through the input queue and one-hot encode
# them into `full_input` / `full_label`, each of shape
# [num_record * 18, input_vec_size].
with tf.Session() as sess_data:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    label_rows = []
    input_rows = []
    try:
        for i in range(num_record):
            # A single run dequeues one record, so label and input always
            # come from the same CSV line.
            raw_label, raw_input = sess_data.run([feature_label, feature_word])
            label_rows.append(raw_label)
            input_rows.append(raw_input)
    finally:
        # Stop the queue-runner threads even if a read fails.
        coord.request_stop()
        coord.join(threads)

# Encode everything with one op per tensor instead of adding a one_hot and a
# concat op to the graph for every record. Negative indices (the -2 default)
# become all-zero rows.
full_input = tf.one_hot(indices=np.concatenate(input_rows), depth=input_vec_size)
full_label = tf.one_hot(indices=np.concatenate(label_rows), depth=input_vec_size)

# Scratch names are left defined as None, as before.
raw_label = None
raw_input = None
onehot_input = None
onehot_label = None
In [ ]:
# Fold the flat [batch_size * hidden, vec] one-hot rows back into sequences.
with tf.name_scope('batch') as scope:
    sequence_shape = [batch_size, hidden, input_vec_size]
    full_input = tf.reshape(full_input, sequence_shape)
    full_label = tf.reshape(full_label, sequence_shape)
Model
LSTM First Layer
In [ ]:
# First LSTM layer: consumes the one-hot word sequence step by step.
with tf.name_scope('lstm_layer_1') as scope:
    with tf.variable_scope('lstm_layer_1'):
        rnn_cell_1 = tf.contrib.rnn.BasicLSTMCell(state_size_1, reuse=None)
        # static_rnn expects a Python list with one tensor per time step.
        word_steps = tf.unstack(full_input, axis=1)
        output_1, _ = tf.contrib.rnn.static_rnn(rnn_cell_1, word_steps, dtype=tf.float32)
        # Every time step of the second layer sees the first layer's output
        # together with the image feature and the image label.
        input_2 = [
            tf.concat([out, imgs_bi, img_label_onehot], axis=1)
            for out in output_1
        ]
LSTM Second Layer
In [ ]:
# Second LSTM layer plus a per-time-step output projection.
with tf.name_scope('lstm_layer_2') as scope:
    with tf.variable_scope('lstm_layer_2'):
        rnn_cell_2 = tf.contrib.rnn.BasicLSTMCell(state_size_2, reuse=None)
        fused_steps = tf.unstack(input_2, axis=0)
        output_2, _ = tf.contrib.rnn.static_rnn(rnn_cell_2, fused_steps, dtype=tf.float32)
        # One [state_size_2, input_vec_size] projection matrix per time step;
        # the bias is shared by all steps.
        output_w_2 = tf.Variable(tf.truncated_normal([hidden, state_size_2, input_vec_size]))
        output_b_2 = tf.Variable(tf.zeros([input_vec_size]))
        # The list of step outputs is batched against the stacked matrices.
        projected = tf.matmul(output_2, output_w_2) + output_b_2
        pred = tf.nn.softmax(projected)
Loss
In [ ]:
# Sum of the per-time-step cross-entropy losses, minimized with Adam.
with tf.name_scope('loss') as scope:
    # `pred` is already a softmax output, while softmax_cross_entropy expects
    # unscaled logits and applies softmax itself. Feeding log(pred) makes that
    # inner softmax reproduce `pred` instead of squashing it a second time.
    # The clip keeps log() finite for probabilities that underflow to 0.
    log_pred = tf.log(tf.clip_by_value(pred, 1e-10, 1.0))

    # Unstack once instead of once per loop iteration.
    labels_per_step = tf.unstack(full_label, axis=1)
    logits_per_step = tf.unstack(log_pred, axis=0)

    loss = tf.constant(0, tf.float32)
    for step_labels, step_logits in zip(labels_per_step, logits_per_step):
        loss += tf.losses.softmax_cross_entropy(step_labels, step_logits)

    train = tf.train.AdamOptimizer(learning_rate).minimize(loss)
Run Train
In [ ]:
# Session that is kept open across the training cells below.
sess_train = tf.Session()
# Give every variable in the graph its initial value.
sess_train.run(tf.global_variables_initializer())
# Saver built without arguments.
saver = tf.train.Saver()
# Write an initial checkpoint before any training step has run.
save_path = saver.save(sess_train, filepath_ckpt)
In [ ]:
# Full-batch training: every step runs the Adam update once; the loss is
# reported at regular intervals and the weights are saved when training ends.
num_steps = 10001
report_every = 1000
for i in range(num_steps):
    sess_train.run(train)
    if i % report_every != 0:
        continue
    print("loss : ", sess_train.run(loss))

save_path = saver.save(sess_train, filepath_ckpt)
print("= Weigths are saved in " + filepath_ckpt)
In [ ]:
# Training is finished; release the session's resources.
sess_train.close()
Test
In [6]:
# Extract the VGG fc1 feature and a coarse class label for each test image.
with tf.Session() as sess_vgg_test:
    imgs = tf.placeholder(tf.float32, [None, 200, 200, 3])
    vgg = vgg16(imgs, 'vgg16_weights.npz', sess_vgg_test)

    test_img_files = [
        './data/img/test/cropped/test001.png',
        './data/img/cropped/005.png',
        './data/img/cropped/014.png',
        './data/img/cropped/021.png',
        './data/img/cropped/036.png',
        './data/img/cropped/050.png',
    ]
    num_imgs = len(test_img_files)
    test_imgs = [imread(file, mode='RGB') for file in test_img_files]

    # One forward pass per image yields both the fc1 feature and the class
    # probabilities (previously the network was run twice per image).
    fc1_rows = []
    prob = []
    for img in test_imgs:
        fc1_out, prob_out = sess_vgg_test.run([vgg.fc1, vgg.probs],
                                              feed_dict={vgg.imgs: [img]})
        fc1_rows.append(fc1_out[0])
        prob.append(prob_out[0])

    # bilinear feature: one flattened fc1 vector per image
    test_imgs_bi = np.reshape(fc1_rows, [num_imgs, -1])

    # label: top-1 VGG class index, remapped to 0..4 for the five classes of
    # interest. Any other index is kept as is and therefore becomes an
    # all-zero row in the one-hot encoding below.
    imagenet_to_label = {430: 0, 429: 1, 805: 2, 768: 3, 574: 4}
    top1 = [int(np.argmax(p)) for p in prob]
    test_preds = [imagenet_to_label.get(idx, idx) for idx in top1]

    test_img_label_onehot = tf.one_hot(indices=test_preds, depth=len(imagenet_to_label))
    print(sess_vgg_test.run(test_img_label_onehot))

    # clear references to the large intermediates
    test_imgs = None
    vgg = None
    test_img_files = None
    prob = None
# The session is closed by the `with` block; no explicit close() is needed.
In [7]:
# Initial generator input: all zeros except for a start token (index 0 set at
# time step 0) for every test image.
# `start_input` is kept so that the name stays defined for other cells.
start_input = tf.zeros([num_imgs, hidden, input_vec_size])

# A plain NumPy array is all that is needed here; there is no reason to open
# a TensorFlow session just to materialize zeros. float32 matches tf.zeros.
input_init = np.zeros([num_imgs, hidden, input_vec_size], dtype=np.float32)
input_init[:, 0, 0] = 1
Test-First_Layer-LSTM
In [8]:
# First LSTM layer for inference; the scope names mirror the training cell so
# that the checkpoint variables can be matched by name.
# NOTE(review): input_init is a NumPy array, so tf.unstack captures its
# current values as graph constants. Later in-place edits of input_init are
# not seen by this graph -- confirm this is intended.
with tf.name_scope('lstm_layer_1') as scope:
    with tf.variable_scope('lstm_layer_1'):
        rnn_cell_1 = tf.contrib.rnn.BasicLSTMCell(state_size_1, reuse=None)
        start_steps = tf.unstack(input_init, axis=1)
        output_test_1, _ = tf.contrib.rnn.static_rnn(rnn_cell_1, start_steps, dtype=tf.float32)
        # Append the image feature and image label to every time step.
        input_2 = [
            tf.concat([out, test_imgs_bi, test_img_label_onehot], axis=1)
            for out in output_test_1
        ]
In [9]:
# Second LSTM layer and output projection for inference; the variables are
# created in the same order as in the training cell so their names line up
# with the checkpoint.
with tf.name_scope('lstm_layer_2') as scope:
    with tf.variable_scope('lstm_layer_2'):
        rnn_cell_2 = tf.contrib.rnn.BasicLSTMCell(state_size_2, reuse=None)
        fused_steps = tf.unstack(input_2, axis=0)
        output_2, _ = tf.contrib.rnn.static_rnn(rnn_cell_2, fused_steps, dtype=tf.float32)
        # One projection matrix per time step; the bias is shared.
        output_w_2 = tf.Variable(tf.truncated_normal([hidden, state_size_2, input_vec_size]))
        output_b_2 = tf.Variable(tf.zeros([input_vec_size]))
        projected = tf.matmul(output_2, output_w_2) + output_b_2
        pred = tf.nn.softmax(projected)
In [10]:
# Inference session; variable values come from the checkpoint below instead
# of from an initializer.
sess_model = tf.Session()
# allow_empty=True lets the Saver be built even if the graph has no variables.
saver = tf.train.Saver(allow_empty=True)
# Load the weights that the training section wrote to filepath_ckpt.
saver.restore(sess_model, filepath_ckpt)
In [11]:
# Step-wise generation: evaluate the model, then copy the prediction for step
# i into the input slot of step i + 1 (the last step has no successor).
# NOTE(review): the graph was built from the values input_init held when the
# first LSTM layer was created, so these in-place writes do not appear to
# reach the graph and every run returns the same `pred`. Feeding input_init
# through a placeholder would be needed for real feedback -- confirm.
final_step = hidden - 1
for i in range(hidden):
    result = sess_model.run(pred)
    result_temp = result[i]
    if i != final_step:
        input_init[:, i + 1] = result_temp
In [12]:
# Shape of the last `pred` evaluation fetched by the generation loop.
print(np.shape(result))
Result Check
In [13]:
import csv

# Load the class table: one list of string fields per CSV row. The first
# field of a row is the word that is printed when decoding the result.
# The `with` block closes the file even if reading fails part-way.
with open('./data/description/v2/class.csv', 'r') as f:
    csvReader = csv.reader(f)
    classes = [row for row in csvReader]
In [14]:
# Most probable class index for every (time step, image) pair.
decoded_result = np.argmax(result, axis=2)
print(decoded_result.shape)
In [18]:
# Print one generated sentence per test image. decoded_result is indexed as
# [time step][image], so column i is collected across all time steps.
# The accumulator used to be called `str`, which rebinds the builtin for the
# rest of the notebook session.
for i in range(num_imgs):
    sentence = " " + "".join(classes[r[i]][0] + " " for r in decoded_result)
    print(sentence)
Code Storage