In [1]:
import tensorflow as tf
from PIL import Image
import numpy as np

File Info


In [2]:
filenames = ['./data/img/cropped/001.png', './data/img/cropped/002.png', './data/img/cropped/003.png']
filename_queue_img = tf.train.string_input_producer(filenames)
img_record = 3
filename_queue_description = tf.train.string_input_producer(['./data/description/raw_data.csv'])
num_record = 50

Img Reader


In [3]:
reader = tf.WholeFileReader()
key, value = reader.read(filename_queue_img)
images = tf.image.decode_png(value, channels=3)

In [4]:
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    for i in range(img_record):
        image = sess.run(images)
        print(sess.run(tf.shape(image)))
#         Image.fromarray(np.asarray(image)).show()

    coord.request_stop()
    coord.join(threads)
    sess.close()


[200 200   3]
[200 200   3]
[200 200   3]

Text Reader


In [5]:
reader = tf.TextLineReader()
key,value = reader.read(filename_queue_description)
record_defaults =[[-1], [-1], [-1], [-1], [-1], [-2], [-2], [-2], [-2], [-2], [-2], [-2], [-2], [-2], [-2], [-2], [-2], [-2], [-2], [-2]]
lab1, lab2, lab3, lab4, lab5, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15 = tf.decode_csv(value, record_defaults)  

feautre_label = tf.stack([lab1, lab2, lab3, lab4, lab5])
feature_word = tf.stack([w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15])

In [ ]:


In [6]:
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    
    
    for i in range(num_record):
        label, raw_word = sess.run([feautre_label, feature_word])
        onehot = tf.one_hot(indices=raw_word, depth=27)
        if i == 0:
            full_input = onehot
            full_label = label
        else:
            full_input = tf.concat([full_input, onehot], 0)
            full_label = tf.concat([full_label, label], 0)
        
#   print(sess.run(onehot))
#     print(sess.run(label_batch))
    coord.request_stop()
    coord.join(threads)
    sess.close()

Text Batch


In [7]:
label_vec_size = 5
input_vec_size = 27
batch_size = 50
state_size = 5
hidden = 15
learning_rate = 0.01

In [8]:
with tf.name_scope('batch') as scope:
    # full_label = tf.reshape(full_label, [batch_size, hidden, label_vec_size])
    full_input = tf.reshape(full_input, [batch_size, hidden, input_vec_size])
    input_batch, label_batch = tf.train.batch([full_input, full_input], batch_size=1)

In [9]:
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

#     print(sess.run((full_input[0])))
#     print(sess.run(tf.unstack(full_input, axis=1)))
    
    coord.request_stop()
    coord.join(threads)
    sess.close()

LSTM First Layer


In [10]:
with tf.name_scope('lstm_layer_1') as scope:
    rnn_cell = tf.contrib.rnn.BasicLSTMCell(state_size, reuse=None)
    output, _ = tf.contrib.rnn.static_rnn(rnn_cell, tf.unstack(full_input, axis=1), dtype=tf.float32)
    output_w = tf.Variable(tf.truncated_normal([hidden, state_size, input_vec_size]))
    output_b = tf.Variable(tf.zeros([input_vec_size]))
    pred = tf.nn.softmax(tf.matmul(output, output_w) + output_b)

In [11]:
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    sess.run(tf.global_variables_initializer())
#     print(sess.run(pred))
    
    coord.request_stop()
    coord.join(threads)
    sess.close()

In [12]:
with tf.name_scope('loss') as scope:
    loss = tf.constant(0, tf.float32)
    for i in range(hidden):
    #     loss += tf.losses.mean_squared_error(tf.unstack(cls.b_label, axis=1)[i], tf.unstack(cls.pred, axis=0)[i])
        loss += tf.losses.softmax_cross_entropy(tf.unstack(full_input, axis=1)[i], tf.unstack(pred, axis=0)[i])
    train = tf.train.AdamOptimizer(learning_rate).minimize(loss)

In [ ]:
merged = tf.summary.merge_all()

In [ ]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(10001):
        sess.run(train)
        if i % 1000 == 0:
            train_writer = tf.summary.FileWriter('./summary/train', sess.graph)
            print("loss : ", sess.run(loss))
#             print("pred : ", sess.run(pred))
    sess.close()


loss :  49.4499
loss :  35.7349
loss :  35.7229
loss :  35.6037
loss :  35.6028
loss :  35.5636
loss :  35.5632
loss :  35.5631
loss :  35.5631
loss :  35.563

Result


In [ ]:


In [ ]:
with tf.Session() as sess:
    sess.close()

In [ ]:
output1 = tf.contrib.rnn.static_rnn(rnn_cell, tf.unstack(full_input, axis=1), dtype=tf.float32)
pred = tf.nn.softmax(tf.matmul(output1, output_w[0]) + output_b[0])

In [ ]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import inspect
import time

import numpy as np
import tensorflow as tf

import reader

In [ ]:


In [ ]:


In [ ]:
def lstm_cell():
    tf.contrib.rnn.BasicLSTMCell(size, forget_bias=0.0, state_is_tuple=True)
      inputs = tf.nn.embedding_lookup(embedding, input_.input_data)

    if is_training and config.keep_prob < 1:
    output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, size])
    softmax_w = tf.get_variable("softmax_w", [size, vocab_size], dtype=data_type())
    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
    logits = tf.matmul(output, softmax_w) + softmax_b
    loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
        [logits],
        [tf.reshape(input_.targets, [-1])],
        [tf.ones([batch_size * num_steps], dtype=data_type())])
    self._cost = cost = tf.reduce_sum(loss) / batch_size
    self._final_state = state