Download the cloth-folding dataset from https://data.mendeley.com/datasets/c7y3hcrj7z/1.
Extract the zip and note where you extracted it; we'll call that location `DATASET_DIR`.
In [1]:
import glob
import os
import re

import numpy as np
import tensorflow as tf
# NOTE: scipy.misc.imread / imresize were deprecated in SciPy 1.0 and removed
# in later releases; this notebook needs a SciPy version that still ships them.
from scipy.misc import imread, imresize

from vgg16 import vgg16
# Number of videos in the dataset; each video's frames are read from the
# sub-folder of DATASET_DIR named after its id ('1' .. str(NUM_VIDS)).
NUM_VIDS = 45

# Folder the dataset zip was extracted to (edit to match your machine).
DATASET_DIR = os.path.join(
    os.path.expanduser('~'),
    'res',
    'cloth_folding_rgb_vids',
)
def get_img_pair(video_id):
    """Load the first and last frame of one video, resized to 224x224.

    Args:
        video_id: Name of the video's sub-folder inside DATASET_DIR (string).

    Returns:
        A ``(start_img, end_img)`` tuple holding the resized first and last
        frame. For a single-frame video both entries come from the same file.

    Raises:
        IndexError: If the video folder contains no ``*.png`` files.
    """
    def frame_order(path):
        # Natural ordering: digit runs compare numerically, so '10.png' sorts
        # after '2.png'. Zero-padded names keep their plain sorted() order.
        # re.split with a capturing group alternates text / digits, so the
        # odd positions are always the digit runs.
        name = os.path.basename(path)
        pieces = re.split(r'(\d+)', name)
        key = [int(tok) if pos % 2 else tok for pos, tok in enumerate(pieces)]
        # The raw name breaks ties such as '01.png' vs '1.png' deterministically.
        return key, name

    video_dir = os.path.join(DATASET_DIR, video_id)
    img_files = sorted(glob.glob(os.path.join(video_dir, '*.png')), key=frame_order)
    if not img_files:
        # Same exception type the bare img_files[0] lookup used to raise, but
        # with a message that points at the offending folder.
        raise IndexError('No PNG frames found in {!r}'.format(video_dir))

    pair = []
    for image_file in (img_files[0], img_files[-1]):
        img_original = imread(image_file)
        pair.append(imresize(img_original, (224, 224)))
    return tuple(pair)
# Load the (first frame, last frame) pair of every video, ids '1'..str(NUM_VIDS).
video_ids = [str(number) for number in range(1, NUM_VIDS + 1)]
img_pairs = [get_img_pair(vid) for vid in video_ids]

# Split the pairs into two parallel lists indexed by video.
start_imgs = [first for first, _ in img_pairs]
end_imgs = [last for _, last in img_pairs]

print('Images of starting state {}'.format(np.shape(start_imgs)))
print('Images of ending state {}'.format(np.shape(end_imgs)))
In [2]:
# Input placeholder handed to the VGG16 network: a batch of 224x224 images
# with 3 channels (batch size left open).
imgs_plc = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3])
In [3]:
# Width of each input embedding and of the hidden layer of the ranking net.
# x1 / x2 are later fed the vgg.fc1 activations, so n_features presumably has
# to match that layer's width (confirm against the vgg16 module).
n_features = 4096
n_hidden = 10  # original author's note: n_features * 2

with tf.name_scope("input"):
    # The two items of a pair to be ranked against each other.
    feature_shape = [None, n_features]
    x1 = tf.placeholder(dtype=tf.float32, shape=feature_shape, name="x1")
    x2 = tf.placeholder(dtype=tf.float32, shape=feature_shape, name="x2")
    # Keep-probability used by the dropout on the hidden layer.
    dropout_keep_prob = tf.placeholder(dtype=tf.float32, name='dropout_prob')
# Hidden layer: one set of weights (w1, b1) applied to both inputs, so x1 and
# x2 are embedded by the same function.
with tf.name_scope("hidden_layer"):
    with tf.name_scope("weights"):
        w1_init = tf.random_normal(shape=[n_features, n_hidden])
        w1 = tf.Variable(w1_init, name="w1")
        tf.summary.histogram("w1", w1)
        b1_init = tf.random_normal(shape=[n_hidden])
        b1 = tf.Variable(b1_init, name="b1")
        tf.summary.histogram("b1", b1)

    with tf.name_scope("output"):
        # ReLU activation followed by dropout, first for x1 ...
        relu1 = tf.nn.relu(tf.matmul(x1, w1) + b1)
        h1 = tf.nn.dropout(relu1, keep_prob=dropout_keep_prob)
        tf.summary.histogram("h1", h1)
        # ... then the identical computation for x2.
        relu2 = tf.nn.relu(tf.matmul(x2, w1) + b1)
        h2 = tf.nn.dropout(relu2, keep_prob=dropout_keep_prob)
        tf.summary.histogram("h2", h2)
# Output layer: maps each hidden representation to a single scalar score,
# again with weights (w2, b2) shared between the two inputs.
with tf.name_scope("output_layer"):
    with tf.name_scope("weights"):
        w2_init = tf.random_normal(shape=[n_hidden, 1])
        w2 = tf.Variable(w2_init, name="w2")
        tf.summary.histogram("w2", w2)
        b2_init = tf.random_normal(shape=[1])
        b2 = tf.Variable(b2_init, name="b2")
        tf.summary.histogram("b2", b2)

    with tf.name_scope("output"):
        # s1 scores x1, s2 scores x2.
        s1 = tf.matmul(h1, w2) + b2
        s2 = tf.matmul(h2, w2) + b2
# Compute the pairwise ranking loss from the two output scores.
with tf.name_scope("loss"):
    # Score difference of each pair, flattened to a vector.
    s12 = s1 - s2
    s12_flat = tf.reshape(s12, [-1])

    # Not used by the loss below; kept so any existing reference to these
    # tensors keeps working.
    pred = tf.sigmoid(s12)
    lable_p = tf.sigmoid(-tf.ones_like(s12))

    # Binary (sigmoid) cross-entropy with target 0 on the logit s1 - s2 + 1,
    # i.e. softplus(s1 - s2 + 1): small only when s2 exceeds s1 by a margin.
    # A softmax cross-entropy with all-zero labels evaluates to
    # -sum(0 * log_softmax(logits)) = 0 for every input, so it cannot serve
    # as the ranking loss here.
    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.zeros_like(s12_flat), logits=s12_flat + 1)

    loss = tf.reduce_mean(cross_entropy)
    tf.summary.scalar("loss", loss)
# Single optimisation step: Adam with learning rate 0.001 minimising `loss`.
with tf.name_scope("train_op"):
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    train_op = optimizer.minimize(loss)
In [4]:
# Open the session used by every later cell, then initialise all variables
# that exist in the graph at this point.
sess = tf.InteractiveSession()
init_op = tf.global_variables_initializer()
sess.run(init_op)
In [5]:
# Build VGG16 on top of the image placeholder, passing it the weights file and
# the open session.
print('Loading model...')
weights_path = 'vgg16_weights.npz'
vgg = vgg16(imgs_plc, weights_path, sess)
print('Done loading!')
In [6]:
# Embed every start / end frame as its VGG16 fc1 activation vector.
start_imgs_embedded = sess.run(vgg.fc1, feed_dict={vgg.imgs: start_imgs})
end_imgs_embedded = sess.run(vgg.fc1, feed_dict={vgg.imgs: end_imgs})

# Shuffle the video indices with a fixed seed so the same videos land in the
# train and test sets on every run. A local RandomState leaves NumPy's global
# random state untouched. Change SPLIT_SEED to draw a different split.
SPLIT_SEED = 0
idxs = np.random.RandomState(SPLIT_SEED).permutation(NUM_VIDS)

# First 75% of the shuffled videos train the ranker, the rest evaluate it.
n_train = int(NUM_VIDS * 0.75)
train_idxs = idxs[:n_train]
test_idxs = idxs[n_train:]

train_start_imgs = start_imgs_embedded[train_idxs]
train_end_imgs = end_imgs_embedded[train_idxs]
test_start_imgs = start_imgs_embedded[test_idxs]
test_end_imgs = end_imgs_embedded[test_idxs]

print('Train start imgs {}'.format(np.shape(train_start_imgs)))
print('Train end imgs {}'.format(np.shape(train_end_imgs)))
print('Test start imgs {}'.format(np.shape(test_start_imgs)))
print('Test end imgs {}'.format(np.shape(test_end_imgs)))
In [7]:
# Column vectors of labels (0 per start frame, 1 per end frame). They are not
# fed to the graph in this cell.
num_train_pairs = np.shape(train_start_imgs)[0]
train_y1 = np.zeros((num_train_pairs, 1))
train_y2 = np.ones((np.shape(train_end_imgs)[0], 1))

# Evaluation feed: the whole test set with dropout disabled (keep prob 1).
test_feed = {x1: test_start_imgs, x2: test_end_imgs, dropout_keep_prob: 1}

for epoch in range(100):
    # One optimiser step per training pair (batch size 1, keep prob 0.5).
    for idx in range(num_train_pairs):
        pair_feed = {
            x1: train_start_imgs[idx:idx + 1, :],
            x2: train_end_imgs[idx:idx + 1, :],
            dropout_keep_prob: 0.5,
        }
        _, cost_val = sess.run([train_op, loss], feed_dict=pair_feed)

    # cost_val is the loss of the last pair seen in this epoch.
    print('{}. {}'.format(epoch, cost_val))

    # A test pair counts as correct when the end frame outscores the start frame.
    s1_val, s2_val = sess.run([s1, s2], feed_dict=test_feed)
    print('Accuracy: {}%'.format(100 * np.mean(s1_val < s2_val)))
In [8]:
def get_img_seq(video_id):
    """Load every frame of one video in frame order, resized to 224x224.

    Args:
        video_id: Name of the video's sub-folder inside DATASET_DIR (string).

    Returns:
        A list with one resized image per ``*.png`` file in the folder,
        ordered by frame; empty when the folder holds no PNG files.
    """
    def frame_order(path):
        # Natural ordering: digit runs compare numerically, so '10.png' sorts
        # after '2.png' and the list follows the video's timeline.
        # Zero-padded names keep their plain sorted() order. re.split with a
        # capturing group alternates text / digits, so the odd positions are
        # always the digit runs.
        name = os.path.basename(path)
        pieces = re.split(r'(\d+)', name)
        key = [int(tok) if pos % 2 else tok for pos, tok in enumerate(pieces)]
        # The raw name breaks ties such as '01.png' vs '1.png' deterministically.
        return key, name

    pattern = os.path.join(DATASET_DIR, video_id, '*.png')
    img_files = sorted(glob.glob(pattern), key=frame_order)
    return [imresize(imread(image_file), (224, 224)) for image_file in img_files]
# Load all frames of video '1' and show the shape of the resulting stack.
imgs = get_img_seq(video_id='1')
print(np.asarray(imgs).shape)
In [9]:
# Embed every frame of the video with VGG16's fc1 layer, then score each frame
# with the trained ranker (dropout disabled). Only the x1 branch is evaluated.
imgs_embedded = sess.run(vgg.fc1, feed_dict={vgg.imgs: imgs})
score_feed = {x1: imgs_embedded, dropout_keep_prob: 1}
# Fetching a one-element list makes `scores` a one-element list of arrays.
scores = sess.run([s1], feed_dict=score_feed)
In [10]:
%matplotlib inline
from matplotlib import pyplot as plt
# Plot the per-frame score of the video against the frame index.
fig, ax = plt.subplots()
ax.set_title('Utility of cloth-folding over time')
ax.set_xlabel('time (video frame #)')
ax.set_ylabel('Utility')
ax.plot(scores[-1])
Out[10]: