In [ ]:
import codecs
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
Load the data (embedding vectors, their URIs, and the training triples).
In [3]:
# Paths: embeddings and training data live relative to the repo root.
root = "../"
training_data_folder = '%straining_data/web-radio/output/rec' % root
embDir = '%sembeddings' % root
what = 'artist'  # which entity type's embeddings to load
uri_file = '%s/%s.emb.u' % (embDir, what)      # one URI per line
vector_file = '%s/%s.emb.v' % (embDir, what)   # one space-separated vector per line
# header_file = '%s/%s.emb.h' % (embDir, what)
training_file = '%s/%s.dat' % (training_data_folder, what)
# NOTE(review): values are kept as strings at this point; the cast to
# float32 happens later in get_embs and in the col3 construction.
vectors = np.array([line.strip().split(' ') for line in codecs.open(vector_file, 'r', 'utf-8')])
# heads = np.array([line.strip() for line in codecs.open(header_file, 'r', 'utf-8')])
uris = np.array([line.strip() for line in codecs.open(uri_file, 'r', 'utf-8')])
# Training triples: (seed URI, target URI, score), one per line.
train_array = np.array([line.strip().split(' ') for line in codecs.open(training_file, 'r', 'utf-8')])
pd.DataFrame(train_array, columns=['seed', 'target', 'score']).head()
Out[3]:
Data pre-processing: substitute the seed and target URIs with their embedding vectors.
In [4]:
def get_embs(x):
    """Look up the embedding vector for URI `x`.

    Returns the first matching row of the module-level `vectors` array,
    cast to float32. When `x` is absent from `uris`, returns a constant
    vector of -2. of the same length, so missing entities stay
    distinguishable downstream.
    """
    hits = np.flatnonzero(uris == x)
    if hits.size == 0:
        # Sentinel for "no embedding available" — masked out later.
        emb = np.full(vectors[0].size, -2.)
    else:
        emb = vectors[hits[0]]
    return emb.astype('float32')
In [5]:
# Replace seed/target URIs with their embedding vectors.
col1 = np.array([get_embs(xi) for xi in train_array[:, 0]])  # seed embeddings
col2 = np.array([get_embs(xi) for xi in train_array[:, 1]])  # target embeddings
# Append the constant columns [12., 45., 73.] to every row.
# NOTE(review): the purpose of these magic constants is not visible here —
# presumably extra hand-crafted features; confirm and give them names.
col1 = np.concatenate((col1, [12., 45., 73.] * np.ones((train_array.shape[0], 3))), axis=1)
col2 = np.concatenate((col2, [12., 45., 73.] * np.ones((train_array.shape[0], 3))), axis=1)
# Scores as a float32 column vector, shape (n_samples, 1).
col3 = np.array(train_array[:, 2]).astype('float32')
col3 = col3.reshape((col3.size, 1))
In [6]:
def next_batch(num, data, labels):
    """Draw `num` random rows (without replacement) from `data`.

    Returns the sampled rows together with their matching `labels`,
    keeping the pairing intact.
    """
    order = np.random.permutation(len(data))
    pick = order[:num]
    return data[pick], labels[pick]
In [ ]:
In [7]:
# One row per training triple: [seed emb | target emb | score].
training_vector = np.concatenate((col1, col2, col3), axis=1)
training_vector.shape
Out[7]:
Split the data into train and test sets.
In [8]:
# Reproducible 70/30 split. Fix: the previous call had no random_state, so
# every re-run produced a different partition and results could not be
# reproduced.
train, test = train_test_split(training_vector, train_size=0.7, random_state=42)
# Last column is the score label; everything before it is the feature vector.
train_vector = train[:, : -1]
train_label = train[:, -1]
train_label = train_label.reshape((len(train_label), 1))
test_vector = test[:, :-1]
test_label = test[:, -1]
test_label = test_label.reshape((len(test_label), 1))
In [9]:
# Sanity check: feature/label shapes for both splits.
print('Train')
print(train_vector.shape)
print(train_label.shape)
print('Test')
print(test_vector.shape)
print(test_label.shape)
In [10]:
# Parameters
learning_rate = 0.1  # SGD step size
num_steps = 1000     # number of training iterations
batch_size = 64      # samples per mini-batch
display_step = 100   # log loss/accuracy every N steps
In [14]:
# Network Parameters
n_hidden_1 = 256 # 1st layer number of neurons
n_hidden_2 = 256 # 2nd layer number of neurons
# NOTE(review): n_hidden_1 / n_hidden_2 are never used by neural_net_wrap —
# presumably leftovers from an earlier fully-connected architecture; confirm
# and remove if so.
num_input = train_vector[0].size       # seed emb + target emb, concatenated
num_output = col1[0].size              # width of a single (augmented) embedding
num_output_wrap = train_label[0].size  # one score per example
# tf Graph input (TF1-style placeholders, fed via feed_dict at session time)
X = tf.placeholder(tf.float32, [None, num_input], name="X")
Y = tf.placeholder(tf.float32, [None, num_output_wrap], name="Y")
Network
In [78]:
def weighted_l2(a, b, w):
    """Weighted squared L2 distance between `a` and `b`, one value per row.

    Computes sum(w * (a - b)^2) along the last (feature) axis, keeping the
    reduced dimension so the result broadcasts against (batch, 1) tensors.

    Bug fix: the previous version called tf.reduce_sum with no axis, which
    collapsed ALL axes — the whole batch was folded into a single scalar
    distance, while the callers in neural_net_wrap (masks, counts, penalty)
    all operate per example. Reducing along axis=-1 keeps one distance per
    row, matching the per-example (batch, 1) label shape.
    """
    with tf.name_scope('weighted_l2') as scope:
        # https://stackoverflow.com/a/8861999/1218213
        q = tf.subtract(a, b, name="q")
        # return np.sqrt((w * q * q).sum())
        pow_q = tf.cast(tf.pow(q, 2), tf.float32, name="q-power")
        # axis=-1 → one distance per example instead of one per batch.
        return tf.reduce_sum(tf.multiply(w, pow_q), axis=-1, name="o", keepdims=True)
In [91]:
def compute_penalty(expected, taken, total):
    """Fraction of `total` representing information that was expected but not usable.

    Computes (expected - taken) / total and casts the result to float32
    (the counts arrive as integers, the division is true division).
    """
    with tf.name_scope('penalty'):
        missing = tf.subtract(expected, taken)
        return tf.cast(tf.divide(missing, total), tf.float32)
# Learnable per-dimension weights of the distance metric (one weight per
# embedding dimension), initialized from a standard normal.
w = tf.Variable(tf.random_normal([1, num_output]), name='w')


def neural_net_wrap(x):
    """Similarity score between the seed and target halves of `x`.

    `x` holds a seed embedding concatenated with a target embedding
    (num_output values each). Dimensions equal to -2. are the "missing
    embedding" sentinel produced by get_embs; they are masked out of the
    distance, and a penalty discounts the score in proportion to the
    masked information.
    """
    # Split the concatenated input back into seed and target embeddings.
    seed, target = tf.split(x, [num_output, num_output], axis=1)
    # Boolean masks of missing dimensions (-2. sentinel from get_embs).
    bs = tf.equal(seed, -2.)
    bt = tf.equal(target, -2.)
    _ones = tf.ones_like(w, tf.float32)
    # Largest possible weighted distance: all dimensions at opposite extremes
    # (+1 vs -1); used to normalize the actual distance below.
    max_distance = weighted_l2(_ones, _ones * -1., w)
    # A dimension is unusable if it is missing on either side.
    bad_mask = tf.logical_or(bs, bt)
    good_mask = tf.logical_not(bad_mask)
    # Per-example counts: dimensions present in the seed vs. present in both.
    bs_count = tf.count_nonzero(tf.logical_not(bs), axis=1, keepdims=True)
    good_count = tf.count_nonzero(good_mask, axis=1, keepdims=True)
    # Zero out unusable dimensions so they contribute nothing to the distance.
    _zeros = tf.zeros_like(seed, tf.float32)
    _seed = tf.where(good_mask, seed, _zeros)
    _target = tf.where(good_mask, target, _zeros)
    # distance
    d = weighted_l2(_seed, _target, w)
    # how much info I am not finding
    # NOTE(review): penalty = (seed-present - both-present) / num_output, i.e.
    # only dimensions known for the seed but missing for the target count —
    # confirm this asymmetry (target-only misses ignored) is intentional.
    penalty = compute_penalty(bs_count, good_count, num_output)
    multiplier = tf.subtract(1., penalty)
    # score
    # Normalize the distance into a similarity score, then apply the discount.
    s = tf.divide(tf.subtract(max_distance, d), max_distance)
    return tf.multiply(s, multiplier)
In [92]:
# Construct model
# Predicted similarity score for each (seed, target) pair fed through X.
logits = neural_net_wrap(X)
In [93]:
Y.shape  # rich display: label placeholder shape
Out[93]:
In [94]:
logits.shape  # rich display: model output shape (should match Y)
Out[94]:
In [95]:
# Define loss and optimizer
# loss_op = MSE between predicted and target scores
loss_op = tf.reduce_mean(tf.square(tf.subtract(logits, Y)))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# Evaluate model (with test logits, for dropout to be disabled)
# Bug fix: a prediction counts as correct only when it is within 0.1 of the
# label in EITHER direction. The previous `tf.less(logits - Y, 0.1)` also
# accepted arbitrarily low predictions (logits far below Y) as correct.
correct_pred = tf.less(tf.abs(tf.subtract(logits, Y)), 0.1)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
In [ ]:
In [96]:
# Initialize the variables (i.e. assign their default value)
# Op that assigns every tf.Variable (here: the metric weights `w`) its
# initializer value; must be run once per session before training.
init = tf.global_variables_initializer()
In [99]:
with tf.Session() as sess:
    # Dump the graph for TensorBoard (`tensorboard --logdir output`).
    writer = tf.summary.FileWriter("output", sess.graph)
    # Run the initializer
    sess.run(init)
    print("Start learning")
    for step in range(1, num_steps + 1):
        # Random mini-batch without replacement (see next_batch).
        batch_x, batch_y = next_batch(batch_size, train_vector, train_label)
        # Run optimization op (backprop)
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
        if step % display_step == 0 or step == 1:
            # Calculate batch loss and accuracy
            preds, my_weights, loss, acc = sess.run([logits, w, loss_op, accuracy],
                                                    feed_dict={X: batch_x, Y: batch_y})
            print("Step " + str(step) + ", Minibatch Loss= " + \
                  "{:.4f}".format(loss) + ", Training Accuracy= " + \
                  "{:.3f}".format(acc))
            # print("Predictions %s VS %s" % (preds[0], batch_y[0]))
            np.set_printoptions(precision=2)
            # Mean of the learned per-dimension metric weights, for monitoring.
            print("My weights %s" % np.mean(my_weights, axis=0))
    print("Optimization Finished!")
    # Final evaluation on the held-out split.
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={X: test_vector, Y: test_label}))
    writer.close()
In [ ]:
In [ ]:
In [ ]:
In [ ]: