In [ ]:
import codecs
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split


/Users/pasquale/anaconda3/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: compiletime version 3.6 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.5
  return f(*args, **kwds)
/Users/pasquale/anaconda3/lib/python3.5/site-packages/h5py/__init__.py:34: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters

Load data in Tensorflow.


In [3]:
root = "../"
training_data_folder = '%straining_data/web-radio/output/rec' % root
embDir = '%sembeddings' % root
what = 'artist'

uri_file = '%s/%s.emb.u' % (embDir, what)
vector_file = '%s/%s.emb.v' % (embDir, what)
# header_file = '%s/%s.emb.h' % (embDir, what)
training_file = '%s/%s.dat' % (training_data_folder, what)

vectors = np.array([line.strip().split(' ') for line in codecs.open(vector_file, 'r', 'utf-8')])
# heads = np.array([line.strip() for line in codecs.open(header_file, 'r', 'utf-8')])
uris = np.array([line.strip() for line in codecs.open(uri_file, 'r', 'utf-8')])

train_array = np.array([line.strip().split(' ') for line in codecs.open(training_file, 'r', 'utf-8')])
pd.DataFrame(train_array, columns=['seed', 'target', 'score']).head()


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-3-01484f3e2d59> in <module>()
      9 training_file = '%s/%s.dat' % (training_data_folder, what)
     10 
---> 11 vectors = np.array([line.strip().split(' ') for line in codecs.open(vector_file, 'r', 'utf-8')])
     12 # heads = np.array([line.strip() for line in codecs.open(header_file, 'r', 'utf-8')])
     13 uris = np.array([line.strip() for line in codecs.open(uri_file, 'r', 'utf-8')])

NameError: name 'np' is not defined

Data pre-processing: I want to substitute the seed and target with their embeddings


In [48]:
col1 = np.array([get_embs(xi) for xi in train_array[:, 0]])
col2 = np.array([get_embs(xi) for xi in train_array[:, 1]])
col1 = np.concatenate((col1, [12., 45., 73.] * np.ones((train_array.shape[0], 3))), axis=1)
col2 = np.concatenate((col2, [12., 45., 73.] * np.ones((train_array.shape[0], 3))), axis=1)
col3 = np.array(train_array[:, 2]).astype('float32')
col3 = col3.reshape((col3.size, 1))

In [1]:
def get_embs(x):
    # uri to embedding
    v = vectors[np.argwhere(uris == x)]
    if v.size == 0:
        result = -2. * np.ones(vectors[0].size)
    else:
        result = v[0][0]
    return result.astype('float32')

In [2]:
training_vector = np.concatenate((col1, col2, col3), axis=1)
training_vector.shape


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-2-4658b3795d6a> in <module>()
----> 1 training_vector = np.concatenate((col1, col2, col3), axis=1)
      2 training_vector.shape

NameError: name 'np' is not defined

Split test and train


In [50]:
train, test = train_test_split(training_vector, train_size=0.7)

train_vector = train[:, :-1]
train_label = train[:, -1]
train_label = train_label.reshape((len(train_label), 1))

test_vector = test[:, :-1]
test_label = test[:, -1]
test_label = test_label.reshape((len(test_label), 1))


/Users/pasquale/anaconda3/lib/python3.5/site-packages/sklearn/model_selection/_split.py:2026: FutureWarning: From version 0.21, test_size will always complement train_size unless both are specified.
  FutureWarning)

In [51]:
print('Train')
print(train_vector.shape)
print(train_label.shape)
print('Test')
print(test_vector.shape)
print(test_label.shape)


Train
(8633, 34)
(8633, 1)
Test
(3700, 34)
(3700, 1)

In [52]:
# Parameters
learning_rate = 0.1
num_steps = 1000
batch_size = 64
display_step = 100

In [53]:
# Network Parameters
n_hidden_1 = 256  # 1st layer number of neurons
n_hidden_2 = 256  # 2nd layer number of neurons
num_input = train_vector[0].size
num_output = int(num_input / 2)
num_output_wrap = train_label[0].size

# tf Graph input
X = tf.placeholder(tf.float32, [None, num_input], name="X")
Y = tf.placeholder(tf.float32, [None, num_output_wrap], name="Y")

Neural network


In [54]:
# Create model
def neural_net(x):
    with tf.name_scope('hidden_1') as scope:
        # Hidden fully connected layer with 256 neurons
        w1 = tf.Variable(tf.random_normal([num_input, n_hidden_1]), name='w')
        b1 = tf.Variable(tf.random_normal([n_hidden_1]), name='b')
        layer_1 = tf.add(tf.matmul(x, w1), b1, name='o')
    with tf.name_scope('hidden_2') as scope:
        # Hidden fully connected layer with 256 neurons
        w2 = tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2]), name='w')
        b2 = tf.Variable(tf.random_normal([n_hidden_2]), name='b')
        layer_2 = tf.add(tf.matmul(layer_1, w2), b2, name='o')
    with tf.name_scope('out_layer') as scope:
        # Output fully connected layer with a neuron for each class
        wo = tf.Variable(tf.random_normal([n_hidden_2, num_output]), name='w')
        bo = tf.Variable(tf.random_normal([num_output]), name='b')
        out_layer = tf.add(tf.matmul(layer_2, wo), bo, name="o")
        with tf.name_scope('u_norm') as scope:
            row_sum = tf.reduce_sum(out_layer, axis=1, keepdims=True)
            return tf.divide(out_layer, row_sum)

In [55]:
def weighted_l2(a, b, w):
    with tf.name_scope('weighted_l2') as scope:
        # https://stackoverflow.com/a/8861999/1218213
        q = tf.subtract(a, b, name="q")
        # return np.sqrt((w * q * q).sum())
        pow_q = tf.cast(tf.pow(q, 2), tf.float32, name="q-power")

        return tf.reduce_sum(tf.multiply(w, pow_q), axis=1, name="o", keepdims=True)

In [56]:
def compute_penalty(expected, taken, total):
    with tf.name_scope('penalty') as scope:
        penalty = tf.divide(tf.subtract(expected, taken), total)
        return tf.cast(penalty, tf.float32)


def neural_net_wrap(x, previous_out):
    with tf.name_scope('nn_wrapper') as scope:
        lt = previous_out.shape.as_list()[0]  # vertical size of the tensor
        lh = previous_out[0].shape.as_list()[0]  # horizontal size of the tensor
        seed, target = tf.split(x, [lh, lh], axis=1)
        bs = tf.equal(seed, -2.)
        bt = tf.equal(target, -2.)

        _ones = tf.ones_like(previous_out, tf.float32)
        max_distance = weighted_l2(_ones, _ones * -1., previous_out)

        bad_mask = tf.logical_or(bs, bt)
        good_mask = tf.logical_not(bad_mask)

        bs_count = tf.count_nonzero(tf.logical_not(bs), axis=1, keepdims=True)
        good_count = tf.count_nonzero(good_mask, axis=1, keepdims=True)

        _zeros = tf.zeros_like(previous_out, tf.float32)
        _seed = tf.where(good_mask, seed, _zeros)
        _target = tf.where(good_mask, target, _zeros)

        # distance
        d = weighted_l2(_seed, _target, previous_out)

        # how much info I am not finding
        penalty = compute_penalty(bs_count, good_count, lh)
        multiplier = tf.subtract(1., penalty)
        # score
        s = tf.divide(tf.subtract(max_distance, d), max_distance)
        return tf.multiply(s, multiplier)

In [57]:
# Construct model
intermediate = neural_net(X)
logits = neural_net_wrap(X, intermediate)

In [58]:
logits.shape


Out[58]:
TensorShape([Dimension(None), Dimension(1)])

In [ ]:


In [2]:
# Define loss and optimizer
# loss_op = MSE
loss_op = tf.reduce_mean(tf.square(tf.subtract(logits, Y)))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate model (with test logits, for dropout to be disabled)
correct_pred = tf.less(tf.subtract(logits, Y), 0.1)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-2-ff952803e31b> in <module>()
      1 # Define loss and optimizer
      2 # loss_op = MSE
----> 3 loss_op = tf.reduce_mean(tf.square(tf.subtract(logits, Y)))
      4 optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
      5 train_op = optimizer.minimize(loss_op)

NameError: name 'tf' is not defined

In [ ]:


In [60]:
# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

In [61]:
def next_batch(num, data, labels):
    """
    Return a total of `num` random samples and labels. 
    """
    idx = np.arange(0, len(data))
    np.random.shuffle(idx)
    idx = idx[:num]
    data_shuffle = data[idx]
    labels_shuffle = labels[idx]
    return data_shuffle, labels_shuffle

In [62]:
with tf.Session() as sess:
    writer = tf.summary.FileWriter("output", sess.graph)

    # Run the initializer
    sess.run(init)

    print("Start learning")
    for step in range(1, num_steps + 1):
        batch_x, batch_y = next_batch(batch_size, train_vector, train_label)

        # Run optimization op (backprop)
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
        if step % display_step == 0 or step == 1:
            # Calculate batch loss and accuracy
            preds, my_weights, loss, acc = sess.run([logits, intermediate, loss_op, accuracy],
                                                    feed_dict={X: batch_x, Y: batch_y})
            
            print("Step " + str(step) + ", Minibatch Loss= " + \
                  "{:.4f}".format(loss) + ", Training Accuracy= " + \
                  "{:.3f}".format(acc))
            # print("Predictions %s VS %s" % (preds[0], batch_y[0]))
            np.set_printoptions(precision=2)
            print("My weights %s" % np.mean(my_weights, axis=0))

    print("Optimization Finished!")

    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={X: test_vector, Y: test_label}))
    writer.close()


Start learning
Step 1, Minibatch Loss= 0.0781, Training Accuracy= 0.922
My weights [ 0.52 -0.33  0.48  0.68 -0.48 -0.22  0.57  0.36 -0.09 -0.02 -0.23 -0.25
 -0.22  0.38 -0.1   0.23 -0.28]
Step 100, Minibatch Loss= 0.0313, Training Accuracy= 0.969
My weights [ 0.52 -0.32  0.48  0.68 -0.48 -0.21  0.57  0.36 -0.09 -0.02 -0.23 -0.25
 -0.22  0.38 -0.1   0.23 -0.28]
Step 200, Minibatch Loss= 0.0312, Training Accuracy= 0.969
My weights [ 0.52 -0.33  0.48  0.68 -0.48 -0.21  0.57  0.36 -0.09 -0.02 -0.23 -0.25
 -0.22  0.38 -0.1   0.23 -0.28]
Step 300, Minibatch Loss= 0.0156, Training Accuracy= 0.984
My weights [ 0.52 -0.32  0.48  0.68 -0.48 -0.21  0.57  0.36 -0.09 -0.02 -0.23 -0.25
 -0.22  0.38 -0.1   0.23 -0.28]
Step 400, Minibatch Loss= 0.0312, Training Accuracy= 0.969
My weights [ 0.52 -0.32  0.48  0.68 -0.48 -0.21  0.56  0.36 -0.09 -0.02 -0.23 -0.25
 -0.22  0.38 -0.11  0.23 -0.28]
Step 500, Minibatch Loss= 0.0156, Training Accuracy= 0.984
My weights [ 0.52 -0.32  0.48  0.68 -0.48 -0.22  0.56  0.36 -0.09 -0.02 -0.23 -0.25
 -0.22  0.38 -0.1   0.23 -0.29]
Step 600, Minibatch Loss= 0.0156, Training Accuracy= 0.984
My weights [ 0.53 -0.33  0.48  0.68 -0.48 -0.22  0.57  0.36 -0.09 -0.02 -0.23 -0.25
 -0.22  0.38 -0.1   0.23 -0.29]
Step 700, Minibatch Loss= 0.0312, Training Accuracy= 0.969
My weights [ 0.52 -0.32  0.48  0.68 -0.48 -0.21  0.57  0.36 -0.09 -0.02 -0.23 -0.25
 -0.22  0.38 -0.1   0.23 -0.29]
Step 800, Minibatch Loss= 0.0312, Training Accuracy= 0.969
My weights [ 0.53 -0.33  0.48  0.68 -0.48 -0.21  0.57  0.36 -0.09 -0.02 -0.23 -0.25
 -0.22  0.38 -0.1   0.23 -0.28]
Step 900, Minibatch Loss= 0.0156, Training Accuracy= 0.984
My weights [ 0.52 -0.32  0.48  0.68 -0.48 -0.22  0.57  0.36 -0.09 -0.02 -0.23 -0.25
 -0.22  0.38 -0.1   0.23 -0.29]
Step 1000, Minibatch Loss= 0.0625, Training Accuracy= 0.938
My weights [ 0.52 -0.32  0.48  0.68 -0.48 -0.21  0.57  0.37 -0.09 -0.02 -0.23 -0.25
 -0.22  0.38 -0.1   0.24 -0.28]
Optimization Finished!
Testing Accuracy: 0.96054053

In [ ]:


In [ ]:


In [ ]:


In [ ]: