``````

In [1]:

import numpy as np

``````
``````

In [2]:

# Accumulators for the session data: viewed items (X) and bought items (y).
train_X = []
train_y = []
test_X = []
test_y = []

``````
``````

In [3]:

def unique(seq):
    """Return the items of `seq` in first-seen order with duplicates removed.

    Items must be hashable (the sessions hold ints). Uses an auxiliary set
    for O(1) membership checks instead of the original O(n) list scan,
    turning the whole pass from O(n^2) into O(n).
    """
    seen = set()
    result = []
    for x in seq:
        if x not in seen:
            seen.add(x)
            result.append(x)
    return result

``````
``````

In [4]:

# Each line is "viewed;bought": two comma-separated id lists, the bought part
# possibly empty. Use a context manager so the file handle is always closed
# (the original `open()` in the for-header leaked it).
with open('coursera_sessions_train.txt') as fp:
    for line in fp:
        viewed, bought = line.split(';')
        train_X.append([int(value) for value in viewed.split(',')])

        bought = bought.strip()
        # Empty bought segment means no purchases in this session.
        train_y.append([int(value) for value in bought.split(',')] if bought else [])

``````
``````

In [5]:

# Same format as the train file: "viewed;bought" with comma-separated ids.
# Context manager closes the handle that the original bare `open()` leaked.
with open('coursera_sessions_test.txt') as fp:
    for line in fp:
        viewed, bought = line.split(';')
        test_X.append([int(value) for value in viewed.split(',')])

        bought = bought.strip()
        # Empty bought segment means no purchases in this session.
        test_y.append([int(value) for value in bought.split(',')] if bought else [])

``````
``````

In [6]:

from collections import Counter

# Item popularity over the training data: how often each item was viewed,
# and how often it was bought.
watched_counter = Counter()
for session_items in train_X:
    watched_counter.update(session_items)

bought_counter = Counter()
for session_items in train_y:
    bought_counter.update(session_items)

``````
``````

In [7]:

def precision(y_pred, y_true, k):
    """Precision@k: the fraction of the top-k predictions found in y_true."""
    hits = 0
    for item in y_pred[:k]:
        if item in y_true:
            hits += 1
    return hits / k

``````
``````

In [8]:

def recall(y_pred, y_true, k):
    """Recall@k: the fraction of y_true items covered by the top-k predictions.

    Duplicates in y_true are counted as in the original (each occurrence
    contributes to both numerator and denominator). Returns 0.0 for an empty
    y_true instead of raising ZeroDivisionError — the notebook filters empty
    sessions out, but the metric itself should be safe to call directly.
    """
    if not y_true:
        return 0.0
    predictions = y_pred[:k]
    return sum(1 for value in y_true if value in predictions) / len(y_true)

``````
``````

In [9]:

# Keep only training sessions where at least one item was bought
# (recall is undefined for sessions with an empty purchase list).
X_train_not_empty = [x for x, y in zip(train_X, train_y) if y]
y_train_not_empty = [y for y in train_y if y]

``````
``````

In [10]:

# Keep only test sessions where at least one item was bought,
# mirroring the training-set filtering.
X_test_not_empty = [x for x, y in zip(test_X, test_y) if y]
y_test_not_empty = [y for y in test_y if y]

``````
``````

In [11]:

def sort_key(counter):
    """Build a sort key ranking items by their count in `counter` (0 if absent)."""
    return lambda item: counter.get(item, 0)

# For every training session, rank its viewed items by global view frequency
# and by global purchase frequency (ties keep session order: sorted is stable),
# then deduplicate keeping the first occurrence.
top_watched_train = []
top_bought_train = []
for session in X_train_not_empty:
    by_views = sorted(session, key=sort_key(watched_counter), reverse=True)
    top_watched_train.append(unique(by_views))
    by_purchases = sorted(session, key=sort_key(bought_counter), reverse=True)
    top_bought_train.append(unique(by_purchases))

``````
``````

In [12]:

def get_metric(y_pred, y_true, metric, k):
    """Average `metric(pred, true, k)` over paired prediction/truth samples.

    The original fed the constant `k` through `map` via `itertools.cycle`;
    a plain zip comprehension expresses the same computation directly, so
    the `cycle` import is no longer needed.
    """
    return np.mean([metric(pred, true, k) for pred, true in zip(y_pred, y_true)])

``````
``````

In [13]:

def get_metrics_values(y_pred, y_true, file):
    """Write recall@k and precision@k for k in {1, 5}, rounded to 2 decimals,
    space-separated, to `file` (the grader's expected answer format)."""
    with open(file, 'w') as out:
        for k in (1, 5):
            for metric in (recall, precision):
                value = round(get_metric(y_pred, y_true, metric, k), 2)
                out.write(str(value) + ' ')

``````
``````

In [14]:

# Answer 1: rank-by-view-frequency recommender evaluated on the training sessions.
get_metrics_values(top_watched_train, y_train_not_empty, 'ans1.txt')

``````
``````

In [15]:

# Answer 3: rank-by-purchase-frequency recommender evaluated on the training sessions.
get_metrics_values(top_bought_train, y_train_not_empty, 'ans3.txt')

``````
``````

In [16]:

# Rank each test session's viewed items by the frequencies learned on the
# TRAIN data (counters were built from train_X / train_y), then deduplicate.
top_watched_test = []
top_bought_test = []
for session in X_test_not_empty:
    by_views = sorted(session, key=sort_key(watched_counter), reverse=True)
    top_watched_test.append(unique(by_views))
    by_purchases = sorted(session, key=sort_key(bought_counter), reverse=True)
    top_bought_test.append(unique(by_purchases))

``````
``````

In [17]:

# Answer 2: rank-by-view-frequency recommender evaluated on the test sessions.
get_metrics_values(top_watched_test, y_test_not_empty, 'ans2.txt')

``````
``````

In [18]:

# Answer 4: rank-by-purchase-frequency recommender evaluated on the test sessions.
get_metrics_values(top_bought_test, y_test_not_empty, 'ans4.txt')

``````
``````

In [ ]:

``````