notebook.community

Edit and run



In [1]:

    
import numpy as np



In [2]:

    
# Per-session containers filled by the loading cells below: the *_X lists
# receive the ids found before ';' on each line, the *_y lists the ids after it.
train_X = []
train_y = []
test_X = []
test_y = []



In [3]:

    
def unique(seq):
    """Return the elements of ``seq`` with duplicates removed, keeping first-seen order.

    Args:
        seq: Any iterable. Elements are compared with ``==`` semantics.

    Returns:
        A new list holding the first occurrence of every distinct element.

    Hashable elements are deduplicated through a set, so the common case runs
    in linear time instead of rescanning the result list for every element.
    If an unhashable element (e.g. a list) is met, the function falls back to
    the plain list scan so such inputs keep working.
    """
    # Materialize once so a one-shot iterator can still be replayed by the fallback.
    items = list(seq)
    try:
        seen = set()
        result = []
        for x in items:
            if x not in seen:
                seen.add(x)
                result.append(x)
        return result
    except TypeError:
        # At least one element is unhashable: use equality-based scanning.
        result = []
        for x in items:
            if x not in result:
                result.append(x)
        return result



In [4]:

    
# Load the training sessions. Each line has the form "<ids>;<ids>", where both
# parts are comma-separated integers and the part after ';' may be empty.
# Reset the containers first so re-running this cell does not append the file twice.
del train_X[:]
del train_y[:]
# `with` guarantees the file handle is closed once loading finishes.
with open('coursera_sessions_train.txt') as sessions_file:
    for line in sessions_file:
        x, y = line.split(';')
        train_X.append([int(value) for value in x.split(',')])

        # Strip the trailing newline; an empty remainder means no ids after ';'.
        y = y.strip()
        train_y.append([int(value) for value in y.split(',')] if y else [])



In [5]:

    
# Load the test sessions. Each line has the form "<ids>;<ids>", where both
# parts are comma-separated integers and the part after ';' may be empty.
# Reset the containers first so re-running this cell does not append the file twice.
del test_X[:]
del test_y[:]
# `with` guarantees the file handle is closed once loading finishes.
with open('coursera_sessions_test.txt') as sessions_file:
    for line in sessions_file:
        x, y = line.split(';')
        test_X.append([int(value) for value in x.split(',')])

        # Strip the trailing newline; an empty remainder means no ids after ';'.
        y = y.strip()
        test_y.append([int(value) for value in y.split(',')] if y else [])



In [6]:

    
from collections import Counter
# How often each id occurs across all train_X sessions (repeats within a
# session are counted every time they appear).
watched_counter = Counter()
for session_values in train_X:
    watched_counter.update(session_values)

# Same tally over the train_y sessions.
bought_counter = Counter()
for session_values in train_y:
    bought_counter.update(session_values)



In [7]:

    
def precision(y_pred, y_true, k):
    """Return the share of the first ``k`` predictions that occur in ``y_true``.

    The hit count is always divided by ``k`` itself, even when ``y_pred``
    holds fewer than ``k`` items.
    """
    hits = 0
    for candidate in y_pred[:k]:
        if candidate in y_true:
            hits += 1
    return hits / k



In [8]:

    
def recall(y_pred, y_true, k):
    """Return the share of ``y_true`` items found among the first ``k`` predictions.

    Args:
        y_pred: Ranked sequence of predicted items; only ``y_pred[:k]`` is used.
        y_true: Sequence of relevant items.
        k: Cut-off applied to ``y_pred``.

    Returns:
        Number of ``y_true`` entries present in ``y_pred[:k]`` divided by
        ``len(y_true)``. When ``y_true`` is empty there is nothing to
        retrieve, so 0.0 is returned instead of raising ZeroDivisionError.
    """
    if len(y_true) == 0:
        return 0.0
    predictions = y_pred[:k]
    return sum(1 for value in y_true if value in predictions) / len(y_true)



In [9]:

    
# Keep only the training sessions whose train_y entry is non-empty; both
# lists stay aligned because they are filtered by the same condition.
X_train_not_empty = [session for idx, session in enumerate(train_X) if train_y[idx]]
y_train_not_empty = [targets for targets in train_y if targets]



In [10]:

    
# Keep only the test sessions whose test_y entry is non-empty; both lists
# stay aligned because they are filtered by the same condition.
X_test_not_empty = [session for idx, session in enumerate(test_X) if test_y[idx]]
y_test_not_empty = [targets for targets in test_y if targets]



In [11]:

    
def sort_key(counter):
    """Build a sort key that maps an item to its count in ``counter``.

    Items missing from ``counter`` are given a key of 0.
    """
    return lambda x: counter.get(x, 0)

# For every retained training session: order its ids by descending count
# (sorted() is stable, so ties keep their in-session order), then drop repeats.
_watched_rank = sort_key(watched_counter)
_bought_rank = sort_key(bought_counter)

top_watched_train = [
    unique(sorted(session, key=_watched_rank, reverse=True))
    for session in X_train_not_empty
]
top_bought_train = [
    unique(sorted(session, key=_bought_rank, reverse=True))
    for session in X_train_not_empty
]



In [12]:

    
from itertools import cycle

def get_metric(y_pred, y_true, metric, k):
    """Average ``metric(pred, true, k)`` over paired predictions and targets.

    ``y_pred`` and ``y_true`` are walked in parallel (stopping at the shorter
    one) and the per-pair scores are reduced with ``np.mean``.
    """
    per_sample = [metric(pred, true, k) for pred, true in zip(y_pred, y_true)]
    return np.mean(per_sample)



In [13]:

    
def get_metrics_values(y_pred, y_true, file):
    """Write four averaged scores to ``file``, each followed by a single space.

    The order is recall@1, precision@1, recall@5, precision@5, each rounded
    to two decimals. The file is opened (and truncated) before any score is
    computed.
    """
    with open(file, 'w') as out:
        out.writelines(
            str(round(get_metric(y_pred, y_true, metric, k), 2)) + ' '
            for k in (1, 5)
            for metric in (recall, precision)
        )



In [14]:

    
# Training sessions ranked by watched_counter frequency -> ans1.txt
# (recall@1, precision@1, recall@5, precision@5).
get_metrics_values(
    y_pred=top_watched_train,
    y_true=y_train_not_empty,
    file='ans1.txt',
)



In [15]:

    
# Training sessions ranked by bought_counter frequency -> ans3.txt
# (recall@1, precision@1, recall@5, precision@5).
get_metrics_values(
    y_pred=top_bought_train,
    y_true=y_train_not_empty,
    file='ans3.txt',
)



In [16]:

    
# For every retained test session: order its ids by descending count taken
# from the training counters (sorted() is stable, so ties keep their
# in-session order), then drop repeats.
_watched_rank = sort_key(watched_counter)
_bought_rank = sort_key(bought_counter)

top_watched_test = [
    unique(sorted(session, key=_watched_rank, reverse=True))
    for session in X_test_not_empty
]
top_bought_test = [
    unique(sorted(session, key=_bought_rank, reverse=True))
    for session in X_test_not_empty
]



In [17]:

    
# Test sessions ranked by watched_counter frequency -> ans2.txt
# (recall@1, precision@1, recall@5, precision@5).
get_metrics_values(
    y_pred=top_watched_test,
    y_true=y_test_not_empty,
    file='ans2.txt',
)



In [18]:

    
# Test sessions ranked by bought_counter frequency -> ans4.txt
# (recall@1, precision@1, recall@5, precision@5).
get_metrics_values(
    y_pred=top_bought_test,
    y_true=y_test_not_empty,
    file='ans4.txt',
)



In [ ]: