In [2]:
import pandas as pd
import numpy as np
import math

names = ['pearson', 'cos', 'our', 'msd', 'hybrid1', 'hybrid2']

threshold = .6
eps = float(1e-9)

def similar_users(name, i):# returns S. S[u] is the list of similarity with users u+1,u+2,...,943
    file = 'Similarities/'+str(i)+'.'+name
    #print(file)
    S = []
    S.append([])
    for u in range(1, 944):
        a = []
        S.append([])
        for v in range(u+1, 945):
            a.append(0)
        S[u] = a
    sim_list = open(file, 'r').readlines()
    #print(len(sim_list))
    u = 1
    for line in sim_list:
        tokens = line.split(r' ')
        #print(len(tokens))
        a = []
        v = u+1
        for token in tokens:
            if '.' in token:
                sim = float(token)
                if sim >= eps+threshold:
                    a.append(v)
            v += 1
        S[u] = a
        u += 1
    return S
#S = similar_users(names[0], 1)
#for name in names:
   # for i in range(1, 6):
def write():
    S = similar_users(names[0], 1)
    path = 'Similar_users/sim'+str(1)+'.'+names[0]
    file = open(path, 'w')
    for users in S[1:]:
        for u in users:
            file.write(str(u)+' ')
        file.write('\n')

In [7]:
def train_data(i):
    rating = []
    for u in range(0, 945):
        rating.append({})
    path = 'Datasets/train'+str(i)+'.csv'
    file = open(path, 'r')
    lines = file.readlines()
    l = len(lines)
    for i in range(0, l):
        line = lines[i]
        tokens = line.split(r',')
        if i == 0:
            continue
        u = int(tokens[0])
        m = int(tokens[1])
        r = int(tokens[2])
        rating[u][m] = r
    return rating

def weights(name, i):
    ratings = train_data(i)
    movies = set()
    weights = []
    for u in range(1, 945):
        for movie, rating in ratings[u].items():
            movies.add(int(movie))
        weights.append({})
    movies = list(movies)
    path = 'Similar_users/sim'+str(i)+'.'+name
    file = open(path, 'r')
    lines = file.readlines()
    u = 1
    mxl = 0
    for line in lines:
        usrs = line.strip().split(' ')
        mxl = max(mxl, len(usrs))
        for movie in movies:
            a = {}
            mxr = 0
            mnr = 100
            for usr in usrs:
                usr = int(usr)
                mxr = max(mxr, ratings[usr][movie])
                mnr = min(mnr, ratings[usr][movie])
            a['a'] = mxr
            a['b'] = mnr
            weights[u][movie] = a
        u += 1
weights(names[0], 1)


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-7-c1e12fd537a5> in <module>()
     47             weights[u][movie] = a
     48         u += 1
---> 49 weights(names[0], 1)

<ipython-input-7-c1e12fd537a5> in weights(name, i)
     41             for usr in usrs:
     42                 usr = int(usr)
---> 43                 mxr = max(mxr, ratings[usr][movie])
     44                 mnr = min(mnr, ratings[usr][movie])
     45             a['a'] = mxr

KeyError: 1

In [ ]: