In [2]:
import pandas as pd
import numpy as np
import math
# Similarity measures handled by this notebook. Each entry is used as the
# file extension of the per-measure files ('Similarities/<i>.<name>' and
# 'Similar_users/sim<i>.<name>').
names = [
    'pearson',
    'cos',
    'our',
    'msd',
    'hybrid1',
    'hybrid2',
]

# A user counts as "similar" when the similarity is >= threshold + eps.
threshold = 0.6
# Small margin added to the threshold before the comparison.
eps = 1e-9
def similar_users(name, i, cutoff=None):
    """Return, for every user, the ids of the users that are similar enough.

    Reads ``'Similarities/<i>.<name>'``. Line ``u`` of that file (1-based) is
    interpreted as the whitespace-separated similarities between user ``u``
    and users ``u+1, u+2, ...`` in that order.

    Args:
        name: File extension selecting the similarity file (e.g. an entry
            of ``names``).
        i: File stem; formatted with ``str(i)``.
        cutoff: Minimum similarity (inclusive) for a user to be kept.
            Defaults to the module-level ``eps + threshold``.

    Returns:
        A list ``S`` with 944 entries. ``S[0]`` is an unused placeholder so
        users can be indexed from 1. ``S[u]`` holds the ids ``v > u`` whose
        similarity with ``u`` is ``>= cutoff``. Users that have no line in
        the file get an empty list.

    Raises:
        ValueError: if a token on a line is not a valid number.
        IndexError: if the file contains more than 943 lines.
    """
    if cutoff is None:
        cutoff = eps + threshold
    path = 'Similarities/' + str(i) + '.' + name

    # One slot per user id 0..943; empty means "no similar users".
    S = [[] for _ in range(944)]

    with open(path, 'r') as fh:
        for u, line in enumerate(fh, start=1):
            # Every token advances the user id, so values written without a
            # decimal point (e.g. "1" or "1e-05") cannot shift the alignment.
            S[u] = [
                v
                for v, token in enumerate(line.split(), start=u + 1)
                if float(token) >= cutoff
            ]
    return S
#S = similar_users(names[0], 1)
#for name in names:
# for i in range(1, 6):
def write(name=None, i=1):
    """Write the similar-user lists of one similarity file to disk.

    Calls ``similar_users(name, i)`` and writes the result to
    ``'Similar_users/sim<i>.<name>'``: one line per user (starting with
    user 1), each similar user id followed by a single space.

    Args:
        name: Similarity measure / file extension. Defaults to ``names[0]``.
        i: File index used in both the input and the output file name.
            Defaults to 1.
    """
    if name is None:
        name = names[0]
    S = similar_users(name, i)
    path = 'Similar_users/sim' + str(i) + '.' + name
    # The context manager guarantees the data is flushed and the file closed.
    with open(path, 'w') as out:
        for users in S[1:]:  # S[0] is the unused placeholder for "user 0"
            out.write(''.join(str(u) + ' ' for u in users) + '\n')
In [7]:
def train_data(i):
    """Load the training ratings from ``'Datasets/train<i>.csv'``.

    The first line of the file is skipped (treated as a header). Every other
    non-blank line is split on commas; its first three fields are parsed as
    integers: user id, movie id and rating.

    Args:
        i: Index used to build the file name.

    Returns:
        A list of 945 dicts indexed by user id; ``rating[u][m]`` is the
        rating user ``u`` gave movie ``m``. Users without ratings keep an
        empty dict.

    Raises:
        ValueError: if one of the first three fields is not an integer.
        IndexError: if a line has fewer than three fields or the user id
            is 945 or larger.
    """
    rating = [{} for _ in range(945)]
    path = 'Datasets/train' + str(i) + '.csv'
    with open(path, 'r') as fh:
        next(fh, None)  # skip the first line
        for line in fh:
            if not line.strip():
                continue  # tolerate blank lines, e.g. a trailing newline
            tokens = line.split(',')
            user = int(tokens[0])
            movie = int(tokens[1])
            value = int(tokens[2])
            rating[user][movie] = value
    return rating
def weights(name, i, ratings=None):
    """Compute, per user and movie, the max/min rating among similar users.

    Reads ``'Similar_users/sim<i>.<name>'``, where line ``u`` (1-based) lists
    the ids of the users similar to user ``u``, separated by whitespace.
    For every movie that appears in the training ratings, the largest and
    smallest rating given by those similar users is recorded.

    Args:
        name: Similarity measure / file extension of the similar-users file.
        i: File index; also passed to ``train_data`` when ``ratings`` is
            not supplied.
        ratings: Optional pre-loaded ratings in the format returned by
            ``train_data`` (list indexed by user id, each entry a dict
            ``movie -> rating``). Loaded with ``train_data(i)`` if omitted.

    Returns:
        A list of 944 dicts. Entry ``u`` maps each movie id to
        ``{'a': max_rating, 'b': min_rating}`` taken over the users similar
        to ``u`` that rated the movie. If none of them rated it, ``'a'``
        stays 0 and ``'b'`` stays 100 (the initial sentinels). Entries for
        users without a line in the file remain empty dicts.

    Raises:
        ValueError: if a token in the similar-users file is not an integer.
        IndexError: if the file has more than 943 lines or names a user id
            outside ``ratings``.
    """
    if ratings is None:
        ratings = train_data(i)

    # Every movie that was rated by at least one user 1..944.
    movies = set()
    for u in range(1, 945):
        movies.update(int(movie) for movie in ratings[u])
    movies = list(movies)

    bounds = [{} for _ in range(944)]
    path = 'Similar_users/sim' + str(i) + '.' + name
    with open(path, 'r') as fh:
        for u, line in enumerate(fh, start=1):
            # split() without arguments yields [] for an empty line, so users
            # with no similar users no longer trigger int('').
            similar = [int(token) for token in line.split()]
            for movie in movies:
                highest, lowest = 0, 100
                for usr in similar:
                    value = ratings[usr].get(movie)
                    if value is None:
                        continue  # this similar user did not rate the movie
                    highest = max(highest, value)
                    lowest = min(lowest, value)
                bounds[u][movie] = {'a': highest, 'b': lowest}
    return bounds
# Run the max/min rating computation for the first similarity measure and
# file index 1. The trailing semicolon keeps the notebook from echoing the
# call's value as cell output.
weights(name=names[0], i=1);
In [ ]: