In [ ]:
from _collections import defaultdict
import time
import timeit
from numpy.linalg import norm
import scipy.optimize
import random
import numpy as np
def parseData(fname):
    """Yield one evaluated Python object per non-blank line of ``fname``.

    Every non-blank line is passed to ``eval`` and the resulting value is
    yielded, in file order.

    Parameters
    ----------
    fname : str
        Path of the text file to read.

    Yields
    ------
    object
        Whatever ``eval`` returns for the line.

    Raises
    ------
    IOError / OSError
        If the file cannot be opened (raised on first iteration).
    Exception
        Anything ``eval`` raises for a malformed line.
    """
    # `with` closes the handle as soon as the generator is exhausted or
    # closed, instead of leaving it open until garbage collection.
    with open(fname) as handle:
        for l in handle:
            # A blank line (e.g. a trailing newline at end of file) would make
            # eval() raise SyntaxError, so skip it.
            if not l.strip():
                continue
            # SECURITY: eval() executes arbitrary code found in the file.
            # Only use this on trusted data; if the lines are plain literals,
            # ast.literal_eval is the safe alternative.
            yield eval(l)
def parseTxt(fname):
    """Yield each line of ``fname`` as a list of space-separated fields.

    Leading and trailing whitespace is stripped from the line, then the
    remainder is split on single space characters. Consecutive spaces
    therefore produce empty-string fields, and a blank line yields ``['']``.

    Parameters
    ----------
    fname : str
        Path of the text file to read.

    Yields
    ------
    list of str
        The fields of one line, in file order.
    """
    # `with` closes the handle as soon as the generator is exhausted or
    # closed, instead of leaving it open until garbage collection.
    with open(fname) as handle:
        for l in handle:
            yield l.strip().split(" ")
# Load the two line-oriented input files into memory; every line is evaluated
# by parseData and the results are collected into lists.
# NOTE(review): both paths are absolute and machine-specific.
print "Reading train..."
#train = list(parseData("/home/iizhaki/oasis/CSE255/Project2/assignment2/train.json"))
# `train` is the list of records the following cell iterates over, reading
# the 'reviewerID', 'itemID' and 'rating' keys of each record.
train = list(parseData("/home/iizhaki/oasis/train.json"))
print "Reading test..."
# NOTE: the name `tetst` (sic) is what the later cell that builds `crazy`
# iterates over, so it cannot be renamed here in isolation.
tetst = list(parseData("/home/iizhaki/oasis/CSE255/Project2/assignment2/helpful.json"))
print "done"
In [ ]:
from sets import Set
print "done"
allXs = []
allYs = []
allUsers = Set()
rater = {}
for l in train:
user, item, rating = l['reviewerID'], l['itemID'], l['rating']
rater[item] = float(rating)
allXs.append([user, item])
allYs.append(float(rating))
allUsers.add(user)
In [ ]:
# Map each reviewer id found in `tetst` to the set of item ids that appear
# alongside it.
crazy = defaultdict(Set)
for l in tetst:
    user = l['reviewerID']
    item = l['itemID']
    crazy[user].add(item)
In [ ]:
# Tunables read by later cells of this notebook.
# frac: multiplied by totalPurchases to form the cut-off at which the
# popular-item set (return1) stops growing; also embedded in the output file name.
frac = 1.0
# pop: embedded in the predictions file name. The prediction-writing cell
# reassigns it to 70 before using it as the item-popularity threshold.
pop = 100
In [176]:
# Open the predictions output file (its name encodes frac and pop) and load
# the user-item pairs that are to be scored.
print "Reading test..."
# NOTE(review): this handle is neither written to nor closed in this cell, and
# the prediction-writing cell re-opens a path built from the same expression
# with mode 'w'. This early open looks redundant - confirm before removing.
myPredictions = open("/home/iizhaki/oasis/idan_predictions_Purchase_" + str(frac) + "_" + str(pop) + "_new9.txt", 'w')
print "done"
print "Reading test..."
# parseTxt strips each line and splits it on single spaces; the resulting rows
# are stacked into a NumPy array. Later cells use test[0][0] as the header line
# and split the first field of every following row on "-" into user and item.
test = np.array(list(parseTxt("/home/iizhaki/oasis/CSE255/Project2/assignment2/pairs_Purchase.txt")))
print "done"
In [177]:
# Two inverse indexes over the training pairs:
#   dictU: user id -> set of item ids paired with that user
#   dictI: item id -> set of user ids paired with that item
dictI, dictU = defaultdict(Set), defaultdict(Set)
for u, i in allXs:
    dictU[u].add(i)
    dictI[i].add(u)
In [178]:
def tossUniassed(port):
    """Return the string "1" or "0" from a single random draw.

    One value is drawn with ``random.uniform(0.0, 1.0)``. The result is "0"
    when the draw is greater than or equal to ``port`` and "1" otherwise, so
    a larger ``port`` makes "1" more likely.

    Parameters
    ----------
    port : float
        Threshold compared against the draw.

    Returns
    -------
    str
        "1" or "0".
    """
    draw = random.uniform(0.0, 1.0)
    return "0" if draw >= port else "1"
In [179]:
# Count how often every item and every user occurs in the training pairs.
itemCount = defaultdict(int)
userCount = defaultdict(int)
for user, item in allXs:
    userCount[user] += 1
    itemCount[item] += 1
# One purchase per training pair.
totalPurchases = len(allXs)

# (count, id) tuples ordered from most to least frequent; equal counts are
# ordered by descending id because the tuples are compared element-wise.
mostPopular = sorted([(itemCount[x], x) for x in itemCount], reverse=True)
mostPopularU = sorted([(userCount[x], x) for x in userCount], reverse=True)

# return1: items taken in popularity order until their cumulative count
# exceeds frac * totalPurchases.
return1 = set()
count = 0
for ic, i in mostPopular:
    return1.add(i)
    count += ic
    if count > totalPurchases * frac:
        break

# return2: the same accumulation over users, with the cut-off fixed at
# 1.0 * totalPurchases rather than frac.
return2 = set()
count = 0
for uc, u in mostPopularU:
    return2.add(u)
    count += uc
    if count > totalPurchases * 1.0:
        break
In [180]:
#nos = {'U447391487': 'I140025784', 'U348881258': 'I828453257', 'U197471804': 'I638136162', 'U912621491': 'I686442838'}
#U165565902-I068152971,0
In [181]:
#myPredictions.write(str(test[0][0]) + '\n')
hasChanged = True
bestUIs = defaultdict(Set)
loc = 1
while hasChanged:
print "Loop ", loc
loc += 1
hasChanged = False
toAdd = []
for l in test[1 :]:
u, i = l[0].split("-")
if (u in dictU) and (i in dictI):
for it in dictU[u]:
Jacc = len(dictI[it] & dictI[i]) * 1.0 / len(dictI[it] | dictI[i])
if Jacc > 0:
if i not in dictU[u]:
toAdd.append((u, i))
break
else:
for ut in dictI[i]:
Jacc = len(dictU[ut] & dictU[u]) * 1.0 / len(dictU[ut] | dictU[u])
if Jacc > 0 and u not in dictI[i]:
toAdd.append((u, i))
break
for (uu, ii) in toAdd:
hasChanged = True
dictU[uu].add(ii)
dictI[ii].add(uu)
In [256]:
import random  # already imported in the first cell; kept so this cell's imports are unchanged

# Score every candidate (user, item) pair in `test` and write one
# "user-item,label" line per pair after the header line test[0][0].
#
# Labelling rules:
#   * user and item both indexed in dictU / dictI:
#       - 1 if any item of the user shares a buyer with the candidate item;
#       - else 1 if any buyer of the item shares an item with the user;
#       - else 1 if the item has more than `pop` buyers or the user has more
#         than 30 items, otherwise 0.
#   * otherwise: always 1.
#
# `ones` / `zeros` count the labels written and are printed by the next cell.
#
# NOTE(review): the exported source lost its indentation; the nesting below is
# reconstructed so that exactly one line is written per pair. Confirm against
# the notebook.
hasChanged = True
bestUIs = defaultdict(Set)
ones = 0
zeros = 0

# How often each user / item occurs among the candidate pairs.
userC = defaultdict(int)
itemC = defaultdict(int)
for l in test[1 :]:
    u, i = l[0].split("-")
    userC[u] += 1
    itemC[i] += 1

# `with` closes (and thereby flushes) the output file even when scoring raises
# part-way through; the name `myPredictions` stays bound to the closed file
# afterwards, as before.
with open("/home/iizhaki/oasis/idan_predictions_Purchase_" + str(frac) + "_" + str(pop) + "_new9.txt", 'w') as myPredictions:
    myPredictions.write(str(test[0][0]) + '\n')

    # NOTE(review): `pop` is reassigned only after the file name above has been
    # built, so re-running this cell without resetting `pop` writes to a
    # differently named file.
    pop = 70

    for l in test[1 :]:
        done = False
        u, i = l[0].split("-")
        if (u in dictU) and (i in dictI):
            # Item-based evidence: some item of u shares a buyer with i.
            for it in dictU[u]:
                Jacc = len(dictI[it] & dictI[i]) * 1.0 / len(dictI[it] | dictI[i])
                if Jacc > 0.0:
                    done = True
                    break
            # User-based evidence: some buyer of i shares an item with u.
            if not done:
                for ut in dictI[i]:
                    Jacc = len(dictU[ut] & dictU[u]) * 1.0 / len(dictU[ut] | dictU[u])
                    if Jacc > 0:
                        done = True
                        break
            if done:
                label = '1'
            # Popularity fallback. NOTE(review): 30 is an unexplained constant.
            elif len(dictI[i]) > pop or len(dictU[u]) > 30:
                label = '1'
            else:
                label = '0'
        else:
            # The original distinguished three cases here (item in return1 or
            # user in return2; item or user repeated among the candidates via
            # itemC / userC; everything else) but every case wrote 1, the 0
            # fallback having been commented out. They are merged.
            label = '1'

        myPredictions.write(u + '-' + i + ',' + label + '\n')
        if label == '1':
            ones += 1
        else:
            zeros += 1
In [257]:
# Report how many pairs were labelled 1 and how many were labelled 0 by the
# prediction-writing cell.
print ones
print zeros
In [244]:
In [107]:
In [ ]: