In [ ]:
from _collections import defaultdict
import time
import timeit

from numpy.linalg import norm
import scipy.optimize
import random

import numpy as np


def parseData(fname):
  """Yield one Python object per non-blank line of ``fname``.

  Each line is expected to hold a single Python literal expression
  (the data files are read line by line and evaluated).

  fname -- path of the file to read.

  The file is closed as soon as the generator is exhausted or discarded.
  Lines containing only whitespace are skipped instead of crashing eval.
  """
  with open(fname) as f:
    for l in f:
      if not l.strip():
        # A blank/trailing empty line would make eval raise SyntaxError.
        continue
      # SECURITY: eval executes arbitrary code found in the file; only use
      # this on trusted data (ast.literal_eval is the safe alternative when
      # the lines are plain literals).
      yield eval(l)
    
def parseTxt(fname):
  """Yield each line of ``fname`` as a list of fields split on single spaces.

  fname -- path of the text file to read.

  Leading/trailing whitespace is stripped before splitting, so an empty
  line yields ``['']``. The file is closed when the generator finishes.
  """
  with open(fname) as f:
    for l in f:
      yield l.strip().split(" ")

# Load the training reviews; every line of the file is evaluated into one record.
trainPath = "/home/iizhaki/oasis/train.json"
#trainPath = "/home/iizhaki/oasis/CSE255/Project2/assignment2/train.json"
print("Reading train...")
train = [record for record in parseData(trainPath)]

# Load the second data set (helpful.json); the name `tetst` is read by a later cell.
helpfulPath = "/home/iizhaki/oasis/CSE255/Project2/assignment2/helpful.json"
print("Reading test...")
tetst = [record for record in parseData(helpfulPath)]
print("done")

In [ ]:
from sets import Set
print("done")

# Flatten the training records into parallel lists plus two lookups:
#   allXs    -- [user, item] pairs, one per record
#   allYs    -- the rating of each record, as float
#   allUsers -- every distinct user seen
#   rater    -- item -> rating of the last record seen for that item
allXs = []
allYs = []
allUsers = Set()
rater = {}
for record in train:
    reviewer = record['reviewerID']
    product = record['itemID']
    score = float(record['rating'])

    rater[product] = score
    allXs.append([reviewer, product])
    allYs.append(score)
    allUsers.add(reviewer)

In [ ]:
# user -> set of items that user is paired with in the `tetst` records.
crazy = defaultdict(Set)

for record in tetst:
    crazy[record['reviewerID']].add(record['itemID'])

Jaccard


In [ ]:
# Share of all training purchases that the most-popular-items set must
# exceed before its construction loop stops; also embedded in the name of
# the predictions file.
frac = 1.0
# Item-popularity parameter embedded in the name of the predictions file.
# NOTE: the prediction cell assigns its own value to `pop` later on.
pop = 100

In [176]:
# Open (and truncate) the predictions file; its name records frac and pop.
print("Reading test...")
predictionsPath = "/home/iizhaki/oasis/idan_predictions_Purchase_" + str(frac) + "_" + str(pop) + "_new9.txt"
myPredictions = open(predictionsPath, 'w')
print("done")

# Load the candidate pairs: one row per line of the file, row 0 being the
# first line of the file (later cells treat it as the header).
print("Reading test...")
pairsPath = "/home/iizhaki/oasis/CSE255/Project2/assignment2/pairs_Purchase.txt"
test = np.array([fields for fields in parseTxt(pairsPath)])
print("done")


Reading test...
done
Reading test...
done

In [177]:
# Two views of the training pairs:
#   dictI: item -> set of users paired with it
#   dictU: user -> set of items paired with them
dictI = defaultdict(Set)
dictU = defaultdict(Set)
for pair in allXs:
    buyer, product = pair
    dictU[buyer].add(product)
    dictI[product].add(buyer)

In [178]:
def tossUniassed(port):
    """Return "1" or "0" from one uniform draw on [0.0, 1.0].

    port -- threshold compared against the draw; the result is "1" when the
            draw is strictly below ``port`` and "0" otherwise.
    """
    draw = random.uniform(0.0, 1.0)
    return "1" if draw < port else "0"

In [179]:
# Number of training pairs per item and per user.
itemCount = defaultdict(int)
userCount = defaultdict(int)
for buyer, product in allXs:
    itemCount[product] += 1
    userCount[buyer] += 1

# One purchase per training pair.
totalPurchases = len(allXs)

# (count, id) tuples, largest count first; ids are unique so ties on count
# are ordered by id, descending.
mostPopular = sorted(((n, key) for key, n in itemCount.items()), reverse=True)
mostPopularU = sorted(((n, key) for key, n in userCount.items()), reverse=True)


def _coverUntil(ranked, limit):
    """Collect ids from ``ranked`` until their summed counts exceed ``limit``.

    The id that pushes the running total over the limit is included; if the
    limit is never exceeded every id is returned.
    """
    chosen = set()
    running = 0
    for n, key in ranked:
        running += n
        chosen.add(key)
        if running > limit:
            break
    return chosen


# Most popular items covering more than `frac` of all purchases.
return1 = _coverUntil(mostPopular, totalPurchases * frac)
# Most active users; the limit equals the total number of purchases.
return2 = _coverUntil(mostPopularU, totalPurchases * 1.0)

In [180]:
#nos = {'U447391487': 'I140025784', 'U348881258': 'I828453257', 'U197471804': 'I638136162', 'U912621491': 'I686442838'}
#U165565902-I068152971,0

In [181]:
#myPredictions.write(str(test[0][0]) + '\n')

# Grow dictU / dictI to a fixed point over the candidate pairs in `test`.
#
# In each pass, a candidate (u, i) that is not yet linked gets linked when
#   * some item already linked to u shares at least one user with i, or
#   * some user already linked to i shares at least one item with u.
# A Jaccard similarity is > 0 exactly when the intersection is non-empty, so
# only the intersections are tested. All decisions of a pass are taken on
# the state at the start of the pass; the new links are applied afterwards.
# Passes repeat until one adds nothing.
#
# Each candidate is evaluated once per pass: the item-based check first, the
# user-based check only if that fails. (Previously the user-based scan was
# nested inside the item loop and re-run for every non-overlapping item,
# which repeated identical work and queued duplicate links.)
hasChanged = True
bestUIs = defaultdict(Set)  # not read by this loop
loc = 1

while hasChanged:
    print("Loop  %d" % loc)
    loc += 1
    toAdd = []

    # Row 0 of `test` is skipped; the remaining rows hold "user-item" strings.
    for l in test[1:]:
        u, i = l[0].split("-")

        # Both ends must already be known, otherwise there is nothing to compare.
        if u not in dictU or i not in dictI:
            continue
        # Already linked: nothing to add for this pair.
        if i in dictU[u]:
            continue

        linked = any(dictI[it] & dictI[i] for it in dictU[u])
        if not linked:
            linked = any(dictU[ut] & dictU[u] for ut in dictI[i])
        if linked:
            toAdd.append((u, i))

    for (uu, ii) in toAdd:
        dictU[uu].add(ii)
        dictI[ii].add(uu)

    # Another pass is only useful if this one added at least one link.
    hasChanged = bool(toAdd)


Loop  1
Loop  2
Loop  3

In [256]:
import random

# Write one "user-item,label" line per candidate pair of `test`, preceded by
# the first row of `test` copied verbatim, and count the labels in
# `ones` / `zeros`.
#
# Thresholds are assigned BEFORE the output path is built, so the file name
# records the item threshold actually used and re-running this cell always
# targets the same file (previously `pop` was changed after the name had
# been built, so the first run and later runs wrote to different files).
pop = 70                  # an item linked to more users than this counts as popular
userActivityThreshold = 30  # a user linked to more items than this counts as active

ones = 0
zeros = 0

# How often each user / item occurs among the candidate pairs themselves.
userC = defaultdict(int)
itemC = defaultdict(int)
for l in test[1:]:
    u, i = l[0].split("-")
    userC[u] += 1
    itemC[i] += 1

# `with` guarantees the file is flushed and closed even if a row fails.
with open("/home/iizhaki/oasis/idan_predictions_Purchase_" + str(frac) + "_" + str(pop) + "_new9.txt", 'w') as myPredictions:
    myPredictions.write(str(test[0][0]) + '\n')

    for l in test[1:]:
        u, i = l[0].split("-")

        if (u in dictU) and (i in dictI):
            # Jaccard similarity > 0 is equivalent to a non-empty intersection.
            if any(dictI[it] & dictI[i] for it in dictU[u]):
                # An item linked to u shares a user with i.
                label = "1"
            elif any(dictU[ut] & dictU[u] for ut in dictI[i]):
                # A user linked to i shares an item with u.
                label = "1"
            elif len(dictI[i]) > pop or len(dictU[u]) > userActivityThreshold:
                # No overlap at all: fall back to popularity / activity.
                label = "1"
            else:
                label = "0"
        else:
            # User or item unknown to dictU / dictI.
            # NOTE(review): all three rules below currently predict "1"; the
            # last one used to predict "0" in an earlier experiment.
            if i in return1 or u in return2:
                label = "1"
            elif itemC[i] > 1 or userC[u] > 1:
                label = "1"
            else:
                label = "1"

        myPredictions.write(u + '-' + i + ',' + label + '\n')
        if label == "1":
            ones += 1
        else:
            zeros += 1

In [257]:
# Number of pairs labelled 1, then number labelled 0, one value per line.
for labelCount in (ones, zeros):
    print(labelCount)


45624
54376

In [244]:


In [107]:


In [ ]: