In [2]:
from _collections import defaultdict
import time
import timeit

from numpy.linalg import norm
import scipy.optimize
import random

import numpy as np

In [3]:
def parseData(fname):
  for l in open(fname):
    yield eval(l)
    
def parseTxt(fname):
  for l in open(fname):
    yield l.strip().split(" ")

print "Reading train..."
train = list(parseData("/home/iizhaki/oasis/CSE255/Project2/assignment2/train.json"))


Reading train...

In [3]:


In [3]:


In [4]:
print "done"

allXs = []
allYs = []
for l in train:
  user, item, rating = l['reviewerID'], l['itemID'], l['rating']
  allXs.append([user, item])
  allYs.append(float(rating))


done

In [4]:


In [4]:


In [7]:
def miniFunc(Data, Alpha, BetaU, BetaI, Lambd):
    part1 = 0
    for [u, i], Rui in Data:
        part1 += ((Alpha + BetaU[u] + BetaI[i] - Rui) ** 2)

    return part1


oldVal = 0
lambd = 1
alpha = 0
X = allXs
y = allYs
betaU = defaultdict(float)
betaI = defaultdict(float)
Ntrain = len(y)

data = zip(X, y)

Iu = defaultdict(int)
Ii = defaultdict(int)
for [u, i], Rui in data:
    Iu[u] += 1
    Ii[i] += 1

while True:
    lastAlpha = alpha
    lastBetaU = betaU
    lastBetaI = betaI
    
    #----------------------
    start = time.time()
    #----------------------
    
    # Alpha stage
    alpha = 0
    for [u, i], Rui in data:
        bu = betaU[u]
        bi = betaI[i]
        alpha += Rui - (bu + bi)
    alpha = alpha / Ntrain
    
    #----------------------
    end = time.time()
    finished = end - start
    #print "Alpha time: ", finished
    #----------------------
    
    #----------------------
    #start = time.time()
    #----------------------

    # BetaU stage 
    betaU = defaultdict(float)
    for [u, i], Rui in data:
        betaU[u] += (Rui - (alpha + betaI[i]))
    for u in betaU:
        betaU[u] = float(betaU[u]) / Iu[u]
        
    #----------------------
    #end = time.time()
    #finished = end - start
    #print "BetaU time: ", finished
    #----------------------
        
    #----------------------
    #start = time.time()
    #----------------------
        
    # BetaI stage 
    betaI = defaultdict(float)
    for [u, i], Rui in data:
        betaI[i] += (Rui - (alpha + betaU[u]))
    for i in betaI:
        betaI[i] = float(betaI[i]) / Ii[i]
        
    #----------------------
    #end = time.time()
    #finished = end - start
    #print "BetaI time: ", finished
    #----------------------
    
    #----------------------
    #start = time.time()
    #----------------------
    newVal = miniFunc(data, alpha, betaU, betaI, lambd)
    print oldVal, "            ", newVal
    #----------------------
    #end = time.time()
    #finished = end - start
    #print "miniFunc time: ", finished
    #----------------------

    if lastAlpha != 0 and oldVal < newVal:
        alpha = lastAlpha
        betaU = lastBetaU
        betaI = lastBetaI
        break
        
    oldVal = newVal
    
print alpha


0              176706.722687
176706.722687              150540.726778
150540.726778              138623.406725
138623.406725              131589.450991
131589.450991              126924.967809
126924.967809              123615.119392
123615.119392              121157.794215
121157.794215              119272.290327
119272.290327              117788.376494
117788.376494              116596.601644
116596.601644              115623.390344
115623.390344              114817.500746
114817.500746              114142.193088
114142.193088              113570.475582
113570.475582              113082.101918
113082.101918              112661.610947
112661.610947              112297.009863
112297.009863              111978.867501
111978.867501              111699.676304
111699.676304              111453.394644
111453.394644              111235.112833
111235.112833              111040.805637
111040.805637              110867.146328
110867.146328              110711.36521
110711.36521              110571.140769
110571.140769              110444.515026
110444.515026              110329.827102
110329.827102              110225.660602
110225.660602              110130.801623
110130.801623              110044.204969
110044.204969              109964.966792
109964.966792              109892.302292
109892.302292              109825.527418
109825.527418              109764.043782
109764.043782              109707.326147
109707.326147              109654.91199
109654.91199              109606.392776
109606.392776              109561.406599
109561.406599              109519.631985
109519.631985              109480.782617
109480.782617              109444.602855
109444.602855              109410.863903
109410.863903              109379.360524
109379.360524              109349.908207
109349.908207              109322.340734
109322.340734              109296.508058
109296.508058              109272.274475
109272.274475              109249.517021
109249.517021              109228.124081
109228.124081              109207.994167
109207.994167              109189.034846
109189.034846              109171.161799
109171.161799              109154.297989
109154.297989              109138.372931
109138.372931              109123.32204
109123.32204              109109.086055
109109.086055              109095.610528
109095.610528              109082.845368
109082.845368              109070.744436
109070.744436              109059.26518
109059.26518              109048.368309
109048.368309              109038.017502
109038.017502              109028.17915
109028.17915              109018.822115
109018.822115              109009.917519
109009.917519              109001.438555
109001.438555              108993.360313
108993.360313              108985.659623
108985.659623              108978.314912
108978.314912              108971.30608
108971.30608              108964.614377
108964.614377              108958.222303
108958.222303              108952.113506
108952.113506              108946.272698
108946.272698              108940.685572
108940.685572              108935.338729
108935.338729              108930.219614
108930.219614              108925.316448
108925.316448              108920.618178
108920.618178              108916.114422
108916.114422              108911.795421
108911.795421              108907.652
108907.652              108903.675519
108903.675519              108899.857847
108899.857847              108896.191318
108896.191318              108892.668706
108892.668706              108889.283193
108889.283193              108886.028343
108886.028343              108882.898078
108882.898078              108879.886651
108879.886651              108876.988632
108876.988632              108874.198881
108874.198881              108871.512533
108871.512533              108868.924982
108868.924982              108866.431863
108866.431863              108864.029037
108864.029037              108861.71258
108861.71258              108859.478769
108859.478769              108857.324069
108857.324069              108855.245122
108855.245122              108853.238738
108853.238738              108851.301886
108851.301886              108849.431682
108849.431682              108847.625383
108847.625383              108845.880378
108845.880378              108844.194182
108844.194182              108842.564427
108842.564427              108840.988857
108840.988857              108839.465323
108839.465323              108837.991773
108837.991773              108836.566253
108836.566253              108835.186895
108835.186895              108833.851918
108833.851918              108832.559621
108832.559621              108831.308379
108831.308379              108830.096639
108830.096639              108828.922918
108828.922918              108827.785795
108827.785795              108826.683914
108826.683914              108825.615976
108825.615976              108824.580738
108824.580738              108823.577012
108823.577012              108822.603656
108822.603656              108821.659581
108821.659581              108820.743741
108820.743741              108819.855132
108819.855132              108818.992796
108818.992796              108818.15581
108818.15581              108817.34329
108817.34329              108816.55439
108816.55439              108815.788294
108815.788294              108815.044223
108815.044223              108814.321425
108814.321425              108813.619182
108813.619182              108812.9368
108812.9368              108812.273615
108812.273615              108811.628987
108811.628987              108811.002303
108811.002303              108810.392972
108810.392972              108809.800425
108809.800425              108809.224115
108809.224115              108808.663518
108808.663518              108808.118126
108808.118126              108807.587453
108807.587453              108807.071028
108807.071028              108806.568401
108806.568401              108806.079136
108806.079136              108805.602814
108805.602814              108805.139031
108805.139031              108804.687397
108804.687397              108804.247536
108804.247536              108803.819088
108803.819088              108803.401702
108803.401702              108802.995041
108802.995041              108802.598782
108802.598782              108802.21261
108802.21261              108801.836223
108801.836223              108801.469329
108801.469329              108801.111646
108801.111646              108800.762901
108800.762901              108800.422831
108800.422831              108800.091183
108800.091183              108799.767711
108799.767711              108799.452177
108799.452177              108799.144353
108799.144353              108798.844017
108798.844017              108798.550955
108798.550955              108798.26496
108798.26496              108797.98583
108797.98583              108797.713373
108797.713373              108797.447401
108797.447401              108797.187733
108797.187733              108796.934192
108796.934192              108796.68661
108796.68661              108796.444821
108796.444821              108796.208667
108796.208667              108795.977994
108795.977994              108795.752651
108795.752651              108795.532495
108795.532495              108795.317385
108795.317385              108795.107185
108795.107185              108794.901765
108794.901765              108794.700996
108794.700996              108794.504755
108794.504755              108794.312922
108794.312922              108794.125382
108794.125382              108793.942021
108793.942021              108793.762731
108793.762731              108793.587404
108793.587404              108793.41594
108793.41594              108793.248238
108793.248238              108793.084201
108793.084201              108792.923735
108792.923735              108792.76675
108792.76675              108792.613157
108792.613157              108792.46287
108792.46287              108792.315805
108792.315805              108792.171883
108792.171883              108792.031024
108792.031024              108791.893152
108791.893152              108791.758193
108791.758193              108791.626074
108791.626074              108791.496727
108791.496727              108791.370082
108791.370082              108791.246074
108791.246074              108791.124639
108791.124639              108791.005714
108791.005714              108790.889238
108790.889238              108790.775153
108790.775153              108790.6634
108790.6634              108790.553925
108790.553925              108790.446673
108790.446673              108790.341592
108790.341592              108790.238629
108790.238629              108790.137735
108790.137735              108790.038861
108790.038861              108789.941961
108789.941961              108789.846987
108789.846987              108789.753896
108789.753896              108789.662644
108789.662644              108789.573187
108789.573187              108789.485486
108789.485486              108789.399499
108789.399499              108789.315188
108789.315188              108789.232514
108789.232514              108789.15144
108789.15144              108789.071931
108789.071931              108788.99395
108788.99395              108788.917464
108788.917464              108788.842438
108788.842438              108788.768841
108788.768841              108788.696641
108788.696641              108788.625806
108788.625806              108788.556306
108788.556306              108788.488113
108788.488113              108788.421197
108788.421197              108788.355531
108788.355531              108788.291087
108788.291087              108788.227838
108788.227838              108788.165759
108788.165759              108788.104825
108788.104825              108788.04501
108788.04501              108787.986291
108787.986291              108787.928644
108787.928644              108787.872047
108787.872047              108787.816477
108787.816477              108787.761912
108787.761912              108787.708332
108787.708332              108787.655714
108787.655714              108787.60404
108787.60404              108787.553289
108787.553289              108787.503443
108787.503443              108787.454481
108787.454481              108787.406386
108787.406386              108787.359141
108787.359141              108787.312726
108787.312726              108787.267126
108787.267126              108787.222324
108787.222324              108787.178303
108787.178303              108787.135048
108787.135048              108787.092542
108787.092542              108787.050771
108787.050771              108787.009719
108787.009719              108786.969373
108786.969373              108786.929718
108786.929718              108786.89074
108786.89074              108786.852426
108786.852426              108786.814763
108786.814763              108786.777737
108786.777737              108786.741336
108786.741336              108786.705547
108786.705547              108786.670359
108786.670359              108786.63576
108786.63576              108786.601738
108786.601738              108786.568282
108786.568282              108786.535381
108786.535381              108786.503024
108786.503024              108786.471201
108786.471201              108786.4399
108786.4399              108786.409114
108786.409114              108786.37883
108786.37883              108786.34904
108786.34904              108786.319734
108786.319734              108786.290904
108786.290904              108786.262539
108786.262539              108786.234631
108786.234631              108786.207173
108786.207173              108786.180154
108786.180154              108786.153566
108786.153566              108786.127403
108786.127403              108786.101655
108786.101655              108786.076316
108786.076316              108786.051376
108786.051376              108786.02683
108786.02683              108786.002669
108786.002669              108785.978887
108785.978887              108785.955477
108785.955477              108785.932431
108785.932431              108785.909744
108785.909744              108785.887408
108785.887408              108785.865417
108785.865417              108785.843765
108785.843765              108785.822446
108785.822446              108785.801454
108785.801454              108785.780782
108785.780782              108785.760425
108785.760425              108785.740378
108785.740378              108785.720635
108785.720635              108785.70119
108785.70119              108785.682038
108785.682038              108785.663174
108785.663174              108785.644593
108785.644593              108785.62629
108785.62629              108785.608259
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-7-5f3785052525> in <module>()
     84     #start = time.time()
     85     #----------------------
---> 86     newVal = miniFunc(data, alpha, betaU, betaI, lambd)
     87     print oldVal, "            ", newVal
     88     #----------------------

<ipython-input-7-5f3785052525> in miniFunc(Data, Alpha, BetaU, BetaI, Lambd)
      2     part1 = 0
      3     for [u, i], Rui in Data:
----> 4         part1 += ((Alpha + BetaU[u] + BetaI[i] - Rui) ** 2)
      5 
      6     return part1

KeyboardInterrupt: 

In [ ]:


In [ ]:


In [8]:
testRest = np.array(list(parseTxt("/home/iizhaki/oasis/CSE255/Project2/assignment2/pairs_Rating.txt")))
myPredictions = open("/home/iizhaki/oasis/CSE255/Project2/assignment2/idan_predictions_Rating_" + str(lambd) + "_" + str(alpha) + "_" + str(oldVal) + "_o-f.txt", 'w')
myPredictions.write(str(testRest[0][0]) + '\n')

mse = 0
for currLine in testRest[1:]:
    u, i = currLine[0].split("-")
    p = alpha + betaU[u] + betaI[i]
    myPredictions.write(u + '-' + i + ',' + str(p) + '\n')

myPredictions.flush()
myPredictions.close()

In [ ]: