In [1]:
# Smoke test: confirm the kernel is alive.
print("Hello")
In [2]:
import numpy as np
def _load_array(path):
    """Return whatever ``np.load`` reads from ``path``.

    The file is opened in binary mode (``np.load`` consumes raw bytes) and is
    closed by the ``with`` block even when loading raises.
    """
    with open(path, "rb") as handle:
        return np.load(handle)


# Directory holding the three input files read below.
DATA_DIR = "/home/iizhaki/oasis/CSE255/"

# Load the rows that become X.
matrix = _load_array(DATA_DIR + "MatrixReorder502.pck")
print("done")
# Load the values that become y.
rankingR = _load_array(DATA_DIR + "YsReorder502.pck")
print("done")
# Load the row order applied to both arrays.
indexes = _load_array(DATA_DIR + "IndexReorder502.pck")
print("done")
print(indexes[:20])

# Reorder features and targets with the same index sequence so they stay aligned.
X = [matrix[i] for i in indexes]
y = [rankingR[i] for i in indexes]

# First 70% of the reordered rows are the training split, the rest the test split.
n = int(len(X) * 0.7)
X_train, y_train = X[:n], y[:n]
X_test, y_test = X[n:], y[n:]
In [3]:
import scipy.optimize
from numpy.linalg import norm
### Gradient descent ###
# Objective
def f(theta, X, y, lam):
    """Objective: mean squared residual plus an L2 penalty on ``theta``.

    Computes ``||X.theta - y||^2 / len(X) + lam * ||theta||^2``.

    Parameters:
        theta: coefficient vector.
        X: design matrix; ``len(X)`` is used as the number of samples.
        y: targets, shaped so that ``np.dot(X, theta) - y`` is valid.
        lam: weight of the L2 penalty.

    Returns:
        The scalar objective value.
    """
    # Use the `np` alias: it is the only name under which this notebook
    # imports numpy, so the bare `numpy` name is undefined on a fresh kernel.
    diff = np.dot(X, theta) - y
    diffSq = (norm(diff) ** 2) / len(X)
    diffSqReg = diffSq + lam * norm(theta) ** 2
    return diffSqReg
# Derivative
def fprime(theta, X, y, lam):
    """Gradient with respect to ``theta`` of the regularised squared error.

    Computes ``2 * X^T (X.theta - y) / len(X) + 2 * lam * theta``.

    Parameters:
        theta: coefficient vector.
        X: design matrix as a numpy array (``X.T`` is used).
        y: targets, shaped so that ``np.dot(X, theta) - y`` is valid.
        lam: weight of the L2 penalty.

    Returns:
        The gradient, with the same shape as ``theta``.
    """
    # Use the `np` alias: it is the only name under which this notebook
    # imports numpy, so the bare `numpy` name is undefined on a fresh kernel.
    diff = np.dot(X, theta) - y
    res = 2 * np.dot(X.T, diff) / len(X) + 2 * lam * theta
    return res
In [ ]:
import time
import timeit
# Fit theta with L-BFGS-B, minimising f (gradient fprime) with lam = 0.1,
# and record the wall-clock time in `finished`.
start = time.time()
thetar, l, info = scipy.optimize.fmin_l_bfgs_b(
    f,
    # Start from the all-zero coefficient vector, one entry per feature column.
    np.zeros(len(X_train[0])),
    fprime,
    args=(np.array(X_train), np.array(y_train).T, 0.1),
)
end = time.time()
finished = end - start
print(finished)
In [15]:
def predict(data, theta):
    """Apply the linear model ``theta`` to every row of ``data``.

    Parameters:
        data: iterable of feature rows, each the same length as ``theta``.
        theta: coefficient vector.

    Returns:
        A list with one 1x1 ``np.matrix`` per row, holding ``theta . row``.
    """
    # Use the `np` alias: it is the only name under which this notebook
    # imports numpy, so the bare `numpy` name is undefined on a fresh kernel.
    theta_row = np.matrix(theta)
    return [theta_row * np.matrix(d).T for d in data]
#prediction_training = predict(X, thetax)
def MSE(prediction, real):
    """Mean of the squared differences between paired predictions and targets.

    Parameters:
        prediction: sequence of predicted values.
        real: sequence of true values; pairs are formed with ``zip``, so any
            surplus items in the longer sequence are ignored.

    Returns:
        The mean squared error, or NaN when there are no pairs.
    """
    squares = [(p - r) ** 2 for p, r in zip(prediction, real)]
    if not squares:
        # Same value np.mean([]) yields, without its "mean of empty slice" warning.
        return float("nan")
    # Use the `np` alias: it is the only name under which this notebook
    # imports numpy, so the bare `numpy` name is undefined on a fresh kernel.
    return np.mean(squares)
#print " MSE training", MSE(prediction_training, y )
In [16]:
# Score the fitted model on the training split.
prediction_training = predict(X_train, thetar)
# Baseline: MSE of always predicting the mean training target.
var = MSE([np.mean(y_train)] * len(y_train), y_train)
# Clamp every prediction into [1, 5] before scoring
# (presumably the valid range of the target; confirm).
ps = [np.clip(e, 1.0, 5.0) for e in prediction_training]
ms = MSE(ps, y_train)
# Earlier note kept from the original cell: mean abs error 0.832253748827
print(" MSE training %s" % ms)
print("var %s" % var)
# Model MSE relative to the mean-predictor baseline.
print("ratio  %s" % (ms / var))
#MSE training 0.788330098369
#var 1.37562975
#ratio 0.573068515252
In [17]:
# Score the fitted model on the held-out test split.
prediction_test = predict(X_test, thetar)
# Baseline: MSE of always predicting the mean test target.
var = MSE([np.mean(y_test)] * len(y_test), y_test)
# Clamp every prediction into [1, 5] before scoring
# (presumably the valid range of the target; confirm).
# `ps` stays a global: the per-cluster cell further down reads it.
ps = [np.clip(e, 1.0, 5.0) for e in prediction_test]
ms = MSE(ps, y_test)
# Earlier note kept from the original cell: mean abs error 0.832253748827
print(" MSE test %s" % ms)
print("var %s" % var)
# Model MSE relative to the mean-predictor baseline.
print("ratio  %s" % (ms / var))
#MSE test 0.855575828303
#var 1.40083603823
#ratio 0.610760863481
In [11]:
# Show again the elapsed training time stored in `finished`.
print(finished)
In [25]:
# Per-cluster test error.
# NOTE: `ps` must hold the clamped *test* predictions, i.e. the test-evaluation
# cell must be the last cell that assigned it (the training cell also sets `ps`).

# Number of clusters; columns 1..N_CLUSTERS of each feature row are searched
# for the first 1 (presumably a one-hot cluster indicator; confirm).
N_CLUSTERS = 100

pred = {c: [] for c in range(N_CLUSTERS)}
real = {c: [] for c in range(N_CLUSTERS)}
for i in range(len(X_test)):
    s = X_test[i]
    # Position of the first 1 inside the indicator slice is the cluster id;
    # .index raises ValueError if a row has no 1 there.
    p = s[1:N_CLUSTERS + 1].tolist().index(1)
    pred[p].append(ps[i])
    real[p].append(y_test[i])

msedict = dict()
for p in pred:
    # Skip clusters with no test rows: their MSE is undefined and would put
    # NaN entries into the dictionary.
    if pred[p]:
        msedict[p] = MSE(pred[p], real[p])
In [26]:
K = 100
import pickle

# Load the pickled object indexed by cluster id (each entry unpacks to a pair
# in the plotting cell).
# SECURITY: pickle.load can execute arbitrary code; only load files you trust.
# Binary mode is required for pickle data; `with` closes the file on error too.
with open("/home/iizhaki/oasis/CSE255/gpsUS" + str(K) + "_cent.pck", "rb") as handle:
    kmm = pickle.load(handle)
print("done")
In [28]:
from matplotlib import pyplot as PLT
from matplotlib import cm as CM
from matplotlib import mlab as ML
import numpy as NP
# Collect one point per scored cluster. Dedicated names are used so the
# notebook-level `y` (the target list built in the data-loading cell) is not
# overwritten by plot coordinates.
plot_x = []   # second component of kmm[a]; drawn on the horizontal axis
plot_y = []   # first component of kmm[a]; drawn on the vertical axis
errors = []   # per-cluster MSE taken from msedict
for a in msedict:
    # NOTE(review): the pair looks like (latitude, longitude), consistent with
    # the axis limits below; confirm against whatever wrote the centres file.
    w, q = kmm[a]
    plot_y.append(w)
    plot_x.append(q)
    errors.append(msedict[a])

gridsize = 80
fig, axes = PLT.subplots(figsize=(16, 10))
# With C given, each hexagon is coloured by the mean of the C values that fall
# inside it; bins=None means the colour maps directly to that value.
hb = axes.hexbin(plot_x, plot_y, C=errors, gridsize=gridsize, cmap=CM.jet, bins=None)
axes.axis([-128, -60, 24, 50])
axes.set_title('Test MSE per cluster')
cb = fig.colorbar(hb, ax=axes)
cb.set_label('Average Error')
# Save before show(): some backends release the figure once show() returns.
fig.savefig("AVG_Error.png", dpi=200)
PLT.show()
In [ ]:
# Link to the figure this notebook writes with fig.savefig("AVG_Error.png").
# The previous target, 'AVG_Rating.png', is not produced anywhere in the
# visible cells.
# NOTE(review): FileLink comes from IPython.display and must already be in
# scope (e.g. `from IPython.display import FileLink` in the imports cell).
FileLink('AVG_Error.png')