In [82]:
import numpy as np
# load feature matrix
file = open("/home/iizhaki/oasis/CSE255/MatrixReorder502.pck", "rb")
matrix = np.load(file)
file.close()
print "done"
# load ratings vector
file = open("/home/iizhaki/oasis/CSE255/YsReorder502.pck", "rb")
rankingR = np.load(file)
file.close()
print "done"
# load row reordering indexes
file = open("/home/iizhaki/oasis/CSE255/IndexReorder502.pck", "rb")
indexes = np.load(file)
file.close()
print "done"
print indexes[:20]
X = [matrix[i] for i in indexes]
y = [rankingR[i] for i in indexes]
n = int(len(X) * 0.7)
X_train_org = np.array(X[:n])
y_train_org = np.array(y[:n])
X_test_org = np.array(X[n:])
y_test_org = np.array(y[n:])
In [83]:
n = int(len(X) * 0.7)
X_train = X_train_org #[:n]
y_train = y_train_org #[:n]
X_test = X_test_org #[n:]
y_test = y_test_org #[n:]
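The 70/30 split above is purely sequential (the rows were already shuffled via `indexes`). A minimal sketch of the same split using scikit-learn's helper, assuming a version whose `train_test_split` accepts `shuffle=False` (older releases expose it under `sklearn.cross_validation` and always shuffle):
In [ ]:
# Sketch only (assumed import path / parameter availability): the same
# sequential 70/30 split via scikit-learn's helper.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    np.array(X), np.array(y), test_size=0.3, shuffle=False)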
In [84]:
import pylab as pl
from sklearn.linear_model import SGDRegressor
from sklearn.datasets.samples_generator import make_regression
from sklearn.preprocessing import *
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
import time
import timeit
In [85]:
print X_train.shape, y_train.shape
In [86]:
X_train = X_train.astype(float)
y_train = y_train.astype(float)
X_test = X_test.astype(float)
y_test = y_test.astype(float)
print y_train[0], y[0]
#X_test = preprocessing.scale(X_test.astype(float))
#y_test = preprocessing.scale(y_test.astype(float))
In [87]:
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
In [88]:
scaler = MinMaxScaler().fit(X_test)
X_test = scaler.transform(X_test)
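The cell above fits a second MinMaxScaler on the test set, so train and test features end up scaled with different ranges. A minimal sketch of the more common variant that reuses the training scaler:
In [ ]:
# Sketch only: reuse the scaler fitted on the training data so that train and
# test features are mapped with the same min/max ranges.
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)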
In [9]:
# fit the regressor; L1/L2 regularization parameters are available - see the SGDRegressor link below
clf = SGDRegressor(n_iter = 5)
start = time.time()
clf.fit(X_train, y_train)
end = time.time()
finished = end - start
print finished
# and plot the result
#pl.scatter(X, Y, color='black')
#pl.plot(X, clf.predict(X), color='blue', linewidth=3)
#pl.show()
In [10]:
def predict(data, theta):
    theta = np.matrix(theta)
    prediction = [theta * np.matrix(d).T for d in data]
    return prediction
#prediction_training = predict(X, thetax)

def MSE(prediction, real):
    squares = [(p - r)**2 for p, r in zip(prediction, real)]
    return np.mean(squares)
#print " MSE training", MSE(prediction_training, y )
In [11]:
import scipy
ptrain = clf.predict(X_train)
ytrain = [e for e in y_train]
ptrain = [e for e in ptrain]
ptrain = [e if e <= 5 else 5. for e in ptrain]
ptrain = [e if e >= 1 else 1. for e in ptrain ]
mse = MSE(ptrain, ytrain)
var = MSE(ytrain, [np.mean(ytrain)] * len(ytrain))
print "MSE training", mse
print "Var", var
print " FVU training", (mse / var)
In [12]:
ptest = clf.predict(X_test)
ytest = [e for e in y_test]
ptest = [e for e in ptest]
ptest = [e if e <= 5 else 5. for e in ptest]
ptest = [e if e >= 1 else 1. for e in ptest ]
mse = MSE(ptest, ytest)
var = MSE(ytest, [np.mean(ytest)] * len(ytest))
print "MSE testing", mse
print "Var testing", var
print " FVU testing", (mse / var)
In [27]:
# out-of-range prediction mass relative to the number of predictions
# (note: ptrain and ptest were already clipped above, so this is 0 unless run on the raw predictions)
print sum([p for p in ptrain if p > 5 or p < 1]) / len(ptrain)
print sum([p for p in ptest if p > 5 or p < 1]) / len(ptest)
In [ ]:
# class sklearn.linear_model.SGDRegressor(loss='squared_loss', penalty='l2', alpha=0.0001, l1_ratio=0.15, fit_intercept=True, n_iter=5, shuffle=False, verbose=0, epsilon=0.1, random_state=None, learning_rate='invscaling', eta0=0.01, power_t=0.25, warm_start=False)
# http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDRegressor.html
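The manual alpha sweeps below could also be run through scikit-learn's grid search; a minimal sketch, with the import path and scoring name assumed for this scikit-learn version (newer releases use `sklearn.model_selection` and `'neg_mean_squared_error'`):
In [ ]:
# Sketch only (assumed import path and scoring name for this scikit-learn
# version): grid search over the same alpha/penalty grid as the manual loops.
from sklearn.grid_search import GridSearchCV
params = {'alpha': [l * 0.01 for l in range(0, 21)],
          'penalty': ['l1', 'l2', 'elasticnet']}
gs = GridSearchCV(SGDRegressor(n_iter=5), params, scoring='mean_squared_error')
gs.fit(X_train, y_train)
print gs.best_params_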
In [ ]:
In [89]:
start = time.time()
thetax, residualsx, rankx, sx = np.linalg.lstsq(X_train, y_train)
end = time.time()
finished = end - start
print finished
In [90]:
def predict(data, theta):
    theta = np.matrix(theta)
    prediction = [theta * np.matrix(d).T for d in data]
    return prediction
prediction_training = predict(X_train, thetax)

def MSE(prediction, real):
    squares = [(p - r)**2 for p, r in zip(prediction, real)]
    return np.mean(squares)

y = [e for e in y_train]
p = [e for e in prediction_training]
mse = MSE(p, y)
var = MSE(y, [np.mean(y)] * len(y))
print "MSE training", mse
print "Var", var
print " FVU training", (mse / var)
In [93]:
prediction_testing = predict(X_test, thetax)
y = [e for e in y_test]
p = [e for e in prediction_testing]
mse = MSE(p, y)
var = MSE(y, [np.mean(y)] * len(y))
print "MSE testing", mse
print "Var", var
print " FVU testing", (mse / var)
In [77]:
res = {}
In [102]:
for l in range(0, 21):
    a = l * 0.01
    # fit the regressor with L1 regularization strength alpha = a
    clf = SGDRegressor(n_iter = 5, alpha = a, penalty = 'l1')
    start = time.time()
    clf.fit(X_train, y_train)
    end = time.time()
    finished = end - start
    print finished
    ptest = clf.predict(X_test)
    ytest = [e for e in y_test]
    ptest = [e for e in ptest]
    ptest = [e if e <= 5 else 5. for e in ptest]
    ptest = [e if e >= 1 else 1. for e in ptest]
    mse = MSE(ptest, ytest)
    var = MSE(ytest, [np.mean(ytest)] * len(ytest))
    print "MSE testing", mse
    print "Var testing", var
    print " FVU testing", (mse / var)
    res[a] = (a, finished, mse, var, (mse / var))
In [136]:
from matplotlib import pyplot as PLT
from matplotlib import cm as CM
from matplotlib import mlab as ML
import numpy as NP
from IPython.display import FileLink, FileLinks
# FVU values from the L1 sweep (res), sorted by alpha
R = [r for (_, _, _, _, r) in sorted(res.values())]
RY = []
XS = []
for i in range(len(R)):
    if R[i] < 0.85 and i > 2:
        RY.append(R[i])
        XS.append(i)
ll = len(RY)
RX = [i * 0.01 for i in XS]
fig, axes = PLT.subplots(figsize=(10, 8))
axes.bar(range(ll), RY, width=0.5, alpha=0.5, align="center")
axes.set_xlabel('L1 regularization factor (alpha)')
axes.set_ylabel('FVU on the test set')
axes.set_xticks(range(ll))
axes.set_xticklabels(RX, fontsize=10, rotation='vertical');
axes.set_title("FVU per L1 Regularization lambda factor");
PLT.axis([-1, ll, 0.5, 0.9])
PLT.show()
fig.savefig("FVU_L1.png", dpi=200)
FileLink("FVU_L1.png")
Out[136]:
In [108]:
res2 = {}
for l in range(0, 21):
    a = l * 0.01
    # fit the regressor with L2 regularization strength alpha = a
    clf = SGDRegressor(n_iter = 5, alpha = a, penalty = 'l2')
    start = time.time()
    clf.fit(X_train, y_train)
    end = time.time()
    finished = end - start
    print finished
    ptest = clf.predict(X_test)
    ytest = [e for e in y_test]
    ptest = [e for e in ptest]
    ptest = [e if e <= 5 else 5. for e in ptest]
    ptest = [e if e >= 1 else 1. for e in ptest]
    mse = MSE(ptest, ytest)
    var = MSE(ytest, [np.mean(ytest)] * len(ytest))
    print "MSE testing", mse
    print "Var testing", var
    print " FVU testing", (mse / var)
    res2[a] = (a, finished, mse, var, (mse / var))
In [137]:
from matplotlib import pyplot as PLT
from matplotlib import cm as CM
from matplotlib import mlab as ML
import numpy as NP
from IPython.display import FileLink, FileLinks
# FVU values from the L2 sweep (res2), sorted by alpha
R = [r for (_, _, _, _, r) in sorted(res2.values())]
RY = []
XS = []
for i in range(len(R)):
    if R[i] < 0.85 and i > 2:
        RY.append(R[i])
        XS.append(i)
ll = len(RY)
RX = [i * 0.01 for i in XS]
fig, axes = PLT.subplots(figsize=(10, 8))
axes.bar(range(ll), RY, width=0.5, alpha=0.5, align="center")
axes.set_xlabel('L2 regularization factor (alpha)')
axes.set_ylabel('FVU on the test set')
axes.set_xticks(range(ll))
axes.set_xticklabels(RX, fontsize=10, rotation='vertical');
axes.set_title("FVU per L2 Regularization lambda factor");
PLT.axis([-1, ll, 0.5, 0.9])
PLT.show()
fig.savefig("FVU_L2.png", dpi=200)
FileLink("FVU_L2.png")
Out[137]:
In [110]:
res3 = {}
for l in range(0, 21):
    a = l * 0.01
    # fit the regressor with Elasticnet regularization strength alpha = a
    clf = SGDRegressor(n_iter = 5, alpha = a, penalty = 'elasticnet')
    start = time.time()
    clf.fit(X_train, y_train)
    end = time.time()
    finished = end - start
    print finished
    ptest = clf.predict(X_test)
    ytest = [e for e in y_test]
    ptest = [e for e in ptest]
    ptest = [e if e <= 5 else 5. for e in ptest]
    ptest = [e if e >= 1 else 1. for e in ptest]
    mse = MSE(ptest, ytest)
    var = MSE(ytest, [np.mean(ytest)] * len(ytest))
    print "MSE testing", mse
    print "Var testing", var
    print " FVU testing", (mse / var)
    res3[a] = (a, finished, mse, var, (mse / var))
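With all three sweeps stored in `res`, `res2`, and `res3`, the lowest test FVU per penalty can be read straight out of the result dictionaries; a minimal sketch:
In [ ]:
# Sketch only: lowest-FVU alpha for each penalty, read from the sweep results.
for name, r in [('l1', res), ('l2', res2), ('elasticnet', res3)]:
    a, finished, mse_, var_, fvu = min(r.values(), key=lambda t: t[4])
    print name, "best alpha:", a, "FVU:", fvu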
In [139]:
from matplotlib import pyplot as PLT
from matplotlib import cm as CM
from matplotlib import mlab as ML
import numpy as NP
from IPython.display import FileLink, FileLinks
# FVU values from the Elasticnet sweep (res3), sorted by alpha
R = [r for (_, _, _, _, r) in sorted(res3.values())]
RY = []
XS = []
for i in range(len(R)):
    if R[i] < 0.85 and i > 2:
        RY.append(R[i])
        XS.append(i)
ll = len(RY)
RX = [i * 0.01 for i in XS]
fig, axes = PLT.subplots(figsize=(10, 8))
axes.bar(range(ll), RY, width=0.5, alpha=0.5, align="center")
axes.set_xlabel('Elasticnet regularization factor (alpha)')
axes.set_ylabel('FVU on the test set')
axes.set_xticks(range(ll))
axes.set_xticklabels(RX, fontsize=10, rotation='vertical');
axes.set_title("FVU per Elasticnet Regularization lambda factor");
PLT.axis([-1, ll, 0.5, 0.9])
PLT.show()
fig.savefig("FVU_Elasticnet.png", dpi=200)
FileLink("FVU_Elasticnet.png")
Out[139]:
In [ ]: