In [1]:
import os

import numpy as np

# Directory holding the serialized arrays read below.
# NOTE(review): absolute, user-specific path — point it at your own copy of the data.
DATA_DIR = "/home/iizhaki/oasis/CSE255"
# Share of the re-ordered samples used for training; the remainder is the test set.
TRAIN_FRACTION = 0.7


def _load_array(filename):
    """Load one serialized object from DATA_DIR with ``np.load``.

    The file is opened in binary mode and is closed even when loading fails.
    NOTE(review): ``np.load`` may unpickle the file contents — only load trusted files.
    """
    with open(os.path.join(DATA_DIR, filename), "rb") as handle:
        return np.load(handle)


# Load (not save) the feature matrix, the targets and the row ordering.
matrix = _load_array("MatrixReorder_0.pck")
print("done")
rankingR = _load_array("YsReorder_1.pck")
print("done")
indexes = _load_array("IndexReorder_1.pck")
print("done")
print(indexes[:20])

# Arrange features and targets in the order given by `indexes`.
X = [matrix[i] for i in indexes]
y = [rankingR[i] for i in indexes]

# Split: the first TRAIN_FRACTION of the rows trains the model, the rest tests it.
n = int(len(X) * TRAIN_FRACTION)
X_train_org = np.array(X[:n])
y_train_org = np.array(y[:n])
X_test_org = np.array(X[n:])
y_test_org = np.array(y[n:])
In [2]:
# Work on the full train/test arrays; take slices here to experiment on a subset.
n = int(0.7 * len(X))
X_train, y_train = X_train_org, y_train_org
X_test, y_test = X_test_org, y_test_org
In [3]:
import pylab as pl
from sklearn.linear_model import SGDRegressor
from sklearn.datasets.samples_generator import make_regression
from sklearn.preprocessing import *
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
import time
import timeit
In [4]:
# Sanity check: dimensions of the training features and training targets.
print("%s %s" % (X_train.shape, y_train.shape))
In [5]:
# Convert features and targets of both splits to floating point.
X_train, y_train = X_train.astype(float), y_train.astype(float)
X_test, y_test = X_test.astype(float), y_test.astype(float)
# Show the first training target next to the first entry of `y` as a quick check.
print("%s %s" % (y_train[0], y[0]))
In [6]:
# Fit a min-max scaler on the training features and rescale them in one step.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
In [7]:
# Rescale the test features with the scaler already fitted on the training features.
# Fitting a second scaler on the test set would map the test features with a
# different min/max than the one the model is trained on, so the two splits
# would no longer share a feature scale.
# NOTE: run this cell once per kernel session; re-running applies the scaling twice.
X_test = scaler.transform(X_test)
In [8]:
# Fit a linear regressor by stochastic gradient descent (n_iter = 500) and time it.
# The L1/L2 regularisation parameters are listed in the SGDRegressor reference
# comment further down in this notebook.
clf = SGDRegressor(n_iter=500)

start = time.time()
clf.fit(X_train, y_train)
end = time.time()

# Wall-clock seconds spent in fit().
finished = end - start
print(finished)
In [9]:
def predict(data, theta):
    """Return the linear prediction (dot product with ``theta``) for each row of ``data``.

    data  -- iterable of feature rows; each row is flattened to 1-D before use
    theta -- weight vector with one entry per feature
    Returns a list holding one scalar prediction per row.

    Uses ``np`` (the alias this notebook imports) and a plain dot product, so it
    works for 1-D array rows and agrees with the later ``predict`` definition.
    """
    weights = np.asarray(theta, dtype=float).ravel()
    return [np.dot(weights, np.asarray(row).ravel()) for row in data]
def MSE(prediction, real):
    """Return the mean squared error between ``prediction`` and ``real``.

    The two sequences are paired element-wise with ``zip``, so any surplus
    elements of the longer one are ignored.
    """
    squares = [(p - r) ** 2 for p, r in zip(prediction, real)]
    # `np` is the alias this notebook imports; the bare name `numpy` is never imported.
    return np.mean(squares)
In [10]:
import scipy

# Evaluate the SGD model on the training set.
# assumes targets are stored multiplied by 1000 with a valid range of 1..5 — TODO confirm
ptrain = clf.predict(X_train)
ytrain = [e / 1000.0 for e in y_train]
ptrain = [e / 1000.0 for e in ptrain]
# Clamp the predictions into [1, 5].
ptrain = [e if e <= 5 else 5. for e in ptrain]
ptrain = [e if e >= 1 else 1. for e in ptrain]
mse = MSE(ptrain, ytrain)
# Variance of the targets, computed as the MSE of always predicting their mean.
# np.mean is used because a bare `mean` is only defined when pylab globals are injected.
var = MSE(ytrain, [np.mean(ytrain)] * len(ytrain))
print("MSE training %s" % (mse,))
print("Var %s" % (var,))
# Fraction of variance unexplained.
print(" FVU training %s" % (mse / var,))
In [11]:
# Evaluate the SGD model on the held-out test set.
# assumes targets are stored multiplied by 1000 with a valid range of 1..5 — TODO confirm
ptest = clf.predict(X_test)
ytest = [e / 1000.0 for e in y_test]
ptest = [e / 1000.0 for e in ptest]
# Clamp the predictions into [1, 5].
ptest = [e if e <= 5 else 5. for e in ptest]
ptest = [e if e >= 1 else 1. for e in ptest]
mse = MSE(ptest, ytest)
# Variance of the targets, computed as the MSE of always predicting their mean.
# np.mean is used because a bare `mean` is only defined when pylab globals are injected.
var = MSE(ytest, [np.mean(ytest)] * len(ytest))
print("MSE testing %s" % (mse,))
print("Var testing %s" % (var,))
# Fraction of variance unexplained.
print(" FVU testing %s" % (mse / var,))
In [12]:
# Fraction of SGD predictions that fall outside the 1..5 range, per split.
# ptrain/ptest have already been clamped into [1, 5], so the check is done on
# freshly computed raw predictions; otherwise it could only ever report 0.
# Out-of-range predictions are counted (not summed) and each split is divided
# by its own size.
raw_train = clf.predict(X_train) / 1000.0
raw_test = clf.predict(X_test) / 1000.0
print(np.mean((raw_train > 5) | (raw_train < 1)))
print(np.mean((raw_test > 5) | (raw_test < 1)))
In [13]:
# class sklearn.linear_model.SGDRegressor(loss='squared_loss', penalty='l2', alpha=0.0001, l1_ratio=0.15, fit_intercept=True, n_iter=5, shuffle=False, verbose=0, epsilon=0.1, random_state=None, learning_rate='invscaling', eta0=0.01, power_t=0.25, warm_start=False)
# http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDRegressor.html
In [13]:
In [14]:
# Closed-form least-squares fit on the training data, timed for comparison with SGD.
start = time.time()
# `np` is the alias this notebook imports; the bare name `numpy` is never imported.
thetax, residualsx, rankx, sx = np.linalg.lstsq(X_train, y_train)
end = time.time()
# Wall-clock seconds spent in lstsq().
finished = end - start
print(finished)
In [62]:
def predict(data, theta):
    """Return a list with the dot product of ``theta`` and each row of ``data``."""
    prediction = []
    for row in data:
        prediction.append(np.dot(theta, row))
    return prediction
def MSE(prediction, real):
    """Return the mean squared error between ``prediction`` and ``real``.

    The two sequences are paired element-wise with ``zip``, so any surplus
    elements of the longer one are ignored.
    """
    squares = [(p - r) ** 2 for p, r in zip(prediction, real)]
    # `np` is the alias this notebook imports; the bare name `numpy` is never imported.
    return np.mean(squares)
In [64]:
# Evaluate the least-squares solution on the training set.
prediction_training = predict(X_train, thetax)
# NOTE: this rebinds the notebook-level names `y` and `p`.
# assumes targets are stored multiplied by 1000 — TODO confirm
y = [e / 1000.0 for e in y_train]
p = [e / 1000.0 for e in prediction_training]
mse = MSE(p, y)
# Variance of the targets, computed as the MSE of always predicting their mean.
# np.mean is used because a bare `mean` is only defined when pylab globals are injected.
var = MSE(y, [np.mean(y)] * len(y))
print("MSE training %s" % (mse,))
print("Var %s" % (var,))
# Fraction of variance unexplained.
print(" FVU training %s" % (mse / var,))
In [49]:
In [66]:
# Evaluate the least-squares solution on the held-out test set.
prediction_testing = predict(X_test, thetax)
# NOTE: this rebinds the notebook-level names `y` and `p`.
# assumes targets are stored multiplied by 1000 — TODO confirm
y = [e / 1000.0 for e in y_test]
p = [e / 1000.0 for e in prediction_testing]
mse = MSE(p, y)
# Variance of the targets, computed as the MSE of always predicting their mean.
# np.mean is used because a bare `mean` is only defined when pylab globals are injected.
var = MSE(y, [np.mean(y)] * len(y))
print("MSE testing %s" % (mse,))
print("Var %s" % (var,))
# Fraction of variance unexplained.
print(" FVU testing %s" % (mse / var,))
In [51]:
# Show the elapsed time stored by whichever timing cell ran last.
print(finished)
In [47]:
In [43]:
In [ ]: