In [1]:
import numpy as np
# from bokeh.plotting import HBox, VBox, figure, show, output_file, GridPlot
# from bokeh.models.mappers import LinearColorMapper
# from bokeh.models import BasicTicker, Grid
from sklearn import metrics
from sklearn.datasets import fetch_mldata
from sklearn import preprocessing
from sklearn import cross_validation
from sklearn.datasets import fetch_olivetti_faces
from sklearn.utils.validation import check_random_state
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier, ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.cross_validation import train_test_split
from sklearn.covariance import GraphLassoCV, ledoit_wolf
from sklearn.grid_search import GridSearchCV
from scipy.spatial import distance
import scipy
import scipy.io        # needed below for savemat
import scipy.linalg    # needed below for pinv
import scipy.optimize  # needed below for check_grad
import scipy.stats     # needed below for the t-tests
import sklearn
import OVFM.Model as md
import OVFM.FeatureMap as fm
import OVFM.Risk as rsk
import OVFM.LearningRate as lr
import OVFM.DataGeneration as dg
import OVFM.SGD as sgd
import OVFM.FISTA as fista
import time
import timeit
import os
import sys
In [2]:
def generate_features_train( nb_points, nb_gaussian, d, sigma = None ):
    # number of points drawn from each Gaussian (integer division)
    nb_points = nb_points / nb_gaussian
    if sigma is None:
        sigma = []
        # sequence of random PSD covariance matrices, normalised in spectral norm
        for i in xrange( nb_gaussian ):
            temp = 2 * np.random.rand( d, d ) - 1
            temp = np.dot( temp, temp.T )
            sigma.append( temp / np.linalg.norm( temp, 2 ) )
    # Generate centroids
    centers = [ 10 * np.random.rand( d ) for i in xrange( nb_gaussian ) ]
    # Generate data
    gen = [ np.random.multivariate_normal( centers[ i ], sigma[ i ], nb_points ) for i in xrange( nb_gaussian ) ]
    return sklearn.preprocessing.StandardScaler( ).fit_transform( np.concatenate( gen, axis = 0 ) )

def generate_features_test( nb_points, d ):
    return sklearn.preprocessing.MinMaxScaler( ( -1, 1 ) ).fit_transform( np.random.rand( nb_points, d ) )
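In [ ]:
# Hypothetical smoke test for the two generators above (not part of the
# original experiments): draw a small mixture-of-Gaussians training set and a
# uniform test set, then check the shapes and the scaling.  Sizes are
# illustrative only.
X_demo = generate_features_train( 100, 10, 5 )
X_demo_test = generate_features_test( 50, 5 )
print X_demo.shape, np.allclose( X_demo.mean( axis = 0 ), 0 ), np.allclose( X_demo.std( axis = 0 ), 1 )
print X_demo_test.shape, X_demo_test.min( ), X_demo_test.max( )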
In [3]:
def MSE( p, y ):
    return np.mean( ( p - y ) ** 2 )

def RMSE( p, y ):
    return np.sqrt( MSE( p, y ) )

def MSE_generalisation( model_gen, model_learn, nb_points = 10000 ):
    # uses the global input dimension d
    X_test = generate_features_test( nb_points, d )
    y_test = sklearn.preprocessing.MinMaxScaler( ( -1, 1 ) ).fit_transform( model_gen( X_test ) )
    return MSE( y_test, model_learn( X_test ) ), X_test, y_test

def RMSE_generalisation( model_gen, model_learn, nb_points = 10000 ):
    mse, X, y = MSE_generalisation( model_gen, model_learn, nb_points )
    return np.sqrt( mse ), X, y
In [4]:
def generate_targets( model, X, cache = 10000 ):
    # draw random coefficients in [-1, 1) and evaluate the model block by block
    # (blocks of `cache` rows) to limit memory usage
    model.coefs1D = 2 * np.random.rand( model.coefs1D.shape[ 0 ], 1 ) - 1
    y = np.empty( ( X.shape[ 0 ], model.o ) )
    for i in xrange( int( np.ceil( float( X.shape[ 0 ] ) / cache ) ) ):
        y[ i * cache:min( ( i + 1 ) * cache, X.shape[ 0 ] ), : ] = model( X[ i * cache:min( ( i + 1 ) * cache, X.shape[ 0 ] ), : ].T ).T
    return y
In [5]:
def tune_eta0( model, X, y, eta0_grid, lbda, eps, batch ):
    # grid search for the constant SGD step size on a 10% subsample of the data
    Xt, _, yt, _ = sklearn.cross_validation.train_test_split( X, y, train_size = .1 )
    best_err = np.inf
    best_eta0 = eta0_grid[ 0 ]
    risk = rsk.Ridge( lbda )
    lb = lr.Constant( 0. ) # no bias
    for eta0 in eta0_grid:
        lc = lr.Constant( eta0 )
        opt = sgd.SGD( risk, eps, lc, lb, batch, 0 )
        model.reset( )
        opt.fit( model, Xt, yt )
        err = RMSE( model( Xt ), yt )
        if err < best_err:
            best_err = err
            best_eta0 = eta0
    return best_eta0, best_err
In [6]:
filename = time.strftime( "noise_test_%Y%m%d-%H%M%S" )
if not os.path.exists( filename ):
    os.makedirs( filename )
if not os.path.exists( filename + '/data' ):
    os.makedirs( filename + '/data' )
if not os.path.exists( filename + '/res' ):
    os.makedirs( filename + '/res' )
# Problem parameters
d = 20
p = 20
D = 1000
n = 10000
n_gaussians = 10000
gamma = 0.01 / d
LT = np.eye( p )
# LT = 0.7 * np.eye( p ) + 0.3 * np.ones( ( p, p ) )
LT = 2 * np.random.rand( p, p ) - 1
LT = np.dot( LT, LT.T )
LTU, LTD, LTP = np.linalg.svd( LT )
# keep only the 10 largest singular values (rank-10 output metric LT)
for i in xrange( LTD.shape[ 0 ] - 10 ):
    LTD[ p - i - 1 ] = 0
LT = np.dot( LTU * LTD, LTP )
LT = LT / np.linalg.norm( LT, 2 )
# for i in xrange( LT.shape[ 0 ] ):
# for j in xrange( LT.shape[ 1 ] ):
# if i == j:
# pass
# else:
# if np.random.rand( ) > 0.7:
# LT[ i, j ] = 0
# LT[ j, i ] = 0
# LT = LT / np.linalg.norm( LT, 2 )
# Generate train database
X = generate_features_train( n, n_gaussians, d )
model_gen = md.Model( fm.DecomposableFF( gamma, d, D, LT ) )
y = generate_targets( model_gen, X )
# np.savetxt( filename + '/data/X.txt', X )
# np.savetxt( filename + '/data/y.txt', y )
# np.savetxt( filename + '/data/LT.txt', LT )
# def risk_func( w, risk, model, features, targets ):
# model.coefs = w.reshape( model.coefs.shape )
# return risk( features, targets, model )
# def risk_grad( w, risk, model, features, targets ):
# model.coefs = w.reshape( model.coefs.shape )
# return risk.coefs_gradient( model( features ), targets, model )
# lbda1 = 0
# risk = rsk.Ridge( lbda1 )
# print scipy.optimize.check_grad( risk_func, risk_grad, np.random.rand( model_gen.coefs.shape[ 1 ] ), risk, model_gen, X[ :1, : ], y[ :1, : ] )
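In [ ]:
# Hypothetical sanity check of the generated problem (not in the original
# notebook): LT should be positive semi-definite with rank at most 10 and unit
# spectral norm, and the targets should have one row per training point.
print X.shape, y.shape
print np.linalg.matrix_rank( LT ), np.linalg.norm( LT, 2 )
print np.all( np.linalg.eigvalsh( LT ) > -1e-10 )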
In [ ]:
# res0 = np.empty( ( 10, 5, 3, 10, 4 ) )
# for rep in xrange( 0, 10 ):
# yn = y + 0 * np.var( y ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ), y.shape[ 0 ] )
# X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split( X, yn, train_size = 0.8 )
# # y_train = y_train + np.var( y_train ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ) )
# sigma = np.median( scipy.spatial.distance.pdist( X ) )
# sigma_y = np.median( scipy.spatial.distance.pdist( y_train.T ) )
# gamma_y = 1. / sigma_y ** 2
# LS = np.exp( gamma_y * scipy.spatial.distance.squareform( scipy.spatial.distance.pdist( y_train.T ) ) )
# LS = LS / np.linalg.norm( LS, 2 )
# np.random.seed( seed = 0 )
# for i, gamma in enumerate( np.logspace( -4, 0, 5 ) ):
# print i
# for j, L in enumerate( [ np.eye( p ), LS, LT ] ):
# model_learn_id = md.Model( fm.DecomposableFF( gamma, d, D, L ) )
# for k, lbda in enumerate( np.logspace( -10, 1, 10 ) ):
# # eta0, _ = tune_eta0( model_learn_id, X_train, y_train, np.logspace( -3, 1, 10 ), lbda, 0.001, 1 )
# risk = rsk.Ridge( lbda )
# lb = lr.Constant( 0. ) # no bias
# lc = lr.InvScaling( 1, lbda, 1 )
# opt = sgd.SGD( risk, 10000, lc, lb, 1, 0 )
# model_learn_id.reset( )
# start = time.clock( )
# opt.fit( model_learn_id, X_train, y_train )
# res0[ rep, i, j, k, : ] = lbda, RMSE( model_learn_id( X_train ), y_train ), RMSE( model_learn_id( X_test ), y_test ), time.clock( ) - start
# Extra-trees baseline on targets corrupted with 0.5 * var(y) Gaussian noise
errr = np.empty( 10 )
for rep in xrange( 0, 10 ):
    yn = y + 0.5 * np.var( y ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ), y.shape[ 0 ] )
    X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split( X, yn, train_size = 0.8 )
    tree = ExtraTreesRegressor( )
    start = time.clock( )
    tree.fit( X_train, y_train )
    print time.clock( ) - start
    start = time.clock( )
    errr[ rep ] = RMSE( tree.predict( X_test ), y_test )
    print time.clock( ) - start
In [ ]:
print np.mean( errr )
print np.std( errr )
In [ ]:
res01 = np.zeros( ( 10, 5, 3, 10, 4 ) )
for rep in xrange( 0, 10 ):
    yn = y + 0.1 * np.var( y ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ), y.shape[ 0 ] )
    X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split( X, yn, train_size = 0.8 )
    # y_train = y_train + np.var( y_train ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ) )
    sigma = np.median( scipy.spatial.distance.pdist( X ) )
    sigma_y = np.median( scipy.spatial.distance.pdist( y_train.T ) )
    gamma_y = 1. / sigma_y ** 2
    LS = np.exp( gamma_y * scipy.spatial.distance.squareform( scipy.spatial.distance.pdist( y_train.T ) ) )
    LS = LS / np.linalg.norm( LS, 2 )
    np.random.seed( seed = 0 )
    for i, gamma in enumerate( np.logspace( -4, 0, 5 ) ):
        print i
        for j, L in enumerate( [ np.eye( p ), LS, LT ] ):
            model_learn_id = md.Model( fm.DecomposableFF( gamma, d, D, L ) )
            for k, lbda in enumerate( np.logspace( -10, 1, 10 ) ):
                # eta0, _ = tune_eta0( model_learn_id, X_train, y_train, np.logspace( -3, 1, 10 ), lbda, 0.001, 1 )
                risk = rsk.Ridge( lbda )
                lb = lr.Constant( 0. ) # no bias
                lc = lr.InvScaling( 1, lbda, 1 )
                opt = sgd.SGD( risk, 10000, lc, lb, 1, 0 )
                model_learn_id.reset( )
                start = time.clock( )
                opt.fit( model_learn_id, X_train, y_train )
                res01[ rep, i, j, k, : ] = lbda, RMSE( model_learn_id( X_train ), y_train ), RMSE( model_learn_id( X_test ), y_test ), time.clock( ) - start
In [ ]:
print res01[ 9, :, :, :]
In [ ]:
print np.dot( ( model_learn_id.feature_map.grkhs * model_learn_id.coefs.reshape( ( model_learn_id.feature_map.shape[ 1 ] / ( 2 * model_learn_id.feature_map.B.shape[ 1 ] ), model_learn_id.feature_map.B.shape[ 1 ] ) ).T ).T, model_learn_id.feature_map.A ).shape
# model_learn_id.coefs.reshape( ( model_learn_id.feature_map.D, model_learn_id.feature_map.o ) )
# print model_learn_id.feature_map.grkhs * model_learn_id.coefs.reshape( ( model_learn_id.feature_map.shape[ 1 ] / ( 2 * model_learn_id.feature_map.B.shape[ 1 ] ), model_learn_id.feature_map.B.shape[ 1 ] ) ).T
# print model_learn_id.coefs.shape
# print model_learn_id.feature_map.D
# print model_learn_id.feature_map.shape
# print model_learn_id.feature_map.B.shape
In [ ]:
res1 = np.empty( ( 10, 5, 3, 10, 4 ) )
for rep in xrange( 0, 10 ):
    yn = y + .5 * np.var( y ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ), y.shape[ 0 ] )
    X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split( X, yn, train_size = 0.8 )
    # y_train = y_train + np.var( y_train ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ) )
    sigma = np.median( scipy.spatial.distance.pdist( X ) )
    sigma_y = np.median( scipy.spatial.distance.pdist( y_train.T ) )
    gamma_y = 1. / sigma_y ** 2
    LS = np.exp( gamma_y * scipy.spatial.distance.squareform( scipy.spatial.distance.pdist( y_train.T ) ) )
    LS = LS / np.linalg.norm( LS, 2 )
    np.random.seed( seed = 0 )
    for i, gamma in enumerate( np.logspace( -4, 0, 5 ) ):
        print i
        for j, L in enumerate( [ np.eye( p ), LS, LT ] ):
            model_learn_id = md.Model( fm.DecomposableFF( gamma, d, D, L ) )
            for k, lbda in enumerate( np.logspace( -10, 1, 10 ) ):
                # eta0, _ = tune_eta0( model_learn_id, X_train, y_train, np.logspace( -3, 1, 10 ), lbda, 0.001, 1 )
                risk = rsk.Ridge( lbda )
                lb = lr.Constant( 0. ) # no bias
                lc = lr.InvScaling( 1, lbda, 1 )
                opt = sgd.SGD( risk, 10000, lc, lb, 1, 0 )
                model_learn_id.reset( )
                start = time.clock( )
                opt.fit( model_learn_id, X_train, y_train )
                res1[ rep, i, j, k, : ] = lbda, RMSE( model_learn_id( X_train ), y_train ), RMSE( model_learn_id( X_test ), y_test ), time.clock( ) - start
In [ ]:
res001 = np.empty( ( 10, 5, 3, 10, 4 ) )
for rep in xrange( 0, 10 ):
    yn = y + 0.01 * np.var( y ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ), y.shape[ 0 ] )
    X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split( X, yn, train_size = 0.8 )
    # y_train = y_train + np.var( y_train ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ) )
    sigma = np.median( scipy.spatial.distance.pdist( X ) )
    sigma_y = np.median( scipy.spatial.distance.pdist( y_train.T ) )
    gamma_y = 1. / sigma_y ** 2
    LS = np.exp( gamma_y * scipy.spatial.distance.squareform( scipy.spatial.distance.pdist( y_train.T ) ) )
    LS = LS / np.linalg.norm( LS, 2 )
    np.random.seed( seed = 0 )
    for i, gamma in enumerate( np.logspace( -4, 0, 5 ) ):
        print i
        for j, L in enumerate( [ np.eye( p ), LS, LT ] ):
            model_learn_id = md.Model( fm.DecomposableFF( gamma, d, D, L ) )
            for k, lbda in enumerate( np.logspace( -10, 1, 10 ) ):
                # eta0, _ = tune_eta0( model_learn_id, X_train, y_train, np.logspace( -3, 1, 10 ), lbda, 0.001, 1 )
                risk = rsk.Ridge( lbda )
                lb = lr.Constant( 0. ) # no bias
                lc = lr.InvScaling( 1, lbda, 1 )
                opt = sgd.SGD( risk, 10000, lc, lb, 1, 0 )
                model_learn_id.reset( )
                start = time.clock( )
                opt.fit( model_learn_id, X_train, y_train )
                res001[ rep, i, j, k, : ] = lbda, RMSE( model_learn_id( X_train ), y_train ), RMSE( model_learn_id( X_test ), y_test ), time.clock( ) - start
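In [ ]:
# Hypothetical summary (not in the original notebook): mean test RMSE over the
# 10 repeats for each (gamma, output metric), minimised over lambda, as a more
# compact view of the res001 array.
mean_test = np.mean( res001[ :, :, :, :, 2 ], axis = 0 )  # shape (gamma, metric, lambda)
print np.min( mean_test, axis = 2 )  # rows: gamma values, columns: ID, LS, LT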
In [ ]:
# requires the bokeh imports commented out in the first cell:
# from bokeh.plotting import figure, show, output_file, HBox
def errorbar(fig, x, y, legend, xerr=None, yerr=None, color='red' ):
    fig.line(x, y, color=color, legend=legend, line_width=2 )
    if xerr is not None:
        x_err_x = []
        x_err_y = []
        for px, py, err in zip(x, y, xerr):
            x_err_x.append((px - err, px + err))
            x_err_y.append((py, py))
        fig.multi_line(x_err_x, x_err_y, color=color )
    if yerr is not None:
        y_err_x = []
        y_err_y = []
        for px, py, err in zip(x, y, yerr):
            y_err_x.append((px, px))
            y_err_y.append((py - err, py + err))
        fig.multi_line(y_err_x, y_err_y, color=color )

# output to static HTML file
output_file("lines.html", title="line plot example")
gamma_arr = np.logspace( -4, 0, 5 )
# res = np.mean( res01, axis = 0 )
res = res1
res_mean = np.mean( res[ :, :, :, : ], axis = 0 )
pl = []
for i in xrange( 0, 5 ): # gamma
    pl.append( figure(title="gamma = " + str( gamma_arr[ i ] ), x_axis_label='lambda', y_axis_label='RMSE', x_axis_type="log", y_axis_type="log", x_range=[1e-10, 1e1], y_range=[1e-3, 1] ) )
    errorbar( pl[ i ], res_mean[ i, 0, :, 0], res_mean[ i, 0, :, 2], 'ID', yerr = np.std( res[ :, i, 0, :, 2], axis = 0 ), color='red' )
    errorbar( pl[ i ], res_mean[ i, 1, :, 0], res_mean[ i, 1, :, 2], 'LS', yerr = np.std( res[ :, i, 1, :, 2], axis = 0 ), color='blue' )
    errorbar( pl[ i ], res_mean[ i, 2, :, 0], res_mean[ i, 2, :, 2], 'LT', yerr = np.std( res[ :, i, 2, :, 2], axis = 0 ), color='green' )
    # two-sample t-tests between the three output metrics (p-values)
    print scipy.stats.ttest_ind( res[ :, i, 0, :, 2], res[ :, i, 1, :, 2] )[ 1 ]
    print scipy.stats.ttest_ind( res[ :, i, 0, :, 2], res[ :, i, 2, :, 2] )[ 1 ]
    print scipy.stats.ttest_ind( res[ :, i, 1, :, 2], res[ :, i, 2, :, 2] )[ 1 ]
    print 'next'
    # pl[ i ].line( res[ i, 0, :, 0], res[ i, 0, :, 2] ** 2, legend="Id", line_width=2, color = "red" )
    # pl[ i ].line( res[ i, 1, :, 0], res[ i, 1, :, 2] ** 2, legend="LS", line_width=2, color = "blue" )
    # pl[ i ].line( res[ i, 2, :, 0], res[ i, 2, :, 2] ** 2, legend="LT", line_width=2, color = "green" )
# show the results
show( HBox( pl[ 0 ], pl[ 1 ], pl[ 2 ], pl[ 3 ], pl[ 4 ] ) )
print res_mean[ 1, :, :, 2 ]
In [ ]:
res01[ :, 2, 2, :, 3]
In [ ]:
np.std( res01[ :, 2, 0, :, 2], axis = 0 )
In [ ]:
matfile = 'res1.mat'
scipy.io.savemat(matfile, mdict={'out': res1}, oned_as='row')
In [ ]:
pipicaca = np.loadtxt( 'res0.csv' )  # text file, so loadtxt rather than np.load
In [7]:
A = 2 * np.random.rand( p, p ) - 1
A = np.dot( A, A.T )
A = A / np.linalg.norm( A, 2 )
yn = y + 0.0 * np.var( y ) * np.random.multivariate_normal( np.zeros( p ), A, y.shape[ 0 ] )
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split( X, yn, train_size = 0.8 )
In [11]:
gamma = .001
lbda1 = 1e-6
# lbda1 = 0
lbda2 = 0.00
model_learn_id = md.Model( fm.DecomposableFF( gamma, d, 1000, LT ) )
lfloss = rsk.LF_loss( )
print lfloss( X[ :1, : ].T, y[ :1, : ].T, model_learn_id )
print lfloss.check_coefs_gradient( X[ :2, : ].T, y[ :2, : ].T, model_learn_id )
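In [ ]:
# Hypothetical use of tune_eta0 (not in the original notebook): pick a constant
# SGD step size for the model above on the current train split.  The grid, the
# eps argument (0.001) and the batch size (1) mirror the commented-out call in
# the experiment loops; lbda1 is the ridge parameter set in the previous cell.
eta0_best, eta0_err = tune_eta0( model_learn_id, X_train, y_train, np.logspace( -3, 1, 10 ), lbda1, 0.001, 1 )
print eta0_best, eta0_err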
In [ ]:
gamma = .001
lbda1 = 1e-6
# lbda1 = 0
lbda2 = 0.00
model_learn_id = md.Model( fm.DecomposableFF( gamma, d, 1000, LT ) )
risk = rsk.Ridge( lbda1 )
lb = lr.Constant( 0. ) # no bias
lc = lr.InvScaling( 1., lbda1, 1 )
opt = sgd.SGD( risk, 10000, lc, lb, 10, 0 )
model_learn_id.reset( )
start = time.clock( )
opt.fit( model_learn_id, X_train, y_train )
stop = time.clock( )
start_pred = time.clock( )
print lbda1, lbda2, RMSE( model_learn_id( X_train.T ).T, y_train ), RMSE( model_learn_id( X_test.T ).T, y_test ), stop - start
stop_pred = time.clock( )
print stop_pred - start_pred
print np.std( y_train ), np.std( y_test )
# print np.linalg.matrix_rank( model_learn_id.coefs.reshape( ( 2 * D, p ) ) )
# print np.linalg.matrix_rank( model_learn_id.feature_map.B )
In [ ]:
print model_learn_id.feature_map.phi_x.shape
print model_learn_id.feature_map._shapef
print model_learn_id.coefs2D.shape
# print np.dot( model_learn_id.feature_map.B.T, model_learn_id.feature_map.B )
print scipy.linalg.pinv( model_learn_id.feature_map.B ) - model_learn_id.feature_map.B.T
# print model_learn_id.feature_map.B.shape
In [ ]:
import matplotlib.pyplot as plt
f, axarr = plt.subplots( 2, sharex=True )
axarr[0].matshow( model_learn_id.coefs2D.T, aspect='auto', cmap='bwr')
axarr[1].matshow( np.log( model_learn_id.feature_map.grkhs.T ), aspect='auto', cmap='bwr' )
plt.show( )
In [ ]:
# print model_learn_id.feature_map.grkhs[ model_learn_id.coefs.ravel( ) == np.NaN ]
# print model_learn_id.coefs.reshape( ( D, p ) ).T[ :, 854 ]
print model_learn_id.feature_map.grkhs.min( )
print model_learn_id.coefs2D.T
# print np.linalg.norm( model_learn_id.feature_map.Z[ :, 854 ] )
In [ ]:
gamma = .001
lbda1 = 0.0
lbda2 = 0.0
model_learn_id = md.Model( fm.DecomposableFF( gamma, d, 1000, LT ) )
risk = rsk.Ridge_B( lbda1, lbda2 )
lb = lr.Constant( 0. ) # no bias
lc = lr.InvScaling( 1, lbda1, 1 )
opt = sgd.SGD( risk, 1000, lc, lb, 10, 0 )
model_learn_id.reset( )
start = time.clock( )
opt.fit( model_learn_id, X_train, y_train )
stop = time.clock( )
start_pred = time.clock( )
print lbda1, lbda2, RMSE( model_learn_id( X_train ), y_train ), RMSE( model_learn_id( X_test ), y_test ), stop - start
stop_pred = time.clock( )
print stop_pred - start_pred
print np.std( y_train ), np.std( y_test )
In [ ]:
import matplotlib.pyplot as plt
plt.pcolor( model_learn_id.feature_map.A, cmap = 'seismic' )
plt.show( )
In [ ]:
np.linalg.matrix_rank( model_learn_id.feature_map.B )
In [ ]:
np.linalg.svd( np.random.rand( 2 * model_learn_id.feature_map.D, model_learn_id.o ), full_matrices = False )[ 0 ].shape
In [ ]:
np.abs( model_learn_id.feature_map.B ).sum( axis = 0 )
In [ ]:
print model_learn_id.feature_map.B
In [ ]:
d1 > 0.01
In [ ]:
x = np.random.rand( 5 )
y = np.random.rand( 5, 10 )
In [ ]:
x.T * y
In [ ]:
from sklearn.datasets import fetch_mldata

def simplex_map( i ):
    # maps i classes to the vertices of a regular simplex in R^(i-1)
    if i > 2:
        return np.bmat( [ [ [ [ 1 ] ], np.repeat( -1.0 / ( i - 1 ), i - 1 ).reshape( ( 1, i - 1 ) ) ], [ np.zeros( ( i - 2, 1 ) ), simplex_map( i - 1 ) * np.sqrt( 1.0 - 1.0 / ( i - 1 ) ** 2 ) ] ] )
    elif i == 2:
        return np.array( [ [ 1, -1 ] ] )
    else:
        raise ValueError( "invalid number of classes" )
In [ ]:
mnist = fetch_mldata( 'MNIST original', data_home='mnist_data' )
data_X = np.asarray( mnist.data, np.float )
data_y = np.asarray( mnist.target )
# data_X = preprocessing.StandardScaler( ).fit_transform( data_X )
data_X = 2 * ( data_X / 255.0 ) - 1
# data_X = ( data_X.T - np.mean( data_X, axis = 1 ) ).T
# data_X = ( data_X.T / np.std( data_X, axis = 1 ) ).T
# data_X = preprocessing.StandardScaler( ).fit_transform( data_X )
# print data_X.mean( )
lb = preprocessing.LabelBinarizer( neg_label=0, pos_label=1 )
data_y = np.dot( np.asarray( lb.fit_transform( data_y ), np.float ), simplex_map( 10 ).T )
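In [ ]:
# Hypothetical roundtrip check (not in the original notebook): decoding the
# simplex-coded targets with argmax( y . S ) should recover the original MNIST
# labels, exactly as done for the predictions further down.
decoded = np.asarray( np.argmax( np.dot( data_y, simplex_map( 10 ) ), axis = 1 ) ).ravel( )
print np.all( decoded == mnist.target.astype( np.int ) )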
In [ ]:
train_X, test_X, train_y, test_y = sklearn.cross_validation.train_test_split( data_X, data_y, test_size = 1.0 / 7.0 )
train_X = np.asarray( train_X )
test_X = np.asarray( test_X )
train_y = np.asarray( train_y )
test_y = np.asarray( test_y )
In [ ]:
print data_y
In [ ]:
gamma = 0.0073
d = train_X.shape[ 1 ]
p = train_y.shape[ 1 ]
lbda1 = 0.00
lbda2 = 0.00  # only used in the print below
eps = .1
model_learn_id = md.Model( fm.DecomposableFF( gamma, d, 1000, np.eye( p ) ) )
risk = rsk.Ridge( lbda1, eps )
lb = lr.Constant( 0. ) # no bias
lc = lr.InvScaling( 1, lbda1, 1 )
opt = sgd.SGD( risk, 20000, lc, lb, 10, 1000 )
model_learn_id.reset( )
start = time.clock( )
opt.fit( model_learn_id, train_X, train_y )
stop = time.clock( )
start_pred = time.clock( )
S = simplex_map( 10 )
pred_train = model_learn_id( train_X )
pred_train = np.argmax( np.dot( pred_train, S ), axis = 1 )
ground_truth_train = np.argmax( np.dot( train_y, S ), axis = 1 )
pred_test = model_learn_id( test_X )
pred_test = np.argmax( np.dot( pred_test, S ), axis = 1 )
ground_truth_test = np.argmax( np.dot( test_y, S ), axis = 1 )
print lbda1, lbda2, 100 * np.float( np.sum( pred_train != ground_truth_train ) ) / pred_train.shape[ 0 ], 100 * np.float( np.sum( pred_test != ground_truth_test ) ) / pred_test.shape[ 0 ], stop - start
stop_pred = time.clock( )
print stop_pred - start_pred
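In [ ]:
# Hypothetical evaluation detail (not in the original notebook): accuracy and
# per-class confusion matrix for the test predictions computed above, using the
# sklearn.metrics module imported in the first cell.
gt = np.asarray( ground_truth_test ).ravel( )
pr = np.asarray( pred_test ).ravel( )
print metrics.accuracy_score( gt, pr )
print metrics.confusion_matrix( gt, pr )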
In [ ]:
gamma = 1.0 / ( 2 * 11 ** 2 )
d = train_X.shape[ 1 ]
p = train_y.shape[ 1 ]
lbda1 = 0.00
lbda2 = 0.00
model_learn_id = md.Model( fm.DecomposableFF( gamma, d, 1000, np.eye( p ) ) )
risk = rsk.Ridge_B( lbda1, lbda2, 0.0 )
lb = lr.Constant( 0. ) # no bias
lc = lr.InvScaling( 10, lbda1, 1 )
opt = sgd.SGD( risk, 20000, lc, lb, 100, 1000 )
model_learn_id.reset( )
start = time.clock( )
opt.fit( model_learn_id, train_X, train_y )
stop = time.clock( )
start_pred = time.clock( )
S = simplex_map( 10 )
pred_train = model_learn_id( train_X )
pred_train = np.argmax( np.dot( pred_train, S ), axis = 1 )
ground_truth_train = np.argmax( np.dot( train_y, S ), axis = 1 )
pred_test = model_learn_id( test_X )
pred_test = np.argmax( np.dot( pred_test, S ), axis = 1 )
ground_truth_test = np.argmax( np.dot( test_y, S ), axis = 1 )
print lbda1, lbda2, 100 * np.float( np.sum( pred_train != ground_truth_train ) ) / pred_train.shape[ 0 ], 100 * np.float( np.sum( pred_test != ground_truth_test ) ) / pred_test.shape[ 0 ], stop - start
stop_pred = time.clock( )
print stop_pred - start_pred
In [ ]:
import matplotlib.pyplot as plt
plt.pcolor( model_learn_id.feature_map.A, cmap = 'seismic' )
plt.show( )
In [ ]:
np.linalg.matrix_rank( model_learn_id.feature_map.B )
In [ ]:
print model_learn_id.feature_map.A
In [ ]:
print np.cov( y.T )
In [ ]:
# risk_func / risk_grad are the helpers from the commented-out gradient-check
# code in the problem-setup cell above; uncomment them there before running
# this cell and the ones below.
risk_func( np.random.rand( 1, model_learn_id.coefs.shape[ 1 ] ), risk, model_learn_id, train_X[ :1, : ], train_y[ :1, : ] )
In [ ]:
risk_grad( np.random.rand( 1, model_learn_id.coefs.shape[ 1 ] ), risk, model_learn_id, train_X[ :1, : ], train_y[ :1, : ] ).shape
In [ ]:
print model_learn_id( train_X[ :1, : ] )
In [ ]:
A = np.random.rand( 10, 10 )
np.linalg.norm( A, 'fro' ) ** 2 - ( A ** 2 ).sum( )
In [ ]:
print model_learn_id.coefs.shape
In [ ]:
w = np.random.rand( 1, model_learn_id.coefs.shape[ 1 ] )
w_aa = np.zeros( ( 1, model_learn_id.coefs.shape[ 1 ] ) )
w_aa[ 0, 0 ] = 1
# finite-difference approximation of -d(risk)/dw_0 along the first coordinate
( risk_func( w, risk, model_learn_id, train_X[ :1, : ], train_y[ :1, : ] ) - risk_func( w + 1e-8 * w_aa, risk, model_learn_id, train_X[ :1, : ], train_y[ :1, : ] ) ) / 1e-8
In [ ]:
risk_grad( w, risk, model_learn_id, train_X[ :1, : ], train_y[ :1, : ] )[ 0, 0 ]
In [ ]:
scipy.optimize.check_grad( risk_func, risk_grad, w, risk, model_learn_id, train_X[ :1, : ], train_y[ :1, : ] )
In [ ]:
# Hi,
# Here are the results. I used a train/test/validation procedure with 2500/1000/1500 data points sampled, repeated 10 times. Parameters were learned on the train/test portion, MSE results on the validation set. The decomposable kernel is paired with pinv(D - W), where W is a linear, quadratic or Gaussian kernel over the output features.
# Average MSEs over the 10 repeats:
# ORFF with k.I: 0.01975, runtime 71.3
# ORFF with k.Alin: 0.01953, runtime 75.3
# ORFF with k.Aquad: 0.01933, runtime 74.1
# ORFF with k.Agauss: 0.01929, runtime 75.5
# Ridge regression: 0.01957, runtime 1.1
# Knn: 0.01929, runtime 12.3
# Extra-trees: 0.01933, runtime 42.7
# Linear regression: 0.13484, runtime 81.3
# -> We match state-of-the-art performance.
# The linear regression seems to have failed, I don't know why. I use Romain's code:
# estimator.fit( X_train, Y_train )
# Y_test_predict[ name ] = estimator.predict( X_val )
# I also ran the experiments using the whole dataset, i.e. a 15226/6085/9164 split (sum = 30475). Again, I randomly sampled the split 10 times. Due to the large size, I reused the parameters learned from the previous smaller splits. I don't yet have the competing approaches on the full dataset.
# ORFF with k.I: 0.01909, runtime 426.4
# ORFF with k.Alin: 0.01982, runtime 409.0
# ORFF with k.Aquad: 0.01930, runtime 435.0
# ORFF with k.Agauss: 0.01913, runtime 388.4
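In [ ]:
# Minimal sketch (an assumption, not the code used for the results quoted
# above) of the pinv(D - W) output metric mentioned in the message: W is a
# Gaussian kernel between the output coordinates (columns of y_train), D its
# degree matrix, and the pseudo-inverse of the graph Laplacian D - W is
# normalised to unit spectral norm, like LS and LT earlier in this notebook.
dists = scipy.spatial.distance.squareform( scipy.spatial.distance.pdist( y_train.T ) )
gamma_w = 1. / np.median( dists[ dists > 0 ] ) ** 2
W = np.exp( -gamma_w * dists ** 2 )
Dg = np.diag( W.sum( axis = 1 ) )
Lgauss = np.linalg.pinv( Dg - W )
Lgauss = Lgauss / np.linalg.norm( Lgauss, 2 )
print Lgauss.shape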