In [1]:
from ipyparallel import Client
rc = Client()
dview = rc[:]

with dview.sync_imports():
    import numpy as np
    from sklearn import metrics
    from sklearn.datasets import fetch_mldata

    from sklearn import preprocessing
    from sklearn import cross_validation
    from sklearn.datasets import fetch_olivetti_faces
    from sklearn.utils.validation import check_random_state
    from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier, ExtraTreesRegressor
    from sklearn.neighbors import KNeighborsRegressor
    from sklearn.linear_model import LinearRegression
    from sklearn.linear_model import Ridge, RidgeCV
    from sklearn.cross_validation import train_test_split
    from sklearn.covariance import GraphLassoCV, ledoit_wolf
    from sklearn.grid_search import GridSearchCV
    from scipy.spatial import distance

    import scipy
    import sklearn
    import OVFM.Model as md
    import OVFM.FeatureMap as fm
    import OVFM.Risk as rsk
    import OVFM.LearningRate as lr
    import OVFM.DataGeneration as dg
    import OVFM.SGD as sgd
    import OVFM.FISTA as fista

    import time
    import timeit
    import os
    import sys


importing numpy on engine(s)
importing metrics from sklearn on engine(s)
importing fetch_mldata from sklearn.datasets on engine(s)
importing preprocessing from sklearn on engine(s)
importing cross_validation from sklearn on engine(s)
importing fetch_olivetti_faces from sklearn.datasets on engine(s)
importing check_random_state from sklearn.utils.validation on engine(s)
importing ExtraTreesClassifier,RandomForestClassifier,ExtraTreesRegressor from sklearn.ensemble on engine(s)
importing KNeighborsRegressor from sklearn.neighbors on engine(s)
importing LinearRegression from sklearn.linear_model on engine(s)
importing Ridge,RidgeCV from sklearn.linear_model on engine(s)
importing train_test_split from sklearn.cross_validation on engine(s)
importing GraphLassoCV,ledoit_wolf from sklearn.covariance on engine(s)
importing GridSearchCV from sklearn.grid_search on engine(s)
importing distance from scipy.spatial on engine(s)
importing scipy on engine(s)
importing sklearn on engine(s)
importing OVFM.Model on engine(s)
importing OVFM.FeatureMap on engine(s)
importing OVFM.Risk on engine(s)
importing OVFM.LearningRate on engine(s)
importing OVFM.DataGeneration on engine(s)
importing OVFM.SGD on engine(s)
importing OVFM.FISTA on engine(s)
importing time on engine(s)
importing timeit on engine(s)
importing os on engine(s)
importing sys on engine(s)
/Library/Python/2.7/site-packages/IPython/parallel.py:13: ShimWarning: The `IPython.parallel` package has been deprecated. You should import from ipyparallel instead.
  "You should import from ipyparallel instead.", ShimWarning)

In [2]:
def generate_features_train( nb_points, nb_gaussian, d, sigma = None ):
    
    nb_points = nb_points / nb_gaussian # points per Gaussian (integer division)
    
    if sigma is None:
        sigma = []
        # sequence of random positive-definite covariance matrices
        for i in xrange( nb_gaussian ):
            temp = 2 * np.random.rand( d, d ) - 1
            temp = np.dot( temp, temp.T )
            sigma.append( temp / np.linalg.norm( temp, 2 ) )

    # Generate centroids
    centers = [ 10 * np.random.rand( d ) for i in xrange( nb_gaussian ) ]
    
    # Generate data
    gen = [ np.random.multivariate_normal( centers[ i ], sigma[ i ], nb_points ) for i in xrange( nb_gaussian ) ]
    return sklearn.preprocessing.MinMaxScaler( ( -1, 1 ) ).fit_transform( np.concatenate( gen, axis = 0 ) )
    
def generate_features_test( nb_points, d ):
    return sklearn.preprocessing.MinMaxScaler( ( -1, 1 ) ).fit_transform( np.random.rand( nb_points, d ) )
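
A quick, illustrative exercise of the two generators on a small problem (this cell is an added sanity check, not part of the original experiments):

In [ ]:
# 60 points drawn from 3 random Gaussians in dimension 2, rescaled to [-1, 1]
X_small = generate_features_train( 60, 3, 2 )
print X_small.shape # (60, 2): 20 points per Gaussian
print X_small.min( ), X_small.max( ) # -1.0 1.0 after MinMaxScaler
print generate_features_test( 10, 2 ).shape # (10, 2)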

In [3]:
def MSE( p, y ):
    return np.mean( ( p - y ) ** 2 )

def RMSE( p, y ):
    return np.sqrt( MSE( p, y ) )

def MSE_generalisation( model_gen, model_learn, nb_points = int( 1e4 ) ):
    X_test = generate_features_test( nb_points, d ) # d is the global input dimension
    y_test = sklearn.preprocessing.MinMaxScaler( ( -1, 1 ) ).fit_transform( model_gen( X_test ) )
    return MSE( y_test, model_learn( X_test ) ), X_test, y_test

def RMSE_generalisation( model_gen, model_learn, nb_points = int( 1e4 ) ):
    mse, X, y = MSE_generalisation( model_gen, model_learn, nb_points )
    return np.sqrt( mse ), X, y
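
As a sanity check on the error metrics (illustrative only), a constant offset of 0.5 should give MSE 0.25 and RMSE 0.5:

In [ ]:
p_toy = 0.5 * np.ones( ( 4, 3 ) )
y_toy = np.zeros( ( 4, 3 ) )
print MSE( p_toy, y_toy ) # 0.25
print RMSE( p_toy, y_toy ) # 0.5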

In [4]:
def generate_targets( model, X, cache = 10000 ):
    model.coefs1D = 2 * np.random.rand( model.coefs1D.shape[ 0 ], 1 ) - 1
    y = np.empty( ( X.shape[ 0 ], model.o ) )
    # iterate over ceil( n / cache ) blocks so the remainder is not dropped
    for i in xrange( ( X.shape[ 0 ] + cache - 1 ) / cache ):
        stop = min( ( i + 1 ) * cache, X.shape[ 0 ] )
        y[ i * cache:stop, : ] = model( X[ i * cache:stop, : ].T ).T
    return y
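
The block loop above evaluates the model `cache` rows at a time; the ceil-division bound matters when the number of rows is not a multiple of the cache size. A standalone illustration of the covering (added example):

In [ ]:
# illustrative only: ceil( 25 / 10 ) = 3 blocks cover all 25 rows
n_toy, cache_toy = 25, 10
covered = 0
for i in xrange( ( n_toy + cache_toy - 1 ) / cache_toy ):
    stop = min( ( i + 1 ) * cache_toy, n_toy )
    covered += stop - i * cache_toy
print covered # 25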

In [5]:
def tune_eta0( model, X, y, eta0_grid, lbda, eps, batch ):
    Xt, _, yt, _ = sklearn.cross_validation.train_test_split( X, y, train_size = .1 )
    best_err = np.inf
    risk = rsk.Ridge( lbda )
    lb = lr.Constant( 0. ) # no bias
    for eta0 in eta0_grid:
        lc = lr.Constant( eta0 )
        opt = sgd.SGD( risk, eps, lc, lb, batch, 0 )
        model.reset( )
        opt.fit( model, Xt, yt )
        err = RMSE( model( Xt ), yt )
        if err < best_err:
            best_err = err
            best_eta0 = eta0
        
    return best_eta0, best_err
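
`tune_eta0` keeps the constant step size with the lowest training RMSE on a 10% subsample. The same pattern, sketched self-contained on a toy least-squares problem with a hand-rolled SGD standing in for the OVFM objects (illustrative, not the original tuning code):

In [ ]:
# illustrative stand-in for tune_eta0: pick the constant step size that
# minimises training RMSE of a plain SGD on a toy least-squares problem
X_toy = np.random.rand( 200, 3 )
y_toy = np.dot( X_toy, np.array( [ 1., -2., .5 ] ) )
best_eta0, best_err = None, np.inf
for eta0 in np.logspace( -3, 1, 10 ):
    w = np.zeros( 3 )
    for it in xrange( 1000 ):
        i = np.random.randint( 200 )
        w -= eta0 * ( np.dot( X_toy[ i ], w ) - y_toy[ i ] ) * X_toy[ i ]
    err = RMSE( np.dot( X_toy, w ), y_toy )
    if err < best_err: # diverged runs give inf/nan and are never kept
        best_eta0, best_err = eta0, err
print best_eta0, best_err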

In [6]:
# filename = time.strftime( "noise_test_%Y%m%d-%H%M%S" )

# if not os.path.exists( filename ):
#     os.makedirs( filename )
# if not os.path.exists( filename + '/data' ):
#     os.makedirs( filename + '/data' )
# if not os.path.exists( filename + '/res' ):
#     os.makedirs( filename + '/res' )

# Problem parameters
d = 20
p = 2000
D = 1000
n = 10000
n_gaussians = 10000
gamma = 0.01 / d
# LT = np.eye( p )
# LT = 0.7 * np.eye( p ) + 0.3 * np.ones( ( p, p ) )
LT = 2 * np.random.rand( p, p ) - 1
LT = np.dot( LT, LT.T )
LT = LT / np.linalg.norm( LT, 2 )
# for i in xrange( LT.shape[ 0 ] ):
#     for j in xrange( LT.shape[ 1 ] ):
#         if i == j:
#             pass
#         else:
#             if np.random.rand( ) > 0.7:
#                 LT[ i, j ] = 0
#                 LT[ j, i ] = 0
# LT = LT / np.linalg.norm( LT, 2 )

# Generate train database
X = generate_features_train( n, n_gaussians, d )
model_gen = md.Model( fm.DecomposableFF( gamma, d, D, LT, rank = p ) )
y = generate_targets( model_gen, X )
# model_gen.plot_coefs( )

# np.savetxt( filename + '/data/X.txt', X )
# np.savetxt( filename + '/data/y.txt', y )
# np.savetxt( filename + '/data/LT.txt', LT )


# def risk_func( w, risk, model, features, targets ):
#     model.coefs = w.reshape( model.coefs.shape )
#     return risk( features, targets, model )

# def risk_grad( w, risk, model, features, targets ):
#     model.coefs = w.reshape( model.coefs.shape )
#     return risk.coefs_gradient( model( features ), targets, model )

# lbda1 = 0
# risk = rsk.Ridge( lbda1 )
# print scipy.optimize.check_grad( risk_func, risk_grad, np.random.rand( model_gen.coefs.shape[ 1 ] ), risk, model_gen, X[ :1, : ], y[ :1, : ] )
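
A quick check (added, illustrative) that the LT just built has the advertised properties:

In [ ]:
# LT should be symmetric, positive semi-definite and of unit spectral norm
print np.allclose( LT, LT.T ) # True
print np.linalg.eigvalsh( LT ).min( ) >= -1e-10 # True: PSD up to rounding
print np.linalg.norm( LT, 2 ) # 1.0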

In [ ]:
# res0 = np.empty( ( 10, 5, 3, 10, 4 ) )
# for rep in xrange( 0, 10 ):
#     yn = y + 0 * np.var( y ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ), y.shape[ 0 ] )
#     X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split( X, yn, train_size = 0.8 )
#     # y_train = y_train + np.var( y_train ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ) )
#     sigma = np.median( scipy.spatial.distance.pdist( X ) )
#     sigma_y = np.median( scipy.spatial.distance.pdist( y_train.T ) )
#     gamma_y = 1. / sigma_y ** 2

#     LS = np.exp( gamma_y * scipy.spatial.distance.squareform( scipy.spatial.distance.pdist( y_train.T ) ) )
#     LS = LS / np.linalg.norm( LS, 2 )
#     np.random.seed( seed = 0 )
#     for i, gamma in enumerate( np.logspace( -4, 0, 5 ) ):
#         print i
#         for j, L in enumerate( [ np.eye( p ), LS, LT ] ):
#             model_learn_id = md.Model( fm.DecomposableFF( gamma, d, D, L ) )
#             for k, lbda in enumerate( np.logspace( -10, 1, 10 ) ):

#     #             eta0, _ = tune_eta0( model_learn_id, X_train, y_train, np.logspace( -3, 1, 10 ), lbda, 0.001, 1 )
#                 risk = rsk.Ridge( lbda )
#                 lb = lr.Constant( 0. ) # no bias
#                 lc = lr.InvScaling( 1, lbda, 1 )
#                 opt = sgd.SGD( risk, 10000, lc, lb, 1, 0 )
#                 model_learn_id.reset( )
#                 start = time.clock( )
#                 opt.fit( model_learn_id, X_train, y_train )
#                 res0[ rep, i, j, k, : ] = lbda, RMSE( model_learn_id( X_train ), y_train ), RMSE( model_learn_id( X_test ), y_test ), time.clock( ) - start

errr = np.empty( 10 )
for rep in xrange( 0, 10 ):
    yn = y + 0.5 * np.var( y ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ), y.shape[ 0 ] )
    X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split( X, yn, train_size = 0.8 )
    tree = ExtraTreesRegressor( )
    start = time.clock( )
    tree.fit( X_train, y_train )
    print time.clock( ) - start
    start = time.clock( )
    errr[ rep ] = RMSE( tree.predict( X_test ), y_test ) # evaluate the tree just fitted
    print time.clock( ) - start

In [ ]:
print np.mean( errr )
print np.std( errr )

In [ ]:
res01 = np.zeros( ( 10, 5, 3, 10, 4 ) )
for rep in xrange( 0, 10 ):
    yn = y + 0.1 * np.var( y ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ), y.shape[ 0 ] )
    X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split( X, yn, train_size = 0.8 )
    # y_train = y_train + np.var( y_train ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ) )
    sigma = np.median( scipy.spatial.distance.pdist( X ) )
    sigma_y = np.median( scipy.spatial.distance.pdist( y_train.T ) )
    gamma_y = 1. / sigma_y ** 2

    LS = np.exp( -gamma_y * scipy.spatial.distance.squareform( scipy.spatial.distance.pdist( y_train.T ) ) ) # negative exponent: the kernel must decay with distance
    LS = LS / np.linalg.norm( LS, 2 )
    np.random.seed( seed = 0 )
    for i, gamma in enumerate( np.logspace( -4, 0, 5 ) ):
        print i
        for j, L in enumerate( [ np.eye( p ), LS, LT ] ):
            model_learn_id = md.Model( fm.DecomposableFF( gamma, d, D, L ) )
            for k, lbda in enumerate( np.logspace( -10, 1, 10 ) ):

    #             eta0, _ = tune_eta0( model_learn_id, X_train, y_train, np.logspace( -3, 1, 10 ), lbda, 0.001, 1 )
                risk = rsk.Ridge( lbda )
                lb = lr.Constant( 0. ) # no bias
                lc = lr.InvScaling( 1, lbda, 1 )
                opt = sgd.SGD( risk, 10000, lc, lb, 1, 0 )
                model_learn_id.reset( )
                start = time.clock( )
                opt.fit( model_learn_id, X_train, y_train )
                res01[ rep, i, j, k, : ] = lbda, RMSE( model_learn_id( X_train ), y_train ), RMSE( model_learn_id( X_test ), y_test ), time.clock( ) - start

In [ ]:
print res01[ 9, :, :, :]

In [13]:
print np.dot( ( model_learn_id.feature_map.grkhs * model_learn_id.coefs.reshape( ( model_learn_id.feature_map.shape[ 1 ] / ( 2 * model_learn_id.feature_map.B.shape[ 1 ] ), model_learn_id.feature_map.B.shape[ 1 ] ) ).T ).T, model_learn_id.feature_map.A ).shape
# model_learn_id.coefs.reshape( ( model_learn_id.feature_map.D, model_learn_id.feature_map.o ) )
# print model_learn_id.feature_map.grkhs * model_learn_id.coefs.reshape( ( model_learn_id.feature_map.shape[ 1 ] / ( 2 * model_learn_id.feature_map.B.shape[ 1 ] ), model_learn_id.feature_map.B.shape[ 1 ] ) ).T
# print model_learn_id.coefs.shape
# print model_learn_id.feature_map.D
# print model_learn_id.feature_map.shape
# print model_learn_id.feature_map.B.shape


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-13-8ed2e00b965e> in <module>()
----> 1 np.dot( ( model_learn_id.feature_map.grkhs * model_learn_id.coefs.reshape( ( model_learn_id.feature_map.shape[ 1 ] / ( 2 * model_learn_id.feature_map.B.shape[ 1 ] ), model_learn_id.feature_map.B.shape[ 1 ] ) ).T ).T, model_learn_id.feature_map.A ).shape
      2 # model_learn_id.coefs.reshape( ( model_learn_id.feature_map.D, model_learn_id.feature_map.o ) )
      3 # print model_learn_id.feature_map.grkhs * model_learn_id.coefs.reshape( ( model_learn_id.feature_map.shape[ 1 ] / ( 2 * model_learn_id.feature_map.B.shape[ 1 ] ), model_learn_id.feature_map.B.shape[ 1 ] ) ).T
      4 # print model_learn_id.coefs.shape
      5 # print model_learn_id.feature_map.D

AttributeError: 'OVFM.Model.Model' object has no attribute 'coefs'

In [ ]:
res1 = np.empty( ( 10, 5, 3, 10, 4 ) )
for rep in xrange( 0, 10 ):
    yn = y + .5 * np.var( y ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ), y.shape[ 0 ] )
    X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split( X, yn, train_size = 0.8 )
    # y_train = y_train + np.var( y_train ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ) )
    sigma = np.median( scipy.spatial.distance.pdist( X ) )
    sigma_y = np.median( scipy.spatial.distance.pdist( y_train.T ) )
    gamma_y = 1. / sigma_y ** 2

    LS = np.exp( -gamma_y * scipy.spatial.distance.squareform( scipy.spatial.distance.pdist( y_train.T ) ) ) # negative exponent: the kernel must decay with distance
    LS = LS / np.linalg.norm( LS, 2 )
    np.random.seed( seed = 0 )
    for i, gamma in enumerate( np.logspace( -4, 0, 5 ) ):
        print i
        for j, L in enumerate( [ np.eye( p ), LS, LT ] ):
            model_learn_id = md.Model( fm.DecomposableFF( gamma, d, D, L ) )
            for k, lbda in enumerate( np.logspace( -10, 1, 10 ) ):

    #             eta0, _ = tune_eta0( model_learn_id, X_train, y_train, np.logspace( -3, 1, 10 ), lbda, 0.001, 1 )
                risk = rsk.Ridge( lbda )
                lb = lr.Constant( 0. ) # no bias
                lc = lr.InvScaling( 1, lbda, 1 )
                opt = sgd.SGD( risk, 10000, lc, lb, 1, 0 )
                model_learn_id.reset( )
                start = time.clock( )
                opt.fit( model_learn_id, X_train, y_train )
                res1[ rep, i, j, k, : ] = lbda, RMSE( model_learn_id( X_train ), y_train ), RMSE( model_learn_id( X_test ), y_test ), time.clock( ) - start

In [ ]:
res001 = np.empty( ( 10, 5, 3, 10, 4 ) )
for rep in xrange( 0, 10 ):
    yn = y + 0.01 * np.var( y ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ), y.shape[ 0 ] )
    X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split( X, yn, train_size = 0.8 )
    # y_train = y_train + np.var( y_train ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ) )
    sigma = np.median( scipy.spatial.distance.pdist( X ) )
    sigma_y = np.median( scipy.spatial.distance.pdist( y_train.T ) )
    gamma_y = 1. / sigma_y ** 2

    LS = np.exp( -gamma_y * scipy.spatial.distance.squareform( scipy.spatial.distance.pdist( y_train.T ) ) ) # negative exponent: the kernel must decay with distance
    LS = LS / np.linalg.norm( LS, 2 )
    np.random.seed( seed = 0 )
    for i, gamma in enumerate( np.logspace( -4, 0, 5 ) ):
        print i
        for j, L in enumerate( [ np.eye( p ), LS, LT ] ):
            model_learn_id = md.Model( fm.DecomposableFF( gamma, d, D, L ) )
            for k, lbda in enumerate( np.logspace( -10, 1, 10 ) ):

    #             eta0, _ = tune_eta0( model_learn_id, X_train, y_train, np.logspace( -3, 1, 10 ), lbda, 0.001, 1 )
                risk = rsk.Ridge( lbda )
                lb = lr.Constant( 0. ) # no bias
                lc = lr.InvScaling( 1, lbda, 1 )
                opt = sgd.SGD( risk, 10000, lc, lb, 1, 0 )
                model_learn_id.reset( )
                start = time.clock( )
                opt.fit( model_learn_id, X_train, y_train )
                res001[ rep, i, j, k, : ] = lbda, RMSE( model_learn_id( X_train ), y_train ), RMSE( model_learn_id( X_test ), y_test ), time.clock( ) - start

In [ ]:
from bokeh.plotting import figure, output_file, show
from bokeh.models import HBox # layout container in the Bokeh of this era; bokeh.layouts.row in newer versions
import scipy.stats

def errorbar( fig, x, y, legend, xerr = None, yerr = None, color = 'red' ):

    fig.line( x, y, color = color, legend = legend, line_width = 2 )

    if xerr is not None:
        x_err_x = []
        x_err_y = []
        for px, py, err in zip( x, y, xerr ):
            x_err_x.append( ( px - err, px + err ) )
            x_err_y.append( ( py, py ) )
        fig.multi_line( x_err_x, x_err_y, color = color )

    if yerr is not None:
        y_err_x = []
        y_err_y = []
        for px, py, err in zip( x, y, yerr ):
            y_err_x.append( ( px, px ) )
            y_err_y.append( ( py - err, py + err ) )
        fig.multi_line( y_err_x, y_err_y, color = color )

# output to static HTML file
output_file("lines.html", title="line plot example")

gamma_arr = np.logspace( -4, 0, 5 )

# res = np.mean( res01, axis = 0 )
res = res1
res_mean = np.mean( res[ :, :, :, : ], axis = 0 )
pl = []
for i in xrange( 0, 5 ): # gamma
    pl.append( figure( title = "gamma = " + str( gamma_arr[ i ] ), x_axis_label = 'lambda', y_axis_label = 'RMSE', x_axis_type = "log", y_axis_type = "log", x_range = [ 1e-10, 1e1 ], y_range = [ 1e-3, 1 ] ) ) # the curves below are RMSEs (res[ ..., 2 ]), not MSEs
    errorbar( pl[ i ], res_mean[ i, 0, :, 0], res_mean[ i, 0, :, 2], 'ID', yerr = np.std( res[ :, i, 0, :, 2], axis = 0 ), color='red' )
    errorbar( pl[ i ], res_mean[ i, 1, :, 0], res_mean[ i, 1, :, 2], 'LS', yerr = np.std( res[ :, i, 1, :, 2], axis = 0 ), color='blue' )
    errorbar( pl[ i ], res_mean[ i, 2, :, 0], res_mean[ i, 2, :, 2], 'LT', yerr = np.std( res[ :, i, 2, :, 2], axis = 0 ), color='green' )
    print scipy.stats.ttest_ind( res[ :, i, 0, :, 2], res[ :, i, 1, :, 2] )[ 1 ]
    print scipy.stats.ttest_ind( res[ :, i, 0, :, 2], res[ :, i, 2, :, 2] )[ 1 ]
    print scipy.stats.ttest_ind( res[ :, i, 1, :, 2], res[ :, i, 2, :, 2] )[ 1 ]
    print 'next'
#     pl[ i ].line( res[ i, 0, :, 0], res[ i, 0, :, 2] ** 2, legend="Id", line_width=2, color = "red" )
#     pl[ i ].line( res[ i, 1, :, 0], res[ i, 1, :, 2] ** 2, legend="LS", line_width=2, color = "blue" )
#     pl[ i ].line( res[ i, 2, :, 0], res[ i, 2, :, 2] ** 2, legend="LT", line_width=2, color = "green" )

# show the results
show( HBox( pl[ 0 ], pl[ 1 ], pl[ 2 ], pl[ 3 ], pl[ 4 ] ) )
print res_mean[ 1, :, :, 2 ]

In [ ]:
res01[ :, 2, 2, :, 3]

In [ ]:
np.std( res01[ :, 2, 0, :, 2], axis = 0 )

In [ ]:
import scipy.io # scipy submodules must be imported explicitly
matfile = 'res1.mat'
scipy.io.savemat( matfile, mdict = { 'out': res1 }, oned_as = 'row' )

In [ ]:
pipicaca = np.loadtxt( 'res0.csv' ) # np.load expects .npy/.npz; text files need loadtxt

In [8]:
A = 2 * np.random.rand( p, p ) - 1
A = np.dot( A, A.T )
A = A / np.linalg.norm( A, 2 )
yn = y + 0 * np.var( y ) * np.random.multivariate_normal( np.zeros( p ), A, y.shape[ 0 ] )
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split( X, yn, train_size = .1 )
scaler_X = sklearn.preprocessing.StandardScaler( )
X_train_s = scaler_X.fit_transform( X_train )
X_test_s = scaler_X.transform( X_test )

scaler_y = sklearn.preprocessing.StandardScaler( )
y_train_s = scaler_y.fit_transform( y_train )
y_test_s = scaler_y.transform( y_test )

In [9]:
gamma = .001
lbda1 = 0.
lbda2 = 0.00
model_learn_id = md.Model( fm.DecomposableFF( gamma, d, 1000, np.eye( p ) ) )
risk = rsk.Ridge( lbda1, 0.0 )
lb = lr.Constant( 0. ) # no bias
lc = lr.InvScaling( 1, lbda1, 1 )
opt = sgd.SGD( risk, 10000, lc, lb, 5, 0 )
model_learn_id.reset( )
start = time.clock( )
opt.fit( model_learn_id, X_train_s.T, y_train_s.T )
stop = time.clock( )

start_pred = time.clock( )
print lbda1, lbda2, RMSE( scaler_y.inverse_transform( model_learn_id( X_train_s.T ).T ), y_train ), RMSE( scaler_y.inverse_transform( model_learn_id( X_test_s.T ).T ), y_test ), stop - start
stop_pred = time.clock( )
print stop_pred - start_pred
print np.std( y_train ), np.std( y_test )
# print np.linalg.matrix_rank( model_learn_id.coefs.reshape( ( 2 * D, p ) ) )
# print np.linalg.matrix_rank( model_learn_id.feature_map.B )


0.0 0.0 0.00640676043975 0.00653882479289 9433.411853
5.087065
0.282057248976 0.282065780692

In [ ]:
model_learn_id.plot_coefs( )
np.linalg.matrix_rank( model_learn_id.coefs2D )
# ax01.matshow( model_learn_id.feature_map.B.T, aspect='auto', cmap='bwr')
# ax11 = plt.subplot( 224 )
# ax11.matshow( model_learn_id.feature_map.A, aspect='auto', cmap='bwr' )
# plt.show( )

In [12]:
gamma = 0.001
lbda1 = 0.
lbda2 = 0.00
A = np.cov( y_train_s.T )
A = A / np.linalg.norm( A, 2 )
model_learn_id = md.Model( fm.DecomposableFF( gamma, d, 1000, A, rank = 20 ) )
risk = rsk.Ridge( lbda1, 0.0 )
lb = lr.Constant( 0. ) # no bias
lc = lr.InvScaling( 2, lbda1, 1 )
opt = sgd.SGD( risk, 10000, lc, lb, 5, 0 )
model_learn_id.reset( )
start = time.clock( )
opt.fit( model_learn_id, X_train_s.T, y_train_s.T )
stop = time.clock( )

start_pred = time.clock( )
print lbda1, lbda2, RMSE( scaler_y.inverse_transform( model_learn_id( X_train_s.T ).T ), y_train ), RMSE( scaler_y.inverse_transform( model_learn_id( X_test_s.T ).T ), y_test ), stop - start
stop_pred = time.clock( )
print stop_pred - start_pred
print np.std( y_train ), np.std( y_test )
# print np.linalg.matrix_rank( model_learn_id.coefs.reshape( ( 2 * D, p ) ) )
# print np.linalg.matrix_rank( model_learn_id.feature_map.B )


0.0 0.0 0.00591893385177 0.00605844028525 22.994635
1.606975
0.282057248976 0.282065780692
/Library/Python/2.7/site-packages/ipykernel/__main__.py:7: DeprecationWarning: elementwise comparison failed; this will raise the error in the future.

In [ ]:
model_learn_id.plot_coefs( )
np.linalg.matrix_rank( model_learn_id.coefs2D )

In [ ]:
start_pred = time.clock( )
print opt.CV( model_learn_id, X_train.T, y_train.T, 2 )
stop_pred = time.clock( )
print stop_pred - start_pred

In [ ]:
# This cell was left as a bare decorator; a complete (hypothetical) use of
# ipyparallel's parallel-map decorator would look like:
@dview.parallel( block = True )
def psqrt( x ): # placeholder body, not from the original notebook
    return x ** 0.5

In [ ]:
np.linalg.matrix_rank( model_learn_id.feature_map.B )

In [ ]:
np.linalg.svd( np.random.rand( 2 * model_learn_id.feature_map.D, model_learn_id.o ), full_matrices = False )[ 0 ].shape

In [ ]:
np.abs( model_learn_id.feature_map.B ).sum( axis = 0 )

In [ ]:
print model_learn_id.feature_map.B

In [ ]:
d1 > 0.01

In [ ]:
x = np.random.rand( 5 )
y = np.random.rand( 5, 10 )

In [ ]:
x[ :, None ] * y # .T is a no-op on a 1-D array; broadcast x down the rows of y

In [ ]:
from sklearn.datasets import fetch_mldata

def simplex_map( i ):
    if i > 2:
        return np.bmat( [ [ [ [ 1 ] ], np.repeat( -1.0 / ( i - 1 ), i - 1 ).reshape( ( 1, i - 1 ) ) ],
                          [ np.zeros( ( i - 2, 1 ) ), simplex_map( i - 1 ) * np.sqrt( 1.0 - 1.0 / ( i - 1 ) ** 2 ) ] ] )
    elif i == 2:
        return np.array( [ [ 1, -1 ] ] )
    else:
        raise ValueError( "invalid number of classes" )

In [ ]:
mnist = fetch_mldata( 'MNIST original', data_home='mnist_data' )
data_X = np.asarray( mnist.data, np.float )
data_y = np.asarray( mnist.target )

# data_X = preprocessing.StandardScaler( ).fit_transform( data_X )
data_X = 2 * ( data_X / 255.0 ) - 1
# data_X = ( data_X.T - np.mean( data_X, axis = 1 ) ).T
# data_X = ( data_X.T / np.std( data_X, axis = 1 ) ).T
# data_X = preprocessing.StandardScaler( ).fit_transform( data_X )
# print data_X.mean( )

lb = preprocessing.LabelBinarizer( neg_label=0, pos_label=1 )
data_y = np.dot( np.asarray( lb.fit_transform( data_y ), np.float ), simplex_map( 10 ).T )
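
A round trip of the simplex label coding (added, illustrative): encoding the ten digit labels and decoding with the argmax rule used further below recovers them exactly.

In [ ]:
S = np.asarray( simplex_map( 10 ) )
labels_demo = np.arange( 10 )
lb_demo = preprocessing.LabelBinarizer( neg_label = 0, pos_label = 1 )
codes = np.dot( np.asarray( lb_demo.fit_transform( labels_demo ), np.float ), S.T )
print np.all( np.argmax( np.dot( codes, S ), axis = 1 ) == labels_demo ) # True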

In [ ]:
train_X, test_X, train_y, test_y = sklearn.cross_validation.train_test_split( data_X, data_y, test_size = 1.0 / 7.0 )
train_X = np.asarray( train_X )
test_X = np.asarray( test_X )
train_y = np.asarray( train_y )
test_y = np.asarray( test_y )

In [ ]:
print data_y

In [ ]:
gamma = 0.0073
d = train_X.shape[ 1 ]
p = train_y.shape[ 1 ]

lbda1 = 0.00
eps = .1
model_learn_id = md.Model( fm.DecomposableFF( gamma, d, 1000, np.eye( p ) ) )
risk = rsk.Ridge( lbda1, eps )
lb = lr.Constant( 0. ) # no bias
lc = lr.InvScaling( 1, lbda1, 1 )
opt = sgd.SGD( risk, 20000, lc, lb, 10, 1000 )
model_learn_id.reset( )
start = time.clock( )
opt.fit( model_learn_id, train_X, train_y )
stop = time.clock( )

start_pred = time.clock( )
S = simplex_map( 10 )

pred_train = model_learn_id( train_X )
pred_train = np.argmax( np.dot( pred_train, S ), axis = 1 )
ground_truth_train = np.argmax( np.dot( train_y, S ), axis = 1 )

pred_test = model_learn_id( test_X )
pred_test = np.argmax( np.dot( pred_test, S ), axis = 1 )
ground_truth_test = np.argmax( np.dot( test_y, S ), axis = 1 )

print lbda1, lbda2, 100 * np.float( np.sum( pred_train != ground_truth_train ) ) / pred_train.shape[ 0 ], 100 * np.float( np.sum( pred_test != ground_truth_test ) ) / pred_test.shape[ 0 ], stop - start
stop_pred = time.clock( )
print stop_pred - start_pred

In [ ]:
gamma = 1.0 / ( 2 * 11 ** 2 )
d = train_X.shape[ 1 ]
p = train_y.shape[ 1 ]

lbda1 = 0.00
lbda2 = 0.00
model_learn_id = md.Model( fm.DecomposableFF( gamma, d, 1000, np.eye( p ) ) )
risk = rsk.Ridge_B( lbda1, lbda2, 0.0 )
lb = lr.Constant( 0. ) # no bias
lc = lr.InvScaling( 10, lbda1, 1 )
opt = sgd.SGD( risk, 20000, lc, lb, 100, 1000 )
model_learn_id.reset( )
start = time.clock( )
opt.fit( model_learn_id, train_X, train_y )
stop = time.clock( )

start_pred = time.clock( )
S = simplex_map( 10 )

pred_train = model_learn_id( train_X )
pred_train = np.argmax( np.dot( pred_train, S ), axis = 1 )
ground_truth_train = np.argmax( np.dot( train_y, S ), axis = 1 )

pred_test = model_learn_id( test_X )
pred_test = np.argmax( np.dot( pred_test, S ), axis = 1 )
ground_truth_test = np.argmax( np.dot( test_y, S ), axis = 1 )

print lbda1, lbda2, 100 * np.float( np.sum( pred_train != ground_truth_train ) ) / pred_train.shape[ 0 ], 100 * np.float( np.sum( pred_test != ground_truth_test ) ) / pred_test.shape[ 0 ], stop - start
stop_pred = time.clock( )
print stop_pred - start_pred

In [ ]:
import matplotlib.pyplot as plt
plt.pcolor( model_learn_id.feature_map.A, cmap = 'seismic' )
plt.show( )

In [ ]:
np.linalg.matrix_rank( model_learn_id.feature_map.B )

In [ ]:
print model_learn_id.feature_map.A

In [ ]:
print np.cov( y.T )

In [ ]:
# note: risk_func / risk_grad are only defined in the commented-out block of the parameter cell above
risk_func( np.random.rand( 1, model_learn_id.coefs.shape[ 1 ] ), risk, model_learn_id, train_X[ :1, : ], train_y[ :1, : ] )

In [ ]:
risk_grad( np.random.rand( 1, model_learn_id.coefs.shape[ 1 ] ), risk, model_learn_id, train_X[ :1, : ], train_y[ :1, : ] ).shape

In [ ]:
print model_learn_id(  train_X[ :1, : ] )

In [ ]:
A = np.random.rand( 10, 10 )
np.linalg.norm( A, 'fro' ) ** 2 - ( A ** 2 ).sum( )

In [ ]:
print model_learn_id.coefs.shape

In [ ]:
w = np.random.rand( 1, model_learn_id.coefs.shape[ 1 ] )
w_aa = np.zeros( ( 1, model_learn_id.coefs.shape[ 1 ] ) )
w_aa[ 0, 0 ] = 1 # unit vector along the first coordinate (w_aa[ 0 ] = 1 would fill the whole row)
# forward difference along the first coordinate, to be compared with risk_grad below
( risk_func( w + 1e-8 * w_aa, risk, model_learn_id, train_X[ :1, : ], train_y[ :1, : ] ) - risk_func( w, risk, model_learn_id, train_X[ :1, : ], train_y[ :1, : ] ) ) / 1e-8

In [ ]:
risk_grad( w, risk, model_learn_id, train_X[ :1, : ], train_y[ :1, : ] )[ 0, 0 ]

In [ ]:
import scipy.optimize
scipy.optimize.check_grad( risk_func, risk_grad, w, risk, model_learn_id, train_X[ :1, : ], train_y[ :1, : ] ) # check_grad returns a scalar, not a sequence

In [ ]:
# Hi,

# Here are the results. I used a train/test/validation procedure with 2500/1000/1500 data points sampled, repeated 10 times. Parameters were learned on the train/test portion, and MSE results were computed on the validation set. The decomposable kernel is combined with a pinv(D-W), where W is a linear, quadratic or Gaussian kernel over the output features.

# Average MSEs over the 10 repeats:

# ORFF with k.I:      0.01975 runtime 71.3
# ORFF with k.Alin:   0.01953 runtime 75.3
# ORFF with k.Aquad:  0.01933 runtime 74.1
# ORFF with k.Agauss: 0.01929 runtime 75.5
# Ridge regression:   0.01957 runtime 1.1
# Knn:                0.01929 runtime 12.3
# Extra-trees:        0.01933 runtime 42.7
# Linear regression:  0.13484 runtime 81.3

# -> We match state-of-the-art performance.

#  The linear regression seems to have failed; I don't know why. I use Romain's code:

#   estimator.fit( X_train, Y_train )
#   Y_test_predict[ name ] = estimator.predict( X_val )


# I also ran the experiments using the whole dataset, i.e. the split 15226/6085/9164 (sum = 30475). Again, I randomly sampled the split 10 times. Due to the large size, I reused the parameters learned from the previous smaller splits. I don't yet have results for the competing approaches on the full dataset.

# ORFF with k.I:      0.01909 runtime 426.4
# ORFF with k.Alin:   0.01982 runtime 409.0
# ORFF with k.Aquad:  0.01930 runtime 435.0
# ORFF with k.Agauss: 0.01913 runtime 388.4
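
A minimal sketch of the three-way split described in the email (2500/1000/1500), assuming sklearn's train_test_split; data_X and data_y merely stand in for the dataset used there:

In [ ]:
# hypothetical illustration of the train/test/validation split of the email
idx = np.random.permutation( data_X.shape[ 0 ] )[ :5000 ]
X_sub, y_sub = data_X[ idx ], np.asarray( data_y )[ idx ]
X_tr, X_rest, y_tr, y_rest = sklearn.cross_validation.train_test_split( X_sub, y_sub, train_size = 2500 )
X_te, X_val, y_te, y_val = sklearn.cross_validation.train_test_split( X_rest, y_rest, train_size = 1000 )
print X_tr.shape[ 0 ], X_te.shape[ 0 ], X_val.shape[ 0 ] # 2500 1000 1500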