In [1]:
import numpy as np

# from bokeh.plotting import HBox, VBox, figure, show, output_file, GridPlot
# from bokeh.models.mappers import LinearColorMapper
# from bokeh.models import BasicTicker, Grid

from sklearn import metrics
from sklearn.datasets import fetch_mldata

from sklearn import preprocessing
from sklearn import cross_validation
from sklearn.datasets import fetch_olivetti_faces
from sklearn.utils.validation import check_random_state
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier, ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.cross_validation import train_test_split
from sklearn.covariance import GraphLassoCV, ledoit_wolf
from sklearn.grid_search import GridSearchCV
from scipy.spatial import distance
 
import scipy
import scipy.io
import scipy.linalg
import scipy.optimize
import scipy.stats
import sklearn
import OVFM.Model as md
import OVFM.FeatureMap as fm
import OVFM.Risk as rsk
import OVFM.LearningRate as lr
import OVFM.DataGeneration as dg
import OVFM.SGD as sgd
import OVFM.FISTA as fista
 
import time
import timeit
import os
import sys

In [2]:
def generate_features_train( nb_points, nb_gaussian, d, sigma = None ):

    nb_points = nb_points / nb_gaussian   # points per Gaussian (integer division)

    if sigma is None:
        # Draw a sequence of random positive semi-definite covariances,
        # each normalised to unit spectral norm.
        sigma = []
        for i in xrange( nb_gaussian ):
            temp = 2 * np.random.rand( d, d ) - 1
            temp = np.dot( temp, temp.T )
            sigma.append( temp / np.linalg.norm( temp, 2 ) )

    # Generate centroids
    centers = [ 10 * np.random.rand( d ) for i in xrange( nb_gaussian ) ]
    
    # Generate data
    gen = [ np.random.multivariate_normal( centers[ i ], sigma[ i ], nb_points ) for i in xrange( nb_gaussian ) ]
    return sklearn.preprocessing.StandardScaler( ).fit_transform( np.concatenate( gen, axis = 0 ) )
    
def generate_features_test( nb_points, d ):
    return sklearn.preprocessing.MinMaxScaler( ( -1, 1 ) ).fit_transform( np.random.rand( nb_points, d ) )
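
In [ ]:
# Quick sanity check of the generators above on tiny sizes (X_demo and
# X_test_demo are names used only for this check):
X_demo = generate_features_train( 100, 5, 3 )
print X_demo.shape                                 # (100, 3)
print np.abs( X_demo.mean( axis = 0 ) ).max( )     # ~0 after StandardScaler
X_test_demo = generate_features_test( 10, 3 )
print X_test_demo.min( ), X_test_demo.max( )       # -1.0 1.0 after MinMaxScaler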

In [3]:
def MSE( p, y ):
    return np.mean( ( p - y ) ** 2 )

def RMSE( p, y ):
    return np.sqrt( MSE( p, y ) )

def MSE_generalisation( model_gen, model_learn, nb_points = 10000 ):
    X_test = generate_features_test( nb_points, d )
    y_test = sklearn.preprocessing.MinMaxScaler( ( -1, 1 ) ).fit_transform( model_gen( X_test ) )
    return MSE( y_test, model_learn( X_test ) ), X_test, y_test

def RMSE_generalisation( model_gen, model_learn, nb_points = 10000 ):
    mse, X, y = MSE_generalisation( model_gen, model_learn, nb_points )
    return np.sqrt( mse ), X, y
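
In [ ]:
# Toy check of the metrics above: all-zero predictions against all-one
# targets give MSE = RMSE = 1.
print MSE( np.zeros( ( 4, 2 ) ), np.ones( ( 4, 2 ) ) )    # 1.0
print RMSE( np.zeros( ( 4, 2 ) ), np.ones( ( 4, 2 ) ) )   # 1.0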

In [4]:
def generate_targets( model, X, cache = 10000 ):
    model.coefs1D = 2 * np.random.rand( model.coefs1D.shape[ 0 ], 1 ) - 1
    y = np.empty( ( X.shape[ 0 ], model.o ) )
    # Predict in chunks of `cache` rows; ceil division so a trailing partial
    # chunk is not silently dropped.
    for i in xrange( ( X.shape[ 0 ] + cache - 1 ) / cache ):
        y[ i * cache:min( ( i + 1 ) * cache, X.shape[ 0 ] ), : ] = model( X[ i * cache:min( ( i + 1 ) * cache, X.shape[ 0 ] ), : ].T ).T
    return y
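
In [ ]:
# Sanity check of the ceil-division chunking used in generate_targets
# (pure index arithmetic, no OVFM needed): every row is covered exactly once.
cache_demo, n_rows = 7, 20
print [ ( i * cache_demo, min( ( i + 1 ) * cache_demo, n_rows ) )
        for i in xrange( ( n_rows + cache_demo - 1 ) / cache_demo ) ]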

In [5]:
def tune_eta0( model, X, y, eta0_grid, lbda, eps, batch ):
    Xt, _, yt, _ = sklearn.cross_validation.train_test_split( X, y, train_size = .1 )
    best_err = np.inf
    risk = rsk.Ridge( lbda )
    lb = lr.Constant( 0. ) # no bias
    for eta0 in eta0_grid:
        lc = lr.Constant( eta0 )
        opt = sgd.SGD( risk, eps, lc, lb, batch, 0 )
        model.reset( )
        opt.fit( model, Xt, yt )
        err = RMSE( model( Xt ), yt )
        if err < best_err:
            best_err = err
            best_eta0 = eta0
        
    return best_eta0, best_err
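
In [ ]:
# Hypothetical usage of tune_eta0 (commented out: it needs the X, y and LT
# built in the next cell, and runs a full SGD fit per grid point):
# model = md.Model( fm.DecomposableFF( 0.01 / 20, 20, 1000, np.eye( 20 ) ) )
# eta0, err = tune_eta0( model, X, y, np.logspace( -3, 1, 10 ), 1e-6, 0.001, 1 )
# print eta0, err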

In [6]:
# Timestamped output directory for this run
filename = time.strftime( "noise_test_%Y%m%d-%H%M%S" )

if not os.path.exists( filename ):
    os.makedirs( filename )
if not os.path.exists( filename + '/data' ):
    os.makedirs( filename + '/data' )
if not os.path.exists( filename + '/res' ):
    os.makedirs( filename + '/res' )

# Problem parameters
d = 20
p = 20
D = 1000
n = 10000
n_gaussians = 10000
gamma = 0.01 / d
LT = np.eye( p )
# LT = 0.7 * np.eye( p ) + 0.3 * np.ones( ( p, p ) )
LT = 2 * np.random.rand( p, p ) - 1
LT = np.dot( LT, LT.T )
# Truncate the random PSD matrix LT to rank 10, then renormalise its spectral norm.
LTU, LTD, LTP = np.linalg.svd( LT )
for i in xrange( LTD.shape[ 0 ] - 10 ):
    LTD[ p - i - 1 ] = 0
LT = np.dot( LTU * LTD, LTP )
LT = LT / np.linalg.norm( LT, 2 )
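# Sanity checks (commented out) for the construction above:
# print np.linalg.matrix_rank( LT )   # expected: 10 after the truncation
# print np.linalg.norm( LT, 2 )       # expected: 1.0 after normalisation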
# for i in xrange( LT.shape[ 0 ] ):
#     for j in xrange( LT.shape[ 1 ] ):
#         if i == j:
#             pass
#         else:
#             if np.random.rand( ) > 0.7:
#                 LT[ i, j ] = 0
#                 LT[ j, i ] = 0
# LT = LT / np.linalg.norm( LT, 2 )

# Generate train database
X = generate_features_train( n, n_gaussians, d )
model_gen = md.Model( fm.DecomposableFF( gamma, d, D, LT ) )
y = generate_targets( model_gen, X )

# np.savetxt( filename + '/data/X.txt', X )
# np.savetxt( filename + '/data/y.txt', y )
# np.savetxt( filename + '/data/LT.txt', LT )


# Helpers for gradient checking (used by the check_grad cells further down):
# load a flat coefficient vector into the model, then evaluate the risk or
# its gradient.
def risk_func( w, risk, model, features, targets ):
    model.coefs = w.reshape( model.coefs.shape )
    return risk( features, targets, model )

def risk_grad( w, risk, model, features, targets ):
    model.coefs = w.reshape( model.coefs.shape )
    return risk.coefs_gradient( model( features ), targets, model )

# lbda1 = 0
# risk = rsk.Ridge( lbda1 )
# print scipy.optimize.check_grad( risk_func, risk_grad, np.random.rand( model_gen.coefs.shape[ 1 ] ), risk, model_gen, X[ :1, : ], y[ :1, : ] )


/Library/Python/2.7/site-packages/ipykernel/__main__.py:38: DeprecationWarning: elementwise comparison failed; this will raise the error in the future.

In [ ]:
# res0 = np.empty( ( 10, 5, 3, 10, 4 ) )
# for rep in xrange( 0, 10 ):
#     yn = y + 0 * np.var( y ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ), y.shape[ 0 ] )
#     X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split( X, yn, train_size = 0.8 )
#     # y_train = y_train + np.var( y_train ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ) )
#     sigma = np.median( scipy.spatial.distance.pdist( X ) )
#     sigma_y = np.median( scipy.spatial.distance.pdist( y_train.T ) )
#     gamma_y = 1. / sigma_y ** 2

#     LS = np.exp( gamma_y * scipy.spatial.distance.squareform( scipy.spatial.distance.pdist( y_train.T ) ) )
#     LS = LS / np.linalg.norm( LS, 2 )
#     np.random.seed( seed = 0 )
#     for i, gamma in enumerate( np.logspace( -4, 0, 5 ) ):
#         print i
#         for j, L in enumerate( [ np.eye( p ), LS, LT ] ):
#             model_learn_id = md.Model( fm.DecomposableFF( gamma, d, D, L ) )
#             for k, lbda in enumerate( np.logspace( -10, 1, 10 ) ):

#     #             eta0, _ = tune_eta0( model_learn_id, X_train, y_train, np.logspace( -3, 1, 10 ), lbda, 0.001, 1 )
#                 risk = rsk.Ridge( lbda )
#                 lb = lr.Constant( 0. ) # no bias
#                 lc = lr.InvScaling( 1, lbda, 1 )
#                 opt = sgd.SGD( risk, 10000, lc, lb, 1, 0 )
#                 model_learn_id.reset( )
#                 start = time.clock( )
#                 opt.fit( model_learn_id, X_train, y_train )
#                 res0[ rep, i, j, k, : ] = lbda, RMSE( model_learn_id( X_train ), y_train ), RMSE( model_learn_id( X_test ), y_test ), time.clock( ) - start

errr = np.empty( 10 )
for rep in xrange( 0, 10 ):
    yn = y + 0.5 * np.var( y ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ), y.shape[ 0 ] )
    X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split( X, yn, train_size = 0.8 )
    tree = ExtraTreesRegressor( )
    start = time.clock( )
    tree.fit( X_train, y_train )
    print time.clock( ) - start
    start = time.clock( )
    errr[ rep ] = RMSE( tree.predict( X_test ), y_test )
    print time.clock( ) - start

In [ ]:
print np.mean( errr )
print np.std( errr )

In [ ]:
res01 = np.zeros( ( 10, 5, 3, 10, 4 ) )
for rep in xrange( 0, 10 ):
    yn = y + 0.1 * np.var( y ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ), y.shape[ 0 ] )
    X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split( X, yn, train_size = 0.8 )
    # y_train = y_train + np.var( y_train ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ) )
    sigma = np.median( scipy.spatial.distance.pdist( X ) )
    sigma_y = np.median( scipy.spatial.distance.pdist( y_train.T ) )
    gamma_y = 1. / sigma_y ** 2

    LS = np.exp( gamma_y * scipy.spatial.distance.squareform( scipy.spatial.distance.pdist( y_train.T ) ) )
    LS = LS / np.linalg.norm( LS, 2 )
    np.random.seed( seed = 0 )
    for i, gamma in enumerate( np.logspace( -4, 0, 5 ) ):
        print i
        for j, L in enumerate( [ np.eye( p ), LS, LT ] ):
            model_learn_id = md.Model( fm.DecomposableFF( gamma, d, D, L ) )
            for k, lbda in enumerate( np.logspace( -10, 1, 10 ) ):

    #             eta0, _ = tune_eta0( model_learn_id, X_train, y_train, np.logspace( -3, 1, 10 ), lbda, 0.001, 1 )
                risk = rsk.Ridge( lbda )
                lb = lr.Constant( 0. ) # no bias
                lc = lr.InvScaling( 1, lbda, 1 )
                opt = sgd.SGD( risk, 10000, lc, lb, 1, 0 )
                model_learn_id.reset( )
                start = time.clock( )
                opt.fit( model_learn_id, X_train, y_train )
                res01[ rep, i, j, k, : ] = lbda, RMSE( model_learn_id( X_train ), y_train ), RMSE( model_learn_id( X_test ), y_test ), time.clock( ) - start

In [ ]:
print res01[ 9, :, :, :]

In [ ]:
W = model_learn_id.coefs.reshape( ( model_learn_id.feature_map.shape[ 1 ] / ( 2 * model_learn_id.feature_map.B.shape[ 1 ] ), model_learn_id.feature_map.B.shape[ 1 ] ) )
print np.dot( ( model_learn_id.feature_map.grkhs * W.T ).T, model_learn_id.feature_map.A ).shape
# model_learn_id.coefs.reshape( ( model_learn_id.feature_map.D, model_learn_id.feature_map.o ) )
# print model_learn_id.feature_map.grkhs * model_learn_id.coefs.reshape( ( model_learn_id.feature_map.shape[ 1 ] / ( 2 * model_learn_id.feature_map.B.shape[ 1 ] ), model_learn_id.feature_map.B.shape[ 1 ] ) ).T
# print model_learn_id.coefs.shape
# print model_learn_id.feature_map.D
# print model_learn_id.feature_map.shape
# print model_learn_id.feature_map.B.shape

In [ ]:
res1 = np.empty( ( 10, 5, 3, 10, 4 ) )
for rep in xrange( 0, 10 ):
    yn = y + .5 * np.var( y ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ), y.shape[ 0 ] )
    X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split( X, yn, train_size = 0.8 )
    # y_train = y_train + np.var( y_train ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ) )
    sigma = np.median( scipy.spatial.distance.pdist( X ) )
    sigma_y = np.median( scipy.spatial.distance.pdist( y_train.T ) )
    gamma_y = 1. / sigma_y ** 2

    LS = np.exp( gamma_y * scipy.spatial.distance.squareform( scipy.spatial.distance.pdist( y_train.T ) ) )
    LS = LS / np.linalg.norm( LS, 2 )
    np.random.seed( seed = 0 )
    for i, gamma in enumerate( np.logspace( -4, 0, 5 ) ):
        print i
        for j, L in enumerate( [ np.eye( p ), LS, LT ] ):
            model_learn_id = md.Model( fm.DecomposableFF( gamma, d, D, L ) )
            for k, lbda in enumerate( np.logspace( -10, 1, 10 ) ):

    #             eta0, _ = tune_eta0( model_learn_id, X_train, y_train, np.logspace( -3, 1, 10 ), lbda, 0.001, 1 )
                risk = rsk.Ridge( lbda )
                lb = lr.Constant( 0. ) # no bias
                lc = lr.InvScaling( 1, lbda, 1 )
                opt = sgd.SGD( risk, 10000, lc, lb, 1, 0 )
                model_learn_id.reset( )
                start = time.clock( )
                opt.fit( model_learn_id, X_train, y_train )
                res1[ rep, i, j, k, : ] = lbda, RMSE( model_learn_id( X_train ), y_train ), RMSE( model_learn_id( X_test ), y_test ), time.clock( ) - start

In [ ]:
res001 = np.empty( ( 10, 5, 3, 10, 4 ) )
for rep in xrange( 0, 10 ):
    yn = y + 0.01 * np.var( y ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ), y.shape[ 0 ] )
    X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split( X, yn, train_size = 0.8 )
    # y_train = y_train + np.var( y_train ) * np.random.multivariate_normal( np.zeros( p ), np.eye( p ) )
    sigma = np.median( scipy.spatial.distance.pdist( X ) )
    sigma_y = np.median( scipy.spatial.distance.pdist( y_train.T ) )
    gamma_y = 1. / sigma_y ** 2

    LS = np.exp( gamma_y * scipy.spatial.distance.squareform( scipy.spatial.distance.pdist( y_train.T ) ) )
    LS = LS / np.linalg.norm( LS, 2 )
    np.random.seed( seed = 0 )
    for i, gamma in enumerate( np.logspace( -4, 0, 5 ) ):
        print i
        for j, L in enumerate( [ np.eye( p ), LS, LT ] ):
            model_learn_id = md.Model( fm.DecomposableFF( gamma, d, D, L ) )
            for k, lbda in enumerate( np.logspace( -10, 1, 10 ) ):

    #             eta0, _ = tune_eta0( model_learn_id, X_train, y_train, np.logspace( -3, 1, 10 ), lbda, 0.001, 1 )
                risk = rsk.Ridge( lbda )
                lb = lr.Constant( 0. ) # no bias
                lc = lr.InvScaling( 1, lbda, 1 )
                opt = sgd.SGD( risk, 10000, lc, lb, 1, 0 )
                model_learn_id.reset( )
                start = time.clock( )
                opt.fit( model_learn_id, X_train, y_train )
                res001[ rep, i, j, k, : ] = lbda, RMSE( model_learn_id( X_train ), y_train ), RMSE( model_learn_id( X_test ), y_test ), time.clock( ) - start

In [ ]:
# bokeh is needed only for this plotting cell (its imports at the top of the
# notebook are commented out):
from bokeh.plotting import HBox, figure, show, output_file

def errorbar( fig, x, y, legend, xerr = None, yerr = None, color = 'red' ):

    fig.line( x, y, color = color, legend = legend, line_width = 2 )

    if xerr is not None:
        x_err_x = []
        x_err_y = []
        for px, py, err in zip( x, y, xerr ):
            x_err_x.append( ( px - err, px + err ) )
            x_err_y.append( ( py, py ) )
        fig.multi_line( x_err_x, x_err_y, color = color )

    if yerr is not None:
        y_err_x = []
        y_err_y = []
        for px, py, err in zip( x, y, yerr ):
            y_err_x.append( ( px, px ) )
            y_err_y.append( ( py - err, py + err ) )
        fig.multi_line( y_err_x, y_err_y, color = color )

# output to static HTML file
output_file( "lines.html", title = "line plot example" )

gamma_arr = np.logspace( -4, 0, 5 )

# res = np.mean( res01, axis = 0 )
res = res1
res_mean = np.mean( res, axis = 0 )
pl = []
for i in xrange( 0, 5 ): # gamma
    pl.append( figure( title = "gamma = " + str( gamma_arr[ i ] ), x_axis_label = 'lambda', y_axis_label = 'RMSE', x_axis_type = "log", y_axis_type = "log", x_range = [ 1e-10, 1e1 ], y_range = [ 1e-3, 1 ] ) )
    errorbar( pl[ i ], res_mean[ i, 0, :, 0], res_mean[ i, 0, :, 2], 'ID', yerr = np.std( res[ :, i, 0, :, 2], axis = 0 ), color='red' )
    errorbar( pl[ i ], res_mean[ i, 1, :, 0], res_mean[ i, 1, :, 2], 'LS', yerr = np.std( res[ :, i, 1, :, 2], axis = 0 ), color='blue' )
    errorbar( pl[ i ], res_mean[ i, 2, :, 0], res_mean[ i, 2, :, 2], 'LT', yerr = np.std( res[ :, i, 2, :, 2], axis = 0 ), color='green' )
    print scipy.stats.ttest_ind( res[ :, i, 0, :, 2], res[ :, i, 1, :, 2] )[ 1 ]
    print scipy.stats.ttest_ind( res[ :, i, 0, :, 2], res[ :, i, 2, :, 2] )[ 1 ]
    print scipy.stats.ttest_ind( res[ :, i, 1, :, 2], res[ :, i, 2, :, 2] )[ 1 ]
    print 'next'
#     pl[ i ].line( res[ i, 0, :, 0], res[ i, 0, :, 2] ** 2, legend="Id", line_width=2, color = "red" )
#     pl[ i ].line( res[ i, 1, :, 0], res[ i, 1, :, 2] ** 2, legend="LS", line_width=2, color = "blue" )
#     pl[ i ].line( res[ i, 2, :, 0], res[ i, 2, :, 2] ** 2, legend="LT", line_width=2, color = "green" )

# show the results
show( HBox( pl[ 0 ], pl[ 1 ], pl[ 2 ], pl[ 3 ], pl[ 4 ] ) )
print res_mean[ 1, :, :, 2 ]

In [ ]:
res01[ :, 2, 2, :, 3]

In [ ]:
np.std( res01[ :, 2, 0, :, 2], axis = 0 )

In [ ]:
matfile = 'res1.mat'
scipy.io.savemat(matfile, mdict={'out': res1}, oned_as='row')

In [ ]:
res0_loaded = np.loadtxt( 'res0.csv' )   # np.load only reads .npy/.npz; use loadtxt for csv

In [7]:
A = 2 * np.random.rand( p, p ) - 1
A = np.dot( A, A.T )
A = A / np.linalg.norm( A, 2 )
yn = y + 0.0 * np.var( y ) * np.random.multivariate_normal( np.zeros( p ), A, y.shape[ 0 ] )
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split( X, yn, train_size = 0.8 )

In [11]:
gamma = .001
lbda1 = 1e-6
# lbda1 = 0
lbda2 = 0.00
model_learn_id = md.Model( fm.DecomposableFF( gamma, d, 1000, LT ) )
lfloss = rsk.LF_loss( )
print lfloss( X[ :1, : ].T, y[ :1, : ].T, model_learn_id )
print lfloss.check_coefs_gradient( X[ :2, : ].T, y[ :2, : ].T, model_learn_id )


0.0420410460092
/Library/Python/2.7/site-packages/ipykernel/__main__.py:5: DeprecationWarning: elementwise comparison failed; this will raise the error in the future.
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-11-b91b274824f4> in <module>()
      6 lfloss = rsk.LF_loss( )
      7 print lfloss( X[ :1, : ].T, y[ :1, : ].T, model_learn_id )
----> 8 print lfloss.check_coefs_gradient( X[ :2, : ], y[ :2, : ], model_learn_id )

OVFM/Risk.pyx in OVFM.Risk.Smooth_func.check_coefs_gradient (OVFM/Risk.c:5229)()

/Library/Python/2.7/site-packages/scipy/optimize/optimize.pyc in check_grad(func, grad, x0, *args, **kwargs)
    664         raise ValueError("Unknown keyword arguments: %r" %
    665                          (list(kwargs.keys()),))
--> 666     return sqrt(sum((grad(x0, *args) -
    667                      approx_fprime(x0, func, step, *args))**2))
    668 

OVFM/Risk.pyx in OVFM.Risk.Smooth_func.check_coefs_gradient.grad (OVFM/Risk.c:4868)()

OVFM/Model.pyx in OVFM.Model.Model.__call__ (OVFM/Model.c:2772)()

OVFM/FeatureMap.pyx in OVFM.FeatureMap.DecomposableFF.map_predict (OVFM/FeatureMap.c:9136)()

OVFM/FeatureMap.pyx in OVFM.FeatureMap.DecomposableFF._mapf (OVFM/FeatureMap.c:8475)()

OVFM/FeatureMap.pyx in OVFM.FeatureMap.generic_GaussianOVGFF._mapf (OVFM/FeatureMap.c:5749)()

OVFM/FeatureMap.pyx in OVFM.FeatureMap.GaussianFF.map (OVFM/FeatureMap.c:4262)()

ValueError: shapes (1000,20) and (40,1) not aligned: 20 (dim 1) != 40 (dim 0)

In [ ]:
gamma = .001
lbda1 = 1e-6
# lbda1 = 0
lbda2 = 0.00
model_learn_id = md.Model( fm.DecomposableFF( gamma, d, 1000, LT ) )
risk = rsk.Ridge( lbda1 )
lb = lr.Constant( 0. ) # no bias
lc = lr.InvScaling( 1., lbda1, 1 )
opt = sgd.SGD( risk, 10000, lc, lb, 10, 0 )
model_learn_id.reset( )
start = time.clock( )
opt.fit( model_learn_id, X_train, y_train )
stop = time.clock( )

start_pred = time.clock( )
print lbda1, lbda2, RMSE( model_learn_id( X_train.T ).T, y_train ), RMSE( model_learn_id( X_test.T ).T, y_test ), stop - start
stop_pred = time.clock( )
print stop_pred - start_pred
print np.std( y_train ), np.std( y_test )
# print np.linalg.matrix_rank( model_learn_id.coefs.reshape( ( 2 * D, p ) ) )
# print np.linalg.matrix_rank( model_learn_id.feature_map.B )

In [ ]:
print model_learn_id.feature_map.phi_x.shape
print model_learn_id.feature_map._shapef
print model_learn_id.coefs2D.shape
# print np.dot( model_learn_id.feature_map.B.T, model_learn_id.feature_map.B )
print scipy.linalg.pinv( model_learn_id.feature_map.B ) - model_learn_id.feature_map.B.T
# print model_learn_id.feature_map.B.shape

In [ ]:
import matplotlib.pyplot as plt
f, axarr = plt.subplots( 2, sharex=True )
axarr[0].matshow( model_learn_id.coefs2D.T, aspect='auto', cmap='bwr')
axarr[1].matshow( np.log( model_learn_id.feature_map.grkhs.T ), aspect='auto', cmap='bwr' )
plt.show( )

In [ ]:
# print model_learn_id.feature_map.grkhs[ model_learn_id.coefs.ravel( ) == np.NaN ]
# print model_learn_id.coefs.reshape( ( D, p ) ).T[ :, 854 ]
print model_learn_id.feature_map.grkhs.min( )
print model_learn_id.coefs2D.T
# print np.linalg.norm( model_learn_id.feature_map.Z[ :, 854 ] )

In [ ]:
gamma = .001
lbda1 = 0.0
lbda2 = 0.0
model_learn_id = md.Model( fm.DecomposableFF( gamma, d, 1000, LT ) )
risk = rsk.Ridge_B( lbda1, lbda2 )
lb = lr.Constant( 0. ) # no bias
lc = lr.InvScaling( 1, lbda1, 1 )
opt = sgd.SGD( risk, 1000, lc, lb, 10, 0 )
model_learn_id.reset( )
start = time.clock( )
opt.fit( model_learn_id, X_train, y_train )
stop = time.clock( )

start_pred = time.clock( )
print lbda1, lbda2, RMSE( model_learn_id( X_train ), y_train ), RMSE( model_learn_id( X_test ), y_test ), stop - start
stop_pred = time.clock( )
print stop_pred - start_pred
print np.std( y_train ), np.std( y_test )

In [ ]:
import matplotlib.pyplot as plt
plt.pcolor( model_learn_id.feature_map.A, cmap = 'seismic' )
plt.show( )

In [ ]:
np.linalg.matrix_rank( model_learn_id.feature_map.B )

In [ ]:
np.linalg.svd( np.random.rand( 2 * model_learn_id.feature_map.D, model_learn_id.o ), full_matrices = False )[ 0 ].shape

In [ ]:
np.abs( model_learn_id.feature_map.B ).sum( axis = 0 )

In [ ]:
print model_learn_id.feature_map.B

In [ ]:
# Scratch broadcasting check; fresh names so the dataset y is not overwritten.
a = np.random.rand( 5 )
b = np.random.rand( 5, 10 )

In [ ]:
a[ :, np.newaxis ] * b   # .T is a no-op on a 1-D array; broadcast explicitly

In [ ]:
from sklearn.datasets import fetch_mldata

def simplex_map( i ):
    # Vertices of a regular simplex in R^(i-1), one unit-norm column per class.
    if i > 2:
        first_row = np.bmat( [ [ np.ones( ( 1, 1 ) ), np.repeat( -1.0 / ( i - 1 ), i - 1 ).reshape( ( 1, i - 1 ) ) ] ] )
        rest = np.bmat( [ [ np.zeros( ( i - 2, 1 ) ), simplex_map( i - 1 ) * np.sqrt( 1.0 - 1.0 / ( i - 1 ) ** 2 ) ] ] )
        return np.bmat( [ [ first_row ], [ rest ] ] )
    elif i == 2:
        return np.array( [ [ 1, -1 ] ] )
    else:
        raise ValueError( "invalid number of classes" )

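In [ ]:
# Sanity check of the simplex coding: the k columns of simplex_map( k ) are
# unit vectors with pairwise inner products -1/(k-1), i.e. the vertices of a
# regular simplex in R^(k-1).
S_demo = simplex_map( 10 )
G = np.dot( S_demo.T, S_demo )
print S_demo.shape                          # (9, 10)
print np.allclose( np.diag( G ), 1.0 )      # unit-norm columns
print np.allclose( G[ 0, 1 ], -1.0 / 9 )    # equiangular vertices
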
In [ ]:
mnist = fetch_mldata( 'MNIST original', data_home='mnist_data' )
data_X = np.asarray( mnist.data, np.float )
data_y = np.asarray( mnist.target )

# data_X = preprocessing.StandardScaler( ).fit_transform( data_X )
data_X = 2 * ( data_X / 255.0 ) - 1
# data_X = ( data_X.T - np.mean( data_X, axis = 1 ) ).T
# data_X = ( data_X.T / np.std( data_X, axis = 1 ) ).T
# data_X = preprocessing.StandardScaler( ).fit_transform( data_X )
# print data_X.mean( )

lb = preprocessing.LabelBinarizer( neg_label=0, pos_label=1 )
data_y = np.dot( np.asarray( lb.fit_transform( data_y ), np.float ), simplex_map( 10 ).T )

In [ ]:
train_X, test_X, train_y, test_y = sklearn.cross_validation.train_test_split( data_X, data_y, test_size = 1.0 / 7.0 )
train_X = np.asarray( train_X )
test_X = np.asarray( test_X )
train_y = np.asarray( train_y )
test_y = np.asarray( test_y )

In [ ]:
print data_y

In [ ]:
gamma = 0.0073
d = train_X.shape[ 1 ]
p = train_y.shape[ 1 ]

lbda1 = 0.00
eps = .1
model_learn_id = md.Model( fm.DecomposableFF( gamma, d, 1000, np.eye( p ) ) )
risk = rsk.Ridge( lbda1, eps )
lb = lr.Constant( 0. ) # no bias
lc = lr.InvScaling( 1, lbda1, 1 )
opt = sgd.SGD( risk, 20000, lc, lb, 10, 1000 )
model_learn_id.reset( )
start = time.clock( )
opt.fit( model_learn_id, train_X, train_y )
stop = time.clock( )

start_pred = time.clock( )
S = simplex_map( 10 )

pred_train = model_learn_id( train_X )
pred_train = np.argmax( np.dot( pred_train, S ), axis = 1 )
ground_truth_train = np.argmax( np.dot( train_y, S ), axis = 1 )

pred_test = model_learn_id( test_X )
pred_test = np.argmax( np.dot( pred_test, S ), axis = 1 )
ground_truth_test = np.argmax( np.dot( test_y, S ), axis = 1 )

print lbda1, eps, 100 * np.float( np.sum( pred_train != ground_truth_train ) ) / pred_train.shape[ 0 ], 100 * np.float( np.sum( pred_test != ground_truth_test ) ) / pred_test.shape[ 0 ], stop - start
stop_pred = time.clock( )
print stop_pred - start_pred

In [ ]:
gamma = 1.0 / ( 2 * 11 ** 2 )
d = train_X.shape[ 1 ]
p = train_y.shape[ 1 ]

lbda1 = 0.00
lbda2 = 0.00
model_learn_id = md.Model( fm.DecomposableFF( gamma, d, 1000, np.eye( p ) ) )
risk = rsk.Ridge_B( lbda1, lbda2, 0.0 )
lb = lr.Constant( 0. ) # no bias
lc = lr.InvScaling( 10, lbda1, 1 )
opt = sgd.SGD( risk, 20000, lc, lb, 100, 1000 )
model_learn_id.reset( )
start = time.clock( )
opt.fit( model_learn_id, train_X, train_y )
stop = time.clock( )

start_pred = time.clock( )
S = simplex_map( 10 )

pred_train = model_learn_id( train_X )
pred_train = np.argmax( np.dot( pred_train, S ), axis = 1 )
ground_truth_train = np.argmax( np.dot( train_y, S ), axis = 1 )

pred_test = model_learn_id( test_X )
pred_test = np.argmax( np.dot( pred_test, S ), axis = 1 )
ground_truth_test = np.argmax( np.dot( test_y, S ), axis = 1 )

print lbda1, lbda2, 100 * np.float( np.sum( pred_train != ground_truth_train ) ) / pred_train.shape[ 0 ], 100 * np.float( np.sum( pred_test != ground_truth_test ) ) / pred_test.shape[ 0 ], stop - start
stop_pred = time.clock( )
print stop_pred - start_pred

In [ ]:
import matplotlib.pyplot as plt
plt.pcolor( model_learn_id.feature_map.A, cmap = 'seismic' )
plt.show( )

In [ ]:
np.linalg.matrix_rank( model_learn_id.feature_map.B )

In [ ]:
print model_learn_id.feature_map.A

In [ ]:
print np.cov( y.T )

In [ ]:
risk_func( np.random.rand( 1, model_learn_id.coefs.shape[ 1 ] ), risk, model_learn_id, train_X[ :1, : ], train_y[ :1, : ] )

In [ ]:
risk_grad( np.random.rand( 1, model_learn_id.coefs.shape[ 1 ] ), risk, model_learn_id, train_X[ :1, : ], train_y[ :1, : ] ).shape

In [ ]:
print model_learn_id(  train_X[ :1, : ] )

In [ ]:
# Frobenius norm identity: ||A||_F ** 2 equals the sum of squared entries,
# so this difference should be ~0 up to floating point error.
A = np.random.rand( 10, 10 )
np.linalg.norm( A, 'fro' ) ** 2 - ( A ** 2 ).sum( )

In [ ]:
print model_learn_id.coefs.shape

In [ ]:
w = np.random.rand( 1, model_learn_id.coefs.shape[ 1 ] )
w_aa = np.zeros( ( 1, model_learn_id.coefs.shape[ 1 ] ) )
w_aa[ 0, 0 ] = 1   # unit vector along the first coordinate
# forward finite difference along w_aa; compare with risk_grad[ 0, 0 ] below
( risk_func( w + 1e-8 * w_aa, risk, model_learn_id, train_X[ :1, : ], train_y[ :1, : ] ) - risk_func( w, risk, model_learn_id, train_X[ :1, : ], train_y[ :1, : ] ) ) / 1e-8

In [ ]:
risk_grad( w, risk, model_learn_id, train_X[ :1, : ], train_y[ :1, : ] )[ 0, 0 ]

In [ ]:
scipy.optimize.check_grad( risk_func, risk_grad, w.ravel( ), risk, model_learn_id, train_X[ :1, : ], train_y[ :1, : ] )   # check_grad returns a scalar and expects a flat x0

In [ ]:
# Hi,

# Here are the results. I used a train/test/validation procedure with
# 2500/1000/1500 data points sampled, repeated 10 times. Parameters were
# learned on the train/test portion; MSE results are on the validation set.
# The decomposable kernel is used with pinv( D - W ), where W is a linear,
# quadratic or Gaussian kernel over the output features.

# Average MSEs over the 10 repeats:

# ORFF with k.I:      0.01975 runtime 71.3
# ORFF with k.Alin:   0.01953 runtime 75.3
# ORFF with k.Aquad:  0.01933 runtime 74.1
# ORFF with k.Agauss: 0.01929 runtime 75.5
# Ridge regression:   0.01957 runtime 1.1
# Knn:                0.01929 runtime 12.3
# Extra-trees:        0.01933 runtime 42.7
# Linear regression:  0.13484 runtime 81.3

# -> We match state-of-the-art performance.

# The linear regression seems to have failed; I don't know why. I use Romain's code:

#   estimator.fit( X_train, Y_train )
#   Y_test_predict[ name ] = estimator.predict( X_val )


# I also ran the experiments on the whole dataset, i.e. a 15226/6085/9164 split
# (sum = 30475). Again, I randomly sampled the split 10 times. Due to the large
# size, I reused the parameters learned on the smaller splits. I don't yet have
# results for the competing approaches on the full dataset.

# ORFF with k.I:      0.01909 runtime 426.4
# ORFF with k.Alin:   0.01982 runtime 409.0
# ORFF with k.Aquad:  0.01930 runtime 435.0
# ORFF with k.Agauss: 0.01913 runtime 388.4
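
In [ ]:
# A minimal sketch (an assumption, not the exact experiment code) of the
# "pinv(D - W)" output matrix mentioned above: W is a similarity kernel over
# the output features and D - W its graph Laplacian; Y_demo stands in for the
# real output features.
Y_demo = np.random.rand( 5, 100 )                    # 5 hypothetical output dims
dists = scipy.spatial.distance.squareform( scipy.spatial.distance.pdist( Y_demo ) )
W = np.exp( -dists ** 2 )                            # Gaussian kernel over outputs
A_out = np.linalg.pinv( np.diag( W.sum( axis = 1 ) ) - W )
A_out = A_out / np.linalg.norm( A_out, 2 )           # unit spectral norm, as for LT
print A_out.shape                                    # (5, 5)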