In [1]:
import numpy as np
from sklearn import ensemble
from sklearn import cross_validation
from sklearn import metrics
from sklearn import preprocessing
from sklearn import linear_model

import OVFM.FeatureMap as fm
import OVFM.Model as mdl
import OVFM.Risk as rsk
import OVFM.LearningRate as lr
import OVFM.DataGeneration as dg
import OVFM.SGD as sgd

import scipy.optimize

import matplotlib.pyplot as plt

In [2]:
# Load the raw climate series; the first row is a header.
train = np.genfromtxt('climate.csv', delimiter=',',skip_header=1)

# Replace missing entries ('NaN') with the per-column mean.
# NOTE(review): preprocessing.Imputer is removed in modern scikit-learn;
# the replacement is sklearn.impute.SimpleImputer.
imp = preprocessing.Imputer( missing_values='NaN', strategy='mean', axis=0 )
train = imp.fit_transform( train )

# Reshape to ( locations, 13, 12, variables ) -- presumably 13 years of 12
# monthly samples per location (TODO confirm), and drop the first 4 columns.
# '//' makes the integer division explicit; the original '/' only worked
# under Python 2's truncating division.
train = train.reshape( ( train.shape[ 0 ] // ( 12 * 13 ), 13, 12, train.shape[ 1 ] ) ).astype( float )[:,:,:,4:]

# Standardise each ( location, variable ) series over its 13 * 12 samples.
meanl = train.mean( axis = ( 1, 2 ) )[ :, np.newaxis, np.newaxis, : ] # mean per location for each variable
std = train.std( axis = ( 1, 2 ) )[ :, np.newaxis, np.newaxis, : ]   # std per location for each variable
std[ std == 0 ] = 1  # constant series: avoid division by zero
train = ( train - meanl ) / std

# Flatten years x months into a single time axis: ( locations, 156, variables ).
train = train.reshape( ( train.shape[ 0 ], 12 * 13, train.shape[ 3 ] ) )

# One-step-ahead (X_t -> X_{t+1}) pairs over the full series ...
train_X = train[:,:-1,:]
train_y = train[:,1:,:]

# ... and over the first A years only (used as the training window below).
A = 5
train_X_Ay = train[:,:12*A-1,:]
train_y_Ay = train[:,1:12*A,:]

# Stack all locations into one ( samples, variables ) matrix.
train_X = train_X.reshape( ( train_X.shape[ 0 ] * train_X.shape[ 1 ], train_X.shape[ 2 ] ) )
train_y = train_y.reshape( ( train_y.shape[ 0 ] * train_y.shape[ 1 ], train_y.shape[ 2 ] ) )

train_X_Ay = train_X_Ay.reshape( ( train_X_Ay.shape[ 0 ] * train_X_Ay.shape[ 1 ], train_X_Ay.shape[ 2 ] ) )
train_y_Ay = train_y_Ay.reshape( ( train_y_Ay.shape[ 0 ] * train_y_Ay.shape[ 1 ], train_y_Ay.shape[ 2 ] ) )

# Hyper-parameters.
D = 100       # number of random features
gamma = 0.01  # kernel bandwidth
C = 0.0001    # NOTE(review): unused in the visible cells
eta0 = 1.0    # NOTE(review): unused in the visible cells

In [3]:
# Output-structure matrix for the decomposable kernel: empirical covariance
# of the targets, coupling the output dimensions.
L = np.cov( train_y.T )

# P[ i ] is the identity with the ( i, i ) entry zeroed, i.e. a projector
# dropping the i-th output dimension.  Built in one vectorized step instead
# of the original per-index loop.
# NOTE(review): P is never used in the visible cells -- candidate for removal.
d = train_X.shape[ 1 ]
P = np.tile( np.eye( d ), ( d, 1, 1 ) )
diag = np.arange( d )
P[ diag, diag, diag ] = 0

# Operator-valued random-feature model with D features and output structure L.
model = mdl.Model( fm.DecomposableFF( gamma, d, D, A = L ) )
risk  = rsk.Ridge( 1e-3, 0.1 )
lc = lr.Constant( 0.1 )    # step size (coefficients)
lb = lr.Constant( 0.001 )  # step size (second parameter group)

# Fit by SGD on the first-A-years window, then average the model Jacobian
# over the first location's series as a proxy for variable influence.
ovksgd = sgd.SGD( risk, 10, lc, lb, 10, 0 )
ovksgd.fit( model, train_X_Ay, train_y_Ay )
J = np.mean( model.Jacobian( train_X_Ay[:12*13-1,:] ), axis = 0 )

In [7]:
# Fraction of exactly-zero learned coefficients (sparsity of the fit).
print float( np.sum( model.coefs == 0 ) ) / model.coefs.size

# Variable names for the Jacobian heat-map axes (rows == columns).
row_labels = ['CO2','CH4','CO','H2','WET','CLD','VAP','PRE','FRS','DTR','TMN','TMP','TMX','GLO','ETR','ETRN','DIR','UV']
column_labels = ['CO2','CH4','CO','H2','WET','CLD','VAP','PRE','FRS','DTR','TMN','TMP','TMX','GLO','ETR','ETRN','DIR','UV']

# Free-running (autoregressive) rollout: seed with the first A years of
# observations, then feed each prediction back in as the next input.
p_serie = np.empty( ( 12 * 13 - 1, train_X.shape[ 1 ] ) )
p_serie[ :12 * A, : ] = train_X[ :12 * A, : ]
for i in xrange( 12 * A, 12 * 13 - 1 ):
	p_serie[ i, : ] = model( p_serie[ i - 1, : ].reshape( ( 1, train_X.shape[ 1 ] ) ) )


# 2x2 figure; NOTE(review): only axes[1,0] (Jacobian heat map) and
# axes[0,0] (time series, variable 0) are actually populated.
fig, axes = plt.subplots( nrows=2, ncols=2, sharex=False, sharey=False )
# Heat map of the mean Jacobian J: influence of input variable (x) on
# predicted variable (y).
im = axes[ 1, 0 ].pcolor( J )
im.set_cmap( 'hot' )
plt.colorbar( im )
axes[ 1, 0 ].set_xlim([0,len(row_labels)])
axes[ 1, 0 ].set_ylim([0,len(column_labels)])
# Ticks centred on the pcolor cells (+0.5).
axes[ 1, 0 ].set_xticks(np.arange(J.shape[1])+0.5, minor=False)
axes[ 1, 0 ].set_yticks(np.arange(J.shape[0])+0.5, minor=False)
axes[ 1, 0 ].set_xticklabels(row_labels, minor=False)
axes[ 1, 0 ].set_yticklabels(column_labels, minor=False)
# Variable 0: free-running prediction (red), observations (black), and
# one-step-ahead prediction from true inputs (blue).
axes[ 0, 0 ].plot( np.arange( 0, 12 * 13 - 1 ), p_serie[ :, 0 ], color='r' )
axes[ 0, 0 ].plot( np.arange( 0, 12 * 13 - 1 ), train_X[ :12 * 13 - 1, 0 ], color='k' )
axes[ 0, 0 ].plot( np.arange( 0, 12 * 13 - 1 ), np.concatenate( ( [ train_X[ 0, 0 ] ], model( train_X[ :12 * 13 - 2, : ] )[ :, 0 ] ) ), color='b' )
# Vertical line marking the end of the training window (A years).
axes[ 0, 0 ].axvline( x = 12 * A - 1, color = 'r' )


plt.show( )


0.0

In [6]:
# Inspect the learned coefficient matrix (Python 2 print statement).
print model.coefs


[[ 0.53556457  0.27564512  0.25418803 ..., -0.06101118 -0.08353064
  -0.23113024]]

In [ ]: