In [1]:
import numpy as np
from sklearn import ensemble
from sklearn import cross_validation
from sklearn import metrics
from sklearn import preprocessing
from sklearn import linear_model
import OVFM.FeatureMap as fm
import OVFM.Model as mdl
import OVFM.Risk as rsk
import OVFM.LearningRate as lr
import OVFM.DataGeneration as dg
import OVFM.SGD as sgd
import scipy.optimize
import matplotlib.pyplot as plt
In [2]:
# Load the training data set (climate.csv: one row per month; the data is
# grouped as 13 years x 12 months per location -- TODO confirm layout).
train = np.genfromtxt('climate.csv', delimiter=',', skip_header=1)
# test = np.genfromtxt('test.csv', delimiter=',',skip_header=1)
# Replace missing values ('NaN') with the per-column mean.
# NOTE(review): preprocessing.Imputer was removed in scikit-learn 0.22;
# newer environments should use sklearn.impute.SimpleImputer instead.
imp = preprocessing.Imputer( missing_values='NaN', strategy='mean', axis=0 )
train = imp.fit_transform( train )
# scaler = preprocessing.StandardScaler( )
# train = scaler.fit_transform( train )
# Reshape to (locations, 13 years, 12 months, variables) and drop the first
# 4 columns (presumably identifiers/coordinates -- TODO confirm).
# Use floor division (//) so the dimension is an integer under Python 3 as
# well as Python 2; the value is unchanged since the row count is a multiple
# of 12 * 13.
train = train.reshape( ( train.shape[ 0 ] // ( 12 * 13 ), 13, 12, train.shape[ 1 ] ) ).astype( float )[:,:,:,4:]
meanl = train.mean( axis = ( 1, 2 ) )[ :, np.newaxis, np.newaxis, : ] # mean per location for each variable over all years/months
std = np.sqrt( train.var( axis = ( 1, 2 ) )[ :, np.newaxis, np.newaxis, : ] ) # std per location for each variable over all years/months
std[ std == 0 ] = 1  # avoid division by zero for constant series
train = ( train - meanl ) / std  # standardize each (location, variable) series
# Flatten years x months into one time axis: (locations, 156, variables).
train = train.reshape( ( train.shape[ 0 ], 12 * 13, train.shape[ 3 ] ) )
# One-step-ahead pairs: predict the variables at month t+1 from month t.
train_X = train[:,:-1,:]
train_y = train[:,1:,:]
A = 5  # number of years reserved for training
train_X_Ay = train[:,:12*A-1,:]
train_y_Ay = train[:,1:12*A,:]
# Stack all locations into flat (samples, variables) matrices.
train_X = train_X.reshape( ( train_X.shape[ 0 ] * train_X.shape[ 1 ], train_X.shape[ 2 ] ) )
train_y = train_y.reshape( ( train_y.shape[ 0 ] * train_y.shape[ 1 ], train_y.shape[ 2 ] ) )
train_X_Ay = train_X_Ay.reshape( ( train_X_Ay.shape[ 0 ] * train_X_Ay.shape[ 1 ], train_X_Ay.shape[ 2 ] ) )
train_y_Ay = train_y_Ay.reshape( ( train_y_Ay.shape[ 0 ] * train_y_Ay.shape[ 1 ], train_y_Ay.shape[ 2 ] ) )
# Hyper-parameters for the random-feature model.
D = 100       # number of random features
gamma = 0.01  # kernel bandwidth parameter
C = 0.0001    # regularization constant (not used in the visible cells)
eta0 = 1.0    # initial learning rate (not used in the visible cells)
In [3]:
# Output similarity matrix for the decomposable kernel: empirical covariance
# between the output variables.
L = np.cov( train_y.T )
# Alternative choices for L, kept for reference:
# L = L / np.linalg.norm( L )
# Dg = np.diag( np.diag( M ) + np.sum( M, axis = 0 ) )
# L = np.linalg.inv( Dg - M )
# L = 2 * np.random.rand( train_y.shape[ 1 ], train_y.shape[ 1 ] ) - 1
# L = np.dot( L, L.T )
# L = np.eye( train_y.shape[ 1 ] )
# P[i] is the identity with its (i, i) entry zeroed -- a per-variable mask.
# NOTE(review): P is not used by any of the visible cells; candidate for removal.
P = np.zeros( ( train_X.shape[ 1 ], train_X.shape[ 1 ], train_X.shape[ 1 ] ) )
# range (not Python-2-only xrange) iterates identically here under both
# Python 2 and 3.
for i in range( P.shape[ 0 ] ):
    P[ i, :, : ] = np.eye( train_X.shape[ 1 ] )
    P[ i, i, i ] = 0
# Operator-valued random Fourier feature model with a decomposable kernel;
# A = L couples the output dimensions.
model = mdl.Model( fm.DecomposableFF( gamma, train_X.shape[ 1 ], D, A = L ) )
risk = rsk.Ridge( 1e-3, 0.1 )  # ridge risk -- see OVFM.Risk for the argument meaning
lc = lr.Constant( 0.1 )        # constant learning rate (first SGD rate)
lb = lr.Constant( 0.001 )      # constant learning rate (second SGD rate)
ovksgd = sgd.SGD( risk, 10, lc, lb, 10, 0 )
# Train on the first A years only; later months are held out for forecasting.
ovksgd.fit( model, train_X_Ay, train_y_Ay )
# Mean Jacobian of the learned map over the first 12*13-1 training rows
# (NOTE(review): train_X_Ay is flattened over locations, so this slice may
# span several locations -- confirm intent).
J = np.mean( model.Jacobian( train_X_Ay[ :12 * 13 - 1, : ] ), axis = 0 )
In [4]:
# Fraction of exactly-zero coefficients (sparsity of the fitted model).
# print(...) is valid in both Python 2 and 3 for a single argument.
print( float( np.sum( model.coefs == 0 ) ) / model.coefs.size )
row_labels = ['CO2','CH4','CO','H2','WET','CLD','VAP','PRE','FRS','DTR','TMN','TMP','TMX','GLO','ETR','ETRN','DIR','UV']
column_labels = ['CO2','CH4','CO','H2','WET','CLD','VAP','PRE','FRS','DTR','TMN','TMP','TMX','GLO','ETR','ETRN','DIR','UV']
# Free-run forecast: seed the series with the first A years of observations,
# then feed each prediction back in as the next input.
p_serie = np.empty( ( 12 * 13 - 1, train_X.shape[ 1 ] ) )
p_serie[ :12 * A, : ] = train_X[ :12 * A, : ]
for i in range( 12 * A, 12 * 13 - 1 ):  # range works under Python 2 and 3
    p_serie[ i, : ] = model( p_serie[ i - 1, : ].reshape( ( 1, train_X.shape[ 1 ] ) ) )
fig, axes = plt.subplots( nrows=2, ncols=2, sharex=False, sharey=False )
# Bottom-left panel: heat map of the mean Jacobian (variable-to-variable
# influence of the learned one-step map).
im = axes[ 1, 0 ].pcolor( J )
im.set_cmap( 'hot' )
plt.colorbar( im )
axes[ 1, 0 ].set_xlim([0,len(row_labels)])
axes[ 1, 0 ].set_ylim([0,len(column_labels)])
# Center the tick labels on the pcolor cells.
# NOTE(review): 18 labels are listed but J's size depends on how many data
# columns survived the [:, :, :, 4:] slice -- confirm they match.
axes[ 1, 0 ].set_xticks(np.arange(J.shape[1])+0.5, minor=False)
axes[ 1, 0 ].set_yticks(np.arange(J.shape[0])+0.5, minor=False)
axes[ 1, 0 ].set_xticklabels(row_labels, minor=False)
axes[ 1, 0 ].set_yticklabels(column_labels, minor=False)
# Top-left panel, variable 0: free-run forecast (red), observations (black),
# and one-step-ahead predictions from the true inputs (blue).
axes[ 0, 0 ].plot( np.arange( 0, 12 * 13 - 1 ), p_serie[ :, 0 ], color='r' )
axes[ 0, 0 ].plot( np.arange( 0, 12 * 13 - 1 ), train_X[ :12 * 13 - 1, 0 ], color='k' )
axes[ 0, 0 ].plot( np.arange( 0, 12 * 13 - 1 ), np.concatenate( ( [ train_X[ 0, 0 ] ], model( train_X[ :12 * 13 - 2, : ] )[ :, 0 ] ) ), color='b' )
# Red vertical line: end of the training window.
axes[ 0, 0 ].axvline( x = 12 * A - 1, color = 'r' )
plt.show( )
In [5]:
# Dump the learned coefficient array for inspection.
# print(...) is valid in both Python 2 and 3 for a single argument.
print( model.coefs )
In [ ]: