In [1]:
import numpy as np
import pandas as pd

from sknn.mlp import Classifier, Layer
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import log_loss
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from multiprocessing import Pool, TimeoutError
from multiprocessing import cpu_count
from datetime import timedelta

import sys
import csv
import itertools
import time

In [2]:
def two_layers_nnet(X_train,
                    Y_train,
                    X_test,
                    Y_test,
                    method1="Tanh",
                    neurons1=5,
                    method2="",
                    neurons2=0,
                    decay=0.0001,
                    learning_rate=0.001,
                    n_iter=25,
                    random_state=1):
    """
    Parameters
    ----------
    X_train       : pandas data frame
        data frame of features for the training set
    Y_train       : pandas data frame
        data frame of labels for the training set
    X_test        : pandas data frame
        data frame of features for the test set
    Y_test        : pandas data frame
        data frame of labels for the test set
    method1       : str
        method used for the first layer
    neurons1      : int
        number of neurons of the first layer
    method2       : str
        method used for the second layer ("" if there is no second layer)
    neurons2      : int
        number of neurons of the second layer (0 if there is no second layer)
    decay         : float
        weight decay
    learning_rate : float
        learning rate
    n_iter        : int
        number of iterations
    random_state  : int
        seed for weight initialization
        
    Result:
    -------
    numpy array
        logloss    : averaged logarithmic loss
        miss_err   : misclassification error rate
        prec       : precision
        recall     : recall
        f1         : f1 score
        parameters : previous parameters in the order previously specified
    """

    labels = np.unique(Y_train)
    
    # Scale data: fit the scaler on the training set only, then apply the
    # same transformation to the test set to avoid information leakage
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    # Layers: one hidden layer if neurons2 == 0, otherwise two
    if neurons2 == 0:
        layers = [Layer(method1, weight_decay=decay, units=neurons1),
                  Layer("Softmax")]
    else:
        layers = [Layer(method1, weight_decay=decay, units=neurons1),
                  Layer(method2, weight_decay=decay, units=neurons2),
                  Layer("Softmax")]
        
    # Define the classifier
    nn = Classifier(layers,
                    learning_rate=learning_rate,
                    random_state=random_state,
                    n_iter=n_iter)
    # Fit
    nn.fit(X_train, Y_train)
    # Predict
    Y_hat = nn.predict(X_test)
    Y_probs = nn.predict_proba(X_test)
    
    # Misclassification error rate
    miss_err = 1 - accuracy_score(Y_test, Y_hat)
    # Log loss (eps clips predicted probabilities away from 0 and 1;
    # note 10^(-15) in Python is XOR, not exponentiation)
    eps = 1e-15
    logloss = log_loss(Y_test, Y_probs, eps=eps)
    
    # Precision
    prec = precision_score(y_true=Y_test, y_pred=Y_hat, labels=labels, average='micro')
    # Recall
    recall = recall_score(y_true=Y_test, y_pred=Y_hat, labels=labels, average='micro')
    # F1
    f1 = f1_score(y_true=Y_test, y_pred=Y_hat, labels=labels, average='micro')
    
    # Summarized results
    result = np.array([logloss,
                       miss_err,
                       prec,
                       recall,
                       f1,
                       method1,
                       neurons1,
                       method2,
                       neurons2,
                       decay,
                       learning_rate,
                       n_iter,
                       random_state])
    return result
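
A minimal smoke test for the function above, assuming sknn (scikit-neuralnetwork) and its dependencies are installed. The synthetic dataset, the split, and the chosen layer settings are illustrative, not part of the original notebook; on older scikit-learn versions, train_test_split lives in sklearn.cross_validation instead of sklearn.model_selection.

In [ ]:
# Illustrative only: evaluate one two-layer configuration on synthetic data.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, Y = make_classification(n_samples=500, n_features=10, random_state=0)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3,
                                                    random_state=0)

result = two_layers_nnet(X_train, Y_train, X_test, Y_test,
                         method1="Tanh", neurons1=10,
                         method2="Rectifier", neurons2=5)
# result holds [logloss, miss_err, prec, recall, f1] followed by the parameters
print(result)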

In [3]:
def processInput(args):
    # Unpack the pooled argument tuple (tuple unpacking in a function
    # signature is Python 2-only syntax)
    X_train, Y_train, X_test, Y_test, parameters, index = args
    # Look up the parameter set for this index
    method1, neurons1, method2, neurons2, decay, learning_rate, n_iter, random_state = parameters[index]
    
    # Run nnet
    result = two_layers_nnet(X_train,
                             Y_train,
                             X_test,
                             Y_test,
                             method1,
                             neurons1,
                             method2,
                             neurons2,
                             decay,
                             learning_rate,
                             n_iter,
                             random_state)
    return result


def two_layers_nnet_simulation(X_train,
                               Y_train,
                               X_test,
                               Y_test,
                               method1,
                               neurons1,
                               method2,
                               neurons2,
                               decay,
                               learning_rate,
                               n_iter,
                               random_state):
    """
    Parameters:
    -----------
    Same parameters as two_layers_nnet, in a list format.
    
    Result:
    ------
    List of Lists of results from two_layers_nnet.
        One list corresponds to one set of parameters
    """
    
    print('Launching Simulation...')
    start = time.time()
    
    # Build the full Cartesian product of the candidate parameter values
    # (a plain list keeps the original types; np.array would coerce
    # everything to strings)
    param = [method1,
             neurons1,
             method2,
             neurons2,
             decay,
             learning_rate,
             n_iter,
             random_state]

    parameters = list(itertools.product(*param))
    
    indexes = range(len(parameters))
    print "Number of sets of parameters: %s.\n" %len(parameters)
    
    print 'Parameters:\n-----------'
    print np.array(parameters)
    

    # Number of worker processes: capped at the number of available CPUs
    num_cpu = cpu_count()
    print("\nNumber of identified CPUs: %s.\n" % num_cpu)
    num_workers = min(num_cpu, len(parameters))
    
    # Parallelization: one argument tuple per parameter set
    tuples_indexes = [(X_train, Y_train, X_test, Y_test, parameters, index)
                      for index in indexes]

    # Start the worker pool
    print('Start %s worker processes.\n' % num_workers)
    print('Running...')
    pool = Pool(processes=num_workers)
    results = pool.map(processInput, tuples_indexes)
    pool.close()
    pool.join()
    
    # Results
    print('Results:\n--------')
    print(results)
    end = time.time()
    elapsed = end - start
    print('End of Simulation.\nElapsed time: %s' % str(timedelta(seconds=elapsed)))
    
    return results
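
The simulation returns one result row per parameter set but leaves persisting them to the caller. A minimal sketch, reusing the hypothetical splits from the earlier example cell: each argument is a list of candidate values, the full Cartesian product is evaluated in parallel, and the rows are then saved with the csv module imported at the top. The grid values and the "results.csv" filename are illustrative.

In [ ]:
# Illustrative grid: 2 activations x 2 widths x 2 learning rates = 8 runs.
results = two_layers_nnet_simulation(X_train, Y_train, X_test, Y_test,
                                     method1=["Tanh", "Rectifier"],
                                     neurons1=[5, 10],
                                     method2=[""],
                                     neurons2=[0],
                                     decay=[0.0001],
                                     learning_rate=[0.001, 0.01],
                                     n_iter=[25],
                                     random_state=[1])

# Persist one row per parameter set ("results.csv" is a placeholder path).
header = ["logloss", "miss_err", "prec", "recall", "f1",
          "method1", "neurons1", "method2", "neurons2",
          "decay", "learning_rate", "n_iter", "random_state"]
with open("results.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerows(results)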

In [4]:
def two_layers_nnet_predict(X_train,
                            Y_train,
                            X_test,
                            method1="Tanh",
                            neurons1=5,
                            method2="",
                            neurons2=0,
                            decay=0.0001,
                            learning_rate=0.001,
                            n_iter=25,
                            random_state=1):
    """
    Parameters
    ----------
    X_train       : pandas data frame
        data frame of features for the training set
    Y_train       : pandas data frame
        data frame of labels for the training set
    X_test        : pandas data frame
        data frame of features for the test set
    method1       : str
        method used for the first layer
    neurons1      : int
        number of neurons of the first layer
    method2       : str
        method used for the second layer ("" if there is no second layer)
    neurons2      : int
        number of neurons of the second layer (0 if there is no second layer)
    decay         : float
        weight decay
    learning_rate : float
        learning rate
    n_iter        : int
        number of iterations
    random_state  : int
        seed for weight initialization
        
    Result:
    -------
    tuple of numpy arrays
        (predicted classes, predicted probabilities)
    """

    # Scale data: fit the scaler on the training set only, then apply the
    # same transformation to the test set to avoid information leakage
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    # Layers: one hidden layer if neurons2 == 0, otherwise two
    if neurons2 == 0:
        layers = [Layer(method1, weight_decay=decay, units=neurons1),
                  Layer("Softmax")]
    else:
        layers = [Layer(method1, weight_decay=decay, units=neurons1),
                  Layer(method2, weight_decay=decay, units=neurons2),
                  Layer("Softmax")]
        
    # Define the classifier
    nn = Classifier(layers,
                    learning_rate=learning_rate,
                    random_state=random_state,
                    n_iter=n_iter)
    # Fit
    nn.fit(X_train, Y_train)
    # Predict
    Y_hat = nn.predict(X_test)
    Y_probs = nn.predict_proba(X_test)
    
    # Return predicted classes and probabilities
    result = (Y_hat, Y_probs)
    return result
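
A minimal sketch of getting predictions rather than evaluation scores, again reusing the hypothetical splits from the first example cell; the layer settings are illustrative.

In [ ]:
# Illustrative call: fit on the training split, then return predicted
# classes and class probabilities for the test features.
Y_hat, Y_probs = two_layers_nnet_predict(X_train, Y_train, X_test,
                                         method1="Tanh", neurons1=10)
print(Y_hat[:5])    # predicted class labels
print(Y_probs[:5])  # per-class probabilities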