# In [ ]:
import sys
import imp
import yaml
import csv
import pandas as pd
import re
from rf import *
from svm import *
modl = imp.load_source('read_model_yaml', 'read_model_yaml.py')

# Path to the model-specification YAML file, taken from the first
# command-line argument; raises IndexError when no argument is supplied.
inp_yaml = sys.argv[1]

def write_results_txt(filename, result):
    """
    Write results into a plain-text file, one item per paragraph.

    Every item of `result` is converted with ``str`` and written followed
    by an empty line. An existing file at `filename` is overwritten.

    Parameters
    ----------
    filename : string
        path of the file to write the results to
    result : iterable
        items to write; any object is accepted, including tuples
    """
    with open(filename, "w") as fp:
        # Wrap each item in a 1-tuple: with a bare ``% item`` a tuple item
        # would be unpacked as several format arguments, which raises
        # TypeError (or silently drops the parentheses of a 1-tuple).
        fp.writelines("%s\n\n" % (item,) for item in result)


def _output_txt_path(inp_yaml):
    """Build the results file path that corresponds to a YAML spec path.

    The spec's parent folder and base name are mirrored under
    ``data/output/``; e.g. ``a/b/grid1/model_3.yaml`` maps to
    ``data/output/grid1/model_3.txt``.
    """
    path_parts = inp_yaml.split("/")
    # Address the folder and file from the end of the path so the result does
    # not depend on how many leading directories the caller passed in.
    folder_name = path_parts[-2]
    # Strip only the extension, so ".yml" specs are handled like ".yaml" ones.
    file_name = path_parts[-1].rsplit(".", 1)[0]
    return 'data/output/' + folder_name + '/' + file_name + '.txt'


def _load_features_and_labels(data_source, source_column):
    """Load one cleaned, gzip-compressed CSV and split it into (X, Y) arrays."""
    # Imported locally so this code does not rely on `np` leaking into the
    # module namespace through the star imports at the top of the file.
    import numpy as np

    frame = pd.read_csv("data/output/model_clean_data/" + data_source,
                        compression='gzip', index_col=None)
    # The target column and the listed non-feature columns are excluded from
    # the feature matrix; `source_column` differs between train and test.
    features = np.array(
        frame.drop(['labels', source_column, 'index', 'Time'], axis=1))
    labels = np.array(frame['labels'])
    return features, labels


def execute_model(inp_yaml):
    """Run the model described by a YAML spec and save its results.

    The spec is parsed with ``modl.read_model_yaml``; the train and test
    sets it names are loaded from ``data/output/model_clean_data/``; the
    model selected by ``model_type`` is run once and whatever it returns is
    written with ``write_results_txt`` to
    ``data/output/<spec folder>/<spec name>.txt``.

    Parameters
    ----------
    inp_yaml : string
        "/"-separated path of the YAML file with the model specification.
        The parsed spec must provide ``train_data_source``,
        ``test_data_source``, ``model_type`` ("RF" or "SVM") and a
        ``parameters`` mapping with ``n_estimators``, ``criterion``,
        ``max_features`` and ``max_depth`` for RF, or ``kernel``,
        ``degree``, ``gamma`` and ``tol`` for SVM.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        if ``model_type`` is neither "RF" nor "SVM"
    KeyError
        if a required key is missing from the parsed spec
    IndexError
        if `inp_yaml` has no parent folder component
    """
    # Read in and parse all parameters from the YAML file
    yaml_params = modl.read_model_yaml(inp_yaml)

    # Fail fast on an unknown model type instead of loading both datasets
    # and then silently producing no output.
    model_type = yaml_params["model_type"]
    if model_type not in ("RF", "SVM"):
        raise ValueError("Unsupported model_type %r in %s; expected 'RF' or "
                         "'SVM'" % (model_type, inp_yaml))

    # Define output file name based on input
    output_txt_file = _output_txt_path(inp_yaml)

    #-------------------------------------------------
    # Create Train and Test Datasets
    #-------------------------------------------------
    X_train, Y_train = _load_features_and_labels(
        yaml_params["train_data_source"], 'train.csv')
    X_test, Y_test = _load_features_and_labels(
        yaml_params["test_data_source"], 'test.csv')

    model_params = yaml_params["parameters"]

    if model_type == "RF":
        #-------------------------------------------------
        # Run RF (RANDOM FOREST)
        #-------------------------------------------------
        # NOTE(review): a spec may also carry `n_jobs`; it is not forwarded
        # to rf() here - confirm whether rf() accepts it before wiring it in.
        print("running RF WITHOUT simulation...")
        result = rf(X_train=X_train,
                    Y_train=Y_train,
                    X_test=X_test,
                    Y_test=Y_test,
                    n_estimators=model_params["n_estimators"],
                    criterion=model_params["criterion"],
                    max_features=model_params["max_features"],
                    max_depth=model_params["max_depth"])
        finished_message = "finished - rf without simulation"
    else:
        #-------------------------------------------------
        # Run SVM (SUPPORT VECTOR MACHINE)
        #-------------------------------------------------
        print("running SVM WITHOUT simulation...")
        result = svm(X_train=X_train,
                     Y_train=Y_train,
                     X_test=X_test,
                     Y_test=Y_test,
                     kernel=model_params["kernel"],
                     C=1.0,
                     degree=model_params["degree"],
                     gamma=model_params["gamma"],
                     tol=model_params["tol"],
                     decision_function_shape='ovr')
        finished_message = "finished - SVM without simulation"

    # Write into text file, then report completion (same order for both
    # model types, so "finished" is only printed once the results are saved).
    write_results_txt(output_txt_file, result)
    print(finished_message)

# Script entry point: run the model described by the YAML file given on the
# command line. There is no `__main__` guard, so this also executes whenever
# the module is imported.
execute_model(inp_yaml)