In [8]:
import imp
import yaml
import csv
#modl = imp.load_source('read_model_yaml', '../read_model_yaml.py')
modl = imp.load_source('read_model_yaml', '../../read_model_yaml.py')

In [10]:
a = modl.read_model_yaml("../../model/spec/SS/SS_RF_1.yaml")

In [11]:
a.keys()


Out[11]:
dict_keys(['label_colname', 'test_data_source', 'simulations', 'train_data_source', 'parameters', 'author', 'model_type', 'holdout_data_source', 'predict'])

In [12]:
a.values()


Out[12]:
dict_values(['label_granular', 'clean_test.csv', False, 'clean_train.csv', {'criterion': 'gini', 'max_depth': 2, 'n_estimators': 10, 'max_features': 'auto', 'n_jobs': 1}, 'SHAMINDRA', 'RF', 'clean_holdout.csv', False])

In [13]:
# Run model
import imp
import yaml
import sys
modl = imp.load_source('read_model_yaml', '../read_model_yaml.py')

inp_yaml = sys.argv[1]

def write(filename,results,labels,level=1):
    """
    Write results into csv file.
    
    Parameters
    ----------
    filename : string
        filename to output the result
    results : list or numpy array
        results of some simulation
    labels : list
        labels for the results, i.e. names of parameters and metrics
    level : int, either 1 or 2
        first dimension of the 'results' array
        
    """
    ## # Write into csv file
    with open(filename, 'wb') as f:
        writer = csv.writer(f)
        writer.writerow(labels)
        if level == 2:
            writer.writerows(results)
        else:
            writer.writerow(results)

    

def run_model(inp_yaml):
    """Apply trees in the forest to X, return leaf indices.
        Parameters
        ----------
        inp_yaml : A yaml file with model specifications

        Returns
        -------
        parameters_dict : A python dictionary with the model specifications
                          to be used to encode metadata for the model
                          and pass into specific model functions e.g. random
                          forest
        """    
    
    
    # TODO: Fix this
    X_train = 
    Y_train = 
    X_test  = 
    Y_test  = 
    
    yaml_params = modl.read_model_yaml(inp_yaml)
    
    # TODO: define output file name
    filename = 
    
    if yaml_params["model_type"] = "RF":
        n_estimators  = yaml_params["parameters"]["n_estimators"]
        criterion     = yaml_params["parameters"]["criterion"]   
        max_features  = yaml_params["parameters"]["max_features"]          
        max_depth     = yaml_params["parameters"]["max_depth"]      
        n_jobs        = yaml_params["parameters"]["n_jobs"]
        
        # Define labels of output
        labels = ["logloss",
                  "miss_err",
                  "prec",
                  "recall",
                  "f1",
                  "C",
                  "n_estimators",
                  "criterion",   
                  "max_features",
                  "max_depth"]
        
        # Run many simulations in parallel using as many cores as necessary
        if yaml_params["simulations"]:
            # Run simulation
            result = rf_simulation(X_train      = X_train,
                                  Y_train      = Y_train,
                                  X_test       = X_test,
                                  Y_test       = Y_test,
                                  n_estimators = n_estimators,
                                  criterion    = criterion,
                                  max_features = max_features,
                                  max_depth    = max_depth)
            # Write into csv
            write(filename,result,labels,level=1)
        
        # Run a single simulation
        else:
            # Run simulation
            result = rf(X_train      = X_train,
                       Y_train      = Y_train,
                       X_test       = X_test,
                       Y_test       = Y_test,
                       n_estimators = n_estimators,
                       criterion    = criterion,
                       max_features = max_features,
                       max_depth    = max_depth)
            # Write into csv
            write(filename,result,labels,level=2)
        

# Run the model!    
run_model(inp_yaml = inp_yaml)


  File "<ipython-input-13-44044386c4ef>", line 25
    X_train =
              ^
SyntaxError: invalid syntax

In [ ]: