In [1]:
from corpus_characterizer import generator_chunker
import numpy as np
import os
# from sklearn.metrics import mean_squared_error,mean_absolute_error, median_absolute_error, mean_squared_log_error, explained_variance_score, r2_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, median_absolute_error, explained_variance_score, \
    r2_score
import pandas as pd
from scipy.stats import describe, kurtosistest, skewtest, normaltest
from Conv1D_LSTM_Ensemble import pair_generator_1dconv_lstm_bagged
from Conv1D_ActivationSearch_BigLoop import pair_generator_1dconv_lstm #NOT BAGGED

# @@@@@@@@@@@@@@ RELATIVE PATHS @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
Base_Path = "./"
image_path = "./images/"
train_path = "./train/"
test_path = "./test/"
analysis_path = "./analysis/"
chunker_path = analysis_path + "chunker/"
preds_path = analysis_path + "preds/"
results_aggregation_path = analysis_path + "results_to_aggregate/"

#create MAIN dataframe (all models and all results) for those without MSE
#create MAIN dataframe with MSE 
#colnames: mape, mse,mae {mean, med, stdev}, metric 1 

#results to aggregate 
results_files_list = list(os.listdir(results_aggregation_path))
counter_110 = 0
counter_6_cols = 0
list_all_ok = []
list_not_ok = []
list_of_shapes = []
correct_colname_list = ['acc','loss','mape','mse','mae','filename']
colname_list_no_mse = ['acc','loss','mape','mae','filename']
#print(results_files_list)
for results_file in results_files_list:
    complete_filename = results_aggregation_path + results_file
    interm_df = pd.read_csv(complete_filename)
#     if 'filename' not in interm_df.columns:
#         print("TROUBLE", str(results_file), interm_df.columns)
    if len(set(correct_colname_list) - set(interm_df.columns)) != 0:
        print("TROUBLE, NOT ALL COLNAMES ARE IN ", str(results_file), interm_df.columns)
        print("the missing part is:", set(correct_colname_list) - set(interm_df.columns))
    if interm_df.shape[0] >= 110:
        counter_110 += 1
    if interm_df.shape[1] == 6:
        counter_6_cols += 1
    if interm_df.shape[0] == 110 and interm_df.shape[1] == 6:
        list_all_ok.append(results_file)
    else:
        print(str(results_file), interm_df.shape[0], interm_df.shape[1])
        list_not_ok.append(results_file)
    if interm_df.shape not in list_of_shapes:
        list_of_shapes.append(interm_df.shape)
    # TODO: if there are 7 columns, drop the first one (the unnamed index); see the read_csv usecols sketch below
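# A minimal sketch of the TODO above (sketch only, not executed), assuming the
# extra first column is the unnamed index that to_csv writes by default:
# if interm_df.shape[1] == 7 and interm_df.columns[0].startswith('Unnamed'):
#     interm_df = interm_df.drop(interm_df.columns[0], axis=1)
# # or skip it at read time, keeping only the expected names:
# # interm_df = pd.read_csv(complete_filename, usecols=correct_colname_list)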
for special_file in list_not_ok:
    complete_special_file = results_aggregation_path + str(special_file)
    interm_df = pd.read_csv(complete_special_file)
    if interm_df.shape[0] != 110 or interm_df.shape[1] != 6:
        print(str(special_file), interm_df.shape[0],interm_df.shape[1])
    if 'filename' not in interm_df.columns:
        print("TROUBLE", str(special_file), interm_df.columns)

print("number of files", len(results_files_list))
print("counter 6 cols", counter_6_cols)
print("counter 110", counter_110)
print("list NOT ok", len(list_not_ok), list_not_ok)
print("list of df shapes", list_of_shapes)
# #folder
# #list dir
# # for each item
# #declare new INTERMEDIATE df
# #load each model's result into INTERMEDIATE df #check for file length.. some are 109 and some are shorter. 
# #aggregate, stdev,avg,median of INTERMEDIATE df

# #save MAIN pandas df as a csv


Using Theano backend.
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'combi_scores_rf5_ws___bag_convdblfilters_lstm_nodense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('the missing part is:', set(['acc', 'loss', 'filename', 'mape']))
('combi_scores_rf5_ws___bag_convdblfilters_lstm_nodense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 108, 4)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'combi_scores_linreg_model__tiny_bidir_1sd.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('the missing part is:', set(['acc', 'loss', 'filename', 'mape']))
('combi_scores_linreg_model__tiny_bidir_1sd.csv', 108, 4)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'combi_scores_et30_ws___bag_convdblfilters_lstm_nodense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('the missing part is:', set(['acc', 'loss', 'filename', 'mape']))
('combi_scores_et30_ws___bag_convdblfilters_lstm_nodense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 108, 4)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'scores_conv__tree_medbidir_nodense_fv1c_slowlr_relu_ca_sigmoid_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', Index([u'Unnamed: 0', u'loss', u'lstm_output_mean_absolute_error', u'mae',
       u'lstm_output_loss', u'mse', u'seq_name',
       u'lstm_output_mean_squared_logarithmic_error',
       u'lstm_output_mean_absolute_percentage_error',
       u'lstm_output_mean_squared_error', u'msle', u'mape',
       u'combined_output_loss'],
      dtype='object'))
('the missing part is:', set(['acc', 'filename']))
('scores_conv__tree_medbidir_nodense_fv1c_slowlr_relu_ca_sigmoid_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 108, 13)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'scores_lstm_rerun__bidir100ea_fv1c_.csv', Index([u'Unnamed: 0', u'loss', u'msle', u'mae', u'mape', u'seq_name', u'mse'], dtype='object'))
('the missing part is:', set(['acc', 'filename']))
('scores_lstm_rerun__bidir100ea_fv1c_.csv', 108, 7)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'scores_sk_conv_lstm_bagged__bag_conv_lstm_dense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', Index([u'Unnamed: 0', u'seq_name', u'mse', u'mse_f3', u'mae', u'mae_f3'], dtype='object'))
('the missing part is:', set(['acc', 'loss', 'filename', 'mape']))
('scores_sk_conv_lstm_bagged__bag_conv_lstm_dense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 108, 6)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'combi_scores_fv1cridgecholesky_50sd.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('the missing part is:', set(['acc', 'loss', 'filename', 'mape']))
('combi_scores_fv1cridgecholesky_50sd.csv', 108, 4)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'combi_scores_elasticnet__bag_convdblfilters_lstm_nodense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('the missing part is:', set(['acc', 'loss', 'filename', 'mape']))
('combi_scores_elasticnet__bag_convdblfilters_lstm_nodense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 108, 4)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'combi_scores_fv1cridgesaga_50sd.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('the missing part is:', set(['acc', 'loss', 'filename', 'mape']))
('combi_scores_fv1cridgesaga_50sd.csv', 108, 4)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'combi_scores_fv1c_elasticnet_50sd.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('the missing part is:', set(['acc', 'loss', 'filename', 'mape']))
('combi_scores_fv1c_elasticnet_50sd.csv', 108, 4)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'scores_lstm_rerun__tiny_bidir_.csv', Index([u'Unnamed: 0', u'loss', u'msle', u'mae', u'mape', u'seq_name', u'mse'], dtype='object'))
('the missing part is:', set(['acc', 'filename']))
('scores_lstm_rerun__tiny_bidir_.csv', 108, 7)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'scores_conv__tree_tinybidir_nodense_fv1c_slowlr_relu_ca_sigmoid_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', Index([u'Unnamed: 0', u'loss', u'lstm_output_mean_absolute_error', u'mae',
       u'lstm_output_loss', u'mse', u'seq_name',
       u'lstm_output_mean_squared_logarithmic_error',
       u'lstm_output_mean_absolute_percentage_error',
       u'lstm_output_mean_squared_error',
       u'combined_output_mean_squared_logarithmic_error', u'mape',
       u'combined_output_loss'],
      dtype='object'))
('the missing part is:', set(['acc', 'filename']))
('scores_conv__tree_tinybidir_nodense_fv1c_slowlr_relu_ca_sigmoid_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 108, 13)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'scores_conv_lstm_bagged__bag_conv_lstm_dense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', Index([u'Unnamed: 0', u'loss', u'lstm_output_mean_absolute_error',
       u'lstm_output_acc', u'mae', u'lstm_output_loss', u'mse',
       u'lstm_output_mean_squared_logarithmic_error',
       u'lstm_output_mean_squared_error',
       u'lstm_output_mean_absolute_percentage_error', u'seq_name',
       u'combined_output_acc', u'mape', u'msle', u'combined_output_loss'],
      dtype='object'))
('the missing part is:', set(['acc', 'filename']))
('scores_conv_lstm_bagged__bag_conv_lstm_dense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 108, 15)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'scores_lstm_rerun__big_bidir_.csv', Index([u'Unnamed: 0', u'loss', u'msle', u'mae', u'mape', u'seq_name', u'mse'], dtype='object'))
('the missing part is:', set(['acc', 'filename']))
('scores_lstm_rerun__big_bidir_.csv', 108, 7)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'combi_scores_fv1clinreg_50sd.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('the missing part is:', set(['acc', 'loss', 'filename', 'mape']))
('combi_scores_fv1clinreg_50sd.csv', 108, 4)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'combi_scores_rf30_ws__model__tiny_bidir_.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('the missing part is:', set(['acc', 'loss', 'filename', 'mape']))
('combi_scores_rf30_ws__model__tiny_bidir_.csv', 108, 4)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'combi_scores_elasticnet_model__tiny_bidir_20sd.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('the missing part is:', set(['acc', 'loss', 'filename', 'mape']))
('combi_scores_elasticnet_model__tiny_bidir_20sd.csv', 108, 4)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'combi_scores_fv1crf60_ws__.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('the missing part is:', set(['acc', 'loss', 'filename', 'mape']))
('combi_scores_fv1crf60_ws__.csv', 108, 4)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'combi_scores_ridgesaga_model__tiny_bidir_20sd.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('the missing part is:', set(['acc', 'loss', 'filename', 'mape']))
('combi_scores_ridgesaga_model__tiny_bidir_20sd.csv', 108, 4)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'scores_sk_conv_lstm_bagged__bag_conv_lstm_nodense_micro_128d_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', Index([u'Unnamed: 0', u'seq_name', u'mse', u'mse_f3', u'mae', u'mae_f3'], dtype='object'))
('the missing part is:', set(['acc', 'loss', 'filename', 'mape']))
('scores_sk_conv_lstm_bagged__bag_conv_lstm_nodense_micro_128d_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 108, 6)
('TROUBLE, NOT ALL COLNAMES ARE IN ', 'combi_scores_et5_ws__model__tiny_bidir_20sd.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('the missing part is:', set(['acc', 'loss', 'filename', 'mape']))
('combi_scores_et5_ws__model__tiny_bidir_20sd.csv', 108, 4)
('combi_scores_rf5_ws___bag_convdblfilters_lstm_nodense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 108, 4)
('TROUBLE', 'combi_scores_rf5_ws___bag_convdblfilters_lstm_nodense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('combi_scores_linreg_model__tiny_bidir_1sd.csv', 108, 4)
('TROUBLE', 'combi_scores_linreg_model__tiny_bidir_1sd.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('combi_scores_et30_ws___bag_convdblfilters_lstm_nodense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 108, 4)
('TROUBLE', 'combi_scores_et30_ws___bag_convdblfilters_lstm_nodense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('scores_conv__tree_medbidir_nodense_fv1c_slowlr_relu_ca_sigmoid_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 108, 13)
('TROUBLE', 'scores_conv__tree_medbidir_nodense_fv1c_slowlr_relu_ca_sigmoid_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', Index([u'Unnamed: 0', u'loss', u'lstm_output_mean_absolute_error', u'mae',
       u'lstm_output_loss', u'mse', u'seq_name',
       u'lstm_output_mean_squared_logarithmic_error',
       u'lstm_output_mean_absolute_percentage_error',
       u'lstm_output_mean_squared_error', u'msle', u'mape',
       u'combined_output_loss'],
      dtype='object'))
('scores_lstm_rerun__bidir100ea_fv1c_.csv', 108, 7)
('TROUBLE', 'scores_lstm_rerun__bidir100ea_fv1c_.csv', Index([u'Unnamed: 0', u'loss', u'msle', u'mae', u'mape', u'seq_name', u'mse'], dtype='object'))
('scores_sk_conv_lstm_bagged__bag_conv_lstm_dense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 108, 6)
('TROUBLE', 'scores_sk_conv_lstm_bagged__bag_conv_lstm_dense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', Index([u'Unnamed: 0', u'seq_name', u'mse', u'mse_f3', u'mae', u'mae_f3'], dtype='object'))
('combi_scores_fv1cridgecholesky_50sd.csv', 108, 4)
('TROUBLE', 'combi_scores_fv1cridgecholesky_50sd.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('combi_scores_elasticnet__bag_convdblfilters_lstm_nodense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 108, 4)
('TROUBLE', 'combi_scores_elasticnet__bag_convdblfilters_lstm_nodense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('combi_scores_fv1cridgesaga_50sd.csv', 108, 4)
('TROUBLE', 'combi_scores_fv1cridgesaga_50sd.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('combi_scores_fv1c_elasticnet_50sd.csv', 108, 4)
('TROUBLE', 'combi_scores_fv1c_elasticnet_50sd.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('scores_lstm_rerun__tiny_bidir_.csv', 108, 7)
('TROUBLE', 'scores_lstm_rerun__tiny_bidir_.csv', Index([u'Unnamed: 0', u'loss', u'msle', u'mae', u'mape', u'seq_name', u'mse'], dtype='object'))
('scores_conv__tree_tinybidir_nodense_fv1c_slowlr_relu_ca_sigmoid_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 108, 13)
('TROUBLE', 'scores_conv__tree_tinybidir_nodense_fv1c_slowlr_relu_ca_sigmoid_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', Index([u'Unnamed: 0', u'loss', u'lstm_output_mean_absolute_error', u'mae',
       u'lstm_output_loss', u'mse', u'seq_name',
       u'lstm_output_mean_squared_logarithmic_error',
       u'lstm_output_mean_absolute_percentage_error',
       u'lstm_output_mean_squared_error',
       u'combined_output_mean_squared_logarithmic_error', u'mape',
       u'combined_output_loss'],
      dtype='object'))
('scores_conv_lstm_bagged__bag_conv_lstm_dense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 108, 15)
('TROUBLE', 'scores_conv_lstm_bagged__bag_conv_lstm_dense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', Index([u'Unnamed: 0', u'loss', u'lstm_output_mean_absolute_error',
       u'lstm_output_acc', u'mae', u'lstm_output_loss', u'mse',
       u'lstm_output_mean_squared_logarithmic_error',
       u'lstm_output_mean_squared_error',
       u'lstm_output_mean_absolute_percentage_error', u'seq_name',
       u'combined_output_acc', u'mape', u'msle', u'combined_output_loss'],
      dtype='object'))
('scores_lstm_rerun__big_bidir_.csv', 108, 7)
('TROUBLE', 'scores_lstm_rerun__big_bidir_.csv', Index([u'Unnamed: 0', u'loss', u'msle', u'mae', u'mape', u'seq_name', u'mse'], dtype='object'))
('combi_scores_fv1clinreg_50sd.csv', 108, 4)
('TROUBLE', 'combi_scores_fv1clinreg_50sd.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('combi_scores_rf30_ws__model__tiny_bidir_.csv', 108, 4)
('TROUBLE', 'combi_scores_rf30_ws__model__tiny_bidir_.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('combi_scores_elasticnet_model__tiny_bidir_20sd.csv', 108, 4)
('TROUBLE', 'combi_scores_elasticnet_model__tiny_bidir_20sd.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('combi_scores_fv1crf60_ws__.csv', 108, 4)
('TROUBLE', 'combi_scores_fv1crf60_ws__.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('combi_scores_ridgesaga_model__tiny_bidir_20sd.csv', 108, 4)
('TROUBLE', 'combi_scores_ridgesaga_model__tiny_bidir_20sd.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('scores_sk_conv_lstm_bagged__bag_conv_lstm_nodense_micro_128d_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 108, 6)
('TROUBLE', 'scores_sk_conv_lstm_bagged__bag_conv_lstm_nodense_micro_128d_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', Index([u'Unnamed: 0', u'seq_name', u'mse', u'mse_f3', u'mae', u'mae_f3'], dtype='object'))
('combi_scores_et5_ws__model__tiny_bidir_20sd.csv', 108, 4)
('TROUBLE', 'combi_scores_et5_ws__model__tiny_bidir_20sd.csv', Index([u'seq_name', u'r2', u'mse', u'mae'], dtype='object'))
('number of files', 21)
('counter 6 cols', 2)
('counter 110', 0)
('list NOT ok', 21, ['combi_scores_rf5_ws___bag_convdblfilters_lstm_nodense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 'combi_scores_linreg_model__tiny_bidir_1sd.csv', 'combi_scores_et30_ws___bag_convdblfilters_lstm_nodense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 'scores_conv__tree_medbidir_nodense_fv1c_slowlr_relu_ca_sigmoid_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 'scores_lstm_rerun__bidir100ea_fv1c_.csv', 'scores_sk_conv_lstm_bagged__bag_conv_lstm_dense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 'combi_scores_fv1cridgecholesky_50sd.csv', 'combi_scores_elasticnet__bag_convdblfilters_lstm_nodense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 'combi_scores_fv1cridgesaga_50sd.csv', 'combi_scores_fv1c_elasticnet_50sd.csv', 'scores_lstm_rerun__tiny_bidir_.csv', 'scores_conv__tree_tinybidir_nodense_fv1c_slowlr_relu_ca_sigmoid_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 'scores_conv_lstm_bagged__bag_conv_lstm_dense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 'scores_lstm_rerun__big_bidir_.csv', 'combi_scores_fv1clinreg_50sd.csv', 'combi_scores_rf30_ws__model__tiny_bidir_.csv', 'combi_scores_elasticnet_model__tiny_bidir_20sd.csv', 'combi_scores_fv1crf60_ws__.csv', 'combi_scores_ridgesaga_model__tiny_bidir_20sd.csv', 'scores_sk_conv_lstm_bagged__bag_conv_lstm_nodense_micro_128d_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv', 'combi_scores_et5_ws__model__tiny_bidir_20sd.csv'])
('list of df shapes', [(108, 4), (108, 13), (108, 7), (108, 6), (108, 15)])

In [2]:
#create MAIN dataframe (all models and all results) for those without MSE
no_mse_main_colnames = ['results_filename','mae_avg','mae_med','mae_stdev','mape_avg','mape_med','mape_stdev','mae_normd_dev']
no_mse_main_df = pd.DataFrame(columns=no_mse_main_colnames)
no_mse_main_df.set_index(keys=['results_filename'],inplace=True)

#create MAIN dataframe with MSE 
mse_main_colnames = ['results_filename','mse_avg','mse_med','mse_stdev','mae_avg','mae_med','mae_stdev','mae_normd_dev']
mse_main_df = pd.DataFrame(columns=mse_main_colnames)
mse_main_df.set_index(keys=['results_filename'], inplace=True)
#colnames: mape, mse,mae {mean, med, stdev}, metric 1 

# just loop over the files a second time because the clutter above is too much
for results_file in results_files_list:
    interm_df = pd.read_csv(results_aggregation_path + results_file)
    print("results file: ", str(results_file))
    #check if it has mse or not
    if 'mse' not in interm_df.columns: #aggregate into the no_mse_main
        keys_to_aggregate = ['mae','mape']
        for key in keys_to_aggregate:
            key_avg_colname = str(key) + "_" + "avg"
            key_med_colname = str(key) + "_" + "med"
            key_std_colname = str(key) + "_" + "stdev"
#             no_mse_main_df.loc[str(results_file),[key_avg_colname]] = interm_df.loc[:,[str(key)]].mean(axis=1).values
#             no_mse_main_df.loc[str(results_file),[key_med_colname]] = interm_df.loc[:,[str(key)]].median(axis=1).values
#             no_mse_main_df.loc[str(results_file),[key_std_colname]] = interm_df.loc[:,[str(key)]].std(axis=1).values
            no_mse_main_df.loc[str(results_file),[key_avg_colname]] = interm_df[str(key)].mean()
            no_mse_main_df.loc[str(results_file),[key_med_colname]] = interm_df[str(key)].median()
            no_mse_main_df.loc[str(results_file),[key_std_colname]] = interm_df[str(key)].std()
            
    if 'mse' in interm_df.columns: #aggregate into the mse_main
        keys_to_aggregate = ['mse','mae'] #I took mape out. 
        for key in keys_to_aggregate:
            key_avg_colname = str(key) + "_" + "avg"
            key_med_colname = str(key) + "_" + "med"
            key_std_colname = str(key) + "_" + "stdev"
#             mse_main_df.loc[str(results_file),[key_avg_colname]] = interm_df.loc[:,[str(key)]].mean(axis=1).values
#             mse_main_df.loc[str(results_file),[key_med_colname]] = interm_df.loc[:,[str(key)]].median(axis=1).values
#             mse_main_df.loc[str(results_file),[key_std_colname]] = interm_df.loc[:,[str(key)]].std(axis=1).values
            mse_main_df.loc[str(results_file),[key_avg_colname]] = interm_df[str(key)].mean()
            mse_main_df.loc[str(results_file),[key_med_colname]] = interm_df[str(key)].median()
            mse_main_df.loc[str(results_file),[key_std_colname]] = interm_df[str(key)].std()
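# The two branches above repeat the same mean/median/std aggregation; a
# hypothetical helper (sketch only, not called below) could collapse them:
# def summarize_into(main_df, row_name, interm_df, keys):
#     for key in keys:
#         main_df.loc[row_name, key + "_avg"] = interm_df[key].mean()
#         main_df.loc[row_name, key + "_med"] = interm_df[key].median()
#         main_df.loc[row_name, key + "_stdev"] = interm_df[key].std()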

            
#mse_main_df.set_index(keys=['results_filename'],inplace=True)  # inplace=True deletes the column already serving as the index
mse_main_df.sort_values(by=['mae_avg'],inplace=True)
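# mae_normd_dev = (median - mean) / stdev: a unitless skew indicator for the
# per-file MAE distribution (the negative of Pearson's second skewness
# coefficient, up to the factor of 3); the same formula is applied to the no-mse frame below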
mse_main_df.loc[:,['mae_normd_dev']] = (mse_main_df.loc[:,['mae_med']].values - mse_main_df.loc[:,['mae_avg']].values)/ (mse_main_df.loc[:,['mae_stdev']].values)
print("mse main df's head", mse_main_df.head(2))

#no_mse_main_df.set_index(keys=['results_filename'],inplace=True)
no_mse_main_df.sort_values(by=['mae_avg'],inplace=True)
no_mse_main_df.loc[:,['mae_normd_dev']] = (no_mse_main_df.loc[:,['mae_med']].values - no_mse_main_df.loc[:,['mae_avg']].values)/ (no_mse_main_df.loc[:,['mae_stdev']].values)

#no_mse_main_df.reset_index
print("no mse main df's head", no_mse_main_df.head(2))

no_mse_main_df.to_csv("./analysis/no_mse_main_df.csv")
mse_main_df.to_csv("./analysis/mse_main_df.csv")
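# A quick sanity check on the saved summaries (sketch only, assuming the two
# files written above):
# summary = pd.read_csv("./analysis/mse_main_df.csv", index_col='results_filename')
# print(summary[['mae_avg','mae_med','mae_stdev']].head())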


('results file: ', 'combi_scores_rf5_ws___bag_convdblfilters_lstm_nodense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv')
('results file: ', 'combi_scores_linreg_model__tiny_bidir_1sd.csv')
('results file: ', 'combi_scores_et30_ws___bag_convdblfilters_lstm_nodense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv')
('results file: ', 'scores_conv__tree_medbidir_nodense_fv1c_slowlr_relu_ca_sigmoid_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv')
('results file: ', 'scores_lstm_rerun__bidir100ea_fv1c_.csv')
('results file: ', 'scores_sk_conv_lstm_bagged__bag_conv_lstm_dense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv')
('results file: ', 'combi_scores_fv1cridgecholesky_50sd.csv')
('results file: ', 'combi_scores_elasticnet__bag_convdblfilters_lstm_nodense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv')
('results file: ', 'combi_scores_fv1cridgesaga_50sd.csv')
('results file: ', 'combi_scores_fv1c_elasticnet_50sd.csv')
('results file: ', 'scores_lstm_rerun__tiny_bidir_.csv')
('results file: ', 'scores_conv__tree_tinybidir_nodense_fv1c_slowlr_relu_ca_sigmoid_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv')
('results file: ', 'scores_conv_lstm_bagged__bag_conv_lstm_dense_tiny_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv')
('results file: ', 'scores_lstm_rerun__big_bidir_.csv')
('results file: ', 'combi_scores_fv1clinreg_50sd.csv')
('results file: ', 'combi_scores_rf30_ws__model__tiny_bidir_.csv')
('results file: ', 'combi_scores_elasticnet_model__tiny_bidir_20sd.csv')
('results file: ', 'combi_scores_fv1crf60_ws__.csv')
('results file: ', 'combi_scores_ridgesaga_model__tiny_bidir_20sd.csv')
('results file: ', 'scores_sk_conv_lstm_bagged__bag_conv_lstm_nodense_micro_128d_shufstart_relu_ca_tanh_da_3_cbd_standard_per_batch_sclr_l1l2_kr_HLR.csv')
('results file: ', 'combi_scores_et5_ws__model__tiny_bidir_20sd.csv')
("mse main df's head",                                                        mse_avg     mse_med  \
results_filename                                                             
combi_scores_elasticnet__bag_convdblfilters_lst...  0.00904585  0.00908431   
combi_scores_elasticnet_model__tiny_bidir_20sd.csv  0.00904648  0.00908492   

                                                      mse_stdev     mae_avg  \
results_filename                                                              
combi_scores_elasticnet__bag_convdblfilters_lst...  0.000280291  0.00827828   
combi_scores_elasticnet_model__tiny_bidir_20sd.csv  0.000280288  0.00888981   

                                                       mae_med    mae_stdev  \
results_filename                                                              
combi_scores_elasticnet__bag_convdblfilters_lst...  0.00846952  0.000453919   
combi_scores_elasticnet_model__tiny_bidir_20sd.csv  0.00908113  0.000454009   

                                                   mae_normd_dev  
results_filename                                                  
combi_scores_elasticnet__bag_convdblfilters_lst...      0.421319  
combi_scores_elasticnet_model__tiny_bidir_20sd.csv      0.421414  )
("no mse main df's head", Empty DataFrame
Columns: [mae_avg, mae_med, mae_stdev, mape_avg, mape_med, mape_stdev, mae_normd_dev]
Index: [])

In [3]:
#TODO: pca whiten=True.
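# A minimal sketch of the whitening TODO above, assuming a 2-D feature array
# `feature_array` that is not defined in this cell:
# from sklearn.decomposition import PCA
# pca = PCA(whiten=True)  # components rescaled to unit variance
# feature_array_whitened = pca.fit_transform(feature_array)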
##################################################### OLD STUFF BELOW
# #load into generator
#
#
# # instantiate variables
# mse_cumulative = 0.0
# mse_at_instance = 0.0
# mse_average = 0.0
# mse_at_instance_list = []
# mse_average_list = []
# mse_cumulative_list = []
#
# mae_cumulative = 0.0
# mae_at_instance = 0.0
# mae_average = 0.0
# mae_at_instance_list = []
# mae_average_list = []
# mae_cumulative_list = []
#
# # med_ae_cumulative = 0.0
# # med_ae_at_instance = 0.0
# # med_ae_average = 0.0
# # med_ae_at_instance_list = []
# # med_ae_average_list = []
# # med_ae_cumulative_list = []
#
# msle_cumulative = 0.0
# msle_at_instance = 0.0
# msle_average = 0.0
# msle_at_instance_list = []
# msle_average_list = []
# msle_cumulative_list = []
#
# evs_cumulative = 0.0
# evs_at_instance = 0.0
# evs_average = 0.0
# evs_at_instance_list = []
# evs_average_list = []
# evs_cumulative_list = []
#
# r2_cumulative = 0.0
# r2_at_instance = 0.0
# r2_average = 0.0
# r2_at_instance_list = []
# r2_average_list = []
# r2_cumulative_list = []
#
# # for index_to_load in range(0,2):
# for index_to_load in range(0, len(test_seqs_filenames)):
#
#     mse_cumulative = 0.0
#     mse_at_instance = 0.0
#     mse_average = 0.0
#     mse_at_instance_list = []
#     mse_average_list = []
#     mse_cumulative_list = []
#
#     mae_cumulative = 0.0
#     mae_at_instance = 0.0
#     mae_average = 0.0
#     mae_at_instance_list = []
#     mae_average_list = []
#     mae_cumulative_list = []
#
#     # med_ae_cumulative = 0.0
#     # med_ae_at_instance = 0.0
#     # med_ae_average = 0.0
#     # med_ae_at_instance_list = []
#     # med_ae_average_list = []
#     # med_ae_cumulative_list = []
#
#     # msle_cumulative = 0.0
#     # msle_at_instance = 0.0
#     # msle_average = 0.0
#     # msle_at_instance_list = []
#     # msle_average_list = []
#     # msle_cumulative_list = []
#
#     evs_cumulative = 0.0
#     evs_at_instance = 0.0
#     evs_average = 0.0
#     evs_at_instance_list = []
#     evs_average_list = []
#     evs_cumulative_list = []
#
#     r2_cumulative = 0.0
#     r2_at_instance = 0.0
#     r2_average = 0.0
#     r2_at_instance_list = []
#     r2_average_list = []
#     r2_cumulative_list = []
#
#     files = combined_filenames[index_to_load]
#     print("files: {}".format(files))
#     preds_load_path = preds_path + files[0]
#     test_label_load_path = test_labels_path + files[1]
#     preds_array_temp = np.load(preds_load_path)
#     label_test_array = np.load(test_label_load_path)
#     print("before changing. preds shape: {}, label shape: {}".format(preds_array_temp.shape, label_test_array.shape))
#     if label_test_array.shape[1] > 5:
#         label_test_array = label_test_array[:, 1:]
#
#     # TODO: reshape the preds.
#     preds_array = np.reshape(preds_array_temp, newshape=(preds_array_temp.shape[1], 4))
#     identifier = files[1][:-4]
#     mse_full = mean_squared_error(y_pred=preds_array, y_true=label_test_array[0:preds_array.shape[0], 1:])
#     # mse_full_vw = mean_squared_error(y_pred=preds_array,y_true=label_test_array[0:preds_array.shape[0],1:],multioutput='variance_weighted')
#     mae_full = mean_absolute_error(y_pred=preds_array, y_true=label_test_array[0:preds_array.shape[0], 1:])
#     # mae_full_vw = mean_absolute_error(y_pred=preds_array, y_true=label_test_array[0:preds_array.shape[0],1:],multioutput='variance_weighted')
#     r2_full = r2_score(y_pred=preds_array, y_true=label_test_array[0:preds_array.shape[0], 1:])
#     # r2_full_vw = r2_score(y_pred=preds_array, y_true=label_test_array[0:preds_array.shape[0],1:],multioutput='variance_weighted')
#     evs_full = explained_variance_score(y_pred=preds_array, y_true=label_test_array[0:preds_array.shape[0], 1:])
#     # evs_full_vw = explained_variance_score(y_pred=preds_array, y_true=label_test_array[0:preds_array.shape[0],1:],multioutput='variance_weighted')
#
#     # if train_array.shape[1] > 11:
#     #     train_array = train_array[:,1:]
#
#     # identifier = files[0][:-4]
#
#     # TODO load predictions
#     # TODO load labels
#     # initialize two sklearn metrics
#
#     # loss_cumulative = loss_temp + mean_squared_error(y_true=label_train_array,y_pred=train_array[:,-4:])
#     # loss_at_instance = mean_squared_error(y_true=None,y_pred=None)
#     # loss_average = loss_cumulative / 5#counter #BASED ON LOSS CUMULATIVE
#     # loss_instance_avg = ?
#
#     chunker_proto_preds = generator_chunker(array_raw=preds_array, chunker_batch_size=CHUNKER_BATCH_TRAVERSAL_SIZE,
#                                             start_at=0,
#                                             scaler_active=False)
#     chunker_proto_label = generator_chunker(array_raw=label_test_array, chunker_batch_size=CHUNKER_BATCH_TRAVERSAL_SIZE,
#                                             start_at=0,
#                                             scaler_active=True, scaler_type='standard_per_batch')
#
#     remaining = CHUNKER_BATCH_SIZE * (preds_array.shape[0] // CHUNKER_BATCH_SIZE)
#     counter = 0
#     # TODO modify this. load both generators and just accumulate the loss.
#     while remaining > 0:
#         counter = counter + 1
#         chunk_preds = chunker_proto_preds.next()
#         # chunk_data = chunk_data[:,-4:] #dummy, just cut the array to the last 4 columns.
#         chunk_label = chunker_proto_label.next()
#         chunk_label = chunk_label[:, 1:]
#
#         # MSE
#         mse_at_instance = mean_squared_error(y_true=chunk_label, y_pred=chunk_preds)
#         mse_at_instance_list.append(mse_at_instance)
#         mse_cumulative = mse_cumulative + mse_at_instance
#         mse_cumulative_list.append(mse_cumulative)
#         mse_average = mse_cumulative / counter
#         mse_average_list.append(mse_average)
#
#         # MAE
#         mae_at_instance = mean_absolute_error(y_true=chunk_label, y_pred=chunk_preds)
#         mae_at_instance_list.append(mae_at_instance)
#         mae_cumulative = mae_cumulative + mae_at_instance
#         mae_cumulative_list.append(mae_cumulative)
#         mae_average = mae_cumulative / counter
#         mae_average_list.append(mae_average)
#
#         # MSLE
#         # msle_at_instance = mean_squared_log_error(y_true=chunk_label,y_pred=chunk_data)
#         # msle_at_instance_list.append(msle_at_instance)
#         # msle_cumulative = msle_cumulative + msle_at_instance
#         # msle_cumulative_list.append(msle_cumulative)
#         # msle_average = msle_cumulative/counter
#         # msle_average_list.append(msle_average)
#
#         # R2
#         r2_at_instance = r2_score(y_true=chunk_label, y_pred=chunk_preds)
#         r2_at_instance_list.append(r2_at_instance)
#         r2_cumulative = r2_cumulative + r2_at_instance
#         r2_cumulative_list.append(r2_cumulative)
#         r2_average = r2_cumulative / counter
#         r2_average_list.append(r2_average)
#
#         # EVS
#         evs_at_instance = explained_variance_score(y_true=chunk_label, y_pred=chunk_preds)
#         evs_at_instance_list.append(evs_at_instance)
#         evs_cumulative = evs_cumulative + evs_at_instance
#         evs_cumulative_list.append(evs_cumulative)
#         evs_average = evs_cumulative / counter
#         evs_average_list.append(evs_average)
#
#         # Med_AE can't do multiple columns at once!
#         # med_ae_at_instance = median_absolute_error(y_true=chunk_label,y_pred=chunk_data) #
#         # med_ae_at_instance_list.append(med_ae_at_instance)
#         # med_ae_cumulative = med_ae_cumulative + med_ae_at_instance
#         # med_ae_cumulative_list.append(med_ae_cumulative)
#         # med_ae_average = med_ae_cumulative/counter
#         # med_ae_average_list.append(med_ae_average)
#
#         print("remaining: {}".format(remaining))
#         remaining = remaining - CHUNKER_BATCH_SIZE
#         # print("data chunk 2: {}".format(chunker_proto_data.next()))
#
#     aggregate_list = []  # saves the aggregated array in a list so the filename saving can be automated
#     aggregate_name_list = []  # since trying to directly access variable names isn't a good idea in python...
#     # MSE MAE MSLE R2 EVS MED_AE
#     print("mse_cumulative: {}".format(mse_cumulative_list))
#     print("mse_average: {}".format(mse_average_list))
#     print("mse_at_instance: {}".format(mse_at_instance_list))
#     assert (len(mse_cumulative_list) == len(mse_average_list) == len(mse_at_instance_list))
#     aggregate_mse = np.empty(shape=(len(mse_cumulative_list), 4))
#     # ORDER IS cumulative - average - at instance
#     aggregate_mse[:, 0] = np.asarray(mse_cumulative_list)
#     aggregate_mse[:, 1] = np.asarray(mse_average_list)
#     aggregate_mse[:, 2] = np.asarray(mse_at_instance_list)
#     aggregate_mse[0, 3] = mse_full
#     # aggregate_mse[1, 3] = mse_full_vw
#     aggregate_list.append(aggregate_mse)
#     aggregate_name_list.append('mse_agg')
#
#     print("mae_cumulative: {}".format(mae_cumulative_list))
#     print("mae_average: {}".format(mae_average_list))
#     print("mae_at_instance: {}".format(mae_at_instance_list))
#     assert (len(mae_cumulative_list) == len(mae_average_list) == len(mae_at_instance_list))
#     aggregate_mae = np.empty(shape=(len(mae_cumulative_list), 4))
#     # ORDER IS cumulative - average - at instance
#     aggregate_mae[:, 0] = np.asarray(mae_cumulative_list)
#     aggregate_mae[:, 1] = np.asarray(mae_average_list)
#     aggregate_mae[:, 2] = np.asarray(mae_at_instance_list)
#     aggregate_mae[0, 3] = mae_full
#     aggregate_mae[1:, 3] = 0.0  # only row 0 of this column holds the full-sequence MAE
#     aggregate_list.append(aggregate_mae)
#     aggregate_name_list.append('mae_agg')
#
#     # print("msle_cumulative: {}".format(msle_cumulative_list))
#     # print("msle_average: {}".format(msle_average_list))
#     # print("msle_at_instance: {}".format(msle_at_instance_list))
#     # assert(len(msle_cumulative_list)==len(msle_average_list)==len(msle_at_instance_list))
#     # aggregate_msle = np.empty(shape=(len(msle_cumulative_list),4))
#     # #ORDER IS cumulative - average - at instance
#     # aggregate_msle[:, 0] = np.asarray(msle_cumulative_list)
#     # aggregate_msle[:, 1] = np.asarray(msle_average_list)
#     # aggregate_msle[:, 2] = np.asarray(msle_at_instance_list)
#     # aggregate_msle[0, 3] = msle_full
#     # aggregate_list.append(aggregate_msle)
#     # aggregate_name_list.append("msle")
#
#     print("r2_cumulative: {}".format(r2_cumulative_list))
#     print("r2_average: {}".format(r2_average_list))
#     print("r2_at_instance: {}".format(r2_at_instance_list))
#     assert (len(r2_cumulative_list) == len(r2_average_list) == len(r2_at_instance_list))
#     aggregate_r2 = np.empty(shape=(len(r2_cumulative_list), 4))
#     # ORDER IS cumulative - average - at instance
#     aggregate_r2[:, 0] = np.asarray(r2_cumulative_list)
#     aggregate_r2[:, 1] = np.asarray(r2_average_list)
#     aggregate_r2[:, 2] = np.asarray(r2_at_instance_list)
#     aggregate_r2[0, 3] = r2_full
#     aggregate_list.append(aggregate_r2)
#     aggregate_name_list.append('r2_agg')
#
#     print("evs_cumulative: {}".format(evs_cumulative_list))
#     print("evs_average: {}".format(evs_average_list))
#     print("evs_at_instance: {}".format(evs_at_instance_list))
#     assert (len(evs_cumulative_list) == len(evs_average_list) == len(evs_at_instance_list))
#     aggregate_evs = np.empty(shape=(len(evs_cumulative_list), 4))
#     # ORDER IS cumulative - average - at instance
#     aggregate_evs[:, 0] = np.asarray(evs_cumulative_list)  # cumulative.
#     aggregate_evs[:, 1] = np.asarray(evs_average_list)
#     aggregate_evs[:, 2] = np.asarray(evs_at_instance_list)
#     aggregate_evs[0, 3] = evs_full
#     aggregate_list.append(aggregate_evs)
#     aggregate_name_list.append('evs_agg')
#
#     if save_arrays == True:
#         for index in range(0, len(aggregate_list)):
#             # get the index of the array in the list of names (the second list)
#             arrayname = aggregate_name_list[index]
#             np.savetxt(fname=chunker_path + arrayname + "_" + str(identifier) + ".csv", delimiter=',',
#                        X=aggregate_list[index], header="cumulative(sum)-average-instance", fmt='%.18e')
#
#
#             # TODO: aggregate and save as a numpy array or a csv.
#
#             # print("med_ae_cumulative: {}".format(med_ae_cumulative_list))
#             # print("med_ae_average: {}".format(med_ae_average_list))
#             # print("med_ae_at_instance: {}".format(med_ae_at_instance_list))
#             # print("label chunk 1: {}".format(chunker_proto_label.next()))
#             # print("label chunk 2: {}".format(chunker_proto_label.next()))
#
#             # if (str(files[0]) == 'sequence_2c_288_9_fv1b.npy') == True:
#             #     plt.clf()
#             #     plt.cla()
#             #     plt.close()
#             #     plt.plot(label_truth[:, 0], '^', label="ground truth", markersize=5)
#             #     plt.plot(y_prediction[:, 0], '.', label="prediction", markersize=4)
#             #     plt.xscale('log')
#             #     plt.xlabel('# Cycle(s)')
#             #     plt.yscale('log')
#             #     plt.ylabel('Value(s)')
#             #     plt.legend()
#             #     plt.xlim((0.75 * (len(y_prediction)), 1 * (len(y_prediction))))
#             #     plt.title('truth vs prediction from 75% - 100% of the sequence on Crack 01')
#             #     plt.grid(True)
#             #     plt.savefig('results_' + str(files[0]) + '_flaw_0_conv_75_100_newmarker_batch' + str(
#             #         generator_batch_size) + '_.png')
#             #
#             #     plt.clf()
#             #     plt.cla()
#             #     plt.close()
#             #     plt.plot(label_truth[:, 1], '^', label="ground truth", markersize=5)
#             #     plt.plot(y_prediction[:, 1], 'v', label="prediction", markersize=4)
#             #     plt.xscale('log')
#             #     plt.xlabel('# Cycle(s)')
#             #     plt.yscale('log')
#             #     plt.ylabel('Value(s)')
#             #     plt.legend()
#             #     plt.xlim((0.75 * (len(y_prediction)), 1 * (len(y_prediction))))
#             #     plt.title('truth vs prediction  from 75% - 100% of the sequence on Crack 02')
#             #     plt.grid(True)
#             #     plt.savefig('results_' + str(files[0]) + '_flaw_1_conv_75_100_newmarker_batch' + str(
#             #         generator_batch_size) + '_.png')
#             #
#             #     plt.clf()
#             #     plt.cla()
#             #     plt.close()
#             #     plt.plot(label_truth[:, 2], '^', label="ground truth", markersize=5)
#             #     plt.plot(y_prediction[:, 2], 'v', label="prediction", markersize=4)
#             #     plt.xscale('log')
#             #     plt.xlabel('# Cycle(s)')
#             #     plt.yscale('log')
#             #     plt.ylabel('Value(s)')
#             #     plt.legend()
#             #     plt.xlim((0.75 * (len(y_prediction)), 1 * (len(y_prediction))))
#             #     plt.title('truth vs prediction  from 75% - 100% of the sequence on Crack 03')
#             #     plt.grid(True)
#             #     plt.savefig('results_' + str(files[0]) + '_flaw_2_conv_75_100_newmarker_batch' + str(
#             #         generator_batch_size) + '_.png')
#             #
#             #     plt.clf()
#             #     plt.cla()
#             #     plt.close()
#             #     plt.plot(label_truth[:, 3], '^', label="ground truth", markersize=5)
#             #     plt.plot(y_prediction[:, 3], 'v', label="prediction", markersize=4)
#             #     plt.xscale('log')
#             #     plt.xlabel('# Cycle(s)')
#             #     plt.yscale('log')
#             #     plt.ylabel('Value(s)')
#             #     plt.legend()
#             #     plt.xlim((0.75 * (len(y_prediction)), 1 * (len(y_prediction))))
#             #     plt.title('truth vs prediction  from 75% - 100% of the sequence on Crack 04')
#             #     plt.grid(True)
#             #     plt.savefig('results_' + str(files[0]) + '_flaw_3_conv_75_100_newmarker_batch' + str(
#             #         generator_batch_size) + '_.png')
#             # DEVIN PLOT CODE
#             # if save_figs == True:
#             #     plt.clf()
#             #     plt.cla()
#             #     plt.close()
#             #     plt.plot(label_truth[:,0],'^',label="ground truth", markersize=5)
#             #     plt.plot(y_prediction[:,0],'.',label="prediction", markersize=4)
#             #     plt.xscale('log')
#             #     plt.xlabel('# Cycle(s)')
#             #     plt.yscale('log')
#             #     plt.ylabel('Value(s)')
#             #     plt.legend()
#             #     plt.xlim((0.5*(len(y_prediction)), 1*(len(y_prediction))))
#             #     plt.title('truth vs prediction from 50% - 100% of the sequence on Crack 01')
#             #     plt.grid(True)
#             #     plt.savefig('results_' + str(files[0]) + '_flaw_0_conv_50_100_newmarker_batch' + str(generator_batch_size) + '_.png')
#             #
#             #     plt.clf()
#             #     plt.cla()
#             #     plt.close()
#             #     plt.plot(label_truth[:,1],'^',label="ground truth", markersize=5)
#             #     plt.plot(y_prediction[:,1],'v',label="prediction", markersize=4)
#             #     plt.xscale('log')
#             #     plt.xlabel('# Cycle(s)')
#             #     plt.yscale('log')
#             #     plt.ylabel('Value(s)')
#             #     plt.legend()
#             #     plt.xlim((0.5*(len(y_prediction)), 1*(len(y_prediction))))
#             #     plt.title('truth vs prediction  from 50% - 100% of the sequence on Crack 02')
#             #     plt.grid(True)
#             #     plt.savefig('results_' + str(files[0]) + '_flaw_1_conv_50_100_newmarker_batch' + str(generator_batch_size) + '_.png')
#             #
#             #     plt.clf()
#             #     plt.cla()
#             #     plt.close()
#             #     plt.plot(label_truth[:,2],'^',label="ground truth", markersize=5)
#             #     plt.plot(y_prediction[:,2],'v',label="prediction", markersize=4)
#             #     plt.xscale('log')
#             #     plt.xlabel('# Cycle(s)')
#             #     plt.yscale('log')
#             #     plt.ylabel('Value(s)')
#             #     plt.legend()
#             #     plt.xlim((0.5*(len(y_prediction)), 1*(len(y_prediction))))
#             #     plt.title('truth vs prediction  from 50% - 100% of the sequence on Crack 03')
#             #     plt.grid(True)
#             #     plt.savefig('results_' + str(files[0]) + '_flaw_2_conv_50_100_newmarker_batch' + str(generator_batch_size) + '_.png')
#             #
#             #     plt.clf()
#             #     plt.cla()
#             #     plt.close()
#             #     plt.plot(label_truth[:,3],'^',label="ground truth", markersize=5)
#             #     plt.plot(y_prediction[:,3],'v',label="prediction", markersize=4)
#             #     plt.xscale('log')
#             #     plt.xlabel('# Cycle(s)')
#             #     plt.yscale('log')
#             #     plt.ylabel('Value(s)')
#             #     plt.legend()
#             #     plt.xlim((0.5*(len(y_prediction)), 1*(len(y_prediction))))
#             #     plt.title('truth vs prediction  from 50% - 100% of the sequence on Crack 04')
#             #     plt.grid(True)
#             #     plt.savefig('results_' + str(files[0]) + '_flaw_3_conv_50_100_newmarker_batch' + str(generator_batch_size) + '_.png')