In [1]:
import warnings
import itertools
import pandas
import math
import sys
import os
import numpy as np
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)



In [2]:
# Path to a detector's summary file, used to look up scores for a given profile.
# NOTE(review): if fields are missing in this file, later cells set Error and stop.

detector_summary_file = "../results/numenta/numenta_standard_scores.csv"
# Column name of the profile score in the result files
detector_profile = "S(t)_standard"

In [3]:
# Read the detector summary CSV and print the per-file scores, sorted
# best-to-worst.  The row whose index is NaN holds the detector's total score.
# On any problem, Error is set (and printed) so downstream cells can bail out.
Error = None
if os.path.isfile(detector_summary_file):
    summaryDataFrame = pandas.read_csv(detector_summary_file, index_col="File")
    if 'Score' in summaryDataFrame.columns:
        summaryDataFrame = summaryDataFrame.sort_values(by=['Score'], ascending=False)
        print("Printing scores for selected profile (NaN represents total score):")
        print(summaryDataFrame.loc[:, 'Score'])
    else:
        # Fixed error message (was "'Score' colomn mission in data file").
        Error = "'Score' column missing in data file"
        print(Error)
else:
    Error = "No such file : " + detector_summary_file
    print(Error)


Printing scores for selected profile (NaN represents total score):
File
NaN                                                         45.635852
realTraffic/speed_7578.csv                                   2.976448
realTweets/Twitter_volume_PFE.csv                            2.911776
realKnownCause/machine_temperature_system_failure.csv        2.720758
realAdExchange/exchange-3_cpc_results.csv                    2.570778
realTweets/Twitter_volume_KO.csv                             2.462493
realKnownCause/nyc_taxi.csv                                  2.328074
realTweets/Twitter_volume_CVS.csv                            2.309500
realAWSCloudwatch/grok_asg_anomaly.csv                       2.001070
realTraffic/speed_t4013.csv                                  1.984625
realTweets/Twitter_volume_UPS.csv                            1.829071
realAWSCloudwatch/ec2_cpu_utilization_5f5533.csv             1.723102
realAWSCloudwatch/rds_cpu_utilization_e47b3b.csv             1.712269
realKnownCause/ec2_request_latency_system_failure.csv        1.705869
realTraffic/occupancy_t4013.csv                              1.626025
realAWSCloudwatch/elb_request_count_8c0756.csv               1.605956
realAdExchange/exchange-2_cpm_results.csv                    1.508243
realTweets/Twitter_volume_FB.csv                             1.500482
realAWSCloudwatch/ec2_cpu_utilization_53ea38.csv             1.414532
realAWSCloudwatch/ec2_network_in_5abac7.csv                  1.321571
realAWSCloudwatch/ec2_cpu_utilization_24ae8d.csv             1.279566
realAdExchange/exchange-4_cpm_results.csv                    1.274710
realTweets/Twitter_volume_AAPL.csv                           1.263898
realKnownCause/ambient_temperature_system_failure.csv        1.206227
realAWSCloudwatch/ec2_cpu_utilization_ac20cd.csv             0.875727
realTraffic/occupancy_6005.csv                               0.861272
realAWSCloudwatch/ec2_network_in_257a54.csv                  0.860672
artificialWithAnomaly/art_daily_jumpsup.csv                  0.860672
artificialWithAnomaly/art_increase_spike_density.csv         0.860672
realAWSCloudwatch/ec2_cpu_utilization_825cc2.csv             0.846538
realAdExchange/exchange-3_cpm_results.csv                    0.752160
realAWSCloudwatch/ec2_cpu_utilization_fe7f93.csv             0.739635
realAWSCloudwatch/ec2_cpu_utilization_77c1ca.csv             0.733170
realTweets/Twitter_volume_CRM.csv                            0.723576
artificialWithAnomaly/art_daily_jumpsdown.csv                0.638909
realAdExchange/exchange-4_cpc_results.csv                    0.622261
artificialWithAnomaly/art_daily_flatmiddle.csv               0.605923
realAWSCloudwatch/ec2_disk_write_bytes_1ef3de.csv            0.594544
realTraffic/TravelTime_451.csv                               0.560100
realAWSCloudwatch/ec2_disk_write_bytes_c0d644.csv            0.556843
artificialWithAnomaly/art_load_balancer_spikes.csv           0.404788
realTraffic/speed_6005.csv                                   0.402622
realTraffic/TravelTime_387.csv                               0.166453
artificialNoAnomaly/art_flatline.csv                         0.000000
artificialNoAnomaly/art_noisy.csv                            0.000000
artificialNoAnomaly/art_daily_perfect_square_wave.csv        0.000000
realAWSCloudwatch/rds_cpu_utilization_cc0c53.csv            -0.138449
artificialNoAnomaly/art_daily_small_noise.csv               -0.220000
realTweets/Twitter_volume_GOOG.csv                          -0.260196
artificialNoAnomaly/art_daily_no_noise.csv                  -0.330000
realAWSCloudwatch/ec2_cpu_utilization_c6585a.csv            -0.330000
realTweets/Twitter_volume_IBM.csv                           -0.544111
realKnownCause/cpu_utilization_asg_misconfiguration.csv     -0.573201
realTweets/Twitter_volume_AMZN.csv                          -0.743957
realKnownCause/rogue_agent_key_hold.csv                     -1.113701
artificialWithAnomaly/art_daily_nojump.csv                  -1.278351
realKnownCause/rogue_agent_key_updown.csv                   -1.295760
realAdExchange/exchange-2_cpc_results.csv                   -1.330000
realAWSCloudwatch/iio_us-east-1_i-a2eb1cd9_NetworkIn.csv    -2.110000
Name: Score, dtype: float64

In [4]:
# Select a summary row (a data file name) from the score listing printed above.
detector_summary_row = "realTraffic/speed_7578.csv"

# Detector result file: the per-datapoint output for that single data file.
result_file = "../results/numenta/realTraffic/numenta_speed_7578.csv"

In [5]:
# Look up the threshold and confusion-matrix statistics for the selected row.
# pandas removed Index.contains() (deprecated 0.25, gone in 1.0) — use the
# `in` operator on the index instead.
if detector_summary_row not in summaryDataFrame.index:
    Error = "No detector_summary_row '" + detector_summary_row + "' in file " + detector_summary_file
else:
    Error = None

required_columns = ['Threshold', 'TP', 'TN', 'FP', 'FN', 'Total_Count']
if set(required_columns).issubset(summaryDataFrame.columns) and Error is None:

    def _summary_scalar(column):
        # Return the scalar value of `column` for the selected summary row.
        # A duplicated index label makes summaryDataFrame[column][row] return
        # a Series rather than a scalar; take the first entry in that case.
        cell = summaryDataFrame[column][detector_summary_row]
        return cell if cell.size == 1 else cell[0]

    threshold = _summary_scalar("Threshold")
    TP = _summary_scalar("TP")
    TN = _summary_scalar("TN")
    FP = _summary_scalar("FP")
    FN = _summary_scalar("FN")
    total_Count = _summary_scalar("Total_Count")
else:
    if Error is None:
        # Fixed error message (was "Missing colomns in file ...").
        Error = "Missing columns in file " + detector_summary_file
    print(Error)
    print("Run from beginning to clear Error")

In [10]:
# Plot results.  (Note: if the plot is not visible, close the tab, shut down
# the notebook and restart.)
# Read the per-datapoint result file.
if os.path.isfile(result_file):
    dataframe = pandas.read_csv(result_file)
    Error = None
else:
    Error = "No such file : " + result_file
    print(Error)

# BUG FIX: check `Error is None` FIRST.  The original evaluated
# `set(...).issubset(dataframe.columns)` before the Error guard, so a missing
# result file left `dataframe` undefined and raised a NameError instead of
# reporting the recorded Error.
if Error is None \
        and set(['timestamp', 'value', 'anomaly_score', 'label', detector_profile]).issubset(dataframe.columns):
    x = np.array(dataframe['timestamp'])
    value = np.array(dataframe['value'])
    anomaly_score = np.array(dataframe['anomaly_score'])
    anomaly_label = np.array(dataframe['label'])

    standard_score = np.array(dataframe[detector_profile])
else:
    if Error is None:
        # Fixed error message (was "Missing colomns in file ...").
        Error = "Missing columns in file " + result_file
    print(Error)

# Plot values, anomaly score and label scaled to values.
if Error is None:
    value_max = np.max(value)

    def _line_trace(y, name):
        # Build a plotly line-trace dict over the shared timestamp axis.
        return {"x": x, "y": y, "mode": 'lines', "name": name}

    # Figure 1: raw values with anomaly score, threshold and label rescaled
    # to the value range so they are visible on the same axis.
    traces = [_line_trace(value, 'Value'),
              _line_trace(anomaly_score * value_max, 'Anomaly score'),
              _line_trace(np.ones(len(x)) * threshold * value_max, 'Anomaly threshold'),
              _line_trace(anomaly_label * value_max, 'Anomaly window')]
    layout = dict(title = "Scalled anomaly score with value : " + result_file,
                  xaxis = dict(title = 'X'),
                  yaxis = dict(title = 'Value')
                 )
    iplot(dict(data=traces, layout=layout))

    # Figure 2: unscaled anomaly score, threshold, label and the benchmark
    # profile score.
    traces = [_line_trace(anomaly_score, 'Anomaly score'),
              _line_trace(np.ones(len(x)) * threshold, 'Anomaly threshold'),
              _line_trace(anomaly_label, 'Anomaly window'),
              _line_trace(standard_score, 'standard_score')]
    layout = dict(title = "Anomaly score : " + result_file,
                  xaxis = dict(title = 'X'),
                  yaxis = dict(title = 'Value')
                 )
    iplot(dict(data=traces, layout=layout))
else:
    print(Error)
    print("Run from beginning to clear Error")



In [38]:
# Recompute the confusion matrix from the per-point profile scores.
# Per-point score convention used here (NOTE(review): the negative magic
# values look like profile-specific weights — confirm against the benchmark's
# profile definition; exact float equality on -0.11 assumes the result file
# stores these weights verbatim):
#   score > 0      -> true positive  (detected anomaly)
#   score == 0     -> true negative
#   score == -0.11 -> false positive (false alarm)
#   score == -1    -> false negative (missed anomaly)
if Error is None:
    TP, TN, FP, FN = 0, 0, 0, 0
    # BUG FIX: use a dedicated loop variable.  The original reused `x`, which
    # clobbered the timestamp array built by the plotting cell and broke any
    # re-run of that cell (hidden-state bug).
    for score in standard_score:
        if score > 0:
            TP += 1
        elif score == 0:
            TN += 1
        elif score == -0.11:
            FP += 1
        elif score == -1:
            FN += 1
    print("For result file : " + result_file)
    print("True Positive (Detected anomalies) : " + str(TP))
    print("True Negative (Detected non anomalies) : " + str(TN))
    print("False Positive (False alarms) : " + str(FP))
    print("False Negative (Anomaly not detected) : " + str(FN))
    print("Total data points : " + str(total_Count))
    print(detector_profile + " score : " + str(np.sum(standard_score)))
else:
    print(Error)
    print("Run from beginning to clear Error")


For result file : ../results/numenta/realTraffic/numenta_speed_7578.csv
True Positive (Detected anomalies) : 4
True Negative (Detected non anomalies) : 1118
False Positive (False alarms) : 4
False Negative (Anomaly not detected) : 0
Total data points : 1127
S(t)_standard score : 2.97644796595

In [ ]: