In [ ]:
#######################################################
# Script:
#    getAlerts.py
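# Usage:
#    python getAlerts.py <input_file> <output_file>
#    (notebook version: the input/output file names are hard-coded in the
#    "Start" cell and no command-line arguments are read)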
# Description:
#    Generate alerts on test predictions
# Authors:
#    Jasmin Nakic, jnakic@salesforce.com
#    Samir Pilipovic, spilipovic@salesforce.com
#######################################################

import sys
import numpy as np

# Imports required for visualization (plotly)
import plotly.graph_objs as go
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

In [ ]:
# Script debugging flag: when True, writeResult prints the first result rows
debugFlag = False
# Alert thresholds consumed by applyRule. An alert is raised only when BOTH
# the summed (prediction - actual) difference over the 3-point window and
# that difference expressed as a percentage of the summed actuals pass the
# matching pair of limits.
# Summed difference must be above DELTAHIGH for a "HIGH" alert
DELTAHIGH = 200000
# Summed difference must be below DELTALOW for a "LOW" alert
DELTALOW = -200000
# Percentage must be above PCTHIGH for a "HIGH" alert
PCTHIGH = 20.0
# Percentage must be below PCTLOW for a "LOW" alert
PCTLOW = -20.0

In [ ]:
# Function to validate rule
def applyRule(V,P,deltaHigh=None,deltaLow=None,pctHigh=None,pctLow=None):
    """Evaluate the alert rule on a three-point window.

    Parameters
    ----------
    V : indexable with 3 numeric entries
        Actual values of the window (V[0]..V[2]).
    P : indexable with 3 numeric entries
        Predicted values of the window (P[0]..P[2]).
    deltaHigh, deltaLow, pctHigh, pctLow : number, optional
        Threshold overrides. Any value left as None falls back to the
        module-level DELTAHIGH / DELTALOW / PCTHIGH / PCTLOW constant.

    Returns
    -------
    tuple (alert, delta, pct)
        alert : "HIGH", "LOW" or "" when no rule fired.
        delta : summed (prediction - actual) difference divided by 3.
        pct   : summed difference as a percentage of the summed actuals.
    """
    # Resolve thresholds lazily so callers that pass all four explicitly
    # do not depend on the module constants.
    if deltaHigh is None:
        deltaHigh = DELTAHIGH
    if deltaLow is None:
        deltaLow = DELTALOW
    if pctHigh is None:
        pctHigh = PCTHIGH
    if pctLow is None:
        pctLow = PCTLOW

    delta = (P[0] - V[0]) + (P[1] - V[1]) + (P[2] - V[2])
    total = V[0] + V[1] + V[2]

    if total != 0:
        pct = 100.0*delta/float(total)
    elif delta > 0:
        pct = 100.0
    elif delta < 0:
        # A zero baseline with a negative difference must report a negative
        # percentage, otherwise the LOW rule below could never fire.
        pct = -100.0
    else:
        pct = 0.0

    alert = ""
    if delta > deltaHigh and pct > pctHigh:
        alert = "HIGH"
    if delta < deltaLow and pct < pctLow:
        alert = "LOW"

    # Report the per-point average rather than the window total
    return (alert,delta / 3,pct)

#end applyRule

# Iterate over test results
def getAlerts(data):
    """Scan the records in order and collect alert values.

    For each record the 'cnt' and 'predHS' fields are pushed into two
    three-entry windows. Once a window holds three records since the start
    (or since the last alert), applyRule is evaluated on it. When a rule
    fires, the alert line is printed, the returned value is stored at the
    record's position, and both windows start over.

    Parameters
    ----------
    data : numpy structured array with 'timeStamp', 'cnt' and 'predHS' fields

    Returns
    -------
    numpy float array with one entry per record; 0 where no alert fired.
    """
    alertValues = np.zeros(data.shape[0])
    actual = [0,0,0]
    predicted = [0,0,0]
    filled = 0  # records pushed since the start or the last alert

    for pos, rec in enumerate(np.nditer(data)):
        filled += 1
        # Slide both windows one step and append the newest record
        actual = [actual[1], actual[2], rec['cnt']]
        predicted = [predicted[1], predicted[2], rec['predHS']]

        if filled < 3:
            continue  # not enough data yet

        (alert,val,pct) = applyRule(actual,predicted)
        if alert == "":
            continue

        alertValues[pos] = val
        print(alert, rec['timeStamp'], val, pct, "(", predicted[0], predicted[1], predicted[2], ") (", actual[0], actual[1], actual[2], ")")

        # Start collecting a fresh window after an alert
        filled = 0
        actual = [0,0,0]
        predicted = [0,0,0]

    return alertValues
#end getAlerts

# Write results to file
def writeResult(output,calcData,A):
    """Write the input records plus the alert column as a tab-separated file.

    Parameters
    ----------
    output : file name or handle accepted by np.savetxt
    calcData : numpy structured array providing the fields timeStamp,
        dateFrac, isHoliday, isSunday, cnt, predSimple, predTrig,
        predHourDay, predHourWeek and predHS
    A : values assigned to the integer "alertVal" column (one per record)
    """
    # Columns copied unchanged from calcData, in output order
    copiedFields = [
        ("timeStamp","|U19"),
        ("dateFrac",float),
        ("isHoliday",int),
        ("isSunday",int),
        ("cnt",int),
        ("predSimple",int),
        ("predTrig",int),
        ("predHourDay",int),
        ("predHourWeek",int),
        ("predHS",int)
    ]
    resultType = copiedFields + [("alertVal",int)]

    # Allocate the structured array directly with its dtype; building it by
    # casting an uninitialised float array would push garbage values through
    # string/int conversions before they are overwritten.
    result = np.zeros(calcData.shape[0],dtype=resultType)

    for fieldName, _ in copiedFields:
        result[fieldName] = calcData[fieldName]
    result["alertVal"] = A

    if debugFlag:
        print("R 0-5: ", result[0:5])
    # Header is derived from the field list so it cannot drift from the data
    hdr = "\t".join(fieldName for fieldName, _ in resultType)
    np.savetxt(output,result,fmt="%s",delimiter="\t",header=hdr,comments="")
#end writeResult

# Process alerts for input file
def process(inputFile,outputFile):
    """Read a tab-separated prediction file, compute alerts and write the result.

    Parameters
    ----------
    inputFile : path of the tab-separated input with a header row and the
        columns timeStamp dateFrac isHoliday isSunday cnt predSimple predTrig
        predHourDay predHourWeek predHS
    outputFile : path of the file written by writeResult (input columns
        plus alertVal)
    """
    testData = np.genfromtxt(
        inputFile,
        delimiter='\t',
        names=True,
        dtype=("|U19",float,int,int,int,int,int,int,int,int)
    )
    # genfromtxt returns a 0-d array when the file holds a single data row;
    # normalise to 1-d so the downstream .shape[0] lookups keep working.
    testData = np.atleast_1d(testData)

    AV = getAlerts(testData)
    writeResult(outputFile,testData,AV)
#end process

In [ ]:
# Start: input/output file pairs for the training and the test run
trainInputFile, trainAlertFile = "train_exc.txt", "train_alert.txt"
testInputFile, testAlertFile = "test_exc.txt", "test_alert.txt"

# Generate the alert file for each pair, training set first
for _inputPath, _alertPath in (
    (trainInputFile, trainAlertFile),
    (testInputFile, testAlertFile),
):
    process(_inputPath, _alertPath)

In [ ]:
# Load the test alert file written above, using explicit column types:
# one string column, one float column, then nine integer columns
resultTypes = ("|U19", float) + (int,) * 9
results = np.genfromtxt(testAlertFile, names=True, delimiter='\t', dtype=resultTypes)

In [ ]:
# Examine result data: dimensions, number of columns and a few sample rows
columnNames = results.dtype.names
print("Shape:", results.shape)
print("Columns:", len(columnNames))
# Rows at index 1 through 4 (the first row is not shown)
print(results[1:5])

In [ ]:
# Generate chart with actuals, predictions and alerts from the test results (using plotly)
print("Plotly version", __version__) # requires plotly version >= 1.9.0
init_notebook_mode(connected=True)

# All three series are plotted against the same x axis
xAxis = results["dateFrac"]

set1 = go.Bar(name='Actual', x=xAxis, y=results["cnt"])
# Prediction and alert bars are semi-transparent
set2 = go.Bar(name='Prediction', x=xAxis, y=results["predHS"], opacity=0.6)
set3 = go.Bar(name='Alert', x=xAxis, y=results["alertVal"], opacity=0.6)

barData = [set1, set2, set3]
barLayout = go.Layout(title="Prediction vs. Actual with Alerts", barmode='group')

fig = go.Figure(layout=barLayout, data=barData)
iplot(fig)

In [ ]: