In [1]:
import pyisc;
import numpy as np
from scipy.stats import poisson, norm
%matplotlib inline
from pylab import plot
In [2]:
# Seed NumPy's global generator (the one scipy's frozen distributions draw
# from by default) so the synthetic data set, and every score, table and
# plot derived from it, is identical on each Restart & Run All.
SEED = 42
np.random.seed(SEED)

# Distributions for the bulk ("normal") rows and for the injected anomalies.
po_normal = poisson(10)    # first frequency column, normal rate
po_anomaly = poisson(25)   # first frequency column, anomalous rate
po_normal2 = poisson(2)    # second frequency column, normal rate
po_anomaly2 = poisson(3)   # second frequency column, anomalous rate
gs_normal = norm(1, 12)    # measurement column: loc=1, scale=12
gs_anomaly = norm(2, 30)   # measurement column: loc=2, scale=30

normal_len = 10000
anomaly_len = 15

# Columns: 0 = constant 1, 1 and 2 = Poisson counts, 3 = Gaussian measure.
# Each column holds normal_len normal draws followed by anomaly_len
# anomalous draws, so the anomalies are the LAST anomaly_len rows.
# The draws happen in the same order as the list elements below.
data = np.column_stack(
    [
        [1] * (normal_len + anomaly_len),
        np.concatenate([po_normal.rvs(normal_len), po_anomaly.rvs(anomaly_len)]),
        np.concatenate([po_normal2.rvs(normal_len), po_anomaly2.rvs(anomaly_len)]),
        np.concatenate([gs_normal.rvs(normal_len), gs_anomaly.rvs(anomaly_len)]),
    ]
)
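Create an anomaly detector, passing the statistical models to use as its first argument. Here we use a one-sided Poisson model for the first frequency column (column 1), a Poisson model for the second frequency column (column 2), and a Gaussian model for the last column (column 3).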
Given that we now have more than one variable, it is necessary to also add a method to combine the output from the statistical models, which in this case is the maximum anomaly score of each component model:
In [3]:
# One component model per monitored variable. The integer arguments are
# column indices into the data array.
# NOTE(review): for the Poisson models the second index (column 0) is
# presumably the observation-period column -- confirm against the pyisc docs.
component_models = [
    pyisc.P_PoissonOnesided(1, 0),  # columns 1 and 0
    pyisc.P_Poisson(2, 0),          # columns 2 and 0
    pyisc.P_Gaussian(3),            # column 3
]

# cr_max is the combination rule: per the surrounding text, the detector's
# score is the maximum of the component models' scores.
anomaly_detector = pyisc.AnomalyDetector(
    component_models=component_models,
    output_combination_rule=pyisc.cr_max,
)
Train the anomaly detector:
In [4]:
# Fit the detector on the full data array (normal rows and injected anomalies
# alike); the trailing ';' suppresses the cell's output.
anomaly_detector.fit(data);
In [5]:
Compute the anomaly scores for each data point:
In [ ]:
# Score the same array that was used for fitting; the result is sliced and
# plotted per row in the cells below.
scores = anomaly_detector.anomaly_score(data)
In [ ]:
from pandas import DataFrame

# Show the first 15 rows, which all come from the "normal" distributions,
# next to their combined anomaly scores.
df = DataFrame(data[:15], columns=['#Days', 'Freq1', 'Freq2', 'Measure'])
df['Anomaly Score'] = scores[:15]
# Call form of print: valid on both Python 2 and Python 3, whereas the
# statement form `print df...` is a SyntaxError on Python 3.
print(df.to_string())
The anomalous frequencies vs. anomaly scores for the 15 anomalous data points:
In [ ]:
# The injected anomalies are the last `anomaly_len` rows of `data`, so slice
# by that name instead of repeating the literal 15.
df = DataFrame(data[-anomaly_len:], columns=['#Days', 'Freq1', 'Freq2', 'Measure'])
df['Anomaly Score'] = scores[-anomaly_len:]
# Call form of print: valid on both Python 2 and Python 3, whereas the
# statement form `print df...` is a SyntaxError on Python 3.
print(df.to_string())
As can be seen above, the anomalous data points have higher anomaly scores than the normal ones, as they should.
This becomes even more visible if we plot the anomaly scores (y-axis) against each data point (x-axis):
In [ ]:
# Plot each row's anomaly score (y-axis) against its row index (x-axis) as
# point markers; the trailing ';' hides the returned object's repr.
plot(scores, '.');
We can also look at the details of each column in terms of their individual anomaly scores:
In [ ]:
# Per-row score breakdown. As indexed in the next cell, each entry holds the
# combined score at [0] and the per-model scores at [1] (in component order).
score_details = anomaly_detector.anomaly_score_details(data)
In [ ]:
# Break the combined score of the last 15 rows (the injected anomalies) down
# into the score from each component model.
tail_details = score_details[-15:]

df = DataFrame(data[-15:], columns=['#Days', 'Freq1', 'Freq2', 'Measure'])

# detail[1][k] is the score from the k-th component model.
per_model_columns = [
    ('Anomaly:Freq1', 0),
    ('Anomaly:Freq2', 1),
    ('Anomaly:Measure', 2),
]
for column_label, model_index in per_model_columns:
    df[column_label] = [detail[1][model_index] for detail in tail_details]

# detail[0] is the combined anomaly score.
df['Anomaly Score'] = [detail[0] for detail in tail_details]
df
Above, the last column is the same anomaly score as before. We can see that it equals the maximum of the individual anomaly scores to its left; that is, it is the result of the combination rule given to the anomaly detector.
In [ ]: