In [1]:
%matplotlib inline
"""
This IPython notebook serves as the test bed for researchers who desire to refine Ellipsoid Boundary Modeling
techniques for anomaly detection in Wireless Sensor Networks (WSNs).
Users have immediate access to baseline functions, imported below, that represent the current methods and algorithms
that are meant to be improved upon. After testing the baseline functions and reviewing the respective source code, located at
the root of the project in baseline.py, users can improve upon various sections in a modular fashion by adding
their modified code in cells clearly labeled for such work and plugging them into their respective phase.
Note that the baseline methods in this specific notebook are based on research completed by Dr. Suthaharan et al.
using successive differences. However, the baseline methods can easily be ported to other Ellipsoid Boundary
Modeling techniques with very few changes required within this notebook.
"""
# Import necessary libraries
import copy
import math
import matplotlib.pyplot as pyplot
import helpers # helper functions are the baseline functions from Dr. Shan et al's research
import baseline # baseline functions based on Dr. Suthaharan et al.'s reserach
In [2]:
"""Note: This phase will need slight tweaking for each data source as they do not follow a truly standard data format.
As a result, interfaces will likely need to be written for each source akin to read_ibrl_data()
"""
# Relative path of the IBRL sensor measurements dataset.
ibrl_sensor_measurements_file = "./datasets/Reduced2530K.csv"

# Read the same file through both implementations (helpers and baseline) so
# that their results can be compared in the cells below.
# Original sensors map to their measurements:
# x1, x2, ..., xn where xi = (ti', hi') and X = (T, H)
measurements = helpers.read_ibrl_data(
    ibrl_sensor_measurements_file
)
measurements_np = baseline.read_ibrl_data(
    ibrl_sensor_measurements_file
)
In [3]:
# Randomize the order of the readings: y1, y2, ..., yn
# NOTE(review): no random seed is set in this notebook; unless the modules
# seed internally, the shuffled order differs between runs -- confirm.
# The helpers call intentionally stays first in case both share RNG state.
shuffled_measurements = helpers.randomize_readings(
    measurements
)
shuffled_measurements_np = baseline.randomize_readings(
    measurements_np
)
In [4]:
"""YOUR CODE HERE"""
Out[4]:
In [4]:
# Calculate successive differences and construct the lookup table
# p1 = y2-y1, p2 = y3-y2, ... pn-1 = yn - yn-1 where pi = (ti, hi) and P = (T', H')
# With shuffling
differences, lookup_table = helpers.generate_differences(shuffled_measurements)
differences_np, lookup_table_np = baseline.generate_differences(shuffled_measurements_np)
# Without shuffling
#differences, lookup_table = helpers.generate_differences(measurements)
#differences_np, lookup_table_np = baseline.generate_differences(measurements_np)

# Sanity check: show the first component of the first five differences of
# sensor '1' from each implementation so they can be compared by eye.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print([reading[0] for reading in differences['1'][:5]])
print(differences_np['1'][0][:5])
In [130]:
"""YOUR CODE HERE"""
Out[130]:
In [131]:
# Standardize the differences. A deep copy is handed to the helper so that
# `differences` itself stays untouched should the helper modify its argument.
standardized_differences = helpers.standardize_readings(
    copy.deepcopy(differences)
)
In [132]:
"""YOUR CODE HERE"""
Out[132]:
In [8]:
"""Note: in a real world application, each individual sensor would calculate these parameters locally, sending the results
to the base station. This dictionary is the result that the base station would compile and use for determining the regional
ellipsoid parameters."""
# Model ellipsoid for each sensor with data using manually set a and b
# (a=1.7601, b=4.1168 for standardized successive differences on IBRL dataset)
# (a=8.7886, b=22.9904 are the values used below -- presumably for the
#  non-standardized successive differences; TODO confirm)
# .items() is used instead of .iteritems() so the cell runs on Python 2 and 3.
ellipsoid_parameters = {
    sensor: helpers.generate_ellipsoid(readings, 8.7886, 22.9904)  # , 0.8057)
    for (sensor, readings) in differences.items()
}
ellipsoid_parameters_np = {
    sensor_id: baseline.generate_ellipsoid(sensor_readings, 8.7886, 22.9904)
    for (sensor_id, sensor_readings) in differences_np.items()
}
In [134]:
"""YOUR CODE HERE"""
Out[134]:
In [19]:
# Determine the regional values of a, b, and theta assuming one homogeneous region
regional_a, regional_b, regional_theta = baseline.generate_regional_ellipsoid_parameters(
    ellipsoid_parameters_np
)
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(regional_theta)
# NOTE(review): the angle below is hard-coded -- presumably a regional_theta
# value copied from an earlier run. It does not follow the value computed
# above; confirm whether math.tan(regional_theta) was intended.
print(math.tan(.801154228626))
In [136]:
"""YOUR CODE HERE"""
Out[136]:
In [137]:
################ INCOMPLETE ################
# 1C - Inverse Transformation
# Segregate anomalies from true measurements
In [138]:
"""YOUR CODE HERE"""
Out[138]:
In [10]:
# Scatter plot of the shuffled measurements of every sensor (baseline data).
original_data_fig_np = pyplot.figure(figsize=(6, 6))
original_data_axes_np = original_data_fig_np.add_axes(
    [0.1, 0.1, 0.8, 0.8],
    title='Original Data (of all sensors)',
    xlabel='Temperature',
    ylabel='Humidity',
)
for sensor in shuffled_measurements_np:
    sensor_data = shuffled_measurements_np[sensor]
    # Index 0 goes on the Temperature axis, index 1 on the Humidity axis.
    original_data_axes_np.scatter(
        list(sensor_data[0]),
        list(sensor_data[1]),
        s=10,
    )
In [142]:
# Uncomment the line below and run the cell to save
# original_data_fig_np.savefig('original_data_fig.png')
In [11]:
# Scatter plot of the successive differences of every sensor (baseline data).
successive_differences_fig_np = pyplot.figure(figsize=(6, 6))
successive_differences_axes_np = successive_differences_fig_np.add_axes(
    [0.1, 0.1, 0.8, 0.8],
    title='Successive Differences (of all sensors)',
    xlabel='Temperature',
    ylabel='Humidity',
)
for sensor in differences_np:
    sensor_differences = differences_np[sensor]
    # Index 0 goes on the Temperature axis, index 1 on the Humidity axis.
    successive_differences_axes_np.scatter(
        list(sensor_differences[0]),
        list(sensor_differences[1]),
        s=10,
    )
In [146]:
# Uncomment the line below and run the cell to save
# successive_differences_fig_np.savefig('successive_differences_fig.png')
In [ ]:
# TODO: implement
# (presumably a baseline-based version is still pending -- this cell plots
# the helpers-based `standardized_differences`; confirm)
std_successive_differences_fig = pyplot.figure(figsize=(6, 6))
std_successive_differences_axes = std_successive_differences_fig.add_axes(
    [0.1, 0.1, 0.8, 0.8],
    title='Standardized Successive Differences (of all sensors)',
    xlabel='Temperature',
    ylabel='Humidity',
)
for sensor in standardized_differences:
    sensor_points = standardized_differences[sensor]
    # Each reading is indexed as (x, y): element 0 on the Temperature axis,
    # element 1 on the Humidity axis.
    std_successive_differences_axes.scatter(
        [point[0] for point in sensor_points],
        [point[1] for point in sensor_points],
        s=10,
    )
In [148]:
# Uncomment the line below and run the cell to save
# std_successive_differences_fig.savefig('std_successive_differences_fig.png')
In [18]:
# Plot, in red, the ellipsoid points generated for every sensor from the
# baseline successive differences.
succ_diff_ellipsoids_fig = pyplot.figure(figsize=(6, 6))
succ_diff_ellipsoids_axes = succ_diff_ellipsoids_fig.add_axes(
    [0.1, 0.1, 0.8, 0.8],
    title='Successive difference ellipsoids',
    xlabel='Temperature',
    ylabel='Humidity',
)
# .items() is used instead of .iteritems() so the cell runs on Python 2 and 3.
for sensor_id, sensor_readings in differences_np.items():
    # NOTE(review): a=1.7601, b=4.1168 are the values this notebook lists for
    # *standardized* successive differences, but `differences_np` is not
    # standardized here -- confirm these are the intended parameters.
    ellipsoid_params = baseline.generate_ellipsoid(sensor_readings, 1.7601, 4.1168)
    boundary_points = ellipsoid_params['ellipsoid_points']
    succ_diff_ellipsoids_axes.scatter(
        [point[0] for point in boundary_points],
        [point[1] for point in boundary_points],
        s=10,
        c='r',
    )
In [150]:
# Uncomment the line below and run the cell to save
# succ_diff_ellipsoids_fig.savefig('succ_diff_ellipsoids_fig.png')
In [29]:
# TODO: Implement
# (presumably a baseline-based version is still pending -- this cell uses
# helpers.generate_ellipsoid on `standardized_differences`; confirm)
std_succ_diff_ellipsoids_fig = pyplot.figure(figsize=(6, 6))
std_succ_diff_ellipsoids_axes = std_succ_diff_ellipsoids_fig.add_axes(
    [0.1, 0.1, 0.8, 0.8],
    title='Standardized successive difference ellipsoids',
    xlabel='Temperature',
    ylabel='Humidity',
)
for sensor in standardized_differences:
    ellipsoid_params = helpers.generate_ellipsoid(
        standardized_differences[sensor], 1.7601, 4.1168
    )
    boundary_points = ellipsoid_params['ellipsoid_points']
    # Draw the generated ellipsoid points of this sensor in red.
    std_succ_diff_ellipsoids_axes.scatter(
        [point[0] for point in boundary_points],
        [point[1] for point in boundary_points],
        s=10,
        c='r',
    )
In [152]:
# Uncomment the line below and run the cell to save
# std_succ_diff_ellipsoids_fig.savefig('std_succ_diff_ellipsoids_fig.png')
In [28]:
# NOTE(review): this rebinds `regional_theta`, replacing the value computed by
# baseline.generate_regional_ellipsoid_parameters earlier in the notebook; any
# cell run afterwards sees the mocked value. Remove once the computed theta
# can be trusted.
regional_theta = .45  # Mocked for testing purposes
test_fig = pyplot.figure(figsize=(6, 6))
test_axes = test_fig.add_axes(
    [0.1, 0.1, 0.8, 0.8],
    title='Regional ellipsoid within successive differences',
    xlabel='Temperature',
    ylabel='Humidity',
)
# Plot successive differences
# .items() is used instead of .iteritems() so the cell runs on Python 2 and 3.
for sensor_id, sensor_readings in differences_np.items():
    test_axes.scatter(
        list(sensor_readings[0]),
        list(sensor_readings[1]),
        s=10,
    )
# Plot calculated regional ellipsoid within successive differences
for sensor_id, sensor_readings in differences_np.items():
    ellipsoid_params = baseline.generate_ellipsoid(
        sensor_readings,
        regional_a,
        regional_b,
        regional_theta,
    )
    boundary_points = ellipsoid_params['ellipsoid_points']
    # 'ro' draws the ellipsoid points as red circle markers.
    test_axes.plot(
        [point[0] for point in boundary_points],
        [point[1] for point in boundary_points],
        'ro',
    )
In [28]:
# Uncomment the line below and run the cell to save
# test_fig.savefig('test_fig.png')
Everything seems to be working fine except for the ellipsoid parameter generation. While it is creating ellipses (finally), it still isn't generating the tails and 'closing in' on the true data. In the bottom right figure we can see the ellipsoid generated for every sensor stacked on top of each other.
NOTE: They all use the same 'a' and 'b' values (given by Dr. Shan) but the theta is changing. The subplot depicts this in a way that we would expect.
What's next?
In [9]:
# Scatter plot of the (unshuffled) helpers-based measurements of every sensor.
original_data_fig = pyplot.figure(figsize=(6, 6))
original_data_axes = original_data_fig.add_axes(
    [0.1, 0.1, 0.8, 0.8],
    title='Original Data (of all sensors)',
    xlabel='Temperature',
    ylabel='Humidity',
)
for sensor in measurements:
    sensor_points = measurements[sensor]
    # Element 0 of each reading goes on the Temperature axis, element 1 on
    # the Humidity axis.
    original_data_axes.scatter(
        [point[0] for point in sensor_points],
        [point[1] for point in sensor_points],
        s=10,
    )
In [12]:
# Scatter plot of the helpers-based successive differences of every sensor.
successive_differences_fig = pyplot.figure(figsize=(6, 6))
successive_differences_axes = successive_differences_fig.add_axes(
    [0.1, 0.1, 0.8, 0.8],
    title='Successive Differences (of all sensors)',
    xlabel='Temperature',
    ylabel='Humidity',
)
for sensor in differences:
    sensor_points = differences[sensor]
    # Element 0 of each difference goes on the Temperature axis, element 1 on
    # the Humidity axis.
    successive_differences_axes.scatter(
        [point[0] for point in sensor_points],
        [point[1] for point in sensor_points],
        s=10,
    )
In [147]:
# Scatter plot of the standardized successive differences of every sensor.
# NOTE(review): an identical plotting cell appears earlier in this notebook;
# running both rebinds the same figure/axes names -- consider keeping one.
std_successive_differences_fig = pyplot.figure(figsize=(6, 6))
std_successive_differences_axes = std_successive_differences_fig.add_axes(
    [0.1, 0.1, 0.8, 0.8],
    title='Standardized Successive Differences (of all sensors)',
    xlabel='Temperature',
    ylabel='Humidity',
)
for sensor in standardized_differences:
    sensor_points = standardized_differences[sensor]
    std_successive_differences_axes.scatter(
        [point[0] for point in sensor_points],
        [point[1] for point in sensor_points],
        s=10,
    )
In [14]:
# Plot, in red, the ellipsoid points generated for every sensor from the
# helpers-based successive differences.
succ_diff_ellipsoids_fig = pyplot.figure(figsize=(6, 6))
succ_diff_ellipsoids_axes = succ_diff_ellipsoids_fig.add_axes(
    [0.1, 0.1, 0.8, 0.8],
    title='Successive difference ellipsoids',
    xlabel='Temperature',
    ylabel='Humidity',
)
for sensor in differences:
    # NOTE(review): a=1.7601, b=4.1168 are the values this notebook lists for
    # *standardized* successive differences, but `differences` is not
    # standardized -- confirm these are the intended parameters.
    ellipsoid_params = helpers.generate_ellipsoid(
        differences[sensor], 1.7601, 4.1168
    )
    boundary_points = ellipsoid_params['ellipsoid_points']
    succ_diff_ellipsoids_axes.scatter(
        [point[0] for point in boundary_points],
        [point[1] for point in boundary_points],
        s=10,
        c='r',
    )
In [15]:
# Plot, in red, the ellipsoid points generated for every sensor from the
# standardized successive differences.
# NOTE(review): an identical plotting cell appears earlier in this notebook;
# running both rebinds the same figure/axes names -- consider keeping one.
std_succ_diff_ellipsoids_fig = pyplot.figure(figsize=(6, 6))
std_succ_diff_ellipsoids_axes = std_succ_diff_ellipsoids_fig.add_axes(
    [0.1, 0.1, 0.8, 0.8],
    title='Standardized successive difference ellipsoids',
    xlabel='Temperature',
    ylabel='Humidity',
)
for sensor in standardized_differences:
    ellipsoid_params = helpers.generate_ellipsoid(
        standardized_differences[sensor], 1.7601, 4.1168
    )
    boundary_points = ellipsoid_params['ellipsoid_points']
    std_succ_diff_ellipsoids_axes.scatter(
        [point[0] for point in boundary_points],
        [point[1] for point in boundary_points],
        s=10,
        c='r',
    )
In [ ]:
# Overlay the regional ellipsoid (helpers implementation) on the successive
# differences of every sensor.
# Relies on regional_a, regional_b and regional_theta from earlier cells.
test_fig = pyplot.figure(figsize=(6, 6))
test_axes = test_fig.add_axes(
    [0.1, 0.1, 0.8, 0.8],
    title='Regional ellipsoid within successive differences',
    xlabel='Temperature',
    ylabel='Humidity',
)
# Plot successive differences
for sensor in differences:
    sensor_points = differences[sensor]
    test_axes.scatter(
        [point[0] for point in sensor_points],
        [point[1] for point in sensor_points],
        s=10,
    )
# Plot calculated regional ellipsoid within successive differences
for sensor in differences:
    ellipsoid_params = helpers.generate_ellipsoid(
        differences[sensor],
        regional_a,
        regional_b,
        regional_theta,
    )
    boundary_points = ellipsoid_params['ellipsoid_points']
    # No format string is given, so the points are drawn with pyplot's
    # default line style.
    test_axes.plot(
        [point[0] for point in boundary_points],
        [point[1] for point in boundary_points],
    )
In [ ]: