In [1]:
%matplotlib inline
"""
This IPython notebook serves as the test bed for researchers who desire to refine Ellipsoid Boundary Modeling
techniques for anomaly detection in Wireless Sensor Networks (WSNs).
Users have immediate access to baseline functions, imported below, that represent the current methods and algorithms
that are meant to be improved upon. After testing the baseline functions and reviewing the respective source code, located at
the root of the project in baseline.py, users can improve upon various sections in a modular fashion by adding
their modified code in cells clearly labeled for such work and plugging them into their respective phase.
Note that the baseline methods in this specific notebook are based on research completed by Dr. Suthaharan et al.
using successive differences. However, the baseline methods can easily be ported to other Ellipsoid Boundary
Mod
where researchers ca
"""
# Import necessary libraries
import copy
import math
import matplotlib.pyplot as pyplot
import baseline # baseline functions based on Dr. Suthaharan et al.'s research
In [2]:
"""Note: This phase will need slight tweaking for each data source as they do not follow a truly standard data format.
As a result, interfaces will likely need to be written for each source akin to read_ibrl_data()
"""
# Location of the IBRL sensor measurements dataset (path is relative to the current working directory)
ibrl_sensor_measurements_file = "./datasets/Reduced2530K.csv"
# Create dictionary of original sensors mapping to their measurements
# x1, x2, ..., xn where xi = (ti', hi') and X = (T, H)
# NOTE(review): the exact structure returned by read_ibrl_data() is defined in baseline.py -- confirm there.
measurements = baseline.read_ibrl_data(ibrl_sensor_measurements_file)
In [3]:
# Shuffle measurements
# y1, y2, ..., yn
# The shuffled result (not `measurements`) is what the later difference and plotting cells consume.
# NOTE(review): no random seed is set in this notebook; if randomize_readings() is stochastic,
# results will differ between runs -- confirm in baseline.py.
shuffled_measurements = baseline.randomize_readings(measurements)
In [4]:
"""YOUR CODE HERE"""
Out[4]:
In [5]:
# Calculate successive differences and construct the lookup table
# p1 = y2-y1, p2 = y3-y2, ... pn-1 = yn - yn-1 where pi = (ti, hi) and P = (T', H')
# Later cells index the result per sensor as differences[sensor][0] (temperature values)
# and differences[sensor][1] (humidity values).
# `lookup_table` is not referenced again in the visible cells; presumably it is meant for
# the inverse transformation planned for iteration 2 -- TODO confirm.
differences, lookup_table = baseline.generate_differences(shuffled_measurements)
In [6]:
"""YOUR CODE HERE"""
Out[6]:
In [7]:
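# Standardize the differences
# NOTE: this step is disabled, and `helpers` is not imported in the import cell at the top of this
# notebook. Cells below that read `standardized_differences` will raise a NameError until it is implemented.
# standardized_differences = helpers.standardize_readings(copy.deepcopy(differences))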
In [8]:
"""YOUR CODE HERE"""
Out[8]:
In [9]:
"""Note: in a real-world application, each individual sensor would calculate these parameters locally, sending the results
to the base station. This dictionary is the result that the base station would compile and use for determining the regional
ellipsoid parameters."""
# Model one ellipsoid per sensor from its successive differences, using manually set a and b:
#   a=1.7601, b=4.1168  -> standardized successive differences (IBRL dataset)
#   a=8.7886, b=22.9904 -> non-standardized successive differences (IBRL dataset)
# `differences` is not standardized, so the second pair is used here.
# .items() is used instead of .iteritems() so the cell runs on both Python 2 and Python 3
# (dict.iteritems() was removed in Python 3).
ellipsoid_parameters = {
    sensor_id: baseline.generate_ellipsoid(sensor_readings, 8.7886, 22.9904)
    for (sensor_id, sensor_readings) in differences.items()
}
In [10]:
"""YOUR CODE HERE"""
Out[10]:
In [11]:
# Determine the regional values of a, b, and theta assuming one homogeneous region
# NOTE: regional_theta is later overwritten with a mocked value (.45) in the cell that
# plots the regional ellipsoid, so the theta computed here is not the one that gets plotted.
regional_a, regional_b, regional_theta = baseline.generate_regional_ellipsoid_parameters(ellipsoid_parameters)
In [12]:
"""YOUR CODE HERE"""
Out[12]:
In [13]:
############################################
###### To be completed in iteration 2 ######
############################################
# 1C - Inverse Transformation
# Segregate anomalies from true measurements
In [14]:
"""YOUR CODE HERE"""
Out[14]:
In [15]:
# Scatter plot of the shuffled readings of every sensor on a single
# temperature/humidity axes.
original_data_fig = pyplot.figure(figsize=(6, 6))
original_data_axes = original_data_fig.add_axes(
    [0.1, 0.1, 0.8, 0.8],
    title='Original Data (of all sensors)',
    xlabel='Temperature',
    ylabel='Humidity',
)
for sensor in shuffled_measurements:
    # Index 0 is plotted on the temperature axis, index 1 on the humidity axis.
    temperatures = list(shuffled_measurements[sensor][0])
    humidities = list(shuffled_measurements[sensor][1])
    original_data_axes.scatter(temperatures, humidities, s=10)
In [16]:
# Saves the figure as a PNG in the current working directory.
# Comment out the line below to skip saving.
original_data_fig.savefig('original_data_fig.png')
In [17]:
# Scatter plot of the successive differences of every sensor on a single
# temperature/humidity axes.
successive_differences_fig = pyplot.figure(figsize=(6, 6))
successive_differences_axes = successive_differences_fig.add_axes(
    [0.1, 0.1, 0.8, 0.8],
    title='Successive Differences (of all sensors)',
    xlabel='Temperature',
    ylabel='Humidity',
)
for sensor in differences:
    # Index 0 is plotted on the temperature axis, index 1 on the humidity axis.
    temperature_diffs = list(differences[sensor][0])
    humidity_diffs = list(differences[sensor][1])
    successive_differences_axes.scatter(temperature_diffs, humidity_diffs, s=10)
In [18]:
# Saves the figure as a PNG in the current working directory.
# Comment out the line below to skip saving.
successive_differences_fig.savefig('successive_differences_fig.png')
In [ ]:
# TODO: implement
# NOTE: `standardized_differences` is only assigned by the standardization step, which is
# currently commented out; this cell raises a NameError until that step is implemented.
std_successive_differences_fig = pyplot.figure(figsize=(6, 6))
std_successive_differences_axes = std_successive_differences_fig.add_axes(
    [0.1, 0.1, 0.8, 0.8],
    title='Standardized Successive Differences (of all sensors)',
    xlabel='Temperature',
    ylabel='Humidity',
)
for sensor in standardized_differences:
    # Each reading is indexed as reading[0] (x axis) and reading[1] (y axis).
    std_temperatures = [reading[0] for reading in standardized_differences[sensor]]
    std_humidities = [reading[1] for reading in standardized_differences[sensor]]
    std_successive_differences_axes.scatter(std_temperatures, std_humidities, s=10)
In [ ]:
# Uncomment the line below and run the cell to save
# std_successive_differences_fig.savefig('std_successive_differences_fig.png')
In [19]:
# Plot the boundary points of the ellipsoid modeled for each sensor's
# (non-standardized) successive differences.
succ_diff_ellipsoids_fig = pyplot.figure(figsize=(6, 6))
succ_diff_ellipsoids_axes = succ_diff_ellipsoids_fig.add_axes(
    [0.1, 0.1, 0.8, 0.8],
    title='Successive difference ellipsoids',
    xlabel='Temperature',
    ylabel='Humidity',
)
# .items() works on both Python 2 and Python 3 (dict.iteritems() was removed in Python 3).
for sensor_id, sensor_readings in differences.items():
    # `differences` is not standardized, so use the non-standardized IBRL values
    # (a=8.7886, b=22.9904). The previously used pair (a=1.7601, b=4.1168) is the one
    # documented for *standardized* successive differences.
    ellipsoid_params = baseline.generate_ellipsoid(sensor_readings, 8.7886, 22.9904)
    ellipsoid_points = ellipsoid_params['ellipsoid_points']
    succ_diff_ellipsoids_axes.scatter([point[0] for point in ellipsoid_points],
                                      [point[1] for point in ellipsoid_points],
                                      s=10,
                                      c='r')
In [20]:
# Saves the figure as a PNG in the current working directory.
# Comment out the line below to skip saving.
succ_diff_ellipsoids_fig.savefig('succ_diff_ellipsoids_fig.png')
In [ ]:
# TODO: Implement
# NOTE: `standardized_differences` is only assigned by the standardization step, which is
# currently commented out; this cell raises a NameError until that step is implemented.
std_succ_diff_ellipsoids_fig = pyplot.figure(figsize=(6, 6))
std_succ_diff_ellipsoids_axes = std_succ_diff_ellipsoids_fig.add_axes(
    [0.1, 0.1, 0.8, 0.8],
    title='Standardized successive difference ellipsoids',
    xlabel='Temperature',
    ylabel='Humidity',
)
for sensor in standardized_differences:
    # generate_ellipsoid lives in the imported `baseline` module; `helpers` is never
    # imported in this notebook. a=1.7601, b=4.1168 are the values for standardized
    # successive differences with the IBRL dataset.
    ellipsoid_params = baseline.generate_ellipsoid(standardized_differences[sensor], 1.7601, 4.1168)
    ellipsoid_points = ellipsoid_params['ellipsoid_points']
    std_succ_diff_ellipsoids_axes.scatter([point[0] for point in ellipsoid_points],
                                          [point[1] for point in ellipsoid_points],
                                          s=10,
                                          c='r')
In [ ]:
# Uncomment the line below and run the cell to save
# std_succ_diff_ellipsoids_fig.savefig('std_succ_diff_ellipsoids_fig.png')
In [21]:
# NOTE: this assignment replaces the regional_theta computed earlier by
# baseline.generate_regional_ellipsoid_parameters() with a fixed value.
regional_theta = .45 # Mocked for testing purposes
regional_ellipsoid_fig = pyplot.figure(figsize=(6, 6))
regional_ellipsoid_axes = regional_ellipsoid_fig.add_axes(
    [0.1, 0.1, 0.8, 0.8],
    title='Regional ellipsoid within successive differences',
    xlabel='Temperature',
    ylabel='Humidity',
)
# Plot successive differences
# .items() works on both Python 2 and Python 3 (dict.iteritems() was removed in Python 3).
for sensor_id, sensor_readings in differences.items():
    regional_ellipsoid_axes.scatter(list(sensor_readings[0]),
                                    list(sensor_readings[1]),
                                    s=10)
# Plot calculated regional ellipsoid within successive differences.
# Kept as a second pass so the ellipsoid markers are drawn after all of the scatter points.
for sensor_id, sensor_readings in differences.items():
    ellipsoid_params = baseline.generate_ellipsoid(sensor_readings,
                                                   regional_a,
                                                   regional_b,
                                                   regional_theta)
    ellipsoid_points = ellipsoid_params['ellipsoid_points']
    regional_ellipsoid_axes.plot([point[0] for point in ellipsoid_points],
                                 [point[1] for point in ellipsoid_points],
                                 'ro')
In [22]:
# Saves the figure as a PNG in the current working directory.
# Comment out the line below to skip saving.
regional_ellipsoid_fig.savefig('regional_ellipsoid_fig.png')
In [ ]:
############################################
###### To be completed in iteration 2 ######
############################################