In this notebook, I test changepoint detection on a few appliances from the Tracebase data set.
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
Let's start with the refrigerator.
In [33]:
# Load the refrigerator trace: a semicolon-separated file of which only the
# first two columns are read; column 0 becomes the index and is parsed as dates.
ref = pd.read_csv(
    "/home/nipun/study/datasets/tracebase/Refrigerator/dev_98C08A_2011.08.25.csv",
    sep=";",
    index_col=0,
    usecols=[0, 1],
    parse_dates=True,
)
In [34]:
# Keep only the last reading for every repeated timestamp.
# The original round-trip (copy the index into a column, then
# drop_duplicates(cols=..., take_last=True), then drop("index", 1)) relies on
# keyword/positional forms that were removed from pandas; masking on
# Index.duplicated gives the same rows without touching the columns.
ref = ref[~ref.index.duplicated(keep="last")]
In [35]:
# Name the single remaining data column 'real'; later cells access it as ref.real.
ref.columns = ['real']
In [36]:
# Number of rows left after de-duplication (displayed as the cell output).
len(ref)
Out[36]:
In [38]:
# Downsample to one-minute bins, taking the median of each bin.
# `resample(..., how=...)` was removed from pandas; the aggregation is now a
# method on the resampler. '1min' is the same frequency as the old '1T' alias.
ref = ref.resample('1min').median()
In [39]:
# Quick look at the one-minute refrigerator series.
ref.plot()
Out[39]:
In [40]:
from bayesianchangepoint import bcp


def hazard_func(r):
    """Evaluate bcp.constant_hazard at `r` with `_lambda` fixed to 200."""
    return bcp.constant_hazard(r, _lambda=200)
In [44]:
import time

# Run inference on the first 1000 one-minute samples of the 'real' column.
test_signal = ref["real"].head(1000).values

# Wall-clock timestamps taken around the call; the elapsed time is end - start.
start = time.time()
beliefs, maxes = bcp.inference(test_signal, hazard_func)
end = time.time()
In [45]:
# Two stacked panels sharing the x axis: the signal on top, and below it the
# negative log of `beliefs` drawn as an image with `maxes` overlaid in red.
fig, ax = plt.subplots(nrows=2, sharex=True)
signal_ax, belief_ax = ax

signal_ax.plot(test_signal)

belief_ax.imshow(
    -np.log(beliefs),
    interpolation='none',
    aspect='auto',
    origin='lower',
    cmap=plt.cm.Blues,
)
belief_ax.plot(maxes, color='r')
belief_ax.set_xlim([0, len(test_signal)])
# Pin the lower y limit to 0 while keeping whatever upper limit was chosen.
belief_ax.set_ylim([0, belief_ax.get_ylim()[1]])

for panel in ax:
    panel.grid()

# Mark the sample positions where the first row of `maxes.T` decreases from
# one step to the next (presumably a run-length reset, i.e. a detected
# changepoint -- TODO confirm against the bcp documentation).
run_length_steps = np.diff(maxes.T[0])
index_changes = np.where(run_length_steps < 0)[0]
signal_ax.scatter(index_changes, test_signal[index_changes], c='green');
OK, something doesn't look quite right. :(
Let's try a different hazard function.
In [46]:
def hazard_func(r):
    """Evaluate bcp.constant_hazard at `r` with `_lambda` fixed to 10."""
    return bcp.constant_hazard(r, _lambda=10)
In [47]:
import time

# Same first-1000-sample window of the 'real' column, re-run with the
# hazard function currently bound to `hazard_func`.
test_signal = ref["real"].head(1000).values

# Wall-clock timestamps taken around the call; the elapsed time is end - start.
start = time.time()
beliefs, maxes = bcp.inference(test_signal, hazard_func)
end = time.time()
In [48]:
# Two stacked panels sharing the x axis: the signal on top, and below it the
# negative log of `beliefs` drawn as an image with `maxes` overlaid in red.
fig, ax = plt.subplots(nrows=2, sharex=True)
signal_ax, belief_ax = ax

signal_ax.plot(test_signal)

belief_ax.imshow(
    -np.log(beliefs),
    interpolation='none',
    aspect='auto',
    origin='lower',
    cmap=plt.cm.Blues,
)
belief_ax.plot(maxes, color='r')
belief_ax.set_xlim([0, len(test_signal)])
# Pin the lower y limit to 0 while keeping whatever upper limit was chosen.
belief_ax.set_ylim([0, belief_ax.get_ylim()[1]])

for panel in ax:
    panel.grid()

# Mark the sample positions where the first row of `maxes.T` decreases from
# one step to the next (presumably a run-length reset, i.e. a detected
# changepoint -- TODO confirm against the bcp documentation).
run_length_steps = np.diff(maxes.T[0])
index_changes = np.where(run_length_steps < 0)[0]
signal_ax.scatter(index_changes, test_signal[index_changes], c='green');
Ah! That seems to make things worse. Let's try increasing the lambda parameter instead.
In [49]:
def hazard_func(r):
    """Evaluate bcp.constant_hazard at `r` with `_lambda` fixed to 400."""
    return bcp.constant_hazard(r, _lambda=400)
In [50]:
import time

# Same first-1000-sample window of the 'real' column, re-run with the
# hazard function currently bound to `hazard_func`.
test_signal = ref["real"].head(1000).values

# Wall-clock timestamps taken around the call; the elapsed time is end - start.
start = time.time()
beliefs, maxes = bcp.inference(test_signal, hazard_func)
end = time.time()
In [51]:
# Two stacked panels sharing the x axis: the signal on top, and below it the
# negative log of `beliefs` drawn as an image with `maxes` overlaid in red.
fig, ax = plt.subplots(nrows=2, sharex=True)
signal_ax, belief_ax = ax

signal_ax.plot(test_signal)

belief_ax.imshow(
    -np.log(beliefs),
    interpolation='none',
    aspect='auto',
    origin='lower',
    cmap=plt.cm.Blues,
)
belief_ax.plot(maxes, color='r')
belief_ax.set_xlim([0, len(test_signal)])
# Pin the lower y limit to 0 while keeping whatever upper limit was chosen.
belief_ax.set_ylim([0, belief_ax.get_ylim()[1]])

for panel in ax:
    panel.grid()

# Mark the sample positions where the first row of `maxes.T` decreases from
# one step to the next (presumably a run-length reset, i.e. a detected
# changepoint -- TODO confirm against the bcp documentation).
run_length_steps = np.diff(maxes.T[0])
index_changes = np.where(run_length_steps < 0)[0]
signal_ax.scatter(index_changes, test_signal[index_changes], c='green');
I can't really conclude anything from this!
In [54]:
# Load the dishwasher trace: a semicolon-separated file of which only the
# first two columns are read; column 0 becomes the index and is parsed as dates.
dw = pd.read_csv(
    "/home/nipun/study/datasets/tracebase/Dishwasher/dev_B7E6F4_2012.02.03.csv",
    sep=";",
    index_col=0,
    usecols=[0, 1],
    parse_dates=True,
)
In [56]:
# Keep only the last reading for every repeated timestamp.
# The original round-trip (copy the index into a column, then
# drop_duplicates(cols=..., take_last=True), then drop("index", 1)) relies on
# keyword/positional forms that were removed from pandas; masking on
# Index.duplicated gives the same rows without touching the columns.
dw = dw[~dw.index.duplicated(keep="last")]
In [57]:
# Name the single remaining data column 'real'; later cells access it as dw.real.
dw.columns =['real']
In [58]:
# Quick look at the de-duplicated dishwasher series (before resampling).
dw.plot()
Out[58]:
In [63]:
# Downsample to one-minute bins, taking the median of each bin.
# Passing the aggregation as the second positional argument (`how`) was
# removed from pandas; the aggregation is now a method on the resampler.
# '1min' is the same frequency as the old '1T' alias.
dw = dw.resample('1min').median()
In [66]:
import time

# Take the samples at positions 300..599 of the one-minute dishwasher series.
# `.ix` has been removed from pandas; on this datetime-indexed frame an integer
# slice through `.ix` was positional, so `.iloc` is the explicit equivalent.
test_signal = dw.iloc[300:600]["real"].values

# NOTE: `hazard_func` is whatever an earlier cell last bound it to
# (_lambda=400 when the notebook is run top to bottom).
# Wall-clock timestamps taken around the call; the elapsed time is end - start.
start = time.time()
beliefs, maxes = bcp.inference(test_signal, hazard_func)
end = time.time()
In [67]:
# Two stacked panels sharing the x axis: the signal on top, and below it the
# negative log of `beliefs` drawn as an image with `maxes` overlaid in red.
fig, ax = plt.subplots(nrows=2, sharex=True)
signal_ax, belief_ax = ax

signal_ax.plot(test_signal)

belief_ax.imshow(
    -np.log(beliefs),
    interpolation='none',
    aspect='auto',
    origin='lower',
    cmap=plt.cm.Blues,
)
belief_ax.plot(maxes, color='r')
belief_ax.set_xlim([0, len(test_signal)])
# Pin the lower y limit to 0 while keeping whatever upper limit was chosen.
belief_ax.set_ylim([0, belief_ax.get_ylim()[1]])

for panel in ax:
    panel.grid()

# Mark the sample positions where the first row of `maxes.T` decreases from
# one step to the next (presumably a run-length reset, i.e. a detected
# changepoint -- TODO confirm against the bcp documentation).
run_length_steps = np.diff(maxes.T[0])
index_changes = np.where(run_length_steps < 0)[0]
signal_ax.scatter(index_changes, test_signal[index_changes], c='green');
So, this one is better, but anyone could spot these changes with the naked eye!