Bayesian Changepoint Detection



In [1]:
%matplotlib inline
import numpy as np
import scipy.stats as stats
from matplotlib import pyplot as plt
import pandas as pd

plt.style.use("ggplot")

In [2]:
# ugly hack to get relative imports working
import os
os.chdir("..")

In [3]:
def hazard(p):
    def geometric(t):
        return p * (1 - p) ** t
    return geometric

In [4]:
def plot_result(data, run_lengths):
    df = pd.DataFrame({"data": data, "run_length": run_lengths})
    return df.plot(secondary_y=["run_length"])

In [5]:
from bcd import offline_changepoint_detection, BayesianChangepointDetection
from bcd.distributions import Gaussian

# ----------------------------- Reference ------------------------------
data = [0] * 100 + [1] * 100
Pr = offline_changepoint_detection(data, hazard(0.6), Gaussian(1, 0, 1, 0.1))

ax = plot_result(data, [Pr[:, t].argmax() for t in range(len(data))])
ax.set_title("Reference")
plt.show()

# ------------------------------ Our Version ------------------------------
detector = BayesianChangepointDetection(hazard(0.6), Gaussian(1, 0, 1, 0.1))
Pr = [detector.step(x) for x in data]

ax = plot_result(data, [pr.argmax() for pr in Pr])
ax.set_title("Our Version")
plt.show()



In [6]:
# Exponential Data
data = np.concatenate([np.random.exponential(scale=1, size=100),
                       np.random.exponential(scale=10, size=100),
                       np.random.exponential(scale=1, size=100)])
detector = BayesianChangepointDetection(hazard(0.6), Gaussian(0.1, 4, 1, 30))
Pr = [detector.step(x) for x in data]
ax = plot_result(data, [pr.argmax() for pr in Pr])



In [7]:
# Gamma
data = np.concatenate([np.random.gamma(1, size=100),
                       np.random.gamma(10, size=100),
                       np.random.gamma(1, size=100)])
detector = BayesianChangepointDetection(hazard(0.6), Gaussian(0.1, 4, 1, 10))
Pr = [detector.step(x) for x in data]
ax = plot_result(data, [pr.argmax() for pr in Pr])



In [8]:
from data.yahoo import get_stock_prices
import datetime as dt

df = get_stock_prices("GOOG", dt.date.today() - dt.timedelta(days=365))

In [9]:
# Change point detection of Google Stock Price
detector = BayesianChangepointDetection(hazard(0.4), Gaussian(0.1, 600, 1, 2000))
df.run_length = pd.Series({date: detector.step(price).argmax() 
                          for date, price in df.Open.iteritems()})
ax = plot_result(df.Open, df.run_length)



In [10]:
# Change point detection of Google Stock Price Growth

detector = BayesianChangepointDetection(hazard(0.4), Gaussian(0.1, 0.01, 1, 0.005 ** 2))
diff = df.pct_change().dropna()
diff.run_length = pd.Series({date: detector.step(price).argmax() 
                             for date, price in diff.Open.iteritems()})
ax = plot_result(diff.Open, diff.run_length)



In [ ]: