In [1]:
from __future__ import division

import glob
import pickle
import sys
from functools import partial

import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn
from keras.applications.resnet50 import ResNet50
from keras.models import Model
from keras.preprocessing import image
from scipy.stats import multivariate_normal,t

import bayesian_changepoint_detection.online_changepoint_detection as oncd
import bayesian_changepoint_detection.offline_changepoint_detection as offcd

sys.path.append('../')
from src import feature_extraction

%matplotlib inline
%load_ext autoreload
%autoreload 2
Based on:
In [51]:
def generate_normal_time_series(num, minl=50, maxl=1000):
    """Build a 1-D series made of `num` Gaussian segments glued end to end.

    Segment lengths are drawn with ``np.random.randint(minl, maxl, num)``.
    Each segment gets its own centre (a standard-normal draw times 2) and its
    own spread (the absolute value of a standard-normal draw).

    Note: the spread is handed to ``np.random.normal`` as its second
    argument, so it acts as a standard deviation, not a variance.

    Returns a float64 array with all segments concatenated in draw order.
    """
    segment_lengths = np.random.randint(minl, maxl, num)
    # Start from an empty float64 array so num == 0 still returns an empty
    # float64 result.
    pieces = [np.array([], dtype=np.float64)]
    for length in segment_lengths:
        # Draw order (centre, spread, samples) matters for seeded runs.
        center = np.random.randn() * 2
        spread = np.random.randn() * 1
        spread = spread * -1 if spread < 0 else spread
        pieces.append(np.random.normal(center, spread, length))
    return np.concatenate(pieces)
In [52]:
# Fix the global NumPy seed so the synthetic series is reproducible, then
# build 7 segments whose lengths are drawn from randint(50, 200).
np.random.seed(1000)
data = generate_normal_time_series(num=7, minl=50, maxl=200)
In [53]:
%time Q, P, Pcp = offcd.offline_changepoint_detection(data, partial(offcd.const_prior, l=(len(data)+1)), offcd.gaussian_obs_log_likelihood, truncate=-40)
In [ ]:
# Show the shape of Pcp returned by the offline detector. The plotting cells
# reduce it with np.exp(Pcp).sum(0) to get one value per time step.
Pcp.shape # need to sum to get the probabilities.
In [55]:
# Two stacked panels on a shared time axis: raw series on top, change-point
# probability below. Both axes come from one plt.subplots call, so no empty
# full-figure axes is left behind (as plt.subplots() + fig.add_subplot() did).
fig, (ax_data, ax_cp) = plt.subplots(2, 1, sharex=True, figsize=[12,8])
ax_data.plot(data[:])
ax_data.set_ylabel('data')
# Pcp is exponentiated and summed over axis 0 to get one value per time step
# (presumably Pcp holds log-probabilities -- confirm against the library).
ax_cp.plot(np.exp(Pcp).sum(0))
ax_cp.set_ylabel('p(change-point)')
Out[55]:
In [56]:
import bayesian_changepoint_detection.online_changepoint_detection as oncd
reload(oncd)
%time R, maxes = oncd.online_changepoint_detection(data, partial(oncd.constant_hazard, 250), oncd.StudentT(0.1, .01, 1, 0))
In [57]:
# Show the shape of R returned by the online detector.
R.shape # are the probabilities over run-lengths
Out[57]:
In [58]:
# Three stacked panels on a shared x axis: the series, -log(R) as a grey-scale
# map, and one row of R. All axes come from one plt.subplots call, so no empty
# full-figure axes is left underneath. `cm` is imported in the first cell.
fig, (ax_data, ax_runlen, ax_prob) = plt.subplots(3, 1, sharex=True, figsize=[12,12])
ax_data.plot(data)

sparsity = 5  # only plot every fifth data for faster display
grid = np.arange(0, len(R[:,0]), sparsity)
ax_runlen.pcolor(grid,
                 grid,
                 -np.log(R[0:-1:sparsity, 0:-1:sparsity]),
                 cmap=cm.Greys, vmin=0, vmax=30)

# Row Nw of R, starting at column Nw and dropping the last column.
Nw = 10
ax_prob.plot(R[Nw,Nw:-1])
Out[58]:
In [66]:
# Plot row 4 of R across all columns.
# NOTE(review): presumably this is the probability of one fixed run length
# over time -- confirm the axis convention of R against the library.
plt.plot(R[4,:])
Out[66]:
In [3]:
def generate_2D_normal_time_series(number_partitions, minl=50, maxl=1000,dim=2):
    """Generate a piecewise-stationary multivariate Gaussian series.

    Segment lengths are drawn with
    ``np.random.randint(minl, maxl, number_partitions)``. Each segment has its
    own mean (``np.random.randn(dim) * 2``) and an identity covariance.

    Parameters
    ----------
    number_partitions : int
        Number of segments to generate.
    minl, maxl : int
        Bounds passed to ``np.random.randint`` for the segment lengths.
    dim : int
        Number of columns of the series. Despite the function name, any
        ``dim >= 1`` works.

    Returns
    -------
    data : ndarray, shape (sum of segment lengths, dim)
        All segments stacked in draw order.
    change_points : ndarray
        Cumulative segment lengths without the last entry, i.e. the row index
        at which each new segment starts.
    """
    partition = np.random.randint(minl, maxl, number_partitions)
    Sigma = np.eye(dim)  # loop-invariant identity covariance
    # Start from an empty (0, dim) block so zero partitions still gives a 2-D
    # result; collecting blocks and stacking once avoids quadratic copying.
    segments = [np.empty((0, dim))]
    for p in partition:
        mean = np.random.randn(dim) * 2  # new random mean
        tdata = multivariate_normal.rvs(mean, Sigma, size=p)
        # rvs squeezes singleton axes (p == 1 or dim == 1). Restore the
        # (p, dim) shape explicitly so stacking works for every dim.
        segments.append(np.reshape(tdata, (p, dim)))
    data = np.vstack(segments)
    return data, partition.cumsum()[0:-1]
In [21]:
# Synthetic multivariate series: 3 segments with lengths from randint(10, 100).
# Seed 102 is the one the author marked as working.
np.random.seed(102) # this works
data, partition = generate_2D_normal_time_series(number_partitions=3, minl=10, maxl=100)
In [22]:
# Offline detection on the synthetic multivariate series.
# The prior is offcd.const_prior with l fixed to len(data) + 1.
flat_prior = partial(offcd.const_prior, l=(len(data)+1))
Q, P, Pcp = offcd.offline_changepoint_detection(
    data,
    flat_prior,
    offcd.gaussian_obs_log_likelihood,
    truncate=-40
)
In [31]:
# Two stacked panels on a shared time axis. Both axes come from one
# plt.subplots call, so no empty full-figure axes is left behind (as
# plt.subplots() + fig.add_subplot() did).
fig, (ax_data, ax_cp) = plt.subplots(2, 1, sharex=True, figsize=[12,8])
ax_data.plot(data[:])
# Black dots at y=0 mark the change points returned by the generator.
ax_data.scatter(partition,np.zeros(len(partition)),c='k') # change points
ax_data.set_ylabel('data')
# One value per time step: exponentiate Pcp and sum over axis 0.
ax_cp.plot(np.exp(Pcp).sum(0))
ax_cp.set_ylabel('p(change-point)')
ax_cp.set_xlabel('time point')
Out[31]:
In [23]:
# Fit the online detector on the synthetic 2-column series.
hazard = partial(oncd.constant_hazard, 250)
# MV_Norm starts from a zero mean vector and an identity matrix for Sigma.
mv_prior = oncd.MV_Norm(mu=np.zeros(2), Sigma=np.diag(np.ones(2)), n=np.array([1.0]))
R, maxes = oncd.online_changepoint_detection(data, hazard, mv_prior)
# sometimes need to re-run because of underflow? there are inf's that appear
In [24]:
# Display R from the online fit, e.g. to look for the inf values mentioned in
# the fitting cell.
# NOTE(review): this dumps the whole array; R.shape or np.isfinite(R).all()
# would be a more compact check.
R
Out[24]:
In [25]:
# Plot: series with true change points, -log(R) as a grey-scale map, and one
# row of R. All three axes come from one plt.subplots call, so no empty
# full-figure axes is left underneath the panels.
fig, (ax_data, ax_runlen, ax_prob) = plt.subplots(3, 1, sharex=True, figsize=[12,12])
ax_data.plot(data) # time series
ax_data.scatter(partition,np.zeros(len(partition)),c='k') # change points

sparsity = 5  # only plot every fifth data for faster display
grid = np.arange(0, len(R[:,0]), sparsity)
ax_runlen.pcolor(grid,
                 grid,
                 -np.log(R[0:-1:sparsity, 0:-1:sparsity]),
                 cmap=cm.Greys, vmin=0, vmax=30)

# Row Nw of R, starting at column Nw and dropping the last column.
Nw = 10
ax_prob.plot(R[Nw,Nw:-1])
Out[25]:
In [4]:
# Run the project's layer feature extraction over the two image folders.
black_dir = '../data_img_classes/class_black/'
white_dir = '../data_img_classes/class_white/'
feature_list_black_imgs = feature_extraction.layer_feature_extraction(black_dir)
feature_list_white_imgs = feature_extraction.layer_feature_extraction(white_dir)
In [15]:
# Concatenate features into one matrix (black dresses first, then white
# dresses), one row per image. Keys are snapshotted into lists first:
# `dict.keys() + dict.keys()` only works where keys() returns a list
# (Python 2), and the snapshot also pins the row order.
black_files = list(feature_list_black_imgs.keys())
white_files = list(feature_list_white_imgs.keys())
n_blk_imgs = len(black_files)
n_white_imgs = len(white_files)

img_files = black_files + white_files
feature_vec = np.empty((n_blk_imgs+n_white_imgs,2048))
for i, img_file in enumerate(img_files):
    # The first n_blk_imgs rows come from the black set, the rest from white.
    source = feature_list_black_imgs if i < n_blk_imgs else feature_list_white_imgs
    feature_vec[i,:] = source[img_file]
feature_vec.shape
Out[15]:
In [6]:
# Number of black-class images. In feature_vec the black images fill the first
# n_blk_imgs rows, so this is also the row index of the first white image.
n_blk_imgs
Out[6]:
In [7]:
# reduce dimensionality
# Load the pickled PCA object. The `with` block closes the file handle, which
# the bare open() call never did.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open('../data_nn_features/pca_all_items_sample1000.pkl','rb') as pca_file:
    pca_all = pickle.load(pca_file)
In [55]:
# Project the feature matrix with the loaded PCA object and report the shape
# of the result.
projected_feature_vec = pca_all.transform(feature_vec)
print(projected_feature_vec.shape)
In [31]:
# Show every image in sequence order on one row and save the strip to disk.
# squeeze=False keeps `axes` two-dimensional, so indexing also works when
# img_files holds a single image.
fig, axes = plt.subplots(1, len(img_files), figsize=(50,4), squeeze=False)
for i, img_file in enumerate(img_files):
    img = image.load_img(img_file, target_size=(224, 224))
    panel = axes[0, i]
    panel.imshow(img)
    panel.set_title('image:'+str(i))
    panel.get_xaxis().set_visible(False)
    panel.get_yaxis().set_visible(False)
plt.savefig('../figures/test.png',dpi=300)
In [78]:
# Offline detection on the first 5 columns of the PCA projection.
reduced_projected_feature_vec = projected_feature_vec[:, :5]
print(reduced_projected_feature_vec.shape)
input_data = reduced_projected_feature_vec
# The prior is offcd.const_prior with l fixed to len(input_data) + 1.
flat_prior = partial(offcd.const_prior, l=(len(input_data)+1))
Q, P, Pcp = offcd.offline_changepoint_detection(
    input_data,
    flat_prior,
    offcd.gaussian_obs_log_likelihood,
    truncate=-40
)
In [79]:
# Top panel: the input columns. Bottom panel: np.exp(Pcp) summed over axis 0.
# Both x axes are clipped to the number of rows in input_data.
fig, ax = plt.subplots(2,1,figsize=[12,8])
ax_data, ax_cp = ax
n_steps = input_data.shape[0]

ax_data.plot(input_data[:])
ax_data.set_ylabel('data')
ax_data.set_xlim(0, n_steps)

cp_prob = np.exp(Pcp).sum(0)
ax_cp.plot(cp_prob)
ax_cp.set_ylabel('p(change-point)')
ax_cp.set_xlim(0, n_steps)
Out[79]:
In [82]:
# Report the index with the largest summed change-point value. .format() is
# applied to the string itself: calling it on the result of print(...) breaks
# whenever print is a function (it returns None).
print('change point at image {0}'.format(np.argmax(np.exp(Pcp).sum(0))))
In [73]:
# Offline detection on the first 50 columns of the PCA projection.
reduced_projected_feature_vec = projected_feature_vec[:, :50]
print(reduced_projected_feature_vec.shape)
input_data = reduced_projected_feature_vec
# The prior is offcd.const_prior with l fixed to len(input_data) + 1.
flat_prior = partial(offcd.const_prior, l=(len(input_data)+1))
Q, P, Pcp = offcd.offline_changepoint_detection(
    input_data,
    flat_prior,
    offcd.gaussian_obs_log_likelihood,
    truncate=-40
)
In [74]:
# Top panel: the input columns. Bottom panel: np.exp(Pcp) summed over axis 0.
# Both x axes are clipped to the number of rows in input_data.
fig, ax = plt.subplots(2,1,figsize=[12,8])
ax_data, ax_cp = ax
n_steps = input_data.shape[0]

ax_data.plot(input_data[:])
ax_data.set_ylabel('data')
ax_data.set_xlim(0, n_steps)

cp_prob = np.exp(Pcp).sum(0)
ax_cp.plot(cp_prob)
ax_cp.set_ylabel('p(change-point)')
ax_cp.set_xlim(0, n_steps)
Out[74]:
In [84]:
# Online detection on the first 50 columns of the PCA projection.
reduced_projected_feature_vec = projected_feature_vec[:, :50]
print(reduced_projected_feature_vec.shape)
input_data = reduced_projected_feature_vec

n_dims = input_data.shape[1]
hazard = partial(oncd.constant_hazard, 250)
# MV_Norm starts from a zero mean vector and an identity matrix for Sigma.
mv_prior = oncd.MV_Norm(mu=np.zeros(n_dims), Sigma=np.diag(np.ones(n_dims)), n=np.array([1.0]))
R, maxes = oncd.online_changepoint_detection(input_data, hazard, mv_prior)
# sometimes have to run this twice due to underflow issues
In [85]:
# Plot the input columns (top) and -log(R) as a grey-scale map (bottom).
fig, ax = plt.subplots(2,1,figsize=[12,8])
ax_series, ax_runlen = ax
n_steps = input_data.shape[0]

ax_series.plot(input_data) # time series
ax_series.set_xlim(0, n_steps)

sparsity = 1  # stride through R; 1 keeps every entry
grid = np.arange(0, R.shape[0], sparsity)
neg_log_R = -np.log(R[:-1:sparsity, :-1:sparsity])
ax_runlen.pcolor(grid, grid, neg_log_R, cmap=cm.Greys, vmin=0, vmax=30)
ax_runlen.set_xlim(0, n_steps)
Out[85]:
In [97]:
reduced_projected_feature_vec = projected_feature_vec[:,0:5]
print(reduced_projected_feature_vec.shape)
input_data = reduced_projected_feature_vec
R, maxes = oncd.online_changepoint_detection(input_data, partial(oncd.constant_hazard, 250), oncd.MV_Norm(mu=np.zeros(input_data.shape[1]),Sigma=np.diag(np.ones(input_data.shape[1])),n=np.array([1.0])))
# sometimes have to run this twice due to underflow issues
In [98]:
# Plot the input columns (top) and -log(R) as a grey-scale map (bottom).
fig, ax = plt.subplots(2,1,figsize=[12,8])
ax_series, ax_runlen = ax
n_steps = input_data.shape[0]

ax_series.plot(input_data) # time series
ax_series.set_xlim(0, n_steps)

sparsity = 1  # stride through R; 1 keeps every entry
grid = np.arange(0, R.shape[0], sparsity)
neg_log_R = -np.log(R[:-1:sparsity, :-1:sparsity])
ax_runlen.pcolor(grid, grid, neg_log_R, cmap=cm.Greys, vmin=0, vmax=30)
ax_runlen.set_xlim(0, n_steps)
Out[98]:
In [58]:
# Instantiate ResNet50 without its classification head, using the 'imagenet'
# weights. (ResNet50 and Model are imported in the first cell.)
base_model = ResNet50(include_top=False, weights='imagenet')
# Cut the network at the 'avg_pool' layer so predict() returns that layer's
# output. Input and output are passed positionally because the keyword names
# differ between Keras versions (input/output vs. inputs/outputs).
model = Model(base_model.input, base_model.get_layer('avg_pool').output)
In [141]:
user_id = 106144465

# Collect this user's images. glob.glob returns matches in arbitrary order,
# but the trajectory is an ordered sequence, so the paths are sorted to make
# the order deterministic.
# NOTE(review): assumes lexicographic filename order equals view order --
# confirm against the naming scheme of the files in this folder.
folder = '../data_img_sample_item_view_sequences/'
img_files = sorted(glob.glob(folder+'*'+str(user_id)+'*'))
print(img_files)

# One 2048-wide feature row per image, taken from the truncated network.
# NOTE(review): preprocess_img is not defined in this notebook; it must be
# provided elsewhere before this cell runs.
trajectory_features = np.empty((len(img_files),2048))
for i,img_file in enumerate(img_files):
    x,img = preprocess_img(img_file) # preprocess
    # predict() output is indexed [0,0,0,:] to pull out the feature vector.
    trajectory_features[i,:] = model.predict(x)[0,0,0,:]
In [142]:
# Shape of the per-image feature matrix (one row per file in img_files).
trajectory_features.shape
Out[142]:
In [143]:
# Stack the example trajectory three times end to end.
# The guard makes the cell idempotent: the unconditional self-assignment
# tripled the array again on every re-run. Stacking only happens while the
# array still has one row per image.
n_repeats = 3
if trajectory_features.shape[0] == len(img_files):
    trajectory_features = np.tile(trajectory_features, (n_repeats, 1))
trajectory_features.shape
Out[143]:
In [144]:
# Project the stacked trajectory features with the loaded PCA object and show
# the shape of the result.
projection = pca_all.transform(trajectory_features)
projection.shape
Out[144]:
In [160]:
# Offline detection on the first 5 columns of the PCA projection.
# Three earlier assignments to input_data were dead (each overwritten before
# use) and were removed; the old "10 PC's" comment did not match the slice.
n_components = 5
input_data = projection[:, 0:n_components]
# The prior is offcd.const_prior with l fixed to len(input_data) + 1.
flat_prior = partial(offcd.const_prior, l=(len(input_data)+1))
Q, P, Pcp = offcd.offline_changepoint_detection(input_data, flat_prior, offcd.gaussian_obs_log_likelihood, truncate=-40)
In [161]:
# One value per time step: exponentiate Pcp and sum over axis 0.
cp_prob = np.exp(Pcp).sum(0)
plt.plot(cp_prob)
plt.ylabel('p(change-point)')
Out[161]:
In [162]:
# Plot every column of input_data against its row index.
plt.plot(input_data)
Out[162]:
In [149]:
#R
In [159]:
# Online detection on the current input_data, followed by two diagnostics.
n_dims = input_data.shape[1]
hazard = partial(oncd.constant_hazard,250)
# MV_Norm starts from a zero mean vector and an identity matrix for Sigma.
mv_prior = oncd.MV_Norm(mu=np.zeros(n_dims),Sigma=np.diag(np.ones(n_dims)),n=np.array([1.0]))
R, maxes = oncd.online_changepoint_detection(input_data, hazard, mv_prior)

# Both panels come from one plt.subplots call. The old
# plt.subplots() + add_subplot(3, 1, 2/3) left an empty full-figure axes
# underneath and shared the x axis with that empty axes.
fig, (ax_runlen, ax_prob) = plt.subplots(2, 1, sharex=True, figsize=[12,12])

sparsity = 1  # stride through R; 1 keeps every entry
grid = np.arange(0, len(R[:,0]), sparsity)
ax_runlen.pcolor(grid,
                 grid,
                 -np.log(R[0:-1:sparsity, 0:-1:sparsity]),
                 cmap=cm.Greys, vmin=0, vmax=30)

# Row Nw of R, starting at column Nw and dropping the last column.
Nw = 4
ax_prob.plot(R[Nw,Nw:-1])
Out[159]:
In [113]:
print('target class')
len_seq = len(img_files)
# figsize goes on the figure that is actually drawn: the separate
# plt.figure(figsize=...) call only created an extra, empty figure.
# squeeze=False keeps `axes` two-dimensional for a one-image sequence.
fig, axes = plt.subplots(2, len_seq, figsize=(12,6), squeeze=False)
# make color
#color_red_black = pd.Series(red_traj>0).map({False:'k',True:'r'}).as_matrix()
for i in range(len_seq):
    img = image.load_img(img_files[i], target_size=(224, 224))
    # images
    top = axes[0,i]
    top.imshow(img)
    # Remove x ticks but keep the axis visible so the x label still shows.
    #axes[0,i].get_xaxis().set_visible(False)
    top.get_xaxis().set_ticks([])
    top.get_yaxis().set_visible(False)
    # Every image is a 'view' except the last one, which is labelled 'buy'.
    if i<(len_seq-1):
        top.set_xlabel('view '+str(i))
    else:
        top.set_xlabel('buy')
    # bar
    # axes[1,i].bar(0,red_traj[i],color=color_red_black[i])
    # axes[1,i].set_ylim([-10,5])
    # axes[1,i].get_xaxis().set_visible(False)
    # axes[1,i].axhline(y=0,linestyle='--',color='w')
    # if i==0:
    #     print('here')
    #     axes[1,i].set_ylabel('red classification')
    # else:
    #     axes[1,i].get_yaxis().set_visible(False)
# sns.despine()
# savefile = '../figures/example_sequence_interpretable_features_ui_'+str(user_id)+'.png'
# plt.savefig(savefile,dpi=300)
In [ ]:
%%bash
# Convert this notebook to HTML and, only if the conversion succeeds, move the
# result into ../notebook_htmls/ under a "_v1" name.
jupyter nbconvert --to html Change_Point_Detection_in_Trajectories.ipynb && mv Change_Point_Detection_in_Trajectories.html ../notebook_htmls/Change_Point_Detection_in_Trajectories_v1.html
# Keep a "_v1" copy of the notebook itself in ../notebook_versions/.
cp Change_Point_Detection_in_Trajectories.ipynb ../notebook_versions/Change_Point_Detection_in_Trajectories_v1.ipynb