This is a personal project on the causal impact analysis of grey disruptions using windowed PCA. The sky is the limit.
In [2]:
%matplotlib inline
import os
import sys
import pandas as pd
import pandas_datareader.data as web
import numpy as np
import statsmodels.formula.api as smf
import statsmodels.tsa.api as smt
import statsmodels.api as sm
import scipy.stats as scs
from arch import arch_model
import matplotlib.pyplot as plt
import matplotlib as mpl
#print('Machine: {}\n'.format(os.uname()))
#print(sys.version)
def tsplot(title, y, lags=None, figsize=(10, 8), style='bmh'):
    """Draw a five-panel diagnostic figure for a time series.

    The figure has a 3x2 layout: the series itself across the top row,
    its ACF and PACF in the middle row, and a QQ plot plus a probability
    plot in the bottom row.

    Args:
        title: Text shown above the series panel; the sample mean and
            standard deviation of ``y`` are appended to it.
        y: The series to plot. Anything that is not already a
            ``pd.Series`` is wrapped in one.
        lags: Forwarded as ``lags`` to the ACF and PACF plots.
        figsize: Figure size passed to ``plt.figure``.
        style: Matplotlib style name applied only while drawing.

    Returns:
        None. The figure is created as a side effect on the current
        matplotlib backend.
    """
    if not isinstance(y, pd.Series):
        y = pd.Series(y)

    with plt.style.context(style):
        plt.figure(figsize=figsize)
        layout = (3, 2)
        ts_ax = plt.subplot2grid(layout, (0, 0), colspan=2)
        acf_ax = plt.subplot2grid(layout, (1, 0))
        pacf_ax = plt.subplot2grid(layout, (1, 1))
        qq_ax = plt.subplot2grid(layout, (2, 0))
        pp_ax = plt.subplot2grid(layout, (2, 1))

        y.plot(ax=ts_ax)
        # Raw string: "\m" and "\s" are not valid Python escape sequences,
        # so the backslashes must be kept literal for the mathtext markup.
        title_with_infos = r"{:s} $(\mu={:.3f}, \sigma={:.3f})$".format(
            title, y.mean(), y.std())
        ts_ax.set_title(title_with_infos)

        # NOTE(review): in statsmodels `alpha` is the significance level of
        # the confidence band, so 0.5 draws 50% intervals -- confirm this
        # is intended rather than the usual 0.05.
        smt.graphics.plot_acf(y, lags=lags, ax=acf_ax, alpha=0.5)
        smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax, alpha=0.5)

        sm.qqplot(y, line='s', ax=qq_ax)
        qq_ax.set_title('QQ Plot')
        # Probability plot using the sample mean and standard deviation
        # as the distribution's shape/location/scale parameters.
        scs.probplot(y, sparams=(y.mean(), y.std()), plot=pp_ax)

        plt.tight_layout()
In [3]:
# Generate a reproducible sample of Gaussian white noise and inspect it
# with the diagnostic plots.
n_samples = 1000
np.random.seed(1)
randser = np.random.normal(loc=0.0, scale=1.0, size=n_samples)
tsplot('Discrete white noise', randser, lags=30)
In [4]:
# Random walk: x[t] = x[t-1] + w[t], starting from x[0] = w[0].
# Built with a cumulative sum so that (a) the noise array `randser` is not
# overwritten in place through an alias, and (b) the first step does not
# wrap around to x[-1], which would add the last noise sample to the start.
w = randser
x = np.cumsum(w)
_ = tsplot('Random walk', x, lags=30)
# Differencing the walk recovers the underlying noise increments.
_ = tsplot('First difference of random walk', np.diff(x), lags=30)
In [26]:
# Load the raw-data CSV and plot the three NETETOTAL throughput columns
# against the 'Time' column.
rawdata_csv = '../Specter/loganalyzer/resources/cache/cxseed/scheduling.schedule.builder.CatchupAsOfTodaySimulation/nmon/rawdata/cxseed-a-back.csv'
netetotal_columns = [
    'NETETOTAL total-KB/s',
    'NETETOTAL total-read-KB/s',
    'NETETOTAL total-write-KB/s',
]
df = pd.read_csv(rawdata_csv)
df.plot(x='Time', y=netetotal_columns, style='o-')
Out[26]:
In [24]:
#pd.set_option('display.max_rows', len(df))
#print(df[['NETETOTAL total-KB/s','NETETOTAL total-read-KB/s','NETETOTAL total-write-KB/s']])
#list(df.columns.values)
In [ ]: