This notebook uses uncompressed data from here. Namely the datasets: BC-pAug89 and BC-pOct89.
Description:
The files whose names end in TL are ASCII-format tracing data, consisting of one 20-byte line per Ethernet packet arrival. Each line contains a floating- point time stamp (representing the time in seconds since the start of a trace) and an integer length (representing the Ethernet data length in bytes). Although the times are expressed to 6 places after the decimal point, giving the appearance of microsecond resolution, the hardware clock had an actual resolution of 4 microseconds. Our testing of the entire monitor suggests that jitter in the inner code loop and (much more seriously) bus contention limited the actual accuracy to roughly 10 microseconds. The length field does not include the Ethernet preamble, header, or CRC; however, the Ethernet protocol forces all packets to have at least the minimum size of 64 bytes and at most the maximum size of 1518 bytes. 99.5% of the encapsulated packets carried by the Ethernet PDUs were IP. All traces were conducted on an Ethernet cable at the Bellcore Morristown Research and Engineering facility, building MRE-2. At that time, the Ethernet cable nicknamed the "purple cable" carried not only a major portion of our Lab's traffic but also all traffic to and from the internet and all of Bellcore. The records include all complete packets (the monitor did not artificially "clip" traffic bursts), but do not include any fragments or collisions. These samples are excerpts from approximately 300 million arrivals recorded; the complete trace records included Ethernet status flags, the Ethernet source and destination, and the first 60 bytes of each encapsulated packet (allowing identification of higher-level protocols, IP source and destination fields, and so on).
In [ ]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
Load the data with numpy.
In [ ]:
pAug89_t, pAug89_x = np.loadtxt("./BC-pAug89.TL.gz", unpack=True)
pOct89_t, pOct89_x = np.loadtxt("./BC-pOct89.TL.gz", unpack=True)
Estimate the mean packet size and produce a random walk without the drift.
In [ ]:
drift = pAug89_x.mean()
T, X = pAug89_t.copy(), (pAug89_x - drift).cumsum()
# drift = pOct89_x.mean()
# T, X = pOct89_t.copy(), (pOct89_x - drift).cumsum()
print("%0.4f" % drift)
Construct the crossing tree for the traffic data
In [ ]:
from crossing_tree import crossing_tree
## Set the base scale to the median
scale = np.median(np.abs(np.diff(X)))
origin = X[0]
## Build a crossing tree
xi, ti, offspring, Vnk, Znk, Wnk = crossing_tree(X, T, scale, origin=origin)
# Rebuild the tree
index = list([offspring[0]])
for index_ in offspring[1:]:
index.append(index[-1][index_])
Xnk = [xi.base[index_] for index_ in index]
Tnk = [ti.base[index_] for index_ in index]
Plot the crossing times for the last 4 levels of the tree.
In [ ]:
l = len(Tnk) - 2
levels = np.arange(l-4, l+1, dtype=np.int)
## Plot the sample path
fig = plt.figure(figsize=(6, 5))
ax = fig.add_subplot(111)
ax.set_xticks(Tnk[levels[0]], minor=True)
delta = 2 * scale * (1 << levels[0])
xm, xM = (Xnk[levels[0]] - origin).min() / delta, (Xnk[levels[0]] - origin).max() / delta
ax.set_yticks(origin + np.arange(xm-1, xM+2) * delta)
ax.plot(T, X, linestyle='-', color='gray', label='X(t)', alpha=0.5)
color=plt.cm.rainbow_r(np.linspace(0, 1, len(levels)))
for j, col_ in zip(levels, color):
ax.plot(Tnk[j], Xnk[j], '-s', color=col_, markersize=4, alpha=0.75)
ax.set_xlim(left=-50)
ax.grid(color='k', linestyle='-', alpha=0.15, zorder=-99)
Plot the crossing tree
In [ ]:
fig = plt.figure(figsize=(6, 5))
ax = fig.add_subplot(111)
ax.set_xticks(Tnk[levels[0]], minor=True)
colors = plt.cm.rainbow_r(np.linspace(0, 1, len(levels)))
for j, col_ in zip(levels, colors):
lht0, lht1 = Tnk[j], Tnk[j+1]
offs_ = offspring[j+1]
parent = np.repeat(np.arange(len(offs_) - 1), np.diff(offs_))
parent = np.r_[parent, np.repeat(len(offs_) - 1, len(lht0) - offs_[-1])]
p_ti = np.r_[np.repeat(np.nan, offs_[0]), lht1[parent]]
## Draw the line segments between two levels
delta = (1 << j)
ax.plot([p_ti, lht0], [len(lht0) * [2 * delta], len(lht0) * [delta]],
'-s', color=col_, markersize=2, lw=.5)
ax.grid(color='k', linestyle='-', alpha=0.05, zorder=-99)
ax.set_yscale("log", basey=2)
ax.set_ylim(0.9 * (1 << levels[0]), 1.1 * (1 << levels[-1] + 1))
ax.set_xlim(left=-50)
ax.set_ylabel(r"$\delta \times 2^k$")
Get the structural statistics of the crossing tree for de-drifted traffic data.
In [ ]:
from crossing_tree import structural_statistics
scale = np.median(np.abs(np.diff(X)))
scale, Nn, Dnk, Cnkk, Vnde, Wnp, Wavgn, Wstdn = structural_statistics(X, T, scale, origin=X[0])
Estimate the hurst exponent based on the offspring distribution.
In [ ]:
def offspring_hurst(Dmnk, levels, laplace=False):
# Get pooled frequencies
Dmj = Dmnk[:, levels].sum(axis=2, dtype=np.float)
# Compute the sum of the left-closed tails sums,
# and divide by the total number of offspring.
Mmj = 2 * Dmnk[:, levels, ::-1].cumsum(axis=-1).sum(axis=-1) / Dmj
Hmj = np.log(2) / np.log(Mmj)
levels = np.arange(Dmnk.shape[1], dtype=np.int)[levels]
return levels + 1, np.nanmean(Hmj, axis=0), np.nanstd(Hmj, axis=0)
Plot the hurst exponents.
In [ ]:
levels, Hj_avg, Hj_std = offspring_hurst(Dnk[np.newaxis], slice(0, None))
plt.plot(levels, Hj_avg)
Try to make a sliding estimate of the hurst exponent with the tree. An estimator that compute rolling crossing tree statistics for a sample path.
In [ ]:
from joblib import Parallel, delayed
from numpy.lib.stride_tricks import as_strided
from crossing_tree import collect_structural_statistics
def _dewindow(arr, width, stride, ravel=True):
n_steps = width // stride
padded_ = np.pad(arr, (n_steps - 1, n_steps - 1), mode="edge")
arr_ = as_strided(padded_, shape=(padded_.shape[0] - n_steps + 1, n_steps),
strides=(arr.strides[0], arr.strides[0]))
return as_strided(arr_.mean(axis=-1), shape=(len(arr_), stride),
strides=(arr.strides[0], 0)).ravel()
def _strided_window(arr, width, stride):
n_steps = (arr.shape[0] - window - 1) // stride
return as_strided(arr, shape=(1 + n_steps, window,),
strides=(stride * arr.strides[0], arr.strides[0],))
def rolling_tree(T, X, window=1 << 15, stride=1 << 10, common_scale=True,
n_jobs=1, verbose=0):
path_windows = zip(_strided_window(T, window, stride),
_strided_window(X, window, stride))
structural_statistics_ = delayed(structural_statistics, check_pickle=False)
if common_scale:
scale = np.median(np.abs(np.diff(X)))
# scale = np.diff(X).std()
trees_ = (structural_statistics_(xx, tt, scale, origin=xx[0])
for tt, xx in path_windows)
else:
trees_ = (structural_statistics_(xx, tt, scale=np.median(np.abs(np.diff(xx))),
origin=xx[0])
for tt, xx in path_windows)
# trees_ = (structural_statistics_(xx, tt, scale=np.diff(xx).std(), origin=xx[0])
# for tt, xx in path_windows)
par_ = Parallel(n_jobs=n_jobs, verbose=verbose, max_nbytes=None)
return collect_structural_statistics(par_(trees_))
Test on the fractional brownian motion process.
In [ ]:
from crossing_tree.processes import FractionalBrownianMotion
FBM = FractionalBrownianMotion(N=1 << 23, hurst=0.5, n_threads=4, random_state=1234)
FBM.start()
Draw and plot a sample path of BM.
In [ ]:
T, X = FBM.draw()
plt.plot(T, X)
Estimate the base scale and get the structural statistics of the path.
In [ ]:
scale = np.median(np.abs(np.diff(X)))
results = structural_statistics(X, T, scale=scale, origin=X[0])
scale, Nn, Dnk, Cnkk, Vnde, Wnp, Wavgn, Wstdn = results
$ \log_2 \mathbb{E} W^n - \frac{1}{H} (n + \log_2 \delta) = f(H, d) \,. $
In [ ]:
log2ed_ = np.log2(Wavgn) - (np.log2(scale) + np.arange(Wavgn.shape[0], dtype=float)) / 0.5
In [ ]:
plt.plot(1.0 / np.diff(np.log2(Wavgn)))
Draw
In [ ]:
T, X = FBM.draw()
window, stride = 1 << 15, 1 << 11
result_test = rolling_tree(T, X, window=window, stride=stride,
common_scale=False, n_jobs=-1, verbose=10)
scale_m, Nmn, Dmnk, Cmnkk, Vmnde, Wmnp, Wavgmn, Wstdmn = result_test
Hmj = np.stack([offspring_hurst(Dnk[np.newaxis], slice(None, -4))[1] for Dnk in Dmnk])
hurst_ = np.nanmean(1.0 / np.diff(np.log2(Wavgmn[:, 2:-4]), axis=-1), axis=-1)
plt.plot(np.nanmean(Hmj, axis=-1), "-k", markersize=3)
plt.plot(hurst_, "-r", markersize=3)
In [ ]:
try:
from l1tf import l1_filter
l1_hurst_ = l1_filter(hurst_, C=1e-2, relative=True)
len_ = (hurst_.shape[0] + window // stride - 1) * stride
fig = plt.figure(figsize=(16, 6))
ax = fig.add_subplot(121)
ax.plot(T, X)
# ax.plot(hurst_, "k", alpha=0.25, lw=2)
# ax.plot(l1_hurst_, "r", alpha=1.0)
ax = fig.add_subplot(122)
ax.plot(T[:len_], _dewindow(hurst_, window, stride), "k", alpha=0.25, lw=2)
ax.plot(T[:len_], _dewindow(l1_hurst_, window, stride), "r", alpha=1.0)
except ImportError:
print("Please install L1-trend filter python wrapper from https://github.com/ivannz/l1_tf")
pass
In [ ]:
plt.plot(np.log2(Nmn) / (1 + np.arange(Nmn.shape[1])[np.newaxis, ::-1]))
Compute for pAug89
In [ ]:
drift = pAug89_x.mean()
T, X = pAug89_t.copy(), (pAug89_x - drift).cumsum()
Make a sliding crossing tree
In [ ]:
window, stride = 1 << 15, 1 << 11
result = rolling_tree(T, X, window=window, stride=stride,
common_scale=False, n_jobs=8, verbose=10)
scale_m, Nmn, Dmnk, Cmnkk, Vmnde, Wmnp, Wavgmn, Wstdmn = result
Use regression estimates of the hurst exponent
In [ ]:
hurst_ = 1.0 / np.nanmean(np.diff(np.log2(Wavgmn[:, 2:-4]), axis=-1), axis=-1)
In [ ]:
try:
from l1tf import l1_filter
l1_hurst_ = l1_filter(hurst_, C=1e-1)
len_ = (hurst_.shape[0] + window // stride - 1) * stride
fig = plt.figure(figsize=(16, 6))
ax = fig.add_subplot(121)
ax.plot(T, X)
# ax.plot(hurst_, "k", alpha=0.25, lw=2)
# ax.plot(l1_filter(hurst_, C=1e-1), "r", alpha=1.0)
ax = fig.add_subplot(122)
ax.plot(T[:len_], _dewindow(hurst_, window, stride), "k", alpha=0.25, lw=2)
ax.plot(T[:len_], _dewindow(l1_hurst_, window, stride), "r", alpha=1.0)
except ImportError:
print("Please install L1-trend filter python wrapper from https://github.com/ivannz/l1_tf")
pass
Now derive an estimated based on the heuristic scaling properties.
In [ ]:
Hmj = np.stack([offspring_hurst(Dnk[np.newaxis], slice(0, -4))[1] for Dnk in Dmnk])
Compare
In [ ]:
# plt.plot(np.nanmean(Hmj, axis=-1), "-sk", markersize=3)
# plt.plot(hurst_, "-^k", markersize=3)
plt.plot(np.nanmean(Hmj, axis=-1), "k", alpha=0.5)
plt.plot(hurst_, "r", alpha=0.5)
In [ ]:
plt.plot(np.nanmean(Hmj[:, 2:-6], axis=-1))
In [ ]:
plt.plot(np.log2(Nmn) / (1 + np.arange(Nmn.shape[1])[np.newaxis, ::-1]))
Compute for pOct89
In [ ]:
drift = pOct89_x.mean()
T, X = pOct89_t.copy(), (pOct89_x - drift).cumsum()
plt.plot(T, X)
In [ ]:
window, stride = 1 << 15, 1 << 11
result = rolling_tree(T, X, window=window, stride=stride,
common_scale=False, n_jobs=8, verbose=10)
scale_m, Nmn, Dmnk, Cmnkk, Vmnde, Wmnp, Wavgmn, Wstdmn = result
hurst_ = 1.0 / np.nanmean(np.diff(np.log2(Wavgmn[:, 2:-4]), axis=-1), axis=-1)
In [ ]:
try:
from l1tf import l1_filter
l1_hurst_ = l1_filter(hurst_, C=1e-2, relative=True)
len_ = (hurst_.shape[0] + window // stride - 1) * stride
fig = plt.figure(figsize=(16, 6))
ax = fig.add_subplot(121)
ax.plot(T, X)
# ax.plot(hurst_, "k", alpha=0.25, lw=2)
# ax.plot(l1_hurst_, "r", alpha=1.0)
ax = fig.add_subplot(122)
ax.plot(T[:len_], _dewindow(hurst_, window, stride), "k", alpha=0.25, lw=2)
ax.plot(T[:len_], _dewindow(l1_hurst_, window, stride), "r", alpha=1.0)
except ImportError:
print("Please install L1-trend filter python wrapper from https://github.com/ivannz/l1_tf")
pass
In [ ]: