In [ ]:
import os
import re
import time
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
In [ ]:
BASE_PATH = "/Volumes/LaCie/from_macHD/Github/crossing_paper2017"
# BASE_PATH = ".."
Compute the empirical probabilities by averaging across all replications
In [ ]:
def offspring_empirical(Dmnk, levels, laplace=False):
# Get pooled frequencies
Djk = Dmnk[:, levels].sum(axis=1, keepdims=False, dtype=np.float)
Dj = Djk.sum(axis=1, keepdims=True)
# Compute the empirical probabilities
Pjk = Djk / Dj if not laplace else (Djk + 1.0) / (Dj + Djk.shape[1])
levels = np.arange(Dmnk.shape[1], dtype=np.int)[levels]
return levels + 1, np.nanmean(Pjk, axis=0), np.nanstd(Pjk, axis=0)
Get theoretical values of the probability according to the conjectured distribution: $$ Z \sim \text{Geom}\bigl(4^{\frac{1}{2}-\frac{1}{2h}}\bigr) \text{ over } \{2n\,:\,n\geq 1\} \,. $$
For $\theta = 2^{1-h^{-1}}$, the law, once again, is $$ \mathbb{P}(Z=2k) = \theta \cdot (1-\theta)^{k-1}\,. $$
In [ ]:
from math import log
def offspring_prob(Z_max, hurst):
Z = np.arange(2, Z_max, 2)
theta = 2.0 ** (1.0 - 1.0 / hurst)
return Z, theta * np.exp((Z // 2 - 1) * log(1 - theta))
Use the geometric distribution's mean value to estimate the hurst exponent: $$ \mathbb{E} Z = 2 \theta \sum_{k\geq 1} k (1 - \theta)^{k-1} = 2 \theta \sum_{k\geq 1} \sum_{j\geq k} (1 - \theta)^{j-1} = 2 \theta \sum_{k\geq 1} \theta^{-1} (1 - \theta)^{k-1} = 2 \theta^{-1} \,, $$ whence $$ 2^{1-h^{-1}} = \frac{2}{\mathbb{E} Z} \Leftrightarrow h = \frac{\log 2}{\log \mathbb{E}Z}\,. $$
In [ ]:
def offspring_hurst(Dmnk, levels, laplace=False):
# Get pooled frequencies
Dmj = Dmnk[:, levels].sum(axis=2, dtype=np.float)
# Compute the sum of the left-closed tails sums,
# and divide by the total number of offspring.
Mmj = 2 * Dmnk[:, levels, ::-1].cumsum(axis=-1).sum(axis=-1) / Dmj
Hmj = np.log(2) / np.log(Mmj)
levels = np.arange(Dmnk.shape[1], dtype=np.int)[levels]
return levels + 1, np.nanmean(Hmj, axis=0), np.nanstd(Hmj, axis=0)
Create the output folder.
In [ ]:
output_path = os.path.join("../plots", time.strftime("%Y%m%d_%H%M%S"))
if not os.path.exists(output_path):
os.mkdir(output_path)
print(output_path)
Load the experiment manager
In [ ]:
from crossing_tree.manager import ExperimentManager
experiment = ExperimentManager(name_format=re.compile(
r"^(?P<class>[^-]+)"+
r"-(?P<size>\d+)" +
r"-(?P<hurst>(\d*\.)?\d+)" +
r"-(?P<replications>\d+x\d+)" + # r"-(?P<n_batch>\d+)x(?P<n_jobs>\d+)" +
r"_(?:[\d-]+)" + # r"_(?P<dttm>[\d-]+)" +
r".gz$", flags=re.I | re.U))
experiment.update(os.path.join(BASE_PATH, "results/version_2"))
Print the keys of the experiment
In [ ]:
print(experiment.keys_)
Choose a particular instance
In [ ]:
method = "med" # needs bytes encoding
experiments = [# (8388608, "125x8", "FBM", method),
(33554432, "334x3", "FBM", method),
(8388608, "125x8", "HRP2_1", method),
(8388608, "125x8", "HRP3_1", method),
(8388608, "125x8", "HRP4_1", method),
# (524288, "125x8", "HRP2_16", method),
# (524288, "125x8", "HRP3_16", method),
# (524288, "125x8", "HRP4_16", method),
(8388608, "125x8", "WEI_1.2", method),
]
exponents = [0.500, 0.550, 0.600, 0.650, 0.700, 0.750, 0.800, 0.850, 0.900,
0.910, 0.915, 0.920, 0.925, 0.930, 0.935, 0.940, 0.945, 0.950,
0.990]
for label fig:fbm_offspring_distribution
In [ ]:
def figure_01(fig, generator, size, replications, method, p=6, q=7, bars=True, legend=True):
ax = fig.add_subplot(111)
results = experiment[generator, size, :, replications]
data = {float(info_[2]): data_[method] for info_, start_, finish_, seeds_, data_ in results}
color_ = plt.cm.rainbow(np.linspace(0, 1, num=len(exponents)))[::-1]
for col_, hurst_ in zip(color_, exponents):
try:
try:
scale_m, Nmn, Dmnk, Cmnkk, Vmnde, Wmnp, Wavgmn, Wstdmn = data[hurst_]
except ValueError:
scale_m, Nmn, Dmnk, Vmnde, Wmnp, Wavgmn, Wstdmn = data[hurst_]
except KeyError:
continue
levels, Pk_avg, Pk_std = offspring_empirical(Dmnk, slice(p, q), laplace=False)
k, Pk = offspring_prob(2*(Pk_avg.shape[0] + 1), hurst=hurst_)
ax.plot(k, Pk, linestyle='-', color='black', alpha=0.5, zorder=-99)
if bars:
ax.errorbar(k, Pk_avg, yerr=Pk_std, fmt='-s',
color=col_, markersize=3, alpha=1.0,
label="%s %0.3f"%(generator, hurst_))
else:
ax.plot(k, Pk_avg, "-s", color=col_, markersize=3,
alpha=1.0, label="%s %0.3f"%(generator, hurst_))
ax.set_xticks(np.arange(2, 43, 2))
ax.grid(alpha=0.5, linestyle=":", color="grey")
ax.set_xlim(1.9, 12.1)
ax.set_yscale("log", basey=2)
ax.set_ylim(.5e-4, 1.1)
ax.set_ylabel("probability")
ax.set_xlabel("number of offspring")
if legend:
legend_ = ax.legend(loc="lower left", frameon=True,
ncol=2, fontsize=7)
legend_.get_frame() #.set_facecolor("whitesmoke")
Generate a figure-01 for different sizes and numbers of replications.
In [ ]:
p, q = 6, 10 # 5, 8
for experiment_ in experiments:
size, replications, generator, method_ = experiment_
name_ = "fig_01-%d_%s-%s-%d-%s-%s.pdf"%(p, str(q) if isinstance(q, int) else "X",
generator, size, replications, method_,)
fig = plt.figure(figsize=(6, 5))
figure_01(fig, str(generator), str(size), str(replications), method_,
p, q, bars=False, legend=True)
fig.savefig(os.path.join(output_path, name_), format="pdf")
plt.close()
for label fig:fbm_hurst_crossing_tree
In [ ]:
# exponents = [0.5, 0.6, 0.7, 0.8, 0.9]
# exponents = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]
def figure_04(fig, generator, size, replications, method, p=6, q=7, bars=False, legend=True):
ax = fig.add_subplot(111)
results = experiment[generator, size, :, replications]
data = {float(info_[2]): data_[method] for info_, start_, finish_, seeds_, data_ in results}
first_, last_ = np.inf, -np.inf
color_ = plt.cm.rainbow(np.linspace(0, 1, num=len(exponents)))[::-1]
for col_, hurst_ in zip(color_, exponents):
try:
try:
scale_m, Nmn, Dmnk, Cmnkk, Vmnde, Wmnp, Wavgmn, Wstdmn = data[hurst_]
except ValueError:
scale_m, Nmn, Dmnk, Vmnde, Wmnp, Wavgmn, Wstdmn = data[hurst_]
except KeyError:
continue
levels, Hj_avg, Hj_std = offspring_hurst(Dmnk, slice(p, q))
ax.axhline(y=hurst_, color='black', linestyle='-', alpha=0.25, zorder=-99)
mask = Hj_avg < hurst_ * 1.35
if bars:
ax.errorbar(levels[mask], Hj_avg[mask], yerr=Hj_std[mask],
fmt="-s", color=col_, markersize=3, alpha=1.0,
label="%s %0.3f"%(generator, hurst_))
else:
ax.plot(levels[mask], Hj_avg[mask], "-s",
color=col_, markersize=3, alpha=1.0,
label="%s %0.3f"%(generator, hurst_))
first_ = min(levels[mask][0], first_)
last_ = max(levels[mask][-1], last_)
last_ = 20 # min(last_, 20)
ax.set_xticks(np.arange(first_, last_ + 1))
ax.grid(color="grey", linestyle=":", alpha=0.5)
ax.set_xlim(first_ - 0.1, last_ + 1.1)
ax.set_ylim(0.45, 1.01)
## Add a legend with white opaque background.
# ax.set_title( 'Crossing tree estimates of the Hurst exponent' )
ax.set_xlabel("level $\\delta 2^k$")
ax.set_ylabel("$H$")
if legend:
legend_ = ax.legend(loc="lower right", frameon=1,
ncol=2, fontsize=7)
legend_.get_frame() #.set_facecolor("whitesmoke")
Create a figure-04 plot of mean-based hurst estimates
In [ ]:
p, q = 0, None
for experiment_ in experiments:
size, replications, generator, method_ = experiment_
name_ = "fig_04-%d_%s-%s-%d-%s-%s.pdf"%(p, str(q) if isinstance(q, int) else "X",
generator, size, replications, method_,)
fig = plt.figure(figsize=(6, 5))
figure_04(fig, str(generator), str(size), str(replications), method_,
p, q, bars=False, legend=True)
fig.savefig(os.path.join(output_path, name_), format="pdf")
plt.close()
for label fig:fbm_avg_crossing_durations
In [ ]:
# exponents = [0.5, 0.6, 0.7, 0.8, 0.9]
# exponents = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]
def figure_08(fig, generator, size, replications, method, bars=False, legend=True):
ax = fig.add_subplot(111)
results = experiment[generator, size, :, replications]
data = {float(info_[2]): data_[method] for info_, start_, finish_, seeds_, data_ in results}
color_ = plt.cm.rainbow(np.linspace(0, 1, num=len(exponents)))[::-1]
for col_, hurst_ in zip(color_, exponents):
try:
try:
scale_m, Nmn, Dmnk, Cmnkk, Vmnde, Wmnp, Wavgmn, Wstdmn = data[hurst_]
except ValueError:
scale_m, Nmn, Dmnk, Vmnde, Wmnp, Wavgmn, Wstdmn = data[hurst_]
except KeyError:
continue
level = np.arange(Wavgmn.shape[-1], dtype=np.float128)
scale_ = (2 ** (-level / hurst_))
# scale_ *= (2 * hurst_ - 1) * 2 * hurst_
Wavgn_ = np.nanmean(Wavgmn / (scale_m[:, np.newaxis] ** (1 / hurst_)), axis=0) * scale_
if bars:
Wstdn_ = np.nanstd(Wavgmn / (scale_m[:, np.newaxis] ** (1 / hurst_)), axis=0) * scale_
ax.errorbar(1+level, Wavgn_, yerr=Wstdn_, fmt="-s", color=col_,
markersize=3, alpha=1.0, label="%s %0.3f"%(generator, hurst_))
else:
ax.plot(1+level, Wavgn_, "-s", color=col_, markersize=3,
alpha=1.0, label="%s %0.3f"%(generator, hurst_))
ax.set_xticks(range(1, 21))
ax.grid(color="grey", linestyle=":", alpha=0.5)
ax.set_yscale("log", basey=2)
ax.set_xlim(0.9, 20.1)
ax.set_xlabel("level")
ax.set_ylabel("$\\left(\\delta 2^n \\right)^{-H^{-1}} {\\mathbb{E}W^n}$")
if legend:
legend_ = ax.legend(loc="lower left", frameon=1,
ncol=3, fontsize=7)
legend_.get_frame() #.set_facecolor("whitesmoke")
Create a figure-08 plot of scaled average crossing durations.
In [ ]:
for experiment_ in experiments:
size, replications, generator, method_ = experiment_
name_ = "fig_08-%s-%d-%s-%s.pdf"%(generator, size, replications, method_,)
fig = plt.figure(figsize=(6, 5))
figure_08(fig, str(generator), str(size), str(replications), method_,
bars=False, legend=True)
fig.savefig(os.path.join(output_path, name_), format="pdf")
plt.close()
For the table tab:avg_offspring showing the average number
of offspring at each tree level.
In [ ]:
from math import floor
full_table = list()
for experiment_ in experiments:
size, replications, generator, method = experiment_
results = experiment[str(generator), str(size), :, str(replications)]
data = {float(info_[2]): data_[method] for info_, start_, finish_, seeds_, data_ in results}
table = list()
for hurst_ in exponents:
try:
try:
scale_m, Nmn, Dmnk, Cmnkk, Vmnde, Wmnp, Wavgmn, Wstdmn = data[hurst_]
except ValueError:
scale_m, Nmn, Dmnk, Vmnde, Wmnp, Wavgmn, Wstdmn = data[hurst_]
except KeyError:
continue
# Compute the average number of offspring and the standard deviation
# df_ = pd.DataFrame(dict(average=Nmn.mean(axis=0), std=Nmn.std(axis=0)),
# index=pd.RangeIndex(stop=Nmn.shape[1],name='Level'))
df_ = pd.Series(["$%1.1f\pm%0.2f\\%%$"%(m/1000, 100*s/m) if floor(m/100) > 0 else "--"
for m, s in zip(Nmn.mean(axis=0), Nmn.std(axis=0))],
index=pd.RangeIndex(stop=Nmn.shape[1],name='Level'), name=hurst_)
table.append((hurst_, df_))
table = pd.concat([tab_ for hurst_, tab_ in table], axis=1,
keys=[hurst_ for hurst_, tab_ in table], names=["hurst"])
full_table.append((experiment_, table))
table = pd.concat([tab_ for hurst_, tab_ in full_table], axis=1, join="inner",
keys=[hurst_ for hurst_, tab_ in full_table],
names=["size", "replications", "generator", "method"])
Might want to use \usepackage{booktabs} or \usepackage{lscape}
Output .tex files with name format "tab_01-%s-%0.3f.tex"%(method, hurst,)
In [ ]:
for hurst_ in exponents:
name_ = "tab_01-%s-%0.3f.tex"%(method_, hurst_,)
out_ = table.xs(method, axis=1, level=3).xs(hurst_, axis=1, level=-1)
out_.columns = out_.columns.droplevel(0).droplevel(0)
# .style.format({"average":"{:1.0f}", "std":"±{:1.0f}"})
body_ = out_.to_latex(escape=False, na_rep="--", bold_rows=True)\
.replace("_", "\\_")
body_ += """\\caption{The average number of offspring at each level (in\n"""\
""" thousands; $\\pm$1 std. dev. in percent) for processes\n"""\
""" with $H=%0.3f$.} \n"""%(hurst_,)
body_ += """\\label{tab:avg_offspring_%0.3f}\n"""%(hurst_,)
with open(os.path.join(output_path, name_), "w") as fout_:
fout_.write(body_)
Take the average crossing duration at each level
$(\delta 2^n)^{-H^{-1}} \mathbb{E}W^n = 2^{f(H, d)}$
$\log_2 \mathbb{E}W^n = f(H, d) + \frac1H \log_2 \delta + \frac{n}{H}$
$\log_2 (\delta 2^n)^{-H^{-1}} \mathbb{E}W^n = f(H, d)$
$F(H, d) = d \bigl(H - \frac12\bigr)^{-\frac2{d}}$
In [ ]:
selector = np.s_[:12]
levels_ = np.r_[selector].astype(float)
log2ed_list = []
check_list_ = [
(33554432, "334x3", "FBM", 1.0),
(8388608, "125x8", "HRP2_1", 2.0),
(8388608, "125x8", "HRP3_1", 3.0),
(8388608, "125x8", "HRP4_1", 4.0),
]
for size, replications, name, degree in check_list_:
results = experiment[name, str(size), :, str(replications)]
data = {float(info_[2]): data_[method]
for info_, start_, finish_, seeds_, data_ in results
if float(info_[2]) > 0.5}
slices_ = {hurst_: (res_[0], res_[-2][:, selector]) for hurst_, res_ in data.items()}
log2ed_ = np.stack([(np.log2(dur_) - (np.log2(delta_[:, np.newaxis]) + levels_) / hurst_).mean(axis=0)
for hurst_, (delta_, dur_) in slices_.items()], axis=0)
hursts_ = np.array([*slices_.keys()])[:, np.newaxis]
order_ = hursts_.argsort(axis=0)[:, 0]
hursts_ = hursts_[order_]
log2ed_ = log2ed_[order_]
log2ed_ /= (1.5 - hursts_)
# h0_ = (hursts_ - 1) / degree + 1
# log2ed_ /= (1.5 - h0_)
log2ed_list.append(log2ed_)
# log2ed_ /= ((hursts_ - 1) / degree + 0.5) ** (-2 / float(degree))
In [ ]:
log2ed_ = np.stack(log2ed_list, axis=0).mean(axis=-1)
In [ ]:
plt.plot(hursts_, log2ed_[0], "r") # d - 1
plt.plot(hursts_, log2ed_[1], "g") # d - 1 - 0
plt.plot(hursts_, log2ed_[2], "b") # d - 1 - 0.75
plt.plot(hursts_, log2ed_[3], "k")# d - 1 -
In [ ]:
dlog2ed_ = np.diff(log2ed_, axis=-1) / np.diff(hursts_.T, axis=-1)
In [ ]:
dlog2ed_[:, :-2].mean(axis=-1)
In [ ]:
plt.plot(hursts_[1:], dlog2ed_[0], "r")
plt.plot(hursts_[1:], dlog2ed_[1], "g")
plt.plot(hursts_[1:], dlog2ed_[2], "b")
plt.plot(hursts_[1:], dlog2ed_[3], "k")
In [ ]:
plt.plot(hursts_[1:], dlog2ed_[0] + 0, "r") # d - 1
plt.plot(hursts_[1:], dlog2ed_[1] + 18.5, "g") # d - 1 - 0
plt.plot(hursts_[1:], dlog2ed_[2] + 25, "b") # d - 1 - 0.75
plt.plot(hursts_[1:], dlog2ed_[3] + 28.25, "k") # d - 1 -
In [ ]:
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111)
color_ = plt.cm.rainbow(np.linspace(0, 1, num=5))[::-1]
for hurst_, Wavgmn in slices_.items():
ax.hist(np.log2(Wavgmn[:, 0]),
bins=200, alpha=0.5, lw=0, normed=True, color="red")
# for level, (Wavgn, col_) in enumerate(zip(Wavgmn.T, color_), 7):
# ax.hist(np.log2(Wavgn) - (float(level) / hurst_)**(1-hurst_),
# bins=200, alpha=0.5, lw=0, normed=True, color=col_)
In [ ]:
log_Wavghn = np.stack([np.nanmean(np.diff(np.log2(Wavgmn), axis=1), axis=0)
for hurst_, Wavgmn in slices_.items()])
hursts_ = np.array(slices_.keys())
In [ ]:
log_Wavghn.shape
In [ ]:
plt.plot(hursts_[np.newaxis, :] * log_Wavghn.T)
In [ ]:
colors_ = plt.cm.rainbow_r(np.linspace(0, 1, num=log_Wavghn.shape[1]))
for col_, log_Wavgh in zip(colors_, log_Wavghn.T):
plt.scatter(hursts_, log_Wavgh * hursts_, lw=0, color=col_, alpha=0.5);
In [ ]:
log_Wavgh * hursts_ - 1
In [ ]:
y = np.log2(np.diff(log_Wavghn, axis=1).mean(axis=1))
X = hursts_
In [ ]:
1.0 / (np.diff(y) / np.diff(X))
In [ ]:
1.0 / np.diff(log_Wavghn, axis=1).mean(axis=1) - hursts_
In [ ]:
plt.scatter(hursts_, np.diff(log_Wavghn, axis=1).mean(axis=1) - 1.0 / hursts_)
In [ ]:
plt.scatter(hursts_, np.log2(np.diff(log_Wavghn, axis=1).mean(axis=1)))
In [ ]:
plt.plot(np.diff(log_Wavghn, axis=1).T)
In [ ]:
# / hursts_[np.newaxis]