Plots and analysis


In [ ]:
import os
import re
import time
import numpy as np
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt

In [ ]:
BASE_PATH = "/Volumes/LaCie/from_macHD/Github/crossing_paper2017"
# BASE_PATH = ".."

Compute the empirical probabilities by averaging across all replications


In [ ]:
def offspring_empirical(Dmnk, levels, laplace=False):
    # Get pooled frequencies
    Djk = Dmnk[:, levels].sum(axis=1, keepdims=False, dtype=np.float)
    Dj = Djk.sum(axis=1, keepdims=True)

    # Compute the empirical probabilities
    Pjk = Djk / Dj if not laplace else (Djk + 1.0) / (Dj + Djk.shape[1])

    levels = np.arange(Dmnk.shape[1], dtype=np.int)[levels]
    return levels + 1, np.nanmean(Pjk, axis=0), np.nanstd(Pjk, axis=0)

Get theoretical values of the probability according to the conjectured distribution: $$ Z \sim \text{Geom}\bigl(4^{\frac{1}{2}-\frac{1}{2h}}\bigr) \text{ over } \{2n\,:\,n\geq 1\} \,. $$

For $\theta = 2^{1-h^{-1}}$, the law, once again, is $$ \mathbb{P}(Z=2k) = \theta \cdot (1-\theta)^{k-1}\,. $$


In [ ]:
from math import log

def offspring_prob(Z_max, hurst):
    Z = np.arange(2, Z_max, 2)
    theta = 2.0 ** (1.0 - 1.0 / hurst)
    return Z, theta * np.exp((Z // 2 - 1) * log(1 - theta))

Use the geometric distribution's mean value to estimate the hurst exponent: $$ \mathbb{E} Z = 2 \theta \sum_{k\geq 1} k (1 - \theta)^{k-1} = 2 \theta \sum_{k\geq 1} \sum_{j\geq k} (1 - \theta)^{j-1} = 2 \theta \sum_{k\geq 1} \theta^{-1} (1 - \theta)^{k-1} = 2 \theta^{-1} \,, $$ whence $$ 2^{1-h^{-1}} = \frac{2}{\mathbb{E} Z} \Leftrightarrow h = \frac{\log 2}{\log \mathbb{E}Z}\,. $$


In [ ]:
def offspring_hurst(Dmnk, levels, laplace=False):
    # Get pooled frequencies
    Dmj = Dmnk[:, levels].sum(axis=2, dtype=np.float)

    # Compute the sum of the left-closed tails sums,
    #  and divide by the total number of offspring.
    Mmj = 2 * Dmnk[:, levels, ::-1].cumsum(axis=-1).sum(axis=-1) / Dmj
    Hmj = np.log(2) / np.log(Mmj)

    levels = np.arange(Dmnk.shape[1], dtype=np.int)[levels]
    return levels + 1, np.nanmean(Hmj, axis=0), np.nanstd(Hmj, axis=0)

Experiments

Create the output folder.


In [ ]:
output_path = os.path.join("../plots", time.strftime("%Y%m%d_%H%M%S"))

if not os.path.exists(output_path):
    os.mkdir(output_path)

print(output_path)

Load the experiment manager


In [ ]:
from crossing_tree.manager import ExperimentManager

experiment = ExperimentManager(name_format=re.compile(
        r"^(?P<class>[^-]+)"+
        r"-(?P<size>\d+)" +
        r"-(?P<hurst>(\d*\.)?\d+)" +
        r"-(?P<replications>\d+x\d+)" + # r"-(?P<n_batch>\d+)x(?P<n_jobs>\d+)" +
        r"_(?:[\d-]+)" + # r"_(?P<dttm>[\d-]+)" +
        r".gz$", flags=re.I | re.U))

experiment.update(os.path.join(BASE_PATH, "results/version_2"))

Print the keys of the experiment


In [ ]:
print(experiment.keys_)

Choose a particular instance


In [ ]:
method = "med"  # needs bytes encoding

experiments = [# (8388608, "125x8", "FBM", method),
               (33554432, "334x3", "FBM", method),
               (8388608, "125x8", "HRP2_1", method),
               (8388608, "125x8", "HRP3_1", method),
               (8388608, "125x8", "HRP4_1", method),
               # (524288, "125x8", "HRP2_16", method),
               # (524288, "125x8", "HRP3_16", method),
               # (524288, "125x8", "HRP4_16", method),
               (8388608, "125x8", "WEI_1.2", method),
              ]

exponents = [0.500, 0.550, 0.600, 0.650, 0.700, 0.750, 0.800, 0.850, 0.900,
             0.910, 0.915, 0.920, 0.925, 0.930, 0.935, 0.940, 0.945, 0.950,
             0.990]

FIGURE 01

for label fig:fbm_offspring_distribution


In [ ]:
def figure_01(fig, generator, size, replications, method, p=6, q=7, bars=True, legend=True):
    ax = fig.add_subplot(111)

    results = experiment[generator, size, :, replications]
    data = {float(info_[2]): data_[method] for info_, start_, finish_, seeds_, data_ in results}

    color_ = plt.cm.rainbow(np.linspace(0, 1, num=len(exponents)))[::-1]
    for col_, hurst_ in zip(color_, exponents):
        try:
            try:
                scale_m, Nmn, Dmnk, Cmnkk, Vmnde, Wmnp, Wavgmn, Wstdmn = data[hurst_]
            except ValueError: 
                scale_m, Nmn, Dmnk, Vmnde, Wmnp, Wavgmn, Wstdmn = data[hurst_]
        except KeyError:
            continue

        levels, Pk_avg, Pk_std = offspring_empirical(Dmnk, slice(p, q), laplace=False)
        k, Pk = offspring_prob(2*(Pk_avg.shape[0] + 1), hurst=hurst_)
        ax.plot(k, Pk, linestyle='-', color='black', alpha=0.5, zorder=-99)
        if bars:
            ax.errorbar(k, Pk_avg, yerr=Pk_std, fmt='-s',
                        color=col_, markersize=3, alpha=1.0,
                        label="%s %0.3f"%(generator, hurst_))
        else:
            ax.plot(k, Pk_avg, "-s", color=col_, markersize=3,
                    alpha=1.0, label="%s %0.3f"%(generator, hurst_))

    ax.set_xticks(np.arange(2, 43, 2))
    ax.grid(alpha=0.5, linestyle=":", color="grey")
    ax.set_xlim(1.9, 12.1)
    ax.set_yscale("log", basey=2)
    ax.set_ylim(.5e-4, 1.1)

    ax.set_ylabel("probability")
    ax.set_xlabel("number of offspring")
    
    if legend:
        legend_ = ax.legend(loc="lower left", frameon=True,
                            ncol=2, fontsize=7)
        legend_.get_frame() #.set_facecolor("whitesmoke")

Generate a figure-01 for different sizes and numbers of replications.


In [ ]:
p, q = 6, 10 # 5, 8
for experiment_ in experiments:
    size, replications, generator, method_ = experiment_
    name_ = "fig_01-%d_%s-%s-%d-%s-%s.pdf"%(p, str(q) if isinstance(q, int) else "X",
                                               generator, size, replications, method_,)

    fig = plt.figure(figsize=(6, 5))
    figure_01(fig, str(generator), str(size), str(replications), method_,
              p, q, bars=False, legend=True)
    fig.savefig(os.path.join(output_path, name_), format="pdf")
    plt.close()

FIGURE 04

for label fig:fbm_hurst_crossing_tree


In [ ]:
# exponents = [0.5, 0.6, 0.7, 0.8, 0.9] 
# exponents = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]
def figure_04(fig, generator, size, replications, method, p=6, q=7, bars=False, legend=True):
    ax = fig.add_subplot(111)

    results = experiment[generator, size, :, replications]
    data = {float(info_[2]): data_[method] for info_, start_, finish_, seeds_, data_ in results}

    first_, last_ = np.inf, -np.inf
    color_ = plt.cm.rainbow(np.linspace(0, 1, num=len(exponents)))[::-1]
    for col_, hurst_ in zip(color_, exponents):
        try:
            try:
                scale_m, Nmn, Dmnk, Cmnkk, Vmnde, Wmnp, Wavgmn, Wstdmn = data[hurst_]
            except ValueError: 
                scale_m, Nmn, Dmnk, Vmnde, Wmnp, Wavgmn, Wstdmn = data[hurst_]
        except KeyError:
            continue
        levels, Hj_avg, Hj_std = offspring_hurst(Dmnk, slice(p, q))
        ax.axhline(y=hurst_, color='black', linestyle='-', alpha=0.25, zorder=-99)

        mask = Hj_avg < hurst_ * 1.35
        if bars:
            ax.errorbar(levels[mask], Hj_avg[mask], yerr=Hj_std[mask],
                        fmt="-s", color=col_, markersize=3, alpha=1.0,
                        label="%s %0.3f"%(generator, hurst_))
        else:
            ax.plot(levels[mask], Hj_avg[mask], "-s", 
                    color=col_, markersize=3, alpha=1.0,
                    label="%s %0.3f"%(generator, hurst_))
        first_ = min(levels[mask][0], first_)
        last_ = max(levels[mask][-1], last_)

    last_ = 20 # min(last_, 20)
    ax.set_xticks(np.arange(first_, last_ + 1))
    ax.grid(color="grey", linestyle=":", alpha=0.5)
    ax.set_xlim(first_ - 0.1, last_ + 1.1)
    ax.set_ylim(0.45, 1.01)
    ## Add a legend with white opaque background.
    #     ax.set_title( 'Crossing tree estimates of the Hurst exponent' )
    ax.set_xlabel("level $\\delta 2^k$")
    ax.set_ylabel("$H$")

    if legend:
        legend_ = ax.legend(loc="lower right", frameon=1,
                            ncol=2, fontsize=7)
        legend_.get_frame() #.set_facecolor("whitesmoke")

Create a figure-04 plot of mean-based hurst estimates


In [ ]:
p, q = 0, None
for experiment_ in experiments:
    size, replications, generator, method_ = experiment_
    name_ = "fig_04-%d_%s-%s-%d-%s-%s.pdf"%(p, str(q) if isinstance(q, int) else "X",
                                               generator, size, replications, method_,)

    fig = plt.figure(figsize=(6, 5))
    figure_04(fig, str(generator), str(size), str(replications), method_,
              p, q, bars=False, legend=True)
    fig.savefig(os.path.join(output_path, name_), format="pdf")
    plt.close()

FIGURE 08

for label fig:fbm_avg_crossing_durations


In [ ]:
# exponents = [0.5, 0.6, 0.7, 0.8, 0.9] 
# exponents = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]

def figure_08(fig, generator, size, replications, method, bars=False, legend=True):
    ax = fig.add_subplot(111)

    results = experiment[generator, size, :, replications]
    data = {float(info_[2]): data_[method] for info_, start_, finish_, seeds_, data_ in results}

    color_ = plt.cm.rainbow(np.linspace(0, 1, num=len(exponents)))[::-1]
    for col_, hurst_ in zip(color_, exponents):
        try:
            try:
                scale_m, Nmn, Dmnk, Cmnkk, Vmnde, Wmnp, Wavgmn, Wstdmn = data[hurst_]
            except ValueError: 
                scale_m, Nmn, Dmnk, Vmnde, Wmnp, Wavgmn, Wstdmn = data[hurst_]
        except KeyError:
            continue
        level = np.arange(Wavgmn.shape[-1], dtype=np.float128)
        scale_ = (2 ** (-level / hurst_))
#         scale_ *= (2 * hurst_ - 1) * 2 * hurst_
        Wavgn_ = np.nanmean(Wavgmn / (scale_m[:, np.newaxis] ** (1 / hurst_)), axis=0) * scale_
        if bars:
            Wstdn_ = np.nanstd(Wavgmn / (scale_m[:, np.newaxis] ** (1 / hurst_)), axis=0) * scale_
            ax.errorbar(1+level, Wavgn_, yerr=Wstdn_, fmt="-s", color=col_,
                        markersize=3, alpha=1.0, label="%s %0.3f"%(generator, hurst_))
        else:
            ax.plot(1+level, Wavgn_, "-s", color=col_, markersize=3,
                    alpha=1.0, label="%s %0.3f"%(generator, hurst_))

    ax.set_xticks(range(1, 21))
    ax.grid(color="grey", linestyle=":", alpha=0.5)
    ax.set_yscale("log", basey=2)
    ax.set_xlim(0.9, 20.1)
    ax.set_xlabel("level")
    ax.set_ylabel("$\\left(\\delta 2^n \\right)^{-H^{-1}} {\\mathbb{E}W^n}$")
    if legend:
        legend_ = ax.legend(loc="lower left", frameon=1,
                            ncol=3, fontsize=7)
        legend_.get_frame() #.set_facecolor("whitesmoke")

Create a figure-08 plot of scaled average crossing durations.


In [ ]:
for experiment_ in experiments:
    size, replications, generator, method_ = experiment_
    name_ = "fig_08-%s-%d-%s-%s.pdf"%(generator, size, replications, method_,)

    fig = plt.figure(figsize=(6, 5))
    figure_08(fig, str(generator), str(size), str(replications), method_,
              bars=False, legend=True)
    fig.savefig(os.path.join(output_path, name_), format="pdf")
    plt.close()

TABLE 01

For the table tab:avg_offspring showing the average number of offspring at each tree level.


In [ ]:
from math import floor

full_table = list()
for experiment_ in experiments:
    size, replications, generator, method = experiment_

    results = experiment[str(generator), str(size), :, str(replications)]
    data = {float(info_[2]): data_[method] for info_, start_, finish_, seeds_, data_ in results}
    
    table = list()
    for hurst_ in exponents:
        try:
            try:
                scale_m, Nmn, Dmnk, Cmnkk, Vmnde, Wmnp, Wavgmn, Wstdmn = data[hurst_]
            except ValueError: 
                scale_m, Nmn, Dmnk, Vmnde, Wmnp, Wavgmn, Wstdmn = data[hurst_]
        except KeyError:
            continue

        # Compute the average number of offspring and the standard deviation
        # df_ = pd.DataFrame(dict(average=Nmn.mean(axis=0), std=Nmn.std(axis=0)),
        #                    index=pd.RangeIndex(stop=Nmn.shape[1],name='Level'))
        df_ = pd.Series(["$%1.1f\pm%0.2f\\%%$"%(m/1000, 100*s/m) if floor(m/100) > 0 else "--"
                         for m, s in zip(Nmn.mean(axis=0), Nmn.std(axis=0))],
                        index=pd.RangeIndex(stop=Nmn.shape[1],name='Level'), name=hurst_)
        table.append((hurst_, df_))

    table = pd.concat([tab_ for hurst_, tab_ in table], axis=1,
                      keys=[hurst_ for hurst_, tab_ in table], names=["hurst"])    
    full_table.append((experiment_, table))

table = pd.concat([tab_ for hurst_, tab_ in full_table], axis=1, join="inner", 
                  keys=[hurst_ for hurst_, tab_ in full_table],
                  names=["size", "replications", "generator", "method"])

Might want to use \usepackage{booktabs} or \usepackage{lscape}

Output .tex files with name format "tab_01-%s-%0.3f.tex"%(method, hurst,)


In [ ]:
for hurst_ in exponents:
    name_ = "tab_01-%s-%0.3f.tex"%(method_, hurst_,)
    
    out_ = table.xs(method, axis=1, level=3).xs(hurst_, axis=1, level=-1)
    out_.columns = out_.columns.droplevel(0).droplevel(0)

    # .style.format({"average":"{:1.0f}", "std":"±{:1.0f}"})
    body_ = out_.to_latex(escape=False, na_rep="--", bold_rows=True)\
                .replace("_", "\\_")
    body_ += """\\caption{The average number of offspring at each level (in\n"""\
             """         thousands; $\\pm$1 std. dev. in percent) for processes\n"""\
             """         with $H=%0.3f$.} \n"""%(hurst_,)
    body_ += """\\label{tab:avg_offspring_%0.3f}\n"""%(hurst_,)

    with open(os.path.join(output_path, name_), "w") as fout_:
        fout_.write(body_)

FIGURE

Take the average crossing duration at each level

$(\delta 2^n)^{-H^{-1}} \mathbb{E}W^n = 2^{f(H, d)}$

$\log_2 \mathbb{E}W^n = f(H, d) + \frac1H \log_2 \delta + \frac{n}{H}$

$\log_2 (\delta 2^n)^{-H^{-1}} \mathbb{E}W^n = f(H, d)$

$F(H, d) = d \bigl(H - \frac12\bigr)^{-\frac2{d}}$


In [ ]:
selector = np.s_[:12]
levels_ = np.r_[selector].astype(float)

log2ed_list = []

check_list_ = [
    (33554432, "334x3", "FBM", 1.0),
    (8388608, "125x8", "HRP2_1", 2.0),
    (8388608, "125x8", "HRP3_1", 3.0),
    (8388608, "125x8", "HRP4_1", 4.0),
]

for size, replications, name, degree in check_list_:
    results = experiment[name, str(size), :, str(replications)]
    data = {float(info_[2]): data_[method]
            for info_, start_, finish_, seeds_, data_ in results
            if float(info_[2]) > 0.5}

    slices_ = {hurst_: (res_[0], res_[-2][:, selector]) for hurst_, res_ in data.items()}
    
    log2ed_ = np.stack([(np.log2(dur_) - (np.log2(delta_[:, np.newaxis]) + levels_) / hurst_).mean(axis=0)
                        for hurst_, (delta_, dur_) in slices_.items()], axis=0)

    hursts_ = np.array([*slices_.keys()])[:, np.newaxis]
    order_ = hursts_.argsort(axis=0)[:, 0]

    hursts_ = hursts_[order_]
    log2ed_ = log2ed_[order_]

    log2ed_ /= (1.5 - hursts_)

#     h0_ = (hursts_ - 1) / degree + 1
#     log2ed_ /= (1.5 - h0_)

    log2ed_list.append(log2ed_)
    # log2ed_ /= ((hursts_ - 1) / degree + 0.5) ** (-2 / float(degree))

In [ ]:
log2ed_ = np.stack(log2ed_list, axis=0).mean(axis=-1)

In [ ]:
plt.plot(hursts_, log2ed_[0], "r")    # d - 1
plt.plot(hursts_, log2ed_[1], "g") # d - 1 - 0
plt.plot(hursts_, log2ed_[2], "b")   # d - 1 - 0.75
plt.plot(hursts_, log2ed_[3], "k")# d - 1 -

In [ ]:
dlog2ed_ = np.diff(log2ed_, axis=-1) / np.diff(hursts_.T, axis=-1)

In [ ]:
dlog2ed_[:, :-2].mean(axis=-1)

In [ ]:
plt.plot(hursts_[1:], dlog2ed_[0], "r")
plt.plot(hursts_[1:], dlog2ed_[1], "g")
plt.plot(hursts_[1:], dlog2ed_[2], "b")
plt.plot(hursts_[1:], dlog2ed_[3], "k")

In [ ]:
plt.plot(hursts_[1:], dlog2ed_[0] + 0, "r")     # d - 1
plt.plot(hursts_[1:], dlog2ed_[1] + 18.5, "g")  # d - 1 - 0
plt.plot(hursts_[1:], dlog2ed_[2] + 25, "b")    # d - 1 - 0.75
plt.plot(hursts_[1:], dlog2ed_[3] + 28.25, "k") # d - 1 -

In [ ]:
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111)
color_ = plt.cm.rainbow(np.linspace(0, 1, num=5))[::-1]
for hurst_, Wavgmn in slices_.items():
    ax.hist(np.log2(Wavgmn[:, 0]),
            bins=200, alpha=0.5, lw=0, normed=True, color="red")
#     for level, (Wavgn, col_) in enumerate(zip(Wavgmn.T, color_), 7):
#         ax.hist(np.log2(Wavgn) - (float(level) / hurst_)**(1-hurst_),
#                 bins=200, alpha=0.5, lw=0, normed=True, color=col_)

In [ ]:
log_Wavghn = np.stack([np.nanmean(np.diff(np.log2(Wavgmn), axis=1), axis=0)
                       for hurst_, Wavgmn in slices_.items()])
hursts_ = np.array(slices_.keys())

In [ ]:
log_Wavghn.shape

In [ ]:
plt.plot(hursts_[np.newaxis, :] * log_Wavghn.T)

In [ ]:
colors_ = plt.cm.rainbow_r(np.linspace(0, 1, num=log_Wavghn.shape[1]))
for col_, log_Wavgh in zip(colors_, log_Wavghn.T):
    plt.scatter(hursts_, log_Wavgh * hursts_, lw=0, color=col_, alpha=0.5);

In [ ]:
log_Wavgh * hursts_ - 1

In [ ]:
y = np.log2(np.diff(log_Wavghn, axis=1).mean(axis=1))
X = hursts_

In [ ]:
1.0 / (np.diff(y) / np.diff(X))

In [ ]:
1.0 / np.diff(log_Wavghn, axis=1).mean(axis=1) - hursts_

In [ ]:
plt.scatter(hursts_, np.diff(log_Wavghn, axis=1).mean(axis=1) - 1.0 / hursts_)

In [ ]:
plt.scatter(hursts_, np.log2(np.diff(log_Wavghn, axis=1).mean(axis=1)))

In [ ]:
plt.plot(np.diff(log_Wavghn, axis=1).T)

In [ ]:
# / hursts_[np.newaxis]