In [1]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
Handy routines to store and recover Python objects, in particular the experiment result dictionaries.
In [2]:
import time, gzip
import os, pickle
def save(obj, path, prefix=None):
    prefix_ = "" if prefix is None else "%s_"%(prefix,)
    filename_ = os.path.join(path, "%s%s.gz"%(prefix_, time.strftime("%Y%m%d-%H%M%S"),))
    with gzip.open(filename_, "wb+", 9) as fout_:
        pickle.dump(obj, fout_)
    return filename_

def load(filename):
    with gzip.open(filename, "rb") as f:
        return pickle.load(f)
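For instance, a results dictionary can be round-tripped through a timestamped gzip pickle; the path and prefix below are purely illustrative.
In [ ]:
# Illustrative round-trip: pickle an object to a timestamped .gz file and load it back.
filename_ = save({"demo": np.arange(10)}, "../results/", prefix="demo")
restored_ = load(filename_)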
The path analyzer
In [3]:
from crossing_tree import structural_statistics
Collect a list of results returned by structural_statistics into aligned data tensors.
In [4]:
from crossing_tree import collect_structural_statistics
A function implementing the various choices of the grid resolution delta.
In [5]:
import warnings
def get_delta_method(delta=1.0):
if isinstance(delta, str):
if delta == "std":
# the standard deviation of increments
delta_ = lambda X: np.diff(X).std()
elif delta == "med":
# Use the median absolute difference [Jones, Rolls; 2009] p. 11 (arxiv:0911.5204v2)
delta_ = lambda X: np.median(np.abs(np.diff(X)))
elif delta == "mean":
# Use the mean absolute difference
delta_ = lambda X: np.mean(np.abs(np.diff(X)))
elif delta == "iqr":
# Interquartile range
delta_ = lambda X: np.subtract(*np.percentile(np.diff(X), [75, 25]))
elif delta == "rng":
# Use the range estimate as suggested by Geoffrey on 2015-05-28
warnings.warn("""Use of `range`-based grid resolution """
"""is discouraged since it may cause misaligned """
"""crossing trees.""", RuntimeWarning)
delta_ = lambda X: (X.max() - X.min()) / (2**12)
else:
raise ValueError("""Invalid `delta` setting. Accepted values """
"""are: [`iqr`, `std`, `med`, `rng`, `mean`].""")
elif isinstance(delta, float) and delta > 0:
delta_ = lambda X: delta
else:
raise TypeError("""`delta` must be either a float, or a method """
"""identifier.""")
return delta_
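As a quick illustration (not part of the experiment itself), the different resolution estimators can be compared on a toy Gaussian random walk.
In [ ]:
# Compare the grid-resolution estimators on a toy random walk (illustrative only).
X_demo = np.random.RandomState(0).randn(10000).cumsum()
for method_ in ["std", "med", "mean", "iqr"]:
    print(method_, get_delta_method(method_)(X_demo))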
The Monte Carlo experiment kernel.
In [6]:
from sklearn.base import clone
def experiment(experiment_id, n_replications, methods, generator):
    generator = clone(generator)
    generator.start()
    deltas = [get_delta_method(method_) for method_ in methods]
    results = {method_: list() for method_ in methods}
    for j in range(n_replications):
        T, X = generator.draw()
        # Apply all methods to the same sample path.
        for delta, method in zip(deltas, methods):
            result_ = structural_statistics(X, T, scale=delta(X), origin=X[0])
            results[method].append(result_)
    generator.finish()
    return experiment_id, results
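The kernel can be smoke-tested serially on a single small batch before scheduling the parallel runs; the generator and its parameters below are illustrative and mirror the fractional Brownian motion setup used later.
In [ ]:
# Serial smoke test of the experiment kernel on a small fBm sample (illustrative).
from crossing_tree.processes import FractionalBrownianMotion
gen_demo_ = FractionalBrownianMotion(N=1 << 12, hurst=0.6, random_state=123)
demo_id_, demo_results_ = experiment("smoke-test", 2, ["med", "iqr"], gen_demo_)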
In [7]:
from joblib import Parallel, delayed
A pool of pre-drawn master random seeds; one seed is popped for each experiment below.
In [8]:
# Extra random seeds should be prepended to the array.
master_seeds = [0xD5F60A17, 0x26F9935C, 0x0E4C1E75, 0xDA7C4291, 0x7ABE722E,
0x126F3E10, 0x045300B1, 0xB0A9AD11, 0xEED05353, 0x824736C7,
0x7AA17C9C, 0xB695D6B1, 0x7E214411, 0x538CDEEF, 0xFD55FF46,
0xE14E1801, 0x872F687C, 0xA58440D9, 0xB8A273FD, 0x0BD1DD28,
0xAB6A6AE6, 0x7180E905, 0x870E7BAB, 0x846D0C7A, 0xAEF0422D,
0x16C53C83, 0xE32EA61D, 0xE0AD0A26, 0xCC90CA9A, 0x7D4020D2,]
The Monte Carlo experiment is run in parallel batches, each initialized with a randomly drawn seed.
In [9]:
MAX_RAND_SEED = np.iinfo(np.int32).max
The folder to store the results in
In [10]:
OUTPUT_PATH = "../results/"
In [11]:
from crossing_tree.processes import FractionalBrownianMotion
seed = master_seeds.pop()
print("Using seed %X"%(seed,))
random_state = np.random.RandomState(seed)
skip = False
Setup
In [12]:
n_samples, methods = 1 << 23, ["med", "std", "iqr", "mean",]
hurst_exponents = [0.500, 0.550, 0.600, 0.650, 0.700, 0.750, 0.800, 0.850, 0.900,
0.910, 0.915, 0.920, 0.925, 0.930, 0.935, 0.940, 0.945, 0.950,
0.990,]
n_per_batch, n_batches = 125, 8
Run the experiment for the Fractional Brownian Motion.
In [13]:
if not skip:
    par_ = Parallel(n_jobs=-1, verbose=0)
    for hurst_ in hurst_exponents:
        name_ = "FBM-%d-%0.3f-%dx%d"%(n_samples, hurst_, n_per_batch, n_batches)
        print(name_,)
        # Schedule the experiments
        seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
        schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                         FractionalBrownianMotion(N=n_samples,
                                                                  hurst=hurst_,
                                                                  random_state=seed_))
                     for seed_ in seeds)
        # Run the experiment and collect the results
        tick_ = time.time()
        experiment_ids = list()
        results_ = {method: list() for method in methods}
        for id_, dict_ in par_(schedule_):
            experiment_ids.append(id_)
            for method in methods:
                results_[method].extend(dict_[method])
        results = {key_: collect_structural_statistics(list_)
                   for key_, list_ in results_.items()}
        tock_ = time.time()
        # Save the results and log
        filename_ = save((tick_, tock_, experiment_ids, results), OUTPUT_PATH, name_)
        print("%0.3fsec."%(tock_ - tick_,), filename_)
In [14]:
from crossing_tree.processes import HermiteProcess
seed = master_seeds.pop()
print("Using seed %X"%(seed,))
random_state = np.random.RandomState(seed)
skip = False
Setup: use no downsampling.
In [15]:
n_samples, n_downsample = 1 << 23, 1
degrees, methods = [2, 3, 4], ["med", "std", "iqr", "mean",]
hurst_exponents = [ 0.550, 0.600, 0.650, 0.700, 0.750, 0.800, 0.850, 0.900,
0.910, 0.915, 0.920, 0.925, 0.930, 0.935, 0.940, 0.945, 0.950,
0.990,]
n_per_batch, n_batches = 125, 8
Run the experiment for the Hermite process.
In [16]:
if not skip:
    par_ = Parallel(n_jobs=-1, verbose=0)
    for degree_ in degrees:
        for hurst_ in hurst_exponents:
            name_ = "HRP%d_%d-%d-%0.3f-%dx%d"%(degree_, n_downsample, n_samples, hurst_, n_per_batch, n_batches)
            print(name_,)
            # Schedule the experiments
            seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
            schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                             HermiteProcess(N=n_samples,
                                                            degree=degree_,
                                                            n_downsample=n_downsample,
                                                            hurst=hurst_,
                                                            random_state=seed_))
                         for seed_ in seeds)
            # Run the experiment and collect the results
            tick_ = time.time()
            experiment_ids = list()
            results_ = {method: list() for method in methods}
            for id_, dict_ in par_(schedule_):
                experiment_ids.append(id_)
                for method in methods:
                    results_[method].extend(dict_[method])
            results = {key_: collect_structural_statistics(list_)
                       for key_, list_ in results_.items()}
            tock_ = time.time()
            # Save the results and log
            filename_ = save((tick_, tock_, experiment_ids, results), OUTPUT_PATH, name_)
            print("%0.3fsec."%(tock_ - tick_,), filename_)
In [17]:
from crossing_tree.processes import WeierstrassFunction
seed = master_seeds.pop()
print("Using seed %X"%(seed,))
random_state = np.random.RandomState(seed)
skip = False
Setup
In [18]:
n_samples, lambda_0 = 1 << 23, 1.2
methods = ["med", "std", "iqr", "mean",]
holder_exponents = [0.500, 0.550, 0.600, 0.650, 0.700, 0.750, 0.800, 0.850, 0.900,
0.910, 0.915, 0.920, 0.925, 0.930, 0.935, 0.940, 0.945, 0.950,
0.990,]
n_per_batch, n_batches = 125, 8
Run the experiment for the random Weierstrass function $W_H \colon [0, 1] \to \mathbb{R}$: $$ W_H(t) = \sum_{k\geq 0} \lambda_0^{-k H} \bigl(\cos(2 \pi \lambda_0^k t + \phi_k) - \cos \phi_k\bigr)\,, $$ with $(\phi_k)_{k\geq0} \sim \mathbb{U}[0, 2\pi]$, and $\lambda_0 > 1$ the fundamental harmonic.
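For intuition, the displayed sum can be approximated directly by truncating it at a finite number of harmonics. The helper below is a purely illustrative sketch (the truncation level K is an arbitrary choice); the experiment itself relies on the library's WeierstrassFunction generator.
In [ ]:
# Illustrative truncated random-phase Weierstrass sum (not the library's implementation).
def weierstrass_demo(t, holder=0.7, lambda_0=1.2, K=30, random_state=None):
    rs = np.random.RandomState(random_state)
    phi = rs.uniform(0., 2 * np.pi, size=K)  # random phases
    k = np.arange(K)
    # \lambda_0^{-kH} (cos(2 \pi \lambda_0^k t + \phi_k) - cos \phi_k), summed over k
    terms = lambda_0 ** (-k * holder) * (
        np.cos(2 * np.pi * np.outer(t, lambda_0 ** k) + phi) - np.cos(phi))
    return terms.sum(axis=-1)

t_demo = np.linspace(0., 1., 4096)
w_demo = weierstrass_demo(t_demo, holder=0.7, lambda_0=1.2, random_state=42)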
In [19]:
if not skip:
    par_ = Parallel(n_jobs=-1, verbose=0)
    for holder_ in holder_exponents:
        name_ = "WEI_%g-%d-%0.3f-%dx%d"%(lambda_0, n_samples, holder_, n_per_batch, n_batches)
        print(name_,)
        # Schedule the experiments
        seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
        schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                         WeierstrassFunction(N=n_samples,
                                                             lambda_0=lambda_0,
                                                             holder=holder_,
                                                             random_state=seed_,
                                                             one_sided=False))
                     for seed_ in seeds)
        # Run the experiment and collect the results
        tick_ = time.time()
        experiment_ids = list()
        results_ = {method: list() for method in methods}
        for id_, dict_ in par_(schedule_):
            experiment_ids.append(id_)
            for method in methods:
                results_[method].extend(dict_[method])
        results = {key_: collect_structural_statistics(list_)
                   for key_, list_ in results_.items()}
        tock_ = time.time()
        # Save the results and log
        filename_ = save((tick_, tock_, experiment_ids, results), OUTPUT_PATH, name_)
        print("%0.3fsec."%(tock_ - tick_,), filename_)
In [20]:
from crossing_tree.processes import HermiteProcess
seed = master_seeds.pop()
print("Using seed %X"%(seed,))
random_state = np.random.RandomState(seed)
skip = False
Setup: use downsampling by a factor of 16.
In [21]:
n_samples, n_downsample = 1 << 19, 1 << 4
degrees, methods = [2, 3, 4], ["med", "std", "iqr", "mean",]
hurst_exponents = [ 0.550, 0.600, 0.650, 0.700, 0.750, 0.800, 0.850, 0.900,
0.910, 0.915, 0.920, 0.925, 0.930, 0.935, 0.940, 0.945, 0.950,
0.990,]
n_per_batch, n_batches = 125, 8
Run the experiment for the Hermite process.
In [22]:
if not skip:
    par_ = Parallel(n_jobs=-1, verbose=0)
    for degree_ in degrees:
        for hurst_ in hurst_exponents:
            name_ = "HRP%d_%d-%d-%0.3f-%dx%d"%(degree_, n_downsample, n_samples, hurst_, n_per_batch, n_batches)
            print(name_,)
            # Schedule the experiments
            seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
            schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                             HermiteProcess(N=n_samples,
                                                            degree=degree_,
                                                            n_downsample=n_downsample,
                                                            hurst=hurst_,
                                                            random_state=seed_))
                         for seed_ in seeds)
            # Run the experiment and collect the results
            tick_ = time.time()
            experiment_ids = list()
            results_ = {method: list() for method in methods}
            for id_, dict_ in par_(schedule_):
                experiment_ids.append(id_)
                for method in methods:
                    results_[method].extend(dict_[method])
            results = {key_: collect_structural_statistics(list_)
                       for key_, list_ in results_.items()}
            tock_ = time.time()
            # Save the results and log
            filename_ = save((tick_, tock_, experiment_ids, results), OUTPUT_PATH, name_)
            print("%0.3fsec."%(tock_ - tick_,), filename_)
In [23]:
from crossing_tree.processes import WeierstrassFunction
seed = master_seeds.pop()
print("Using seed %X"%(seed,))
random_state = np.random.RandomState(seed)
skip = True
Setup
In [24]:
n_samples, lambda_0 = 1 << 23, 3.0
methods = ["med", "std", "iqr", "mean",]
holder_exponents = [0.500, 0.550, 0.600, 0.650, 0.700, 0.750, 0.800, 0.850, 0.900,
0.910, 0.915, 0.920, 0.925, 0.930, 0.935, 0.940, 0.945, 0.950,
0.990,]
n_per_batch, n_batches = 125, 8
Run the experiment for the random Weierstrass function $W_H \colon [0, 1] \to \mathbb{R}$: $$ W_H(t) = \sum_{k\geq 0} \lambda_0^{-k H} \bigl(\cos(2 \pi \lambda_0^k t + \phi_k) - \cos \phi_k\bigr)\,, $$ with $(\phi_k)_{k\geq0} \sim \mathbb{U}[0, 2\pi]$, and $\lambda_0 > 1$ the fundamental harmonic.
In [25]:
if not skip:
    par_ = Parallel(n_jobs=-1, verbose=0)
    for holder_ in holder_exponents:
        name_ = "WEI_%g-%d-%0.3f-%dx%d"%(lambda_0, n_samples, holder_, n_per_batch, n_batches)
        print(name_,)
        # Schedule the experiments
        seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
        schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                         WeierstrassFunction(N=n_samples,
                                                             lambda_0=lambda_0,
                                                             holder=holder_,
                                                             random_state=seed_,
                                                             one_sided=False))
                     for seed_ in seeds)
        # Run the experiment and collect the results
        tick_ = time.time()
        experiment_ids = list()
        results_ = {method: list() for method in methods}
        for id_, dict_ in par_(schedule_):
            experiment_ids.append(id_)
            for method in methods:
                results_[method].extend(dict_[method])
        results = {key_: collect_structural_statistics(list_)
                   for key_, list_ in results_.items()}
        tock_ = time.time()
        # Save the results and log
        filename_ = save((tick_, tock_, experiment_ids, results), OUTPUT_PATH, name_)
        print("%0.3fsec."%(tock_ - tick_,), filename_)
In [26]:
from crossing_tree.processes import WeierstrassFunction
seed = master_seeds.pop()
print("Using seed %X"%(seed,))
random_state = np.random.RandomState(seed)
skip = True
Setup
In [27]:
n_samples, lambda_0 = 1 << 23, 1.7
methods = ["med", "std", "iqr", "mean",]
holder_exponents = [0.500, 0.550, 0.600, 0.650, 0.700, 0.750, 0.800, 0.850, 0.900,
0.910, 0.915, 0.920, 0.925, 0.930, 0.935, 0.940, 0.945, 0.950,
0.990,]
n_per_batch, n_batches = 125, 8
Run the experiment for the random Weierstrass function $W_H \colon [0, 1] \to \mathbb{R}$: $$ W_H(t) = \sum_{k\geq 0} \lambda_0^{-k H} \bigl(\cos(2 \pi \lambda_0^k t + \phi_k) - \cos \phi_k\bigr)\,, $$ with $(\phi_k)_{k\geq0} \sim \mathbb{U}[0, 2\pi]$, and $\lambda_0 > 1$ the fundamental harmonic.
In [28]:
if not skip:
    par_ = Parallel(n_jobs=-1, verbose=0)
    for holder_ in holder_exponents:
        name_ = "WEI_%g-%d-%0.3f-%dx%d"%(lambda_0, n_samples, holder_, n_per_batch, n_batches)
        print(name_,)
        # Schedule the experiments
        seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
        schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                         WeierstrassFunction(N=n_samples,
                                                             lambda_0=lambda_0,
                                                             holder=holder_,
                                                             random_state=seed_,
                                                             one_sided=False))
                     for seed_ in seeds)
        # Run the experiment and collect the results
        tick_ = time.time()
        experiment_ids = list()
        results_ = {method: list() for method in methods}
        for id_, dict_ in par_(schedule_):
            experiment_ids.append(id_)
            for method in methods:
                results_[method].extend(dict_[method])
        results = {key_: collect_structural_statistics(list_)
                   for key_, list_ in results_.items()}
        tock_ = time.time()
        # Save the results and log
        filename_ = save((tick_, tock_, experiment_ids, results), OUTPUT_PATH, name_)
        print("%0.3fsec."%(tock_ - tick_,), filename_)
In [29]:
from crossing_tree.processes import FractionalBrownianMotion
seed = master_seeds.pop()
print("Using seed %X"%(seed,))
random_state = np.random.RandomState(seed)
skip = True
Setup
In [30]:
n_samples, methods = 1 << 25, ["med", "std", "iqr", "mean",]
hurst_exponents = [0.500, 0.550, 0.600, 0.650, 0.700, 0.750, 0.800, 0.850, 0.900,
0.910, 0.915, 0.920, 0.925, 0.930, 0.935, 0.940, 0.945, 0.950,
0.990,]
n_per_batch, n_batches, n_threads = 334, 3, 4
Run the experiment for the Fractional Brownian Motion.
In [31]:
if not skip:
    par_ = Parallel(n_jobs=-1, verbose=0)
    for hurst_ in hurst_exponents:
        name_ = "FBM-%d-%0.3f-%dx%d"%(n_samples, hurst_, n_per_batch, n_batches)
        print(name_,)
        # Schedule the experiments
        seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
        schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                         FractionalBrownianMotion(N=n_samples,
                                                                  hurst=hurst_,
                                                                  random_state=seed_,
                                                                  n_threads=n_threads))
                     for seed_ in seeds)
        # Run the experiment and collect the results
        tick_ = time.time()
        experiment_ids = list()
        results_ = {method: list() for method in methods}
        for id_, dict_ in par_(schedule_):
            experiment_ids.append(id_)
            for method in methods:
                results_[method].extend(dict_[method])
        results = {key_: collect_structural_statistics(list_)
                   for key_, list_ in results_.items()}
        tock_ = time.time()
        # Save the results and log
        filename_ = save((tick_, tock_, experiment_ids, results), OUTPUT_PATH, name_)
        print("%0.3fsec."%(tock_ - tick_,), filename_)
In [ ]: