In [ ]:
# Iterative Construction of a Penalised Vine Structure
This notebook iteratively estimate the quantile.

Libraries


In [1]:
import openturns as ot
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

%load_ext autoreload
%autoreload 2

random_state = 123
np.random.seed(random_state)

Model function

This example consider the simple additive example.


In [10]:
from depimpact.tests import func_overflow, margins_overflow, var_names_overflow, func_sum
from dependence import iterative_vine_minimize

test_func = func_overflow

Dimension and margins

We first define the problem dimension and the margins


In [3]:
if test_func == func_overflow:
    margins = margins_overflow
    dim = len(margins)
else:
    dim = 6
    margins = [ot.Normal()]*dim

We chose the coefficients of the variables throught the additive function.


In [4]:
if test_func == func_sum:
    coeficients = np.logspace(0., 5., dim+1, endpoint=False)[1:]

    n_plot = 10000
    x = np.asarray(ot.ComposedDistribution(margins).getSample(n_plot))
    y = test_func(x, coeficients)

    fig, axes = plt.subplots(dim, 1, sharex=True, sharey=True, figsize=(4, 2*dim))
    for i in range(dim):
        ax = axes[i]
        ax.plot(x[:, i], y, '.')
        ax.set_xlabel(r'$X_%d$' % (i+1), fontsize=12)
        ax.set_ylabel(r'$y$', fontsize=12)
    fig.tight_layout()

Copula families

We now consider only Gaussian dependencies for this example


In [5]:
families = np.zeros((dim, dim), dtype=int)
for i in range(1, dim):
    for j in range(i):
        families[i, j] = 1

Estimations

We create an instance of the main class for conservative estimate, and we define a q_func object for the quantile as a quantity of interest


In [6]:
from depimpact import ConservativeEstimate, quantile_func

alpha = 0.95

if alpha > 0.5: # Maximizing the quantile
    def q_func(x, axis=1):
        return - quantile_func(alpha)(x, axis=axis)
else: # Minimizing
    q_func = quantile_func(alpha)

quant_estimate = ConservativeEstimate(model_func=test_func, margins=margins, families=families)

First, we compute the quantile at independence


In [7]:
n = 10000
indep_result = quant_estimate.independence(n_input_sample=n, q_func=q_func, random_state=random_state)

In [8]:
indep_result.compute_bootstrap(1000)
boot_std = indep_result.bootstrap_sample.std()
boot_mean = indep_result.bootstrap_sample.mean()
print('Quantile at independence: %.2f with a C.O.V at %.1f %%' % (boot_mean, abs(boot_std/boot_mean)*100.))


Quantile at independence: 9.44 with a C.O.V at 0.2 %

In [9]:
algorithm_parameters = {
    "n_input_sample": n,
    "n_dep_param_init": 10,
    "max_n_pairs": 1,
    "grid_type": 'lhs',
    "q_func": q_func,
    "n_add_pairs": 1,
    "n_remove_pairs": 0,
    "adapt_vine_structure": True,
    "with_bootstrap": False,
    "verbose": False,
    "iterative_save": False,
    "iterative_load": False,
    "load_input_samples": False,
    "keep_input_samples": False
}

quant_estimate = ConservativeEstimate(model_func=test_func, margins=margins, families=families)


iterative_results = iterative_vine_minimize(estimate_object=quant_estimate, **algorithm_parameters)


Iteration 1: selected pair: (1, 0)
Total number of evaluations = 2800000. Minimum quantity at 8.90.

Max number of pairs reached

In [10]:
from depimpact.dependence_plot import matrix_plot_quantities
results = iterative_results[0]
matrix_plot_quantities(results, figsize=(18, 15))
# plt.savefig('output/matrix_plot.png')


Iterative Approach

Now lets see how good we can be with the iterative appraoch.


In [19]:
algorithm_parameters = {
    "n_input_sample": 10000,
    "n_dep_param_init": 10,
    "max_n_pairs": 2,
    "grid_type": 'vertices',
    "q_func": q_func,
    "n_add_pairs": 1,
    "n_remove_pairs": 5,
    "adapt_vine_structure": True,
    "with_bootstrap": False,
    "verbose": True,
    "iterative_save": False,
    "iterative_load": False,
    "load_input_samples": False,
    "keep_input_samples": False,
    "input_names": var_names_overflow
}

quant_estimate = ConservativeEstimate(model_func=test_func, margins=margins, families=families)

iterative_results = iterative_vine_minimize(estimate_object=quant_estimate, **algorithm_parameters)


n=10000. Worst quantile of [(1, 0)] at 8.90224334121049
The variables are: K_s-Q
n=10000. Worst quantile of [(2, 0)] at 9.006464496106924
The variables are: Z_v-Q
n=10000. Worst quantile of [(2, 1)] at 9.139618226972797
The variables are: Z_v-K_s
n=10000. Worst quantile of [(3, 0)] at 9.315396923282806
The variables are: Z_m-Q
n=10000. Worst quantile of [(3, 1)] at 9.433830254687335
The variables are: Z_m-K_s
n=10000. Worst quantile of [(3, 2)] at 9.392639922181026
The variables are: Z_m-Z_v
n=10000. Worst quantile of [(4, 0)] at 8.98360348847192
The variables are: H_d-Q
n=10000. Worst quantile of [(4, 1)] at 9.177995799227025
The variables are: H_d-K_s
n=10000. Worst quantile of [(4, 2)] at 9.10235093223933
The variables are: H_d-Z_v
n=10000. Worst quantile of [(4, 3)] at 9.394098627510612
The variables are: H_d-Z_m
n=10000. Worst quantile of [(5, 0)] at 9.24314574845784
The variables are: C_b-Q
n=10000. Worst quantile of [(5, 1)] at 9.375341506902279
The variables are: C_b-K_s
n=10000. Worst quantile of [(5, 2)] at 9.311816555363391
The variables are: C_b-Z_v
n=10000. Worst quantile of [(5, 3)] at 9.452199342722688
The variables are: C_b-Z_m
n=10000. Worst quantile of [(5, 4)] at 9.292928657028435
The variables are: C_b-H_d
n=10000. Worst quantile of [(6, 0)] at 9.427256001407729
The variables are: L-Q
n=10000. Worst quantile of [(6, 1)] at 9.432594854714296
The variables are: L-K_s
n=10000. Worst quantile of [(6, 2)] at 9.431376039927738
The variables are: L-Z_v
n=10000. Worst quantile of [(6, 3)] at 9.462036278175859
The variables are: L-Z_m
n=10000. Worst quantile of [(6, 4)] at 9.452422784319994
The variables are: L-H_d
n=10000. Worst quantile of [(6, 5)] at 9.440915506663902
The variables are: L-C_b
n=10000. Worst quantile of [(7, 0)] at 9.460298338211826
The variables are: B-Q
n=10000. Worst quantile of [(7, 1)] at 9.414527020748503
The variables are: B-K_s
n=10000. Worst quantile of [(7, 2)] at 9.415054835635
The variables are: B-Z_v
n=10000. Worst quantile of [(7, 3)] at 9.441416976794274
The variables are: B-Z_m
n=10000. Worst quantile of [(7, 4)] at 9.47619344521345
The variables are: B-H_d
n=10000. Worst quantile of [(7, 5)] at 9.47596467346882
The variables are: B-C_b
n=10000. Worst quantile of [(7, 6)] at 9.441486288542883
The variables are: B-L

Iteration 1: selected pair: (1, 0) (K_s-Q)
Total number of evaluations = 840000. Minimum quantity at 8.90.

n=10000. Worst quantile of [(1, 0), (2, 0)] at 8.13309911645036
The variables are: K_s-Q Z_v-Q
n=10000. Worst quantile of [(1, 0), (2, 1)] at 8.125332247266039
The variables are: K_s-Q Z_v-K_s
n=10000. Worst quantile of [(1, 0), (3, 0)] at 8.625038843543662
The variables are: K_s-Q Z_m-Q
n=10000. Worst quantile of [(1, 0), (3, 1)] at 8.546216452737712
The variables are: K_s-Q Z_m-K_s
n=10000. Worst quantile of [(1, 0), (3, 2)] at 8.775400130624346
The variables are: K_s-Q Z_m-Z_v
n=10000. Worst quantile of [(1, 0), (4, 0)] at 8.100706450785987
The variables are: K_s-Q H_d-Q
n=10000. Worst quantile of [(1, 0), (4, 1)] at 8.101992988352789
The variables are: K_s-Q H_d-K_s
n=10000. Worst quantile of [(1, 0), (4, 2)] at 8.538108339815608
The variables are: K_s-Q H_d-Z_v
n=10000. Worst quantile of [(1, 0), (4, 3)] at 8.764058666138508
The variables are: K_s-Q H_d-Z_m
n=10000. Worst quantile of [(1, 0), (5, 0)] at 8.488198104517384
The variables are: K_s-Q C_b-Q
n=10000. Worst quantile of [(1, 0), (5, 1)] at 8.57002845459004
The variables are: K_s-Q C_b-K_s
n=10000. Worst quantile of [(1, 0), (5, 2)] at 8.763938293707339
The variables are: K_s-Q C_b-Z_v
n=10000. Worst quantile of [(1, 0), (5, 3)] at 8.8003381329916
The variables are: K_s-Q C_b-Z_m
n=10000. Worst quantile of [(1, 0), (5, 4)] at 8.705401393266817
The variables are: K_s-Q C_b-H_d
n=10000. Worst quantile of [(1, 0), (6, 0)] at 8.727344912375665
The variables are: K_s-Q L-Q
n=10000. Worst quantile of [(1, 0), (6, 1)] at 8.806910592474873
The variables are: K_s-Q L-K_s
n=10000. Worst quantile of [(1, 0), (6, 2)] at 8.790685980174624
The variables are: K_s-Q L-Z_v
n=10000. Worst quantile of [(1, 0), (6, 5)] at 8.77394952417879
The variables are: K_s-Q L-C_b
n=10000. Worst quantile of [(1, 0), (7, 1)] at 8.806096632227899
The variables are: K_s-Q B-K_s
n=10000. Worst quantile of [(1, 0), (7, 2)] at 8.790941977182996
The variables are: K_s-Q B-Z_v
n=10000. Worst quantile of [(1, 0), (7, 3)] at 8.719155254771952
The variables are: K_s-Q B-Z_m
n=10000. Worst quantile of [(1, 0), (7, 6)] at 8.803267151572772
The variables are: K_s-Q B-L

Iteration 2: selected pair: (4, 0) (H_d-Q)
Total number of evaluations = 2820000. Minimum quantity at 8.10.

Max number of pairs reached

In [12]:
from depimpact.dependence_plot import plot_iterative_results, matrix_plot_input

plot_iterative_results(iterative_results, indep_result=indep_result, q_func=q_func)



In [22]:
n = 10000
K = 500
quant_estimate.vine_structure = None
grid_results = quant_estimate.gridsearch(n_dep_param=K, n_input_sample=n, q_func=q_func, grid_type='vertices')

In [23]:
min_result = grid_results.min_result
print(min_result.quantity)


6.680878490242667

In [17]:
from depimpact.dependence_plot import matrix_plot_input
matrix_plot_input(min_result, margins=margins)


Out[17]:
<seaborn.axisgrid.PairGrid at 0x7f0bb7fc0710>

In [46]:
from depimpact.utils import get_pair_id, get_pairs_by_levels, get_possible_structures
pairs_iter = [[1, 0]]
pairs_iter_id = [get_pair_id(dim, pair, with_plus=False) for pair in pairs_iter]
pairs_by_levels = get_pairs_by_levels(dim, pairs_iter_id)
quant_estimate.vine_structure = get_possible_structures(dim, pairs_by_levels)[0]
quant_estimate.vine_structure


Out[46]:
array([[2, 0, 0, 0, 0, 0, 0, 0],
       [8, 1, 0, 0, 0, 0, 0, 0],
       [7, 8, 3, 0, 0, 0, 0, 0],
       [6, 7, 8, 4, 0, 0, 0, 0],
       [5, 6, 7, 8, 5, 0, 0, 0],
       [4, 5, 6, 7, 8, 6, 0, 0],
       [3, 4, 5, 6, 7, 8, 7, 0],
       [1, 3, 4, 5, 6, 7, 8, 8]])

In [47]:
grid_results = quant_estimate.gridsearch(n_dep_param=K, n_input_sample=n, q_func=q_func)

In [48]:
min_result = grid_results.min_result
print(min_result.quantity)


-0.5385555538636204

In [49]:
from depimpact.dependence_plot import matrix_plot_input
matrix_plot_input(min_result, margins=margins)


Out[49]:
<seaborn.axisgrid.PairGrid at 0x7fff521bf4e0>

In [ ]: