Accuracy of BMLiNGAM

This notebook shows accuracy of BMLiNGAM for causal inference.



In [1]:

    
%matplotlib inline
%autosave 0
import sys, os
sys.path.insert(0, os.path.expanduser('~/work/git/github/taku-y/bmlingam'))
sys.path.insert(0, os.path.expanduser('~/work/git/github/pymc-devs/pymc3'))

import theano
theano.config.floatX = 'float64'

from copy import deepcopy
import hashlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import time

from expr1 import run_trial
from bmlingam import load_pklz, save_pklz
# from bmlingam import do_mcmc_bmlingam, InferParams, MCMCParams, save_pklz, load_pklz, define_hparam_searchspace, find_best_model
# from bmlingam.utils.gendata import GenDataParams, gen_artificial_data









    














    



Autosave disabled






    



/opt/conda/lib/python3.5/site-packages/matplotlib/__init__.py:1357: UserWarning:  This call to matplotlib.use() has no effect
because the backend has already been chosen;
matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

  warnings.warn(_use_error_msg)

Experimental condition

The experimental conditions are as follows.

Parameter of artiifcial data
- The number of observed samples (n_samples): [100]
- The number of confounding factors (n_confs or $Q$): [1, 3, 5, 10]
- Scale of total noise: $c=0.25, 0.5, 1.0, 3.0$.
- Scale of confounding factors: $c/\sqrt{Q}$
- Distribution of observation noise (data_noise_type): ['laplace', 'uniform']
- Scale of observation noise: 3
Hyperparameter range
- Correlation coefficient of confounding factors: (L_cov_21s): [[-.9, -.7, -.5, -.3, 0, .3, .5, .7, .9]]
- Distribution of observation noise (model_noise_type): ['gg']



In [2]:

    
conds = [
        {
            'totalnoise': totalnoise, 
            'L_cov_21s': L_cov_21s, 
            'n_samples': n_samples, 
            'n_confs': n_confs, 
            'data_noise_type': data_noise_type, 
            'model_noise_type': model_noise_type
        }
        for totalnoise in [0.25, 0.5, 1.0, 3.0]
        for L_cov_21s in [[-.9, -.7, -.5, -.3, 0, .3, .5, .7, .9]]
        for n_samples in [100]
        for n_confs in [1, 3, 5, 10] # [1, 3, 5, 10]
        for data_noise_type in ['laplace', 'uniform']
        for model_noise_type in ['gg']
    ]

Program

Identifier of trial

Identifier of a trial is determined based on:

Trial index (ix_trial)
The number of samples (n_samples)
The number of confounders (n_confs)
Distribution of observation noise of artificial data (data_noise_type)
Distribution of observation noise of BMLiNGAM model (model_noise_type)
Correlation coefficient of confounding effect (L_cov_21s)
Total noise scale (totalnoise)

We use identifiers to store results of trials.



In [3]:

    
def make_id(ix_trial, n_samples, n_confs, data_noise_type, model_noise_type, L_cov_21s, totalnoise):
    L_cov_21s_ = ' '.join([str(v) for v in L_cov_21s])
    
    return hashlib.md5(
        str((L_cov_21s_, ix_trial, n_samples, n_confs, data_noise_type, model_noise_type, totalnoise)).encode('utf-8')
    ).hexdigest()

# Test
print(make_id(55, 100, 12, 'all', 'gg', [1, 2, 3], 0.3))









    



8eb07d5dcb68e8163a7a2a0582b59d1f

Append results to dataframe



In [4]:

    
def add_result_to_df(df, result):
    if df is None:
        return pd.DataFrame({k: [v] for k, v in result.items()})
    else:
        return df.append(result, ignore_index=True)

# Test
result1 = {'col1': 10, 'col2': 20}
result2 = {'col1': 30, 'col2': -10}
df1 = add_result_to_df(None, result1)
print('--- df1 ---')
print(df1)
df2 = add_result_to_df(df1, result2)
print('--- df2 ---')
print(df2)









    



--- df1 ---
   col1  col2
0    10    20
--- df2 ---
   col1  col2
0    10    20
1    30   -10

Save and load data frame



In [5]:

    
def load_df(df_file):
    if os.path.exists(df_file):
        return load_pklz(df_file)
    else:
        return None

def save_df(df_file, df):
    save_pklz(df_file, df)

Perform experiment over conditions and trials



In [6]:

    
def df_exist_result_id(df, result_id):
    if df is not None:
        return result_id in np.array(df['result_id'])
    else:
        False

def run_expr(conds, n_trials_per_cond=50):
    """Perform evaluation of BMLiNGAM given a set of experimental conditions.
    
    For each condition, several trials are executed.
    In a trial, BMLiNGAM is applied to causal inference for artificial data.
    The average accuracy is computed for each condition.
    """
    # Filename of dataframe
    data_dir = '.'
    df_file = data_dir + '/20160822-eval-bml-results.pklz'
    
    # Load results computed in previous
    df = load_df(df_file)

    # Loop over experimental conditions
    n_skip = 0
    for cond in conds:
        print(cond)
        
        # Loop over trials
        for ix_trial in range(n_trials_per_cond):
            # Identifier of a trial for (cond, ix_trial)
            result_id = make_id(ix_trial, **cond)
            
            # Check if the result has been already stored in the data frame
            if df_exist_result_id(df, result_id):
                n_skip += 1
            else:
                # `result` is a dict including results of trials.
                # `ix_trial` is used as the random seed of the corresponding trial. 
                result = run_trial(ix_trial, cond)
                result.update({'result_id': result_id})
                
                df = add_result_to_df(df, result)
                save_df(df_file, df)
                
    print('Number of skipped trials = {}'.format(n_skip))
    return df

Run program



In [7]:

    
df = run_expr(conds)









    



{'totalnoise': 0.25, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 1, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 0.25, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 1, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 0.25, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 3, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 0.25, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 3, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 0.25, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 5, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 0.25, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 5, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 0.25, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 10, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 0.25, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 10, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 0.5, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 1, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 0.5, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 1, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 0.5, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 3, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 0.5, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 3, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 0.5, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 5, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 0.5, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 5, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 0.5, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 10, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 0.5, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 10, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 1.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 1, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 1.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 1, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 1.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 3, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 1.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 3, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 1.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 5, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 1.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 5, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 1.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 10, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 1.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 10, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 3.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 1, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 3.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 1, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 3.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 3, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 3.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 3, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 3.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 5, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 3.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 5, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
{'totalnoise': 3.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 10, 'model_noise_type': 'gg', 'data_noise_type': 'laplace', 'n_samples': 100}
{'totalnoise': 3.0, 'L_cov_21s': [-0.9, -0.7, -0.5, -0.3, 0, 0.3, 0.5, 0.7, 0.9], 'n_confs': 10, 'model_noise_type': 'gg', 'data_noise_type': 'uniform', 'n_samples': 100}
Number of skipped trials = 1600

Result table



In [8]:

    
import pandas as pd

df_file = './20160822-eval-bml-results.pklz'
df = load_pklz(df_file)
df = pd.concat(
    {
        '2log(bf)': df['log_bf'], 
        'correct rate': df['correct_rate'], 
        'totalnoise': df['totalnoise'], 
        'data noise type': df['data_noise_type'], 
        'n_confs': df['n_confs']
    }, axis=1
)
sg = df.groupby(['data noise type', 'n_confs', 'totalnoise'])
sg1 = sg['correct rate'].mean()
sg2 = sg['2log(bf)'].mean()

pd.concat(
    {
        'correct_rate': sg1, 
        '2log(bf)': sg2, 
    }, axis=1
)









    Out[8]:






  
    
      
      
      
      2log(bf)
      correct_rate
    
    
      data noise type
      n_confs
      totalnoise
      
      
    
  
  
    
      laplace
      1
      0.25
      10.658543
      0.84
    
    
      0.50
      9.956851
      0.76
    
    
      1.00
      8.132585
      0.78
    
    
      3.00
      6.067567
      0.44
    
    
      3
      0.25
      9.701838
      0.82
    
    
      0.50
      8.965892
      0.80
    
    
      1.00
      6.592364
      0.64
    
    
      3.00
      5.585870
      0.38
    
    
      5
      0.25
      9.973692
      0.88
    
    
      0.50
      9.525252
      0.80
    
    
      1.00
      7.731706
      0.70
    
    
      3.00
      6.341696
      0.60
    
    
      10
      0.25
      10.690053
      0.86
    
    
      0.50
      10.349471
      0.86
    
    
      1.00
      8.007714
      0.74
    
    
      3.00
      5.491167
      0.52
    
    
      uniform
      1
      0.25
      11.557302
      0.88
    
    
      0.50
      10.521002
      0.76
    
    
      1.00
      7.951135
      0.68
    
    
      3.00
      5.253098
      0.48
    
    
      3
      0.25
      12.815920
      0.90
    
    
      0.50
      11.660920
      0.86
    
    
      1.00
      7.951725
      0.74
    
    
      3.00
      6.008483
      0.56
    
    
      5
      0.25
      11.016579
      0.86
    
    
      0.50
      10.214459
      0.78
    
    
      1.00
      6.845886
      0.66
    
    
      3.00
      5.512882
      0.52
    
    
      10
      0.25
      11.683218
      0.88
    
    
      0.50
      10.147768
      0.82
    
    
      1.00
      7.335032
      0.62
    
    
      3.00
      6.205580
      0.50

Bayes factor and accuracy



In [9]:

    
import pandas as pd

def count(x): return np.sum(x.astype(int))
data_dir = '.'
df_file = data_dir + '/20160822-eval-bml-results.pklz'
df = load_pklz(df_file)

df = pd.concat(
    {
        '2log(bf)': df['log_bf'], 
        'correct rate': df['correct_rate'], 
        'count': df['correct_rate'], 
        'totalnoise': df['totalnoise'], 
        'data noise type': df['data_noise_type']
    }, axis=1
)

df = df.pivot_table(values=['correct rate', 'count'], 
                    index=['totalnoise', pd.cut(df['2log(bf)'], [0., 2., 6., 10., 100.])], 
                    columns='data noise type', 
                    aggfunc={'correct rate': np.mean, 'count': np.sum})
df









    Out[9]:






  
    
      
      
      count
      correct rate
    
    
      
      data noise type
      laplace
      uniform
      laplace
      uniform
    
    
      totalnoise
      2log(bf)
      
      
      
      
    
  
  
    
      0.25
      (0, 2]
      21.0
      18.0
      0.636364
      0.642857
    
    
      (2, 6]
      26.0
      31.0
      0.722222
      0.794872
    
    
      (6, 10]
      37.0
      20.0
      0.822222
      0.869565
    
    
      (10, 100]
      86.0
      107.0
      1.000000
      0.972727
    
    
      0.50
      (0, 2]
      13.0
      16.0
      0.481481
      0.533333
    
    
      (2, 6]
      37.0
      27.0
      0.711538
      0.613636
    
    
      (6, 10]
      37.0
      24.0
      0.840909
      0.800000
    
    
      (10, 100]
      74.0
      94.0
      0.961039
      0.979167
    
    
      1.00
      (0, 2]
      21.0
      18.0
      0.567568
      0.600000
    
    
      (2, 6]
      36.0
      39.0
      0.631579
      0.520000
    
    
      (6, 10]
      27.0
      34.0
      0.600000
      0.723404
    
    
      (10, 100]
      59.0
      44.0
      0.967213
      0.916667
    
    
      3.00
      (0, 2]
      28.0
      30.0
      0.509091
      0.600000
    
    
      (2, 6]
      34.0
      36.0
      0.465753
      0.486486
    
    
      (6, 10]
      17.0
      20.0
      0.515152
      0.500000
    
    
      (10, 100]
      18.0
      17.0
      0.461538
      0.472222



In [ ]:

			2log(bf)	correct_rate
data noise type	n_confs	totalnoise
laplace	1	0.25	10.658543	0.84
		0.50	9.956851	0.76
		1.00	8.132585	0.78
		3.00	6.067567	0.44
	3	0.25	9.701838	0.82
		0.50	8.965892	0.80
		1.00	6.592364	0.64
		3.00	5.585870	0.38
	5	0.25	9.973692	0.88
		0.50	9.525252	0.80
		1.00	7.731706	0.70
		3.00	6.341696	0.60
	10	0.25	10.690053	0.86
		0.50	10.349471	0.86
		1.00	8.007714	0.74
		3.00	5.491167	0.52
uniform	1	0.25	11.557302	0.88
		0.50	10.521002	0.76
		1.00	7.951135	0.68
		3.00	5.253098	0.48
	3	0.25	12.815920	0.90
		0.50	11.660920	0.86
		1.00	7.951725	0.74
		3.00	6.008483	0.56
	5	0.25	11.016579	0.86
		0.50	10.214459	0.78
		1.00	6.845886	0.66
		3.00	5.512882	0.52
	10	0.25	11.683218	0.88
		0.50	10.147768	0.82
		1.00	7.335032	0.62
		3.00	6.205580	0.50