Author: James Bourbeau
In [1]:
    
%load_ext watermark
%watermark -u -d -v -p numpy,matplotlib,scipy,pandas,sklearn,mlxtend
    
    
In [2]:
    
%matplotlib inline
from __future__ import division, print_function
import os
from collections import defaultdict
import numpy as np
from scipy.sparse import block_diag
import pandas as pd
import matplotlib.pyplot as plt
import seaborn.apionly as sns
import json
from scipy.interpolate import UnivariateSpline
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc, classification_report
from sklearn.model_selection import cross_val_score, StratifiedShuffleSplit, KFold, StratifiedKFold
import comptools as comp
import comptools.analysis.plotting as plotting
color_dict = comp.analysis.get_color_dict()
    
    
    
    
Choose whether to train on the combined 'light'/'heavy' composition classes or on the individual compositions ('P', 'He', 'O', 'Fe')
In [3]:
    
comp_class = True
comp_list = ['light', 'heavy'] if comp_class else ['P', 'He', 'O', 'Fe']
    
Define energy binning for this analysis
In [4]:
    
energybins = comp.analysis.get_energybins()
    
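comp.analysis.get_energybins is project-specific; a minimal, hypothetical stand-in (assuming 0.1-wide log10(E/GeV) bins over the analysis range, with the attribute names used below) would look something like:
In [ ]:
    
# Hypothetical sketch of the energy binning object -- not the actual comptools API
class EnergyBins(object):
    def __init__(self, log_energy_min=6.1, log_energy_max=8.0, bin_width=0.1):
        self.log_energy_min = log_energy_min
        self.log_energy_max = log_energy_max
        self.log_energy_bins = np.arange(log_energy_min, log_energy_max + bin_width, bin_width)
        self.log_energy_midpoints = (self.log_energy_bins[1:] + self.log_energy_bins[:-1]) / 2
        self.energy_midpoints = 10**self.log_energy_midpoints
    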
In [5]:
    
df_sim_train, df_sim_test = comp.load_sim(config='IC86.2012', log_energy_min=6.0, log_energy_max=8.3)
    
In [6]:
    
log_reco_energy_sim_test = df_sim_test['lap_log_energy']
log_true_energy_sim_test = df_sim_test['MC_log_energy']
    
In [7]:
    
feature_list, feature_labels = comp.analysis.get_training_features()
    
Get composition classifier pipeline
In [8]:
    
pipeline_str = 'BDT'
pipeline = comp.get_pipeline(pipeline_str)
    
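comp.get_pipeline is likewise project-specific; a hypothetical equivalent for the 'BDT' key (assuming a scikit-learn boosted decision tree wrapped in a Pipeline) might be:
In [ ]:
    
# Hypothetical stand-in for comp.get_pipeline('BDT') -- not the actual comptools implementation
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.pipeline import Pipeline
example_pipeline = Pipeline([('BDT', GradientBoostingClassifier(max_depth=3, n_estimators=100))])
    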
In [9]:
    
pipeline
    
    Out[9]:
In [10]:
    
pipeline.fit(df_sim_train[feature_list], df_sim_train['target'])
    
    Out[10]:
In [13]:
    
efficiencies = {}
efficiencies_err = {}
for composition in comp_list:
    eff_path = os.path.join(comp.paths.comp_data_dir, 'unfolding',
                            'efficiency-{}.npy'.format(composition))
    efficiencies[composition] = np.load(eff_path)
    eff_err_path = os.path.join(comp.paths.comp_data_dir, 'unfolding',
                                'efficiency-err-{}.npy'.format(composition))
    efficiencies_err[composition] = np.load(eff_err_path)
    
Plot efficiencies
In [16]:
    
fig, ax = plt.subplots()
for composition in comp_list:
    ax.errorbar(energybins.log_energy_midpoints, efficiencies[composition],
                yerr=efficiencies_err[composition], color=color_dict[composition],
                label=composition, marker='.')
ax.set_xlabel('$\mathrm{\log_{10}(E_{true}/GeV)}$')
ax.set_ylabel('Detection efficiency \n($\mathrm{N_{passed}/N_{thrown}}$)')
ax.grid()
ax.ticklabel_format(style='sci',axis='y')
ax.yaxis.major.formatter.set_powerlimits((0,0))
plt.show()
    
    
In [43]:
    
# Interleave the per-composition efficiencies to match the
# [light, heavy, light, heavy, ...] cause ordering used for unfolding
det_efficiencies = np.empty(len(energybins.log_energy_midpoints)*2)
det_efficiencies[::2] = efficiencies['light']
det_efficiencies[1::2] = efficiencies['heavy']
    
In [44]:
    
det_efficiencies_err = np.empty_like(det_efficiencies)
det_efficiencies_err[::2] = efficiencies_err['light']
det_efficiencies_err[1::2] = efficiencies_err['heavy']
    
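The interleaving can also be done (and checked) in one step: a column stack followed by ravel alternates light/heavy within each energy bin, matching the cause ordering of the block response matrix built below.
In [ ]:
    
# Sanity check: column_stack + ravel reproduces the interleaved layout
interleaved = np.column_stack((efficiencies['light'], efficiencies['heavy'])).ravel()
np.testing.assert_allclose(interleaved, det_efficiencies)
    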
In [11]:
    
df_sim = comp.load_sim(config='IC86.2012', test_size=0)
    
In [12]:
    
df_sim.lap_cos_zenith.min(), df_sim.lap_cos_zenith.max()
    
    Out[12]:
In [13]:
    
(df_sim.lap_cos_zenith.max() + df_sim.lap_cos_zenith.min())/2
    
    Out[13]:
In [14]:
    
counts_MC_energy_bins = np.histogram(df_sim.MC_log_energy, bins=energybins.log_energy_bins)[0]
counts_MC_energy_bins
    
    Out[14]:
In [15]:
    
counts_reco_energy_bins = np.histogram(df_sim.lap_log_energy, bins=energybins.log_energy_bins)[0]
counts_reco_energy_bins
    
    Out[15]:
In [16]:
    
counts_reco_energy_bins / counts_MC_energy_bins
    
    Out[16]:
In [17]:
    
fig, ax = plt.subplots()
plotting.plot_steps(energybins.log_energy_bins, counts_MC_energy_bins, ax=ax)
ax.set_xlabel('$\mathrm{\log_{10}(E_{MC}/GeV)}$')
ax.set_ylabel('\# true events')
ax.set_xlim(energybins.log_energy_min, energybins.log_energy_max)
ax.grid()
plt.show()
    
    
    
In [18]:
    
fig, ax = plt.subplots()
plotting.plot_steps(energybins.log_energy_bins, counts_reco_energy_bins, ax=ax)
ax.set_xlabel('$\mathrm{\log_{10}(E_{reco}/GeV)}$')
ax.set_ylabel('\# reco events')
ax.set_xlim(energybins.log_energy_min, energybins.log_energy_max)
ax.grid()
plt.show()
    
    
In [19]:
    
fig, ax = plt.subplots()
plotting.plot_steps(energybins.log_energy_bins, counts_reco_energy_bins / counts_MC_energy_bins, ax=ax)
ax.axhline(1.0, marker='None', ls='-.', color='k')
ax.set_xlabel('$\mathrm{\log_{10}(E/GeV)}$')
ax.set_ylabel('\# reco events / \# true events')
ax.set_xlim(energybins.log_energy_min, energybins.log_energy_max)
ax.grid()
plt.show()
    
    
In [20]:
    
def run_to_energy_bin(run):
    """Map a simulation run number to its thrown log10(E/GeV) energy bin."""
    ebin_first = 5.0
    ebin_last = 7.9
    # Taken from http://simprod.icecube.wisc.edu/cgi-bin/simulation/cgi/cfg?dataset=12360
    # Runs cycle through the 30 thrown bins: run 1 -> 5.0, run 30 -> 7.9, run 31 -> 5.0, ...
    return (ebin_first*10 + (run-1) % (ebin_last*10 - ebin_first*10 + 1)) / 10
    
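A quick check of the cyclic mapping: runs 1 through 30 cover the thrown bins 5.0 through 7.9, after which the pattern repeats.
In [ ]:
    
for run in [1, 2, 30, 31]:
    print(run, run_to_energy_bin(run))  # -> 5.0, 5.1, 7.9, 5.0
    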
In [21]:
    
def thrown_showers_per_ebin(sim_list, log_energy_bins=None):
    # Determine the thrown energy bin of every simulation file from its run number
    e_bins = []
    for sim in sim_list:
        print(sim)
        for f in comp.simfunctions.get_level3_sim_files_iterator(sim):
            start_idx = f.find('Run')
            run = int(f[start_idx+3: start_idx+9])
            e_bins.append(run_to_energy_bin(run))
    if log_energy_bins is None:
        log_energy_bins = np.arange(5, 8.1, 0.1)
    vals = np.histogram(e_bins, bins=log_energy_bins)[0]
    # Each simulation file contains 100 resampled showers
    n_showers_per_file = 100
    thrown_showers = vals * n_showers_per_file
    return thrown_showers
    
In [22]:
    
sim_list = [12360, 12362, 12630, 12631]
thrown_showers = thrown_showers_per_ebin(sim_list, log_energy_bins=energybins.log_energy_bins)
    
    
In [23]:
    
fig, ax = plt.subplots()
plotting.plot_steps(energybins.log_energy_bins, counts_reco_energy_bins / thrown_showers, ax=ax, 
                    label='Reco energy bins', color='C0', alpha=0.75)
plotting.plot_steps(energybins.log_energy_bins, counts_MC_energy_bins / thrown_showers, ax=ax, 
                    label='MC energy bins', color='C1', alpha=0.75)
# ax.axhline(1.0, marker='None', ls='-.', color='k')
ax.set_xlabel('$\mathrm{\log_{10}(E/GeV)}$')
ax.set_ylabel('\# passed events / \# thrown events')
ax.set_xlim(energybins.log_energy_min, energybins.log_energy_max)
ax.set_ylim(0)
ax.grid()
ax.legend()
plt.show()
    
    
In [47]:
    
sim_dict = {'light': [12360, 12630],
            'heavy': [12362, 12631]}
efficiencies = np.empty(len(energybins.log_energy_midpoints)*2)
efficiencies_err = np.empty_like(efficiencies)
fig, ax = plt.subplots()
for composition, sim_list in sim_dict.items():
    comp_mask = df_sim.MC_comp_class == composition
    # Get number of thrown showers in each energy bin
    thrown_showers = thrown_showers_per_ebin(sim_list, log_energy_bins=energybins.log_energy_bins)
    # Get number of showers in each energy bin that pass cuts
    counts_MC_energy_bins = np.histogram(df_sim.MC_log_energy[comp_mask],
                                         bins=energybins.log_energy_bins)[0].astype(float)
    
    # Energy bins above log10(E/GeV) = 7 use the larger 1700 m thrown radius
    large_thrown_radius = energybins.log_energy_midpoints > 7
    # (Optional) scale the larger thrown radius up by the area ratio:
    # counts_MC_energy_bins[large_thrown_radius] *= (1700/1100)**2
    # Calculate detection efficiency and add them to normalizations array
    eff, eff_err = comp.ratio_error(counts_MC_energy_bins, np.sqrt(counts_MC_energy_bins),
                                    thrown_showers, np.sqrt(thrown_showers))
    
    start_idx = 0 if composition == 'light' else 1
    efficiencies[start_idx::2] = eff
    efficiencies_err[start_idx::2] = eff_err
    # Plot detection efficiencies
    plotting.plot_steps(energybins.log_energy_bins, eff, yerr=eff_err, ax=ax, 
                        label=composition, color=color_dict[composition], alpha=0.75)
    
    spl = UnivariateSpline(energybins.log_energy_midpoints[~large_thrown_radius],
                           eff[~large_thrown_radius], s=5)
    ax.plot(energybins.log_energy_midpoints[~large_thrown_radius],
            spl(energybins.log_energy_midpoints[~large_thrown_radius]), color=color_dict[composition])
    
    spl = UnivariateSpline(energybins.log_energy_midpoints[large_thrown_radius],
                           eff[large_thrown_radius], s=5)
    ax.plot(energybins.log_energy_midpoints[large_thrown_radius],
            spl(energybins.log_energy_midpoints[large_thrown_radius]), color=color_dict[composition])
# ax.axhline(1.0, marker='None', ls='-.', color='k')
ax.set_xlabel('$\mathrm{\log_{10}(E_{MC}/GeV)}$')
ax.set_ylabel('Detection efficiency \n($\mathrm{N_{passed}/N_{thrown}}$)')
# ax.set_xlim(energybins.log_energy_min, energybins.log_energy_max)
ax.set_ylim(0)
plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
ax.grid()
ax.legend()
efficiency_outfile = os.path.join(comp.paths.figures_dir, 'unfolding', 'iter-bayesian',
                                  'detection-efficiency.png')
comp.check_output_dir(efficiency_outfile)
plt.savefig(efficiency_outfile)
plt.show()
    
    
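Note that the splines are fit separately below and above log10(E/GeV) = 7 because the thrown radius increases from 1100 m to 1700 m there, which produces a step in the raw efficiency.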
    
In [43]:
    
efficiencies, efficiencies_err
    
    Out[43]:
In [26]:
    
@np.vectorize
def get_sim_thrown_radius(log_energy):
    """Return the simulation thrown radius (in meters) for a given log10(E/GeV)."""
    if log_energy <= 6:
        thrown_radius = 800.0
    elif (log_energy > 6) & (log_energy <= 7):
        thrown_radius = 1100.0
    elif (log_energy > 7) & (log_energy <= 8):
        thrown_radius = 1700.0
    else:
        raise ValueError('Invalid energy entered')
    return thrown_radius
    
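A quick check of the radius steps (800 m up to 10^6 GeV, 1100 m up to 10^7 GeV, and 1700 m up to 10^8 GeV):
In [ ]:
    
get_sim_thrown_radius(np.array([5.5, 6.5, 7.5]))  # -> array([ 800., 1100., 1700.])
    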
In [27]:
    
thrown_radii = get_sim_thrown_radius(energybins.log_energy_midpoints)
thrown_area = np.pi * (thrown_radii**2)
geom_factor = (df_sim.lap_cos_zenith.max() + df_sim.lap_cos_zenith.min()) / 2
print(geom_factor)
fig, ax = plt.subplots()
plotting.plot_steps(energybins.log_energy_bins, thrown_area * geom_factor)
ax.set_xlim(energybins.log_energy_min, energybins.log_energy_max)
ax.set_xlabel('$\mathrm{\log_{10}(E_{true}/GeV)}$')
ax.set_ylabel('Thrown area [$\mathrm{m^2}$]')
plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
ax.grid()
thrown_area_outfile = os.path.join(comp.paths.figures_dir, 'unfolding', 'iter-bayesian',
                                  'thrown-area.png')
comp.check_output_dir(thrown_area_outfile)
plt.savefig(thrown_area_outfile)
plt.show()
    
    
    
In [29]:
    
sim_list = [12360, 12362, 12630, 12631]
thrown_showers = thrown_showers_per_ebin(sim_list, log_energy_bins=energybins.log_energy_bins)
    
    
In [31]:
    
passed_showers = np.histogram(df_sim.loc[:, 'MC_log_energy'], bins=energybins.log_energy_bins)[0]
    
In [32]:
    
efficiency, efficiency_err = comp.ratio_error(passed_showers, np.sqrt(passed_showers),
                                              thrown_showers, np.sqrt(thrown_showers))
    
In [33]:
    
geom_factor = (df_sim.lap_cos_zenith.max() + df_sim.lap_cos_zenith.min()) / 2
    
In [35]:
    
thrown_radii = get_sim_thrown_radius(energybins.log_energy_midpoints)
thrown_areas = np.pi * thrown_radii**2
    
In [36]:
    
eff_area = efficiency * thrown_areas * geom_factor
    
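That is, the effective area is the thrown area projected onto the mean zenith and scaled by the per-bin detection efficiency: A_eff(E) = efficiency(E) * pi * R_thrown(E)^2 * <cos(zenith)>.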
In [41]:
    
plotting.plot_steps(energybins.log_energy_bins, eff_area)
plt.xlabel('$\mathrm{\log_{10}(E/GeV)}$')
plt.ylabel('Effective area [$\mathrm{m^2}$]')
plt.ylim(0)
plt.show()
    
    
In [18]:
    
df_data = comp.load_data(config='IC86.2012', log_energy_min=6.0, log_energy_max=8.3)
    
In [19]:
    
X_data = comp.dataframe_functions.dataframe_to_array(df_data, feature_list + ['lap_log_energy'])
log_energy_data = X_data[:,-1]
X_data = X_data[:,:-1]
    
In [20]:
    
# is_finite_mask = np.isfinite(data.X)
# not_finite_mask = np.logical_not(is_finite_mask)
# finite_data_mask = np.logical_not(np.any(not_finite_mask, axis=1))
# data = data[finite_data_mask]
    
In [21]:
    
data_predictions = pipeline.predict(X_data)
    
In [22]:
    
# Get composition masks
data_labels = np.array([comp.dataframe_functions.label_to_comp(pred) for pred in data_predictions])
data_light_mask = data_labels == 'light'
data_heavy_mask = data_labels == 'heavy'
    
In [23]:
    
# Get number of identified comp in each energy bin
df_flux = {}
comp_list = ['light', 'heavy']
for composition in comp_list:
    comp_mask = data_labels == composition
    df_flux['counts_' + composition] = np.histogram(log_energy_data[comp_mask],
                                            bins=energybins.log_energy_bins)[0]
    df_flux['counts_' + composition + '_err'] = np.sqrt(df_flux['counts_' + composition])
df_flux['counts_total'] = np.histogram(log_energy_data, bins=energybins.log_energy_bins)[0]
df_flux['counts_total_err'] = np.sqrt(df_flux['counts_total'])
    
In [24]:
    
# Collect the per-composition counts into a DataFrame used for the unfolding
unfolding_df = pd.DataFrame(df_flux)
    
In [25]:
    
unfolding_df.index.rename('log_energy_bin_idx', inplace=True)
    
In [26]:
    
unfolding_df
    
    Out[26]:
In [53]:
    
# num_particles, num_particles_err = comp.analysis.get_num_particles(sim_train, data, pipeline, comp_list)
    
In [54]:
    
# unfolding_df['counts_light'] = num_particles['light']
# unfolding_df['counts_heavy'] = num_particles['heavy']
# unfolding_df['counts_err_light'] = num_particles_err['light']
# unfolding_df['counts_err_heavy'] = num_particles_err['heavy']
    
In [46]:
    
# pipeline.fit(sim_train.X, sim_train.y)
test_predictions = pipeline.predict(df_sim_test[feature_list])
true_comp = df_sim_test['target'].apply(comp.dataframe_functions.label_to_comp)
pred_comp = pd.Series([comp.dataframe_functions.label_to_comp(i) for i in test_predictions])
    
In [47]:
    
# response_list = []
# response_err_list = []
# sim_bin_idxs = np.digitize(log_energy_sim_test, energybins.log_energy_bins) - 1
# energy_bin_idx = np.unique(sim_bin_idxs)
# # energy_bin_idx = energy_bin_idx[1:]
# print(energy_bin_idx)
# # print(energybins.energy_midpoints.shape)
# for bin_idx in energy_bin_idx:
#     if (bin_idx == -1) or (bin_idx == energybins.energy_midpoints.shape[0]):
#         continue
#     sim_bin_mask = sim_bin_idxs == bin_idx
#     response_mat = confusion_matrix(true_comp[sim_bin_mask], pred_comp[sim_bin_mask],
#                                     labels=comp_list)
#     # Transpose response matrix to get MC comp on x-axis and reco comp on y-axis
#     response_mat = response_mat.T
#     # Get response matrix statistical error
#     response_mat_err = np.sqrt(response_mat)
#     response_err_list.append(response_mat_err)
#     response_list.append(response_mat)
# block_response = block_diag(response_list).toarray()
# # block_response = np.flipud(block_response)
# # Normalize along MC comp axis to go from counts to probabilities
# block_response = block_response / block_response.sum(axis=0)
# print('block_response = \n{}'.format(block_response))
# block_response_err = block_diag(response_err_list).toarray()
# # block_response_err = np.flipud(block_response_err)
# block_response_err = block_response_err / block_response_err.sum(axis=0)
# print('block_response_err = \n{}'.format(block_response_err))
    
In [48]:
    
# res_mat_outfile = os.path.join(comp.paths.comp_data_dir, 'unfolding', 'block_response.txt')
# res_mat_err_outfile = os.path.join(comp.paths.comp_data_dir, 'unfolding', 'block_response_err.txt')
# comp.check_output_dir(res_mat_outfile)
# comp.check_output_dir(res_mat_err_outfile)
# np.savetxt(res_mat_outfile, block_response)
# np.savetxt(res_mat_err_outfile, block_response_err)
    
In [49]:
    
true_ebin_idxs = np.digitize(log_true_energy_sim_test, energybins.log_energy_bins) - 1
reco_ebin_idxs = np.digitize(log_reco_energy_sim_test, energybins.log_energy_bins) - 1
energy_bin_idx = np.unique(true_ebin_idxs)
print(energy_bin_idx)
hstack_list = []
for true_ebin_idx in energy_bin_idx:
    if (true_ebin_idx == -1) or (true_ebin_idx == energybins.energy_midpoints.shape[0]):
        continue
    true_ebin_mask = true_ebin_idxs == true_ebin_idx
    
    vstack_list = []
    for reco_ebin_idx in energy_bin_idx:
        if (reco_ebin_idx == -1) or (reco_ebin_idx == energybins.energy_midpoints.shape[0]):
            continue
        reco_ebin_mask = reco_ebin_idxs == reco_ebin_idx
        
        combined_mask = true_ebin_mask & reco_ebin_mask
        if combined_mask.sum() == 0:
            response_mat = np.zeros((2, 2), dtype=int)
        else:
            response_mat = confusion_matrix(true_comp[combined_mask],
                                            pred_comp[combined_mask],
                                            labels=comp_list)
        # Transpose response matrix to get MC comp on x-axis and reco comp on y-axis
        response_mat = response_mat.T
        vstack_list.append(response_mat)
    hstack_list.append(np.vstack(vstack_list))
    
res = np.hstack(hstack_list)
res_err = np.sqrt(res)
    
    
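The result is a block matrix: each 2x2 composition block sits at (reconstructed energy bin, true energy bin), so entry [2*reco_bin + reco_comp, 2*true_bin + true_comp], with 0 = light and 1 = heavy, counts events thrown in one (energy, composition) cause bin and observed in another. For example:
In [ ]:
    
# Events thrown as heavy in true energy bin 10 but classified as light
# in reconstructed energy bin 10 (bin indices here are only illustrative)
true_bin, reco_bin = 10, 10
res[2*reco_bin + 0, 2*true_bin + 1]
    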
In [51]:
    
res_col_sum = res.sum(axis=0)
res_col_sum_err = np.array([np.sqrt(np.sum(res_err[:, i]**2)) for i in range(res_err.shape[1])])
normalizations, normalizations_err = comp.analysis.ratio_error(res_col_sum, res_col_sum_err,
                                                               det_efficiencies, det_efficiencies_err)
res_normalized, res_normalized_err = comp.analysis.ratio_error(res, res_err,
                                                               normalizations, normalizations_err,
                                                               nan_to_num=True)
    
    
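Dividing each column by (column sum / detection efficiency) makes the columns of the normalized response matrix sum to the per-bin efficiencies rather than to 1, so events lost to the quality cuts are propagated through the unfolding (verified below).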
In [52]:
    
np.testing.assert_allclose(res_normalized.sum(axis=0), det_efficiencies)
    
In [53]:
    
res_normalized
    
    Out[53]:
In [54]:
    
res_normalized_err
    
    Out[54]:
In [55]:
    
fig, ax = plt.subplots()
sns.heatmap(res[24:26, 24:26], annot=True, fmt='d', ax=ax, square=True,
           xticklabels=['light', 'heavy'], yticklabels=['light', 'heavy'],
           cbar_kws={'label': 'Counts'}, vmin=0, cmap='viridis')
ax.invert_yaxis()
plt.xlabel('True composition')
plt.ylabel('Pred composition')
plt.title('$\mathrm{7.6 < \log_{10}(E_{true}/GeV) < 7.7}$' + '\n$\mathrm{7.6 < \log_{10}(E_{reco}/GeV) < 7.7}$')
res_mat_outfile = os.path.join(comp.paths.figures_dir, 'unfolding', 'response-matrix-single-energy-bin.png')
comp.check_output_dir(res_mat_outfile)
plt.savefig(res_mat_outfile)
plt.show()
    
    
In [56]:
    
plt.imshow(res, origin='lower', cmap='viridis')
# Diagonal guide: perfect energy reconstruction would concentrate counts here
plt.plot([0, res.shape[0]-1], [0, res.shape[1]-1], marker='None', ls=':', color='C1')
# Gray lines delineate the 2x2 composition blocks in each energy bin
for i in np.arange(0, res.shape[0], 2):
    plt.axvline(i-0.5, marker='None', ls='-', lw=0.5, color='gray')
    plt.axhline(i-0.5, marker='None', ls='-', lw=0.5, color='gray')
plt.xlabel('True bin')
plt.ylabel('Reconstructed bin')
plt.colorbar(label='Counts')
res_mat_outfile = os.path.join(comp.paths.figures_dir, 'unfolding', 'response-statistics.png')
comp.check_output_dir(res_mat_outfile)
plt.savefig(res_mat_outfile)
plt.show()
    
    
In [57]:
    
plt.imshow(np.sqrt(res), origin='lower', cmap='viridis')
plt.plot([0, res.shape[0]-1], [0, res.shape[1]-1], marker='None', ls=':', color='C1')
for i in np.arange(0, res.shape[0], 2):
    plt.axvline(i-0.5, marker='None', ls='-', lw=0.5, color='gray')
    plt.axhline(i-0.5, marker='None', ls='-', lw=0.5, color='gray')
    
plt.xlabel('True bin')
plt.ylabel('Reconstructed bin')
plt.colorbar(label='Count errors', format='%d')
res_mat_outfile = os.path.join(comp.paths.figures_dir, 'unfolding', 'response-statistics-err.png')
comp.check_output_dir(res_mat_outfile)
plt.savefig(res_mat_outfile)
plt.show()
    
    
In [58]:
    
plt.imshow(res_normalized, origin='lower', cmap='viridis')
plt.plot([0, res.shape[0]-1], [0, res.shape[1]-1], marker='None', ls=':', color='C1')
for i in np.arange(0, res.shape[0], 2):
    plt.axvline(i-0.5, marker='None', ls='-', lw=0.5, color='gray')
    plt.axhline(i-0.5, marker='None', ls='-', lw=0.5, color='gray')
    
plt.xlabel('True bin')
plt.ylabel('Reconstructed bin')
plt.title('Response matrix')
plt.colorbar(label='$\mathrm{P(E_i|C_{\mu})}$')
res_mat_outfile = os.path.join(comp.paths.figures_dir, 'unfolding', 'response-matrix.png')
comp.check_output_dir(res_mat_outfile)
plt.savefig(res_mat_outfile)
plt.show()
    
    
In [59]:
    
plt.imshow(res_normalized_err, origin='lower', cmap='viridis')
plt.plot([0, res.shape[0]-1], [0, res.shape[1]-1], marker='None', ls=':', color='C1')
for i in np.arange(0, res.shape[0], 2):
    plt.axvline(i-0.5, marker='None', ls='-', lw=0.5, color='gray')
    plt.axhline(i-0.5, marker='None', ls='-', lw=0.5, color='gray')
    
plt.xlabel('True bin')
plt.ylabel('Reconstructed bin')
plt.title('Response matrix error')
plt.colorbar(label='$\mathrm{\delta P(E_i|C_{\mu})}$')
res_mat_outfile = os.path.join(comp.paths.figures_dir, 'unfolding', 'response-matrix-err.png')
comp.check_output_dir(res_mat_outfile)
plt.savefig(res_mat_outfile)
plt.show()
    
    
In [61]:
    
res_mat_outfile = os.path.join(comp.paths.comp_data_dir, 'unfolding', 'response.txt')
res_mat_err_outfile = os.path.join(comp.paths.comp_data_dir, 'unfolding', 'response_err.txt')
comp.check_output_dir(res_mat_outfile)
comp.check_output_dir(res_mat_err_outfile)
np.savetxt(res_mat_outfile, res_normalized)
np.savetxt(res_mat_err_outfile, res_normalized_err)
    
In [60]:
    
from icecube.weighting.weighting import from_simprod
from icecube.weighting.fluxes import GaisserH3a, GaisserH4a, Hoerandel5
    
In [61]:
    
df_sim = comp.load_sim(config='IC86.2012', test_size=0)
df_sim.head()
    
    Out[61]:
In [62]:
    
flux = GaisserH3a()
model_flux = {}
# PDG codes: 2212 = p, 1000020040 = He, 1000070140 = N, 1000130270 = Al, 1000260560 = Fe
for ptype in [2212, 1000020040, 1000070140, 1000130270, 1000260560]:
    model_flux[ptype] = flux(energybins.energy_midpoints, ptype)
model_flux_df = pd.DataFrame(model_flux)
model_flux_df.index = energybins.energy_midpoints
model_flux_df
    
    Out[62]:
In [63]:
    
fig, ax = plt.subplots()
for key in model_flux_df.columns:
    ax.plot(np.log10(model_flux_df.index), model_flux_df.index**2.7*model_flux_df[key], label=key)
ax.plot(np.log10(model_flux_df.index), model_flux_df.index**2.7*model_flux_df.sum(axis=1), label='All particle')
ax.set_yscale("log", nonposy='clip')
ax.set_ylim(1e3, 1e5)
ax.set_xlabel('$\mathrm{\log_{10}(E/GeV)}$')
ax.set_ylabel('$\mathrm{E^{2.7} \, J(E)}$')
ax.grid()
ax.legend()
plt.show()
    
    
In [64]:
    
model_flux_df.sum(axis=1)
    
    Out[64]:
In [65]:
    
simlist = np.unique(df_sim['sim'])
for i, sim in enumerate(simlist):
    gcd_file, sim_files = comp.simfunctions.get_level3_sim_files(sim)
    num_files = len(sim_files)
    # Accumulate the generation spectrum over all datasets, weighted by the number of files in each
    if i == 0:
        generator = num_files*from_simprod(int(sim))
    else:
        generator += num_files*from_simprod(int(sim))
    
In [66]:
    
priors_list = ['h3a', 'h4a', 'antih3a', 'Hoerandel5', 'antiHoerandel5']
# priors_list = ['h3a', 'h4a', 'antih3a', 'Hoerandel5', 'antiHoerandel5', 'uniform', 'alllight', 'allheavy']
model_ptypes = {}
model_ptypes['h3a'] = {'light': [2212, 1000020040], 'heavy': [1000070140, 1000130270, 1000260560]}
model_ptypes['h4a'] = {'light': [2212, 1000020040], 'heavy': [1000070140, 1000130270, 1000260560]}
model_ptypes['Hoerandel5'] = {'light': [2212, 1000020040], 'heavy': [1000070140, 1000130270, 1000260560]}
    
In [67]:
    
priors = defaultdict(list)
for flux, name in zip([GaisserH3a(), GaisserH3a(), GaisserH4a(), Hoerandel5(), Hoerandel5()],
                      ['h3a', 'antih3a', 'h4a', 'Hoerandel5', 'antiHoerandel5']):
    priors_raw = defaultdict(list)
    for energy_mid in energybins.energy_midpoints:
        energy = [energy_mid]*5
        ptype = [2212, 1000020040, 1000070140, 1000130270, 1000260560]
        weights = flux(energy, ptype)
        light_prior = weights[:2].sum()
        heavy_prior = weights[2:].sum()
        # The 'anti' models swap the light and heavy fluxes
        if 'anti' in name:
            light_prior, heavy_prior = heavy_prior, light_prior
        priors_raw['light'].append(light_prior)
        priors_raw['heavy'].append(heavy_prior)
        priors[name].extend([light_prior, heavy_prior])
    unfolding_df['{}_flux_light'.format(name)] = priors_raw['light']
    unfolding_df['{}_flux_heavy'.format(name)] = priors_raw['heavy']
    
# unfolding_df['uniform_flux_light'] = [0.5]*len(priors_raw['light'])
# unfolding_df['uniform_flux_heavy'] = [0.5]*len(priors_raw['heavy'])
# unfolding_df['alllight_flux_light'] = [0.9]*len(priors_raw['light'])
# unfolding_df['alllight_flux_heavy'] = [0.1]*len(priors_raw['heavy'])
# unfolding_df['allheavy_flux_light'] = [0.1]*len(priors_raw['light'])
# unfolding_df['allheavy_flux_heavy'] = [0.9]*len(priors_raw['heavy'])
    
In [68]:
    
unfolding_df
    
    Out[68]:
In [69]:
    
unfolding_df_outfile = os.path.join(comp.paths.comp_data_dir, 'unfolding', 'unfolding-dataframe.csv')
comp.check_output_dir(unfolding_df_outfile)
unfolding_df.to_csv(unfolding_df_outfile)
    
In [70]:
    
formatted_df = pd.DataFrame()
    
In [71]:
    
counts_formatted = []
priors_formatted = defaultdict(list)
for index, row in unfolding_df.iterrows():
    counts_formatted.extend([row['counts_light'], row['counts_heavy']])
    for priors_name in priors_list:
        priors_formatted[priors_name].extend([row[priors_name+'_flux_light'], row[priors_name+'_flux_heavy']])
        
formatted_df['counts'] = counts_formatted
formatted_df['counts_err'] = np.sqrt(counts_formatted)
for key, value in priors_formatted.items():
    formatted_df[key+'_flux'] = value
    formatted_df[key+'_priors'] = formatted_df[key+'_flux'] / formatted_df[key+'_flux'].sum()
formatted_df.index.rename('log_energy_bin_idx', inplace=True)
    
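Each model's priors column is its flux normalized over all (energy bin, composition) causes, so every priors column should sum to 1:
In [ ]:
    
formatted_df[[name + '_priors' for name in priors_list]].sum()
    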
In [72]:
    
formatted_df.head()
    
    Out[72]:
Save formatted DataFrame to disk
In [73]:
    
formatted_df_outfile = os.path.join(comp.paths.comp_data_dir, 'unfolding',
                                    'unfolding-dataframe-PyUnfold-formatted.csv')
comp.check_output_dir(formatted_df_outfile)
formatted_df.to_csv(formatted_df_outfile)
    
In [77]:
    
model_to_ls = {'h3a': '-.', 'h4a': ':', 'Hoerandel5': '-', 'antih3a': '--'}
model_to_marker = {'h3a': '.', 'h4a': '^', 'Hoerandel5': '*', 'antih3a': '.'}
fig, ax = plt.subplots()
for model in ['h3a', 'h4a', 'Hoerandel5', 'antih3a']:
    key = '{}_priors'.format(model)
    light_priors = formatted_df[key][::2]
    heavy_priors = formatted_df[key][1::2]
    ax.plot(energybins.log_energy_midpoints, light_priors,
            color=color_dict['light'], ls=model_to_ls[model], marker=model_to_marker[model],
            label='{} light'.format(model), alpha=0.75)
    ax.plot(energybins.log_energy_midpoints, heavy_priors,
            color=color_dict['heavy'], ls=model_to_ls[model], marker=model_to_marker[model],
            label='{} heavy'.format(model), alpha=0.75)
ax.set_xlabel('$\mathrm{\log_{10}(E_{reco}/GeV)}$')
ax.set_ylabel('Prior')
ax.set_xlim([energybins.log_energy_min, energybins.log_energy_max])
# ax.set_ylim([0, 1])
ax.set_yscale("log", nonposy='clip')
ax.grid()
leg = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5),
                 frameon=False, fancybox=False, numpoints=1)
priors_outfile = os.path.join(comp.paths.figures_dir, 'unfolding', 'iter-bayesian', 'priors.png')
comp.check_output_dir(priors_outfile)
plt.savefig(priors_outfile)
plt.show()
    
    
In [130]:
    
# Sanity check: read the formatted DataFrame back from disk
formatted_df_check = pd.read_csv(formatted_df_outfile, index_col='log_energy_bin_idx')
    
In [131]:
    
formatted_df_check
    
    Out[131]:
In [59]:
    
# with open('pyunfold_dict.json', 'w') as outfile:
#     data = {'counts': counts_observed,
#             'block_response': block_response.tolist(),
#             'block_response_err': block_response_err.tolist()}
#     for model in ['h3a', 'h4a', 'Hoerandel5']:
#         data['priors_{}'.format(model)] = priors[model]
#     json.dump(data, outfile)
    
    