notebook.community

Edit and run



In [1]:

    
from __future__ import division
import os
from collections import defaultdict
import itertools
import subprocess
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import pyprind

import comptools as comp

from save_pyunfold_format import save_pyunfold_root_file
from run_unfolding import unfold
    
# color_dict allows for a consistent color-coding for each composition
color_dict = comp.get_color_dict()

%matplotlib inline









    



/home/jbourbeau/.virtualenvs/composition/lib/python2.7/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)



In [2]:

    
# config = 'IC79.2010'
config = 'IC86.2012'
num_groups = 4
comp_list = comp.get_comp_list(num_groups=num_groups)



In [3]:

    
energybins = comp.get_energybins(config)



In [4]:

    
feature_list, feature_labels = comp.get_training_features()
pipeline_str = 'BDT_comp_{}_{}-groups'.format(config, num_groups)



In [5]:

    
unfolding_dir  = os.path.join(comp.paths.comp_data_dir, config,
                              'unfolding')
figures_dir  = os.path.join(comp.paths.figures_dir, 'unfolding', config,
                            'datachallenge')



In [6]:

    
df_data = comp.load_data(config=config,
                         log_energy_min=energybins.log_energy_min,
                         log_energy_max=energybins.log_energy_max,
                         columns=feature_list,
                         n_jobs=10, verbose=True)









    



[########################################] | 100% Completed |  7min 54.4s



In [7]:

    
fig, ax = plt.subplots()
df_data.reco_log_energy.plot(kind='hist', histtype='step', bins=energybins.log_energy_bins,
                             color=color_dict['data'], logy=True, ax=ax)
ax.grid()
plt.show()









    



/home/jbourbeau/.virtualenvs/composition/lib/python2.7/site-packages/matplotlib/figure.py:1743: UserWarning: This figure includes Axes that are not compatible with tight_layout, so its results might be incorrect.
  warnings.warn("This figure includes Axes that are not "

Load simulation & train BDT



In [6]:

    
df_sim_train, df_sim_test = comp.load_sim(config=config,
                                          log_energy_min=energybins.log_energy_min,
                                          log_energy_max=energybins.log_energy_max,
                                          test_size=0.5,
                                          verbose=True)









    



[########################################] | 100% Completed |  2.5s



In [7]:

    
pipeline = comp.get_pipeline(pipeline_str)

# Fit composition classifier
print('Fitting composition classifier...')
pipeline = pipeline.fit(df_sim_train[feature_list],
                        df_sim_train['comp_target_{}'.format(num_groups)])









    



Fitting composition classifier...



In [10]:

    
fig, ax = plt.subplots()
for composition in comp_list:
    comp_mask = df_sim_test['comp_group_{}'.format(num_groups)] == composition
    counts = np.histogram(df_sim_test.loc[comp_mask, 'MC_log_energy'], bins=energybins.log_energy_bins)[0]
    comp.plot_steps(energybins.log_energy_bins, counts, label=composition,
                    color=color_dict[composition], ax=ax)

ax.set_xlim(energybins.log_energy_min, energybins.log_energy_max)
ax.set_xlabel('$\mathrm{\log_{10}(E/GeV)}$')
ax.set_ylabel('Counts')
ax.grid(lw=1)

leg = plt.legend(loc='upper center', frameon=False,
                 bbox_to_anchor=(0.5,  # horizontal
                                 1.25),# vertical 
                 ncol=len(comp_list)//2, fancybox=False)
nevents_outfile = os.path.join(figures_dir, 'num_testing_sim_events.png'.format(config))
comp.check_output_dir(nevents_outfile)
plt.savefig(nevents_outfile)
plt.show()

Plot true flux



In [9]:

    
# Solid angle
theta_max = 40 if config == 'IC79.2010' else 65
# solid_angle = 2*np.pi*(np.cos(df_sim_train['lap_zenith'].min())-np.cos(df_sim_train['lap_zenith'].max()))
solid_angle = np.pi*np.sin(np.deg2rad(theta_max))**2
solid_angle









    Out[9]:





2.5804847429997841



In [10]:

    
livetime, livetime_err = comp.get_detector_livetime(config=config)
livetime, livetime_err









    Out[10]:





(28442248.921, 2699.1328177300002)



In [11]:

    
# Get simulation thrown areas for each energy bin
thrown_radii = comp.simfunctions.get_sim_thrown_radius(energybins.log_energy_midpoints)
thrown_areas = np.pi * thrown_radii**2
thrown_area = thrown_areas.max()
print('thrown_area = {}'.format(thrown_area))









    



thrown_area = 9079202.76887



In [12]:

    
# Load fitted effective area
print('Loading detection efficiencies...')
eff_path = os.path.join(
                comp.paths.comp_data_dir, config, 'efficiencies',
                'efficiency_fit_num_groups_{}.hdf'.format(num_groups))
df_eff = pd.read_hdf(eff_path)









    



Loading detection efficiencies...



In [13]:

    
df_eff.head()









    Out[13]:







  
    
      
      eff_median_PPlus
      eff_err_low_PPlus
      eff_err_high_PPlus
      eff_median_He4Nucleus
      eff_err_low_He4Nucleus
      eff_err_high_He4Nucleus
      eff_median_O16Nucleus
      eff_err_low_O16Nucleus
      eff_err_high_O16Nucleus
      eff_median_Fe56Nucleus
      eff_err_low_Fe56Nucleus
      eff_err_high_Fe56Nucleus
      eff_median_total
      eff_err_low_total
      eff_err_high_total
    
  
  
    
      0
      0.006811
      0.000106
      0.000107
      0.007278
      0.000120
      0.000113
      0.006527
      0.000120
      0.000106
      0.006132
      0.000123
      0.000116
      0.006691
      0.000056
      0.000059
    
    
      1
      0.007645
      0.000097
      0.000102
      0.008170
      0.000093
      0.000089
      0.007697
      0.000102
      0.000090
      0.007539
      0.000102
      0.000101
      0.007762
      0.000047
      0.000049
    
    
      2
      0.008183
      0.000079
      0.000090
      0.008620
      0.000071
      0.000072
      0.008328
      0.000075
      0.000068
      0.008263
      0.000073
      0.000069
      0.008365
      0.000037
      0.000036
    
    
      3
      0.008509
      0.000068
      0.000073
      0.008828
      0.000067
      0.000068
      0.008626
      0.000068
      0.000065
      0.008573
      0.000066
      0.000063
      0.008668
      0.000033
      0.000032
    
    
      4
      0.008697
      0.000067
      0.000069
      0.008921
      0.000072
      0.000070
      0.008759
      0.000071
      0.000067
      0.008697
      0.000067
      0.000069
      0.008812
      0.000034
      0.000034



In [37]:

    
eff_area, eff_area_err = {}, {}
fig, ax = plt.subplots()
for composition in comp_list + ['total']:
    eff_area[composition] = df_eff['eff_median_{}'.format(composition)]*thrown_area
    eff_area_err[composition] = df_eff['eff_err_low_{}'.format(composition)]*thrown_area
    comp.plot_steps(energybins.log_energy_bins, eff_area[composition], eff_area_err[composition], 
                    color=color_dict[composition], label=composition, ax=ax)
ax.set_ylim(0)
ax.grid()
plt.show()

Generate fake counts distribution for each composition

(In the future, this will be specified by Josh/Zach)



In [15]:

    
def broken_power_law_flux(energy, gamma_before=-2.7, gamma_after=-3.1, energy_break=3e6):
    """Broken power law flux

    This is a "realistic" flux (simple broken power law with a knee @ 3PeV)
    to weight simulation to. More information can be found on the
    IT73-IC79 data-MC comparison wiki page
    https://wiki.icecube.wisc.edu/index.php/IT73-IC79_Data-MC_Comparison
    
    Parameters
    ----------
    energy : array_like
        Energy values (in GeV) to calculate the flux for. 
    gamma_before : float, optional
        Spectral index before break point (default is -2.7). 
    gamma_after : float, optional
        Spectral index after break point (default is -3.1). 
    energy_break : float, optional
        Energy (in GeV) at which the spectral index break occurs
        (default is 3e6, or 3 PeV).
        
    Returns
    -------
    flux : array_like 
        Broken power law evaluated at energy points. 
    """
    phi_0 = 3.6e-6
    # phi_0 = 3.1e-6
    # phi_0 = 2.95e-6
    eps = 100
    if isinstance(energy, (int, float)):
        energy = [energy]
    energy = np.asarray(energy) * 1e-6
    energy_break = energy_break * 1e-6
    
    flux = (1e-6) * phi_0 * energy**gamma_before *(1+(energy/energy_break)**eps)**((gamma_after-gamma_before)/eps)

    return flux



In [16]:

    
random_state = 2
np.random.seed(random_state)



In [17]:

    
case = 'broken_power_law_0'



In [38]:

    
counts_true = pd.DataFrame(index=energybins.log_energy_midpoints,
                           columns=comp_list)

flux_to_counts_scaling = {}
for composition in comp_list + ['total']:
    flux_to_counts_scaling[composition] = eff_area[composition].values * livetime * solid_angle * energybins.energy_bin_widths

if case == 'constant':
    for composition in comp_list:
        counts_true[composition] = np.array([1000]*len(energybins.log_energy_midpoints))

elif case == 'constants':
    for composition, value in zip(comp_list, [1000, 800, 700, 600]):
        counts_true[composition] = np.array([value]*len(energybins.log_energy_midpoints))

elif case == 'broken_power_law_0':
    for composition in comp_list:
        scale = 1.0 / len(comp_list)
        comp_flux = comp.broken_power_law_flux(energybins.energy_midpoints,
                                               energy_break=3e12)
        comp_flux *= scale
        counts_true[composition] = flux_to_counts_scaling[composition] * comp_flux

elif case == 'broken_power_law_1':
    for composition in comp_list:
        scale = 1.0 / len(comp_list)
        comp_flux = scale * comp.broken_power_law_flux(energybins.energy_midpoints, energy_break=10**7.0)
        counts_true[composition] = flux_to_counts_scaling[composition] * comp_flux

elif case == 'broken_power_law_2':
    for composition in comp_list:
        high_energy_mask = energybins.log_energy_midpoints >= 7.0
        comp_flux = comp.broken_power_law_flux(energybins.energy_midpoints, energy_break=12e6)
        counts_true[composition] = flux_to_counts_scaling[composition] * comp_flux
        if composition in ['PPlus', 'He4Nucleus']:
            counts_true.loc[high_energy_mask, composition] *= 0.25
        else:
            counts_true.loc[~high_energy_mask, composition] *= 0.25

elif case == 'broken_power_law_3':
    scales = [0.25, 0.1, 0.25, 0.4]
    assert sum(scales) == 1
    for composition, scale in zip(comp_list, scales):
        high_energy_mask = energybins.log_energy_midpoints >= 6.5
        comp_flux = scale * comp.broken_power_law_flux(energybins.energy_midpoints)
        counts_true[composition] = flux_to_counts_scaling[composition] * comp_flux

elif case == 'H4a':
    model_flux = comp.model_flux(model='H4a',
                                 energy=energybins.energy_midpoints,
                                 num_groups=num_groups)
    for composition in comp_list:
        counts_true[composition] = model_flux['flux_{}'.format(composition)].values * flux_to_counts_scaling[composition]

else:
    raise ValueError('Invalid case, {}, entered'.format(case))


# Calculate total counts
counts_true['total'] = counts_true.sum(axis=1)

counts_true









    Out[38]:







  
    
      
      PPlus
      He4Nucleus
      O16Nucleus
      Fe56Nucleus
      total
    
  
  
    
      6.15
      524044.524018
      559921.325042
      502160.796436
      471749.147414
      2.057876e+06
    
    
      6.25
      397637.534997
      424982.134278
      400376.213004
      392128.759396
      1.615125e+06
    
    
      6.35
      287754.553435
      303138.332280
      292855.998536
      290579.334250
      1.174328e+06
    
    
      6.45
      202298.012631
      209895.454693
      205095.842430
      203816.988185
      8.211063e+05
    
    
      6.55
      139797.411480
      143397.994083
      140798.622312
      139802.740898
      5.637968e+05
    
    
      6.65
      95647.837376
      97368.797866
      95810.931495
      95053.379600
      3.838809e+05
    
    
      6.75
      65104.716631
      65953.332188
      64946.790648
      64406.899278
      2.604117e+05
    
    
      6.85
      44171.469057
      44627.077358
      43963.422946
      43579.103300
      1.763411e+05
    
    
      6.95
      29924.219277
      30183.808962
      29737.947954
      29472.889907
      1.193189e+05
    
    
      7.05
      20253.616403
      20409.994219
      20110.319785
      19928.077439
      8.070201e+04
    
    
      7.15
      13702.097453
      13799.570754
      13597.811727
      13473.494359
      5.457297e+04
    
    
      7.25
      9266.777399
      9329.827961
      9193.688720
      9109.347643
      3.689964e+04
    
    
      7.35
      6266.071851
      6307.804075
      6215.783544
      6158.718011
      2.494838e+04
    
    
      7.45
      4236.845920
      4264.625280
      4202.408835
      4163.815585
      1.686770e+04
    
    
      7.55
      2864.602164
      2883.248308
      2841.183658
      2815.087731
      1.140412e+04
    
    
      7.65
      1936.754333
      1949.317381
      1920.877757
      1903.233642
      7.710183e+03
    
    
      7.75
      1309.426591
      1317.900969
      1298.673272
      1286.744059
      5.212745e+03
    
    
      7.85
      885.288497
      891.010607
      878.011037
      869.945803
      3.524256e+03
    
    
      7.95
      598.531229
      602.397161
      593.608355
      588.155560
      2.382692e+03



In [39]:

    
fig, ax = plt.subplots()
for composition in counts_true:
    comp.plot_steps(energybins.log_energy_bins, counts_true[composition],
                    color=color_dict[composition], label=composition,
                    ax=ax)
ax.set_yscale("log", nonposy='clip')
ax.grid()
ax.legend()
plt.show()



In [40]:

    
fig, axarr = plt.subplots(1, len(comp_list) + 1, sharex=True, sharey=True, figsize=(15, 5))
for idx, composition in enumerate(comp_list + ['total']):
    ax = axarr[idx]
    model_flux = comp.model_flux(model='H4a',
                                 energy=energybins.energy_midpoints,
                                 num_groups=num_groups)
    ax.plot(energybins.log_energy_midpoints,
            energybins.energy_midpoints**2.7*model_flux['flux_{}'.format(composition)].values,
            color=color_dict[composition], ls=':', marker='*',
            label='H4a')
    
    comp_flux = counts_true[composition] / flux_to_counts_scaling[composition]
    ax.plot(energybins.log_energy_midpoints,
            energybins.energy_midpoints**2.7*comp_flux,
            color=color_dict[composition],
            label='Test case',
            marker='.')
    ax.set_yscale("log", nonposy='clip')
    ax.set_xlabel('$\mathrm{\log_{10}(E/GeV)}$')
    if idx == 0:
        ax.set_ylabel('$\mathrm{ E^{2.7} \ J(E) \ [GeV^{1.7} m^{-2} sr^{-1} s^{-1}]}$')
    ax.set_title(composition)
    ax.grid(lw=0.8, which='both')
    ax.legend()
plt.show()

Calculate (testing set) simulation event weights



In [91]:

    
counts_sim = pd.DataFrame(index=energybins.log_energy_midpoints,
                          columns=comp_list)
for composition in comp_list:
    comp_mask = df_sim_test['comp_group_{}'.format(num_groups)] == composition
    comp_counts, _ = np.histogram(df_sim_test.loc[comp_mask, 'reco_log_energy'],
                                  bins=energybins.log_energy_bins)
    counts_sim[composition] = comp_counts



In [92]:

    
fig, ax = plt.subplots()
for composition in counts_sim:
    comp.plot_steps(energybins.log_energy_bins, counts_sim[composition],
                    color=color_dict[composition], label=composition)
ax.grid()
ax.legend()
plt.show()

Run analysis pipeline on simulation



In [93]:

    
counts_observed = pd.DataFrame(0, index=energybins.log_energy_midpoints, columns=comp_list)
weights = pd.DataFrame(0, index=energybins.log_energy_midpoints, columns=comp_list)
for idx_log_energy, composition in itertools.product(range(len(energybins.log_energy_midpoints)),
                                                     comp_list):
    
    log_energy = energybins.log_energy_midpoints[idx_log_energy]
    
    # Construct composition mask
    comp_mask = df_sim_test['comp_group_{}'.format(num_groups)] == composition
    # Construct mask for energy bin
    energy_bins = np.digitize(df_sim_test['MC_log_energy'], bins=energybins.log_energy_bins) - 1
    energy_mask = energy_bins == idx_log_energy
    # Filter out events that don't pass composition + energy  mask
    df_sim_bin = df_sim_test.loc[comp_mask & energy_mask, :]
    
    # Reweight simulation events to get desired number of events
    weight = counts_true.loc[log_energy, composition] / df_sim_bin.shape[0]
    weights.loc[log_energy, composition] = weight
    
    # Get predicted composition 
    pred_target = pipeline.predict(df_sim_bin[feature_list])
    pred_comp = np.array(comp.decode_composition_groups(pred_target, num_groups=num_groups))
    assert len(pred_comp) == df_sim_bin.shape[0]
    for p_comp in np.unique(pred_comp):
        pred_comp_mask = pred_comp == p_comp
        comp_counts, _ = np.histogram(df_sim_bin.loc[pred_comp_mask, 'reco_log_energy'],
                                      bins=energybins.log_energy_bins)
        comp_counts = weight * comp_counts
        counts_observed[p_comp] += comp_counts

counts_observed









    Out[93]:







  
    
      
      PPlus
      He4Nucleus
      O16Nucleus
      Fe56Nucleus
    
  
  
    
      6.15
      448470.697288
      558708.028255
      61553.669808
      457597.477438
    
    
      6.25
      427392.360601
      506010.333059
      101575.191688
      812860.867915
    
    
      6.35
      252150.662515
      386208.748319
      92743.879145
      580312.258955
    
    
      6.45
      142369.251000
      276002.149682
      80052.314976
      313334.978728
    
    
      6.55
      133460.857811
      133049.535105
      95735.160529
      213504.458641
    
    
      6.65
      83376.010735
      75875.898698
      58160.413714
      106996.788235
    
    
      6.75
      34462.117200
      31866.906484
      58290.156902
      99715.117607
    
    
      6.85
      35283.446909
      36356.733839
      23134.933716
      39156.472249
    
    
      6.95
      16521.814168
      18516.644737
      11872.678809
      42945.073518
    
    
      7.05
      9254.816895
      3030.297653
      9593.263916
      17140.732063
    
    
      7.15
      7569.084547
      5273.925726
      5085.355849
      12861.713432
    
    
      7.25
      3809.535124
      2147.377384
      5786.493968
      5136.329048
    
    
      7.35
      3381.386295
      1074.495814
      4944.115189
      3042.650143
    
    
      7.45
      1659.407380
      1410.442299
      1443.664549
      2073.980836
    
    
      7.55
      793.130770
      916.053870
      1382.216434
      1498.137791
    
    
      7.65
      786.689342
      125.880619
      879.974487
      945.802103
    
    
      7.75
      448.939246
      63.586977
      652.973803
      656.855514
    
    
      7.85
      224.400577
      119.362445
      307.963835
      357.294323
    
    
      7.95
      124.345488
      68.906079
      121.544569
      331.733825



In [94]:

    
fig, ax = plt.subplots()
for composition in comp_list:
    weights[composition].plot(ls=':', label=composition,
                              color=color_dict[composition], ax=ax)
ax.set_xlabel('$\mathrm{\log_{10}(E/GeV)}$')
ax.set_ylabel('Weights')
ax.set_yscale("log", nonposy='clip')
ax.grid(lw=1)
ax.legend()
weights_outfile = os.path.join(figures_dir, 
                               'weights.png'.format(num_groups))
comp.check_output_dir(weights_outfile)
plt.savefig(weights_outfile)
plt.show()

Format observed counts and detection efficiencies for use with PyUnfold



In [95]:

    
counts_pyunfold = []
for idx, row in counts_observed.iterrows():
    for composition in comp_list:
        counts_pyunfold.append(row[composition])
        
efficiencies = []
efficiencies_err = []     
for idx, row in df_eff.iterrows():
    for composition in comp_list:
        efficiencies.append(row['eff_median_{}'.format(composition)])
        efficiencies_err.append(row['eff_err_low_{}'.format(composition)])



In [96]:

    
formatted_df = pd.DataFrame()
formatted_df['counts'] = counts_pyunfold
formatted_df['efficiencies'] = efficiencies
formatted_df['efficiencies_err'] = efficiencies_err



In [97]:

    
formatted_df.head()









    Out[97]:







  
    
      
      counts
      efficiencies
      efficiencies_err
    
  
  
    
      0
      448470.697288
      0.006811
      0.000106
    
    
      1
      558708.028255
      0.007278
      0.000120
    
    
      2
      61553.669808
      0.006527
      0.000120
    
    
      3
      457597.477438
      0.006132
      0.000123
    
    
      4
      427392.360601
      0.007645
      0.000097



In [98]:

    
formatted_file = os.path.join(os.getcwd(),'test.hdf')
formatted_df.to_hdf(formatted_file, 'dataframe', mode='w')



In [99]:

    
root_file = os.path.join(os.getcwd(),'test.root')
save_pyunfold_root_file(config=config, num_groups=num_groups,
                        outfile=root_file, formatted_df_file=formatted_file)









    



Saving output file /home/jbourbeau/cr-composition/unfolding/test.root



In [ ]:



In [ ]:



In [ ]:



In [100]:

    
df_unfolding_iter = unfold(config_name=os.path.join(config, 'config.cfg'),
                           priors='Jeffreys',
                           input_file=root_file,
                           ts_stopping=0.01)



In [101]:

    
df_unfolding_iter









    Out[101]:







  
    
      
      sys_err
      stat_err
      n_c
    
  
  
    
      0
      [30935201.7749, 11344662.6106, 9235548.08932, ...
      [122184.008615, 46610.4280331, 37728.3658056, ...
      [226633519.476, 91231840.8719, 67295004.0953, ...
    
    
      1
      [34461344.924, 14225219.9294, 13754581.0252, 1...
      [118640.243957, 50345.6997184, 53530.3499832, ...
      [176191611.269, 81761596.2179, 71227463.8037, ...
    
    
      2
      [35226502.7542, 16255371.9133, 17448098.7779, ...
      [129629.034574, 56711.1813067, 66470.4197135, ...
      [147261343.808, 76136300.8899, 75621157.8449, ...
    
    
      3
      [34807668.5331, 17709227.8952, 20480735.676, 2...
      [135797.488551, 62398.5226621, 77357.6900892, ...
      [128575400.55, 72211935.1651, 79487075.4852, 6...
    
    
      4
      [33974539.1907, 18768212.2251, 22996578.9484, ...
      [141354.651993, 68013.3915995, 86513.3238882, ...
      [115689996.49, 69257499.95, 82641656.8536, 717...
    
    
      5
      [33099185.233, 19566405.2562, 25111389.131, 25...
      [146304.979461, 73512.1696956, 94189.4948535, ...
      [106438630.594, 66955873.6017, 85150968.8911, ...
    
    
      6
      [32341859.5987, 20198020.4907, 26913461.1106, ...
      [150991.143711, 78903.6662298, 100678.932813, ...
      [99601721.8874, 65127773.5014, 87133192.3424, ...



In [102]:

    
def unfolded_counts_dist(unfolding_df, iteration=-1, num_groups=4):
    """
    Convert unfolded distributions DataFrame from PyUnfold counts arrays 
    to a dictionary containing a counts array for each composition. 
    
    Parameters
    ----------
    unfolding_df : pandas.DataFrame
        Unfolding DataFrame returned from PyUnfold. 
    iteration : int, optional
        Specific unfolding iteration to retrieve unfolded counts 
        (default is -1, the last iteration). 
    num_groups : int, optional
        Number of composition groups (default is 4). 
        
    Returns
    -------
    counts : dict
        Dictionary with composition-counts key-value pairs.
    counts_sys_err : dict
        Dictionary with composition-systematic error key-value pairs.
    counts_stat_err : dict
        Dictionary with composition-statistical error key-value pairs.
    """
    comp_list = comp.get_comp_list(num_groups=num_groups)
    
    df_iter = unfolding_df.iloc[iteration]
    
    counts, counts_sys_err, counts_stat_err = {}, {}, {}
    for idx, composition in enumerate(comp_list):
        counts[composition] = df_iter['n_c'][idx::num_groups]
        counts_sys_err[composition] = df_iter['sys_err'][idx::num_groups]
        counts_stat_err[composition] = df_iter['stat_err'][idx::num_groups]
        
    counts['total'] = np.sum([counts[composition] for composition in comp_list], axis=0)
    counts_sys_err['total'] = np.sqrt(np.sum([counts_sys_err[composition]**2 for composition in comp_list], axis=0))
    counts_stat_err['total'] = np.sqrt(np.sum([counts_stat_err[composition]**2 for composition in comp_list], axis=0))
    
    return counts, counts_sys_err, counts_stat_err



In [103]:

    
for model_name in ['Jeffreys']:
    
    # Get plotting axis
    fig = plt.figure(figsize=(12, 5))
    gs = gridspec.GridSpec(nrows=2, ncols=len(comp_list)+1,
                           hspace=0.075)
    axs_flux, axs_ratio = {}, {}
    for idx, composition in enumerate(comp_list + ['total']):
        if idx == 0:
            axs_flux[composition] = fig.add_subplot(gs[0, idx])
        else:
            axs_flux[composition] = fig.add_subplot(gs[0, idx], sharey=axs_flux[comp_list[0]])
        axs_ratio[composition] = fig.add_subplot(gs[1, idx], sharex=axs_flux[composition])
        

    print('{}: {} iterations'.format(model_name, df_unfolding_iter.shape[0]))

    counts, counts_sys_err, counts_stat_err = unfolded_counts_dist(
                                                    df_unfolding_iter,
                                                    num_groups=num_groups)
    for idx, composition in enumerate(comp_list + ['total']):
        
        ax_flux = axs_flux[composition]
        ax_ratio = axs_ratio[composition]
        
        # Flux plot
        flux, flux_err_sys = comp.get_flux(counts[composition], counts_sys_err[composition],
                                                    energybins=energybins.energy_bins,
                                                    eff_area=thrown_area,
                                                    livetime=livetime, livetime_err=livetime_err, 
                                                    solid_angle=solid_angle,
                                                    scalingindex=2.7)
        flux, flux_err_stat = comp.get_flux(counts[composition], counts_stat_err[composition],
                                                     energybins=energybins.energy_bins,
                                                     eff_area=thrown_area,
                                                     livetime=livetime, livetime_err=livetime_err, 
                                                     solid_angle=solid_angle,
                                                     scalingindex=2.7)
        
        comp.plot_steps(energybins.log_energy_bins, flux, yerr=flux_err_sys,
                            ax=ax_flux, alpha=0.4, fillalpha=0.4,  
                            color=color_dict[composition])

        ax_flux.errorbar(energybins.log_energy_midpoints, flux, yerr=flux_err_stat,  
                         color=color_dict[composition], ls='None', marker='.', 
                         label='Unfolded flux', alpha=0.8)

        # True flux
        true_counts = counts_true[composition].values
        true_counts_err = np.sqrt(true_counts)

        true_flux, true_flux_err = comp.get_flux(true_counts, true_counts_err,
                                                energybins=energybins.energy_bins,
                                                eff_area=eff_area[composition],
                                                eff_area_err=eff_area_err[composition],
                                                livetime=livetime, livetime_err=livetime_err, 
                                                solid_angle=solid_angle,
                                                scalingindex=2.7)
        
        
        ax_flux.errorbar(energybins.log_energy_midpoints, true_flux, yerr=true_flux_err,  
                         color=color_dict[composition], ls='None', marker='*', 
                         label='True flux', alpha=0.8)

        ax_flux.set_yscale("log", nonposy='clip')
        ax_flux.set_xlim(6.4, 7.8)
        ax_flux.grid(linestyle='dotted', which="both", lw=1)
        ax_flux.legend(fontsize=8)
        ax_flux.set_title(composition, fontsize=10)
        if idx == 0:
            ax_flux.set_ylabel('$\mathrm{ E^{2.7} \ J(E) \ [GeV^{1.7} m^{-2} sr^{-1} s^{-1}]}$',
                               fontsize=10)
        else:
            plt.setp(ax_flux.get_yticklabels(), visible=False)
        plt.setp(ax_flux.get_xticklabels(), visible=False)
        ax_flux.tick_params(axis='both', which='major', labelsize=10)
        
        
        # Ratio plot
        diff = flux - true_flux
        # Error bar calculation
        diff_err_sys = flux_err_sys
        diff_err_stat = np.sqrt(flux_err_stat**2 + true_flux_err**2)
        
        frac_diff, frac_diff_sys = comp.ratio_error(diff, diff_err_sys, true_flux, np.zeros_like(true_flux))
        frac_diff, frac_diff_stat = comp.ratio_error(diff, diff_err_stat, true_flux, true_flux_err)
        
        comp.plot_steps(energybins.log_energy_bins, frac_diff, yerr=frac_diff_sys,
                            ax=ax_ratio, alpha=0.4, fillalpha=0.4,  
                            color=color_dict[composition])
        ax_ratio.errorbar(energybins.log_energy_midpoints, frac_diff, yerr=frac_diff_stat,  
                          color=color_dict[composition], ls=':', marker='.', 
                          label=composition, alpha=0.8)
        ax_ratio.axhline(0, ls='-.', lw=1, marker='None', color='k')

        ax_ratio.grid(linestyle='dotted', which="both", lw=1)
        extraticks = np.arange(-1, 1.5, 0.25)
        ax_ratio.set_yticks(extraticks)
        ax_ratio.set_ylim(-1, 1)
        if idx == 0:
            ax_ratio.set_ylabel('$\mathrm{(J_{unfolded} - J_{true}) / J_{true}}$',
                                fontsize=10)
        else:
            plt.setp(ax_ratio.get_yticklabels(), visible=False)
        ax_ratio.set_xlabel('$\mathrm{\log_{10}(E/GeV)}$', fontsize=10)
        ax_ratio.tick_params(axis='both', which='major', labelsize=10)
        
    
    plt.tight_layout()
    flux_outfile = os.path.join(figures_dir, 
                                'flux_diff_grid_{}-groups_{}-prior.png'.format(num_groups, model_name))
    comp.check_output_dir(flux_outfile)
    plt.savefig(flux_outfile)
    plt.show()









    



Jeffreys: 7 iterations



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [98]:

    
for model_name in ['Jeffreys']:
    fig, axarr = plt.subplots(2, 2, sharex=True, sharey=True)
    print('{}: {} iterations'.format(model_name, df_unfolding_iter.shape[0]))

    counts, counts_sys_err, counts_stat_err = unfolded_counts_dist(
                                                df_unfolding_iter,
                                                num_groups=num_groups)


    flux_total, flux_total_err_sys = comp.analysis.get_flux(counts['total'], counts_sys_err['total'],
                                                     energybins=energybins.energy_bins,
                                                     eff_area=thrown_area,
                                                     livetime=livetime, livetime_err=livetime_err, 
                                                     solid_angle=solid_angle)
    flux_total, flux_total_err_stat = comp.analysis.get_flux(counts['total'], counts_stat_err['total'],
                                                 energybins=energybins.energy_bins,
                                                 eff_area=thrown_area,
                                                 livetime=livetime, livetime_err=livetime_err, 
                                                 solid_angle=solid_angle)


    for composition, ax in zip(comp_list, axarr.flatten()):

        flux, flux_err_sys = comp.analysis.get_flux(
                                        counts[composition], counts_sys_err[composition],
                                        energybins=energybins.energy_bins,
                                        eff_area=thrown_area,
                                        livetime=livetime,
                                        livetime_err=livetime_err, 
                                        solid_angle=solid_angle)
        flux, flux_err_stat = comp.analysis.get_flux(
                                        counts[composition], counts_stat_err[composition],
                                        energybins=energybins.energy_bins,
                                        eff_area=thrown_area,
                                        livetime=livetime,
                                        livetime_err=livetime_err, 
                                        solid_angle=solid_angle)

        plotting.plot_steps(energybins.log_energy_bins, flux, yerr=flux_err_sys,
                            ax=ax, alpha=0.4, fillalpha=0.4,  
                            color=color_dict[composition])

        ax.errorbar(energybins.log_energy_midpoints, flux, yerr=flux_err_stat,  
                    color=color_dict[composition], ls='None', marker='.', 
                    label=composition + ' unfolded', alpha=0.8)

        # True flux
        true_counts = counts_true[composition].values
        true_counts_err = np.sqrt(true_counts)

        true_flux, true_flux_err = comp.analysis.get_flux(
                                                true_counts, true_counts_err,
                                                energybins=energybins.energy_bins,
                                                eff_area=eff_area[composition],
                                                eff_area_err=eff_area_err[composition],
                                                livetime=livetime, livetime_err=livetime_err, 
                                                solid_angle=solid_angle)

        ax.errorbar(energybins.log_energy_midpoints, true_flux, yerr=true_flux_err,  
                    color=color_dict[composition], ls='None', marker='^', 
                    label=composition + ' true', alpha=0.8)

        ax.set_yscale("log", nonposy='clip')
        ax.set_xlim(6.4, 7.8)
#         ax.set_ylim([1e0, 7e3])

        ax.grid(linestyle='dotted', which="both", lw=1)
        ax.legend(loc='upper right', ncol=1, fontsize=10)

    fig.text(0.5, -0.01, '$\mathrm{\log_{10}(E/GeV)}$', ha='center')
    fig.text(-0.01, 0.5, '$\mathrm{ E^{2.7} \ J(E) \ [GeV^{1.7} m^{-2} sr^{-1} s^{-1}]}$',
             va='center', rotation='vertical')

    flux_outfile = os.path.join(figures_dir, 
                                'flux_grid_{}-groups_{}-prior.png'.format(num_groups, model_name))
    comp.check_output_dir(flux_outfile)
    plt.savefig(flux_outfile)
    plt.show()









    



Jeffreys: 3 iterations



In [33]:

    
for model_name in ['Jeffreys']:
    fig, axarr = plt.subplots(2, 2, sharex=True, sharey=True)
    print('{}: {} iterations'.format(model_name, df_unfolding_iter.shape[0]))

    counts, counts_sys_err, counts_stat_err = unfolded_counts_dist(
                                                df_unfolding_iter,
                                                num_groups=num_groups)
    for composition, ax in zip(comp_list, axarr.flatten()):

        flux, flux_err_sys = comp.analysis.get_flux(counts[composition], counts_sys_err[composition],
                                                     energybins=energybins.energy_bins,
                                                     eff_area=thrown_area,
                                                     livetime=livetime, livetime_err=livetime_err, 
                                                     solid_angle=solid_angle,
                                                     scalingindex=1)
        flux, flux_err_stat = comp.analysis.get_flux(counts[composition], counts_stat_err[composition],
                                                     energybins=energybins.energy_bins,
                                                     eff_area=thrown_area,
                                                     livetime=livetime, livetime_err=livetime_err, 
                                                     solid_angle=solid_angle,
                                                     scalingindex=1)

        # True flux
        true_counts = counts_true[composition].values
        true_counts_err = np.sqrt(true_counts)

        true_flux, true_flux_err = comp.analysis.get_flux(true_counts, true_counts_err,
                                                energybins=energybins.energy_bins,
                                                eff_area=eff_area[composition],
                                                eff_area_err=eff_area_err[composition],
                                                livetime=livetime, livetime_err=livetime_err, 
                                                solid_angle=solid_angle,
                                                scalingindex=1)
        
        diff = flux - true_flux
        # Error bar calculation
        diff_err_sys = flux_err_sys
        diff_err_stat = np.sqrt(flux_err_stat**2 + true_flux_err**2)
        
        frac_diff, frac_diff_sys = comp.ratio_error(diff, diff_err_sys, true_flux, np.zeros_like(true_flux))
        frac_diff, frac_diff_stat = comp.ratio_error(diff, diff_err_stat, true_flux, true_flux_err)
        
        plotting.plot_steps(energybins.log_energy_bins, frac_diff, yerr=frac_diff_sys,
                            ax=ax, alpha=0.4, fillalpha=0.4,  
                            color=color_dict[composition])
        ax.errorbar(energybins.log_energy_midpoints, frac_diff, yerr=frac_diff_stat,  
                    color=color_dict[composition], ls=':', marker='^', 
                    label=composition, alpha=0.8)
        ax.axhline(0, ls='-.', lw=1, marker='None', color='k')

        ax.grid(linestyle='dotted', which="both", lw=1)
        ax.legend(loc='lower left', ncol=1, fontsize=10)
        extraticks = np.arange(-1, 1.5, 0.5)
        ax.set_yticks(extraticks)
    
    ax.set_xlim(6.4, 7.8)
    ax.set_ylim(-1, 1)
    fig.text(0.5, -0.01, '$\mathrm{\log_{10}(E/GeV)}$', ha='center',
             fontsize=14)
    fig.text(-0.02, 0.5, '(Unfolded flux - true flux) / true flux',
             va='center', rotation='vertical', fontsize=14)
    plt.tight_layout()

#     flux_outfile = os.path.join(figures_dir, 
#                                 'flux_diff_grid_{}-groups_{}-prior.png'.format(num_groups, model_name))
#     comp.check_output_dir(flux_outfile)
#     plt.savefig(flux_outfile)
    plt.show()









    



Jeffreys: 3 iterations



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [58]:

    
frac_flux_diff, frac_flux_diff_stat, frac_flux_diff_sys = defaultdict(list), defaultdict(list), defaultdict(list)
ts_stopping_list = np.linspace(0.01, 0.001, 10)
for ts_stopping in pyprind.prog_bar(ts_stopping_list):
    df_unfolding_iter = unfold(config_name=os.path.join(config, 'config.cfg'),
                               priors='Jeffreys',
                               input_file=root_file,
                               ts_stopping=ts_stopping)
    
    counts, counts_sys_err, counts_stat_err = unfolded_counts_dist(
                                                df_unfolding_iter,
                                                num_groups=num_groups)

    for composition in comp_list:

        flux, flux_err_sys = comp.analysis.get_flux(counts[composition], counts_sys_err[composition],
                                                     energybins=energybins.energy_bins,
                                                     eff_area=thrown_area,
                                                     livetime=livetime, livetime_err=livetime_err, 
                                                     solid_angle=solid_angle,
                                                     scalingindex=1)
        flux, flux_err_stat = comp.analysis.get_flux(counts[composition], counts_stat_err[composition],
                                                     energybins=energybins.energy_bins,
                                                     eff_area=thrown_area,
                                                     livetime=livetime, livetime_err=livetime_err, 
                                                     solid_angle=solid_angle,
                                                     scalingindex=1)

        # True flux
        true_counts = counts_true[composition].values
        true_counts_err = np.sqrt(true_counts)

        true_flux, true_flux_err = comp.analysis.get_flux(true_counts, true_counts_err,
                                                energybins=energybins.energy_bins,
                                                eff_area=eff_area[composition],
                                                eff_area_err=eff_area_err[composition],
                                                livetime=livetime, livetime_err=livetime_err, 
                                                solid_angle=solid_angle,
                                                scalingindex=1)

        diff = flux - true_flux
        # Error bar calculation
        diff_err_sys = flux_err_sys
        diff_err_stat = np.sqrt(flux_err_stat**2 + true_flux_err**2)

        frac_diff, frac_diff_sys = comp.ratio_error(diff, diff_err_sys, true_flux, np.zeros_like(true_flux))
        frac_diff, frac_diff_stat = comp.ratio_error(diff, diff_err_stat, true_flux, true_flux_err)
        
        frac_flux_diff[composition].append(frac_diff)
        frac_flux_diff_stat[composition].append(frac_diff_stat)
        frac_flux_diff_sys[composition].append(frac_diff_sys)









    



0% [##########] 100% | ETA: 00:00:00
Total time elapsed: 00:01:12



In [59]:

    
fig, axarr = plt.subplots(2, 2, sharex=True, sharey=True)

color_dict_model = {}
color_dict_model['PPlus'] = sns.color_palette('Blues', len(ts_stopping_list)+5).as_hex()[::-1]
color_dict_model['O16Nucleus'] = sns.color_palette('Reds', len(ts_stopping_list)+5).as_hex()[::-1]
color_dict_model['He4Nucleus'] = sns.color_palette('Purples', len(ts_stopping_list)+5).as_hex()[::-1]
color_dict_model['Fe56Nucleus'] = sns.color_palette('Oranges', len(ts_stopping_list)+5).as_hex()[::-1]

for composition, ax in zip(comp_list, axarr.flatten()):
    for idx, (frac_diff, frac_diff_stat, frac_diff_sys) in enumerate(zip(frac_flux_diff[composition],
                                                                         frac_flux_diff_stat[composition],
                                                                         frac_flux_diff_sys[composition])):

        ax.errorbar(energybins.log_energy_midpoints, frac_diff,
                    yerr=frac_diff_stat, color=color_dict_model[composition][idx],
                    ls=':', marker='.', 
                    label=composition if idx == len(ts_stopping_list)//2 else '',
                    alpha=0.75)
        plotting.plot_steps(energybins.log_energy_bins, frac_diff, yerr=frac_diff_sys,
                            ax=ax, alpha=0.4, fillalpha=0.4,  
                            color=color_dict_model[composition][idx])
        ax.axhline(0, ls='-.', lw=1, marker='None', color='k')

        ax.grid(linestyle='dotted', which="both", lw=1)
        ax.legend(loc='lower left', ncol=1, fontsize=10)
        
        extraticks = np.arange(-1, 1.25, 0.25)
        ax.set_yticks(extraticks)

    ax.set_xlim(6.4, 7.8)
    ax.set_ylim(-1, 1)
fig.text(0.5, -0.01, '$\mathrm{\log_{10}(E/GeV)}$',
         ha='center', fontsize=14)
fig.text(-0.02, 0.5, '(Unfolded flux - true flux) / true flux',
         va='center', rotation='vertical', fontsize=14)
plt.tight_layout()

flux_compare_outfile = os.path.join(
                        figures_dir, 
                        'flux_compare_grid_{}-groups_Jeffreys-prior.png'.format(num_groups))
comp.check_output_dir(flux_compare_outfile)
plt.savefig(flux_compare_outfile)
plt.show()



In [ ]:



In [ ]:



In [ ]:



In [ ]:

	eff_median_PPlus	eff_err_low_PPlus	eff_err_high_PPlus	eff_median_He4Nucleus	eff_err_low_He4Nucleus	eff_err_high_He4Nucleus	eff_median_O16Nucleus	eff_err_low_O16Nucleus	eff_err_high_O16Nucleus	eff_median_Fe56Nucleus	eff_err_low_Fe56Nucleus	eff_err_high_Fe56Nucleus	eff_median_total	eff_err_low_total	eff_err_high_total
0	0.006811	0.000106	0.000107	0.007278	0.000120	0.000113	0.006527	0.000120	0.000106	0.006132	0.000123	0.000116	0.006691	0.000056	0.000059
1	0.007645	0.000097	0.000102	0.008170	0.000093	0.000089	0.007697	0.000102	0.000090	0.007539	0.000102	0.000101	0.007762	0.000047	0.000049
2	0.008183	0.000079	0.000090	0.008620	0.000071	0.000072	0.008328	0.000075	0.000068	0.008263	0.000073	0.000069	0.008365	0.000037	0.000036
3	0.008509	0.000068	0.000073	0.008828	0.000067	0.000068	0.008626	0.000068	0.000065	0.008573	0.000066	0.000063	0.008668	0.000033	0.000032
4	0.008697	0.000067	0.000069	0.008921	0.000072	0.000070	0.008759	0.000071	0.000067	0.008697	0.000067	0.000069	0.008812	0.000034	0.000034

	PPlus	He4Nucleus	O16Nucleus	Fe56Nucleus	total
6.15	524044.524018	559921.325042	502160.796436	471749.147414	2.057876e+06
6.25	397637.534997	424982.134278	400376.213004	392128.759396	1.615125e+06
6.35	287754.553435	303138.332280	292855.998536	290579.334250	1.174328e+06
6.45	202298.012631	209895.454693	205095.842430	203816.988185	8.211063e+05
6.55	139797.411480	143397.994083	140798.622312	139802.740898	5.637968e+05
6.65	95647.837376	97368.797866	95810.931495	95053.379600	3.838809e+05
6.75	65104.716631	65953.332188	64946.790648	64406.899278	2.604117e+05
6.85	44171.469057	44627.077358	43963.422946	43579.103300	1.763411e+05
6.95	29924.219277	30183.808962	29737.947954	29472.889907	1.193189e+05
7.05	20253.616403	20409.994219	20110.319785	19928.077439	8.070201e+04
7.15	13702.097453	13799.570754	13597.811727	13473.494359	5.457297e+04
7.25	9266.777399	9329.827961	9193.688720	9109.347643	3.689964e+04
7.35	6266.071851	6307.804075	6215.783544	6158.718011	2.494838e+04
7.45	4236.845920	4264.625280	4202.408835	4163.815585	1.686770e+04
7.55	2864.602164	2883.248308	2841.183658	2815.087731	1.140412e+04
7.65	1936.754333	1949.317381	1920.877757	1903.233642	7.710183e+03
7.75	1309.426591	1317.900969	1298.673272	1286.744059	5.212745e+03
7.85	885.288497	891.010607	878.011037	869.945803	3.524256e+03
7.95	598.531229	602.397161	593.608355	588.155560	2.382692e+03

	PPlus	He4Nucleus	O16Nucleus	Fe56Nucleus
6.15	448470.697288	558708.028255	61553.669808	457597.477438
6.25	427392.360601	506010.333059	101575.191688	812860.867915
6.35	252150.662515	386208.748319	92743.879145	580312.258955
6.45	142369.251000	276002.149682	80052.314976	313334.978728
6.55	133460.857811	133049.535105	95735.160529	213504.458641
6.65	83376.010735	75875.898698	58160.413714	106996.788235
6.75	34462.117200	31866.906484	58290.156902	99715.117607
6.85	35283.446909	36356.733839	23134.933716	39156.472249
6.95	16521.814168	18516.644737	11872.678809	42945.073518
7.05	9254.816895	3030.297653	9593.263916	17140.732063
7.15	7569.084547	5273.925726	5085.355849	12861.713432
7.25	3809.535124	2147.377384	5786.493968	5136.329048
7.35	3381.386295	1074.495814	4944.115189	3042.650143
7.45	1659.407380	1410.442299	1443.664549	2073.980836
7.55	793.130770	916.053870	1382.216434	1498.137791
7.65	786.689342	125.880619	879.974487	945.802103
7.75	448.939246	63.586977	652.973803	656.855514
7.85	224.400577	119.362445	307.963835	357.294323
7.95	124.345488	68.906079	121.544569	331.733825

	sys_err	stat_err	n_c
0	[30935201.7749, 11344662.6106, 9235548.08932, ...	[122184.008615, 46610.4280331, 37728.3658056, ...	[226633519.476, 91231840.8719, 67295004.0953, ...
1	[34461344.924, 14225219.9294, 13754581.0252, 1...	[118640.243957, 50345.6997184, 53530.3499832, ...	[176191611.269, 81761596.2179, 71227463.8037, ...
2	[35226502.7542, 16255371.9133, 17448098.7779, ...	[129629.034574, 56711.1813067, 66470.4197135, ...	[147261343.808, 76136300.8899, 75621157.8449, ...
3	[34807668.5331, 17709227.8952, 20480735.676, 2...	[135797.488551, 62398.5226621, 77357.6900892, ...	[128575400.55, 72211935.1651, 79487075.4852, 6...
4	[33974539.1907, 18768212.2251, 22996578.9484, ...	[141354.651993, 68013.3915995, 86513.3238882, ...	[115689996.49, 69257499.95, 82641656.8536, 717...
5	[33099185.233, 19566405.2562, 25111389.131, 25...	[146304.979461, 73512.1696956, 94189.4948535, ...	[106438630.594, 66955873.6017, 85150968.8911, ...
6	[32341859.5987, 20198020.4907, 26913461.1106, ...	[150991.143711, 78903.6662298, 100678.932813, ...	[99601721.8874, 65127773.5014, 87133192.3424, ...