Author: James Bourbeau
In [1]:
%load_ext watermark
%watermark -u -d -v -p numpy,matplotlib,scipy,pandas,sklearn,mlxtend
In [2]:
%matplotlib inline
from __future__ import division, print_function
from collections import defaultdict
import os
import numpy as np
from scipy import optimize
from scipy.stats import chisquare
import pandas as pd
import matplotlib.pyplot as plt
import seaborn.apionly as sns
import comptools as comp
color_dict = comp.analysis.get_color_dict()
[ back to top ]
In [3]:
# config = 'IC79'
config = 'IC86.2012'
df_sim = comp.load_sim(config=config, test_size=0)
In [5]:
df_sim
Out[5]:
In [6]:
# df_sim, cut_dict_sim = comp.load_dataframe(datatype='sim', config=config, return_cut_dict=True)
# selection_mask = np.array([True] * len(df_sim))
# # standard_cut_keys = ['IceTopQualityCuts', 'lap_InIce_containment',
# # # 'num_hits_1_60', 'max_qfrac_1_60',
# # 'InIceQualityCuts', 'num_hits_1_60']
# standard_cut_keys = ['passed_IceTopQualityCuts', 'FractionContainment_Laputop_InIce',
# 'passed_InIceQualityCuts', 'num_hits_1_60']
# # for cut in ['MilliNCascAbove2', 'MilliQtotRatio', 'MilliRloglBelow2', 'StochRecoSucceeded']:
# # standard_cut_keys += ['InIceQualityCuts_{}'.format(cut)]
# for key in standard_cut_keys:
# selection_mask *= cut_dict_sim[key]
# print(key, np.sum(selection_mask))
# df_sim = df_sim[selection_mask]
In [7]:
log_energy_bins = np.arange(5.0, 9.51, 0.05)
# log_energy_bins = np.arange(5.0, 9.51, 0.1)
energy_bins = 10**log_energy_bins
energy_midpoints = (energy_bins[1:] + energy_bins[:-1]) / 2
energy_min_fit, energy_max_fit = 5.8, 7.0
midpoints_fitmask = (energy_midpoints >= 10**energy_min_fit) & (energy_midpoints <= 10**energy_max_fit)
In [8]:
log_energy_bins
Out[8]:
In [9]:
np.log10(energy_midpoints[midpoints_fitmask])
Out[9]:
In [10]:
def constant(energy, c):
return c
def linefit(energy, m, b):
return m*np.log10(energy) + b
def sigmoid_flat(energy, p0, p1, p2):
return p0 / (1 + np.exp(-p1*np.log10(energy) + p2))
def sigmoid_slant(energy, p0, p1, p2, p3):
return (p0 + p3*np.log10(energy)) / (1 + np.exp(-p1*np.log10(energy) + p2))
In [11]:
def red_chisquared(obs, fit, sigma, n_params):
zero_mask = sigma != 0
return np.nansum(((obs[zero_mask] - fit[zero_mask])/sigma[zero_mask]) ** 2) / (len(obs[zero_mask]) - n_params)
# return np.sum(((obs - fit)/sigma) ** 2) / (len(obs) - 1 - n_params)
In [12]:
np.sum(midpoints_fitmask)-3
Out[12]:
In [13]:
eff_area, eff_area_error, _ = comp.calculate_effective_area_vs_energy(df_sim, energy_bins)
eff_area_light, eff_area_error_light, _ = comp.calculate_effective_area_vs_energy(df_sim[df_sim.MC_comp_class == 'light'], energy_bins)
eff_area_heavy, eff_area_error_heavy, _ = comp.calculate_effective_area_vs_energy(df_sim[df_sim.MC_comp_class == 'heavy'], energy_bins)
In [71]:
eff_area, eff_area_error, _ = comp.analysis.get_effective_area(df_sim,
energy_bins, energy='MC')
eff_area_light, eff_area_error_light, _ = comp.analysis.get_effective_area(
df_sim[df_sim.MC_comp_class == 'light'],
energy_bins, energy='MC')
eff_area_heavy, eff_area_error_heavy, _ = comp.analysis.get_effective_area(
df_sim[df_sim.MC_comp_class == 'heavy'],
energy_bins, energy='MC')
In [14]:
eff_area_light
Out[14]:
In [15]:
p0 = [1.5e5, 8.0, 50.0]
popt_light, pcov_light = optimize.curve_fit(sigmoid_flat, energy_midpoints[midpoints_fitmask],
eff_area_light[midpoints_fitmask], p0=p0,
sigma=eff_area_error_light[midpoints_fitmask])
popt_heavy, pcov_heavy = optimize.curve_fit(sigmoid_flat, energy_midpoints[midpoints_fitmask],
eff_area_heavy[midpoints_fitmask], p0=p0,
sigma=eff_area_error_heavy[midpoints_fitmask])
In [16]:
print(popt_light)
print(popt_heavy)
In [17]:
perr_light = np.sqrt(np.diag(pcov_light))
print(perr_light)
perr_heavy = np.sqrt(np.diag(pcov_heavy))
print(perr_heavy)
In [18]:
avg = (popt_light[0] + popt_heavy[0]) / 2
print('avg eff area = {}'.format(avg))
In [19]:
eff_area_light
Out[19]:
In [20]:
light_chi2 = red_chisquared(eff_area_light, sigmoid_flat(energy_midpoints, *popt_light),
eff_area_error_light, len(popt_light))
print(light_chi2)
heavy_chi2 = red_chisquared(eff_area_heavy,
sigmoid_flat(energy_midpoints, *popt_heavy),
eff_area_error_heavy, len(popt_heavy))
print(heavy_chi2)
In [21]:
fig, ax = plt.subplots()
# plot effective area data points with poisson errors
ax.errorbar(np.log10(energy_midpoints), eff_area_light, yerr=eff_area_error_light,
ls='None', marker='.')
ax.errorbar(np.log10(energy_midpoints), eff_area_heavy, yerr=eff_area_error_heavy,
ls='None', marker='.')
# plot corresponding sigmoid fits to effective area
x = 10**np.arange(5.0, 9.5, 0.01)
ax.plot(np.log10(x), sigmoid_flat(x, *popt_light),
color=color_dict['light'], label='light', marker='None', ls='-')
ax.plot(np.log10(x), sigmoid_flat(x, *popt_heavy),
color=color_dict['heavy'], label='heavy', marker='None')
avg_eff_area = (sigmoid_flat(x, *popt_light) + sigmoid_flat(x, *popt_heavy)) / 2
ax.plot(np.log10(x), avg_eff_area,
color=color_dict['total'], label='avg', marker='None')
ax.fill_between(np.log10(x),
avg_eff_area-0.01*avg_eff_area,
avg_eff_area+0.01*avg_eff_area,
color=color_dict['total'], alpha=0.5)
ax.axvline(6.4, marker='None', ls='-.', color='k')
ax.set_ylabel('Effective area [m$^2$]')
ax.set_xlabel('$\mathrm{\log_{10}(E_{true}/GeV)}$')
# ax.set_title('$\mathrm{A_{eff} = 143177 \pm 1431.77 \ m^2}$')
ax.grid()
# ax.set_ylim([0, 180000])
ax.set_xlim([5.4, 8.1])
ax.set_title(config)
#set label style
ax.ticklabel_format(style='sci',axis='y')
ax.yaxis.major.formatter.set_powerlimits((0,0))
leg = plt.legend(title='True composition')
for legobj in leg.legendHandles:
legobj.set_linewidth(2.0)
# eff_area_outfile = os.path.join(comp.paths.figures_dir, 'effective-area-{}.png'.format(config))
# comp.check_output_dir(eff_area_outfile)
# plt.savefig(eff_area_outfile)
plt.show()
Effective area as quality cuts are sequentially applied
In [19]:
df_sim, cut_dict_sim = comp.load_dataframe(datatype='sim', config='IC79', return_cut_dict=True)
standard_cut_keys = ['num_hits_1_60', 'IceTopQualityCuts', 'lap_InIce_containment',
# 'num_hits_1_60', 'max_qfrac_1_60',
'InIceQualityCuts']
# for cut in ['MilliNCascAbove2', 'MilliQtotRatio', 'MilliRloglBelow2', 'StochRecoSucceeded']:
# standard_cut_keys += ['InIceQualityCuts_{}'.format(cut)]
eff_area_dict = {}
eff_area_err_dict = {}
selection_mask = np.array([True] * len(df_sim))
for key in standard_cut_keys:
selection_mask *= cut_dict_sim[key]
print(key, np.sum(selection_mask))
eff_area, eff_area_error, _ = comp.analysis.get_effective_area(df_sim[selection_mask],
energy_bins, energy='MC')
# eff_area, eff_area_error = comp.analysis.effective_area.effective_area(df_sim[selection_mask],
# np.arange(5.0, 9.51, 0.1))
eff_area_dict[key] = eff_area
eff_area_err_dict[key] = eff_area_error
In [20]:
fig, ax = plt.subplots()
cut_labels = {'num_hits_1_60': 'NStations/NChannels', 'IceTopQualityCuts': 'IceTopQualityCuts',
'lap_InIce_containment': 'InIce containment', 'InIceQualityCuts': 'InIceQualityCuts'}
for key in standard_cut_keys:
# plot effective area data points with poisson errors
ax.errorbar(np.log10(energy_midpoints), eff_area_dict[key], yerr=eff_area_err_dict[key],
ls='None', marker='.', label=cut_labels[key], alpha=0.75)
ax.set_ylabel('Effective area [m$^2$]')
ax.set_xlabel('$\log_{10}(E_{\mathrm{MC}}/\mathrm{GeV})$')
ax.grid()
# ax.set_ylim([0, 180000])
ax.set_xlim([5.4, 9.6])
#set label style
ax.ticklabel_format(style='sci',axis='y')
ax.yaxis.major.formatter.set_powerlimits((0,0))
leg = plt.legend()
plt.savefig('/home/jbourbeau/public_html/figures/effective-area-cuts.png')
plt.show()
In [ ]:
In [ ]: