Author: James Bourbeau
In [1]:
%load_ext watermark
%watermark -u -d -v -p numpy,scipy,pandas,sklearn,mlxtend
In [2]:
from __future__ import division, print_function
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from icecube.weighting.weighting import from_simprod
from icecube import dataclasses
import comptools as comp
import comptools.analysis.plotting as plotting
color_dict = comp.analysis.get_color_dict()
%matplotlib inline
[ back to top ]
In [6]:
config = 'IC86.2012'
# comp_list = ['light', 'heavy']
comp_list = ['PPlus', 'Fe56Nucleus']
june_july_data_only = False
In [4]:
sim_df = comp.load_dataframe(datatype='sim', config=config, split=False)
data_df = comp.load_dataframe(datatype='data', config=config)
In [5]:
data_df = data_df[np.isfinite(data_df['log_dEdX'])]
In [7]:
if june_july_data_only:
print('Masking out all data events not in June or July')
def is_june_july(time):
i3_time = dataclasses.I3Time(time)
return i3_time.date_time.month in [6, 7]
june_july_mask = data_df.end_time_mjd.apply(is_june_july)
data_df = data_df[june_july_mask].reset_index(drop=True)
In [8]:
months = (6, 7) if june_july_data_only else None
livetime, livetime_err = comp.get_detector_livetime(config, months=months)
[ back to top ]
For more information, see the IT73-IC79 Data-MC comparison wiki page.
First, we'll need to define a 'realistic' flux model
In [9]:
phi_0 = 3.5e-6
# phi_0 = 2.95e-6
gamma_1 = -2.7
gamma_2 = -3.1
eps = 100
def flux(E):
E = np.array(E) * 1e-6
return (1e-6) * phi_0 * E**gamma_1 *(1+(E/3.)**eps)**((gamma_2-gamma_1)/eps)
In [10]:
from icecube.weighting.weighting import PowerLaw
In [11]:
pl_flux = PowerLaw(eslope=-2.7, emin=1e5, emax=3e6, nevents=1e6) + \
PowerLaw(eslope=-3.1, emin=3e6, emax=1e10, nevents=1e2)
In [12]:
pl_flux.spectra
Out[12]:
In [13]:
from icecube.weighting.fluxes import GaisserH3a, GaisserH4a, Hoerandel5
flux_h4a = GaisserH4a()
In [14]:
energy_points = np.logspace(6.0, 9.0, 100)
fig, ax = plt.subplots()
ax.plot(np.log10(energy_points), energy_points**2.7*flux_h4a(energy_points, 2212),
marker='None', ls='-', lw=2, label='H4a proton')
ax.plot(np.log10(energy_points), energy_points**2.7*flux_h4a(energy_points, 1000260560),
marker='None', ls='-', lw=2, label='H4a iron')
ax.plot(np.log10(energy_points), energy_points**2.7*flux(energy_points),
marker='None', ls='-', lw=2, label='Simple knee')
ax.plot(np.log10(energy_points), energy_points**2.7*pl_flux(energy_points),
marker='None', ls='-', lw=2, label='Power law (weighting)')
ax.set_yscale('log', nonposy='clip')
ax.set_xlabel('$\log_{10}(E/\mathrm{GeV})$')
ax.set_ylabel('$\mathrm{E}^{2.7} \ J(E) \ [\mathrm{GeV}^{1.7} \mathrm{m}^{-2} \mathrm{sr}^{-1} \mathrm{s}^{-1}]$')
ax.grid(which='both')
ax.legend()
plt.show()
In [15]:
simlist = np.unique(sim_df['sim'])
for i, sim in enumerate(simlist):
gcd_file, sim_files = comp.simfunctions.get_level3_sim_files(sim)
num_files = len(sim_files)
print('Simulation set {}: {} files'.format(sim, num_files))
if i == 0:
generator = num_files*from_simprod(int(sim))
else:
generator += num_files*from_simprod(int(sim))
energy = sim_df['MC_energy'].values
ptype = sim_df['MC_type'].values
num_ptypes = np.unique(ptype).size
cos_theta = np.cos(sim_df['MC_zenith']).values
weights = 1.0/generator(energy, ptype, cos_theta)
# weights = weights/num_ptypes
In [16]:
sim_df['weights'] = flux(sim_df['MC_energy'])*weights
# sim_df['weights'] = flux_h4a(sim_df['MC_energy'], sim_df['MC_type'])*weights
In [17]:
MC_comp_mask = {}
for composition in comp_list:
MC_comp_mask[composition] = sim_df['MC_comp'] == composition
# MC_comp_mask[composition] = sim_df['MC_comp_class'] == composition
In [18]:
def plot_rate(array, weights, bins, xlabel=None, color='C0',
label=None, legend=True, alpha=0.8, ax=None):
if ax is None:
ax = plt.gca()
rate = np.histogram(array, bins=bins, weights=weights)[0]
rate_err = np.sqrt(np.histogram(array, bins=bins, weights=weights**2)[0])
plotting.plot_steps(bins, rate, yerr=rate_err, color=color,
label=label, alpha=alpha, ax=ax)
ax.set_yscale('log', nonposy='clip')
ax.set_ylabel('Rate [Hz]')
if xlabel:
ax.set_xlabel(xlabel)
if legend:
ax.legend()
ax.grid(True)
return ax
In [19]:
def plot_data_MC_ratio(sim_array, sim_weights, data_array, data_weights, bins,
xlabel=None, color='C0', alpha=0.8, label=None,
legend=False, ylim=None, ax=None):
if ax is None:
ax = plt.gca()
sim_rate = np.histogram(sim_array, bins=bins, weights=sim_weights)[0]
sim_rate_err = np.sqrt(np.histogram(sim_array, bins=bins, weights=sim_weights**2)[0])
data_rate = np.histogram(data_array, bins=bins, weights=data_weights)[0]
data_rate_err = np.sqrt(np.histogram(data_array, bins=bins, weights=data_weights**2)[0])
ratio, ratio_err = comp.analysis.ratio_error(data_rate, data_rate_err, sim_rate, sim_rate_err)
plotting.plot_steps(bins, ratio, yerr=ratio_err,
color=color, label=label, alpha=alpha, ax=ax)
ax.grid(True)
ax.set_ylabel('Data/MC')
if xlabel:
ax.set_xlabel(xlabel)
if ylim:
ax.set_ylim(ylim)
if legend:
ax.legend()
ax.axhline(1, marker='None', ls='-.', color='k')
return ax
[ back to top ]
In [20]:
sim_df['log_s125'].plot(kind='hist', bins=100, alpha=0.6, lw=1.5)
plt.xlabel('$\log_{10}(\mathrm{S}_{125})$')
plt.ylabel('Counts');
In [21]:
log_s125_bins = np.linspace(-0.5, 3.5, 75)
gs = gridspec.GridSpec(2, 1, height_ratios=[2,1], hspace=0.0)
ax1 = plt.subplot(gs[0])
ax2 = plt.subplot(gs[1], sharex=ax1)
for composition in comp_list:
sim_s125 = sim_df[MC_comp_mask[composition]]['log_s125']
sim_weights = sim_df[MC_comp_mask[composition]]['weights']
plot_rate(sim_s125, sim_weights, bins=log_s125_bins,
color=color_dict[composition], label=composition, ax=ax1)
data_weights = np.array([1/livetime]*len(data_df['log_s125']))
plot_rate(data_df['log_s125'], data_weights, bins=log_s125_bins,
color=color_dict['data'], label='Data', ax=ax1)
for composition in comp_list:
sim_s125 = sim_df[MC_comp_mask[composition]]['log_s125']
sim_weights = sim_df[MC_comp_mask[composition]]['weights']
ax2 = plot_data_MC_ratio(sim_s125, sim_weights,
data_df['log_s125'], data_weights, log_s125_bins,
xlabel='$\log_{10}(\mathrm{S}_{125})$', color=color_dict[composition],
label=composition, ax=ax2)
ax2.set_ylim((0, 2))
ax1.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
ncol=3, mode="expand", borderaxespad=0., frameon=False)
plt.savefig(os.path.join(comp.paths.figures_dir, 'data-MC-comparison', 's125.png'))
plt.show()
In [47]:
sim_df['log_dEdX'].plot(kind='hist', bins=100, alpha=0.6, lw=1.5)
plt.xlabel('$\log_{10}(\mathrm{dE/dX})$')
plt.ylabel('Counts');
In [60]:
log_dEdX_bins = np.linspace(-2, 4, 75)
gs = gridspec.GridSpec(2, 1, height_ratios=[2,1], hspace=0.0)
ax1 = plt.subplot(gs[0])
ax2 = plt.subplot(gs[1], sharex=ax1)
for composition in comp_list:
sim_dEdX = sim_df[MC_comp_mask[composition]]['log_dEdX']
sim_weights = sim_df[MC_comp_mask[composition]]['weights']
plot_rate(sim_dEdX, sim_weights, bins=log_dEdX_bins,
color=color_dict[composition], label=composition, ax=ax1)
data_weights = np.array([1/livetime]*len(data_df))
plot_rate(data_df['log_dEdX'], data_weights, bins=log_dEdX_bins,
color=color_dict['data'], label='Data', ax=ax1)
for composition in comp_list:
sim_dEdX = sim_df[MC_comp_mask[composition]]['log_dEdX']
sim_weights = sim_df[MC_comp_mask[composition]]['weights']
ax2 = plot_data_MC_ratio(sim_dEdX, sim_weights,
data_df['log_dEdX'], data_weights, log_dEdX_bins,
xlabel='$\log_{10}(\mathrm{dE/dX})$', color=color_dict[composition],
label=composition, ylim=[0, 5.5], ax=ax2)
ax1.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
ncol=3, mode="expand", borderaxespad=0., frameon=False)
plt.savefig(os.path.join(comp.paths.figures_dir, 'data-MC-comparison', 'dEdX.png'))
plt.show()
In [49]:
sim_df['lap_cos_zenith'].plot(kind='hist', bins=100, alpha=0.6, lw=1.5)
plt.xlabel('$\cos(\\theta_{\mathrm{reco}})$')
plt.ylabel('Counts');
In [55]:
cos_zenith_bins = np.linspace(0.8, 1.0, 75)
gs = gridspec.GridSpec(2, 1, height_ratios=[2,1], hspace=0.0)
ax1 = plt.subplot(gs[0])
ax2 = plt.subplot(gs[1], sharex=ax1)
for composition in comp_list:
sim_cos_zenith = sim_df[MC_comp_mask[composition]]['lap_cos_zenith']
sim_weights = sim_df[MC_comp_mask[composition]]['weights']
plot_rate(sim_cos_zenith, sim_weights, bins=cos_zenith_bins,
color=color_dict[composition], label=composition, ax=ax1)
data_weights = np.array([1/livetime]*len(data_df))
plot_rate(data_df['lap_cos_zenith'], data_weights, bins=cos_zenith_bins,
color=color_dict['data'], label='Data', ax=ax1)
for composition in comp_list:
sim_cos_zenith = sim_df[MC_comp_mask[composition]]['lap_cos_zenith']
sim_weights = sim_df[MC_comp_mask[composition]]['weights']
ax2 = plot_data_MC_ratio(sim_cos_zenith, sim_weights,
data_df['lap_cos_zenith'], data_weights, cos_zenith_bins,
xlabel='$\cos(\\theta_{\mathrm{reco}})$', color=color_dict[composition],
label=composition, ylim=[0, 3], ax=ax2)
ax1.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
ncol=3, mode="expand", borderaxespad=0., frameon=False)
plt.savefig(os.path.join(comp.paths.figures_dir, 'data-MC-comparison', 'zenith.png'))
plt.show()
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [24]:
sim_df['avg_inice_radius'].plot(kind='hist', bins=100, alpha=0.6, lw=1.5)
# plt.xlabel('$\cos(\\theta_{\mathrm{reco}})$')
plt.ylabel('Counts');
In [26]:
inice_radius_bins = np.linspace(0.0, 200, 75)
gs = gridspec.GridSpec(2, 1, height_ratios=[2,1], hspace=0.0)
ax1 = plt.subplot(gs[0])
ax2 = plt.subplot(gs[1], sharex=ax1)
for composition in comp_list:
sim_inice_radius = sim_df[MC_comp_mask[composition]]['avg_inice_radius']
sim_weights = sim_df[MC_comp_mask[composition]]['weights']
plot_rate(sim_inice_radius, sim_weights, bins=inice_radius_bins,
color=color_dict[composition], label=composition, ax=ax1)
data_weights = np.array([1/livetime]*len(data_df))
plot_rate(data_df['avg_inice_radius'], data_weights, bins=inice_radius_bins,
color=color_dict['data'], label='Data', ax=ax1)
for composition in comp_list:
sim_inice_radius = sim_df[MC_comp_mask[composition]]['avg_inice_radius']
sim_weights = sim_df[MC_comp_mask[composition]]['weights']
ax2 = plot_data_MC_ratio(sim_inice_radius, sim_weights,
data_df['avg_inice_radius'], data_weights, inice_radius_bins,
xlabel='$\cos(\\theta_{\mathrm{reco}})$', color=color_dict[composition],
label=composition, ylim=[0, 3], ax=ax2)
ax1.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
ncol=3, mode="expand", borderaxespad=0., frameon=False)
# plt.savefig(os.path.join(comp.paths.figures_dir, 'data-MC-comparison', 'zenith.png'))
plt.show()
In [27]:
sim_df.columns
Out[27]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: