In [1]:
%load_ext autoreload
%autoreload 2
In [2]:
from __future__ import division, print_function
import os
import numpy as np
import pandas as pd
import pyunfold
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import seaborn as sns
import comptools as comp
sns.set_context(context='paper', font_scale=1.5)
color_dict = comp.color_dict
%matplotlib inline
In [3]:
config = 'IC86.2012'
num_groups = 2
comp_list = comp.get_comp_list(num_groups=num_groups)
energybins = comp.get_energybins(config)
num_ebins = len(energybins.log_energy_midpoints)
unfolding_dir = os.path.join(comp.paths.comp_data_dir,
config,
'unfolding')
In [4]:
# Load DataFrame with saved prior distributions
df_file = os.path.join(unfolding_dir,
'unfolding-df_{}-groups.hdf'.format(num_groups))
df_priors = pd.read_hdf(df_file)
df_priors.head()
Out[4]:
In [23]:
# Load simulation and train composition classifier
df_sim_train, df_sim_test = comp.load_sim(config=config,
# energy_reco=True,
energy_reco=False,
log_energy_min=None,
log_energy_max=None,
test_size=0.5,
n_jobs=10,
verbose=True)
In [24]:
feature_list, feature_labels = comp.get_training_features()
In [25]:
print('Running energy reconstruction...')
energy_pipeline = comp.load_trained_model('linearregression_energy_{}'.format(config))
# energy_pipeline = comp.load_trained_model('RF_energy_{}'.format(config))
for df in [df_sim_train, df_sim_test]:
X = df_sim_train[feature_list].values
# Energy reconstruction
df['reco_log_energy'] = energy_pipeline.predict(df[feature_list].values)
df['reco_energy'] = 10**df['reco_log_energy']
In [26]:
# Load fitted efficiencies and calculate effective areas
eff_path = os.path.join(comp.paths.comp_data_dir, config, 'efficiencies',
'efficiency_fit_num_groups_{}.hdf'.format(num_groups))
df_eff = pd.read_hdf(eff_path)
df_eff.head()
Out[26]:
In [27]:
# Get simulation thrown areas for each energy bin
thrown_radii = comp.simfunctions.get_sim_thrown_radius(energybins.log_energy_midpoints)
thrown_area = np.pi * thrown_radii.max()**2
thrown_area
Out[27]:
In [28]:
efficiencies, efficiencies_err = comp.get_detector_efficiencies(config=config,
num_groups=num_groups,
sigmoid='slant',
pyunfold_format=True)
In [29]:
print('Running composition classifications...')
# pipeline_str = 'SGD_comp_{}_{}-groups'.format(config, num_groups)
pipeline_str = 'xgboost_comp_{}_{}-groups'.format(config, num_groups)
comp_pipeline = comp.load_trained_model(pipeline_str)
pred_target = comp_pipeline.predict(df_sim_test[feature_list].values)
In [31]:
print('Making response matrix...')
log_reco_energy_sim_test = df_sim_test['reco_log_energy']
log_true_energy_sim_test = df_sim_test['MC_log_energy']
true_target = df_sim_test['comp_target_{}'.format(num_groups)].values
response, response_err = comp.response_matrix(true_energy=log_true_energy_sim_test,
reco_energy=log_reco_energy_sim_test,
true_target=true_target,
pred_target=pred_target,
efficiencies=efficiencies,
efficiencies_err=efficiencies_err,
energy_bins=energybins.log_energy_bins)
In [32]:
fig, ax = plt.subplots()
im = ax.imshow(response, origin='lower', cmap='viridis')
ax.plot([0, response.shape[0] - 1], [0, response.shape[1] - 1], marker='None', ls=':', color='white')
comp.plotting.colorbar(im, label='Normalized response matrix')
ax.set_xlabel('True bins')
ax.set_ylabel('Reconstructed bins')
plt.show()
In [45]:
print('Loading data into memory...')
# df_data = comp.load_data(config=config,
# processed=True,
# columns=feature_list + ['lap_ra', 'lap_dec'],
# n_jobs=15,
# verbose=True)
data_dir = os.path.join(comp.paths.comp_data_dir,
'IC86.2012',
'data')
filepath_up_shift = os.path.join(data_dir,
'data_dataframe_vem_cal_up.hdf')
df_data = pd.read_hdf(filepath_up_shift,
columns=feature_list)
In [48]:
X_data = df_data[feature_list].values
In [49]:
print('Running composition classifications...')
df_data['pred_comp_target'] = comp_pipeline.predict(X_data)
In [50]:
print('Running energy regressions...')
df_data['reco_log_energy'] = energy_pipeline.predict(X_data)
In [51]:
counts_observed = {}
counts_observed_err = {}
for comp_idx, composition in enumerate(comp_list):
print(composition)
# Filter out events that don't pass composition & energy mask
pred_comp_mask = df_data['pred_comp_target'] == comp_idx
mask = pred_comp_mask
energies = df_data.loc[mask, 'reco_log_energy'].values
comp_counts, _ = np.histogram(energies, bins=energybins.log_energy_bins)
counts_observed[composition] = comp_counts
counts_observed_err[composition] = np.sqrt(comp_counts)
counts_observed_err['total'] = np.sqrt(np.sum(counts_observed_err[composition]**2 for composition in comp_list))
# Calculate total counts
counts_observed['total'] = np.sum(counts_observed[composition] for composition in comp_list)
# Format observed counts, detection efficiencies, and priors for PyUnfold use
counts_pyunfold = np.empty(num_groups * len(energybins.energy_midpoints))
counts_err_pyunfold = np.empty(num_groups * len(energybins.energy_midpoints))
for idx, composition in enumerate(comp_list):
counts_pyunfold[idx::num_groups] = counts_observed[composition]
counts_err_pyunfold[idx::num_groups] = counts_observed_err[composition]
In [52]:
counts_observed
Out[52]:
In [53]:
# Effective area
eff_area, eff_area_err = {}, {}
for composition in comp_list + ['total']:
eff_area[composition] = df_eff['eff_median_{}'.format(composition)].values * thrown_area
eff_area_err[composition] = df_eff['eff_err_low_{}'.format(composition)].values * thrown_area
# Solid angle
theta_max = 40 if config == 'IC79.2010' else 65
solid_angle = np.pi*np.sin(np.deg2rad(theta_max))**2
# Livetime
livetime, livetime_err = comp.get_detector_livetime(config=config)
In [54]:
def counts_to_flux(counts, counts_err=None, composition=None):
return comp.get_flux(counts, counts_err,
energybins=energybins.energy_bins,
eff_area=thrown_area,
livetime=livetime,
livetime_err=livetime_err,
solid_angle=solid_angle,
scalingindex=2.7)
In [55]:
# Run unfolding for each of the priors
names = ['uniform', 'H3a', 'H4a', 'simple_power_law']
# names = ['Jeffreys', 'H3a', 'H4a', 'Polygonato']
unfolding_results = {}
for prior_name in names:
prior = None if prior_name == 'uniform' else df_priors['{}_prior'.format(prior_name)]
# priors = 'Jeffreys' if prior_name == 'Jeffreys' else df['{}_prior'.format(prior_name)]
df_unfolding_iter = pyunfold.iterative_unfold(data=counts_pyunfold,
data_err=counts_err_pyunfold,
response=response,
response_err=response_err,
efficiencies=efficiencies,
efficiencies_err=efficiencies_err,
ts='ks',
ts_stopping=0.005,
prior=prior,
return_iterations=True)
# callbacks=[pyunfold.callbacks.SplineRegularizer(degree=1, smooth=256)])
unfolding_results[prior_name] = df_unfolding_iter
In [56]:
names
Out[56]:
In [57]:
fig, ax = plt.subplots()
# fig, ax = plt.subplots(figsize=(10, 5))
linestyles = ['-', ':', '-.', '--']
markers = ['.', '*', '^', 'X']
for prior_name, ls, marker in zip(names, linestyles, markers):
# if prior_name != 'H4a':
# continue
counts, counts_sys_err, counts_stat_err = comp.unfolded_counts_dist(unfolding_results[prior_name],
num_groups=num_groups)
for composition in comp_list + ['total']:
# print(composition)
# print(counts[composition])
flux, flux_sys_err = counts_to_flux(counts=counts[composition],
counts_err=counts_sys_err[composition],
composition=composition)
flux, flux_stat_err = counts_to_flux(counts=counts[composition],
counts_err=counts_stat_err[composition],
composition=composition)
comp.plot_steps(energybins.log_energy_bins, flux,
yerr=flux_sys_err,
color=color_dict[composition],
ls=ls,
ax=ax)
# label = '{} ({})'.format(composition, prior_name.replace('_', '-'))
label = prior_name.replace('_', ' ') if composition == 'total' else ''
ax.errorbar(energybins.log_energy_midpoints, flux,
yerr=flux_stat_err,
color=color_dict[composition],
ls='None',
marker=marker,
capsize=20,
elinewidth=1,
label=label)
ax.set_yscale('log', nonposy='clip')
ax.set_xlabel('$\mathrm{\log_{10}(E/GeV)}$')
ax.set_ylabel('$\mathrm{ E^{2.7} \ J(E) \ [GeV^{1.7} m^{-2} sr^{-1} s^{-1}]}$')
ax.set_xlim(6.4, 7.8)
ax.set_ylim(1e3, 1e5)
ax.grid(lw=1, which='both')
ax.legend()
# ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
# ax.legend(loc='upper center',
# bbox_to_anchor=(0.5, 1.75),
# ncol=2,
# fancybox=True)
outfile = os.path.join(comp.paths.figures_dir, 'skymaps', 'allsky.png')
comp.check_output_dir(outfile)
plt.savefig(outfile)
plt.show()
In [220]:
unfolding_results[prior_name]
Out[220]:
In [224]:
fig, ax = plt.subplots()
# fig, ax = plt.subplots(figsize=(10, 5))
prior_name = 'H4a'
marker = '.'
ls = '-'
counts, counts_sys_err, counts_stat_err = comp.unfolded_counts_dist(unfolding_results[prior_name],
num_groups=num_groups)
for composition in comp_list + ['total']:
flux, flux_sys_err = counts_to_flux(counts=counts[composition],
counts_err=counts_sys_err[composition],
composition=composition)
flux, flux_stat_err = counts_to_flux(counts=counts[composition],
counts_err=counts_stat_err[composition],
composition=composition)
comp.plot_steps(energybins.log_energy_bins, flux,
yerr=flux_sys_err,
color=color_dict[composition],
fillalpha=0.4,
ls=ls,
label=composition,
ax=ax)
ax.errorbar(energybins.log_energy_midpoints, flux,
yerr=flux_stat_err,
color=color_dict[composition],
ls='None',
marker=marker,
capsize=20,
elinewidth=1)
ax.set_yscale('log', nonposy='clip')
ax.set_xlabel('$\mathrm{\log_{10}(E/GeV)}$')
ax.set_ylabel('$\mathrm{ E^{2.7} \ J(E) \ [GeV^{1.7} m^{-2} sr^{-1} s^{-1}]}$')
ax.set_xlim(6.4, 7.8)
ax.set_ylim(1e3, 1e5)
ax.grid(lw=1, which='both')
ax.legend()
# ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
# ax.legend(loc='upper center',
# bbox_to_anchor=(0.5, 1.75),
# ncol=2,
# fancybox=True)
outfile = os.path.join(comp.paths.figures_dir, 'skymaps', 'allsky.png')
comp.check_output_dir(outfile)
plt.savefig(outfile)
plt.show()
In [222]:
fig, ax = plt.subplots()
prior_name = 'simple_power_law'
marker = '.'
ls = '-'
df_unfolding = unfolding_results[prior_name]
def update(i):
ax.clear()
idx_iter, df_iter = i
color_dict_iter = {}
color_dict_iter['light'] = sns.color_palette('Blues', len(df_unfolding.index)).as_hex()[idx_iter]
color_dict_iter['heavy'] = sns.color_palette('Oranges', len(df_unfolding.index)).as_hex()[idx_iter]
color_dict_iter['total'] = sns.color_palette('Greens', len(df_unfolding.index)).as_hex()[idx_iter]
counts, counts_sys_err, counts_stat_err = comp.unfolded_counts_dist(unfolding_results[prior_name],
iteration=idx_iter,
num_groups=num_groups)
for idx, composition in enumerate(comp_list):
if idx == 0:
counts['total'] = np.zeros_like(counts[composition])
counts_sys_err['total'] = np.zeros_like(counts[composition])
counts_stat_err['total'] = np.zeros_like(counts[composition])
counts['total'] += counts[composition]
counts_sys_err['total'] += counts_sys_err[composition]**2
counts_stat_err['total'] += counts_stat_err[composition]**2
counts_sys_err['total'] = np.sqrt(counts_sys_err['total'])
counts_stat_err['total'] = np.sqrt(counts_stat_err['total'])
for composition in comp_list + ['total']:
flux, flux_err_sys = comp.get_flux(counts[composition], counts_sys_err[composition],
energybins=energybins.energy_bins,
eff_area=thrown_area,
livetime=livetime, livetime_err=livetime_err,
solid_angle=solid_angle)
flux, flux_err_stat = comp.get_flux(counts[composition], counts_stat_err[composition],
energybins=energybins.energy_bins,
eff_area=thrown_area,
livetime=livetime, livetime_err=livetime_err,
solid_angle=solid_angle)
comp.plot_steps(energybins.log_energy_bins, flux, yerr=flux_err_sys,
ax=ax, alpha=0.4, fillalpha=0.4,
color=color_dict[composition])
ax.errorbar(energybins.log_energy_midpoints, flux, yerr=flux_err_stat,
color=color_dict[composition], ls='None', marker='.',
label=composition)
ax.set_yscale("log", nonposy='clip')
ax.set_xlabel('$\mathrm{\log_{10}(E_{reco}/GeV)}$')
ax.set_ylabel('$\mathrm{ E^{2.7} \ J(E) \ [GeV^{1.7} m^{-2} sr^{-1} s^{-1}]}$')
ax.set_title('Iteration: {}'.format(idx_iter+1))
ax.set_xlim(6.4, 7.8)
ax.set_ylim([1e3, 7e4])
ax.grid(linestyle='dotted', which="both")
ax.legend()
return ax
anim = FuncAnimation(fig, update, frames=list(df_unfolding.iterrows()),
interval=1250)
iter_unfold_outfile = os.path.join(comp.paths.figures_dir,
'unfolding',
config,
'iterations',
prior_name,
'{}-groups'.format(num_groups),
'flux_iter_{}-prior.gif'.format(prior_name))
comp.check_output_dir(iter_unfold_outfile)
anim.save(iter_unfold_outfile, dpi=300, writer='imagemagick')
In [20]:
import healpy as hp
In [24]:
nside = 64
npix = hp.nside2npix(nside)
In [52]:
m = np.zeros(npix)
m[hp.ang2pix(nside, np.pi/2, np.pi/2)] = 1
m[hp.ang2pix(nside, np.pi/4, np.pi/4)] = 1
# m[:100] = 100
In [71]:
df_data.iloc[:100][['lap_ra', 'lap_dec']].head()
Out[71]:
In [291]:
ra = df_data.loc[:, 'lap_ra'].values
dec = df_data.loc[:, 'lap_dec'].values
Convert from ra/dec equatorial coordinates to the theta/phi coordinates used in healpy
In [291]:
theta, phi = comp.equatorial_to_healpy(ra, dec)
In [292]:
skymap = np.zeros(npix)
unique_pix, pix_counts = np.unique(pix_array, return_counts=True)
skymap[unique_pix] += pix_counts
In [273]:
def mask_map(skymap, decmin=None, decmax=None):
if decmin is None and decmax is None:
raise ValueError('decmin and/or decmax must be specified')
npix = skymap.shape[0]
nside = hp.npix2nside(npix)
theta, phi = hp.pix2ang(nside, range(npix))
print(theta)
theta_mask = np.ones(npix, dtype=bool)
if decmin is not None:
theta_min = np.deg2rad(90 - decmin)
print(theta_min)
theta_mask *= (theta >= theta_min)
print(theta_mask.sum())
if decmax is not None:
theta_max = np.deg2rad(90 - decmax)
print(theta_max)
theta_mask *= (theta <= theta_max)
print(theta_mask.sum())
masked_map = np.copy(skymap)
masked_map[theta_mask] = hp.UNSEEN
return masked_map
In [293]:
masked_m = mask_map(skymap, decmin=None, decmax=-40)
np.sum(masked_m == hp.UNSEEN)
Out[293]:
In [294]:
from matplotlib.ticker import FormatStrFormatter
hp.mollview(masked_m, rot=180, title='', cbar=False)
hp.graticule(verbose=False)
fig = plt.gcf()
ax = plt.gca()
image = ax.get_images()[0]
cbar = fig.colorbar(image,
orientation='horizontal',
aspect=50,
pad=0.01,
fraction=0.1,
ax=ax,
format=FormatStrFormatter('%g'),
shrink=1.0)
cbar.set_label('Counts', size=14)
ax.set_ylim(-1, 0.005)
ax.annotate('0$^\circ$', xy=(1.8, -0.75), size=14)
ax.annotate('360$^\circ$', xy=(-1.99, -0.75), size=14)
ax.annotate('IC86.2012 data', xy=(-1.85,-0.24), size=20, color='white')
plt.show()
In [296]:
theta, phi
Out[296]:
In [300]:
test_m = np.zeros(npix)
radius = np.deg2rad(10)
event_id = 10
vec = hp.ang2vec(theta[event_id], phi[event_id])
ipix = hp.query_disc(nside, vec=vec, radius=radius)
in_disc = np.isin(np.arange(npix), ipix)
test_m[in_disc] = 2
test_m[~in_disc] = 1
data_disc = skymap.copy()
data_disc[~in_disc] = hp.UNSEEN
hp.mollview(data_disc, rot=180, title='', cbar=False)
hp.graticule(verbose=False)
fig = plt.gcf()
ax = plt.gca()
image = ax.get_images()[0]
cbar = fig.colorbar(image,
orientation='horizontal',
aspect=50,
pad=0.01,
fraction=0.1,
ax=ax,
format=FormatStrFormatter('%g'),
shrink=1.0)
cbar.set_label('Counts', size=14)
ax.set_ylim(-1, 0.005)
ax.annotate('0$^\circ$', xy=(1.8, -0.75), size=14)
ax.annotate('360$^\circ$', xy=(-1.99, -0.75), size=14)
ax.annotate('Disc on sky', xy=(-1.85,-0.24), size=20, color='white')
plt.show()
In [302]:
dec.min(), dec.max()
Out[302]:
In [349]:
fig, ax = plt.subplots()
ax.hist(np.rad2deg(dec), bins=50, alpha=0.8)
dec_median = np.median(dec)
dec_median_deg = np.rad2deg(dec_median)
print('dec_median = {} [rad]'.format(dec_median))
print('dec_median = {} [deg]'.format(dec_median_deg))
ax.axvline(dec_median_deg, marker='None', ls='--', lw=1, color='k',
label='Median ({:0.1f}'.format(dec_median_deg)+' $^{\circ}$)')
# ax.set_yscale('log', nonposy='clip')
ax.set_xlabel('Declination [$^{\circ}$]')
ax.set_ylabel('Counts')
ax.grid()
ax.legend()
plt.show()
In [346]:
dec_mask = df_data.lap_dec < dec_median
df_data df_data.loc[dec_mask]
Out[346]:
In [348]:
df_data.loc[df_data.lap_dec > dec_median, 'lap_dec']
Out[348]:
In [ ]: