Cosmic-ray flux vs. sky position


In [1]:
%load_ext watermark
%watermark -u -d -v -p numpy,scipy,pandas,sklearn,mlxtend


last updated: 2018-08-21 

CPython 2.7.13
IPython 5.7.0

numpy 1.14.5
scipy 1.1.0
pandas 0.23.1
sklearn 0.19.1
mlxtend 0.12.0

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from __future__ import division, print_function
import os
import sys
from numbers import Number
import numpy as np
import pandas as pd
import healpy as hp
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
import seaborn as sns
import dask
from dask import delayed, compute
from dask.diagnostics import ProgressBar
import dask.array as da
import pyunfold

import comptools as comp
import sky_anisotropy as sa

color_dict = comp.color_dict

sns.set_context(context='paper', font_scale=1.5)

%matplotlib inline

In [4]:
config = 'IC86.2012'
num_groups = 2

comp_list = comp.get_comp_list(num_groups=num_groups)
analysis_bins = comp.get_bins(config=config, num_groups=num_groups)
energybins = comp.get_energybins(config)
num_ebins = len(energybins.log_energy_midpoints)

unfolding_dir  = os.path.join(comp.paths.comp_data_dir,
                              config,
                              'unfolding')

In [5]:
# Load DataFrame with saved prior distributions
df_file = os.path.join(unfolding_dir,
                       'unfolding-df_{}-groups.hdf'.format(num_groups))
df_priors = pd.read_hdf(df_file)
df_priors.head()


Out[5]:
| log_energy_bin_idx | counts | counts_err | efficiencies | efficiencies_err | broken_power_law_flux | broken_power_law_prior | simple_power_law_flux | simple_power_law_prior | H3a_flux | H3a_prior | H4a_flux | H4a_prior |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1071404.0 | 1035.086470 | 0.007119 | 0.000084 | 1.416780e-12 | 0.231674 | 1.416780e-12 | 0.231486 | 7.187420e-13 | 0.302586 | 7.333020e-13 | 0.306455 |
| 1 | 743932.0 | 862.514927 | 0.006400 | 0.000078 | 1.416780e-12 | 0.231674 | 1.416780e-12 | 0.231486 | 4.082709e-13 | 0.171880 | 4.023280e-13 | 0.168137 |
| 2 | 851277.0 | 922.646736 | 0.007928 | 0.000075 | 7.608561e-13 | 0.124416 | 7.608561e-13 | 0.124315 | 3.743101e-13 | 0.157582 | 3.821589e-13 | 0.159708 |
| 3 | 702982.0 | 838.440219 | 0.007473 | 0.000068 | 7.608561e-13 | 0.124416 | 7.608561e-13 | 0.124315 | 2.232925e-13 | 0.094005 | 2.198728e-13 | 0.091887 |
| 4 | 579872.0 | 761.493270 | 0.008373 | 0.000068 | 4.086039e-13 | 0.066816 | 4.086039e-13 | 0.066761 | 1.925426e-13 | 0.081059 | 1.967681e-13 | 0.082231 |

In [6]:
# Load simulation and train composition classifier
df_sim_train, df_sim_test = comp.load_sim(config=config,
                                          energy_reco=False,
                                          log_energy_min=None,
                                          log_energy_max=None,
                                          test_size=0.5,
                                          n_jobs=10,
                                          verbose=True)


/home/jbourbeau/cr-composition/.virtualenv/lib/python2.7/site-packages/dask/base.py:835: UserWarning: The get= keyword has been deprecated. Please use the scheduler= keyword instead with the name of the desired scheduler like 'threads' or 'processes'
  warnings.warn("The get= keyword has been deprecated. "
[########################################] | 100% Completed |  6.8s
[########################################] | 100% Completed |  0.1s

In [7]:
feature_list, feature_labels = comp.get_training_features()

In [8]:
print('Running energy reconstruction...')
energy_pipeline_name = 'linearregression'
# energy_pipeline_name = 'RF'
energy_pipeline = comp.load_trained_model('{}_energy_{}'.format(energy_pipeline_name, config))
for df in [df_sim_train, df_sim_test]:
    # Energy reconstruction
    df['reco_log_energy'] = energy_pipeline.predict(df[feature_list].values)
    df['reco_energy'] = 10**df['reco_log_energy']


Running energy reconstruction...

In [9]:
# Load fitted efficiencies and calculate effective areas
# eff_path = os.path.join(comp.paths.comp_data_dir, config, 'efficiencies',
#                         'efficiency_fit_num_groups_{}.hdf'.format(num_groups))
eff_path = comp.get_efficiencies_file(config=config,
                                      num_groups=num_groups,
                                      sigmoid='slant')
df_eff = pd.read_hdf(eff_path)
df_eff.head()


Out[9]:
|   | eff_median_light | eff_err_low_light | eff_err_high_light | eff_median_heavy | eff_err_low_heavy | eff_err_high_heavy | eff_median_total | eff_err_low_total | eff_err_high_total |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.007116 | 0.000083 | 0.000084 | 0.006401 | 0.000086 | 0.000076 | 0.006736 | 0.000051 | 0.000058 |
| 1 | 0.007926 | 0.000072 | 0.000065 | 0.007473 | 0.000066 | 0.000069 | 0.007684 | 0.000045 | 0.000047 |
| 2 | 0.008371 | 0.000065 | 0.000067 | 0.008009 | 0.000065 | 0.000063 | 0.008192 | 0.000043 | 0.000047 |
| 3 | 0.008609 | 0.000071 | 0.000062 | 0.008274 | 0.000068 | 0.000064 | 0.008456 | 0.000045 | 0.000048 |
| 4 | 0.008735 | 0.000072 | 0.000067 | 0.008426 | 0.000066 | 0.000064 | 0.008601 | 0.000045 | 0.000049 |

In [10]:
# Use the largest simulation thrown area across the energy bins
thrown_radii = comp.simfunctions.get_sim_thrown_radius(energybins.log_energy_midpoints)
thrown_area = np.max(np.pi * thrown_radii**2)
thrown_area


Out[10]:
9079202.768874502

In [11]:
efficiencies, efficiencies_err = comp.get_detector_efficiencies(config=config,
                                                                num_groups=num_groups,
                                                                sigmoid='slant',
                                                                pyunfold_format=True)

In [12]:
efficiencies


Out[12]:
array([0.00711621, 0.00640114, 0.00792564, 0.00747336, 0.00837118,
       0.00800932, 0.00860879, 0.00827434, 0.00873487, 0.00842553,
       0.00880808, 0.00853552, 0.00885811, 0.00862577, 0.00889926,
       0.00871167, 0.00893319, 0.00879576, 0.00896376, 0.0088786 ,
       0.00899596, 0.00896131, 0.00902511, 0.00904439, 0.00905524,
       0.00912615, 0.00908302, 0.00920706, 0.00911336, 0.00928806,
       0.00914197, 0.00936856, 0.00917137, 0.00944958, 0.009201  ,
       0.00953194, 0.00922881, 0.00961275])
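
The flattened PyUnfold-format array appears to interleave the two composition groups within each energy bin, i.e. [light_0, heavy_0, light_1, heavy_1, ...] (compare with the eff_median_light/eff_median_heavy columns above). A minimal check of this assumed layout:


In [ ]:
# Assumed interleaved layout: [light_0, heavy_0, light_1, heavy_1, ...]
# (inferred from the output above, not from the comptools docs)
eff_2d = efficiencies.reshape(-1, num_groups)
np.testing.assert_allclose(eff_2d[:, 0], df_eff['eff_median_light'].values, rtol=1e-3)
np.testing.assert_allclose(eff_2d[:, 1], df_eff['eff_median_heavy'].values, rtol=1e-3)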

In [25]:
print('Running composition classifications...')
# pipeline_str = 'SGD_comp_{}_{}-groups'.format(config, num_groups)
pipeline_str = 'xgboost_comp_{}_{}-groups'.format(config, num_groups)
comp_pipeline = comp.load_trained_model(pipeline_str)
pred_target = comp_pipeline.predict(df_sim_test[feature_list].values)


Running composition classifications...
/home/jbourbeau/cr-composition/.virtualenv/lib/python2.7/site-packages/sklearn/preprocessing/label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.
  if diff:

In [26]:
df_sim_test['pred_comp_target'] = pred_target

In [27]:
print('Making response matrix...')
log_reco_energy_sim_test = df_sim_test['reco_log_energy']
log_true_energy_sim_test = df_sim_test['MC_log_energy']
true_target = df_sim_test['comp_target_{}'.format(num_groups)].values

response, response_err = comp.response_matrix(true_energy=log_true_energy_sim_test,
                                              reco_energy=log_reco_energy_sim_test,
                                              true_target=true_target,
                                              pred_target=pred_target,
                                              efficiencies=efficiencies,
                                              efficiencies_err=efficiencies_err,
                                              energy_bins=energybins.log_energy_bins)


Making response matrix...

In [28]:
# Columns of the response matrix should sum to the detection efficiencies
np.testing.assert_allclose(response.sum(axis=0), efficiencies)

In [29]:
plt.imshow(response, origin='lower')
plt.colorbar()
plt.show()



In [30]:
# print('Loading data into memory...')
# df_data = comp.load_data(config=config,
#                          energy_reco=True,
#                          log_energy_min=6.1,
#                          log_energy_max=8.0,
#                          columns=feature_list + ['lap_ra', 'lap_dec'],
#                          n_jobs=20,
#                          verbose=True)

In [31]:
# df_data.to_hdf('data_dataframe.hdf', 'dataframe', format='table')

In [32]:
df_data = pd.read_hdf('data_dataframe.hdf', 'dataframe', mode='r')

In [33]:
df_data.shape


Out[33]:
(6960257, 7)

In [34]:
energybins.log_energy_bins


Out[34]:
array([6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 7. , 7.1, 7.2, 7.3,
       7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8. ])

In [35]:
df_data.reco_log_energy.min(), df_data.reco_log_energy.max()


Out[35]:
(6.100000018173085, 7.9997100892394135)

In [36]:
print('Running energy reconstruction...')
for df in [df_data]:
    # Energy reconstruction
    df['reco_log_energy'] = energy_pipeline.predict(df[feature_list].values)
    df['reco_energy'] = 10**df['reco_log_energy']


Running energy reconstruction...

In [37]:
df_data.reco_log_energy.min(), df_data.reco_log_energy.max()


Out[37]:
(6.100000018173085, 7.9997100892394135)

In [38]:
fig, ax = plt.subplots()
df_data.reco_log_energy.plot(kind='hist', bins=50, ax=ax)
ax.set(yscale='log')
plt.show()


Data declination distribution


In [39]:
ra = df_data.loc[:, 'lap_ra'].values
dec = df_data.loc[:, 'lap_dec'].values

In [40]:
dec_median = df_data.loc[:, 'lap_dec'].median()
dec_median_deg = np.rad2deg(dec_median)

In [29]:
fig, ax = plt.subplots()
ax.hist(np.rad2deg(dec), bins=50, alpha=0.8)
print('dec_median = {} [rad]'.format(dec_median))
print('dec_median = {} [deg]'.format(dec_median_deg))
ax.axvline(dec_median_deg, marker='None', ls='--', lw=1, color='k',
           label='Median ({:0.1f}'.format(dec_median_deg)+' $^{\circ}$)')
# ax.set_yscale('log', nonposy='clip')
ax.set_xlabel('Declination [$^{\circ}$]')
ax.set_ylabel('Counts')
ax.grid()
ax.legend()
outfile = os.path.join(comp.paths.figures_dir, 'skymaps', 'declination_hist.png')
comp.check_output_dir(outfile)
plt.savefig(outfile)
plt.show()


dec_median = -1.30333228859 [rad]
dec_median = -74.6754394391 [deg]

Flux vs. declination


In [41]:
import dask.array as da

X_data = da.from_array(df_data[feature_list].values, chunks=int(1e4))
X_data


Out[41]:
dask.array<array, shape=(6960257, 3), dtype=float64, chunksize=(10000, 3)>

In [42]:
from dask_ml.wrappers import ParallelPostFit
from dask.diagnostics import ProgressBar

pred_comp_target = ParallelPostFit(comp_pipeline).predict(X_data)
reco_log_energy = ParallelPostFit(energy_pipeline).predict(X_data)

In [43]:
import warnings

with ProgressBar(), warnings.catch_warnings():
    warnings.simplefilter('ignore')  # Want to ignore xgboost DeprecationWarning
    print('Running composition classifications...')
    df_data['pred_comp_target'] = pred_comp_target.compute(scheduler='threads',
                                                           num_workers=20)
#     df_data['pred_comp_target'] = pred_comp_target.compute()


Running composition classifications...
[########################################] | 100% Completed |  5.6s
[########################################] | 100% Completed |  0.1s

In [44]:
# print('Running energy and composition reconstructions...')
# df_data['pred_comp_target'] = comp_pipeline.predict(df_data[feature_list].values)
# df_data['reco_log_energy'] = energy_pipeline.predict(df_data[feature_list].values)

In [45]:
# Effective area
eff_area, eff_area_err = {}, {}
for composition in comp_list+['total']:
    eff_area[composition] = df_eff['eff_median_{}'.format(composition)].values * thrown_area
    eff_area_err[composition] = df_eff['eff_err_low_{}'.format(composition)].values * thrown_area
    
# Livetime
livetime, livetime_err = comp.get_detector_livetime(config=config)

In [46]:
df_eff['eff_median_total'].values


Out[46]:
array([0.0067357 , 0.00768392, 0.00819213, 0.00845571, 0.00860106,
       0.00869518, 0.00876493, 0.00882504, 0.00888223, 0.00893701,
       0.00899151, 0.00904821, 0.00910125, 0.00915483, 0.00920796,
       0.00926119, 0.00931638, 0.00937015, 0.00942275])

In [47]:
np.rad2deg(np.arccos(df_data.lap_cos_zenith.max())), np.rad2deg(np.arccos(df_data.lap_cos_zenith.min()))


Out[47]:
(0.0048982721856552, 37.65427444520522)

In [48]:
def calc_solid_angle(theta_min=0, theta_max=np.pi/2):
    return 2*np.pi*(np.cos(theta_min) - np.cos(theta_max))

def calc_geom_factor(theta_min=0, theta_max=np.pi/2):
    return (np.cos(theta_min) + np.cos(theta_max)) / 2
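
The product of these two helpers reduces algebraically to the projected (cosine-weighted) solid angle of a flat detector, pi * sin^2(theta_max), which is the quantity evaluated for theta_max = 40 degrees a few cells below. A quick check of that identity:


In [ ]:
# 2*pi*(1 - cos(t)) * (1 + cos(t))/2 == pi * sin(t)**2
theta_max = np.deg2rad(40)
projected = calc_solid_angle(0, theta_max) * calc_geom_factor(0, theta_max)
np.testing.assert_allclose(projected, np.pi * np.sin(theta_max)**2)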

In [49]:
efficiencies


Out[49]:
array([0.00711621, 0.00640114, 0.00792564, 0.00747336, 0.00837118,
       0.00800932, 0.00860879, 0.00827434, 0.00873487, 0.00842553,
       0.00880808, 0.00853552, 0.00885811, 0.00862577, 0.00889926,
       0.00871167, 0.00893319, 0.00879576, 0.00896376, 0.0088786 ,
       0.00899596, 0.00896131, 0.00902511, 0.00904439, 0.00905524,
       0.00912615, 0.00908302, 0.00920706, 0.00911336, 0.00928806,
       0.00914197, 0.00936856, 0.00917137, 0.00944958, 0.009201  ,
       0.00953194, 0.00922881, 0.00961275])

In [50]:
np.pi*np.sin(np.deg2rad(40))**2  # projected solid angle for theta_max = 40 deg


Out[50]:
1.2980304071610544

Cosmic-ray flux vs. sky position

Sample on/off regions


In [51]:
import sky_anisotropy as sa
from scipy import stats
from scipy.special import erfcinv

In [52]:
nside = 64
npix = hp.nside2npix(nside)
size = np.deg2rad(5)

In [53]:
ra = df_data.loc[:, 'lap_ra'].values
dec = df_data.loc[:, 'lap_dec'].values

In [54]:
# maps = []
# for i in range(20):
#     print(i)
#     ra_random = df_data.loc[:, 'lap_ra'].sample(frac=1.0, random_state=i).values
#     theta_random, phi_random = comp.equatorial_to_healpy(ra_random, dec)
#     pix_random = hp.ang2pix(nside, theta_random, phi_random)
    
#     data_skymap_random = np.zeros(npix)
#     unique_pix, pix_counts = np.unique(pix_random, return_counts=True)
#     data_skymap_random[unique_pix] += pix_counts
#     maps.append(data_skymap_random)

In [55]:
# ref = np.mean(maps, axis=0)
# ref

In [56]:
# comp.plot_skymap(ref, polar=True)
# plt.show()

Convert from RA/Dec equatorial coordinates to the theta/phi (colatitude/azimuth) convention used by healpy


In [57]:
theta, phi = comp.equatorial_to_healpy(ra, dec)
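
A minimal sketch of what this conversion presumably does, assuming the standard healpy convention (colatitude theta = pi/2 - Dec, measured from the north celestial pole, and azimuth phi = RA); the implementation of comp.equatorial_to_healpy isn't shown here:


In [ ]:
# Assumed convention: theta is colatitude (0 at Dec = +90 deg, pi at Dec = -90 deg)
# and phi is right ascension wrapped to [0, 2*pi)
np.testing.assert_allclose(theta, np.pi / 2 - dec)
np.testing.assert_allclose(phi, ra % (2 * np.pi))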

Calculate the HEALPix pixel index for each data event


In [58]:
pix_array = hp.ang2pix(nside, theta, phi)
df_data['pix'] = pix_array

In [59]:
data_skymap = np.zeros(npix)
unique_pix, pix_counts = np.unique(pix_array, return_counts=True)
data_skymap[unique_pix] += pix_counts
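
For reference, np.bincount builds the same counts map in a single step, equivalent to the unique/add pattern above:


In [ ]:
# np.bincount over pixel indices reproduces the counts map built above
data_skymap_alt = np.bincount(pix_array, minlength=npix).astype(float)
np.testing.assert_allclose(data_skymap_alt, data_skymap)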

In [60]:
# ri = (data_skymap - ref) / ref
# ri = comp.mapfunctions.smooth_map(ri)

# theta, phi = hp.pix2ang(nside, list(range(npix)))
# ra, dec = sa.healpy_to_equatorial(theta, phi)

# dec_max_deg = -65
# dec_mask = dec < np.deg2rad(dec_max_deg)
# ri[~dec_mask] = hp.UNSEEN

# comp.plot_skymap(ri, polar=True)
# plt.show()

In [61]:
data_skymap[data_skymap == 0] = hp.UNSEEN
comp.plot_skymap(data_skymap, cbar_title='Counts', polar=True)
# plt.title('IC86.2012 data')
outfile = os.path.join(comp.paths.figures_dir,
                       'spectrum-anisotropy',
                       '{}-data-skymap-polar.png'.format(config))
comp.check_output_dir(outfile)
plt.savefig(outfile)
plt.show()


/home/jbourbeau/cr-composition/.virtualenv/lib/python2.7/site-packages/healpy/projector.py:750: RuntimeWarning: invalid value encountered in greater
  mask = (np.asarray(x)**2+np.asarray(y)**2>1.0)
/home/jbourbeau/cr-composition/.virtualenv/lib/python2.7/site-packages/healpy/projaxes.py:387: RuntimeWarning: invalid value encountered in greater
  w=np.where(d2 > threshold)[0]

In [62]:
# ra_test = np.deg2rad(34)
# dec_test = np.deg2rad(-70)

ra_test = np.deg2rad(34)
dec_test = np.deg2rad(-80)

theta_test, phi_test = sa.equatorial_to_healpy(ra_test, dec_test)
pix_test = hp.ang2pix(nside, theta_test, phi_test)

# pix_in_disc = sa.regions.disc_on_region(pix_test, size=size, nside=nside)
pix_in_disc = sa.regions.square_on_region(pix_test, size=size, nside=nside)

In [63]:
p = np.arange(npix)

In [64]:
on_region_skymap = data_skymap.copy()
in_on_region = np.isin(p, pix_in_disc)
on_region_skymap[~in_on_region] = hp.UNSEEN

comp.plot_skymap(on_region_skymap, polar=True)
outfile = os.path.join(comp.paths.figures_dir,
                       'spectrum-anisotropy',
                       '{}-on-region-skymap-polar.png'.format(config))
comp.check_output_dir(outfile)
plt.savefig(outfile)
plt.show()



In [65]:
off_region_skymap = data_skymap.copy()
off_region_pix = sa.regions.theta_band_off_region(pix_in_disc, nside=nside)
in_off_region = np.isin(p, off_region_pix)
off_region_skymap[~in_off_region] = hp.UNSEEN
off_region_skymap[off_region_skymap == 0] = hp.UNSEEN

comp.plot_skymap(off_region_skymap, polar=True)

outfile = os.path.join(comp.paths.figures_dir,
                       'spectrum-anisotropy',
                       '{}-off-region-skymap-polar.png'.format(config))
comp.check_output_dir(outfile)
plt.savefig(outfile)
plt.show()


Spectrum anisotropy skymap


In [66]:
theta, phi = hp.pix2ang(nside, list(range(npix)))
ra, dec = sa.healpy_to_equatorial(theta, phi)

# dec_max_deg = -60
dec_max_deg = -65
# dec_max_deg = -80
# dec_max_deg = -84
print('dec_max_deg = {}'.format(dec_max_deg))

size = np.deg2rad(5)
print('size = {:0.2f} [deg]'.format(np.rad2deg(size)))

ebins = np.arange(6.4, 7.9, 0.1)
# ebins = np.arange(6.4, 7.5, 0.1)
# ebins = np.concatenate([ebins, [7.8]])

on_region = 'disc'
# on_region = 'square'
print('on_region = {}'.format(on_region))

# off_region = 'allsky'
off_region = 'theta_band'
# off_region = 'opposite'
print('off_region = {}'.format(off_region))

# key = 'log_s125'
key = 'reco_log_energy'
print('key = {}'.format(key))

# with_unfolding = True
with_unfolding = False

has_data = dec < np.deg2rad(dec_max_deg)
if off_region == 'theta_band':
    has_data = has_data & (dec > np.deg2rad(-90) + size)
    
has_data.sum()


dec_max_deg = -65
size = 5.00 [deg]
on_region = disc
off_region = theta_band
key = reco_log_energy
Out[66]:
2160

In [67]:
figures_outdir = os.path.join(comp.paths.figures_dir,
                              'spectrum-anisotropy',
                              '{}-on-{}-off-{:0.0f}deg-size'.format(on_region, off_region, np.rad2deg(size)),
                              'unfolded' if with_unfolding else 'pre-unfolding',
                             )
print('figures_outdir = {}'.format(figures_outdir))


figures_outdir = /home/jbourbeau/public_html/figures/composition/spectrum-anisotropy/disc-on-theta_band-off-5deg-size/pre-unfolding

In [68]:
pix_disc = np.arange(npix)[has_data]
pix_disc.shape


Out[68]:
(2160,)

In [69]:
data = df_data.loc[:, ['reco_log_energy', 'pred_comp_target']].values
pix = df_data.loc[:, 'pix'].values
# pix = df_data.loc[:, 'pix'].sample(frac=1.0).values

binned_skymaps = sa.binned_skymaps(data=data,
                                   pix=pix,
                                   bins=analysis_bins,
                                   nside=nside)

In [70]:
binned_skymaps


Out[70]:
array([[[   0,    0,    0, ..., 1085,  999, 1039],
        [   0,    0,    0, ...,  652,  612,  562]],

       [[   0,    0,    0, ...,  449,  519,  489],
        [   0,    0,    0, ...,  730,  680,  768]],

       [[   0,    0,    0, ...,  259,  267,  267],
        [   0,    0,    0, ...,  590,  579,  569]],

       ...,

       [[   0,    0,    0, ...,    1,    0,    0],
        [   0,    0,    0, ...,    0,    1,    1]],

       [[   0,    0,    0, ...,    0,    0,    0],
        [   0,    0,    0, ...,    1,    0,    1]],

       [[   0,    0,    0, ...,    0,    0,    0],
        [   0,    0,    0, ...,    1,    0,    0]]])
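
The layout appears to be one HEALPix counts map per (energy bin, composition group) pair; this shape is inferred from the output above rather than from the sky_anisotropy documentation:


In [ ]:
# Inferred layout: (energy bin, composition group, healpix pixel)
print(binned_skymaps.shape)
assert binned_skymaps.shape == (num_ebins, num_groups, npix)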

In [71]:
if with_unfolding:
    def unfolding_func(counts, composition='total'):
        original_shape = counts.shape
        counts_err = np.sqrt(counts)
        unfolded_results = pyunfold.iterative_unfold(data=counts.reshape(-1),
                                                     data_err=counts_err.reshape(-1),
                                                     response=response,
                                                     response_err=response_err,
                                                     efficiencies=efficiencies.reshape(-1),
                                                     efficiencies_err=efficiencies_err.reshape(-1),
                                                     ts='ks',
                                                     ts_stopping=0.005,
                                                     return_iterations=True,
                                                    )
        counts, counts_sys_err, counts_stat_err = comp.unfolded_counts_dist(unfolded_results,
                                                                            num_groups=num_groups)

        eff = df_eff['eff_median_{}'.format(composition)].values
        counts = counts[composition] * eff
        counts_err = counts_stat_err[composition] * eff

        unfolding_energy_range_mask = np.logical_and(energybins.log_energy_midpoints >= 6.4,
                                                     energybins.log_energy_midpoints <= 7.8)

        return counts[unfolding_energy_range_mask], counts_err[unfolding_energy_range_mask]
else:
    def unfolding_func(counts, composition='total'):
        original_shape = counts.shape
        counts_err = np.sqrt(counts)

        counts_total = counts.sum(axis=1)
        counts_err_total = np.sqrt(np.sum(counts_err**2, axis=1))

        unfolding_energy_range_mask = np.logical_and(energybins.log_energy_midpoints >= 6.4,
                                                     energybins.log_energy_midpoints <= 7.8)


        return counts_total[unfolding_energy_range_mask], counts_err_total[unfolding_energy_range_mask]
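
Either branch takes a 2-D counts array, (energy bin, composition group), and returns per-energy-bin counts and errors restricted to 6.4 <= log10(E/GeV) <= 7.8, which is presumably how sa.on_off_distributions applies hist_func to each on/off region. A hedged usage sketch on a single pixel's counts:


In [ ]:
# Single-pixel sketch (with_unfolding = False here): sum the two composition
# groups and keep the 14 bins with 6.4 <= log10(E/GeV) <= 7.8
counts_single_pix = binned_skymaps[:, :, pix_test]
counts_total, counts_total_err = unfolding_func(counts_single_pix)
counts_total.shape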

In [72]:
from sky_anisotropy.core import on_off_chi_squared_single

counts_on_test, counts_on_err_test, counts_off_test, counts_off_err_test = sa.on_off_distributions(
                          binned_maps=binned_skymaps,
                          pix_center=pix_test,
                          on_region=on_region,
                          size=size,
                          off_region=off_region,
                          nside=nside,
                          hist_func=unfolding_func)

In [73]:
unfolded_counts_on_test = counts_on_test
unfolded_counts_on_err_test = counts_on_err_test
unfolded_counts_off_test = counts_off_test
unfolded_counts_off_err_test = counts_off_err_test

alpha = np.sum(unfolded_counts_on_test) / np.sum(unfolded_counts_off_test)
scaled_unfolded_counts_off_test = alpha * unfolded_counts_off_test
scaled_unfolded_counts_off_err_test = alpha * unfolded_counts_off_err_test

x = comp.binning.bin_edges_to_midpoints(ebins)

fig, ax = plt.subplots()
ax.errorbar(x,
            unfolded_counts_on_test,
            unfolded_counts_on_err_test,
            label='On',
            color='C0')
ax.errorbar(x,
            scaled_unfolded_counts_off_test,
            scaled_unfolded_counts_off_err_test,
            label='Off',
            color='C1')

ax.set(ylabel='Counts',
       xlabel='$\mathrm{\log_{10}(E/GeV)}$',
       yscale='log')
ax.grid()
ax.legend()

outfile = os.path.join(figures_outdir,
                       'test-unfolded-counts.png')
comp.check_output_dir(outfile)
plt.savefig(outfile)

plt.show()



In [74]:
num_workers = min(len(pix_disc), 25)
scheduler = 'multiprocessing' if with_unfolding else 'threads'

with dask.config.set(scheduler=scheduler, num_workers=num_workers), ProgressBar():
    results = sa.on_off_chi_squared(binned_maps=binned_skymaps,
                                    pix_center=pix_disc,
                                    on_region=on_region,
                                    size=size,
                                    off_region=off_region,
                                    nside=nside,
                                    hist_func=unfolding_func,
                                    )


[########################################] | 100% Completed |  4.7s
[########################################] | 100% Completed |  0.1s

In [75]:
results.shape


Out[75]:
(2160, 7)

In [76]:
results.head()


Out[76]:
|   | alpha | chi2 | ndof | num_on | pix_center | pval | sig |
|---|---|---|---|---|---|---|---|
| 0 | 0.058221 | 11.539888 | 14 | 35179 | 46908 | 0.643200 | -0.367025 |
| 1 | 0.059241 | 11.417012 | 14 | 35761 | 46909 | 0.653010 | -0.393461 |
| 2 | 0.060353 | 11.834221 | 14 | 36394 | 46910 | 0.619614 | -0.304468 |
| 3 | 0.059453 | 16.691543 | 14 | 35882 | 46911 | 0.272989 | 0.603799 |
| 4 | 0.058536 | 15.874409 | 14 | 35359 | 46912 | 0.321104 | 0.464614 |

In [77]:
outdir = os.path.join(os.getcwd(),
                      'results',
                      'unfolded' if with_unfolding else 'pre-unfolding')
print('outdir = {}'.format(outdir))


outdir = /home/jbourbeau/cr-composition/spectrum-anisotropy/results/pre-unfolding

In [78]:
output_hdf = os.path.join(outdir,
                          'chi2-results-{}-on-{}-off-{:0.0f}deg-size_on-region-errors.hdf'.format(
                                                                                 on_region,
                                                                                 off_region,
                                                                                 np.rad2deg(size))
                         )
print('output_hdf = {}'.format(output_hdf))
comp.check_output_dir(output_hdf)
results.to_hdf(output_hdf, 'dataframe', format='table')


output_hdf = /home/jbourbeau/cr-composition/spectrum-anisotropy/results/pre-unfolding/chi2-results-disc-on-theta_band-off-5deg-size_on-region-errors.hdf

In [264]:
indir = os.path.join(os.getcwd(),
                     'results',
                     'unfolded' if with_unfolding else 'pre-unfolding')
input_hdf = os.path.join(indir,
                         'chi2-results-{}-on-{}-off-{:0.0f}deg-size_on-region-errors.hdf'.format(
                                                                                 on_region,
                                                                                 off_region,
                                                                                 np.rad2deg(size))
                        )
print('input_hdf = {}'.format(input_hdf))
results = pd.read_hdf(input_hdf)


input_hdf = /home/jbourbeau/cr-composition/spectrum-anisotropy/results/pre-unfolding/chi2-results-square-on-theta_band-off-5deg-size_on-region-errors.hdf

In [221]:
fig, ax = plt.subplots()
chi2_bins, chi2_bins_size = np.linspace(0, 70, 75, retstep=True)
hist_params = {'bins': chi2_bins,
#                'histtype': 'step',
               'alpha': 0.7,
               }

chi2_hist, _ = np.histogram(results.loc[:, 'chi2'].values, bins=chi2_bins)
chi2_hist_err = np.sqrt(chi2_hist)
chi2_pdf = chi2_hist / (chi2_hist.sum() * chi2_bins_size)
chi2_pdf_err = chi2_hist_err / (chi2_hist.sum() * chi2_bins_size)
comp.plot_steps(chi2_bins, chi2_pdf, chi2_pdf_err, ax=ax)

# Overlay the chi-squared PDF for the expected number of degrees of freedom
x = np.linspace(0, 70, 200)
num_bins = len(ebins) - 1
print(num_bins)
dof = 13  # = num_bins - 1; presumably one dof is absorbed by the on/off normalization

chi2_pdf = stats.chi2.pdf(x, dof)
ax.plot(x, chi2_pdf,
        marker='None',
        ls='-',
        color='C2',
        label='{} dof'.format(dof))

ax.set_xlabel('$\\chi^2$')
ax.set_ylabel('Probability density')
# ax.set_title('On-region size: {:0.1f}'.format(np.rad2deg(size)) + '$^{\circ}$')
ax.grid()
ax.legend(title='Degrees of freedom')

outfile = os.path.join(figures_outdir,
                       'chi2-hist.png')
comp.check_output_dir(outfile)
plt.savefig(outfile)

plt.show()


14

Calculate p-value and pre-trial significance


In [222]:
pval = stats.chi2.sf(results['chi2'], dof)
sig = erfcinv(2 * pval) * np.sqrt(2)
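
As a cross-check, sqrt(2) * erfcinv(2p) is simply the inverse survival function of a standard normal (one-sided convention), so scipy.stats.norm should reproduce it:


In [ ]:
# sqrt(2)*erfcinv(2p) is the one-sided z-score, i.e. the inverse of the
# standard-normal survival function
np.testing.assert_allclose(sig, stats.norm.isf(pval), rtol=1e-6)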

In [223]:
fig, ax = plt.subplots()
hist_params = {'bins': 50,
#                'histtype': 'step',
               'alpha': 0.7,
               }
ax.hist(pval, **hist_params)
ax.set(xlabel='p-value',
       ylabel='Counts')
ax.grid()

outfile = os.path.join(figures_outdir,
                       'pval-hist.png')
comp.check_output_dir(outfile)
plt.savefig(outfile)

plt.show()



In [224]:
from scipy.special import erfcinv

fig, ax = plt.subplots()
sig_bins, sig_bins_size = np.linspace(-6, 6, 50, retstep=True)
sig_bins_midpoints = comp.binning.bin_edges_to_midpoints(sig_bins)
hist_params = {'bins': sig_bins,
#                'histtype': 'step',
               'alpha': 1.0,
               }

theta, phi = hp.pix2ang(nside, results['pix_center'])
ra, dec = sa.healpy_to_equatorial(theta, phi)

sig_hist, _ = np.histogram(sig, bins=sig_bins)
sig_hist_err = np.sqrt(sig_hist)
sig_pdf = sig_hist / (sig_hist.sum() * sig_bins_size)
sig_pdf_err = sig_hist_err / (sig_hist.sum() * sig_bins_size)
comp.plot_steps(sig_bins, sig_pdf, sig_pdf_err, ax=ax)

ax.axvline(np.median(sig),
           marker='None', ls='-.', lw=1,
           label='Median')

counts, _ = np.histogram(sig, bins=sig_bins)
counts = counts / counts.sum()
ax_cdf = ax.twinx()
ax_cdf.plot(sig_bins_midpoints, np.cumsum(counts), marker='None',
            label='Observed')
ax_cdf.set_ylabel('Cumulative distribution')
ax_cdf.set_ylim(bottom=0)

np.random.seed(2)
normal_samples = np.random.normal(size=int(1e7))
ax.axvline(np.median(normal_samples),
           marker='None', ls='-.', lw=1,
           label='Median', color='C1')
ax.hist(normal_samples, bins=sig_bins, density=True,
        histtype='step', color='C1', alpha=1.0, lw=1.0)

counts, _ = np.histogram(normal_samples, bins=sig_bins)
counts = counts / counts.sum()
ax_cdf.plot(sig_bins_midpoints, np.cumsum(counts), marker='None',
            label='Standard\nnormal')


d, ks_pval = stats.ks_2samp(normal_samples, sig)
print(erfcinv(2 * ks_pval) * np.sqrt(2))


ax.set_xlabel('Significance [$\\sigma$]')
ax.set_ylabel('Probability density')
ax.grid()
ax_cdf.legend()

outfile = os.path.join(figures_outdir,
                       'significance-dist.png')
plt.savefig(outfile)

plt.show()


3.5353753954917804

In [225]:
num_on_map = np.full(npix, hp.UNSEEN)
num_on_map[results['pix_center']] = results['num_on']

comp.plot_skymap(num_on_map, cbar_title='$\mathrm{N_{on}}$', polar=True)

outfile = os.path.join(figures_outdir,
                       'num-on-skymap-polar.png')
comp.check_output_dir(outfile)
plt.savefig(outfile)

plt.show()



In [80]:
num_on_map = np.full(npix, hp.UNSEEN)
num_on_map[results['pix_center']] = results['alpha']

comp.plot_skymap(num_on_map, cbar_title='$\mathrm{\\alpha}$', polar=True)

outfile = os.path.join(figures_outdir,
                       'alpha-skymap-polar.png')
comp.check_output_dir(outfile)
plt.savefig(outfile)

plt.show()



In [226]:
chi2_map = np.full(npix, hp.UNSEEN)
chi2_map[results['pix_center']] = results['chi2']

comp.plot_skymap(chi2_map, cbar_title='$\mathrm{\chi^2}$', polar=True)

outfile = os.path.join(figures_outdir,
                       'chi2-skymap-polar.png')
plt.savefig(outfile)

plt.show()



In [158]:
pval_map = np.full(npix, hp.UNSEEN)
pval_map[results['pix_center']] = pval

comp.plot_skymap(pval_map, cbar_title='p-value', polar=True, color_palette='viridis_r')

outfile = os.path.join(figures_outdir,
                       'pval-skymap-polar.png')
plt.savefig(outfile)

plt.show()



In [159]:
sig_map = np.full(npix, hp.UNSEEN)
sig_map[results['pix_center']] = sig

comp.plot_skymap(sig_map,
                 color_palette='RdBu_r',
                 cbar_min=-5,
                 cbar_max=5,
                 color_bins=10,
                 cbar_title='Significance [$\sigma$]',
                 polar=True)

pix_max_sig = results.loc[sig.argmax(), 'pix_center']
theta_max, phi_max = hp.pix2ang(nside=nside, ipix=pix_max_sig)

hp.projscatter(theta_max, phi_max, marker='x', s=100, c='k')

outfile = os.path.join(figures_outdir,
                       'sig-skymap-polar.png')
plt.savefig(outfile)

plt.show()



In [160]:
sig.min(), sig.max()


Out[160]:
(-3.360747906068586, 3.8669994824949883)

In [161]:
def get_significance_map(data_map, ref_map, alpha=1/20.):

    with np.errstate(invalid='ignore', divide='ignore'):
        n_on  = data_map
        n_off = ref_map / alpha

        sign = np.sign(data_map - ref_map)
        sig_map = sign * np.sqrt(2*(n_on*np.log(((1+alpha)*n_on) / (alpha*(n_on+n_off)))
            + n_off * np.log(((1+alpha)*n_off) / (n_on+n_off))))

    return sig_map
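
This is the Li & Ma (1983, Eq. 17) on/off significance, with ref_map assumed to already be the alpha-scaled expectation in the on region. A quick sanity check with illustrative numbers: an excess over the reference map comes out positive, a deficit negative:


In [ ]:
# Illustrative numbers only: excess -> positive sigma, deficit -> negative
excess = get_significance_map(130., 100., alpha=1/20.)
deficit = get_significance_map(70., 100., alpha=1/20.)
print(excess, deficit)
assert excess > 0 > deficit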

In [162]:
theta_max, phi_max = hp.pix2ang(nside=nside, ipix=pix_max_sig)

counts_on_max, counts_on_err_max, counts_off_max, counts_off_err_max = sa.on_off_distributions(
                                binned_maps=binned_skymaps,
                                pix_center=pix_max_sig,
                                on_region=on_region,
                                size=size,
                                off_region=off_region,
                                nside=nside)

In [163]:
ebins_midpoints = (ebins[:-1] + ebins[1:]) / 2
ebins_midpoints


Out[163]:
array([6.45, 6.55, 6.65, 6.75, 6.85, 6.95, 7.05, 7.15, 7.25, 7.35, 7.45,
       7.55, 7.65, 7.75])

In [164]:
ebins


Out[164]:
array([6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 7. , 7.1, 7.2, 7.3, 7.4, 7.5, 7.6,
       7.7, 7.8])

In [165]:
counts_on_max


Out[165]:
array([[61224, 48534],
       [45374, 36713],
       [30778, 26594],
       [22317, 17217],
       [15201, 10684],
       [10053,  6595],
       [ 5514,  4806],
       [ 3491,  2857],
       [ 2025,  1651],
       [ 1047,  1223],
       [  775,   675],
       [  381,   470],
       [  210,   368],
       [  115,   216],
       [   85,   176],
       [   40,   101],
       [   18,    40],
       [   12,    34],
       [   11,    29]])

In [166]:
counts_on_max_, _ = unfolding_func(counts_on_max)
counts_off_max_, _ = unfolding_func(counts_off_max)

alpha = np.sum(counts_on_max_) / np.sum(counts_off_max_)
scaled_counts_off_max = alpha * counts_off_max_

fig, ax = plt.subplots()
comp.plot_steps(ebins, counts_on_max_, np.sqrt(counts_on_max_),
                label='On', color='C0', ax=ax)
comp.plot_steps(ebins, scaled_counts_off_max, alpha * np.sqrt(counts_off_max_),
                label='Off', color='C1', ax=ax)

ax.set_ylabel('Normalized counts')
ax.set_xlabel('$\mathrm{\log_{10}(E/GeV)}$')
ax.set_yscale('log', nonposy='clip')
ax.grid()
ax.legend()

outfile = os.path.join(figures_outdir,
                       'max-sig-counts-hist.png')
plt.savefig(outfile)

plt.show()



In [89]:
get_significance_map(counts_on_max_, counts_off_max_, alpha=1/alpha)


Out[89]:
array([-263.79192683, -215.05936525, -172.07582901, -136.75256494,
       -106.77941393,  -84.17717326,  -64.66333846,  -49.46913664,
        -39.51709654,  -31.54564139,  -25.06351525,  -19.17561385,
        -16.18627201,  -14.4038147 ])

In [127]:
for r in [3.0, 4.0, 5.0]:
    input_hdf = os.path.join(os.getcwd(), 'data', 'chi2-results-radius-{:0.1f}-degree.hdf'.format(r))
    results = pd.read_hdf(input_hdf)

    sig_map = np.full(npix, hp.UNSEEN)
    sig_map[results['pix_disc']] = results['sig']

    theta, phi = hp.pix2ang(nside, range(npix))
    ra, dec = sa.healpy_to_equatorial(theta, phi)
    sig_map[dec < np.deg2rad(-90) + np.deg2rad(r)] = hp.UNSEEN

    comp.plot_skymap(sig_map,
                     color_palette='RdBu_r',
                     cbar_min=-5, cbar_max=5,
                     cbar_title='Significance [$\sigma$]',
                     polar=True)

    outfile = os.path.join(comp.paths.figures_dir,
                           'skymaps',
                           'radius-{}-degrees'.format(r),
                           '{}-sig-skymap-polar.png'.format(config))
    plt.savefig(outfile)

    plt.show()




In [245]:
sig_map.max()


Out[245]:
5.005907156818285

In [246]:
alpha_map = np.full(npix, hp.UNSEEN)
alpha_map[results['pix_disc']] = results['alpha']

theta, phi = hp.pix2ang(nside, range(npix))
ra, dec = sa.healpy_to_equatorial(theta, phi)
alpha_map[dec < np.deg2rad(-90) + radius] = hp.UNSEEN  # radius (in radians) set in the earlier per-radius run

comp.plot_skymap(alpha_map, cbar_title='$\mathrm{\\alpha}$', polar=True)

outfile = os.path.join(comp.paths.figures_dir,
                       'skymaps',
                       'radius-{}-degrees'.format(np.rad2deg(radius)),
                       '{}-alpha-skymap-polar.png'.format(config))
plt.savefig(outfile)

plt.show()



In [87]:
energy_mask = df_data.loc[:, 'reco_log_energy'] > 7.9
energy = df_data.loc[energy_mask, 'reco_log_energy']
ra = df_data.loc[energy_mask, 'lap_ra']
dec = df_data.loc[energy_mask, 'lap_dec']

theta, phi = comp.equatorial_to_healpy(ra, dec)

In [103]:
comp.plot_skymap(np.full(npix, hp.UNSEEN), polar=True)
fig = plt.gcf()
ax = plt.gca()

image = hp.projscatter(theta, phi, c=energy, alpha=0.8)
cbar = fig.colorbar(image,
                    orientation='horizontal',
                    aspect=50,
                    pad=0.01,
                    fraction=0.1,
                    ax=ax,
                    format=FormatStrFormatter('%g'),
                    shrink=1.0)
cbar.set_label('$\mathrm{\log_{10}(E/GeV)}$', size=14)
plt.show()


