In [1]:
%matplotlib inline
from __future__ import division, print_function
from collections import defaultdict
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
from matplotlib.colors import ListedColormap
import matplotlib.gridspec as gridspec
import seaborn.apionly as sns
import healpy as hp
from sklearn.model_selection import KFold
import dask
from dask import delayed, multiprocessing, compute
from dask.diagnostics import ProgressBar
import pyprind
from scipy.stats import chi2
from scipy.special import erfcinv
from icecube import astro
import comptools as comp
import comptools.analysis.plotting as plotting
import comptools.anisotropy.anisotropy as anisotropy
color_dict = comp.analysis.get_color_dict()
In [2]:
config = ['IC86.2011', 'IC86.2012', 'IC86.2013', 'IC86.2014', 'IC86.2015']
years_str = '2011-2015'
composition='all'
n_side = 64
scale = 3
smooth = 0.0
n_bins = 36
# decmax = -75
# decmax = -60
decmax = -55
decmin = -90
low_energy = True
In [13]:
def get_proj_nbins_df(bins, data=None, ref=None, composition='all'):
dipole_dict = defaultdict(list)
for n_bins in bins:
dipole_dict['n_bins'].append(n_bins)
kwargs_relint_radius = {'config': config, 'low_energy': low_energy, 'smooth': smooth,
'scale': None, 'decmax': decmax, 'decmin': decmin}
if data is None:
data = anisotropy.get_map(name='data', composition=composition, **kwargs_relint_radius)
if ref is None:
ref = anisotropy.get_map(name='ref', composition=composition, **kwargs_relint_radius)
# relint = anisotropy.get_map(name='relint', composition=composition, **kwargs_relint_radius)
# relint_err = anisotropy.get_map(name='relerr', composition=composition, **kwargs_relint_radius)
# ri, ri_err, ra, ra_err = anisotropy.get_proj_relint(relint, relint_err, n_bins=n_bins,
# decmin=decmin, decmax=decmax)
ri, ri_err, ra, ra_err = anisotropy.get_binned_relint(data, ref, n_bins=n_bins,
decmin=decmin, decmax=decmax)
n_dof = ri.shape[0]
chi2_all = np.sum(ri**2 / ri_err**2)
pval = chi2.sf(chi2_all, n_dof, loc=0, scale=1)
sig = erfcinv(2*pval)*np.sqrt(2)
dipole_dict['ri'].append(ri)
dipole_dict['ri_err'].append(ri_err)
dipole_dict['ra'].append(ra)
dipole_dict['pval'].append(pval)
dipole_dict['sig'].append(sig)
return pd.DataFrame.from_records(dipole_dict, index='n_bins')
In [14]:
# proj_light_df = get_proj_nbins_df(bins, composition='light')
# proj_heavy_df = get_proj_nbins_df(bins, composition='heavy')
In [15]:
bins = np.arange(1, 72+1, 1, dtype=int)
proj_all_df = get_proj_nbins_df(bins, composition='all')
proj_light_df = get_proj_nbins_df(bins, composition='light')
proj_heavy_df = get_proj_nbins_df(bins, composition='heavy')
In [16]:
for proj_df, composition in zip([proj_all_df, proj_light_df, proj_heavy_df], ['total', 'light', 'heavy']):
fig, axarr = plt.subplots(3, 3, figsize=(10, 6), sharex=True, sharey=False)
# for n_bins, ax in zip(proj_df.index[::10], axarr.flatten()):
for n_bins, ax in zip([1, 4, 6, 10, 20, 24, 36, 60, 72], axarr.flatten()):
proj_nbins = proj_df.loc[n_bins]
ra_bins = np.linspace(0, 360, n_bins + 1)
plotting.plot_steps(ra_bins, proj_nbins['ri'], yerr=proj_nbins['ri_err'],
color=color_dict[composition], label=composition, fillalpha=0.2,
ax=ax)
# label='{}$\\sigma$'.format(proj_nbins['sig']), ax=ax)
ax.axhline(0, marker='None', ls='-.', c='k')
ax.set_title(str(n_bins)+' RA bins')
# ax.set_ylabel('$\mathrm{\langle RI \\rangle }$')
# ax.set_xlabel('RA [ $^{\circ}$]')
ax.grid()
# ax.set_ylim(-4.0e-3, 4.0e-3)
ax.set_xlim(0, 360)
ax.invert_xaxis()
ax.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
fig.text(0.5, -0.025, 'RA [ $^{\circ}$]', ha='center', fontsize=16)
fig.text(-0.025, 0.5, '$\mathrm{\langle RI \\rangle }$', va='center', rotation='vertical', fontsize=16)
plt.tight_layout()
proj_vs_nbins_outfile = os.path.join(comp.paths.figures_dir,
'anisotropy', 'proj_vs_nbins_{}.png'.format(composition))
comp.check_output_dir(proj_vs_nbins_outfile)
plt.savefig(proj_vs_nbins_outfile)
plt.show()
In [17]:
fig, ax = plt.subplots()
ax.plot(proj_all_df.index, proj_all_df['sig'], ls='None', label='Significance', color='C2')
ax.axhline(0, marker='None', ls='-.', color='k', lw=1)
rolling_mean = proj_all_df['sig'].rolling(window=10,center=True).mean()
ax.plot(rolling_mean.index, rolling_mean, marker='None', ls='-', color='C2', label='Rolling mean\n(+/- 5 bins window)')
ax.fill_between(rolling_mean.index, rolling_mean+1, rolling_mean-1, color='C2', alpha=0.2)
ax.set_xlabel('Number RA bins')
ax.set_ylabel('Anisotropy significance [$\\sigma$]')
ax.set_ylim(0)
ax.set_xlim(0)
ax.grid()
ax.legend()
sig_vs_nbins_outfile = os.path.join(comp.paths.figures_dir,
'anisotropy', 'sig_vs_nbins_all.png')
comp.check_output_dir(sig_vs_nbins_outfile)
plt.savefig(sig_vs_nbins_outfile)
plt.show()
In [18]:
fig, ax = plt.subplots()
for proj_df, composition in zip([proj_light_df, proj_heavy_df], ['light', 'heavy']):
ax.plot(proj_df.index, proj_df['sig'], ls='None', label=composition, color=color_dict[composition])
# ax.axhline(0, marker='None', ls='-.', color='k', lw=1)
rolling_mean = proj_df['sig'].rolling(window=10,center=True).mean()
ax.plot(rolling_mean.index, rolling_mean, marker='None', ls='-', color=color_dict[composition], label='')
ax.fill_between(rolling_mean.index, rolling_mean+1, rolling_mean-1, color=color_dict[composition], alpha=0.2, label='')
ax.set_xlabel('Number RA bins')
ax.set_ylabel('Anisotropy significance [$\\sigma$]')
ax.set_ylim(0)
ax.set_xlim(0)
ax.grid()
ax.legend()
sig_vs_nbins_outfile = os.path.join(comp.paths.figures_dir,
'anisotropy', 'sig_vs_nbins_comp.png')
comp.check_output_dir(sig_vs_nbins_outfile)
plt.savefig(sig_vs_nbins_outfile)
plt.show()
The heavy projected relative intensities (for large number of RA bins) looks like fluxuations, but is still ~4-sigma away from the null hypothesis. That's weird.
Scramble data in right acension to see if this feature goes away...
In [9]:
kwargs_data = {'config': config, 'low_energy': low_energy, 'smooth': smooth,
'scale': None, 'decmax': decmax}
data_heavy = anisotropy.get_map(name='data', composition='heavy', **kwargs_data)
ref_heavy = anisotropy.get_map(name='ref', composition='heavy', **kwargs_data)
In [10]:
data_heavy
Out[10]:
In [11]:
# Bin in declination
theta, phi = hp.pix2ang(n_side, range(len(data_heavy)))
thetamax = np.deg2rad(90 - decmin)
thetamin = np.deg2rad(90 - decmax)
# dec_mask = (theta <= thetamax) & (theta >= thetamin)
n_dec_bins = 30
dec_bins= np.linspace(thetamin, thetamax, n_dec_bins+1, dtype=float)
theta_bin_num = np.digitize(theta, dec_bins) - 1
In [12]:
theta_bin_num
Out[12]:
In [20]:
data_heavy_RAscrambled = data_heavy.copy()
for idx in range(n_dec_bins):
theta_bin_mask = (theta_bin_num == idx)
unseen_mask = data_heavy == hp.UNSEEN
combined_mask = theta_bin_mask & ~unseen_mask
data_in_dec_bin = data_heavy.copy()
data_in_dec_bin = data_in_dec_bin[combined_mask]
data_series = pd.Series(data_in_dec_bin)
print(idx)
shuffled_data = data_series.sample(frac=1.0, random_state=2).values
data_heavy_RAscrambled[combined_mask] = shuffled_data
# np.random.shuffle(data_in_dec_bin)
# data_heavy_RAscrambled[combined_mask] = data_in_dec_bin
In [13]:
def get_noisy_proj_sig(composition, random_state):
# Set random state for trials
np.random.seed(random_state)
kwargs_data = {'config': config, 'low_energy': low_energy, 'smooth': smooth,
'scale': None, 'decmax': decmax}
ref = anisotropy.get_map(name='ref', composition=composition, **kwargs_data)
unseen_mask = ref == hp.UNSEEN
ref_poisson_noise = ref.copy()
ref_poisson_noise[~unseen_mask] = np.random.poisson(ref_poisson_noise[~unseen_mask])
proj_df = get_proj_nbins_df(bins, data=ref_poisson_noise, ref=ref)
return proj_df['sig']
In [14]:
n_noise_trials = 1000
sig_ref_noise = [delayed(get_noisy_proj_sig)('all', random_state) for random_state in range(n_noise_trials)]
sig_ref_noise = delayed(pd.concat)(sig_ref_noise)
# sig_ref_noise = sig_ref_noise.divide(n_noise_trials)
In [15]:
with ProgressBar():
# sig_ref_noise = sig_ref_noise.compute(get=dask.get)
sig_ref_noise = sig_ref_noise.compute(get=multiprocessing.get, num_works=25)
In [16]:
grouped_nbins = sig_ref_noise.groupby(sig_ref_noise.index)
In [17]:
def gaussian(x, mu=0, sigma=1):
return np.exp(-(x - mu)**2/(2*sigma**2))/np.sqrt(2*np.pi*sigma**2)
In [18]:
sig_bins, sig_step = np.linspace(-5, 5, 50, retstep=True)
sig_midpoints = (sig_bins[1:] + sig_bins[:-1]) / 2
fig, axarr = plt.subplots(3, 3, figsize=(10, 6), sharex=True, sharey=True)
for n_bins, ax in zip([1, 4, 6, 10, 20, 24, 36, 60, 72], axarr.flatten()):
df_noise_nbins = grouped_nbins.get_group(n_bins)
label_mean = '$\mu = {:0.2f}$'.format(df_noise_nbins.mean())
label_std = '$\sigma = {:0.2f}$'.format(df_noise_nbins.std())
df_noise_nbins.plot(kind='hist', bins=sig_bins, histtype='stepfilled', alpha=0.5, lw=1.5,
color=color_dict['total'], ax=ax, label=label_mean + '\n ' + label_std)
ax.plot(sig_midpoints, n_noise_trials*sig_step*gaussian(sig_midpoints),
marker='None', label='Gaussian')
ax.set_ylabel('')
ax.set_title('{} RA bins'.format(n_bins))
ax.grid()
ax.legend()
fig.text(0.5, -0.025, 'Anisotropy significance [$\\sigma$]', ha='center', fontsize=16)
fig.text(-0.025, 0.5, 'Counts', va='center', rotation='vertical', fontsize=16)
plt.tight_layout()
sig_vs_nbins_outfile = os.path.join(comp.paths.figures_dir,
'anisotropy', 'sig_vs_nbins_all.png')
comp.check_output_dir(sig_vs_nbins_outfile)
plt.savefig(sig_vs_nbins_outfile)
plt.show()
In [19]:
fig, ax = plt.subplots()
for n_bins in grouped_nbins.indices.keys():
df_noise_nbins = grouped_nbins.get_group(n_bins)
# label_mean = '$\mu = {:0.2f}$'.format(df_noise_nbins.mean())
# label_std = '$\sigma = {:0.2f}$'.format(df_noise_nbins.std())
# df_noise_nbins.plot(kind='hist', bins=sig_bins, histtype='stepfilled', alpha=0.5, lw=1.5,
# color=color_dict['total'], ax=ax, label=label_mean + '\n ' + label_std)
mean = df_noise_nbins.mean()
err = df_noise_nbins.std()
ax.errorbar(n_bins, mean, yerr=err, marker='.', color=color_dict['total'])
# ax.fill_between(n_bins, mean-err, mean+err)
ax.set_ylabel('Anisotropy significance [$\\sigma$]')
ax.set_xlabel('Number RA bins')
ax.grid()
ax.legend()
# fig.text(0.5, -0.025, 'Anisotropy significance [$\\sigma$]', ha='center', fontsize=16)
# fig.text(-0.025, 0.5, 'Counts', va='center', rotation='vertical', fontsize=16)
plt.tight_layout()
# sig_vs_nbins_outfile = os.path.join(comp.paths.figures_dir,
# 'anisotropy', 'sig_vs_nbins_all.png')
# comp.check_output_dir(sig_vs_nbins_outfile)
# plt.savefig(sig_vs_nbins_outfile)
plt.show()
In [16]:
fig, ax = plt.subplots()
sig_ref_noise.plot(kind='hist', bins=20, histtype='stepfilled', alpha=0.5, lw=1.5, color=color_dict['total'], ax=ax)
ax.set_ylabel('Counts')
ax.set_xlabel('Anisotropy significance [$\\sigma$]')
ax.grid()
plt.show()
In [ ]:
In [50]:
def get_RAscrambled_proj_sig(composition, random_state):
kwargs_data = {'config': config, 'low_energy': low_energy, 'smooth': smooth,
'scale': None, 'decmax': decmax, 'decmin': decmin}
ref = anisotropy.get_map(name='ref', composition=composition, **kwargs_data)
data = anisotropy.get_map(name='data', composition=composition, **kwargs_data)
# Bin in declination
theta, phi = hp.pix2ang(n_side, range(len(data)))
thetamax = np.deg2rad(90 - decmin)
thetamin = np.deg2rad(90 - decmax)
n_dec_bins = 20
theta_bins= np.linspace(thetamin, thetamax, n_dec_bins+1)
theta_bin_num = np.digitize(theta, theta_bins) - 1
data_ra_scrambled = data.copy()
unseen_mask = data_ra_scrambled == hp.UNSEEN
for idx in range(n_dec_bins):
theta_bin_mask = (theta_bin_num == idx)
combined_mask = theta_bin_mask & ~unseen_mask
data_in_dec_bin = data_ra_scrambled[combined_mask]
shuffled_data = pd.Series(data_in_dec_bin).sample(frac=1.0, random_state=random_state).values
data_ra_scrambled[combined_mask] = shuffled_data
proj_df = get_proj_nbins_df(bins, data=data_ra_scrambled, ref=ref)
return proj_df
In [51]:
def get_RAscrambled_data_dists(composition, random_state):
kwargs_data = {'config': config, 'low_energy': low_energy, 'smooth': smooth,
'scale': None, 'decmax': decmax, 'decmin': decmin}
data = anisotropy.get_map(name='data', composition=composition, **kwargs_data)
# Bin in declination
theta, phi = hp.pix2ang(n_side, range(len(data)))
thetamax = np.deg2rad(90 - decmin)
thetamin = np.deg2rad(90 - decmax)
n_dec_bins = 20
theta_bins= np.linspace(thetamin, thetamax, n_dec_bins+1)
theta_bin_num = np.digitize(theta, theta_bins) - 1
# data_ra_scrambled = data.copy()
data_dists = {}
for idx in range(n_dec_bins):
data_ra_scrambled = data.copy()
unseen_mask = data_ra_scrambled == hp.UNSEEN
theta_bin_mask = (theta_bin_num == idx)
combined_mask = theta_bin_mask & ~unseen_mask
data_in_dec_bin = data_ra_scrambled[combined_mask]
shuffled_data = pd.Series(data_in_dec_bin).sample(frac=1.0, random_state=random_state).values
data_ra_scrambled[combined_mask] = shuffled_data
proj, proj_err, ra, ra_err = anisotropy.get_RA_proj_map(data_ra_scrambled,
decmin=decmin, decmax=decmax,
n_bins=10)
data_dists[idx] = proj, proj_err, ra, ra_err
return data_dists
In [69]:
# Bin in declination
theta, phi = hp.pix2ang(n_side, range(hp.nside2npix(n_side)))
thetamax = np.deg2rad(90 - decmin)
thetamin = np.deg2rad(90 - decmax)
n_dec_bins = 20
theta_bins= np.linspace(thetamin, thetamax, n_dec_bins+1)
theta_bin_num = np.digitize(theta, theta_bins) - 1
In [70]:
theta_bins
Out[70]:
In [72]:
def get_RAscrambled_proj(composition, random_state, n_ra_bins=10):
kwargs_data = {'config': config, 'low_energy': low_energy, 'smooth': smooth,
'scale': None, 'decmax': decmax, 'decmin': decmin}
data = anisotropy.get_map(name='data', composition=composition, **kwargs_data)
ref = anisotropy.get_map(name='ref', composition=composition, **kwargs_data)
# Bin in declination
theta, phi = hp.pix2ang(n_side, range(len(ref)))
thetamax = np.deg2rad(90 - decmin)
thetamin = np.deg2rad(90 - decmax)
n_dec_bins = 20
theta_bins= np.linspace(thetamin, thetamax, n_dec_bins+1)
theta_bin_num = np.digitize(theta, theta_bins) - 1
dists = []
for idx in range(n_dec_bins):
projections = {}
data_ra_scrambled = data.copy()
unseen_mask = data_ra_scrambled == hp.UNSEEN
theta_bin_mask = (theta_bin_num == idx)
combined_mask = theta_bin_mask & ~unseen_mask
data_in_dec_bin = data_ra_scrambled[combined_mask]
shuffled_data = pd.Series(data_in_dec_bin).sample(frac=1.0, random_state=random_state).values
data_ra_scrambled[combined_mask] = shuffled_data
data_ra_scrambled[~combined_mask] = hp.UNSEEN
data_proj, data_proj_err, ra, ra_err = anisotropy.get_RA_proj_map(data_ra_scrambled,
decmin=decmin, decmax=decmax,
n_bins=n_ra_bins)
ref_proj, ref_proj_err, ra, ra_err = anisotropy.get_RA_proj_map(ref,
decmin=decmin, decmax=decmax,
n_bins=n_ra_bins)
projections['data_proj'] = data_proj
projections['data_proj_err'] = data_proj_err
projections['ref_proj'] = ref_proj
projections['ref_proj_err'] = ref_proj_err
projections['ra'] = ra
dists.append(projections)
return pd.DataFrame.from_records(dists)
data
In [73]:
n_ra_scramble_trials = 1
ra_scambled_dists = [delayed(get_RAscrambled_proj)('all', random_state, n_ra_bins=30)
for random_state in range(n_ra_scramble_trials)]
ra_scambled_dists = delayed(pd.concat)(ra_scambled_dists)
with ProgressBar():
ra_scambled_dists = compute(ra_scambled_dists, get=multiprocessing.get, num_works=min(n_ra_scramble_trials, 25))[0]
In [74]:
ra_scambled_dists
Out[74]:
In [75]:
# with sns.color_palette('Blues_d', 20):
data_colors = sns.color_palette('Blues_d', len(ra_scambled_dists)+1).as_hex()
ref_colors = sns.color_palette('Greens_d', len(ra_scambled_dists)+1).as_hex()
# fig, ax = plt.subplots()
fig, ax = plt.subplots(figsize=(10, 8))
for dec_bin_idx, proj_df in ra_scambled_dists.iterrows():
ax.errorbar(proj_df['ra'], proj_df['data_proj'], yerr=proj_df['data_proj_err'], marker='.', ls=':', label=str(dec_bin_idx),
color=data_colors[dec_bin_idx])
# ax.errorbar(proj_df['ra'], proj_df['ref_proj'], yerr=proj_df['ref_proj_err'], marker='.', ls='-', label=str(dec_bin_idx),
# color='C2')
# print(proj_df.iloc[n_ra_bins])
ax.grid()
# ax.set_yscale('log', nonposy='clip')
# ax.set_ylim(0e6, 2e6)
ax.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
ax.invert_xaxis()
ax.set_xlabel('RA [ $^{\circ}$]')
ax.set_ylabel('Counts')
ax.legend()
# ax.set_ylabel('$\mathrm{\langle RI \\rangle }$')
plt.show()
In [7]:
# n_ra_scramble_trials = 10
# sig_ra_scambled = [delayed(get_RAscrambled_proj_sig)('all', random_state)
# for random_state in range(n_ra_scramble_trials)]
# sig_ra_scambled = delayed(pd.concat)(sig_ra_scambled)
In [22]:
with ProgressBar():
sig_ra_scambled = sig_ra_scambled.compute(get=multiprocessing.get, num_works=min(n_ra_scramble_trials, 25))
In [24]:
grouped_nbins = sig_ra_scambled.groupby(sig_ra_scambled.index)
In [25]:
grouped_nbins.get_group(n_bins).ri.mean()
Out[25]:
In [26]:
fig, axarr = plt.subplots(3, 3, figsize=(10, 6), sharex=True, sharey=True)
for n_bins, ax in zip([1, 4, 6, 10, 20, 24, 36, 60, 72], axarr.flatten()):
df_scambled_nbins = grouped_nbins.get_group(n_bins)
ax.errorbar(df_scambled_nbins['ra'].mean(), df_scambled_nbins['ri'].mean(),
yerr=None, marker='.', ls=':')
ax.axhline(0, marker='None', ls=':', color='k', lw=1.5)
ax.set_title('{} RA bins'.format(n_bins))
ax.grid()
ax.invert_xaxis()
fig.text(0.5, -0.025, 'RA [ $^{\circ}$]', ha='center', fontsize=16)
fig.text(-0.025, 0.5, '$\mathrm{\langle RI \\rangle }$', va='center', rotation='vertical', fontsize=16)
plt.tight_layout()
scrambled_vs_nbins_outfile = os.path.join(comp.paths.figures_dir,
'anisotropy', 'scrambled_nbins_all.png')
comp.check_output_dir(scrambled_vs_nbins_outfile)
plt.savefig(scrambled_vs_nbins_outfile)
plt.show()
In [166]:
# sig_ra_scambled.replace([np.inf, -np.inf], np.nan, inplace=True)
In [140]:
grouped_nbins = sig_ra_scambled.groupby(sig_ra_scambled.index)
In [141]:
sig_bins, sig_step = np.linspace(-5, 5, 50, retstep=True)
sig_midpoints = (sig_bins[1:] + sig_bins[:-1]) / 2
fig, axarr = plt.subplots(3, 3, figsize=(10, 6), sharex=True, sharey=False)
for n_bins, ax in zip(range(1, 72), axarr.flatten()):
# for n_bins, ax in zip([1, 4, 6, 10, 20, 24, 36, 60, 72], axarr.flatten()):
df_noise_nbins = grouped_nbins.get_group(n_bins)
print(df_noise_nbins)
label_mean = '$\mu = {:0.2f}$'.format(df_noise_nbins.mean())
label_std = '$\sigma = {:0.2f}$'.format(df_noise_nbins.std())
df_noise_nbins.plot(kind='hist', bins=sig_bins, histtype='stepfilled', alpha=0.5, lw=1.5,
color=color_dict['total'], ax=ax, label=label_mean + '\n ' + label_std)
# ax.plot(sig_midpoints, n_noise_trials*sig_step*gaussian(sig_midpoints),
# marker='None', label='Gaussian')
ax.set_ylabel('')
ax.set_title('{} RA bins'.format(n_bins))
ax.grid()
# ax.legend()
fig.text(0.5, -0.025, 'Anisotropy significance [$\\sigma$]', ha='center', fontsize=16)
fig.text(-0.025, 0.5, 'Counts', va='center', rotation='vertical', fontsize=16)
plt.tight_layout()
# sig_vs_nbins_outfile = os.path.join(comp.paths.figures_dir,
# 'anisotropy', 'sig_vs_nbins_all.png')
# comp.check_output_dir(sig_vs_nbins_outfile)
# plt.savefig(sig_vs_nbins_outfile)
plt.show()
In [ ]:
In [ ]: