In [1]:
%matplotlib inline
from __future__ import division, print_function
from collections import defaultdict
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
from matplotlib.colors import ListedColormap
import matplotlib.gridspec as gridspec
import seaborn.apionly as sns
import healpy as hp
from sklearn.model_selection import KFold
import dask
from dask import delayed, multiprocessing, compute
from dask.diagnostics import ProgressBar
import pyprind
from scipy.stats import chi2
from scipy.special import erfcinv

from icecube import astro

import comptools as comp
import comptools.analysis.plotting as plotting
import comptools.anisotropy.anisotropy as anisotropy

color_dict = comp.analysis.get_color_dict()


/home/jbourbeau/.virtualenvs/composition/lib/python2.7/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)

In [2]:
config = ['IC86.2011', 'IC86.2012', 'IC86.2013', 'IC86.2014', 'IC86.2015']
years_str = '2011-2015'
composition='all'

n_side = 64
scale = 3
smooth = 0.0
n_bins = 36
# decmax = -75
# decmax = -60
decmax = -55
decmin = -90
low_energy = True

In [13]:
def get_proj_nbins_df(bins, data=None, ref=None, composition='all'):
    
    dipole_dict = defaultdict(list)
    for n_bins in bins:

        dipole_dict['n_bins'].append(n_bins)
        kwargs_relint_radius = {'config': config, 'low_energy': low_energy, 'smooth': smooth, 
                                'scale': None, 'decmax': decmax, 'decmin': decmin}
        if data is None:
            data = anisotropy.get_map(name='data', composition=composition, **kwargs_relint_radius)
        if ref is None:
            ref = anisotropy.get_map(name='ref', composition=composition, **kwargs_relint_radius)

            
#         relint = anisotropy.get_map(name='relint', composition=composition, **kwargs_relint_radius)
#         relint_err = anisotropy.get_map(name='relerr', composition=composition, **kwargs_relint_radius)
#         ri, ri_err, ra, ra_err = anisotropy.get_proj_relint(relint, relint_err, n_bins=n_bins,
#                                                            decmin=decmin, decmax=decmax)
        
        ri, ri_err, ra, ra_err = anisotropy.get_binned_relint(data, ref, n_bins=n_bins, 
                                                             decmin=decmin, decmax=decmax)
        
        n_dof = ri.shape[0]
        chi2_all = np.sum(ri**2 / ri_err**2)
        pval = chi2.sf(chi2_all, n_dof, loc=0, scale=1)
        sig = erfcinv(2*pval)*np.sqrt(2)

        dipole_dict['ri'].append(ri)
        dipole_dict['ri_err'].append(ri_err)
        dipole_dict['ra'].append(ra)
        dipole_dict['pval'].append(pval)
        dipole_dict['sig'].append(sig)

        
    return pd.DataFrame.from_records(dipole_dict, index='n_bins')

In [14]:
# proj_light_df = get_proj_nbins_df(bins, composition='light')
# proj_heavy_df = get_proj_nbins_df(bins, composition='heavy')

In [15]:
bins = np.arange(1, 72+1, 1, dtype=int)
proj_all_df = get_proj_nbins_df(bins, composition='all')
proj_light_df = get_proj_nbins_df(bins, composition='light')
proj_heavy_df = get_proj_nbins_df(bins, composition='heavy')

In [16]:
for proj_df, composition in zip([proj_all_df, proj_light_df, proj_heavy_df], ['total', 'light', 'heavy']):
    fig, axarr = plt.subplots(3, 3, figsize=(10, 6), sharex=True, sharey=False)
    # for n_bins, ax in zip(proj_df.index[::10], axarr.flatten()):
    for n_bins, ax in zip([1, 4, 6, 10, 20, 24, 36, 60, 72], axarr.flatten()):
        proj_nbins = proj_df.loc[n_bins]

        ra_bins = np.linspace(0, 360, n_bins + 1)
        plotting.plot_steps(ra_bins, proj_nbins['ri'], yerr=proj_nbins['ri_err'],
                            color=color_dict[composition], label=composition, fillalpha=0.2,
                            ax=ax)
    #                         label='{}$\\sigma$'.format(proj_nbins['sig']), ax=ax)
        ax.axhline(0, marker='None', ls='-.', c='k')
        ax.set_title(str(n_bins)+' RA bins')
    #     ax.set_ylabel('$\mathrm{\langle RI \\rangle }$')
    #     ax.set_xlabel('RA [ $^{\circ}$]')
        ax.grid()
#         ax.set_ylim(-4.0e-3, 4.0e-3)
        ax.set_xlim(0, 360)
        ax.invert_xaxis()
        ax.ticklabel_format(style='sci', axis='y', scilimits=(0,0))

    fig.text(0.5, -0.025, 'RA [ $^{\circ}$]', ha='center', fontsize=16)
    fig.text(-0.025, 0.5, '$\mathrm{\langle RI \\rangle }$', va='center', rotation='vertical', fontsize=16)

    plt.tight_layout()
    proj_vs_nbins_outfile = os.path.join(comp.paths.figures_dir,
                                          'anisotropy', 'proj_vs_nbins_{}.png'.format(composition))
    comp.check_output_dir(proj_vs_nbins_outfile)
    plt.savefig(proj_vs_nbins_outfile)
    plt.show()



In [17]:
fig, ax = plt.subplots()
ax.plot(proj_all_df.index, proj_all_df['sig'], ls='None', label='Significance', color='C2')
ax.axhline(0, marker='None', ls='-.', color='k', lw=1)
rolling_mean = proj_all_df['sig'].rolling(window=10,center=True).mean()
ax.plot(rolling_mean.index, rolling_mean, marker='None', ls='-', color='C2', label='Rolling mean\n(+/- 5 bins window)')
ax.fill_between(rolling_mean.index, rolling_mean+1, rolling_mean-1, color='C2', alpha=0.2)
ax.set_xlabel('Number RA bins')
ax.set_ylabel('Anisotropy significance [$\\sigma$]')
ax.set_ylim(0)
ax.set_xlim(0)
ax.grid()
ax.legend()

sig_vs_nbins_outfile = os.path.join(comp.paths.figures_dir,
                                      'anisotropy', 'sig_vs_nbins_all.png')
comp.check_output_dir(sig_vs_nbins_outfile)
plt.savefig(sig_vs_nbins_outfile)
plt.show()



In [18]:
fig, ax = plt.subplots()
for proj_df, composition in zip([proj_light_df, proj_heavy_df], ['light', 'heavy']):
    ax.plot(proj_df.index, proj_df['sig'], ls='None', label=composition, color=color_dict[composition])
#     ax.axhline(0, marker='None', ls='-.', color='k', lw=1)
    rolling_mean = proj_df['sig'].rolling(window=10,center=True).mean()
    ax.plot(rolling_mean.index, rolling_mean, marker='None', ls='-', color=color_dict[composition], label='')
    ax.fill_between(rolling_mean.index, rolling_mean+1, rolling_mean-1, color=color_dict[composition], alpha=0.2, label='')
ax.set_xlabel('Number RA bins')
ax.set_ylabel('Anisotropy significance [$\\sigma$]')
ax.set_ylim(0)
ax.set_xlim(0)
ax.grid()
ax.legend()

sig_vs_nbins_outfile = os.path.join(comp.paths.figures_dir,
                                      'anisotropy', 'sig_vs_nbins_comp.png')
comp.check_output_dir(sig_vs_nbins_outfile)
plt.savefig(sig_vs_nbins_outfile)
plt.show()


The heavy projected relative intensities (for large number of RA bins) looks like fluxuations, but is still ~4-sigma away from the null hypothesis. That's weird.

Scramble data in right acension to see if this feature goes away...


In [9]:
kwargs_data = {'config': config, 'low_energy': low_energy, 'smooth': smooth, 
               'scale': None, 'decmax': decmax}
data_heavy = anisotropy.get_map(name='data', composition='heavy', **kwargs_data)
ref_heavy = anisotropy.get_map(name='ref', composition='heavy', **kwargs_data)

In [10]:
data_heavy


Out[10]:
array([ -1.63750000e+30,  -1.63750000e+30,  -1.63750000e+30, ...,
         1.32410000e+04,   1.32240000e+04,   1.30480000e+04])

In [11]:
# Bin in declination
theta, phi = hp.pix2ang(n_side, range(len(data_heavy)))
thetamax =  np.deg2rad(90 - decmin)
thetamin = np.deg2rad(90 - decmax)
# dec_mask = (theta <= thetamax) & (theta >= thetamin)
n_dec_bins = 30
dec_bins= np.linspace(thetamin, thetamax, n_dec_bins+1, dtype=float)
theta_bin_num = np.digitize(theta, dec_bins) - 1

In [12]:
theta_bin_num


Out[12]:
array([-1, -1, -1, ..., 29, 29, 29])

In [20]:
data_heavy_RAscrambled = data_heavy.copy()
for idx in range(n_dec_bins):
    theta_bin_mask = (theta_bin_num == idx)
    unseen_mask = data_heavy == hp.UNSEEN
    combined_mask = theta_bin_mask & ~unseen_mask
    
    data_in_dec_bin = data_heavy.copy()
    data_in_dec_bin = data_in_dec_bin[combined_mask]
    data_series = pd.Series(data_in_dec_bin)
    print(idx)
    shuffled_data = data_series.sample(frac=1.0, random_state=2).values
    data_heavy_RAscrambled[combined_mask] = shuffled_data
#     np.random.shuffle(data_in_dec_bin)
#     data_heavy_RAscrambled[combined_mask] = data_in_dec_bin


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29

In [13]:
def get_noisy_proj_sig(composition, random_state):
    # Set random state for trials
    np.random.seed(random_state)
    
    kwargs_data = {'config': config, 'low_energy': low_energy, 'smooth': smooth, 
                   'scale': None, 'decmax': decmax}
    ref = anisotropy.get_map(name='ref', composition=composition, **kwargs_data)
    unseen_mask = ref == hp.UNSEEN
    ref_poisson_noise = ref.copy()
    ref_poisson_noise[~unseen_mask] = np.random.poisson(ref_poisson_noise[~unseen_mask])
    
    proj_df = get_proj_nbins_df(bins, data=ref_poisson_noise, ref=ref)
    
    return proj_df['sig']

In [14]:
n_noise_trials = 1000
sig_ref_noise = [delayed(get_noisy_proj_sig)('all', random_state) for random_state in range(n_noise_trials)]
sig_ref_noise = delayed(pd.concat)(sig_ref_noise)
# sig_ref_noise = sig_ref_noise.divide(n_noise_trials)

In [15]:
with ProgressBar():
#     sig_ref_noise = sig_ref_noise.compute(get=dask.get)
    sig_ref_noise = sig_ref_noise.compute(get=multiprocessing.get, num_works=25)


[########################################] | 100% Completed |  1min 40.1s

In [16]:
grouped_nbins = sig_ref_noise.groupby(sig_ref_noise.index)

In [17]:
def gaussian(x, mu=0, sigma=1):
    return np.exp(-(x - mu)**2/(2*sigma**2))/np.sqrt(2*np.pi*sigma**2)

In [18]:
sig_bins, sig_step = np.linspace(-5, 5, 50, retstep=True)
sig_midpoints = (sig_bins[1:] + sig_bins[:-1]) / 2

fig, axarr = plt.subplots(3, 3, figsize=(10, 6), sharex=True, sharey=True)
for n_bins, ax in zip([1, 4, 6, 10, 20, 24, 36, 60, 72], axarr.flatten()):
    df_noise_nbins = grouped_nbins.get_group(n_bins)
    
    label_mean = '$\mu = {:0.2f}$'.format(df_noise_nbins.mean())
    label_std = '$\sigma = {:0.2f}$'.format(df_noise_nbins.std())
    df_noise_nbins.plot(kind='hist', bins=sig_bins, histtype='stepfilled', alpha=0.5, lw=1.5, 
                        color=color_dict['total'], ax=ax, label=label_mean + '\n ' + label_std)
    
    ax.plot(sig_midpoints, n_noise_trials*sig_step*gaussian(sig_midpoints), 
            marker='None', label='Gaussian')
    
    ax.set_ylabel('')
    ax.set_title('{} RA bins'.format(n_bins))
    ax.grid()
    ax.legend()

fig.text(0.5, -0.025, 'Anisotropy significance [$\\sigma$]', ha='center', fontsize=16)
fig.text(-0.025, 0.5, 'Counts', va='center', rotation='vertical', fontsize=16)
plt.tight_layout()

sig_vs_nbins_outfile = os.path.join(comp.paths.figures_dir,
                                      'anisotropy', 'sig_vs_nbins_all.png')
comp.check_output_dir(sig_vs_nbins_outfile)
plt.savefig(sig_vs_nbins_outfile)
plt.show()



In [19]:
fig, ax = plt.subplots()
for n_bins in grouped_nbins.indices.keys():
    df_noise_nbins = grouped_nbins.get_group(n_bins)
    
#     label_mean = '$\mu = {:0.2f}$'.format(df_noise_nbins.mean())
#     label_std = '$\sigma = {:0.2f}$'.format(df_noise_nbins.std())
#     df_noise_nbins.plot(kind='hist', bins=sig_bins, histtype='stepfilled', alpha=0.5, lw=1.5, 
#                         color=color_dict['total'], ax=ax, label=label_mean + '\n ' + label_std)
    mean = df_noise_nbins.mean()
    err = df_noise_nbins.std()
    ax.errorbar(n_bins, mean, yerr=err, marker='.', color=color_dict['total'])
#     ax.fill_between(n_bins, mean-err, mean+err)
    
ax.set_ylabel('Anisotropy significance [$\\sigma$]')
ax.set_xlabel('Number RA bins')
ax.grid()
ax.legend()

# fig.text(0.5, -0.025, 'Anisotropy significance [$\\sigma$]', ha='center', fontsize=16)
# fig.text(-0.025, 0.5, 'Counts', va='center', rotation='vertical', fontsize=16)
plt.tight_layout()

# sig_vs_nbins_outfile = os.path.join(comp.paths.figures_dir,
#                                       'anisotropy', 'sig_vs_nbins_all.png')
# comp.check_output_dir(sig_vs_nbins_outfile)
# plt.savefig(sig_vs_nbins_outfile)
plt.show()


/home/jbourbeau/.virtualenvs/composition/lib/python2.7/site-packages/matplotlib/axes/_axes.py:545: UserWarning: No labelled objects found. Use label='...' kwarg on individual plots.
  warnings.warn("No labelled objects found. "

In [16]:
fig, ax = plt.subplots()
sig_ref_noise.plot(kind='hist', bins=20, histtype='stepfilled', alpha=0.5, lw=1.5, color=color_dict['total'], ax=ax)
ax.set_ylabel('Counts')
ax.set_xlabel('Anisotropy significance [$\\sigma$]')
ax.grid()
plt.show()



In [ ]:


In [50]:
def get_RAscrambled_proj_sig(composition, random_state):

    kwargs_data = {'config': config, 'low_energy': low_energy, 'smooth': smooth, 
                   'scale': None, 'decmax': decmax, 'decmin': decmin}
    ref = anisotropy.get_map(name='ref', composition=composition, **kwargs_data)
    data = anisotropy.get_map(name='data', composition=composition, **kwargs_data)
    
    # Bin in declination
    theta, phi = hp.pix2ang(n_side, range(len(data)))
    thetamax =  np.deg2rad(90 - decmin)
    thetamin = np.deg2rad(90 - decmax)
    n_dec_bins = 20
    theta_bins= np.linspace(thetamin, thetamax, n_dec_bins+1)
    theta_bin_num = np.digitize(theta, theta_bins) - 1
    
    data_ra_scrambled = data.copy()
    unseen_mask = data_ra_scrambled == hp.UNSEEN
    for idx in range(n_dec_bins):
        theta_bin_mask = (theta_bin_num == idx)
        combined_mask = theta_bin_mask & ~unseen_mask

        data_in_dec_bin = data_ra_scrambled[combined_mask]
        shuffled_data = pd.Series(data_in_dec_bin).sample(frac=1.0, random_state=random_state).values
        data_ra_scrambled[combined_mask] = shuffled_data
        
    proj_df = get_proj_nbins_df(bins, data=data_ra_scrambled, ref=ref)
    
    return proj_df

In [51]:
def get_RAscrambled_data_dists(composition, random_state):

    kwargs_data = {'config': config, 'low_energy': low_energy, 'smooth': smooth, 
                   'scale': None, 'decmax': decmax, 'decmin': decmin}
    data = anisotropy.get_map(name='data', composition=composition, **kwargs_data)
    
    # Bin in declination
    theta, phi = hp.pix2ang(n_side, range(len(data)))
    thetamax =  np.deg2rad(90 - decmin)
    thetamin = np.deg2rad(90 - decmax)
    n_dec_bins = 20
    theta_bins= np.linspace(thetamin, thetamax, n_dec_bins+1)
    theta_bin_num = np.digitize(theta, theta_bins) - 1
    
#     data_ra_scrambled = data.copy()
    data_dists = {}
    for idx in range(n_dec_bins):
        data_ra_scrambled = data.copy()
        unseen_mask = data_ra_scrambled == hp.UNSEEN
        
        theta_bin_mask = (theta_bin_num == idx)
        combined_mask = theta_bin_mask & ~unseen_mask

        data_in_dec_bin = data_ra_scrambled[combined_mask]
        shuffled_data = pd.Series(data_in_dec_bin).sample(frac=1.0, random_state=random_state).values
        data_ra_scrambled[combined_mask] = shuffled_data
        
        proj, proj_err, ra, ra_err = anisotropy.get_RA_proj_map(data_ra_scrambled, 
                                                                decmin=decmin, decmax=decmax, 
                                                                n_bins=10)
        
        data_dists[idx] = proj, proj_err, ra, ra_err
    
    return data_dists

In [69]:
# Bin in declination
theta, phi = hp.pix2ang(n_side, range(hp.nside2npix(n_side)))
thetamax =  np.deg2rad(90 - decmin)
thetamin = np.deg2rad(90 - decmax)
n_dec_bins = 20
theta_bins= np.linspace(thetamin, thetamax, n_dec_bins+1)
theta_bin_num = np.digitize(theta, theta_bins) - 1

In [70]:
theta_bins


Out[70]:
array([ 2.53072742,  2.56127068,  2.59181394,  2.6223572 ,  2.65290046,
        2.68344372,  2.71398699,  2.74453025,  2.77507351,  2.80561677,
        2.83616003,  2.8667033 ,  2.89724656,  2.92778982,  2.95833308,
        2.98887634,  3.01941961,  3.04996287,  3.08050613,  3.11104939,
        3.14159265])

In [72]:
def get_RAscrambled_proj(composition, random_state, n_ra_bins=10):

    kwargs_data = {'config': config, 'low_energy': low_energy, 'smooth': smooth, 
                   'scale': None, 'decmax': decmax, 'decmin': decmin}
    data = anisotropy.get_map(name='data', composition=composition, **kwargs_data)
    ref = anisotropy.get_map(name='ref', composition=composition, **kwargs_data)
    
    # Bin in declination
    theta, phi = hp.pix2ang(n_side, range(len(ref)))
    thetamax =  np.deg2rad(90 - decmin)
    thetamin = np.deg2rad(90 - decmax)
    n_dec_bins = 20
    theta_bins= np.linspace(thetamin, thetamax, n_dec_bins+1)
    theta_bin_num = np.digitize(theta, theta_bins) - 1
    
    dists = []
    for idx in range(n_dec_bins):
        projections = {}
        
        data_ra_scrambled = data.copy()
        unseen_mask = data_ra_scrambled == hp.UNSEEN
        
        theta_bin_mask = (theta_bin_num == idx)
        combined_mask = theta_bin_mask & ~unseen_mask

        data_in_dec_bin = data_ra_scrambled[combined_mask]
        shuffled_data = pd.Series(data_in_dec_bin).sample(frac=1.0, random_state=random_state).values
        data_ra_scrambled[combined_mask] = shuffled_data
        data_ra_scrambled[~combined_mask] = hp.UNSEEN
        
        data_proj, data_proj_err, ra, ra_err = anisotropy.get_RA_proj_map(data_ra_scrambled, 
                                                                decmin=decmin, decmax=decmax, 
                                                                n_bins=n_ra_bins)
        ref_proj, ref_proj_err, ra, ra_err = anisotropy.get_RA_proj_map(ref, 
                                                        decmin=decmin, decmax=decmax, 
                                                        n_bins=n_ra_bins)
        
        projections['data_proj'] = data_proj
        projections['data_proj_err'] = data_proj_err
        projections['ref_proj'] = ref_proj
        projections['ref_proj_err'] = ref_proj_err
        projections['ra'] = ra
        
        dists.append(projections)
           
    return pd.DataFrame.from_records(dists)

data


In [73]:
n_ra_scramble_trials = 1
ra_scambled_dists = [delayed(get_RAscrambled_proj)('all', random_state, n_ra_bins=30) 
                       for random_state in range(n_ra_scramble_trials)]
ra_scambled_dists = delayed(pd.concat)(ra_scambled_dists)
with ProgressBar():
    ra_scambled_dists = compute(ra_scambled_dists, get=multiprocessing.get, num_works=min(n_ra_scramble_trials, 25))[0]


[########################################] | 100% Completed |  0.9s

In [74]:
ra_scambled_dists


Out[74]:
data_proj data_proj_err ra ref_proj ref_proj_err
0 [782.0, 1402.0, 1149.0, 1796.0, 961.0, 1148.0,... [27.9642629082, 37.4432904537, 33.8969025134, ... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...
1 [3535.0, 3452.0, 3614.0, 3517.0, 3475.0, 2869.... [59.4558659848, 58.7537232863, 60.1165534608, ... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...
2 [7344.0, 9230.0, 8099.0, 8282.0, 9021.0, 9919.... [85.6971411425, 96.0728889958, 89.994444273, 9... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...
3 [27712.0, 30015.0, 30953.0, 28247.0, 28345.0, ... [166.469216373, 173.248376616, 175.934646957, ... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...
4 [33866.0, 33314.0, 29603.0, 34571.0, 36762.0, ... [184.027171907, 182.521231642, 172.055223693, ... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...
5 [43437.0, 35300.0, 44793.0, 40872.0, 40042.0, ... [208.415450483, 187.882942281, 211.643568293, ... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...
6 [73426.0, 89850.0, 77324.0, 87394.0, 76037.0, ... [270.97232331, 299.749895746, 278.07193314, 29... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...
7 [67274.0, 69362.0, 66404.0, 60224.0, 65776.0, ... [259.372319263, 263.366664557, 257.689735923, ... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...
8 [83596.0, 61770.0, 84908.0, 71768.0, 81154.0, ... [289.129728669, 248.53571172, 291.389773328, 2... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...
9 [107971.0, 133251.0, 119795.0, 120272.0, 11937... [328.589409446, 365.035614701, 346.114143022, ... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...
10 [80061.0, 80758.0, 80674.0, 80492.0, 78746.0, ... [282.950525711, 284.179520726, 284.031688373, ... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...
11 [92709.0, 75684.0, 74863.0, 90596.0, 76906.0, ... [304.481526533, 275.107251813, 273.611037789, ... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...
12 [120639.0, 139656.0, 119010.0, 123452.0, 12029... [347.331253992, 373.705766613, 344.978260185, ... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...
13 [76837.0, 77245.0, 78471.0, 97034.0, 78013.0, ... [277.194877298, 277.929847264, 280.126757023, ... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...
14 [128952.0, 87699.0, 127761.0, 86029.0, 132696.... [359.098872179, 296.140169514, 357.436707684, ... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...
15 [46688.0, 92918.0, 47130.0, 70603.0, 70551.0, ... [216.074061377, 304.824539695, 217.094449491, ... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...
16 [50863.0, 49953.0, 74867.0, 49597.0, 50507.0, ... [225.528268738, 223.501677846, 273.618347338, ... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...
17 [80765.0, 53373.0, 54545.0, 80854.0, 54759.0, ... [284.191836617, 231.025972566, 233.548710123, ... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...
18 [27823.0, 27956.0, 28236.0, 28325.0, 27787.0, ... [166.802278162, 167.200478468, 168.03571049, 1... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...
19 [0.0, 28628.0, 0.0, 28539.0, 0.0, 28418.0, 0.0... [0.0, 169.198108736, 0.0, 168.934898704, 0.0, ... [6.0, 18.0, 30.0, 42.0, 54.0, 66.0, 78.0, 90.0... [1150901.85157, 1167168.15013, 1143341.99896, ... [1072.80093753, 1080.35556653, 1069.27171428, ...

In [75]:
# with sns.color_palette('Blues_d', 20):
data_colors = sns.color_palette('Blues_d', len(ra_scambled_dists)+1).as_hex()
ref_colors = sns.color_palette('Greens_d', len(ra_scambled_dists)+1).as_hex()

# fig, ax = plt.subplots()
fig, ax = plt.subplots(figsize=(10, 8))
for dec_bin_idx, proj_df in ra_scambled_dists.iterrows():
    
    ax.errorbar(proj_df['ra'], proj_df['data_proj'], yerr=proj_df['data_proj_err'], marker='.', ls=':', label=str(dec_bin_idx),
                color=data_colors[dec_bin_idx])
#     ax.errorbar(proj_df['ra'], proj_df['ref_proj'], yerr=proj_df['ref_proj_err'], marker='.', ls='-', label=str(dec_bin_idx),
#                 color='C2')
#     print(proj_df.iloc[n_ra_bins])
ax.grid()
# ax.set_yscale('log', nonposy='clip')
# ax.set_ylim(0e6, 2e6)
ax.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
ax.invert_xaxis()
ax.set_xlabel('RA [ $^{\circ}$]')
ax.set_ylabel('Counts')
ax.legend()
# ax.set_ylabel('$\mathrm{\langle RI \\rangle }$')
plt.show()



In [7]:
# n_ra_scramble_trials = 10
# sig_ra_scambled = [delayed(get_RAscrambled_proj_sig)('all', random_state) 
#                    for random_state in range(n_ra_scramble_trials)]
# sig_ra_scambled = delayed(pd.concat)(sig_ra_scambled)

In [22]:
with ProgressBar():
    sig_ra_scambled = sig_ra_scambled.compute(get=multiprocessing.get, num_works=min(n_ra_scramble_trials, 25))


[########################################] | 100% Completed | 17.2s

In [24]:
grouped_nbins = sig_ra_scambled.groupby(sig_ra_scambled.index)

In [25]:
grouped_nbins.get_group(n_bins).ri.mean()


Out[25]:
array([ 0.00860688,  0.0080394 ,  0.00406372,  0.00871214,  0.00577433,
        0.00721379,  0.00657298,  0.00750896,  0.01135379,  0.00844444,
        0.00545089,  0.00059905,  0.00444066,  0.00196555,  0.00475735,
       -0.00166376,  0.00287817, -0.00131111, -0.00175566, -0.00321415,
       -0.00415697, -0.00043502, -0.00744665, -0.00502526, -0.00738693,
       -0.00529189, -0.00301959, -0.00301609, -0.00500225, -0.00790016,
       -0.00802193, -0.00666823, -0.01039897, -0.00889884, -0.00339783,
       -0.00908225, -0.00927381, -0.00742482, -0.00990523, -0.006472  ,
       -0.00897578, -0.00772958, -0.00771321, -0.00663994,  0.00135444,
       -0.00440851, -0.00486253, -0.00649784, -0.00198777, -0.00641401,
       -0.0011783 , -0.0014663 ,  0.00183772, -0.00013119,  0.00147325,
        0.00224502, -0.00129202,  0.00173045,  0.00307675,  0.00538986,
        0.00366399,  0.00587107,  0.01113692,  0.00782459,  0.00654408,
        0.00407745,  0.00716617,  0.00457685,  0.00619578,  0.01145233,
        0.00771361,  0.0097799 ])

In [26]:
fig, axarr = plt.subplots(3, 3, figsize=(10, 6), sharex=True, sharey=True)
for n_bins, ax in zip([1, 4, 6, 10, 20, 24, 36, 60, 72], axarr.flatten()):
    df_scambled_nbins = grouped_nbins.get_group(n_bins)
    ax.errorbar(df_scambled_nbins['ra'].mean(), df_scambled_nbins['ri'].mean(), 
                yerr=None, marker='.', ls=':')
    ax.axhline(0, marker='None', ls=':', color='k', lw=1.5)
    ax.set_title('{} RA bins'.format(n_bins))
    ax.grid()
ax.invert_xaxis()
fig.text(0.5, -0.025, 'RA [ $^{\circ}$]', ha='center', fontsize=16)
fig.text(-0.025, 0.5, '$\mathrm{\langle RI \\rangle }$', va='center', rotation='vertical', fontsize=16)
plt.tight_layout()
scrambled_vs_nbins_outfile = os.path.join(comp.paths.figures_dir,
                                      'anisotropy', 'scrambled_nbins_all.png')
comp.check_output_dir(scrambled_vs_nbins_outfile)
plt.savefig(scrambled_vs_nbins_outfile)
plt.show()



In [166]:
# sig_ra_scambled.replace([np.inf, -np.inf], np.nan, inplace=True)

In [140]:
grouped_nbins = sig_ra_scambled.groupby(sig_ra_scambled.index)

In [141]:
sig_bins, sig_step = np.linspace(-5, 5, 50, retstep=True)
sig_midpoints = (sig_bins[1:] + sig_bins[:-1]) / 2

fig, axarr = plt.subplots(3, 3, figsize=(10, 6), sharex=True, sharey=False)
for n_bins, ax in zip(range(1, 72), axarr.flatten()):
# for n_bins, ax in zip([1, 4, 6, 10, 20, 24, 36, 60, 72], axarr.flatten()):
    df_noise_nbins = grouped_nbins.get_group(n_bins)
    print(df_noise_nbins)
    
    label_mean = '$\mu = {:0.2f}$'.format(df_noise_nbins.mean())
    label_std = '$\sigma = {:0.2f}$'.format(df_noise_nbins.std())
    df_noise_nbins.plot(kind='hist', bins=sig_bins, histtype='stepfilled', alpha=0.5, lw=1.5, 
                        color=color_dict['total'], ax=ax, label=label_mean + '\n ' + label_std)
    
#     ax.plot(sig_midpoints, n_noise_trials*sig_step*gaussian(sig_midpoints), 
#             marker='None', label='Gaussian')
    
    ax.set_ylabel('')
    ax.set_title('{} RA bins'.format(n_bins))
    ax.grid()
#     ax.legend()

fig.text(0.5, -0.025, 'Anisotropy significance [$\\sigma$]', ha='center', fontsize=16)
fig.text(-0.025, 0.5, 'Counts', va='center', rotation='vertical', fontsize=16)
plt.tight_layout()

# sig_vs_nbins_outfile = os.path.join(comp.paths.figures_dir,
#                                       'anisotropy', 'sig_vs_nbins_all.png')
# comp.check_output_dir(sig_vs_nbins_outfile)
# plt.savefig(sig_vs_nbins_outfile)
plt.show()


n_bins
1   -2.628571
1   -2.628571
1   -2.628571
1   -2.628571
1   -2.628571
1   -2.628571
1   -2.628571
1   -2.628571
1   -2.628571
1   -2.628571
Name: sig, dtype: float64
n_bins
2    -0.328377
2     4.605197
2    17.632022
2     1.877160
2     1.649151
2    10.539982
2     6.965440
2     4.109491
2    -0.940397
2     6.502411
Name: sig, dtype: float64
n_bins
3    31.355061
3    30.371360
3    27.296457
3    21.824074
3    32.688847
3    18.372389
3          inf
3    11.488752
3          inf
3    34.141636
Name: sig, dtype: float64

ValueErrorTraceback (most recent call last)
<ipython-input-141-d4e77b3127c1> in <module>()
     11     label_std = '$\sigma = {:0.2f}$'.format(df_noise_nbins.std())
     12     df_noise_nbins.plot(kind='hist', bins=sig_bins, histtype='stepfilled', alpha=0.5, lw=1.5, 
---> 13                         color=color_dict['total'], ax=ax, label=label_mean + '\n ' + label_std)
     14 
     15 #     ax.plot(sig_midpoints, n_noise_trials*sig_step*gaussian(sig_midpoints),

/home/jbourbeau/.virtualenvs/composition/lib/python2.7/site-packages/pandas/plotting/_core.pyc in __call__(self, kind, ax, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, label, secondary_y, **kwds)
   2444                            colormap=colormap, table=table, yerr=yerr,
   2445                            xerr=xerr, label=label, secondary_y=secondary_y,
-> 2446                            **kwds)
   2447     __call__.__doc__ = plot_series.__doc__
   2448 

/home/jbourbeau/.virtualenvs/composition/lib/python2.7/site-packages/pandas/plotting/_core.pyc in plot_series(data, kind, ax, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, label, secondary_y, **kwds)
   1886                  yerr=yerr, xerr=xerr,
   1887                  label=label, secondary_y=secondary_y,
-> 1888                  **kwds)
   1889 
   1890 

/home/jbourbeau/.virtualenvs/composition/lib/python2.7/site-packages/pandas/plotting/_core.pyc in _plot(data, x, y, subplots, ax, kind, **kwds)
   1680         plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
   1681 
-> 1682     plot_obj.generate()
   1683     plot_obj.draw()
   1684     return plot_obj.result

/home/jbourbeau/.virtualenvs/composition/lib/python2.7/site-packages/pandas/plotting/_core.pyc in generate(self)
    236         self._compute_plot_data()
    237         self._setup_subplots()
--> 238         self._make_plot()
    239         self._add_table()
    240         self._make_legend()

/home/jbourbeau/.virtualenvs/composition/lib/python2.7/site-packages/pandas/plotting/_core.pyc in _make_plot(self)
   1311             kwds = self._make_plot_keywords(kwds, y)
   1312             artists = self._plot(ax, y, column_num=i,
-> 1313                                  stacking_id=stacking_id, **kwds)
   1314             self._add_legend_handle(artists[0], label, index=i)
   1315 

/home/jbourbeau/.virtualenvs/composition/lib/python2.7/site-packages/pandas/plotting/_core.pyc in _plot(cls, ax, y, style, bins, bottom, column_num, stacking_id, **kwds)
   1289             cls._get_stacked_values(ax, stacking_id, base, kwds['label'])
   1290         # ignore style
-> 1291         n, bins, patches = ax.hist(y, bins=bins, bottom=bottom, **kwds)
   1292         cls._update_stacker(ax, stacking_id, n)
   1293         return patches

/home/jbourbeau/.virtualenvs/composition/lib/python2.7/site-packages/matplotlib/__init__.pyc in inner(ax, *args, **kwargs)
   1896                     warnings.warn(msg % (label_namer, func.__name__),
   1897                                   RuntimeWarning, stacklevel=2)
-> 1898             return func(ax, *args, **kwargs)
   1899         pre_doc = inner.__doc__
   1900         if pre_doc is None:

/home/jbourbeau/.virtualenvs/composition/lib/python2.7/site-packages/matplotlib/axes/_axes.pyc in hist(***failed resolving arguments***)
   6193             # this will automatically overwrite bins,
   6194             # so that each histogram uses the same bins
-> 6195             m, bins = np.histogram(x[i], bins, weights=w[i], **hist_kwargs)
   6196             m = m.astype(float)  # causes problems later if it's an int
   6197             if mlast is None:

/home/jbourbeau/.virtualenvs/composition/lib/python2.7/site-packages/numpy/lib/function_base.pyc in histogram(a, bins, range, normed, weights, density)
    667     if not np.all(np.isfinite([mn, mx])):
    668         raise ValueError(
--> 669             'range parameter must be finite.')
    670     if mn == mx:
    671         mn -= 0.5

ValueError: range parameter must be finite.

In [ ]:


In [ ]: