False Alarm Probabilities


In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from astropy.stats import LombScargle

plt.style.use('seaborn-whitegrid')

In [2]:
def make_data(N, T=100, period=0.6, amp=(1, 0.6, 0.3), err=0.1,
              err_range=0.0, alias_period=1, alias_level=0,
              return_model=False, rseed=None):
    rng = np.random.RandomState(rseed)
    t = T * rng.rand(N)
    t -= (t % alias_period) * alias_level
    phase = rng.rand(len(amp))[:, None]
    def model(t, amp=amp, phase=phase, period=period):
        k = np.arange(1, len(amp) + 1)[:, None]
        return np.dot(amp, np.sin(2 * np.pi * k * (t - phase) / period))
    dy = err + err_range * np.random.rand(N)
    y = model(t) + dy * rng.randn(N)
    if return_model:
        return t, y, dy, model
    else:
        return t, y, dy

Baluev Method


In [3]:
from scipy.special import gammaln


def weighted_sum(val, dy):
    return (val / dy ** 2).sum()


def weighted_mean(val, dy):
    return weighted_sum(val, dy) / weighted_sum(np.ones_like(val), dy)


def weighted_var(val, dy):
    return weighted_mean(val ** 2, dy) - weighted_mean(val, dy) ** 2


def gamma(N):
    # Note: this is closely approximated by (1 - 1 / N) for large N
    return np.sqrt(2 / N) * np.exp(gammaln(N / 2) - gammaln((N - 1) / 2))


def FAP_single(Z, N, normalization='standard'):
    """False Alarm Probability for a single observation"""
    NH = N - 1  # DOF for null hypothesis
    NK = N - 3  # DOF for periodic hypothesis
    if normalization == 'psd':
        return np.exp(-Z)
    elif normalization == 'standard':
        # Note: astropy's standard normalization is Z = 2/NH * z_1 in Baluev's terms
        return (1 - Z) ** (NK / 2)
    elif normalization == 'model':
        # Note: astropy's model normalization is Z = 2/NK * z_2 in Baluev's terms
        return (1 + Z) ** -(NK / 2)
    elif normalization == 'log':
        # Note: astropy's log normalization is Z = 2/NK * z_3 in Baluev's terms
        return np.exp(-0.5 * NK * Z)
    else:
        raise NotImplementedError("normalization={0}".format(normalization))
        

def P_single(Z, N, normalization='standard'):
    """Cumulative Probability for a single observation"""
    return 1 - FAP_single(Z, N, normalization=normalization)
        
    
def FAP_estimated(Z, N, fmax, t, normalization='standard'):
    """False Alarm Probability based on estimated number of indep frequencies"""
    T = max(t) - min(t)
    N_eff = fmax * T
    return 1 - P_single(Z, N, normalization=normalization) ** N_eff


def tau_davies(Z, N, fmax, t, y, dy, normalization='standard'):
    """tau factor for estimating Davies bound (see Baluev 2008, Table 1)"""
    # Variable names follow the discussion in Baluev 2008
    NH = N - 1  # DOF for null hypothesis
    NK = N - 3  # DOF for periodic hypothesis
    Dt = weighted_var(t, dy)
    Teff = np.sqrt(4 * np.pi * Dt)
    W = fmax * Teff
    if normalization == 'psd':
        return W * np.exp(-Z) * np.sqrt(Z)
    elif normalization == 'standard':
        # Note: astropy's standard normalization is Z = 2/NH * z_1 in Baluev's terms
        return gamma(NH) * W * (1 - Z) ** (0.5 * (NK - 1)) * np.sqrt(NH * Z / 2)
    elif normalization == 'model':
        # Note: astropy's model normalization is Z = 2/NK * z_2 in Baluev's terms
        return gamma(NK) * W * (1 + Z) ** (- 0.5 * NK) * np.sqrt(NK * Z / 2)
    elif normalization == 'log':
        # Note: astropy's log normalization is Z = 2/NK * z_3 in Baluev's terms
        return gamma(NK) * W * np.exp(-0.5 * Z * (NK - 0.5)) * np.sqrt(NK * np.sinh(0.5 * Z))
    else:
        raise NotImplementedError("normalization={0}".format(normalization))

    
def FAP_davies(Z, N, fmax, t, y, dy, normalization='standard'):
    """Davies bound (Eqn 5 of Baluev 2008)"""
    FAP_s = FAP_single(Z, N, normalization=normalization)
    tau = tau_davies(Z, N, fmax, t, y, dy, normalization=normalization)
    return FAP_s + tau


def FAP_aliasfree(Z, N, fmax, t, y, dy, normalization='standard'):
    """Alias-free approximation to FAP (Eqn 6 of Baluev 2008)"""
    P_s = P_single(Z, N, normalization=normalization)
    tau = tau_davies(Z, N, fmax, t, y, dy, normalization=normalization)
    return 1 - P_s * np.exp(-tau)

In [4]:
def FAP_bootstrap(Z, t, y, dy, fmax, n_bootstraps=1000,
                  random_seed=None, normalization='standard'):
    rng = np.random.RandomState(random_seed)
    
    def bootstrapped_power():
        resample = rng.randint(0, len(y), len(y))  # sample with replacement
        ls_boot = LombScargle(t, y[resample], dy[resample])
        freq, power = ls_boot.autopower(normalization=normalization,
                                        maximum_frequency=fmax)
        return power.max()
    
    pmax = np.array([bootstrapped_power() for i in range(n_bootstraps)])
    pmax.sort()
    
    return 1 - np.searchsorted(pmax, Z) / len(pmax)

In [5]:
import os
def cached(filename, overwrite=False):
    def decorator(func):
        def new_func(*args, **kwargs):
            if overwrite or not os.path.exists(filename):
                np.save(filename, func(*args, **kwargs))
            return np.load(filename)
        return new_func
    return decorator

In [6]:
Z = np.linspace(0, 0.3, 100)
fmax = 10
T = 5
N = 100
normalization='standard'

t, y, dy = make_data(N=N, T=T, amp=[0], err=1.0, rseed=1324)
FAP_bootstrap_cached = cached('FAP-cache.npy', overwrite=False)(FAP_bootstrap)
FAP_boot = FAP_bootstrap_cached(Z, t, y, dy, fmax=fmax, normalization=normalization,
                                n_bootstraps=10000, random_seed=1324)

t, y, dy = make_data(N=N, T=T, amp=[0], err=1.0, alias_level=0.8, rseed=1324)
FAP_bootstrap_cached = cached('FAP-alias-cache.npy', overwrite=False)(FAP_bootstrap)
FAP_boot_alias = FAP_bootstrap_cached(Z, t, y, dy, fmax=fmax, normalization=normalization,
                                      n_bootstraps=10000, random_seed=1324)

fig, ax = plt.subplots(figsize=(8, 5))
ax.semilogy(Z, FAP_boot, '-', color='black',
            label='Bootstrap, unstructured window')
ax.semilogy(Z, FAP_boot_alias, '-', color='gray',
            label='Bootstrap, structured window')
ax.semilogy(Z, FAP_aliasfree(Z, N, fmax, t, y, dy, normalization=normalization),
            '--k', label='Baluev method')
ax.semilogy(Z, FAP_estimated(Z, N, fmax, t, normalization=normalization),
            ':k', label='approx. $N_{eff}$ method')

ax.legend()

ax.set(xlim=(Z.min(), Z.max()), ylim=(1E-3, 2),
       xlabel='Observed Maximum Peak Value',
       ylabel='False Alarm Probability',
       title=r'100 observations over 5 days; $f_{max} = 10$ days$^{-1}$');

fig.savefig('fig27_FAP_bootstrap.pdf')