In [ ]:
# AAM additions
from __future__ import division, print_function, absolute_import

In [ ]:
%%bash

cd ./data/
tar -zxvf qpo.tar.gz

In [ ]:
import numpy as np
import math
#import cmath
from matplotlib import pyplot as plt
import pandas as pd
import astroML.time_series as ts
%matplotlib notebook
# the more familiar %matplotlib inline is not used, so that the interactive zoom remains available for the plots within the notebook

Ex. 1. THE EFFECTS OF VARIATIONS IN THE TIME SAMPLING

1.1 Create arrays of time cadences as follows:

(a) 90 days of observations with a precise daily cadence; (b) the same 90 days, but with four evenly spaced observations per day; (c) four times 90 days (360 days), with a precise daily cadence.

Compute the spectral windows of all three time samplings (a), (b) and (c) on a dense ("oversampled") grid, consisting of ten times as many test frequencies as there are data points. Plot them.

What symmetries do you see? What are the Fourier frequencies of each time sampling? What is the difference between the Fourier-sampled and the oversampled spectral window in (a)? What happens to the (oversampled) spectral windows when you densify the sampling but keep the total observation time span at 90 days (b)? What happens when you keep the time sampling unchanged but extend the observations to a year (c)?

Simulate one sinusoid at a Fourier frequency common to all three samplings (the code below uses 71/90 cycles/day) and one with some uglier frequency between 0 and 1, not falling on a Fourier frequency. Compute the periodograms of both signals on an oversampled frequency grid. What do you see in the oversampled periodogram, and what at the Fourier frequencies?


In [ ]:
# Oversampling factor: we would like to see spectral windows
# and periodograms in more detail than just at the Fourier frequencies
oversampling = 10

truefreq = 0.2284271247

# Time sampling (a): 90 days, one observation per day.
# (The array is named t_obs to avoid shadowing the astroML.time_series import alias ts.)
t_obs = np.linspace(start = 1, stop = 90, num = 90)
df1 = pd.DataFrame({'time': t_obs,
                   'cst': np.ones(90),
                   'sin1': np.sin(2 * math.pi * truefreq * t_obs),
                   'sin2': np.sin(2 * math.pi * (71./90.) * t_obs)})
# Fourier frequencies (spacing 1/T = 1/90) and the oversampled test grid:
F_freqs1 = np.linspace(start = 0, stop = 90, num = 91) / 90
freqs1 = np.linspace(start = 0, stop = 2*90, num = oversampling*2*90+1) / 90

# Time sampling (b): 90 days, four evenly spaced observations per day.
t_obs = np.linspace(start = 0.25, stop = 90, num = 4*90)
df2 = pd.DataFrame({'time': t_obs,
                   'cst': np.ones(len(t_obs)),
                   'sin1': np.sin(2 * math.pi * truefreq * t_obs),
                   'sin2': np.sin(2 * math.pi * (71./90.) * t_obs)})
F_freqs2 = np.linspace(start = 0, stop = 4*90, num = 4*90+1) / 90
freqs2 = np.linspace(start = 0, stop = 2*4*90, num = oversampling*2*4*90+1) / 90

# Time sampling (c): 360 days, one observation per day.
t_obs = np.linspace(start = 1, stop = 4*90, num = 4*90)
df3 = pd.DataFrame({'time': t_obs,
                   'cst': np.ones(len(t_obs)),
                   'sin1': np.sin(2 * math.pi * truefreq * t_obs),
                   'sin2': np.sin(2 * math.pi * (71./90.) * t_obs)})
F_freqs3 = np.linspace(start = 0, stop = 4*90, num = 4*90+1) / (4*90)
freqs3 = np.linspace(start = 0, stop = 2*4*90, num = oversampling*2*4*90+1) / (4*90)

In [ ]:
# Define the Deeming function. (The FFT gives values only at the Fourier
# frequencies, and is applicable only to evenly sampled data.)
def deemingPSD_fun(times, obs, freqs):
    psd_tmp = np.zeros(len(freqs))
    for jj, freq in enumerate(freqs):
        v_tmp = 2.0 * math.pi * freq * times
        s_tmp = np.dot(obs, np.sin(v_tmp))
        c_tmp = np.dot(obs, np.cos(v_tmp))
        psd_tmp[jj] = (s_tmp**2 + c_tmp**2) / len(times)**2
    return psd_tmp
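
The same computation can be vectorized over the frequency grid; a minimal sketch (deemingPSD_vec is a new name, numerically equivalent to the loop above and faster for dense grids):

In [ ]:
# Vectorized Deeming PSD: build the (n_freq, n_times) phase matrix in one step.
def deemingPSD_vec(times, obs, freqs):
    phases = 2.0 * np.pi * np.outer(freqs, times)
    s = np.dot(np.sin(phases), np.asarray(obs))
    c = np.dot(np.cos(phases), np.asarray(obs))
    return (s**2 + c**2) / len(times)**2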

In [ ]:
spw1 = deemingPSD_fun(times = df1['time'], obs = df1['cst'], freqs = freqs1)
psd11 = deemingPSD_fun(times = df1['time'], obs = df1['sin1'], freqs = freqs1)
psd12 = deemingPSD_fun(times = df1['time'], obs = df1['sin2'], freqs = freqs1)
spw2 = deemingPSD_fun(times = df2['time'], obs = df2['cst'], freqs = freqs2)
psd21 = deemingPSD_fun(times = df2['time'], obs = df2['sin1'], freqs = freqs2)
psd22 = deemingPSD_fun(times = df2['time'], obs = df2['sin2'], freqs = freqs2)
spw3 = deemingPSD_fun(times = df3['time'], obs = df3['cst'], freqs = freqs3)
psd31 = deemingPSD_fun(times = df3['time'], obs = df3['sin1'], freqs = freqs3)
psd32 = deemingPSD_fun(times = df3['time'], obs = df3['sin2'], freqs = freqs3)

First visualize the three spectral windows. What are the differences between them? After inspecting them over the whole test frequency range, zoom in on the low-frequency region (uncomment the last two lines of each panel).


In [ ]:
plt.rcParams["figure.figsize"] = [12.0, 9.0]
fig = plt.figure(1)
plt.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.95, wspace=None, hspace=0.35)
ax1 = fig.add_subplot(311)
plt.plot(freqs1, spw1, 'k', alpha=1)
plt.title('90 days, sampling 1/d')
plt.xlabel('Frequency [1/d]')
plt.ylabel('Sp.win.')
plt.margins(.01, .05)
#plt.vlines(F_freqs1[F_freqs1 < 0.1], ymin = 0, ymax = 1, color='r', linestyle='solid')
#plt.xlim(0., .1)

ax2 = fig.add_subplot(312)
plt.plot(freqs2, spw2, 'k', alpha=1)
plt.title('90 days, sampling 4/d')
plt.xlabel('Frequency [1/d]')
plt.ylabel('Sp.win.')
plt.margins(.01, .05)
#plt.vlines(F_freqs2[F_freqs2 < 0.1], ymin = 0, ymax = 1, color='r', linestyle='solid')
#plt.xlim(0., .1)

ax3 = fig.add_subplot(313)
plt.plot(freqs3, spw3, 'k', alpha=1)
plt.title('360 days, sampling 1/d')
plt.xlabel('Frequency [1/d]')
plt.ylabel('Sp.win.')
plt.margins(.01, .05)
#plt.vlines(F_freqs3[F_freqs3 < 0.1], ymin = 0, ymax = 1, color='r', linestyle='solid')
#plt.xlim(0., .1)

plt.show()

Consider now the sinusoid at the Fourier frequency. Again, zoom in around its true frequency. What would be observed if the periodogram were computed only at the Fourier frequencies? Next, replace this sinusoid with the other one (stored in psd11, psd21, psd31) to see what could be seen at the Fourier frequencies.


In [ ]:
fig = plt.figure(1)
plt.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.95, wspace=None, hspace=0.35)

ax1 = fig.add_subplot(311)
plt.plot(freqs1, psd12, 'k', alpha=1)
plt.title('90 days, sampling 1/d')
plt.xlabel('Frequency [1/d]')
plt.ylabel('PSD')
plt.margins(0.01, 0.05)
#plt.vlines(F_freqs1, ymin = 0, ymax = 1, color='r', linestyle='solid')
#plt.xlim(0.5, 1)
plt.axvline(F_freqs1[71], ymin = 0, ymax = 1, color='c', linestyle='dashed')

ax2 = fig.add_subplot(312)
plt.plot(freqs2, psd22, 'k', alpha=1)
plt.title('90 days, sampling 4/d')
plt.xlabel('Frequency [1/d]')
plt.ylabel('PSD')
plt.margins(0.01, 0.05)
#plt.vlines(F_freqs2, ymin = 0, ymax = 1, color='r', linestyle='solid')
#plt.xlim(0.5, 1)
plt.axvline(F_freqs2[71], ymin = 0, ymax = 1, color='c', linestyle='dashed')

ax3 = fig.add_subplot(313)
plt.plot(freqs3, psd32, 'k', alpha=1)
plt.title('360 days, sampling 1/d')
plt.xlabel('Frequency [1/d]')
plt.ylabel('PSD')
plt.margins(0.01, 0.05)
#plt.vlines(F_freqs3, ymin = 0, ymax = 1, color='r', linestyle='solid')
#plt.xlim(0.5, 1)
plt.axvline(F_freqs3[284], ymin = 0, ymax = 1, color='c', linestyle='dashed')

plt.show()

TAKE-HOME MESSAGE:

  1. In an evenly sampled case, the time interval between observations determines fmax, the maximum detectable frequency in the periodogram.
  2. The total time span of the observations determines the frequency resolution df = 1/T of the resulting periodogram; this is the separation between the Fourier frequencies (see the quick check below).
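
A quick numeric check for sampling (a), using only quantities defined above:

In [ ]:
# Sampling (a): 90 observations, one per day, over T = 90 days.
dt_a = 1.0     # time between observations [d]
T_a = 90.0     # total time span [d]
print("f_max (Nyquist):", 1 / (2 * dt_a))   # 0.5 cycles/day
print("df (resolution):", 1 / T_a)          # ~0.0111 cycles/day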

Ex. 1.2 MORE COMPLEX REGULARITIES (DAILY RHYTHM)

Take time sampling (b), and drop the second half of every day (keep the first two of the four daily observations). Compute the spectral window again, and take a look at what happens at the Fourier frequencies. Compute also the periodograms of the two signals.


In [ ]:
# Keep the first two of the four daily observations of sampling (b),
# imposing a 1-day rhythm on the sampling:
drop_half_ind = (np.arange(4*90) % 4 < 2)
half_df = df2[drop_half_ind]

In [ ]:
half_spw3 = deemingPSD_fun(times = half_df['time'], obs = half_df['cst'], freqs = freqs2)
half_psd31 = deemingPSD_fun(times = half_df['time'], obs = half_df['sin1'], freqs = freqs2)
half_psd32 = deemingPSD_fun(times = half_df['time'], obs = half_df['sin2'], freqs = freqs2)

In [ ]:
fig = plt.figure(1)
plt.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.95, wspace=None, hspace=0.35)

ax1 = fig.add_subplot(311)
plt.plot(freqs2, half_spw3, 'k', alpha=1)
plt.title('90 days, sampling 4/d with 50% missing data \n Spectral window')
plt.xlabel('Frequency [1/d]')
plt.ylabel('Spectral window')
plt.margins(0.01, 0.05)
#plt.vlines(F_freqs2, ymin = 0, ymax = 1, color='r', linestyle='solid')
#plt.xlim(0.95,1.05)

ax2 = fig.add_subplot(312)
plt.plot(freqs2, half_psd32, 'k', alpha=1)
plt.title('Sine at Fourier frequency')
plt.xlabel('Frequency [1/d]')
plt.ylabel('PSD')
plt.margins(0.01, 0.05)
#plt.vlines(F_freqs2, ymin = 0, ymax = 1, color='r', linestyle='solid')
#plt.xlim(71./90.-.05, 71./90.+.05)
#plt.ylim(0.,0.3)
plt.axvline(71./90., ymin = 0, ymax = 1, color='c', linestyle='dashed')

ax3 = fig.add_subplot(313)
plt.plot(freqs2, half_psd31, 'k', alpha=1)
plt.title('Sine, F = 0.2284')
plt.xlabel('Frequency [1/d]')
plt.ylabel('PSD')
plt.margins(0.01, 0.05)
#plt.vlines(F_freqs2, ymin = 0, ymax = 1, color='r', linestyle='solid')
#plt.xlim(truefreq-.05, truefreq+.05)
#plt.ylim(0.,0.3)
plt.axvline(truefreq, ymin = 0, ymax = 1, color='c', linestyle='dashed')

plt.show()

TAKE-HOME MESSAGE: Any (quasi-)periodicity imposed on the time sampling shows up as additional peaks in the spectral window, and hence as aliases in the periodograms (see the sketch below).
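
As a check on where the extra peaks land: a sampling rhythm with period 1 day gives spectral window peaks at integer multiples of 1 cycle/day, so a sinusoid at frequency f is aliased to |f - k| for integer k. A small sketch with the Fourier-frequency sinusoid from above:

In [ ]:
# Expected alias positions (in cycles/day) of the sinusoid at f = 71/90,
# given the 1-day rhythm in the sampling:
f = 71. / 90.
print([round(abs(f - k), 4) for k in range(-2, 3)])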

1.3 UNEVEN SAMPLING

Now randomly drop half of the dataframe rows, and add a random jitter to each time, uniformly sampled from the interval (-0.1, 0.1). See what happens to the spectral window and the periodograms.


In [ ]:
rnd_df = half_df.sample(frac = 0.5).sort_values(axis = 0, by = 'time')
rnd_df['time'] = rnd_df['time'] + np.random.uniform(-0.1, 0.1, size = len(rnd_df))

In [ ]:
rnd_spw2 = deemingPSD_fun(times = rnd_df['time'], obs = rnd_df['cst'], freqs = freqs2)
rnd_psd21 = deemingPSD_fun(times = rnd_df['time'], obs = rnd_df['sin1'], freqs = freqs2)
rnd_psd22 = deemingPSD_fun(times = rnd_df['time'], obs = rnd_df['sin2'], freqs = freqs2)
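
No plotting cell was given for this exercise; a minimal sketch, mirroring the earlier figures:

In [ ]:
fig = plt.figure(1)
plt.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.95, wspace=None, hspace=0.35)

ax1 = fig.add_subplot(311)
plt.plot(freqs2, rnd_spw2, 'k', alpha=1)
plt.title('Randomized sampling \n Spectral window')
plt.xlabel('Frequency [1/d]')
plt.ylabel('Spectral window')
plt.margins(0.01, 0.05)

ax2 = fig.add_subplot(312)
plt.plot(freqs2, rnd_psd22, 'k', alpha=1)
plt.title('Sine at Fourier frequency')
plt.xlabel('Frequency [1/d]')
plt.ylabel('PSD')
plt.margins(0.01, 0.05)
plt.axvline(71./90., ymin = 0, ymax = 1, color='c', linestyle='dashed')

ax3 = fig.add_subplot(313)
plt.plot(freqs2, rnd_psd21, 'k', alpha=1)
plt.title('Sine, F = 0.2284')
plt.xlabel('Frequency [1/d]')
plt.ylabel('PSD')
plt.margins(0.01, 0.05)
plt.axvline(truefreq, ymin = 0, ymax = 1, color='c', linestyle='dashed')

plt.show()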

Note that the data contain no noise yet: the structure in these periodograms is purely a consequence of the irregular time sampling.

Ex. 2. PERIOD SEARCH ON REAL DATA

2.1 PERIODOGRAM OF DIFFERENT VARIABLES

The time series of a Cepheid and two eclipsing binaries from OGLE-III, a quasar candidate from LINEAR, and an X-ray binary are given in the csv files. Compute the periodograms for each. Compare and analyse the results by plotting the folded light curves.

Note: pip install astroML_addons may be required to ensure that the Lomb-Scargle code below is not painfully slow.


In [ ]:
from astroML.time_series import lomb_scargle

In [ ]:
cep1 = pd.read_csv("./data/OGLE-LMC-CEP-0727.csv")
ecl1 = pd.read_csv("./data/OGLE-SMC-ECL-0322.csv")
ecl2 = pd.read_csv("./data/OGLE-SMC-ECL-0124.csv")
qso = pd.read_csv("./data/q2803474.csv")
## Periods: 
p_cep1 = 14.4891397
p_ecl1 = 71.6113338
p_ecl2 = 0.3270255
p_qso = 10**(-0.00138752348664116)

In [ ]:
## The frequency grid: resolution computed from the timespan of the observations,
## refined by the oversampling factor ofac.
def fgrid_fun(times, ofac, fmax):
    r_tmp = 1 / (times.max() - times.min()) / ofac 
    n_tmp = int(np.floor(fmax / r_tmp))   # np.linspace needs an integer num
    return np.linspace(r_tmp, fmax, n_tmp)

In [ ]:
# The frequency grids for each object 
fgrid_cep1 = fgrid_fun(cep1['time'], ofac = 10, fmax = 3) 
fgrid_ecl1 = fgrid_fun(ecl1['time'], ofac = 10, fmax = 3)
fgrid_ecl2 = fgrid_fun(ecl2['time'], ofac = 10, fmax = 10)
fgrid_qso = fgrid_fun(qso['time'], ofac = 10, fmax = 6)

In [ ]:
pgram_cep1 = lomb_scargle(t = cep1['time'], y = cep1['mag'], dy = cep1['mag.error'], omega = 2*np.pi*fgrid_cep1, generalized = True)
pgram_ecl1 = lomb_scargle(t = ecl1['time'], y = ecl1['mag'], dy = ecl1['mag.error'], omega = 2*np.pi*fgrid_ecl1, generalized = True)
pgram_ecl2 = lomb_scargle(t = ecl2['time'], y = ecl2['mag'], dy = ecl2['mag.error'], omega = 2*np.pi*fgrid_ecl2, generalized = True)
pgram_qso = lomb_scargle(t = qso['time'], y = qso['mag'], dy = qso['mag.error'], omega = 2*np.pi*fgrid_qso, generalized = True)

In [ ]:
plt.rcParams["figure.figsize"] = [16., 8.]
fig = plt.figure(1)
plt.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.95, wspace=None, hspace=0.35)

fig.add_subplot(221)
plt.plot(fgrid_cep1, pgram_cep1, 'k', alpha=1)
plt.title('Cepheid')
plt.xlabel('Frequency [1/d]')
plt.ylabel('Periodogram')
plt.margins(0.01, 0.05)
plt.axvline(1/p_cep1, ymin = 0, ymax = 0.1, color='c', lw = 2)

fig.add_subplot(222)
plt.plot(fgrid_ecl1, pgram_ecl1, 'k', alpha=1)
plt.title('Detached(?) binary')
plt.xlabel('Frequency [1/d]')
plt.ylabel('Periodogram')
plt.margins(0.01, 0.05)
plt.axvline(1/p_ecl1, ymin = 0, ymax = 0.1, color='c', lw = 2)

fig.add_subplot(223)
plt.plot(fgrid_ecl2, pgram_ecl2, 'k', alpha=1)
plt.title('Contact binary')
plt.xlabel('Frequency [1/d]')
plt.ylabel('Periodogram')
plt.margins(0.01, 0.05)
plt.axvline(1/p_ecl2, ymin = 0, ymax = 0.1, color='c', lw = 2)

fig.add_subplot(224)
plt.plot(fgrid_qso, pgram_qso, 'k', alpha=1)
plt.title('Quasar candidate')
plt.xlabel('Frequency [1/d]')
plt.ylabel('Periodogram')
plt.margins(0.01, 0.05)
plt.axvline(1/p_qso, ymin = 0, ymax = 0.1, color='c', lw = 2)

plt.show()

For the Cepheid, everything looks all right. For the quasar, the recovered frequency depends on the lowest frequency searched: here, the lowest frequency of the grid equals the grid resolution, whereas the pipeline that produced the catalog value used 1/timespan of the observations (that is, the oversampling factor times the grid resolution). The two eclipsing binaries exhibit a limitation of the generalized Lomb-Scargle (GLS) method when applied to nonsinusoidal light curves: eclipsing binaries typically have two minima per cycle, so the GLS finds the double frequency (half the period), which corresponds to a single minimum per cycle.

Fold the light curves of the two eclipsing binaries to take a look. The files contain a column 'phase' giving the phases according to the catalog period, so we only have to compute the phases according to our own period estimates.


In [ ]:
# Compute the period corresponding to the maximum of the periodogram:
p_est_ecl1 = 1 / fgrid_ecl1[np.argmax(pgram_ecl1)]
p_est_ecl2 = 1 / fgrid_ecl2[np.argmax(pgram_ecl2)]
# Check that it is indeed roughly half of the catalog period:
print(p_est_ecl1 * 2)
print(p_ecl1)

In [ ]:
# Compute the phases and add them as columns to the dataframes:
ecl1['phase2'] = (ecl1['time'] % p_est_ecl1) / p_est_ecl1
ecl2['phase2'] = (ecl2['time'] % p_est_ecl2) / p_est_ecl2

In [ ]:
# plot:
plt.rcParams["figure.figsize"] = [12., 8.]
fig = plt.figure()
plt.subplots_adjust(left=0.05, bottom=0.1, right=0.95, top=0.95, wspace=None, hspace=0.35)

ax = fig.add_subplot(221)
plt.gca().invert_yaxis()
ax.errorbar(ecl1['phase2'], ecl1['mag'], ecl1['mag.error'], fmt='.k', ecolor='gray')
plt.title("ecl1, GLS period")
plt.xlabel('Phase')
plt.ylabel('Magnitude')

ax = fig.add_subplot(222)
plt.gca().invert_yaxis()
ax.errorbar(ecl1['phase'], ecl1['mag'], ecl1['mag.error'], fmt='.k', ecolor='gray')
plt.title("ecl1, catalog period")
plt.xlabel('Phase')
plt.ylabel('Magnitude')

ax = fig.add_subplot(223)
plt.gca().invert_yaxis()
ax.errorbar(ecl2['phase2'], ecl2['mag'], ecl2['mag.error'], fmt='.k', ecolor='gray')
plt.title("ecl2, GLS period")
plt.xlabel('Phase')
plt.ylabel('Magnitude')

ax = fig.add_subplot(224)
plt.gca().invert_yaxis()
ax.errorbar(ecl2['phase'], ecl2['mag'], ecl2['mag.error'], fmt='.k', ecolor='gray')
plt.title("ecl2, catalog period")
plt.xlabel('Phase')
plt.ylabel('Magnitude')

plt.show()

2.2 OPTIONAL: MULTITERM PERIODOGRAMS

We can try to improve the chances of finding the correct period of the eclipsing binaries with a periodogram that fits a harmonic model with a pre-defined number of terms at each frequency. Such a procedure is implemented in the astroML package as multiterm_periodogram. Try fitting a 6-term periodogram to the two eclipsing binaries.


In [ ]:
from astroML.time_series import multiterm_periodogram

In [ ]:
fgrid_short_ecl1 = fgrid_ecl1[fgrid_ecl1 < 4.]
fgrid_short_ecl2 = fgrid_ecl2[(fgrid_ecl2 < 8.)]

In [ ]:
pgram6_ecl1 = multiterm_periodogram(t = ecl1['time'], y = ecl1['mag'], dy = ecl1['mag.error'], omega = 2*np.pi*fgrid_short_ecl1, n_terms = 6)
pgram6_ecl2 = multiterm_periodogram(t = ecl2['time'], y = ecl2['mag'], dy = ecl2['mag.error'], omega = 2*np.pi*fgrid_short_ecl2, n_terms = 6)

In [ ]:
p_est6_ecl1 = 1 / fgrid_short_ecl1[np.argmax(pgram6_ecl1)]
print(p_est6_ecl1)
print(p_est_ecl1)
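
The analogous check for the contact binary (a sketch mirroring the cell above):

In [ ]:
p_est6_ecl2 = 1 / fgrid_short_ecl2[np.argmax(pgram6_ecl2)]
print(p_est6_ecl2)
print(p_ecl2)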

The multiterm periodogram finds the right period in both cases. The reason is the presence of even harmonics in the fitted light curve: while a sinusoid can have only one minimum per period, the second harmonic has double the frequency (half the period), and can therefore reproduce the two minima. Other period search methods, which do not fit sinusoids, can also perform better than the generalized Lomb-Scargle. Take a look at the periodograms, and use the zoom to enlarge the region around the true frequency.


In [ ]:
plt.rcParams["figure.figsize"] = [12., 8.]
fig = plt.figure(1)
plt.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.95, wspace=None, hspace=0.35)

fig.add_subplot(211)
plt.plot(fgrid_ecl1, pgram_ecl1, 'k', alpha=1, label = "1 term")
plt.plot(fgrid_short_ecl1, pgram6_ecl1, 'r', alpha=1, linestyle = "dashed", label = "6 terms")
plt.title('Detached(?) binary')
plt.xlabel('Frequency [1/d]')
plt.ylabel('Periodogram')
plt.margins(0.01, 0.05)
plt.xlim(-0.1,4.)
plt.axvline(1/p_ecl1, ymin = 0, ymax = 0.1, color='c', lw = 2)
plt.legend(loc = "best")

fig.add_subplot(212)
plt.plot(fgrid_ecl2, pgram_ecl2, 'k', alpha=1, label = "1 term")
plt.plot(fgrid_short_ecl2, pgram6_ecl2, 'r', alpha=1, linestyle = "dashed", label = "6 terms")
plt.title('Contact binary')
plt.xlabel('Frequency [1/d]')
plt.ylabel('Periodogram')
plt.margins(0.01, 0.05)
plt.xlim(-0.1,8.)
plt.axvline(1/p_ecl2, ymin = 0, ymax = 0.1, color='c', lw = 2)
plt.legend(loc = "best")

plt.show()

OPTIONAL: PRE-WHITENING AND SECONDARY PERIOD SEARCH

Pre-whitening is the procedure by which a found periodicity is removed from the light curve, in order to look for further frequencies. Light curves are very often not pure sinusoids, so we must find a good approximate model at the found period to fit and subtract. Such models can be a pair of Gaussians (for eclipsing binaries), Keplerian orbits (for radial velocity signals of planets), or simply harmonic models like those we fitted to the eclipsing binaries. We now take another Cepheid, pre-whiten it, and check whether it has a secondary period.


In [ ]:
import statsmodels.formula.api as smf

In [ ]:
cep2 = pd.read_csv("./data/OGLE-SMC-CEP-1211.csv")
# Catalog period of the star in days:
p_cep2 = 9.5311467

In [ ]:
# We'll fit models with up to 10 harmonics at the catalog period (normally, the
# period found by the period search should be used), and choose the best-fitting
# model by the BIC. The harmonic models, of the form
# a + sum_k (b_k * sin(k * omega * t)) + sum_k (c_k * cos(k * omega * t)),
# are linear in their parameters, so we use linear least squares.
# Create a dataframe with all the necessary variables for the linear least squares fit:
for jj in np.arange(10):
    cep2['s'+str(jj+1)] = np.sin(2*np.pi*cep2['time']*(jj+1)/p_cep2)
    cep2['c'+str(jj+1)] = np.cos(2*np.pi*cep2['time']*(jj+1)/p_cep2)

In [ ]:
bic_cep2 = np.zeros(10)
for jj in np.arange(10):
    # We first create the formula string that the least squares fit takes as input
    # (check the string that is created eg. by adding print formula_tmp):
    formula_tmp = 'mag ~ ' + '+'.join(cep2.columns[4:(2*(jj+1)+4)])
    # ... then fit the models and extract the BIC to decide which model is the best:
    bic_cep2[jj] = smf.ols(formula_tmp, data = cep2).fit().bic
# Check the array of BICs: which is minimal?
bic_cep2
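
To pick the best model programmatically rather than by eye (a small convenience sketch; bic_cep2[jj] belongs to the model with jj+1 harmonics):

In [ ]:
best_nharm = np.argmin(bic_cep2) + 1
print("Number of harmonics with the lowest BIC:", best_nharm)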

In [ ]:
# Refit the best model (here the one with 5 harmonics, i.e. jj = 4) and extract the residuals:
formula_tmp = 'mag ~ ' + '+'.join(cep2.columns[4:(2*(4+1)+4)])
cep2['resid'] = smf.ols(formula_tmp, data = cep2).fit().resid

In [ ]:
# Perform period search:
fgrid_cep2 = fgrid_fun(cep2['time'], ofac = 10, fmax = 10) 
pgram_resid_cep2 = lomb_scargle(t = cep2['time'], y = cep2['resid'], dy = cep2['mag.error'], omega = 2*np.pi*fgrid_cep2, generalized = True)

In [ ]:
plt.rcParams["figure.figsize"] = [12., 4.]
fig = plt.figure(1)
plt.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.95, wspace=None, hspace=None)

plt.plot(fgrid_cep2, pgram_resid_cep2, 'k', alpha=1)
plt.title('Cepheid 2')
plt.xlabel('Frequency [1/d]')
plt.ylabel('Periodogram')
plt.margins(0.01, 0.05)
plt.axvline(1/p_cep2, ymin = 0, ymax = 1, color='c', lw = 2, linestyle = "dashed")

plt.show()

Ex. 3. TIME-RESOLVED ANALYSIS

Ex. 3.1 A PERIOD-CHANGING OGLE CEPHEID

Do a time-resolved analysis of the period-changing Cepheid from the previous exercise in an interval around its average frequency, and represent it as a color map (time vs. frequency): in frequency between 0.1 and 0.11 c/d, using a sliding window of 2 years.


In [ ]:
print(cep2['time'].max())
print(cep2['time'].min())
# Sliding-window centers, spaced 10 days apart across the time span:
centers_tmp = np.linspace(470, 4950, num = 449)

In [ ]:
fgrid_short_cep2 = np.linspace(0.1, 0.11, 4000)
trsvd_cep2 = np.zeros([len(fgrid_short_cep2), len(centers_tmp)])
for ii in np.arange(0, len(centers_tmp)):
    # Select a 2-year window (+/- 365 days) around each center:
    df = cep2[(cep2['time'] < centers_tmp[ii] + 365) & (cep2['time'] > centers_tmp[ii] - 365)]
    trsvd_cep2[:,ii] = lomb_scargle(t = df['time'], y = df['mag'], dy = df['mag.error'], omega = 2*np.pi*fgrid_short_cep2, generalized = True)

In [ ]:
# Find the candidate frequency in each window (where the periodogram takes its maximum):
bestfr_vs_time = np.zeros(len(centers_tmp))
for jj in np.arange(len(centers_tmp)):
    bestfr_vs_time[jj] = fgrid_short_cep2[np.argmax(trsvd_cep2[:, jj])]

In [ ]:
# Plot the spectrogram, and add the constant catalog frequency and the best local frequencies from the sliding windows:
plt.rcParams["figure.figsize"] = [8., 8.]
fig = plt.figure()

plt.imshow(trsvd_cep2, origin = 'lower', aspect = 'auto', 
           extent = [centers_tmp[0], centers_tmp[-1], fgrid_short_cep2[0], fgrid_short_cep2[-1]])
plt.title('Cepheid 2')
plt.ylabel('Frequency [1/d]')
plt.xlabel('Time [d]')
plt.margins(0.01, 0.05)
plt.axhline(1/p_cep2, xmin = 0, xmax = 1, color='w')
plt.plot(cep2['time'], [0.1]*len(cep2['time']), '|', color='k')
plt.plot(centers_tmp, bestfr_vs_time, 'k')

plt.show()

The time-resolved, sliding-window periodogram clearly shows the period change of the Cepheid.

3.2 HIGH-FREQUENCY QUASI-PERIODIC OSCILLATION OF AN X-RAY BINARY

High-frequency QPOs, broad humps concentrated around 1 kHz in the periodogram of an X-ray binary, are exciting and not well understood phenomena. Low-frequency QPOs may be related to Lense-Thirring precession of the inner parts of the accretion disk around the compact component, which produces strong X-ray radiation; for the high-frequency QPOs, no such commonly accepted explanation exists. Their characteristic frequency, the center of the hump, tends to move around, and the QPO may sometimes even disappear for a while. This is best followed with time-resolved methods. The data set consists of photon counts, binned into 2^(-11) s long time intervals. The record is very long (almost an hour), so we have an evenly spaced time series of almost 7 million counts. The binning was selected so that the sampling is dense enough to detect such a high frequency in the spectrum.
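
A quick sanity check that the binning supports such high frequencies (a small sketch; dt_qpo is a throwaway name):

In [ ]:
dt_qpo = 2**(-11)            # bin width [s]
print("Nyquist frequency:", 1 / (2 * dt_qpo), "Hz")  # 1024 Hz, above the QPO frequencies examined below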

First go: we use sliding window spectrograms from matplotlib.mlab.


In [ ]:
from scipy import fftpack
from matplotlib.mlab import specgram
from astroML.fourier import wavelet_PSD
from scipy import ndimage

In [ ]:
qpo = pd.read_csv("./data/qpo.csv")

In [ ]:
# The values are photon counts in 2**(-11) s long intervals.
dt = 2**(-11)
# The times of the flux values thus:
times = np.arange(0, np.shape(qpo)[0]*dt, dt, dtype = float)
#qpo_slices = np.linspace(qpo['t'].min(), qpo['t'].max(), 400)
# We wish to compute the PSD in 8 s long windows. The time limits for selection:
qpo_slices = np.arange(0, 426*8, 8, dtype = float) 
# The centers of the slices (for plotting):
center_times = np.arange(0, 425*8, 8, dtype = float)+4
# Spacing of the Fourier frequencies in an 8 s window:
df = 1. / 8
# the Fourier frequencies:
ffreqs = df * np.arange(16384)

In [ ]:
psd_specgram, freqs_specgram, centers_specgram = specgram(qpo['flux'],
            NFFT = 16384, Fs = 2**11, noverlap = 16384 // 2)

Plot the spectrogram (leaving out the high values of the red noise at low frequencies):


In [ ]:
plt.rcParams["figure.figsize"] = [8., 8.]
fig = plt.figure()
plt.imshow(psd_specgram[20:,:], origin = 'lower', aspect = 'auto', 
           extent = [centers_specgram[0], centers_specgram[-1], freqs_specgram[20], freqs_specgram[-1]])
plt.title('Raw PSD')
plt.ylabel('Frequency [Hz]')
plt.xlabel('Time [s]')
plt.margins(0.01, 0.05)

plt.show()

Even when omitting the low-frequency terms around f = 0, the spectrogram shows predominantly noise. Recall that the periodogram is an inconsistent estimator of the spectral density: its values are exponentially distributed, and for the exponential distribution the variance equals the square of the mean, so the scatter does not shrink as we gather more data. With a moving average or a kernel smoother, we can make the estimator consistent. In addition, our time slices may have been too short, so the individual periodograms have high variance. We therefore apply a 2D smoothing.


In [ ]:
psd_specgram_smoo = ndimage.gaussian_filter(psd_specgram, [8, 8], mode = 'constant', cval = 0)

In [ ]:
plt.rcParams["figure.figsize"] = [16., 8.]
fig = plt.figure()
plt.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.95, wspace=None, hspace=0.35)

fig.add_subplot(121)
plt.imshow(psd_specgram[20:,:], origin = 'lower', aspect = 'auto', 
           extent = [centers_specgram[0], centers_specgram[-1], freqs_specgram[20], freqs_specgram[-1]])
plt.title('Raw PSD')
plt.ylabel('Frequency [Hz]')
plt.xlabel('Time [s]')
plt.margins(0.01, 0.05)

fig.add_subplot(122)
plt.imshow(psd_specgram_smoo[40:,:], origin = 'lower', aspect = 'auto', 
           extent = [centers_specgram[0], centers_specgram[-1], freqs_specgram[40], freqs_specgram[-1]])
plt.title('Smoothed PSD')
plt.ylabel('Frequency [Hz]')
plt.xlabel('Time [s]')
plt.margins(0.01, 0.05)

plt.show()

Smoothing in the frequency domain makes the periodograms consistent estimators of the spectral density of the underlying signal. Smoothing in the time domain averages the nearby periodograms (with Gaussian weights, so putting more weight on the central time), emphasising the frequencies that are persistently present in the spectrum. The QPO now clearly emerges from the background.

However, the time-domain smoothing implies a loss of temporal resolution. Now take a look at the QPO using wavelets. The dataset is large, so choose (based on the previous exercise) a frequency window and a time window in which you can expect to find the QPO. Since we don't know the appropriate scale Q, it is necessary to try a few values.


In [ ]:
Q = 2000
freq_wavelet = np.arange(800,900, 0.25)
qpo_waveletpsd = wavelet_PSD(t = times[0:200000], h = qpo['flux'][0:200000], 
           f0 = freq_wavelet, Q = Q)

In [ ]:
plt.rcParams["figure.figsize"] = [8., 8.]
fig = plt.figure(1)

plt.imshow(qpo_waveletpsd, origin = 'lower', aspect = 'auto', 
           extent = [times[0], times[200000], freq_wavelet[0], freq_wavelet[-1]])
plt.title('Wavelet PSD')
plt.ylabel('Frequency [Hz]')
plt.xlabel('Time [s]')
plt.margins(0.01, 0.05)

plt.show()

EX. 4. GAUSSIAN PROCESSES

Ex. 4.1. SIMULATION

Three Gaussian process covariance kernels are implemented in the following cell. For each, the parameter h sets the decay scale of the correlation between observations. For the quasi-periodic kernel, this is multiplied by a periodic term (added in the exponent): the correlation periodically strengthens and weakens.


In [ ]:
from sklearn.gaussian_process import GaussianProcess
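# Note: GaussianProcess is the legacy scikit-learn estimator (deprecated in
# scikit-learn 0.18 and removed in 0.20). In modern versions the equivalent is
# sklearn.gaussian_process.GaussianProcessRegressor with a kernel object, and
# the nugget used below is replaced by the alpha parameter (per-sample noise variance).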

In [ ]:
## Define the three kernel functions we'll use:
def exponential(x1, x2, h):
    return np.exp(-0.5 * np.abs(x1 - x2) / h)
def squared_exponential(x1, x2, h):
    return np.exp(-0.5 * (x1 - x2) ** 2 / h ** 2)
def quasi_periodic(x1, x2, h, l, nu):
    return np.exp(-0.5 * (x1 - x2) ** 2 / h ** 2 - 0.5 * np.sin(2*np.pi*nu*(x1 - x2)) ** 2 / l)

In [ ]:
x = np.linspace(52610, 54625, 115)
h = 50.0
l = 0.1
nu = 0.1

mu = np.zeros(len(x))

Cse = squared_exponential(x, x[:, None], h)
Ce = exponential(x, x[:, None], h)
Cq = quasi_periodic(x, x[:, None], h = h, l = l, nu = nu)

In [ ]:
qper_rdvar = np.random.multivariate_normal(mu, Cq, 3)
ex_rdvar = np.random.multivariate_normal(mu, Ce, 3)
sqex_rdvar = np.random.multivariate_normal(mu, Cse, 3)

In [ ]:
plt.rcParams["figure.figsize"] = [5., 10.]
fig = plt.figure(1)
plt.subplots_adjust(left=0.07, bottom=0.05, right=0.95, top=0.95, wspace=None, hspace=0.35)

fig.add_subplot(311)
for ii in [0,1,2]:
    plt.plot(x, ex_rdvar[ii], 'k', alpha=1)
plt.title('Exponential')
plt.xlabel('x')
plt.ylabel('GP draw')
plt.margins(0.01, 0.05)

fig.add_subplot(312)
for ii in [0,1,2]:
    plt.plot(x, sqex_rdvar[ii], 'k', alpha=1)
plt.title('Squared exponential')
plt.xlabel('x')
plt.ylabel('GP draw')
plt.margins(0.01, 0.05)
#plt.xlim(-0.1,4.)

fig.add_subplot(313)
for ii in [0,1,2]:
    plt.plot(x, qper_rdvar[ii], 'k', alpha=1)
plt.title('Quasi-periodic')
plt.xlabel('x')
plt.ylabel('GP draw')
plt.margins(0.01, 0.05)

plt.show()

Ex. 4.2. CANDIDATE QUASAR FROM LINEAR

The Ornstein-Uhlenbeck process can be applied to fit quasar data. We will use the fact that the OU process is a Gaussian process, and apply Gaussian process modelling to its light curve, with both the squared exponential and the absolute exponential kernel (the latter is the covariance function of the OU process).


In [ ]:
qso = pd.read_csv("./data/q2803474.csv")

In [ ]:
qso_gp_sqex1 = GaussianProcess(corr='squared_exponential', theta0=0.5, thetaL = 0.001, thetaU = 100)
qso_gp_ex1 = GaussianProcess(corr='absolute_exponential', theta0=0.5, thetaL = 0.001, thetaU = 100)

In [ ]:
qso_gp_sqex1.fit(qso['time'].values.reshape(-1, 1), qso['mag'].values.reshape(-1, 1))

The above fit fails because this type of process cannot produce light curves passing through every point: the observations are strongly clustered, with several taken during a single night and long gaps between the clusters. Adding a nugget term (observation noise) will help.


In [ ]:
m_var = np.mean(qso['mag.error'].values**2)
qso_gp_sqex1 = GaussianProcess(corr='squared_exponential', theta0=100, thetaL = 1, thetaU = 1000,
                              nugget = m_var)
qso_gp_ex1 = GaussianProcess(corr='absolute_exponential', theta0=100, thetaL = 0.001, thetaU = 1000,
                              nugget = m_var)

In [ ]:
# Fit the model:
qso_gp_sqex1.fit(qso['time'].values.reshape(-1, 1), qso['mag'].values.reshape(-1, 1))
qso_gp_ex1.fit(qso['time'].values.reshape(-1, 1), qso['mag'].values.reshape(-1, 1))

We can take a look at various parameters of the fitted models (for example the decay parameter, the residual variance, and the R2).


In [ ]:
print("Decay parameter in the squared exp. model:", qso_gp_sqex1.theta_)
print("Decay parameter in the absolute exp. model:", qso_gp_ex1.theta_)

In [ ]:
print("Variance in the squared exp. model:", qso_gp_sqex1.sigma2)
print("Variance in the absolute exp. model:", qso_gp_ex1.sigma2)

In [ ]:
print("R2 in the squared exp. model:", qso_gp_sqex1.score(qso['time'].reshape(-1,1), qso['mag'].reshape(-1,1)))
print("R2 in the absolute exp. model:",  qso_gp_ex1.score(qso['time'].reshape(-1,1), qso['mag'].reshape(-1,1)))

Finally, we can plot the model with its confidence band.


In [ ]:
tt = np.linspace(qso['time'].min(), qso['time'].max(), 500)
qso_gp_sqex1_pred, qso_gp_sqex1_mse = qso_gp_sqex1.predict(tt.reshape(-1,1), eval_MSE=True)
qso_gp_ex1_pred, qso_gp_ex1_mse = qso_gp_ex1.predict(tt.reshape(-1,1), eval_MSE=True)

In [ ]:
plt.rcParams["figure.figsize"] = [16., 5.]
fig = plt.figure(1)
plt.subplots_adjust(left=0.07, bottom=0.05, right=0.95, top=0.95, wspace=None, hspace=0.35)

ax = fig.add_subplot(121)
plt.gca().invert_yaxis()
ax.plot(tt, qso_gp_sqex1_pred, '-', color='gray')
ax.fill_between(tt, qso_gp_sqex1_pred.reshape(len(tt)) - 2 * np.sqrt(qso_gp_sqex1_mse), 
                qso_gp_sqex1_pred.reshape(len(tt)) + 2 * np.sqrt(qso_gp_sqex1_mse), 
                color='gray', alpha=0.3)
ax.errorbar(qso['time'], qso['mag'], qso['mag.error'], fmt='.k', ms=6)
ax.set_xlabel('Time [d]')
ax.set_ylabel('Magnitude')
plt.title('Squared exponential model')

ax = fig.add_subplot(122)
plt.gca().invert_yaxis()
ax.plot(tt, qso_gp_ex1_pred, '-', color='gray')
ax.fill_between(tt, qso_gp_ex1_pred.reshape(len(tt)) - 2 * np.sqrt(qso_gp_ex1_mse), 
                qso_gp_ex1_pred.reshape(len(tt)) + 2 * np.sqrt(qso_gp_ex1_mse), 
                color='gray', alpha=0.3)
ax.errorbar(qso['time'], qso['mag'], qso['mag.error'], fmt='.k', ms=6)
ax.set_xlabel('Time [d]')
ax.set_ylabel('Magnitude')
plt.title('Absolute exponential model')



plt.show()

FURTHER IDEAS TO WORK ON

Continuation of exercise 3: generalization of the wavelet spectrum to uneven sampling

How would you generalize wavelet analysis to uneven sampling? The straightforward ideas are the analogue of the Deeming method and the analogue of the least squares method. Try to implement the first. Simulate a Morlet wavelet as the signal.

(a) Sample it at regular times, and compute its wavelet transform with several Q parameters (not necessarily the value used in the simulation).

(b) Now sample it at uneven times. How does the naively generalized procedure perform? Compare the wavelet spectra.

(c) Apply it to the Cepheid data. Compare with the time-resolved GLS spectrum from 3.1.

Continuation of exercise 4: how would you model the within-night variance other than with just the nugget effect? One possible direction is sketched below.
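
One possibility, as a hedged sketch rather than a worked solution: model the covariance as a sum of two kernels, one with a long decay time for the quasar variability and one with a sub-day decay time for the within-night correlations. The function name and the weight parameter w below are illustrative, not from any library.

In [ ]:
# Sketch of a two-timescale kernel, in the style of the kernels of Ex. 4.1:
# h_long and h_short are decay times in days, w is the relative weight of the
# short-timescale (within-night) component.
def two_scale_exponential(x1, x2, h_long, h_short, w):
    d = np.abs(x1 - x2)
    return (1 - w) * np.exp(-0.5 * d / h_long) + w * np.exp(-0.5 * d / h_short)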