In this notebook we compute the maximum likelihood (ML) cross-match between the LOFAR HETDEX catalogue and the combined Pan-STARRS/WISE catalogue.
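As a short reminder of the method (standard likelihood-ratio notation; the actual implementation lives in the `mltier1` module imported below), every LOFAR source is compared against the nearby candidates of the combined catalogue and the candidate with the highest likelihood ratio $LR = \frac{q_c(m)\,f(r)}{n_c(m)}$ is retained. Here $m$ is the candidate magnitude ($i$ or $W1$, depending on the colour category $c$), $f(r)$ is the distribution of the positional offset $r$ given the combined positional errors, $q_c(m)$ is the expected magnitude distribution of true counterparts in category $c$, and $n_c(m)$ is the corresponding background distribution; the $Q_0$ values loaded later give the prior fraction of LOFAR sources with a counterpart in each category.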
In [1]:
import numpy as np
from astropy.table import Table, join
from astropy import units as u
from astropy.coordinates import SkyCoord, search_around_sky
from IPython.display import clear_output
import pickle
import os
In [2]:
import sys
In [3]:
sys.path.append("..")
In [54]:
from mltier1 import (get_center, Field, MultiMLEstimator, MultiMLEstimatorOld,
                     parallel_process, get_sigma_all, get_sigma_all_old, describe)
In [5]:
%load_ext autoreload
In [53]:
%autoreload
In [7]:
from IPython.display import clear_output
In [8]:
%pylab inline
In [9]:
save_intermediate = True
plot_intermediate = True
In [10]:
idp = "../idata/final_pdf_v0.9"
In [11]:
if not os.path.isdir(idp):
    os.makedirs(idp)
In [12]:
# Busy week Edinburgh 2017
ra_down = 172.09
ra_up = 187.5833
dec_down = 46.106
dec_up = 56.1611
In [13]:
# Busy week Hatfield 2017
ra_down = 170.
ra_up = 190.
dec_down = 46.8
dec_up = 55.9
In [14]:
# Full field July 2017
ra_down = 160.
ra_up = 232.
dec_down = 42.
dec_up = 62.
In [15]:
field = Field(170.0, 190.0, 46.8, 55.9)
In [16]:
field_full = Field(160.0, 232.0, 42.0, 62.0)
In [17]:
combined_all = Table.read("../pw.fits")
In [18]:
lofar_all = Table.read("../data/LOFAR_HBA_T1_DR1_catalog_v0.9.srl.fixed.fits")
#lofar_all = Table.read("data/LOFAR_HBA_T1_DR1_merge_ID_optical_v0.8.fits")
In [19]:
np.array(combined_all.colnames)
Out[19]:
In [20]:
np.array(lofar_all.colnames)
Out[20]:
The following cell has been corrected in the latest versions to use all the sources, including the extended ones. Hence, running the "-extended" version of this notebook is no longer necessary.
In [21]:
lofar = field_full.filter_catalogue(lofar_all, colnames=("RA", "DEC"))
In [22]:
combined = field_full.filter_catalogue(combined_all,
                                       colnames=("ra", "dec"))
In [23]:
combined["colour"] = combined["i"] - combined["W1mag"]
In [24]:
combined_aux_index = np.arange(len(combined))
In [25]:
coords_combined = SkyCoord(combined['ra'],
                           combined['dec'],
                           unit=(u.deg, u.deg),
                           frame='icrs')
In [26]:
coords_lofar = SkyCoord(lofar['RA'],
                        lofar['DEC'],
                        unit=(u.deg, u.deg),
                        frame='icrs')
In [27]:
combined_matched = (~np.isnan(combined["i"]) & ~np.isnan(combined["W1mag"])) # Matched i-W1 sources
In [28]:
combined_panstarrs = (~np.isnan(combined["i"]) & np.isnan(combined["W1mag"])) # Sources with only i-band
In [29]:
combined_wise = (np.isnan(combined["i"]) & ~np.isnan(combined["W1mag"])) # Sources with only W1-band
In [30]:
combined_i = combined_matched | combined_panstarrs
combined_w1 = combined_matched | combined_wise
#combined_only_i = combined_panstarrs & ~combined_matched
#combined_only_w1 = combined_wise & ~combined_matched
In [31]:
print("Total - ", len(combined))
print("i and W1 - ", np.sum(combined_matched))
print("Only i - ", np.sum(combined_panstarrs))
print("With i - ", np.sum(combined_i))
print("Only W1 - ", np.sum(combined_wise))
print("With W1 - ", np.sum(combined_w1))
In [32]:
colour_limits = [0.0, 0.5, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0, 3.5, 4.0]
In [33]:
# Start with the W1-only, i-only and "less than lower colour" bins
colour_bin_def = [{"name": "only W1", "condition": combined_wise},
                  {"name": "only i", "condition": combined_panstarrs},
                  {"name": "-inf to {}".format(colour_limits[0]),
                   "condition": (combined["colour"] < colour_limits[0])}]
# Get the colour bins
for i in range(len(colour_limits)-1):
    name = "{} to {}".format(colour_limits[i], colour_limits[i+1])
    condition = ((combined["colour"] >= colour_limits[i]) &
                 (combined["colour"] < colour_limits[i+1]))
    colour_bin_def.append({"name": name, "condition": condition})
# Add the "more than higher colour" bin
colour_bin_def.append({"name": "{} to inf".format(colour_limits[-1]),
                       "condition": (combined["colour"] >= colour_limits[-1])})
In [34]:
combined["category"] = np.nan
for i in range(len(colour_bin_def)):
    combined["category"][colour_bin_def[i]["condition"]] = i
In [35]:
np.sum(np.isnan(combined["category"]))
Out[35]:
We get the number of sources of the combined catalogue in each colour category. These counts will be used at a later stage to compute the $Q_0$ values; a small normalisation sketch is shown after the next cell.
In [36]:
numbers_combined_bins = np.array([np.sum(a["condition"]) for a in colour_bin_def])
In [37]:
numbers_combined_bins
Out[37]:
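As a rough, illustrative sketch only (the $Q_0$ values, magnitude distributions and background densities that actually feed the estimator are read pre-computed from `../lofar_params.pckl` a few cells below, and the variable names here are ad hoc, not part of the pipeline), the bin counts can be normalised into per-category fractions and approximate background surface densities over the full field defined above:
In [ ]:
# Illustrative only: normalise the per-bin counts of the combined catalogue.
# The area is the standard spherical expression for the RA/Dec box of the
# full field defined above (RA 160-232 deg, Dec 42-62 deg).
area_deg2 = ((232.0 - 160.0) *
             (np.sin(np.radians(62.0)) - np.sin(np.radians(42.0))) *
             (180.0 / np.pi))
category_fractions = numbers_combined_bins / numbers_combined_bins.sum()  # fraction of sources per category
background_density = numbers_combined_bins / (area_deg2 * 3600.0**2)      # sources per arcsec^2 per category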
In [40]:
bin_list, centers, Q_0_colour, n_m, q_m = pickle.load(open("../lofar_params.pckl", "rb"))
In [41]:
likelihood_ratio_function = MultiMLEstimator(Q_0_colour, n_m, q_m, centers)
In [45]:
likelihood_ratio_function_old = MultiMLEstimatorOld(Q_0_colour, n_m, q_m, centers)
In [42]:
radius = 15
In [43]:
selection = ~np.isnan(combined["category"]) # Avoid the dreaded sources with no actual data
catalogue = combined[selection]
In [44]:
def apply_ml(i, likelihood_ratio_function):
    idx_0 = idx_i[idx_lofar == i]
    d2d_0 = d2d[idx_lofar == i]
    category = catalogue["category"][idx_0].astype(int)
    mag = catalogue["i"][idx_0]
    mag[category == 0] = catalogue["W1mag"][idx_0][category == 0]
    lofar_ra = lofar[i]["RA"]
    lofar_dec = lofar[i]["DEC"]
    lofar_pa = lofar[i]["PA"]
    lofar_maj_err = lofar[i]["E_Maj"]
    lofar_min_err = lofar[i]["E_Min"]
    c_ra = catalogue["ra"][idx_0]
    c_dec = catalogue["dec"][idx_0]
    c_ra_err = catalogue["raErr"][idx_0]
    c_dec_err = catalogue["decErr"][idx_0]
    # Combined positional error and its determinant, as returned by the
    # current get_sigma_all (same interface as used in check_ml below).
    sigma_0_0, det_sigma = get_sigma_all(lofar_maj_err, lofar_min_err, lofar_pa,
                                         lofar_ra, lofar_dec,
                                         c_ra, c_dec, c_ra_err, c_dec_err)
    lr_0 = likelihood_ratio_function(mag, d2d_0.arcsec, sigma_0_0, det_sigma, category)
    chosen_index = np.argmax(lr_0)
    result = [combined_aux_index[selection][idx_0[chosen_index]],  # Index
              (d2d_0.arcsec)[chosen_index],                        # distance
              lr_0[chosen_index]]                                  # LR
    return result
In [50]:
from mltier1 import fr_u, fr_u_old
In [69]:
def check_ml(i, likelihood_ratio_function, likelihood_ratio_function_old, verbose=True):
    idx_0 = idx_i[idx_lofar == i]
    d2d_0 = d2d[idx_lofar == i]
    category = catalogue["category"][idx_0].astype(int)
    mag = catalogue["i"][idx_0]
    mag[category == 0] = catalogue["W1mag"][idx_0][category == 0]
    lofar_ra = lofar[i]["RA"]
    lofar_dec = lofar[i]["DEC"]
    lofar_pa = lofar[i]["PA"]
    lofar_maj_err = lofar[i]["E_Maj"]
    lofar_min_err = lofar[i]["E_Min"]
    c_ra = catalogue["ra"][idx_0]
    c_dec = catalogue["dec"][idx_0]
    c_ra_err = catalogue["raErr"][idx_0]
    c_dec_err = catalogue["decErr"][idx_0]
    sigma, sigma_maj, sigma_min = get_sigma_all_old(lofar_maj_err, lofar_min_err, lofar_pa,
                                                    lofar_ra, lofar_dec,
                                                    c_ra, c_dec, c_ra_err, c_dec_err)
    sigma_0_0, det_sigma = get_sigma_all(lofar_maj_err, lofar_min_err, lofar_pa,
                                         lofar_ra, lofar_dec,
                                         c_ra, c_dec, c_ra_err, c_dec_err)
    fr = fr_u(d2d_0.arcsec, sigma_0_0, det_sigma)
    fr_old = np.array(fr_u_old(d2d_0.arcsec, sigma, sigma_maj, sigma_min))
    if verbose:
        print("NEW - s00: {}; sdet: {}; fr: {}".format(sigma_0_0, det_sigma, fr))
        print("OLD - s: {}; smaj: {}; smin: {}; fr: {}".format(
            np.array(sigma), np.array(sigma_maj), np.array(sigma_min), fr_old))
    lr_0 = likelihood_ratio_function(mag, d2d_0.arcsec, sigma_0_0, det_sigma, category)
    lr_0_old = likelihood_ratio_function_old(mag, d2d_0.arcsec, sigma, sigma_maj, sigma_min, category)
    chosen_index = np.argmax(lr_0)
    chosen_index_old = np.argmax(lr_0_old)
    # Best match according to the new estimator
    ix, dist, lr = (combined_aux_index[selection][idx_0[chosen_index]],  # Index
                    (d2d_0.arcsec)[chosen_index],                        # distance
                    lr_0[chosen_index])                                  # LR
    # Best match according to the old estimator
    ix_old, dist_old, lr_old = (combined_aux_index[selection][idx_0[chosen_index_old]],
                                (d2d_0.arcsec)[chosen_index_old],
                                lr_0_old[chosen_index_old])
    if verbose:
        print("NEW res - Ix: {}; dist: {}; LR: {}".format(ix, dist, lr))
        print("OLD res - Ix: {}; dist: {}; LR: {}".format(ix_old, dist_old, lr_old))
    return (sigma_0_0, det_sigma, fr, np.array(sigma), np.array(sigma_maj), np.array(sigma_min),
            fr_old, ix, dist, lr, ix_old, dist_old, lr_old, (lofar_maj_err, lofar_min_err, lofar_pa,
            lofar_ra, lofar_dec, c_ra, c_dec, c_ra_err, c_dec_err))
In [47]:
idx_lofar, idx_i, d2d, d3d = search_around_sky(
    coords_lofar, coords_combined[selection], radius*u.arcsec)
In [48]:
idx_lofar_unique = np.unique(idx_lofar)
In [ ]:
list_i = [141, 235, 396, 412, 418, 711, 858, 887, 932, 965, 1039, 1389, 1680, 1699,
          1787, 1927, 2168, 2267, 2339, 2410, 2548, 2838, 2969, 3136, 3163, 3265,
          3348, 3353, 3401]
for i in range(100000):
    s00, det_s, fr, s, s_maj, s_min, fr_o, ix, dist, lr, ix_o, dist_o, lr_o, p = check_ml(
        idx_lofar_unique[i], likelihood_ratio_function, likelihood_ratio_function_old, verbose=False)
    if (ix != ix_o) and ((lr > 6) or (lr_o > 6)):
        print(i)
        #print(ix, dist, lr)
        #print(ix_o, dist_o, lr_o)
        #print(s00, det_s, fr, s, s_maj, s_min, fr_o)
        #print(p)
In [70]:
list_i = [141, 235, 396, 412, 418, 711, 858, 887, 932, 965, 1039, 1389, 1680, 1699,
          1787, 1927, 2168, 2267, 2339, 2410, 2548, 2838, 2969, 3136, 3163, 3265,
          3348, 3353, 3401, 3654, 3687, 4022, 4074, 4083, 4164, 4263]
for i in list_i:
    s00, det_s, fr, s, s_maj, s_min, fr_o, ix, dist, lr, ix_o, dist_o, lr_o, p = check_ml(
        idx_lofar_unique[i], likelihood_ratio_function, likelihood_ratio_function_old, verbose=False)
    if ix != ix_o:
        print(i)
        print(ix, dist, lr)
        print(ix_o, dist_o, lr_o)
        print(s00, det_s, fr, s, s_maj, s_min, fr_o)
        print(p)
In [43]:
import multiprocessing
n_cpus_total = multiprocessing.cpu_count()
n_cpus = max(1, n_cpus_total-1)
In [ ]:
def ml(i):
    return apply_ml(i, likelihood_ratio_function)
In [ ]:
res = parallel_process(idx_lofar_unique, ml, n_jobs=n_cpus)
In [46]:
lofar["lr"] = np.nan # Likelihood ratio
lofar["lr_dist"] = np.nan # Distance to the selected source
lofar["lr_index"] = np.nan # Index of the PanSTARRS source in combined
In [47]:
(lofar["lr_index"][idx_lofar_unique],
lofar["lr_dist"][idx_lofar_unique],
lofar["lr"][idx_lofar_unique]) = list(map(list, zip(*res)))
In [48]:
total_sources = len(idx_lofar_unique)
combined_aux_index = np.arange(len(combined))
In [49]:
lofar["lrt"] = lofar["lr"]
lofar["lrt"][np.isnan(lofar["lr"])] = 0
In [50]:
q0 = np.sum(Q_0_colour)
In [51]:
def completeness(lr, threshold, q0):
    n = len(lr)
    lrt = lr[lr < threshold]
    return 1. - np.sum((q0 * lrt)/(q0 * lrt + (1 - q0)))/float(n)/q0

def reliability(lr, threshold, q0):
    n = len(lr)
    lrt = lr[lr > threshold]
    return 1. - np.sum((1. - q0)/(q0 * lrt + (1 - q0)))/float(n)/q0

completeness_v = np.vectorize(completeness, excluded=[0])
reliability_v = np.vectorize(reliability, excluded=[0])
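In equation form (directly mirroring the code above, with $N$ the number of LOFAR sources and $LR_i$ their zero-filled likelihood ratios), the estimated completeness and reliability at a threshold $L_{\mathrm{th}}$ are $C(L_{\mathrm{th}}) = 1 - \frac{1}{q_0 N}\sum_{LR_i < L_{\mathrm{th}}} \frac{q_0\,LR_i}{q_0\,LR_i + 1 - q_0}$ and $R(L_{\mathrm{th}}) = 1 - \frac{1}{q_0 N}\sum_{LR_i > L_{\mathrm{th}}} \frac{1 - q_0}{q_0\,LR_i + 1 - q_0}$, where $q_0 = \sum_c Q_{0,c}$ as computed in the previous cell.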
In [52]:
n_test = 100
threshold_mean = np.percentile(lofar["lrt"], 100*(1 - q0))
In [53]:
thresholds = np.arange(0., 10., 0.01)
thresholds_fine = np.arange(0.1, 1., 0.001)
In [54]:
completeness_t = completeness_v(lofar["lrt"], thresholds, q0)
reliability_t = reliability_v(lofar["lrt"], thresholds, q0)
average_t = (completeness_t + reliability_t)/2
In [55]:
completeness_t_fine = completeness_v(lofar["lrt"], thresholds_fine, q0)
reliability_t_fine = reliability_v(lofar["lrt"], thresholds_fine, q0)
average_t_fine = (completeness_t_fine + reliability_t_fine)/2
In [56]:
threshold_sel = thresholds_fine[np.argmax(average_t_fine)]
In [57]:
plt.rcParams["figure.figsize"] = (15,6)
subplot(1,2,1)
plot(thresholds, completeness_t, "r-")
plot(thresholds, reliability_t, "g-")
plot(thresholds, average_t, "k-")
vlines(threshold_sel, 0.9, 1., "k", linestyles="dashed")
vlines(threshold_mean, 0.9, 1., "y", linestyles="dashed")
ylim([0.9, 1.])
xlabel("Threshold")
ylabel("Completeness/Reliability")
subplot(1,2,2)
plot(thresholds_fine, completeness_t_fine, "r-")
plot(thresholds_fine, reliability_t_fine, "g-")
plot(thresholds_fine, average_t_fine, "k-")
vlines(threshold_sel, 0.9, 1., "k", linestyles="dashed")
#vlines(threshold_mean, 0.9, 1., "y", linestyles="dashed")
ylim([0.97, 1.])
xlabel("Threshold")
ylabel("Completeness/Reliability")
Out[57]:
In [58]:
print(threshold_sel)
In [59]:
plt.rcParams["figure.figsize"] = (15,6)
subplot(1,2,1)
hist(lofar[lofar["lrt"] != 0]["lrt"], bins=200)
vlines([threshold_sel], 0, 5000)
ylim([0,5000])
subplot(1,2,2)
hist(np.log10(lofar[lofar["lrt"] != 0]["lrt"]+1), bins=200)
vlines(np.log10(threshold_sel+1), 0, 5000)
ticks, _ = xticks()
xticks(ticks, ["{:.1f}".format(10**t-1) for t in ticks])
ylim([0,5000]);
In [60]:
lofar["lr_index_sel"] = lofar["lr_index"]
lofar["lr_index_sel"][lofar["lrt"] < threshold_sel] = np.nan
In [61]:
combined["lr_index_sel"] = combined_aux_index.astype(float)
In [62]:
pwl = join(lofar, combined,
           join_type='left',
           keys='lr_index_sel',
           uniq_col_name='{col_name}{table_name}',
           table_names=['_input', ''])
In [63]:
pwl_columns = pwl.colnames
In [64]:
for col in pwl_columns:
    fv = pwl[col].fill_value
    if (isinstance(fv, np.float64) and (fv != 1e+20)):
        print(col, fv)
        pwl[col].fill_value = 1e+20
In [65]:
columns_save = ['Source_Name', 'RA', 'E_RA', 'DEC', 'E_DEC',
                'Peak_flux', 'E_Peak_flux', 'Total_flux', 'E_Total_flux',
                'Maj', 'E_Maj', 'Min', 'E_Min', 'PA', 'E_PA', 'Isl_rms', 'S_Code', 'Mosaic_ID',
                'AllWISE', 'objID', 'ra', 'dec', 'raErr', 'decErr',
                'W1mag', 'W1magErr', 'i', 'iErr', 'colour', 'category',
                'lr', 'lr_dist']
In [66]:
pwl[columns_save].filled().write('lofar_pw_pdf.fits', format="fits")
In [ ]: