In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from astropy.io import fits
import seaborn as sns
%matplotlib notebook
The following query was run to select sources between declination -10 and -9.66666666666, in order to match these sources to those that were classified by the PS1 RF model:
-- ADQL query (Gaia archive): Gaia DR2 <-> PS1 best-neighbour cross-match with
-- astrometry, for the declination stripe [-10, -9.666...] deg.
select ps1.*,
-- total proper motion, sqrt(pmra^2 + pmdec^2)
sqrt(power(pmra,2) + power(pmdec,2)) as pm,
-- 1-sigma uncertainty on pm from standard error propagation,
-- including the pmra/pmdec correlation term
sqrt( power(pmra,2)*power(pmra_error, 2)/(power(pmra,2) + power(pmdec,2))
+ power(pmdec,2)*power(pmdec_error, 2)/(power(pmra,2) + power(pmdec,2))
+ 2*pmra*pmdec/(power(pmra,2) + power(pmdec,2))*pmra_pmdec_corr*pmra_error*pmdec_error) as pm_unc,
gaia.parallax_over_error, gaia.phot_g_mean_mag, gaia.b
from gaiadr2.panstarrs1_best_neighbour as ps1
inner join gaiadr2.gaia_source as gaia
on ps1.source_id = gaia.source_id
-- require a full (5-parameter) astrometric solution: parallax + proper motion
where gaia.astrometric_params_solved > 3
and gaia.dec between -10 and -9.6666666666666667
-- BP/RP flux-excess quality cut (color-dependent upper and lower bounds)
and phot_bp_rp_excess_factor < 1.3+0.06*power(phot_bp_mean_mag-phot_rp_mean_mag,2)
AND phot_bp_rp_excess_factor > 1.0+0.015*power(phot_bp_mean_mag-phot_rp_mean_mag,2)
-- astrometric chi2-per-dof quality cut, relaxed for faint (G > 19.5) sources
AND astrometric_chi2_al/(astrometric_n_good_obs_al-5) < 1.44*greatest(1,exp(-0.4*(phot_g_mean_mag-19.5)))
order by ps1.original_ext_source_id
The results are stored in the file neg10_match-result.fits
In [16]:
# Load the Gaia-PS1 cross-match results produced by the query above
# (FITS table; relative path, expected next to the notebook)
gaia = fits.getdata("neg10_match-result.fits")
In [17]:
# PS1 RF star/galaxy classifications for this declination stripe.
# NOTE(review): hardcoded absolute local path — this cell only runs on the
# author's machine; consider a configurable DATA_DIR.
ps1 = pd.read_hdf("/Users/adamamiller/Desktop/PS1_fits/hdf5/dec_neg10_0_classifications.h5")
In [18]:
# Extract the Gaia columns as plain, native-endian numpy arrays (the dtype
# conversion also handles the big-endian byte order of FITS data, which is
# why the old .byteswap() workaround is no longer needed).
gaia_objid = np.asarray(gaia['original_ext_source_id'], dtype=np.int64)
gaia_parallax = np.asarray(gaia['parallax_over_error'], dtype=np.float64)
gaia_pm = np.asarray(gaia['pm'] / gaia['pm_unc'], dtype=np.float64)  # PM significance
gaia_g = np.asarray(gaia["phot_g_mean_mag"], dtype=np.float64)
gaia_b = np.asarray(gaia["b"], dtype=np.float64)
In [19]:
# Assemble the Gaia quantities into one DataFrame keyed on the PS1 objid.
gaia_df = pd.DataFrame({"objid": gaia_objid,
                        "parallax_signif": gaia_parallax,
                        "pm_signif": gaia_pm,
                        "G": gaia_g,
                        "b": gaia_b})
In [20]:
# PS1 object id and the RF model star/galaxy score as native numpy arrays
ps1_objid = np.asarray(ps1["objid"], dtype=np.int64)
ps1_sg_score = np.asarray(ps1["rf_score"], dtype=np.float64)
In [21]:
# PS1 classifications as a DataFrame, keyed on the same objid as gaia_df
ps1_df = pd.DataFrame({"objid": ps1_objid,
                       "sg_score": ps1_sg_score})
Merge the two DFs in order to directly compare the sg_score to the parallax measurements
In [22]:
# Inner join: keep only sources present in both PS1 and the Gaia match
merge = ps1_df.merge(gaia_df, on="objid")
In [23]:
# Display the merged table (bare last expression renders the rich repr)
merge
Out[23]:
In [52]:
# Hexbin density of parallax SNR vs. PS1 sg_score;
# the orange line marks the SNR = 6 threshold.
fig, ax = plt.subplots()
density = ax.hexbin(merge["sg_score"], merge["parallax_signif"],
                    gridsize=1000, mincnt=1, bins="log",
                    extent=(0, 1, -20, 120))
ax.plot([-1, 2], [6, 6], 'DarkOrange', lw=2)
ax.set(xlim=(0, 1.02), ylim=(-10, 40),
       xlabel="sg_score", ylabel="parallax SNR")
fig.colorbar(density)
fig.tight_layout()
In [22]:
# Alternative seaborn jointplot version of the previous figure with marginal
# histograms; kept for reference but disabled — the hexbin + colorbar figure
# above is the final version.
# hax = sns.jointplot(merge["sg_score"], merge["parallax_signif"],
# kind="hex", stat_func=None,
# bins="log", mincnt=1, gridsize=1000,
# xlim=(0,1), ylim=(-20,120),
# cmap="viridis",
# marginal_kws={'bins':1000})
# hax.ax_joint.set_ylim(-10,40)
In [53]:
# Parallax SNR as a function of Gaia G magnitude;
# the orange line marks the SNR = 6 threshold.
fig, ax = plt.subplots()
density = ax.hexbin(merge["G"], merge["parallax_signif"],
                    gridsize=1000, mincnt=1, bins="log",
                    extent=(10, 21, -20, 120))
ax.plot([-1, 22], [6, 6], 'DarkOrange', lw=2)
ax.set(xlim=(12, 21), ylim=(-10, 40),
       xlabel="G (mag)", ylabel="parallax SNR")
fig.colorbar(density)
fig.tight_layout()
In [54]:
# Parallax SNR as a function of galactic latitude;
# the orange line marks the SNR = 6 threshold.
fig, ax = plt.subplots()
density = ax.hexbin(merge["b"], merge["parallax_signif"],
                    gridsize=1000, mincnt=1, bins="log",
                    extent=(-75, 54, -20, 120))
ax.plot([-75, 54], [6, 6], 'DarkOrange', lw=2)
ax.set(xlim=(-75, 54), ylim=(-10, 40),
       xlabel="Galactic latitude", ylabel="parallax SNR")
fig.colorbar(density)
fig.tight_layout()
In [44]:
# Interactive help lookup (IPython-only syntax) removed from the final
# narrative — run `plt.hexbin?` in a scratch cell if the signature is needed.
# plt.hexbin?
Create the same 3 plots, but this time showing the proper motion SNR as a function of sg_score, G mag, and galactic latitude in the existing PS1 catalog.
In [55]:
# Proper-motion SNR vs. PS1 sg_score;
# the orange line marks the SNR = 54 threshold.
fig, ax = plt.subplots()
density = ax.hexbin(merge["sg_score"], merge["pm_signif"],
                    gridsize=1000, mincnt=1, bins="log",
                    extent=(0, 1, -1, 150))
ax.plot([-1, 2], [54, 54], 'DarkOrange', lw=2)
ax.set(xlim=(0, 1), ylim=(-0.5, 150),
       xlabel="sg_score", ylabel="pm SNR")
fig.colorbar(density)
fig.tight_layout()
In [56]:
# Proper-motion SNR as a function of Gaia G magnitude;
# the orange line marks the SNR = 54 threshold.
fig, ax = plt.subplots()
density = ax.hexbin(merge["G"], merge["pm_signif"],
                    gridsize=1000, mincnt=1, bins="log",
                    extent=(10, 21, -1, 150))
ax.plot([-1, 22], [54, 54], 'DarkOrange', lw=2)
ax.set(xlim=(12, 21), ylim=(-0.5, 150),
       xlabel="G (mag)", ylabel="pm SNR")
fig.colorbar(density)
fig.tight_layout()
In [59]:
# Proper-motion SNR vs. galactic latitude, with each hexagon colored by the
# *brightest* (minimum) G magnitude in the bin (color scale capped at G = 19);
# the orange line marks the SNR = 54 threshold.
fig, ax = plt.subplots()
depth = ax.hexbin(merge["b"], merge["pm_signif"],
                  C=np.array(merge["G"]), reduce_C_function=np.min,
                  gridsize=1000, mincnt=1,
                  extent=(-75, 54, -1, 150), vmax=19)
ax.plot([-75, 54], [54, 54], 'DarkOrange', lw=2)
ax.set(xlim=(-75, 54), ylim=(-0.5, 150),
       xlabel="Galactic Latitude", ylabel="pm SNR")
fig.colorbar(depth)
fig.tight_layout()
The above plot shows a strong excess of sources at low galactic latitude with high significance proper motion measurements. The challenge here is to identify whether that excess is due to higher number counts, or poor measurements of proper motion due to crowding.
Finally, plot the distribution of PS1 sg_score values for sources that would be selected as stars via this method.
In [30]:
# sg_score distribution of "definite stars" — sources with a highly
# significant parallax OR proper motion — for the full sample and for the
# low-galactic-latitude subset. Boolean masks replace the old
# np.where + .iloc[tuple] indexing, and the LaTeX labels are raw strings so
# the "\c" in "\circ" is not an invalid escape sequence (SyntaxWarning on
# modern Python); the rendered label text is unchanged.
def_stars = (merge["parallax_signif"] > 80) | (merge["pm_signif"] > 8)
def_stars_low_lat = def_stars & (np.abs(merge["b"]) < 7)
fig, ax = plt.subplots()
ax.hist(merge["sg_score"][def_stars],
        range=(0, 1), bins=100,
        label="all")
ax.hist(merge["sg_score"][def_stars_low_lat],
        range=(0, 1), bins=100,
        label=r"$|b| < 7^\circ$")
# Log-scale inset to make the low-count tail at small sg_score visible
# (fig.add_axes is the explicit-API equivalent of plt.axes(rect))
axins = fig.add_axes([0.3, 0.3, 0.5, 0.5])
axins.hist(merge["sg_score"][def_stars],
           range=(0, 1), bins=100,
           label="all")
axins.hist(merge["sg_score"][def_stars_low_lat],
           range=(0, 1), bins=100,
           label=r"$|b| < 7^\circ$")
ax.set_xlabel("sg score")
ax.set_ylabel("N")
axins.set_yscale("log")
ax.legend()
fig.tight_layout()
Inspect the number of sources that have multiple matches as a function of galactic latitude. We do this by examining both the number_of_neighbors
and the number_of_mates
. The former is the number of PS1 sources that match a given Gaia source, while the latter is the number of other gaia sources that have the same PS1 source as a match (this happens if Gaia resolves a PS1 source, for example).
In [31]:
# Positional indices of matches whose Gaia source has more than one PS1
# neighbour, and of those with at least one "mate" (another Gaia source
# sharing the same PS1 match). np.nonzero is equivalent to single-arg np.where.
gt1_neighbor = np.nonzero(gaia["number_of_neighbours"] > 1)
gt1_mate = np.nonzero(gaia["number_of_mates"] > 0)
In [32]:
# Latitude distribution of all matches vs. the >1-neighbour subset
fig, ax = plt.subplots()
counts_all = ax.hist(gaia["b"], range=(-90, 90), bins=36)
counts_multi = ax.hist(gaia["b"][gt1_neighbor], range=(-90, 90), bins=36)
ax.set_yscale("log")
fig.tight_layout()
In [33]:
# Latitude distribution of all matches vs. the >0-mates subset
fig, ax = plt.subplots()
counts_all = ax.hist(gaia["b"], range=(-90, 90), bins=36)
counts_multi = ax.hist(gaia["b"][gt1_mate], range=(-90, 90), bins=36)
ax.set_yscale("log")
fig.tight_layout()
The above plots show that the vast majority (> 99%) of sources in the catalog cross match have only 1 neighbor and 0 mates. It is also clear that, as a function of galactic latitude, the number of neighbors remains relatively constant, while the number of mates (i.e. resolved in gaia and not in PS1) is proportionally much higher in the Galactic plane.
Ultimately, it does not make sense to include sources with multiple mates in the ZTF catalog because it is unclear which of the 2 gaia sources dominates the signal in PS1. Furthermore, if there are multiple PS1 matches to a single Gaia source this suggests that there are problems in the PS1 extraction, and thus, ultimately only stars with 1 neighbor and 0 mates will be included in the final catalog.
In [34]:
# Left join keeps every Gaia source; sg_score is NaN where there is no PS1
# classification. Selecting ~(sg_score >= 0) is the exact complement of the
# rows the old inplace drop removed, leaving only the unmatched (NaN) rows.
gaia_only_df = pd.merge(gaia_df, ps1_df, on="objid", how="left")
gaia_only_df = gaia_only_df[~(gaia_only_df.sg_score >= 0)]
In [35]:
print("There are {}, {}, {} sources with parallax > 3, 4, 5 sigma".format(sum(merge["parallax_signif"] > 3),
sum(merge["parallax_signif"] > 4),
sum(merge["parallax_signif"] > 5)))
print("There are {}, {}, {} sources with PM > 3, 4, 5 sigma".format(sum(merge["pm_signif"] > 3),
sum(merge["pm_signif"] > 4),
sum(merge["pm_signif"] > 5)))
In [61]:
# Summarize the Gaia-only sources (no PS1 classification) that would still be
# selected as stars under the adopted parallax / proper-motion cuts.
parallax_cut = 6
pm_cut = 7.5
good_plx = gaia_only_df["parallax_signif"] > parallax_cut
good_pm = gaia_only_df["pm_signif"] > pm_cut
bright = gaia_only_df["G"] < 19
print("There are {} PS1 sources not in PS1-ZTF catalog".format(len(gaia_only_df)))
print("\t{} of these sources have G < 19".format(bright.sum()))
print("\t\t{} of these sources have parallax_signif > {}".format((bright & good_plx).sum(),
                                                                 parallax_cut))
print("\t\t{} of these sources have PM > {}".format((bright & good_pm).sum(),
                                                    pm_cut))
print("\t{} of these sources have parallax_signif > {}".format(good_plx.sum(),
                                                               parallax_cut))
print("\t{} of these sources have PM > {}".format(good_pm.sum(), pm_cut))
In [37]:
# Count and report the fraction of sources fainter than G = 20.5
faint = np.nonzero(np.asarray(merge["G"] >= 20.5))
n_faint = len(faint[0])
print("There are {} faint sources ({:.2f}% of all)".format(n_faint,
                                                           100 * n_faint / len(merge)))