In [1]:
# give access to importing dwarfz
import os, sys
dwarfz_package_dir = os.getcwd().split("dwarfz")[0]
if dwarfz_package_dir not in sys.path:
sys.path.insert(0, dwarfz_package_dir)
import dwarfz
# back to regular import statements
%matplotlib inline
from matplotlib import pyplot as plt
import matplotlib.colors as colors
import matplotlib.patches as patches
import seaborn as sns
sns.set(context="poster", style="ticks", font_scale=1.4)
import numpy as np
import pandas as pd
from scipy import interpolate
import astropy
from astropy import units as u
import pathlib
In [2]:
import matplotlib as mpl
mpl.rcParams['savefig.dpi'] = 80
mpl.rcParams['figure.dpi'] = 80
mpl.rcParams['figure.figsize'] = 2*np.array((8,6))
mpl.rcParams['figure.facecolor'] = "white"
In [3]:
COSMOS_filename = pathlib.Path(dwarfz.data_dir_default) / "COSMOS_reference.sqlite"
COSMOS = dwarfz.datasets.COSMOS(COSMOS_filename)
In [4]:
COSMOS.df.head()
Out[4]:
In [5]:
COSMOS.df.describe()
Out[5]:
In [6]:
sns.distplot(COSMOS.df.photo_z)
Out[6]:
In [7]:
downsample_factor = 10
mask_COSMOS_downsample = (COSMOS.df.index % downsample_factor == 0)
In [8]:
plt.scatter(COSMOS.df.ra[mask_COSMOS_downsample],
COSMOS.df.dec[mask_COSMOS_downsample],
label=COSMOS.label, color="r", s=2,
)
plt.xlabel("RA")
plt.ylabel("dec")
plt.xlim(149.25, 151.25)
plt.legend(loc="upper left", bbox_to_anchor=(1, 1),
markerscale=10)
Out[8]:
In [9]:
HSC_filename = pathlib.Path(dwarfz.data_dir_default) / "HSC_COSMOS_median_forced.sqlite3"
HSC = dwarfz.datasets.HSC(HSC_filename)
In [10]:
HSC.df.head()
Out[10]:
In [11]:
HSC.df.describe()
Out[11]:
In [12]:
downsample_factor = 10
mask_HSC_downsample = (HSC.df.index % downsample_factor == 0)
In [13]:
plt.scatter(HSC.df.ra[mask_HSC_downsample],
HSC.df.dec[mask_HSC_downsample],
label=HSC.label, color="b", s=2,
)
plt.xlabel("RA")
plt.ylabel("dec")
plt.xlim(149.25, 151.25)
plt.legend(loc="upper left", bbox_to_anchor=(1, 1),
markerscale=10)
Out[13]:
In [14]:
downsample_factor_COSMOS = 30
mask_COSMOS_downsample = (COSMOS.df.index % downsample_factor_COSMOS == 0)
downsample_factor_HSC = 30*2
mask_HSC_downsample = (HSC.df.index % downsample_factor_HSC == 0)
print("Number of COSMOS galaxies to plot: ", mask_COSMOS_downsample.sum())
print("Number of HSC galaxies to plot: ", mask_HSC_downsample.sum())
In [15]:
plt.scatter(HSC.df.ra[mask_HSC_downsample],
HSC.df.dec[mask_HSC_downsample],
label=HSC.label, color="b", s=2,
)
plt.scatter(COSMOS.df.ra[mask_COSMOS_downsample],
COSMOS.df.dec[mask_COSMOS_downsample],
label =COSMOS.label, color="r", s=2,
)
plt.xlabel("RA")
plt.ylabel("dec")
plt.xlim(149.25, 151.25)
plt.legend(loc="upper left", bbox_to_anchor=(1, 1),
markerscale=10)
Out[15]:
In [16]:
matches_filename = pathlib.Path(dwarfz.data_dir_default) / "matches.sqlite3"
matches = dwarfz.matching.Matches.load_from_filename(matches_filename)
In [17]:
print("threshold (error) : {:>5.2f}".format(
dwarfz.matching.Matches.threshold_error_default))
print("threshold (match) : {:>5.2f}".format(
dwarfz.matching.Matches.threshold_match_default))
In [18]:
print("overall completeness : {:.2f} %".format(100*np.mean(matches.match[~matches.error])))
In [19]:
print("min separation: {:.4f} [arcsec]".format(min(matches.sep)))
print("max separation: {:.4f} [arcsec]".format(max(matches.sep)))
In [20]:
plt.axvline(np.log10(dwarfz.matching.Matches.threshold_error_default.to(u.arcsec).value),
linestyle="dashed", color="r", label="error")
plt.axvline(np.log10(dwarfz.matching.Matches.threshold_match_default.to(u.arcsec).value),
linestyle="solid", color="k", label="match")
plt.legend(loc="upper left")
sns.distplot(np.log10(matches.sep))
plt.xlabel("log_10 separation [arcsec]")
plt.ylabel("probability density")
plt.title("Distribution of Nearest-Neighbor separations")
Out[20]:
In [21]:
plt.scatter(COSMOS.df.loc[matches.index[matches.match]].ra,
COSMOS.df.loc[matches.index[matches.match]].dec,
color="b", s=2,
label="Found in HSC",
)
plt.scatter(COSMOS.df.loc[matches.index[~matches.match & ~matches.error]].ra,
COSMOS.df.loc[matches.index[~matches.match & ~matches.error]].dec,
color="r", s=2,
label="Missing in HSC",
)
plt.scatter(COSMOS.df.loc[matches.index[matches.error]].ra,
COSMOS.df.loc[matches.index[matches.error]].dec,
color="k", s=2,
label="Mismatched Footprint",
)
plt.xlabel("RA (deg)")
plt.ylabel("Dec (deg)")
plt.legend(loc="upper left", bbox_to_anchor=(1,1), markerscale=10)
plt.title("COSMOS Galaxies by Matching Outcome")
Out[21]:
In [22]:
def get_COSMOS_completeness():
""" Gets interpolators for the COSMOS completeness limits as a function of redshift
I *think* this is the 50% completeness threshold for each type of galaxy
("all", "passive", "active" [star-forming]) in terms of the stellar mass.
(I.e. below this mass, cosmos isn't confident that it's complete)
Inputs
------
None
Outputs
-------
dict of scipy.interpolate.interpolate.interp1d objects
- dictionary keys are "all", "passive" and "active",
mapping to respective interpolators
- Interpolators take in redshift, and return the
stellar mass completeness limit in log M_solar units
Notes
-----
This is taken from Alexie's `ms_limit.pro` file. I *think* these are
completeness limits for respective galaxy types, not classification cuts
for active vs. passive.
"""
redshifts = [0.17, 0.24, 0.32, 0.37, 0.43, 0.5]
# ALL GALAXIES
log_masses = [ 7.53, 7.93, 8.22, 8.38, 8.52, 8.66]
interp_all_galaxies = interpolate.interp1d(redshifts, log_masses)
# PASSIVE GALAXIES
log_masses = [ 7.68, 8.07, 8.38, 8.59, 8.74, 8.88]
interp_passive_galaxies = interpolate.interp1d(redshifts, log_masses)
# ACTIVE GALAXIES
log_masses = [ 7.51, 7.91, 8.17, 8.30, 8.43, 8.55]
interp_active_galaxies = interpolate.interp1d(redshifts, log_masses)
return {
"all" : interp_all_galaxies,
"passive" : interp_passive_galaxies,
"active" : interp_active_galaxies,
}
COSMOS_completeness_limits = get_COSMOS_completeness()
In [23]:
masses = COSMOS.df.loc[matches.index[~matches.error]].mass_med
photo_z = COSMOS.df.loc[matches.index[~matches.error]].photo_z
matched = np.array(matches.match[~matches.error], dtype=float)
In [24]:
num_bins = 100
num_galaxies, x_edges, y_edges = np.histogram2d(photo_z, masses, bins=num_bins)
num_matches, _, _ = np.histogram2d(photo_z, masses, bins=[x_edges, y_edges], weights=matched)
num_matches = num_matches.T
num_galaxies = num_galaxies.T
completeness = num_matches / num_galaxies
In [25]:
completeness[np.isfinite(completeness)].min()
Out[25]:
In [26]:
completeness[np.isfinite(completeness)].max()
Out[26]:
In [27]:
xx, yy = np.meshgrid(x_edges[:-1], y_edges[:-1])
yy = 10**yy # get back into linear space for masses
In [28]:
plt.pcolormesh(xx, yy, num_galaxies,
norm=colors.LogNorm(vmin=1, vmax=num_galaxies.max()),
# cmap=plt.cm.viridis_r,
)
plt.colorbar(label="Number of Galaxies")
plt.xlabel("z (photo)")
plt.ylabel("M_stellar")
zs = np.linspace(COSMOS_completeness_limits["all"].x.min(), COSMOS_completeness_limits["all"].x.max())
plt.plot(zs, 10**COSMOS_completeness_limits["all"](zs),
color="cyan", linewidth=4,
label="COSMOS completeness limit (50%)",
)
plt.legend(loc="lower right")
plt.yscale("log")
plt.title("COSMOS")
Out[28]:
In [29]:
plt.pcolormesh(xx, yy, completeness,
vmin=0, vmax=1,
# cmap=plt.cm.viridis_r,
)
plt.colorbar(label="{} Completeness".format(HSC.label))
plt.xlabel("z (photo)")
plt.ylabel("M_stellar")
contours = plt.contour(xx, yy, num_galaxies, colors="r",
# norm=colors.LogNorm(vmin=100, vmax=num_galaxies.max()),
label="{} density".format(COSMOS.label)
)
contours.collections[0].set_label("COSMOS density")
zs = np.linspace(COSMOS_completeness_limits["all"].x.min(), COSMOS_completeness_limits["all"].x.max())
plt.plot(zs, 10**COSMOS_completeness_limits["all"](zs),
color="cyan", linewidth=4,
label="COSMOS completeness limit (50%)",
)
plt.legend(loc="lower right")
plt.yscale("log")
In [ ]:
In [30]:
num_bins = 100
histogram_range = ((0, .5), (6.5, 12.5))
num_galaxies, x_edges, y_edges = np.histogram2d(photo_z, masses,
range=histogram_range, bins=num_bins)
num_matches, _, _ = np.histogram2d(photo_z, masses, bins=[x_edges, y_edges], weights=matched)
num_matches = num_matches.T
num_galaxies = num_galaxies.T
completeness = num_matches / num_galaxies
In [31]:
completeness[np.isfinite(completeness)].min()
Out[31]:
In [32]:
completeness[np.isfinite(completeness)].max()
Out[32]:
In [33]:
xx, yy = np.meshgrid(x_edges[:-1], y_edges[:-1])
yy = 10**yy # get back into linear space for masses
In [34]:
def add_target_box(ax):
ax.add_patch(
patches.Rectangle(
(0, 10**8), # (x,y) of bottom left corner
0.15, # width
10**9, # height
fill=False,
**add_target_box.line_kwargs,
)
)
add_target_box.line_kwargs = dict(color="r", linewidth=6)
In [35]:
plt.pcolormesh(xx, yy, num_galaxies,
norm=colors.LogNorm(vmin=1, vmax=num_galaxies.max()),
# cmap=plt.cm.viridis_r,
)
plt.colorbar(label="Number of Galaxies")
plt.xlabel("z (photo)")
plt.ylabel("M_stellar")
add_target_box(plt.gca())
zs = np.linspace(COSMOS_completeness_limits["all"].x.min(), COSMOS_completeness_limits["all"].x.max())
plt.plot(zs, 10**COSMOS_completeness_limits["all"](zs),
color="cyan", linewidth=4,
label="COSMOS completeness limit (50%)",
)
handles, labels = plt.gca().get_legend_handles_labels()
fake_line = plt.Line2D([0,0],[0,0], **add_target_box.line_kwargs)
plt.yscale("log")
plt.legend(
[*handles, fake_line],
[*labels, "targetted region"],
loc="upper left",
)
plt.title("COSMOS")
Out[35]:
In [36]:
plt.pcolormesh(xx, yy, completeness,
vmin=0, vmax=1,
# cmap=plt.cm.viridis_r,
)
plt.colorbar(label="{} Completeness".format(HSC.label))
plt.xlabel("z (photo)")
plt.ylabel("M_stellar")
add_target_box(plt.gca())
zs = np.linspace(COSMOS_completeness_limits["all"].x.min(), COSMOS_completeness_limits["all"].x.max())
plt.plot(zs, 10**COSMOS_completeness_limits["all"](zs),
color="cyan", linewidth=4,
label="COSMOS completeness limit (50%)",
)
handles, labels = plt.gca().get_legend_handles_labels()
fake_line = plt.Line2D([0,0],[0,0], **add_target_box.line_kwargs)
plt.yscale("log")
plt.legend(
[*handles, fake_line],
[*labels, "targetted region"],
loc="upper left",
)
Out[36]:
How many times do we have 2, 3, ..., N COSMOS galaxies pointing to the same HSC galaxy? Note that we expect this value to typically be 0-1, not 2+ (since there are more HSC galaxies than COSMOS galaxies).
(To understand issues with HSC's object identification algorithms, you would want to do the opposite: start with HSC objects, then match the nearest COSMOS object -- if an object is artificially broken up, it would look like a large number of repeated matches.)
In [37]:
from collections import Counter
# first see which HSC galaxies have multiple COSMOS matches
# then get a histogram of the number of matches
counts = Counter(list(Counter(matches.catalog_2_ids[matches.match]).values()))
# add HSC galaxies which have no COSMOS object matched to them
number_of_HSC_objects_total = HSC.df.index.size
number_of_HSC_objects_with_matches = np.sum(list(counts.values()))
counts[0] = number_of_HSC_objects_total - number_of_HSC_objects_with_matches
counts
Out[37]:
In [38]:
counts.keys()
Out[38]:
In [39]:
plt.bar(list(counts.keys()), list(counts.values()))
plt.yscale("log")
plt.xlabel("Number of COSMOS Galaxies \n Matched to a Particular HSC Object")
plt.ylabel("Number of HSC Objects \n with that many repeated matches")
plt.xticks(list(counts.keys()))
Out[39]:
In [ ]: