In [1]:

    
# give access to importing dwarfz
import os, sys
dwarfz_package_dir = os.getcwd().split("dwarfz")[0]
if dwarfz_package_dir not in sys.path:
    sys.path.insert(0, dwarfz_package_dir)

import dwarfz
    
# back to regular import statements

%matplotlib inline
from matplotlib import pyplot as plt
import matplotlib.colors as colors
import matplotlib.patches as patches

import seaborn as sns
sns.set(context="poster", style="ticks", font_scale=1.4)

import numpy as np
import pandas as pd

from scipy import interpolate

import astropy
from astropy import units as u

import pathlib



In [2]:

    
import matplotlib as mpl
mpl.rcParams['savefig.dpi'] = 80
mpl.rcParams['figure.dpi'] = 80
mpl.rcParams['figure.figsize'] = 2*np.array((8,6))
mpl.rcParams['figure.facecolor'] = "white"

COSMOS Data



In [3]:

    
COSMOS_filename = pathlib.Path(dwarfz.data_dir_default) / "COSMOS_reference.sqlite"
COSMOS = dwarfz.datasets.COSMOS(COSMOS_filename)



In [4]:

    
COSMOS.df.head()









    Out[4]:







  
    
      
      alpha
      delta
      flag_Capak
      flag_UVISTA
      flag_deep
      flag_shallow
      photo_z
      classification
      mass_med
      mass_med_min68
      mass_med_max68
      ra
      dec
    
    
      id
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      216439
      149.697606
      1.616639
      0
      0
      0
      0.0
      0.0386
      1
      7.74447
      7.61479
      8.19060
      149.697606
      1.616639
    
    
      218604
      149.749393
      1.618068
      0
      0
      0
      0.0
      0.3797
      0
      11.07610
      11.00020
      11.12550
      149.749393
      1.618068
    
    
      219020
      150.125093
      1.616900
      0
      0
      0
      0.0
      0.1600
      1
      10.90000
      10.86600
      10.93410
      150.125093
      1.616900
    
    
      219656
      150.388349
      1.614538
      0
      0
      0
      0.0
      2.3343
      1
      8.99275
      8.82478
      9.12685
      150.388349
      1.614538
    
    
      219657
      150.383607
      1.614532
      0
      0
      0
      0.0
      1.3916
      1
      8.51774
      7.95630
      9.01445
      150.383607
      1.614532



In [5]:

    
COSMOS.df.describe()









    Out[5]:







  
    
      
      alpha
      delta
      flag_Capak
      flag_UVISTA
      flag_deep
      flag_shallow
      photo_z
      classification
      mass_med
      mass_med_min68
      mass_med_max68
      ra
      dec
    
  
  
    
      count
      518331.000000
      518331.000000
      518331.0
      518331.0
      518331.000000
      518331.000000
      518331.000000
      518331.000000
      518331.000000
      518331.000000
      518331.000000
      518331.000000
      518331.000000
    
    
      mean
      150.072394
      2.232306
      0.0
      0.0
      0.356195
      0.325722
      1.501246
      0.947572
      8.932841
      8.778190
      9.063062
      150.072394
      2.232306
    
    
      std
      0.381040
      0.336753
      0.0
      0.0
      0.478874
      0.468645
      1.056322
      0.222889
      0.924280
      1.247627
      1.204603
      0.381040
      0.336753
    
    
      min
      149.411366
      1.614532
      0.0
      0.0
      0.000000
      0.000000
      0.005000
      0.000000
      4.064430
      -99.000000
      -99.000000
      149.411366
      1.614532
    
    
      25%
      149.750004
      1.954606
      0.0
      0.0
      0.000000
      0.000000
      0.698100
      1.000000
      8.324890
      8.130915
      8.494390
      149.750004
      1.954606
    
    
      50%
      150.073257
      2.228252
      0.0
      0.0
      0.000000
      0.000000
      1.225400
      1.000000
      8.952890
      8.791550
      9.091950
      150.073257
      2.228252
    
    
      75%
      150.388587
      2.523300
      0.0
      0.0
      1.000000
      1.000000
      2.073100
      1.000000
      9.548460
      9.422860
      9.655005
      150.388587
      2.523300
    
    
      max
      150.786592
      2.814121
      0.0
      0.0
      1.000000
      1.000000
      5.994900
      1.000000
      12.316700
      12.254600
      13.066000
      150.786592
      2.814121



In [6]:

    
sns.distplot(COSMOS.df.photo_z)









    



/Users/egentry/anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval






    Out[6]:





<matplotlib.axes._subplots.AxesSubplot at 0x1a47689e48>



In [7]:

    
downsample_factor = 10
mask_COSMOS_downsample = (COSMOS.df.index % downsample_factor == 0)



In [8]:

    
plt.scatter(COSMOS.df.ra[mask_COSMOS_downsample], 
            COSMOS.df.dec[mask_COSMOS_downsample],
            label=COSMOS.label, color="r", s=2,
           )

plt.xlabel("RA")
plt.ylabel("dec")

plt.xlim(149.25, 151.25)

plt.legend(loc="upper left", bbox_to_anchor=(1, 1), 
          markerscale=10)









    Out[8]:





<matplotlib.legend.Legend at 0x1a20cbe2e8>

HSC Data

This won't be included in the git repo by default. See get_data.ipynb.



In [9]:

    
HSC_filename = pathlib.Path(dwarfz.data_dir_default) / "HSC_COSMOS_median_forced.sqlite3"
HSC = dwarfz.datasets.HSC(HSC_filename)



In [10]:

    
HSC.df.head()









    Out[10]:







  
    
      
      ra
      dec
      detect_is_patch_inner
      detect_is_tract_inner
      detect_is_primary
      gcmodel_flux
      gcmodel_flux_err
      gcmodel_flux_flags
      gcmodel_mag
      rcmodel_flux
      ...
      icmodel_flux_flags
      icmodel_mag
      zcmodel_flux
      zcmodel_flux_err
      zcmodel_flux_flags
      zcmodel_mag
      ycmodel_flux
      ycmodel_flux_err
      ycmodel_flux_flags
      ycmodel_mag
    
    
      object_id
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      43158034708430853
      150.903878
      1.697209
      True
      True
      True
      NaN
      NaN
      True
      NaN
      NaN
      ...
      True
      NaN
      NaN
      NaN
      True
      NaN
      NaN
      NaN
      True
      NaN
    
    
      43158034708430859
      150.902865
      1.700115
      True
      True
      True
      1.589821e-30
      3.225965e-31
      False
      25.896629
      NaN
      ...
      False
      24.963961
      -9.020090e-31
      5.404378e-31
      False
      NaN
      1.464685e-30
      1.823984e-30
      False
      25.98564
    
    
      43158034708430861
      150.906167
      1.700661
      True
      True
      True
      NaN
      NaN
      True
      NaN
      NaN
      ...
      True
      NaN
      NaN
      NaN
      True
      NaN
      NaN
      NaN
      True
      NaN
    
    
      43158034708430866
      150.909037
      1.705647
      True
      True
      True
      NaN
      NaN
      True
      NaN
      NaN
      ...
      True
      NaN
      NaN
      NaN
      True
      NaN
      NaN
      NaN
      True
      NaN
    
    
      43158034708430870
      150.909058
      1.707563
      True
      True
      True
      NaN
      NaN
      True
      NaN
      NaN
      ...
      True
      NaN
      NaN
      NaN
      True
      NaN
      NaN
      NaN
      True
      NaN
    
  

5 rows × 25 columns



In [11]:

    
HSC.df.describe()









    Out[11]:







  
    
      
      ra
      dec
      gcmodel_flux
      gcmodel_flux_err
      gcmodel_mag
      rcmodel_flux
      rcmodel_flux_err
      rcmodel_mag
      icmodel_flux
      icmodel_flux_err
      icmodel_mag
      zcmodel_flux
      zcmodel_flux_err
      zcmodel_mag
      ycmodel_flux
      ycmodel_flux_err
      ycmodel_mag
    
  
  
    
      count
      928534.000000
      928534.000000
      7.690350e+05
      7.551790e+05
      7.396050e+05
      8.027340e+05
      7.961300e+05
      7.896650e+05
      8.275200e+05
      8.166680e+05
      8.095610e+05
      8.283700e+05
      8.167040e+05
      8.041950e+05
      8.195020e+05
      8.005810e+05
      7.400920e+05
    
    
      mean
      150.203410
      2.228374
      6.519398e-30
      2.609099e-31
      inf
      1.156106e-29
      2.450737e-31
      inf
      2.041056e-29
      4.497654e-31
      inf
      2.687119e-29
      6.071187e-31
      inf
      3.272011e-29
      1.831965e-30
      inf
    
    
      std
      0.390018
      0.393860
      6.638197e-29
      1.358594e-31
      NaN
      1.158724e-28
      5.136422e-30
      NaN
      1.869028e-28
      2.747879e-31
      NaN
      2.539900e-28
      4.223019e-31
      NaN
      3.122411e-28
      1.111928e-30
      NaN
    
    
      min
      149.504135
      1.487604
      -1.387765e-29
      1.106159e-31
      1.583224e+01
      -1.065718e-26
      7.876289e-32
      1.522354e+01
      -1.003394e-28
      2.055935e-31
      1.486172e+01
      -9.183730e-29
      3.004014e-31
      1.452095e+01
      -1.932652e-28
      9.309609e-31
      1.435206e+01
    
    
      25%
      149.878660
      1.904190
      6.320586e-31
      2.033603e-31
      2.512712e+01
      1.063674e-30
      1.727344e-31
      2.477960e+01
      1.346148e-30
      3.331775e-31
      2.431791e+01
      1.463098e-30
      4.413547e-31
      2.405960e+01
      1.192934e-30
      1.361729e-30
      2.372802e+01
    
    
      50%
      150.200577
      2.229233
      1.418287e-30
      2.402754e-31
      2.596487e+01
      1.958424e-30
      2.184194e-31
      2.564796e+01
      2.832784e-30
      4.045115e-31
      2.523776e+01
      3.291403e-30
      5.408081e-31
      2.505931e+01
      4.051469e-30
      1.639472e-30
      2.470167e+01
    
    
      75%
      150.516367
      2.552068
      3.103480e-30
      2.849902e-31
      2.675559e+01
      4.369804e-30
      2.660773e-31
      2.629728e+01
      6.630696e-30
      4.948445e-31
      2.600889e+01
      8.306420e-30
      6.549200e-31
      2.588803e+01
      1.039705e-29
      1.973811e-30
      2.566023e+01
    
    
      max
      150.991734
      2.975204
      1.686952e-26
      1.707486e-29
      inf
      2.955184e-26
      4.581177e-27
      inf
      4.123954e-26
      2.487757e-29
      inf
      5.644434e-26
      6.039158e-29
      inf
      6.594440e-26
      1.004648e-28
      inf



In [12]:

    
downsample_factor = 10
mask_HSC_downsample = (HSC.df.index % downsample_factor == 0)



In [13]:

    
plt.scatter(HSC.df.ra[mask_HSC_downsample], 
            HSC.df.dec[mask_HSC_downsample],
            label=HSC.label, color="b", s=2,
           )

plt.xlabel("RA")
plt.ylabel("dec")

plt.xlim(149.25, 151.25)

plt.legend(loc="upper left", bbox_to_anchor=(1, 1), 
          markerscale=10)









    Out[13]:





<matplotlib.legend.Legend at 0x1a5b8a6198>

Plot both datasets



In [14]:

    
downsample_factor_COSMOS = 30
mask_COSMOS_downsample = (COSMOS.df.index % downsample_factor_COSMOS == 0)

downsample_factor_HSC = 30*2
mask_HSC_downsample = (HSC.df.index % downsample_factor_HSC == 0)

print("Number of COSMOS galaxies to plot: ", mask_COSMOS_downsample.sum())
print("Number of HSC galaxies to plot:    ", mask_HSC_downsample.sum())









    



Number of COSMOS galaxies to plot:  17270
Number of HSC galaxies to plot:     15443



In [15]:

    
plt.scatter(HSC.df.ra[mask_HSC_downsample], 
            HSC.df.dec[mask_HSC_downsample],
            label=HSC.label, color="b", s=2,
           )


plt.scatter(COSMOS.df.ra[mask_COSMOS_downsample], 
            COSMOS.df.dec[mask_COSMOS_downsample],
            label =COSMOS.label, color="r", s=2,
           )


plt.xlabel("RA")
plt.ylabel("dec")

plt.xlim(149.25, 151.25)

plt.legend(loc="upper left", bbox_to_anchor=(1, 1), 
          markerscale=10)









    Out[15]:





<matplotlib.legend.Legend at 0x1a5eadb3c8>

Load matches



In [16]:

    
matches_filename = pathlib.Path(dwarfz.data_dir_default) / "matches.sqlite3"

matches = dwarfz.matching.Matches.load_from_filename(matches_filename)

Evaluate matches



In [17]:

    
print("threshold (error) : {:>5.2f}".format(
    dwarfz.matching.Matches.threshold_error_default))
print("threshold (match) : {:>5.2f}".format(
    dwarfz.matching.Matches.threshold_match_default))









    



threshold (error) : 15.00 arcsec
threshold (match) :  1.00 arcsec



In [18]:

    
print("overall completeness : {:.2f} %".format(100*np.mean(matches.match[~matches.error])))









    



overall completeness : 83.92 %



In [19]:

    
print("min separation: {:.4f} [arcsec]".format(min(matches.sep)))
print("max separation: {:.4f} [arcsec]".format(max(matches.sep)))









    



min separation: 0.0002 [arcsec]
max separation: 335.4579 [arcsec]



In [20]:

    
plt.axvline(np.log10(dwarfz.matching.Matches.threshold_error_default.to(u.arcsec).value),   
            linestyle="dashed", color="r", label="error")
plt.axvline(np.log10(dwarfz.matching.Matches.threshold_match_default.to(u.arcsec).value), 
            linestyle="solid",  color="k", label="match")
plt.legend(loc="upper left")

sns.distplot(np.log10(matches.sep))
plt.xlabel("log_10 separation [arcsec]")
plt.ylabel("probability density")

plt.title("Distribution of Nearest-Neighbor separations")









    



/Users/egentry/anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval






    Out[20]:





Text(0.5, 1.0, 'Distribution of Nearest-Neighbor separations')



In [21]:

    
plt.scatter(COSMOS.df.loc[matches.index[matches.match]].ra,
            COSMOS.df.loc[matches.index[matches.match]].dec,
            color="b", s=2,
            label="Found in HSC",
           )


plt.scatter(COSMOS.df.loc[matches.index[~matches.match & ~matches.error]].ra,
            COSMOS.df.loc[matches.index[~matches.match & ~matches.error]].dec,
            color="r", s=2,
            label="Missing in HSC",
           )


plt.scatter(COSMOS.df.loc[matches.index[matches.error]].ra,
            COSMOS.df.loc[matches.index[matches.error]].dec,
            color="k", s=2,
            label="Mismatched Footprint",
           )


plt.xlabel("RA (deg)")
plt.ylabel("Dec (deg)")

plt.legend(loc="upper left", bbox_to_anchor=(1,1), markerscale=10)
plt.title("COSMOS Galaxies by Matching Outcome")









    Out[21]:





Text(0.5, 1.0, 'COSMOS Galaxies by Matching Outcome')

Make the 2d completeness plot (overall)

First, implement the quoted completeness for COSMOS



In [22]:

    
def get_COSMOS_completeness():
    """ Gets interpolators for the COSMOS completeness limits as a function of redshift

    I *think* this is the 50% completeness threshold for each type of galaxy
    ("all", "passive", "active" [star-forming]) in terms of the stellar mass.
    (I.e. below this mass, cosmos isn't confident that it's complete)
    
    
    Inputs
    ------
        None
        
    Outputs
    -------
        dict of scipy.interpolate.interpolate.interp1d objects
            - dictionary keys are "all", "passive" and "active", 
              mapping to respective interpolators
            - Interpolators take in redshift, and return the 
              stellar mass completeness limit in log M_solar units
    
    
    Notes
    -----
    This is taken from Alexie's `ms_limit.pro` file. I *think* these are 
    completeness limits for respective galaxy types, not classification cuts
    for active vs. passive.
    
    """

    redshifts = [0.17, 0.24, 0.32, 0.37, 0.43, 0.5]

    # ALL GALAXIES
    log_masses = [ 7.53,  7.93,  8.22,  8.38,  8.52,  8.66]
    interp_all_galaxies = interpolate.interp1d(redshifts, log_masses)
    
    # PASSIVE GALAXIES
    log_masses = [ 7.68,  8.07,  8.38,  8.59,  8.74,  8.88]
    interp_passive_galaxies = interpolate.interp1d(redshifts, log_masses)
    
    # ACTIVE GALAXIES
    log_masses = [ 7.51,  7.91,  8.17,  8.30,  8.43,  8.55]
    interp_active_galaxies = interpolate.interp1d(redshifts, log_masses)
    
    
    return {
        "all"     : interp_all_galaxies,
        "passive" : interp_passive_galaxies,
        "active"  : interp_active_galaxies,
    }

COSMOS_completeness_limits = get_COSMOS_completeness()

Now determine completeness for HSC



In [23]:

    
masses  = COSMOS.df.loc[matches.index[~matches.error]].mass_med
photo_z = COSMOS.df.loc[matches.index[~matches.error]].photo_z

matched = np.array(matches.match[~matches.error], dtype=float)



In [24]:

    
num_bins = 100
num_galaxies, x_edges, y_edges = np.histogram2d(photo_z, masses, bins=num_bins)
num_matches, _, _ = np.histogram2d(photo_z, masses, bins=[x_edges, y_edges], weights=matched)

num_matches  = num_matches.T
num_galaxies = num_galaxies.T

completeness = num_matches / num_galaxies









    



/Users/egentry/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:8: RuntimeWarning: divide by zero encountered in true_divide
  
/Users/egentry/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:8: RuntimeWarning: invalid value encountered in true_divide



In [25]:

    
completeness[np.isfinite(completeness)].min()









    Out[25]:





0.0



In [26]:

    
completeness[np.isfinite(completeness)].max()









    Out[26]:





1.0



In [27]:

    
xx, yy = np.meshgrid(x_edges[:-1], y_edges[:-1])
yy = 10**yy # get back into linear space for masses

Plot overall histogram of COSMOS Galaxies:



In [28]:

    
plt.pcolormesh(xx, yy, num_galaxies,
                 norm=colors.LogNorm(vmin=1, vmax=num_galaxies.max()),
#                cmap=plt.cm.viridis_r, 
              )
plt.colorbar(label="Number of Galaxies")
plt.xlabel("z (photo)")
plt.ylabel("M_stellar")

zs = np.linspace(COSMOS_completeness_limits["all"].x.min(), COSMOS_completeness_limits["all"].x.max())
plt.plot(zs, 10**COSMOS_completeness_limits["all"](zs), 
         color="cyan", linewidth=4,
         label="COSMOS completeness limit (50%)",
        )
plt.legend(loc="lower right")


plt.yscale("log")
plt.title("COSMOS")









    Out[28]:





Text(0.5, 1.0, 'COSMOS')

Plot completeness of COSMOS galaxies in HSC

Red contours are linearly spaced contours of the COSMOS galaxy distribution



In [29]:

    
plt.pcolormesh(xx, yy, completeness,
               vmin=0, vmax=1, 
#                cmap=plt.cm.viridis_r, 
              )
plt.colorbar(label="{} Completeness".format(HSC.label))
plt.xlabel("z (photo)")
plt.ylabel("M_stellar")

contours = plt.contour(xx, yy, num_galaxies, colors="r",
#             norm=colors.LogNorm(vmin=100, vmax=num_galaxies.max()),
            label="{} density".format(COSMOS.label)
           )
contours.collections[0].set_label("COSMOS density")


zs = np.linspace(COSMOS_completeness_limits["all"].x.min(), COSMOS_completeness_limits["all"].x.max())
plt.plot(zs, 10**COSMOS_completeness_limits["all"](zs), 
         color="cyan", linewidth=4,
         label="COSMOS completeness limit (50%)",
        )

plt.legend(loc="lower right")

plt.yscale("log")









    



/Users/egentry/anaconda3/lib/python3.7/site-packages/matplotlib/contour.py:1000: UserWarning: The following kwargs were not used by contour: 'label'
  s)



In [ ]:

Make the 2d completeness plot (low-$z$)

Recreate the same two plots as above, but now only look at the low redshift area



In [30]:

    
num_bins = 100
histogram_range = ((0, .5), (6.5, 12.5))
num_galaxies, x_edges, y_edges = np.histogram2d(photo_z, masses, 
                                                range=histogram_range, bins=num_bins)
num_matches, _, _ = np.histogram2d(photo_z, masses, bins=[x_edges, y_edges], weights=matched)

num_matches  = num_matches.T
num_galaxies = num_galaxies.T

completeness = num_matches / num_galaxies









    



/Users/egentry/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: divide by zero encountered in true_divide
  # Remove the CWD from sys.path while we load stuff.
/Users/egentry/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in true_divide
  # Remove the CWD from sys.path while we load stuff.



In [31]:

    
completeness[np.isfinite(completeness)].min()









    Out[31]:





0.0



In [32]:

    
completeness[np.isfinite(completeness)].max()









    Out[32]:





1.0



In [33]:

    
xx, yy = np.meshgrid(x_edges[:-1], y_edges[:-1])
yy = 10**yy # get back into linear space for masses

Plot overall histogram of COSMOS Galaxies:

The red box is the area that we will be trying to isolate in HSC_COSMOS_cuts.ipynb



In [34]:

    
def add_target_box(ax):
    ax.add_patch(
        patches.Rectangle(
            (0, 10**8),   # (x,y) of bottom left corner
            0.15,          # width
            10**9,          # height
        fill=False, 
        **add_target_box.line_kwargs,
        )
    )
    

add_target_box.line_kwargs = dict(color="r", linewidth=6)



In [35]:

    
plt.pcolormesh(xx, yy, num_galaxies,
                 norm=colors.LogNorm(vmin=1, vmax=num_galaxies.max()),
#                cmap=plt.cm.viridis_r, 
              )
plt.colorbar(label="Number of Galaxies")
plt.xlabel("z (photo)")
plt.ylabel("M_stellar")

add_target_box(plt.gca())

zs = np.linspace(COSMOS_completeness_limits["all"].x.min(), COSMOS_completeness_limits["all"].x.max())
plt.plot(zs, 10**COSMOS_completeness_limits["all"](zs), 
         color="cyan", linewidth=4,
         label="COSMOS completeness limit (50%)",
        )

handles, labels = plt.gca().get_legend_handles_labels()

fake_line = plt.Line2D([0,0],[0,0], **add_target_box.line_kwargs)
                
plt.yscale("log")
plt.legend(
    [*handles, fake_line],
    [*labels, "targetted region"],
    loc="upper left",
)
plt.title("COSMOS")









    Out[35]:





Text(0.5, 1.0, 'COSMOS')

Plot completeness of COSMOS galaxies in HSC

The red box is the area that we will be trying to isolate in HSC_COSMOS_cuts.ipynb



In [36]:

    
plt.pcolormesh(xx, yy, completeness,
               vmin=0, vmax=1, 
#                cmap=plt.cm.viridis_r, 
              )
plt.colorbar(label="{} Completeness".format(HSC.label))
plt.xlabel("z (photo)")
plt.ylabel("M_stellar")

add_target_box(plt.gca())

zs = np.linspace(COSMOS_completeness_limits["all"].x.min(), COSMOS_completeness_limits["all"].x.max())
plt.plot(zs, 10**COSMOS_completeness_limits["all"](zs), 
         color="cyan", linewidth=4,
         label="COSMOS completeness limit (50%)",
        )

handles, labels = plt.gca().get_legend_handles_labels()

fake_line = plt.Line2D([0,0],[0,0], **add_target_box.line_kwargs)
                
plt.yscale("log")
plt.legend(
    [*handles, fake_line],
    [*labels, "targetted region"],
    loc="upper left",
)









    Out[36]:





<matplotlib.legend.Legend at 0x1a26861278>

How many matches repeat?

How many times do we have 2, 3, ..., N COSMOS galaxies pointing to the same HSC galaxy? Note that we expect this value to typically be 0-1, not 2+ (since there are more HSC galaxies than COSMOS galaxies).

(To understand issues with HSC's object identification algorithms, you would want to do the opposite: start with HSC objects, then match the nearest COSMOS object -- if an object is artificially broken up, it would look like a large number of repeated matches.)



In [37]:

    
from collections import Counter

# first see which HSC galaxies have multiple COSMOS matches
# then get a histogram of the number of matches
counts = Counter(list(Counter(matches.catalog_2_ids[matches.match]).values()))
# add HSC galaxies which have no COSMOS object matched to them
number_of_HSC_objects_total = HSC.df.index.size
number_of_HSC_objects_with_matches = np.sum(list(counts.values()))
counts[0] = number_of_HSC_objects_total - number_of_HSC_objects_with_matches
counts









    Out[37]:





Counter({1: 403794, 2: 654, 3: 1, 0: 524085})



In [38]:

    
counts.keys()









    Out[38]:





dict_keys([1, 2, 3, 0])



In [39]:

    
plt.bar(list(counts.keys()), list(counts.values()))
plt.yscale("log")
plt.xlabel("Number of COSMOS Galaxies \n  Matched to a Particular HSC Object")
plt.ylabel("Number of HSC Objects \n with that many repeated matches")
plt.xticks(list(counts.keys()))









    Out[39]:





([<matplotlib.axis.XTick at 0x1a307cdcf8>,
  <matplotlib.axis.XTick at 0x1a307e9dd8>,
  <matplotlib.axis.XTick at 0x1a307e9a90>,
  <matplotlib.axis.XTick at 0x1a2486de10>],
 <a list of 4 Text xticklabel objects>)



In [ ]:

	alpha	delta	flag_Capak	flag_UVISTA	flag_deep	flag_shallow	photo_z	classification	mass_med	mass_med_min68	mass_med_max68	ra	dec
id
216439	149.697606	1.616639	0	0	0	0.0	0.0386	1	7.74447	7.61479	8.19060	149.697606	1.616639
218604	149.749393	1.618068	0	0	0	0.0	0.3797	0	11.07610	11.00020	11.12550	149.749393	1.618068
219020	150.125093	1.616900	0	0	0	0.0	0.1600	1	10.90000	10.86600	10.93410	150.125093	1.616900
219656	150.388349	1.614538	0	0	0	0.0	2.3343	1	8.99275	8.82478	9.12685	150.388349	1.614538
219657	150.383607	1.614532	0	0	0	0.0	1.3916	1	8.51774	7.95630	9.01445	150.383607	1.614532

	alpha	delta	flag_Capak	flag_UVISTA	flag_deep	flag_shallow	photo_z	classification	mass_med	mass_med_min68	mass_med_max68	ra	dec
count	518331.000000	518331.000000	518331.0	518331.0	518331.000000	518331.000000	518331.000000	518331.000000	518331.000000	518331.000000	518331.000000	518331.000000	518331.000000
mean	150.072394	2.232306	0.0	0.0	0.356195	0.325722	1.501246	0.947572	8.932841	8.778190	9.063062	150.072394	2.232306
std	0.381040	0.336753	0.0	0.0	0.478874	0.468645	1.056322	0.222889	0.924280	1.247627	1.204603	0.381040	0.336753
min	149.411366	1.614532	0.0	0.0	0.000000	0.000000	0.005000	0.000000	4.064430	-99.000000	-99.000000	149.411366	1.614532
25%	149.750004	1.954606	0.0	0.0	0.000000	0.000000	0.698100	1.000000	8.324890	8.130915	8.494390	149.750004	1.954606
50%	150.073257	2.228252	0.0	0.0	0.000000	0.000000	1.225400	1.000000	8.952890	8.791550	9.091950	150.073257	2.228252
75%	150.388587	2.523300	0.0	0.0	1.000000	1.000000	2.073100	1.000000	9.548460	9.422860	9.655005	150.388587	2.523300
max	150.786592	2.814121	0.0	0.0	1.000000	1.000000	5.994900	1.000000	12.316700	12.254600	13.066000	150.786592	2.814121

	ra	dec	detect_is_patch_inner	detect_is_tract_inner	detect_is_primary	gcmodel_flux	gcmodel_flux_err	gcmodel_flux_flags	gcmodel_mag	rcmodel_flux	...	icmodel_flux_flags	icmodel_mag	zcmodel_flux	zcmodel_flux_err	zcmodel_flux_flags	zcmodel_mag	ycmodel_flux	ycmodel_flux_err	ycmodel_flux_flags	ycmodel_mag
object_id
43158034708430853	150.903878	1.697209	True	True	True	NaN	NaN	True	NaN	NaN	...	True	NaN	NaN	NaN	True	NaN	NaN	NaN	True	NaN
43158034708430859	150.902865	1.700115	True	True	True	1.589821e-30	3.225965e-31	False	25.896629	NaN	...	False	24.963961	-9.020090e-31	5.404378e-31	False	NaN	1.464685e-30	1.823984e-30	False	25.98564
43158034708430861	150.906167	1.700661	True	True	True	NaN	NaN	True	NaN	NaN	...	True	NaN	NaN	NaN	True	NaN	NaN	NaN	True	NaN
43158034708430866	150.909037	1.705647	True	True	True	NaN	NaN	True	NaN	NaN	...	True	NaN	NaN	NaN	True	NaN	NaN	NaN	True	NaN
43158034708430870	150.909058	1.707563	True	True	True	NaN	NaN	True	NaN	NaN	...	True	NaN	NaN	NaN	True	NaN	NaN	NaN	True	NaN

	ra	dec	gcmodel_flux	gcmodel_flux_err	gcmodel_mag	rcmodel_flux	rcmodel_flux_err	rcmodel_mag	icmodel_flux	icmodel_flux_err	icmodel_mag	zcmodel_flux	zcmodel_flux_err	zcmodel_mag	ycmodel_flux	ycmodel_flux_err	ycmodel_mag
count	928534.000000	928534.000000	7.690350e+05	7.551790e+05	7.396050e+05	8.027340e+05	7.961300e+05	7.896650e+05	8.275200e+05	8.166680e+05	8.095610e+05	8.283700e+05	8.167040e+05	8.041950e+05	8.195020e+05	8.005810e+05	7.400920e+05
mean	150.203410	2.228374	6.519398e-30	2.609099e-31	inf	1.156106e-29	2.450737e-31	inf	2.041056e-29	4.497654e-31	inf	2.687119e-29	6.071187e-31	inf	3.272011e-29	1.831965e-30	inf
std	0.390018	0.393860	6.638197e-29	1.358594e-31	NaN	1.158724e-28	5.136422e-30	NaN	1.869028e-28	2.747879e-31	NaN	2.539900e-28	4.223019e-31	NaN	3.122411e-28	1.111928e-30	NaN
min	149.504135	1.487604	-1.387765e-29	1.106159e-31	1.583224e+01	-1.065718e-26	7.876289e-32	1.522354e+01	-1.003394e-28	2.055935e-31	1.486172e+01	-9.183730e-29	3.004014e-31	1.452095e+01	-1.932652e-28	9.309609e-31	1.435206e+01
25%	149.878660	1.904190	6.320586e-31	2.033603e-31	2.512712e+01	1.063674e-30	1.727344e-31	2.477960e+01	1.346148e-30	3.331775e-31	2.431791e+01	1.463098e-30	4.413547e-31	2.405960e+01	1.192934e-30	1.361729e-30	2.372802e+01
50%	150.200577	2.229233	1.418287e-30	2.402754e-31	2.596487e+01	1.958424e-30	2.184194e-31	2.564796e+01	2.832784e-30	4.045115e-31	2.523776e+01	3.291403e-30	5.408081e-31	2.505931e+01	4.051469e-30	1.639472e-30	2.470167e+01
75%	150.516367	2.552068	3.103480e-30	2.849902e-31	2.675559e+01	4.369804e-30	2.660773e-31	2.629728e+01	6.630696e-30	4.948445e-31	2.600889e+01	8.306420e-30	6.549200e-31	2.588803e+01	1.039705e-29	1.973811e-30	2.566023e+01
max	150.991734	2.975204	1.686952e-26	1.707486e-29	inf	2.955184e-26	4.581177e-27	inf	4.123954e-26	2.487757e-29	inf	5.644434e-26	6.039158e-29	inf	6.594440e-26	1.004648e-28	inf