Comparing G16 vs ML for the Referee


In [17]:
# imports
import numpy as np
import os, sys

from linetools.utils import dv_from_z

from dla_cnn.io import load_ml_dr7, load_ml_dr12, load_garnett16
from dla_cnn.catalogs import match_boss_catalogs

sys.path.append(os.path.abspath("../Analysis/py"))
import defs as analy_defs

Setup


In [2]:
# Redshift tolerance handed to match_boss_catalogs when pairing absorbers
dztoler = 0.015
# Probability threshold applied to the G16 'pDLAD' column; it comes from the
# shared analysis definitions so the notebook stays in sync with them
g16_pcut = analy_defs.g16_pcut
g16_pcut


Out[2]:
0.95

Load the catalogs

ML


In [3]:
# Read the BOSS DR12 ML absorber table (the first return value is not used here)
_, dr12_abs = load_ml_dr12()

# Absorber-level cuts: DLA column density, no BAL flag, confident detection
dlas = dr12_abs['NHI'] >= 20.3
no_bals = dr12_abs['flg_BAL'] == 0
high_conf = dr12_abs['conf'] > 0.9

# Redshift cuts
zcut = dr12_abs['zabs'] > 2.15
zem = (dr12_abs['zabs'] < dr12_abs['zem']) & (dr12_abs['zem'] > 2.15)  # G16 cut on zem
# Proximity cut: 3000./3e5 = 0.01 in redshift below zem
# (presumably 3000 km/s over c in km/s -- TODO confirm against the G16 definition)
zprox = dr12_abs['zabs'] < (dr12_abs['zem'] - 3000./3e5)

# Combine every mask and keep the surviving absorbers
dr12_cut = (dlas & no_bals & high_conf
            & zem & zcut & zprox)
dr12_dla = dr12_abs[dr12_cut]
len(dr12_dla)


Out[3]:
15922

Load Garnett


In [4]:
# Load the full G16 (Garnett) catalog
g16_abs = load_garnett16()
# The recorded RuntimeWarnings ("invalid value encountered in greater_equal")
# indicate NaN entries in these columns.  A comparison against NaN evaluates
# to False, so such rows are simply excluded; silence the warning explicitly
# so the cell output stays clean and the intent is documented.
with np.errstate(invalid='ignore'):
    g_dlas = g16_abs['log.NHI'] >= 20.3   # DLA column-density threshold
    g_conf = g16_abs['pDLAD'] >= g16_pcut  # confidence threshold from Setup
# Cut
g_cut = g_dlas & g_conf
g16_dlas = g16_abs[g_cut]
len(g16_dlas)


/home/xavier/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/ipykernel_launcher.py:2: RuntimeWarning: invalid value encountered in greater_equal
  
/home/xavier/.pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/ipykernel_launcher.py:3: RuntimeWarning: invalid value encountered in greater_equal
  This is separate from the ipykernel package so we can avoid doing imports until
Out[4]:
18173

Match ML to G16


In [5]:
# Match every selected ML DLA against the selected G16 DLAs.  The notebook
# treats negative entries of the result as "no match" and non-negative
# entries as row indices into g16_dlas.
dr12_to_g16 = match_boss_catalogs(dr12_dla, g16_dlas, dztoler=dztoler)
matched = dr12_to_g16 >= 0
g16_idx = dr12_to_g16[matched]
print("We matched {:d} of {:d} DLAs between high quality ML and G16 within dz={:g}".format(
    np.sum(matched), np.sum(dr12_cut), dztoler))

# Sanity check on the matched pairs.  Use a dedicated name so the `high_conf`
# selection mask built while loading the ML catalog is not overwritten with an
# array of a different length, and apply the G16 threshold exactly as in the
# G16 selection (>= g16_pcut).  Both inputs were already cut on confidence,
# so this is expected to equal the number of matches.
both_high_conf = (dr12_dla['conf'][matched] > 0.9) & (g16_dlas['pDLAD'][g16_idx] >= g16_pcut)
print("Of these, {:d} are high confidence in both".format(np.sum(both_high_conf)))


We matched 10205 of 15922 DLAs between high quality ML and G16 within dz=0.015
Of these, 10205 are high confidence in both

Consider large dNHI


In [6]:
# ML column densities of the matched DLAs, and their offset from the G16 values
NHI = dr12_dla['NHI'][matched]
dNHI = NHI - g16_dlas['log.NHI'][g16_idx]

In [7]:
# Flag matched pairs whose ML and G16 column densities differ by more than
# this threshold (same units as the NHI columns), then count them
dNHI_thresh = 0.5
large_dNHI = np.abs(dNHI) > dNHI_thresh
np.sum(large_dNHI)


Out[7]:
133

Examine an ML DLA not in G16


In [27]:
# ML DLAs without a G16 counterpart (negative entries in dr12_to_g16)
not_matched = dr12_to_g16 < 0
# Row indices into dr12_dla of the unmatched DLAs
not_idx = np.where(not_matched)[0]
# Display how many are unmatched.  This must be the last expression of the
# cell: a bare expression in the middle of a cell is evaluated and discarded.
len(not_idx)

In [29]:
# Pick the first unmatched ML DLA and display its catalog row
jj = not_idx[0]
dr12_dla[jj]


Out[29]:
<Row index=1>
PlateFiberMJDRADECzemzabsNHIsigNHIconfflg_BALSNR
int64int64int64float64float64float64float64float64float64float64int64float64
3587617551828.07540.13543.788477701652.9562557806320.67910751220.06244299933310.962501.3497

Match to full G16 catalog


In [30]:
# Show the row index (into dr12_dla) of the unmatched DLA under examination
jj


Out[30]:
1

In [33]:
# Two-row slice starting at the unmatched DLA; a second row is included
# because the matching code needed 2 entries to run
ml_pair = dr12_dla[jj:jj+2]
# Match against the *full* G16 table with a much looser tolerance (dz = 1)
# than the 0.015 used for the main comparison
jj1 = match_boss_catalogs(ml_pair, g16_abs, dztoler=1.)
jj1


Out[33]:
array([ 9039, 11200])

In [34]:
# Inspect the G16 entry returned for the unmatched ML DLA.  Index through the
# match result instead of a row number copied by hand from the previous
# output, so the cell stays correct if the catalogs or cuts change.
g16_abs[jj1[0]]


Out[34]:
<Row index=9039 masked=True>
THINGIDSDSSPlateMJDFiberRAdegDEdegz_QSOSNRflg_BALzminzmaxlog.pnDLAlog.pDLAlog.pDnDLAlog.pDDLApnDLADpDLADz_DLAlog.NHI
int64str18int64int64int64float64float64float64float64int64float64float64float64float64float64float64float64float64float64float64
96897814003218.09+000807.33587551826178.07539610.13536593.79941.349702.60963.7894-0.10889-2.2714-1237.94-1228.960.00109180.9989082.834720.301

In [19]:
# Velocity offset between the ML absorber redshift and the redshift of the G16
# entry found above.  Read both values from the tables rather than typing in
# truncated numbers copied from earlier outputs.
dv_from_z(dr12_dla['zabs'][jj], g16_abs['z_DLA'][jj1[0]])


Out[19]:
$9352.4873 \; \mathrm{\frac{km}{s}}$

Reverse the match (G16 to ML)


In [16]:
# Same comparison in the other direction: for each selected G16 DLA, look up
# its counterpart in the selected ML sample
g16_to_dr12 = match_boss_catalogs(g16_dlas, dr12_dla, dztoler=dztoler, reverse=True)
matched2 = g16_to_dr12 >= 0
dr12_idx = g16_to_dr12[matched2]

# Report matched count against the size of the G16 selection
n_matched2, n_g16 = np.sum(matched2), np.sum(g_cut)
summary = "We matched {:d} of {:d} DLAs between high quality ML and G16 within dz={:g}"
print(summary.format(n_matched2, n_g16, dztoler))


We matched 10205 of 18173 DLAs between high quality ML and G16 within dz=0.015

In [15]:
# Peek at the boolean mask of G16 DLAs that found an ML counterpart
matched2


Out[15]:
array([ True,  True,  True, ...,  True,  True,  True], dtype=bool)

In [ ]: