In [2]:
## Read in the Training Data and Instantiating the Photo-z Algorithm
%matplotlib inline
from astropy.table import Table
import numpy as np
import matplotlib.pyplot as plt
#data = Table.read('GTR-ADM-QSO-ir-testhighz_findbw_lup_2016_starclean.fits')
#JT PATH ON TRITON to training set after classification
data = Table.read('/Users/johntimlin/Catalogs/QSO_candidates/Training_set/GTR-ADM-QSO-ir-testhighz_findbw_lup_2016_starclean_with_shenlabel.fits')
#JT PATH HOME USE SHEN ZCUT
#data = Table.read('/home/john/Catalogs/QSO_Candidates/Training_set/GTR-ADM-QSO-ir-testhighz_findbw_lup_2016_starclean_with_shenlabel.fits')
data = data.filled()
# Remove stars
qmask = (data['zspec']>0)
qdata = data[qmask]
print len(qdata)
# X is in the format need for all of the sklearn tools, it just has the colors
Xtrain = np.vstack([ qdata['ug'], qdata['gr'], qdata['ri'], qdata['iz'], qdata['zs1'], qdata['s1s2']]).T
#y = np.array(data['labels'])
ytrain = np.array(qdata['zspec'])
Since we are running on separate test data, we don't need to do a train_test_split
here. But we will scale the data. Need to remember to scale the test data later!
In [3]:
# For algorithms that need scaled data:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(Xtrain) # Don't cheat - fit only on training data
Out[3]:
Quasars candidates from the legacy KDE algorithm are in
GTR-ADM-QSO-ir-testhighz_kdephotoz_lup_2016_quasar_candidates.dat
Quasars candidates from the Random Forest Algorithm are in
GTR-ADM-QSO-ir_good_test_2016_out.fits
Quasar candidates from the RF, SVM, and/or bagging algorithms are in
GTR-ADM-QSO-ir_good_test_2016_out_Stripe82all.fits
In the case of the latter file, this includes Stripe82 only. If we run on the other files, we might want to limit to Stripe 82 to keep the computing time reasonable.
In [4]:
#testdata = Table.read('GTR-ADM-QSO-ir_good_test_2016_out_Stripe82all.fits')
# TEST DATA USING 3.5<z<5 zrange ON TRITON
#testdata = Table.read('/Users/johntimlin/Catalogs/QSO_candidates/Final_S82_candidates_full/GTR-ADM-QSO-ir_good_test_2016_out_Stripe82all.fits')
# TEST DATA USING 2.9<z<5.4 zrange ON HOME
testdata = Table.read('/Users/johntimlin/Catalogs/QSO_Candidates/photoz/SpIES_SHELA_Quasar_Canidates_Shen_zrange_JTmultiproc.fits')
In [5]:
#Limit to objects that have been classified as quasars
qsocandmask = ((testdata['ypredRFC']==0) | (testdata['ypredSVM']==0) | (testdata['ypredBAG']==0))
testdatacand = testdata[qsocandmask]
print len(testdata),len(testdatacand)
In [8]:
## Test zspec objects with zspec >=2.9 and see how well the zphot matches with zspec
testdata = Table.read('/Users/johntimlin/Catalogs/QSO_candidates/Final_S82_candidates_full/QSOs_S82_wzspec_wcolors.fits')
#Limit to objects that have been classified as quasars
#qsocandmask = ((testdata['ypredRFC']==0) | (testdata['ypredSVM']==0) | (testdata['ypredBAG']==0))
#qsocandmask = (testdata['ZSPEC'] >= 2.9)
testdatacand = testdata#[qsocandmask]
print len(testdata),len(testdatacand)
In [9]:
Xtest = np.vstack([ testdatacand['ug'], testdatacand['gr'], testdatacand['ri'], testdatacand['iz'], testdatacand['zs1'], testdatacand['s1s2']]).T
XStest = scaler.transform(Xtest) # apply same transformation to test data
In [ ]:
# Read in KDE candidates
dataKDE = Table.read('GTR-ADM-QSO-ir-testhighz_kdephotoz_lup_2016_quasar_candidates.dat', format='ascii')
print dataKDE.keys()
print len(XKDE)
XKDE = np.vstack([ dataKDE['ug'], dataKDE['gr'], dataKDE['ri'], dataKDE['iz'], dataKDE['zch1'], dataKDE['ch1ch2'] ]).T
In [ ]:
# Read in RF candidates
dataRF = Table.read('GTR-ADM-QSO-ir_good_test_2016_out.fits')
print dataRF.keys()
print len(dataRF)
# Canidates only
maskRF = (dataRF['ypred']==0)
dataRF = dataRF[maskRF]
print len(dataRF)
# X is in the format need for all of the sklearn tools, it just has the colors
XRF = np.vstack([ dataRF['ug'], dataRF['gr'], dataRF['ri'], dataRF['iz'], dataRF['zs1'], dataRF['s1s2']]).T
In [10]:
import numpy as np
from astroML.linear_model import NadarayaWatson
model = NadarayaWatson('gaussian', 0.05)
In [11]:
model.fit(Xtrain,ytrain)
Out[11]:
In [12]:
from sklearn.ensemble import RandomForestRegressor
modelRF = RandomForestRegressor()
modelRF.fit(Xtrain,ytrain)
Out[12]:
In [13]:
zphotRF = modelRF.predict(Xtest)
In [1]:
zphotNW = model.predict(Xtest)
In [14]:
from dask import compute, delayed
def process(Xin):
return model.predict(Xin)
# Create dask objects
dobjs = [delayed(process)(x.reshape(1,-1)) for x in Xtest]
In [15]:
import dask.threaded
ypred = compute(*dobjs, get=dask.threaded.get)
In [16]:
# The dask output needs to be reformatted.
zphotNW = np.array(ypred).reshape(1,-1)[0]
In [17]:
testdatacand['zphotNW'] = zphotNW
testdatacand['zphotRF'] = zphotRF
In [19]:
#TRITON PATH
#testdatacand.write('/Users/johntimlin/Catalogs/QSO_candidates/photoz/Candidates_photoz_S82_shenzrange.fits', format='fits')
#HOME PATH
#testdatacand.write('/home/john/Catalogs/QSO_Candidates/photoz/Candidates_photoz_S82_shenzrange.fits', format='fits')
testdatacand.write('/Users/johntimlin/Catalogs/QSO_candidates/Final_S82_candidates_full/QSOs_S82_wzspec_wcolors_wphotoz.fits')
In [16]:
from densityplot import *
from pylab import *
fig = plt.figure(figsize=(5,5))
hex_scatter(testdatacand['zphotNW'],testdatacand['ug'], min_cnt=10, levels=2, std=True, smoothing=1,
hkwargs={'gridsize': 100, 'cmap': plt.cm.Blues},
skwargs={'color': 'k'})
plt.xlabel('zphot')
plt.ylabel('u-g')
#plt.xlim([-0.1,5.5])
#plt.ylim([-0.1,5.5])
plt.show()
In [24]:
from astroML.plotting import hist as fancyhist
fancyhist(testdatacand['zphotRF'], bins="freedman", histtype="step")
Out[24]: