In [2]:
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy import units as u
from astropy.table import hstack
import matplotlib.pyplot as plt
import numpy as np
def sigG(arr):
return 0.741*(np.quantile(arr, 0.75)-np.quantile(arr, 0.25))
Define all the SDSS, Gaia and other catalogs
In [3]:
ZIdataDir = "/Users/ivezic/Work/Science/CalibrationV2/Data"
# the original SDSS catalog from 2007
sdssOldCat = ZIdataDir + "/" + "stripe82calibStars_v2.6.dat"
# Karun's new catalog from 2020
sdssNewCat = ZIdataDir + "/" + "NEW_stripe82calibStars_v0.dat"
# Gaia DR2
GaiaDR2Cat = ZIdataDir + "/" + "Stripe82_GaiaDR2.dat"
# for testing, 1% of the sample
GaiaDR2Cat1perc = ZIdataDir + "/" + "Stripe82_GaiaDR2_1percent.dat"
In [4]:
# both new and old files use identical data structure
colnamesSDSS = ['calib_fla', 'ra', 'dec', 'raRMS', 'decRMS', 'nEpochs', 'AR_val',
'u_Nobs', 'u_mMed', 'u_mMean', 'u_mErr', 'u_rms_scatt', 'u_chi2',
'g_Nobs', 'g_mMed', 'g_mMean', 'g_mErr', 'g_rms_scatt', 'g_chi2',
'r_Nobs', 'r_mMed', 'r_mMean', 'r_mErr', 'r_rms_scatt', 'r_chi2',
'i_Nobs', 'i_mMed', 'i_mMean', 'i_mErr', 'i_rms_scatt', 'i_chi2',
'z_Nobs', 'z_mMed', 'z_mMean', 'z_mErr', 'z_rms_scatt', 'z_chi2']
In [5]:
%%time
# old
sdssOld = Table.read(sdssOldCat, format='ascii', names=colnamesSDSS)
In [6]:
np.size(sdssOld)
Out[6]:
In [7]:
%%time
# new
sdssNewRaw = Table.read(sdssNewCat, format='csv', names=colnamesSDSS)
# rejects objects with bad Declination, sample size from 1007136 to 1005470
sdssNew = sdssNewRaw[sdssNewRaw['dec']<90]
In [38]:
sdssOld_coords = SkyCoord(ra = sdssOld['ra']*u.degree, dec= sdssOld['dec']*u.degree)
sdssNew_coords = SkyCoord(ra = sdssNew['ra']*u.degree, dec= sdssNew['dec']*u.degree)
# this is matching sdssNew to sdssOld, so that indices are into sdssNew catalog
# makes sense in this case since the sdssOld catalog is (a little bit) bigger
# than sdssNew (1006849 vs 1005470)
idx, d2d, d3d = sdssNew_coords.match_to_catalog_sky(sdssOld_coords)
In [39]:
# object separation is an object with units,
# I add that as a column so that one can
# select based on separation to the nearest matching object
sdss = hstack([sdssNew, sdssOld[idx]], table_names = ['new', 'old'])
sdss['sep_2d_arcsec'] = d2d.arcsec
# since we matched sdssNew to sdssOld, the resulting catalog has the same lengthas sdssNew: 1005470
In [40]:
### code for generating new quantities, such as dra, ddec, colors, differences in mags, etc
def derivedColumns(matches):
matches['dra'] = (matches['ra_new']-matches['ra_old'])*3600
matches['ddec'] = (matches['dec_new']-matches['dec_old'])*3600
matches['ra'] = matches['ra_old']
ra = matches['ra']
matches['raW'] = np.where(ra > 180, ra-360, ra)
matches['dec'] = matches['dec_old']
matches['u'] = matches['u_mMed_old']
matches['g'] = matches['g_mMed_old']
matches['r'] = matches['r_mMed_old']
matches['i'] = matches['i_mMed_old']
matches['z'] = matches['z_mMed_old']
matches['ug'] = matches['u_mMed_old'] - matches['g_mMed_old']
matches['gr'] = matches['g_mMed_old'] - matches['r_mMed_old']
matches['ri'] = matches['r_mMed_old'] - matches['i_mMed_old']
matches['gi'] = matches['g_mMed_old'] - matches['i_mMed_old']
matches['du'] = matches['u_mMed_old'] - matches['u_mMed_new']
matches['dg'] = matches['g_mMed_old'] - matches['g_mMed_new']
matches['dr'] = matches['r_mMed_old'] - matches['r_mMed_new']
matches['di'] = matches['i_mMed_old'] - matches['i_mMed_new']
matches['dz'] = matches['z_mMed_old'] - matches['z_mMed_new']
return
derivedColumns(sdss)
In [60]:
colnamesGaia = ['ra', 'dec', 'nObs', 'Gmag', 'flux', 'fluxErr', 'pmra', 'pmdec']
if (1):
gaia = Table.read(GaiaDR2Cat, format='ascii', names=colnamesGaia)
else:
gaia = Table.read(GaiaDR2Cat1perc, format='ascii', names=colnamesGaia)
In [61]:
sdss_coords = SkyCoord(ra = sdss['ra_old']*u.degree, dec= sdss['dec_old']*u.degree)
gaia_coords = SkyCoord(ra = gaia['ra']*u.degree, dec= gaia['dec']*u.degree)
# this is matching gaia to sdss, so that indices are into sdss catalog
# makes sense in this case since the sdss catalog is bigger than gaia
idxG, d2dG, d3dG = gaia_coords.match_to_catalog_sky(sdss_coords)
# object separation is an object with units,
# I add that as a column so that one can
# select based on separation to the nearest matching object
gaia_sdss = hstack([gaia, sdss[idxG]], table_names = ['gaia', 'sdss'])
gaia_sdss['sep_2d_arcsec'] = d2dG.arcsec
In [68]:
matchedG = gaia_sdss[d2dG.arcsec<0.5]
In [70]:
# print(np.size(gaia_sdss), np.size(matchedG))
matchedG['draGold'] = 3600*(matchedG['ra_old'] - matchedG['ra_gaia'])
matchedG['draGnew'] = 3600*(matchedG['ra_new'] - matchedG['ra_gaia'])
matchedG['ddecGold'] = 3600*(matchedG['dec_old'] - matchedG['dec_gaia'])
matchedG['ddecGnew'] = 3600*(matchedG['dec_new'] - matchedG['dec_gaia'])
In [72]:
# quick plot - binned median
def qpBM(d, Xstr, Xmin, Xmax, Ystr, Ymin, Ymax, nBin, Nsigma=3, offset=0.01):
ax = plt.axes()
ax.scatter(d[Xstr], d[Ystr], s=0.01, c='black')
# binning
xBinM, nPtsM, medianBinM, sigGbinM = fitMedians(d[Xstr], d[Ystr], Xmin, Xmax, nBin, 1)
# plotting
ax.scatter(xBinM, medianBinM, s=30.0, c='black', alpha=0.8)
ax.scatter(xBinM, medianBinM, s=15.0, c='yellow', alpha=0.3)
#
TwoSigP = medianBinM + Nsigma*sigGbinM
TwoSigM = medianBinM - Nsigma*sigGbinM
ax.plot(xBinM, TwoSigP, c='yellow')
ax.plot(xBinM, TwoSigM, c='yellow')
#
rmsBin = np.sqrt(nPtsM) / np.sqrt(np.pi/2) * sigGbinM
rmsP = medianBinM + rmsBin
rmsM = medianBinM - rmsBin
ax.plot(xBinM, rmsP, c='cyan')
ax.plot(xBinM, rmsM, c='cyan')
#
xL = np.linspace(-100,100)
ax.plot(xL, 0*xL+offset, c='red')
ax.plot(xL, 0*xL-offset, c='red')
#
ax.set_xlabel(Xstr)
ax.set_ylabel(Ystr)
ax.set_xlim(Xmin, Xmax)
ax.set_ylim(Ymin, Ymax)
plt.show()
return
# given vectors x and y, fit medians in bins from xMin to xMax, with Nbin steps,
# and return xBin, medianBin, medianErrBin
def fitMedians(x, y, xMin, xMax, Nbin, verbose=1):
# first generate bins
xEdge = np.linspace(xMin, xMax, (Nbin+1))
xBin = np.linspace(0, 1, Nbin)
nPts = 0*np.linspace(0, 1, Nbin)
medianBin = 0*np.linspace(0, 1, Nbin)
sigGbin = -1+0*np.linspace(0, 1, Nbin)
for i in range(0, Nbin):
xBin[i] = 0.5*(xEdge[i]+xEdge[i+1])
yAux = y[(x>xEdge[i])&(x<=xEdge[i+1])]
if (yAux.size > 0):
nPts[i] = yAux.size
medianBin[i] = np.median(yAux)
# robust estimate of standard deviation: 0.741*(q75-q25)
sigmaG = 0.741*(np.percentile(yAux,75)-np.percentile(yAux,25))
# uncertainty of the median: sqrt(pi/2)*st.dev/sqrt(N)
sigGbin[i] = np.sqrt(np.pi/2)*sigmaG/np.sqrt(nPts[i])
else:
nPts[i] = 0
medianBin[i] = 0
sigGbin[i] = 0
if (verbose):
print('median:', np.median(medianBin[nPts>0]))
return xBin, nPts, medianBin, sigGbin
In [74]:
qpBM(matchedG, 'raW', -51.5, 60.0, 'draGold', -0.5, 0.5, 120)
In [75]:
qpBM(matchedG, 'raW', -51.5, 60.0, 'draGnew', -0.5, 0.5, 120)
In [84]:
qpBM(matchedG, 'dec_new', -1.3, 1.3, 'draGold', -0.2, 0.2, 120)
In [85]:
qpBM(matchedG, 'dec_new', -1.3, 1.3, 'draGnew', -0.2, 0.2, 120)
In [88]:
qpBM(matchedG, 'dec_new', -1.3, 1.3, 'ddecGnew', -0.2, 0.2, 120)
In [92]:
qpBM(matchedG, 'gi', -0.5, 3.5, 'ddecGnew', -0.4, 0.4, 120)
In [96]:
qpBM(matchedG, 'gi', -0.5, 3.5, 'ddecGold', -0.4, 0.4, 120)
In [79]:
draSGold = 3600*(matchedG['ra_old'] - matchedG['ra_gaia'])
ddecSGold = 3600*(matchedG['dec_old'] - matchedG['dec_gaia'])
draSGnew = 3600*(matchedG['ra_new'] - matchedG['ra_gaia'])
ddecSGnew = 3600*(matchedG['dec_new'] - matchedG['dec_gaia'])
In [83]:
fig,ax = plt.subplots(1,1,figsize=(8,6))
draOKold = draSGold[(draSGold>-1.0)&(draSGold<1.0)]
draOKnew = draSGnew[(draSGnew>-1.0)&(draSGnew<1.0)]
# draOKold = ddecSGold[(ddecSGold>-1.0)&(ddecSGold<1.0)]
# draOKnew = ddecSGnew[(ddecSGnew>-1.0)&(ddecSGnew<1.0)]
# 0.07502404806762897 0.08788627264795751 0.06829905389712634
# 0.06610463317925364 0.08853499324595368 0.06827260187222484
hist, bins = np.histogram(draOKold, bins=50)
center = (bins[:-1]+bins[1:])/2
ax.plot(center, hist, drawstyle='steps', c='red')
hist2, bins2 = np.histogram(draOKnew, bins=50)
center2 = (bins2[:-1]+bins2[1:])/2
ax.plot(center2, hist2, drawstyle='steps', c='blue')
ax.set_xlabel('dRA (arcsec)')
ax.set_ylabel('dN')
ax.set_xlim(-1.0, 1.0)
plt.show()
print(np.median(draOKold), np.std(draOKold), sigG(draOKold))
print(np.median(draOKnew), np.std(draOKnew), sigG(draOKnew))
In [98]:
raW = matchedG['raW']
raWold = raW[(draSGold>-1.0)&(draSGold<1.0)]
raWnew = raW[(draSGnew>-1.0)&(draSGnew<1.0)]
draPlusOld = draOKold[raWold>0]
draMinusOld = draOKold[raWold<0]
draPlusNew = draOKnew[raWnew>0]
draMinusNew = draOKnew[raWnew<0]
In [99]:
print(sigG(draMinusOld), sigG(draMinusNew))
In [100]:
print(sigG(draPlusOld), sigG(draPlusNew))
In [101]:
ax = plt.axes()
#hist(draMinusOld, bins='knuth', ax=ax, histtype='stepfilled', ec='k', fc='yellow')
#hist(draPlusOld, bins='knuth', ax=ax, histtype='stepfilled', ec='k', fc='#AAAAAA')
plt.hist(draMinusOld, bins='auto', histtype='stepfilled', ec='k', fc='yellow')
plt.hist(draPlusOld, bins='auto', histtype='stepfilled', ec='red', fc='blue')
ax.set_xlabel('dRA (arcsec)')
ax.set_ylabel('N(r, r+dr)')
ax.set_xlim(-0.6, 0.6)
plt.show()
print(sigG(draMinusOld))
print(sigG(draPlusOld))
In [102]:
ax = plt.axes()
#hist(draMinusOld, bins='knuth', ax=ax, histtype='stepfilled', ec='k', fc='yellow')
#hist(draPlusOld, bins='knuth', ax=ax, histtype='stepfilled', ec='k', fc='#AAAAAA')
plt.hist(draMinusNew, bins='auto', histtype='stepfilled', ec='k', fc='yellow')
plt.hist(draPlusNew, bins='auto', histtype='stepfilled', ec='red', fc='blue')
ax.set_xlabel('dRA (arcsec)')
ax.set_ylabel('N(r, r+dr)')
ax.set_xlim(-0.6, 0.6)
plt.show()
print(sigG(draMinusNew))
print(sigG(draPlusNew))
In [103]:
print(np.median(d2dG.arcsec), np.max(d2dG.arcsec))
In [10]:
### code for generating new quantities, such as dra, ddec, colors, differences in mags, etc
def derivedColumns(matches):
matches['dra'] = (matches['ra_new']-matches['ra_old'])*3600
matches['ddec'] = (matches['dec_new']-matches['dec_old'])*3600
matches['ra'] = matches['ra_old']
ra = matches['ra']
matches['raW'] = np.where(ra > 180, ra-360, ra)
matches['dec'] = matches['dec_old']
matches['u'] = matches['u_mMed_old']
matches['g'] = matches['g_mMed_old']
matches['r'] = matches['r_mMed_old']
matches['i'] = matches['i_mMed_old']
matches['z'] = matches['z_mMed_old']
matches['ug'] = matches['u_mMed_old'] - matches['g_mMed_old']
matches['gr'] = matches['g_mMed_old'] - matches['r_mMed_old']
matches['ri'] = matches['r_mMed_old'] - matches['i_mMed_old']
matches['gi'] = matches['g_mMed_old'] - matches['i_mMed_old']
matches['du'] = matches['u_mMed_old'] - matches['u_mMed_new']
matches['dg'] = matches['g_mMed_old'] - matches['g_mMed_new']
matches['dr'] = matches['r_mMed_old'] - matches['r_mMed_new']
matches['di'] = matches['i_mMed_old'] - matches['i_mMed_new']
matches['dz'] = matches['z_mMed_old'] - matches['z_mMed_new']
return
derivedColumns(new_old)
In [11]:
fig,ax = plt.subplots(1,1,figsize=(8,6))
ax.scatter(new_old['dra'], new_old['ddec'], s=0.01, c='blue')
ax.set_xlim(-10,10)
ax.set_ylim(-10,10)
ax.set_xlabel('dRA (arcsec)')
ax.set_ylabel('dDec (arcsec)')
print(np.median(new_old['dra']))
print(np.std(new_old['dra']))
print(np.median(new_old['ddec']))
print(np.std(new_old['ddec']))
In [12]:
ax = plt.axes()
hist(new_old['dra'], bins='knuth', ax=ax, histtype='stepfilled', ec='k', fc='#AAAAAA')
ax.set_xlabel('dRA (arcsec)')
ax.set_ylabel('N(r, r+dr)')
ax.set_xlim(-0.6, 0.6)
plt.show()
sigG(new_old['dra'])
Out[12]:
In [13]:
ax = plt.axes()
hist(new_old['ddec'], bins='knuth', ax=ax, histtype='stepfilled', ec='k', fc='#AAAAAA')
ax.set_xlabel('dDec (arcsec)')
ax.set_ylabel('N(r, r+dr)')
ax.set_xlim(-0.15, 0.15)
plt.show()
sigG(new_old['ddec'])
Out[13]:
In [14]:
## split abs(dRA) to <0.03, 0.1-0.15 and 0.3-0.4 and look for correlations with ra/dec, color, mag, distance...
adra = np.abs(new_old['dra'])
s1 = new_old[adra<0.03]
s2 = new_old[(adra>0.10)&(adra<0.15)]
s3 = new_old[(adra>0.30)&(adra<0.40)]
In [15]:
print(np.size(s1),np.size(s2),np.size(s3))
In [16]:
# quick plot - compare three subsamples
def qp3(d1, d2, d3, Xstr, Xmin, Xmax, Ystr, Ymin, Ymax):
ax = plt.axes()
ax.set_xlabel(Xstr)
ax.set_ylabel(Ystr)
ax.scatter(d1[Xstr], d1[Ystr], s=0.01, c='blue')
ax.scatter(d2[Xstr], d2[Ystr], s=0.01, c='red')
ax.scatter(d3[Xstr], d3[Ystr], s=0.01, c='green')
ax.set_xlim(Xmin, Xmax)
ax.set_ylim(Ymin, Ymax)
plt.show()
return
In [17]:
qp3(s1, s2, s3, 'gi', -1.0, 4.5, 'g', 24, 13)
In [18]:
qp3(s1, s2, s3, 'ug', -0.6, 3.5, 'gr', -0.7, 2.2)
In [19]:
qp3(s1, s2, s3, 'gr', -0.7, 2.2, 'ri', -0.7, 2.5)
In [20]:
qp3(s1, s2, s3, 'raW', -60, 70, 'dec', -1.3, 1.3)
In [21]:
ax = plt.axes()
hist(s1['raW'], bins='knuth', ax=ax, histtype='stepfilled', ec='k', fc='blue')
hist(s2['raW'], bins='knuth', ax=ax, histtype='stepfilled', ec='k', fc='red')
hist(s3['raW'], bins='knuth', ax=ax, histtype='stepfilled', ec='k', fc='green')
ax.set_xlabel('RA')
ax.set_ylabel('N(r, r+dr)')
ax.set_xlim(-60, 65)
plt.show()
In [22]:
draPlus = new_old['dra'][new_old['raW']>0]
draMinus = new_old['dra'][new_old['raW']<0]
ax = plt.axes()
hist(draMinus, bins='knuth', ax=ax, histtype='stepfilled', ec='k', fc='yellow')
hist(draPlus, bins='knuth', ax=ax, histtype='stepfilled', ec='k', fc='#AAAAAA')
ax.set_xlabel('dRA (arcsec)')
ax.set_ylabel('N(r, r+dr)')
ax.set_xlim(-0.6, 0.6)
plt.show()
print(sigG(draMinus))
print(sigG(draPlus))
In [129]:
m1 = new_old[(new_old['sep_2d_arcsec'] < 0.5)]
a1 = m1['g_Nobs_new']
a2 = m1['r_Nobs_new']
a3 = m1['i_Nobs_new']
matched = m1[(a1>3)&(a2>3)&(a3>3)]
print(len(new_old))
print(len(m1))
print(len(matched))
In [136]:
mm = m1[(a1<4)]
print(len(mm))
print(np.median(m1['g_mMed_old']), np.median(mm['g_mMed_old']),)
In [64]:
def checkNobs(d, band):
str1 = band + '_Nobs_old'
arr1 = matched[str1]
print(band, 'band, OLD:')
printStats(arr1)
str1 = band + '_Nobs_new'
arr1 = matched[str1]
print(' NEW:')
printStats(arr1)
print(' DIFF:')
str2 = 'd' + band
arr2 = matched[str2]
printStats(arr2)
return
def printStats(arr):
print(' ', np.min(arr), np.median(arr), np.max(arr))
return
In [66]:
checkNobs(matched, 'g')
checkNobs(matched, 'r')
checkNobs(matched, 'i')
In [71]:
matchedOKu = matched[(matched['u_Nobs_old']>3)&(matched['u_Nobs_new']>3)]
matchedOKz = matched[(matched['z_Nobs_old']>3)&(matched['u_Nobs_new']>3)]
print(len(matched))
print(len(matchedOKu))
print(len(matchedOKz))
checkNobs(matchedOKu, 'u')
checkNobs(matchedOKz, 'z')
In [48]:
n1, b1, p1 = plt.hist(matched['u_Nobs_new'], bins='auto', histtype='stepfilled', ec='red', fc='yellow')
n2, b2, p2 = plt.hist(matched['u_Nobs_old'], bins='auto', histtype='stepfilled', ec='k', fc='#AAAAAA')
In [33]:
ax = plt.axes()
n1, b1, p1 = plt.hist(matched['u_Nobs_new'], bins='auto', histtype='stepfilled', ec='red', fc='yellow')
n2, b2, p2 = plt.hist(matched['u_Nobs_old'], bins='auto', histtype='stepfilled', ec='blue', fc='yellow')
ax.set_xlabel('Nobs(u)')
ax.set_ylabel('dN/dNobs')
ax.set_xlim(1, 42)
plt.show()
In [118]:
dmMax = 0.15
dgOK = matched['dg'][np.abs(matched['dg'])<dmMax]
ax = plt.axes()
plt.hist(dgOK, bins='auto', histtype='stepfilled', ec='k', fc='#AAAAAA')
ax.set_xlabel('dg (mag, old-new)')
ax.set_ylabel('n')
ax.set_xlim(-0.15, 0.15)
plt.show()
print(np.std(matched['dg']))
print(sigG(matched['dg']))
print(np.std(dgOK))
print(sigG(dgOK))
In [25]:
qpBM(matched, 'raW', -51.5, 60.0, 'dg', -0.1, 0.1, 500)
In [40]:
qpBM(matched, 'dec', -1.3, 1.3, 'dg', -0.1, 0.1, 120)
In [119]:
qpBM(matched, 'dec', -1.3, 1.3, 'dg', -0.025, 0.025, 120)
In [43]:
qpBM(matched, 'g', 14, 23.0, 'dg', -0.1, 0.1, 150)
In [45]:
qpBM(matched, 'gi', -0.7, 3.5, 'dg', -0.1, 0.1, 150)
In [109]:
qpBM(matchedOKu, 'u', 15, 23, 'du', -0.2, 0.2, 120)
In [123]:
matchedOKu2 = matchedOKu[(matchedOKu['u']>15.5)&(matchedOKu['u']<21.5)]
print(len(matchedOKu))
print(len(matchedOKu2))
print(np.median(matchedOKu['du']), np.median(matchedOKu2['du']))
print(np.median(matched['dg']),np.median(matched['dr']),np.median(matched['di']))
In [125]:
qpBM(matchedOKu2, 'dec', -1.3, 1.3, 'du', -0.05, 0.05, 120)
In [126]:
qpBM(matched, 'dec', -1.3, 1.3, 'dr', -0.025, 0.025, 120)
In [99]:
Gr = gaia_matched['Gmag'] - gaia_matched['r_mMed']
gi = gaia_matched['g_mMed'] - gaia_matched['i_mMed']
GrOK = gaia_matchedOK['Gmag'] - gaia_matchedOK['r_mMed']
giOK = gaia_matchedOK['g_mMed'] - gaia_matchedOK['i_mMed']
# medians
#xBin, nPts, medianBin, sigGbin = fitMedians(gi, Gr, -0.7, 4.0, 47, 0)
#xBinOK, nPtsOK, medianBinOK, sigGbinOK = fitMedians(giOK, GrOK, -0.2, 3.2, 34, 0)
xBin, nPts, medianBin, sigGbin = fitMedians(gi, Gr, 0.0, 3.0, 30, 0)
xBinOK, nPtsOK, medianBinOK, sigGbinOK = fitMedians(giOK, GrOK, 0.0, 3.0, 30, 0)
#print xBin, nPts, medianBin, sigGbin
medOK1 = medianBin[(xBin>2)&(xBin<3)]
medOK2 = medianBinOK[(xBinOK>2)&(xBinOK<3)]
dmedOK = medOK2 - medOK1
#print dmedOK
In [100]:
from scipy import stats
from scipy import optimize
# this function computes polynomial models given some data x
# and parameters theta
def polynomial_fit(theta, x):
"""Polynomial model of degree (len(theta) - 1)"""
return sum(t * x ** n for (n, t) in enumerate(theta))
# compute the data log-likelihood given a model
def logL(data, theta, model=polynomial_fit):
"""Gaussian log-likelihood of the model at theta"""
x, y, sigma_y = data
y_fit = model(theta, x)
return sum(stats.norm.logpdf(*args)
for args in zip(y, y_fit, sigma_y))
# a direct optimization approach is used to get best model
# parameters (which minimize -logL)
def best_theta(data, degree, model=polynomial_fit):
theta_0 = (degree + 1) * [0]
neg_logL = lambda theta: -logL(data, theta, model)
return optimize.fmin_bfgs(neg_logL, theta_0, disp=False)
In [101]:
data = np.array([xBin, medianBin, sigGbin])
x, y, sigma_y = data
theta1 = best_theta(data,1)
In [208]:
from scipy import stats
from scipy import optimize
# this function computes a linear combination of 4 functions
# given parameters theta
def linear_fit(coeffs, x, w, y, z):
ffit = coeffs[0]*x + coeffs[1]*w + coeffs[2]*y + coeffs[3]*z
return ffit
# compute the data log-likelihood given a model
def logL(dataL, coeffs, model=linear_fit):
"""Gaussian log-likelihood of the model at theta"""
x, w, y, z, f, sigma_f = dataL
f_fit = model(coeffs, x, w, y, z)
return sum(stats.norm.logpdf(*args)
for args in zip(f, f_fit, sigma_f))
# a direct optimization approach is used to get best model
# parameters (which minimize -logL)
def best_lintheta(dataL, degree=4, model=linear_fit):
coeffs_0 = degree * [0]
neg_logL = lambda coeffs: -logL(dataL, coeffs, model)
return optimize.fmin_bfgs(neg_logL, coeffs_0, disp=False)
In [209]:
GmagOK = gaia_matchedOK['Gmag']
GG = 25.525-2.5*np.log10(gaia_matchedOK['flux'])
dG = GG - GmagOK
print np.median(dG)
print np.std(dG)
print np.median(gaia_matchedOK['flux'])
In [210]:
ra = gaia_matchedOK['ra_gaia']
raW = np.where(ra > 180, ra-360, ra)
gi = gaia_matchedOK['g_mMed'] - gaia_matchedOK['i_mMed']
rMed = gaia_matchedOK['r_mMed']
flagOK = ((raW > -10) & (raW < 50) & (rMed>16) & (rMed<19) & (gi>0) & (gi<3.0))
Gmag = gaia_matchedOK['Gmag']
flagOK2 = (flagOK & (Gmag > 16) & (Gmag < 16.5))
gaia_matchedOK2 = gaia_matchedOK[flagOK2]
print(len(gaia_matchedOK))
print(len(gaia_matchedOK2))
In [211]:
fluxGaia = gaia_matchedOK2['flux']
fluxGaiaErr = gaia_matchedOK2['fluxErr']
gFlux = 10**(0.4*gaia_matchedOK2['g_mMean'])
rFlux = 10**(0.4*gaia_matchedOK2['r_mMean'])
iFlux = 10**(0.4*gaia_matchedOK2['i_mMean'])
zFlux = 10**(0.4*gaia_matchedOK2['z_mMean'])
dataL = np.array([gFlux, rFlux, iFlux, zFlux, fluxGaia, fluxGaiaErr])
x, w, y, z, f, sigma_f = dataL
coeffs1 = best_lintheta(dataL)
ffit = linear_fit(coeffs1, x, w, y, z)
dmag = -2.5*np.log10(ffit/f)
In [215]:
print np.median(dmag)
print np.std(dmag)
print coeffs1
print np.median(f)
print np.std(f)
print np.median(ffit)
print np.std(ffit)
In [214]:
fluxGaia = gaia_matchedOK['flux']
gFlux = 10**(0.4*gaia_matchedOK['g_mMean'])
rFlux = 10**(0.4*gaia_matchedOK['r_mMean'])
iFlux = 10**(0.4*gaia_matchedOK['i_mMean'])
zFlux = 10**(0.4*gaia_matchedOK['z_mMean'])
ffitOK = linear_fit(coeffs1, gFlux, rFlux, iFlux, zFlux)
dmagOK = -2.5*np.log10(ffitOK/fluxGaia)
print np.median(dmagOK)
print np.std(dmagOK)
In [102]:
data = np.array([xBin, medianBin, sigGbin])
Ndata = xBin.size
# get best-fit parameters for linear, quadratic and cubic models
theta1 = best_theta(data,3)
theta2 = best_theta(data,5)
theta3 = best_theta(data,7)
# generate best fit lines on a fine grid
xfit = np.linspace(-1.1, 4.3, 1000)
yfit1 = polynomial_fit(theta1, xfit)
yfit2 = polynomial_fit(theta2, xfit)
yfit3 = polynomial_fit(theta3, xfit)
# and compute chi2 per degree of freedom: sum{[(y-yfit)/sigma_y]^2}
chi21 = np.sum(((y-polynomial_fit(theta1, x))/sigma_y)**2)
chi22 = np.sum(((y-polynomial_fit(theta2, x))/sigma_y)**2)
chi23 = np.sum(((y-polynomial_fit(theta3, x))/sigma_y)**2)
# the number of fitted parameters is 2, 3, 4
chi2dof1 = chi21/(Ndata - 2)
chi2dof2 = chi22/(Ndata - 3)
chi2dof3 = chi23/(Ndata - 4)
In [103]:
print "CHI2:"
print ' best linear model:', chi21
print 'best quadratic model:', chi22
print ' best cubic model:', chi23
print "CHI2 per degree of freedom:"
print ' best linear model:', chi2dof1
print 'best quadratic model:', chi2dof2
print ' best cubic model:', chi2dof3
In [104]:
# Plot a (gaia - r) vs (g-i) for photometric transformation
%matplotlib inline
# plot
fig,ax = plt.subplots(1,1,figsize=(12,8))
ax.scatter(gi, Gr, s=0.01, c='blue')
ax.scatter(giOK, GrOK, s=0.01, c='red')
# medians
ax.scatter(xBin, medianBin, s=30.0, c='black', alpha=0.5)
ax.scatter(xBinOK, medianBinOK, s=30.0, c='yellow', alpha=0.5)
ax.set_xlim(-1,4)
ax.set_ylim(-2.0,0.5)
ax.set_xlabel('SDSS(g-i)')
ax.set_ylabel('Gaia G - SDSS r')
ax.errorbar(x, y, sigma_y, fmt='ok', ecolor='gray')
ax.plot(xfit, polynomial_fit(theta1, xfit), label='best P3 model')
ax.plot(xfit, polynomial_fit(theta2, xfit), label='best P5 model')
ax.plot(xfit, polynomial_fit(theta3, xfit), label='best P7 model')
ax.legend(loc='best', fontsize=14)
Out[104]:
In [105]:
print theta3
In [155]:
# GrModel = -0.06348 -0.03111*gi +0.08643*gi*gi -0.05593*gi*gi*gi
GrModel = sum(t * gi ** n for (n, t) in enumerate(theta3))
GrResid = Gr - GrModel
xBinM, nPtsM, medianBinM, sigGbinM = fitMedians(gi, GrResid, -0.7, 4.0, 47, 0)
In [157]:
fig,ax = plt.subplots(1,1,figsize=(12,8))
ax.scatter(gi, GrResid, s=0.01, c='blue')
# medians
ax.scatter(xBinM, medianBinM, s=30.0, c='black', alpha=0.8)
ax.scatter(xBinM, medianBinM, s=20.0, c='yellow', alpha=0.3)
TwoSigP = medianBinM + 2*sigGbinM
TwoSigM = medianBinM - 2*sigGbinM
ax.plot(xBinM, TwoSigP, c='yellow')
ax.plot(xBinM, TwoSigM, c='yellow')
rmsBin = np.sqrt(nPtsM) / np.sqrt(np.pi/2) * sigGbinM
rmsP = medianBinM + rmsBin
rmsM = medianBinM - rmsBin
ax.plot(xBinM, rmsP, c='cyan')
ax.plot(xBinM, rmsM, c='cyan')
ax.set_xlim(-1,4.2)
ax.set_ylim(-0.14,0.14)
ax.set_xlabel('g-i')
ax.set_ylabel('residuals for (Gaia G - SDSS r)')
xL = np.linspace(-10,10)
ax.plot(xL, 0*xL+0.00, c='red')
ax.plot(xL, 0*xL+0.01, c='red')
ax.plot(xL, 0*xL-0.01, c='red')
ax.plot(0*xL+0.4, xL, c='yellow')
ax.plot(0*xL+2.0, xL, c='yellow')
Out[157]:
In [108]:
medOK = medianBinM[(xBinM>0.4)&(xBinM<2.0)]
In [109]:
print medOK
In [110]:
np.median(medOK)
Out[110]:
In [111]:
np.std(medOK)
Out[111]:
In [112]:
np.max(medOK)
Out[112]:
In [113]:
np.min(medOK)
Out[113]:
In [169]:
residOK = GrResid[(gi>0.4)&(gi<2.0)&(raW>-10)&(raW<50)]
magOK = rMed[(gi>0.4)&(gi<2.0)&(raW>-10)&(raW<50)]
gaiaG = gaia_matched['Gmag']
GOK = gaiaG[(gi>0.4)&(gi<2.0)&(raW>-10)&(raW<50)]
print np.median(residOK)
print np.std(residOK)
In [115]:
xBinMg, nPtsMg, medianBinMg, sigGbinMg = fitMedians(magOK, residOK, 14, 20.5, 65, 0)
In [158]:
fig,ax = plt.subplots(1,1,figsize=(8,6))
ax.scatter(magOK, residOK, s=0.01, c='blue')
# medians
ax.scatter(xBinMg, medianBinMg, s=30.0, c='black', alpha=0.8)
ax.scatter(xBinMg, medianBinMg, s=20.0, c='yellow', alpha=0.3)
ax.set_xlim(13,23)
ax.set_ylim(-0.15,0.15)
ax.set_xlabel('SDSS r')
ax.set_ylabel('residuals for $G-G_{SDSS}$ ')
xL = np.linspace(-10,30)
ax.plot(xL, 0*xL+0.00, c='yellow')
ax.plot(xL, 0*xL+0.01, c='red')
ax.plot(xL, 0*xL-0.01, c='red')
Out[158]:
In [117]:
print medianBinMg
In [123]:
fig,ax = plt.subplots(1,1,figsize=(8,6))
ax.scatter(magOK, residOK, s=0.01, c='blue')
# medians
ax.scatter(xBinMg, medianBinMg, s=30.0, c='black', alpha=0.8)
ax.scatter(xBinMg, medianBinMg, s=20.0, c='yellow', alpha=0.3)
ax.set_xlim(13,23)
ax.set_ylim(-0.05,0.05)
ax.set_xlabel('SDSS r')
ax.set_ylabel('residuals for $G-G_{SDSS}$ ')
xL = np.linspace(-10,30)
ax.plot(xL, 0*xL+0.00, c='yellow')
ax.plot(xL, 0*xL+0.01, c='red')
ax.plot(xL, 0*xL-0.01, c='red')
Out[123]:
In [161]:
print magOK.size
print GOK.size
xBinMg, nPtsMg, medianBinMg, sigGbinMg = fitMedians(GOK, residOK, 14, 20.5, 130, 0)
In [162]:
fig,ax = plt.subplots(1,1,figsize=(8,6))
ax.scatter(GOK, residOK, s=0.01, c='blue')
# medians
ax.scatter(xBinMg, medianBinMg, s=30.0, c='black', alpha=0.8)
ax.scatter(xBinMg, medianBinMg, s=20.0, c='yellow', alpha=0.3)
TwoSigP = medianBinMg + 2*sigGbinMg
TwoSigM = medianBinMg - 2*sigGbinMg
ax.plot(xBinMg, TwoSigP, c='yellow')
ax.plot(xBinMg, TwoSigM, c='yellow')
rmsBin = np.sqrt(nPtsMg) / np.sqrt(np.pi/2) * sigGbinMg
rmsP = medianBinMg + rmsBin
rmsM = medianBinMg - rmsBin
ax.plot(xBinMg, rmsP, c='cyan')
ax.plot(xBinMg, rmsM, c='cyan')
ax.set_xlim(13,23)
ax.set_ylim(-0.05,0.05)
ax.set_xlabel('Gaia G mag')
ax.set_ylabel('residuals for $G_{Gaia}-G_{SDSS}$ ')
xL = np.linspace(-10,30)
ax.plot(xL, 0*xL+0.00, c='cyan')
ax.plot(xL, 0*xL+0.01, c='red')
ax.plot(xL, 0*xL-0.01, c='red')
Out[162]:
In [181]:
gi = gaia_matched['g_mMed'] - gaia_matched['i_mMed']
ra = gaia_matched['ra_gaia']
raW = np.where(ra > 180, ra-360, ra)
flux = gaia_matched['flux']
fluxErr = gaia_matched['fluxErr']
fluxOK = flux[(gi>0.4)&(gi<2.0)&(raW>-10)&(raW<50)]
fluxErrOK = fluxErr[(gi>0.4)&(gi<2.0)&(raW>-10)&(raW<50)]
rBandErr = gaia_matched['r_mErr']
rBandErrOK = rBandErr[(gi>0.4)&(gi<2.0)&(raW>-10)&(raW<50)]
## Gaia's errors underestimated by a factor of ~2
sigma = np.sqrt((2*fluxErrOK/fluxOK)**2 + 1*rBandErrOK**2)
chi = residOK / sigma
xBinMg, nPtsMg, medianBinMg, sigGbinMg = fitMedians(GOK, chi, 14, 20.5, 130, 0)
fig,ax = plt.subplots(1,1,figsize=(8,6))
ax.scatter(GOK, chi, s=0.01, c='blue')
# medians
ax.scatter(xBinMg, medianBinMg, s=30.0, c='black', alpha=0.8)
ax.scatter(xBinMg, medianBinMg, s=20.0, c='yellow', alpha=0.3)
TwoSigP = medianBinMg + 2*sigGbinMg
TwoSigM = medianBinMg - 2*sigGbinMg
ax.plot(xBinMg, TwoSigP, c='yellow')
ax.plot(xBinMg, TwoSigM, c='yellow')
rmsBin = np.sqrt(nPtsMg) / np.sqrt(np.pi/2) * sigGbinMg
rmsP = medianBinMg + rmsBin
rmsM = medianBinMg - rmsBin
ax.plot(xBinMg, rmsP, c='cyan')
ax.plot(xBinMg, rmsM, c='cyan')
ax.set_xlim(13,23)
ax.set_ylim(-5,5)
ax.set_xlabel('Gaia G mag')
ax.set_ylabel('($G_{Gaia}-G_{SDSS}$)/$\sigma$')
xL = np.linspace(-10,30)
ax.plot(xL, 0*xL+0.00, c='cyan')
ax.plot(xL, 0*xL+2, c='red')
ax.plot(xL, 0*xL-2, c='red')
Out[181]:
In [179]:
Gerr = fluxErrOK/fluxOK
print np.median(Gerr)
print np.median(rBandErrOK)
In [166]:
residOK2 = residOK[(magOK>15)&(magOK<16)]
print np.median(residOK2)
mm = medianBinMg[(xBinMg>15)&(xBinMg<16)]
xx = xBinMg[(xBinMg>15)&(xBinMg<16)]
print mm
print xx
print "transition at G ~ 15.6"
In [130]:
## conclusions
# 1) select:(-10 < RA < 50) & (16 < SDSSr < 19) & (0.4< g-i < 2.0)
thetaFinal = theta3
print thetaFinal
In [142]:
rMed = gaia_matched['r_mMed']
gi = gaia_matched['g_mMed'] - gaia_matched['i_mMed']
ra = gaia_matched['ra_gaia']
raW = np.where(ra > 180, ra-360, ra)
flagOK = ((raW > -10) & (raW < 50) & (rMed>16) & (rMed<18) & (gi>0) & (gi<3.0))
# flagOK = ((raW > -10) & (raW < 50) & (rMed>16) & (rMed<19) & (gi>0) & (gi<3.0))
gaia_matchedOK = gaia_matched[flagOK]
print(len(gaia_matchedOK))
In [143]:
giOK = gaia_matchedOK['g_mMed'] - gaia_matchedOK['i_mMed']
GrOK = gaia_matchedOK['Gmag'] - gaia_matchedOK['r_mMed']
GmagOK = gaia_matchedOK['Gmag']
GrModel = sum(t * giOK ** n for (n, t) in enumerate(theta3))
GrResid = GrOK - GrModel
print np.median(GrResid)
print np.std(GrResid)
print np.min(GrResid)
print np.max(GrResid)
In [144]:
#xBinMg, nPtsMg, medianBinMg, sigGbinMg = fitMedians(GmagOK, GrResid, 16, 18.8, 14, 0)
xBinMg, nPtsMg, medianBinMg, sigGbinMg = fitMedians(GmagOK, GrResid, 16, 17.8, 14, 0)
In [145]:
fig,ax = plt.subplots(1,1,figsize=(8,6))
ax.scatter(GmagOK, GrResid, s=0.01, c='blue')
# medians
ax.scatter(xBinMg, medianBinMg, s=30.0, c='black', alpha=0.9)
ax.scatter(xBinMg, medianBinMg, s=15.0, c='yellow', alpha=0.5)
ax.set_xlim(15.5,19)
ax.set_ylim(-0.07,0.07)
ax.set_xlabel('Gaia G mag')
ax.set_ylabel('Gr residuals')
xL = np.linspace(-10,30)
ax.plot(xL, 0*xL+0.00, c='yellow')
ax.plot(xL, 0*xL+0.01, c='red')
ax.plot(xL, 0*xL-0.01, c='red')
Out[145]:
In [146]:
print np.median(medianBinMg)
print np.std(medianBinMg)
In [147]:
GrResidN = GrResid - np.median(medianBinMg)
In [148]:
ra = gaia_matchedOK['ra_sdss']
raW = np.where(ra > 180, ra-360, ra)
fig,ax = plt.subplots(1,1,figsize=(8,6))
ax.scatter(raW, GrResidN, s=0.01, c='blue')
ax.set_xlim(-12,52)
ax.set_ylim(-0.06,0.06)
ax.set_xlabel('R.A.')
ax.set_ylabel('Gmag residual')
xBin, nPts, medianBin, sigGbin = fitMedians(raW, GrResidN, -10, 50, 60, 0)
ax.scatter(xBin, medianBin, s=30.0, c='black', alpha=0.9)
ax.scatter(xBin, medianBin, s=15.0, c='yellow', alpha=0.5)
TwoSigP = medianBin + 2*sigGbin
TwoSigM = medianBin - 2*sigGbin
ax.plot(xBin, TwoSigP, c='yellow')
ax.plot(xBin, TwoSigM, c='yellow')
xL = np.linspace(-100,100)
ax.plot(xL, 0*xL+0.00, c='yellow')
ax.plot(xL, 0*xL+0.01, c='red')
ax.plot(xL, 0*xL-0.01, c='red')
Out[148]:
In [149]:
print np.median(medianBin)
print np.std(medianBin)
print np.min(medianBin)
print np.max(medianBin)
In [153]:
dec = gaia_matchedOK['dec_sdss']
fig,ax = plt.subplots(1,1,figsize=(8,6))
ax.scatter(dec, GrResidN, s=0.01, c='blue')
ax.set_xlim(-1.3,1.3)
ax.set_ylim(-0.06,0.06)
ax.set_xlabel('Declination')
ax.set_ylabel('Gmag residual')
xBin, nPts, medianBin, sigGbin = fitMedians(dec, GrResidN, -1.2, 1.2, 120, 0)
ax.scatter(xBin, medianBin, s=30.0, c='black', alpha=0.9)
ax.scatter(xBin, medianBin, s=15.0, c='yellow', alpha=0.5)
TwoSigP = medianBin + 2*sigGbin
TwoSigM = medianBin - 2*sigGbin
ax.plot(xBin, TwoSigP, c='yellow')
ax.plot(xBin, TwoSigM, c='yellow')
xL = np.linspace(-100,100)
ax.plot(xL, 0*xL+0.00, c='yellow')
ax.plot(xL, 0*xL+0.01, c='red')
ax.plot(xL, 0*xL-0.01, c='red')
for i in range(1,12):
decCol = -1.2655 + i*0.2109
ax.plot(0*xL+decCol, xL, c='red')
In [151]:
print np.median(medianBin)
print np.std(medianBin)
print np.min(medianBin)
print np.max(medianBin)
In [ ]: