In [1]:
%matplotlib inline
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy import units as u
from astropy.table import hstack
import matplotlib.pyplot as plt
import numpy as np
from astroML.plotting import hist
# for astroML installation see https://www.astroml.org/user_guide/installation.html
In [ ]:
## automatically reload any modules read below that might have changed (e.g. plots)
%load_ext autoreload
%autoreload 2
# importing ZI tools:
import ZItools as zit
In [2]:
### selection tools and numerical analysis
# robust standard deviation (0.741 is the conversion factor for a Gaussian)
def sigG(arr):
    """Robust standard deviation estimate: 0.741 times the interquartile range."""
    q25, q75 = np.quantile(arr, [0.25, 0.75])
    return 0.741 * (q75 - q25)
def checkNobs(d, band):
    """Print summary statistics of the number of observations in the given
    band for the old catalog, the new catalog, and their difference column.

    Expects d to contain columns '<band>_Nobs_old', '<band>_Nobs_new'
    and 'd<band>'.
    """
    print(band, 'band, OLD:')
    printStats(d[band + '_Nobs_old'])
    print(' NEW:')
    printStats(d[band + '_Nobs_new'])
    print(' DIFF:')
    printStats(d['d' + band])
    return
def printStats(arr):
    """Print min, mean, median, max, and size of arr on a single line."""
    summary = (np.min(arr), np.mean(arr), np.median(arr), np.max(arr), np.size(arr))
    print(' ', *summary)
    return
# given vectors x and y, fit medians in bins from xMin to xMax, with Nbin steps,
# and return xBin, nPts, medianBin, sigGbin
def fitMedians(x, y, xMin, xMax, Nbin, verbose=1):
    """Bin x into Nbin equal-width bins spanning [xMin, xMax] and compute the
    median of y in each bin.

    Parameters
    ----------
    x, y : array-like
        Input vectors of equal length.
    xMin, xMax : float
        Range spanned by the bins.
    Nbin : int
        Number of bins.
    verbose : int, optional
        If nonzero, print the median and std.dev. of the per-bin medians
        (computed over non-empty bins only).

    Returns
    -------
    xBin : ndarray
        Bin centers.
    nPts : ndarray
        Number of points in each bin.
    medianBin : ndarray
        Median of y per bin (0 for empty bins).
    sigGbin : ndarray
        Uncertainty of the median: sqrt(pi/2)*sigmaG/sqrt(N), where
        sigmaG = 0.741*(q75-q25) is the robust st.dev. (0 for empty bins).
    """
    x = np.asarray(x)
    y = np.asarray(y)
    # generate bin edges and centers
    xEdge = np.linspace(xMin, xMax, Nbin + 1)
    xBin = 0.5 * (xEdge[:-1] + xEdge[1:])
    nPts = np.zeros(Nbin)
    medianBin = np.zeros(Nbin)
    sigGbin = np.zeros(Nbin)
    for i in range(Nbin):
        # bug fix: include the left edge in the first bin so points with
        # x == xMin are not silently dropped (original used a strict '>')
        if i == 0:
            inBin = (x >= xEdge[0]) & (x <= xEdge[1])
        else:
            inBin = (x > xEdge[i]) & (x <= xEdge[i + 1])
        yAux = y[inBin]
        if yAux.size > 0:
            nPts[i] = yAux.size
            medianBin[i] = np.median(yAux)
            # robust estimate of standard deviation: 0.741*(q75-q25)
            sigmaG = 0.741 * (np.percentile(yAux, 75) - np.percentile(yAux, 25))
            # uncertainty of the median: sqrt(pi/2)*st.dev/sqrt(N)
            sigGbin[i] = np.sqrt(np.pi / 2) * sigmaG / np.sqrt(nPts[i])
    if verbose:
        print('median:', np.median(medianBin[nPts > 0]), 'std.dev:', np.std(medianBin[nPts > 0]))
    return xBin, nPts, medianBin, sigGbin
In [3]:
## for Gaia-related analysis
from scipy import stats
from scipy import optimize
# this function computes polynomial models given some data x
# and parameters theta
def polynomial_fit(theta, x):
    """Polynomial model of degree (len(theta) - 1), evaluated at x.

    theta[n] multiplies x**n; evaluation uses Horner's scheme, which is
    algebraically identical to sum(theta[n] * x**n).
    """
    result = 0
    for coef in reversed(theta):
        result = result * x + coef
    return result
# compute the data log-likelihood given a model
def logL(data, theta, model=polynomial_fit):
    """Gaussian log-likelihood of the model at theta.

    data is a (x, y, sigma_y) triple of equal-length arrays.
    """
    x, y, sigma_y = data
    y_fit = model(theta, x)
    # vectorized equivalent of summing per-point Gaussian log-densities
    return np.sum(stats.norm.logpdf(y, y_fit, sigma_y))
# a direct optimization approach is used to get best model
# parameters (which minimize -logL)
def best_theta(data, degree, model=polynomial_fit):
    """Return polynomial coefficients of the given degree that maximize the
    Gaussian log-likelihood of data (BFGS minimization of -logL)."""
    def neg_logL(theta):
        return -logL(data, theta, model)
    theta_0 = np.zeros(degree + 1)
    return optimize.fmin_bfgs(neg_logL, theta_0, disp=False)
# this function computes a linear combination of 4 functions
# given parameters theta
def linear_fit(coeffs, x, w, y, z):
    """Linear combination coeffs[0]*x + coeffs[1]*w + coeffs[2]*y + coeffs[3]*z."""
    total = coeffs[0] * x
    for coef, term in zip(coeffs[1:], (w, y, z)):
        total = total + coef * term
    return total
# compute the data log-likelihood given a model
def logL2(dataL, coeffs, model=linear_fit):
    """Gaussian log-likelihood of the linear model at coeffs.

    dataL is a (x, w, y, z, f, sigma_f) tuple of equal-length arrays.
    """
    x, w, y, z, f, sigma_f = dataL
    f_fit = model(coeffs, x, w, y, z)
    # vectorized sum of per-point Gaussian log-densities
    return np.sum(stats.norm.logpdf(f, f_fit, sigma_f))
# a direct optimization approach is used to get best model
# parameters (which minimize -logL)
def best_lintheta(dataL, degree=4, model=linear_fit):
    """Return the linear-combination coefficients that maximize the Gaussian
    log-likelihood of dataL (BFGS minimization of -logL2)."""
    def neg_logL(coeffs):
        return -logL2(dataL, coeffs, model)
    coeffs_0 = np.zeros(degree)
    return optimize.fmin_bfgs(neg_logL, coeffs_0, disp=False)
In [4]:
### plots
# quick plot
def qp(d, Xstr, Xmin, Xmax, Ystr, Ymin, Ymax):
    """Quick scatter plot of column Ystr vs. column Xstr of table d."""
    axes = plt.axes()
    axes.scatter(d[Xstr], d[Ystr], s=0.01, c='blue')
    axes.set(xlabel=Xstr, ylabel=Ystr, xlim=(Xmin, Xmax), ylim=(Ymin, Ymax))
    plt.show()
    return
# quick plot - compare three subsamples
def qp3(d1, d2, d3, Xstr, Xmin, Xmax, Ystr, Ymin, Ymax):
    """Quick scatter plot overlaying three subsamples (green, red, blue)."""
    axes = plt.axes()
    axes.set_xlabel(Xstr)
    axes.set_ylabel(Ystr)
    # draw in the original order so later samples overplot earlier ones
    for tbl, clr in ((d1, 'green'), (d2, 'red'), (d3, 'blue')):
        axes.scatter(tbl[Xstr], tbl[Ystr], s=0.01, c=clr)
    axes.set_xlim(Xmin, Xmax)
    axes.set_ylim(Ymin, Ymax)
    plt.show()
    return
# quick plot - binned median
def qpBM(d, Xstr, Xmin, Xmax, Ystr, Ymin, Ymax, nBin, Nsigma=3, offset=0.01):
    """Scatter plot of d[Ystr] vs. d[Xstr] with binned medians overplotted.

    Draws a +/-Nsigma band around the binned medians (yellow), the robust
    rms envelope (cyan), and horizontal reference lines at 0 and +/-offset
    (red). Prints overall summary statistics first.
    """
    yVals = d[Ystr]
    print('medianAll:', np.median(yVals), 'std.dev.All:', sigG(yVals))
    print('N=', np.size(yVals), 'min=', np.min(yVals), 'max=', np.max(yVals))
    axes = plt.axes()
    axes.scatter(d[Xstr], yVals, s=0.01, c='black')
    # binned medians with their uncertainties
    xBinM, nPtsM, medianBinM, sigGbinM = fitMedians(d[Xstr], yVals, Xmin, Xmax, nBin, 1)
    axes.scatter(xBinM, medianBinM, s=30.0, c='black', alpha=0.8)
    axes.scatter(xBinM, medianBinM, s=15.0, c='yellow', alpha=0.3)
    # +/- Nsigma band around the binned medians
    for sign in (+1, -1):
        axes.plot(xBinM, medianBinM + sign * Nsigma * sigGbinM, c='yellow')
    # robust rms envelope: undo the sqrt(pi/2)/sqrt(N) factor to recover sigmaG
    rmsBin = np.sqrt(nPtsM) / np.sqrt(np.pi / 2) * sigGbinM
    for sign in (+1, -1):
        axes.plot(xBinM, medianBinM + sign * rmsBin, c='cyan')
    # horizontal reference lines at zero and +/-offset
    xL = np.linspace(-100, 100)
    axes.plot(xL, 0 * xL, c='red')
    axes.plot(xL, 0 * xL + offset, '--', c='red')
    axes.plot(xL, 0 * xL - offset, '--', c='red')
    axes.set_xlabel(Xstr)
    axes.set_ylabel(Ystr)
    axes.set_xlim(Xmin, Xmax)
    axes.set_ylim(Ymin, Ymax)
    plt.show()
    return
def qphist(arr, xMin, xMax, xLabel, verbose=False):
    """Histogram of arr using Knuth's rule for bin widths (astroML hist).

    Prints min/max, mean/median, and sigG/st.dev. if verbose.
    """
    axes = plt.axes()
    hist(arr, bins='knuth', ax=axes, histtype='stepfilled', ec='k', fc='#AAAAAA')
    axes.set_xlabel(xLabel)
    axes.set_ylabel('n')
    axes.set_xlim(xMin, xMax)
    plt.show()
    if verbose:
        for label, v1, v2 in (('Min, max: ', np.min(arr), np.max(arr)),
                              ('Mean, median: ', np.mean(arr), np.median(arr)),
                              ('sigG, st.dev.: ', sigG(arr), np.std(arr))):
            print(label, v1, v2)
    return
def qpH0(arr, xMin, xMax, xLabel, nBins=0, verbose=False):
    """Histogram of arr with a dashed reference line at n=0.

    With nBins > 0, a fixed-width binned step curve is drawn; otherwise
    matplotlib's 'auto' binning with a filled histogram is used. Prints
    min/max, mean/median, and sigG/st.dev. if verbose.
    """
    ax = plt.axes()
    if nBins > 0:
        # locals renamed from 'hist'/'bins': 'hist' shadowed astroML's
        # hist function imported at the top of the notebook
        counts, edges = np.histogram(arr, bins=nBins)
        centers = (edges[:-1] + edges[1:]) / 2
        ax.plot(centers, counts, drawstyle='steps', c='blue')
    else:
        plt.hist(arr, bins='auto', histtype='stepfilled', ec='k', fc='red')
    ax.set_xlabel(xLabel)
    ax.set_ylabel('n')
    ax.set_xlim(xMin, xMax)
    # dashed zero line, spanning well beyond any plotted x range
    ax.plot([-1000, 1000], [0, 0], '--k')
    plt.show()
    if verbose:
        print('Min, max: ', np.min(arr), np.max(arr))
        print('Mean, median: ', np.mean(arr), np.median(arr))
        print('sigG, st.dev.: ', sigG(arr), np.std(arr))
    return
def qpHdm(d, band, dmMax, xMin, xMax, nBins=50, verbose=False):
    """Histogram of the magnitude-difference column 'd<band>' in millimag.

    Values with |dm| >= 1000*dmMax millimag are clipped before plotting.
    Returns (mean, median, sigG) of the clipped differences, in millimag.
    """
    # renamed from 'str', which shadowed the Python builtin
    colname = 'd' + band
    # convert from mag to millimag
    dm = 1000 * d[colname]
    dmOK = dm[np.abs(dm) < (1000 * dmMax)]
    xLabel = colname + ' (millimag)'  # typo fix: was 'milimag'
    qpH0(dmOK, 1000 * xMin, 1000 * xMax, xLabel, nBins, verbose)
    return np.mean(dmOK), np.median(dmOK), sigG(dmOK)
def qp2hist(arr1, arr2, xMin, xMax, xLabel, nBins=0, verbose=False):
    """Overplot histograms of two samples for comparison.

    With nBins > 0, fixed-width binned step curves are drawn (arr1 in red,
    arr2 in blue); otherwise matplotlib's 'auto' binning is used with filled
    histograms. Prints summary statistics of both arrays if verbose.
    """
    ax = plt.axes()
    if nBins > 0:
        # locals renamed from 'hist'/'bins': 'hist' shadowed astroML's
        # hist function imported at the top of the notebook
        counts1, edges1 = np.histogram(arr1, bins=nBins)
        centers1 = (edges1[:-1] + edges1[1:]) / 2
        ax.plot(centers1, counts1, drawstyle='steps', c='red')
        counts2, edges2 = np.histogram(arr2, bins=nBins)
        centers2 = (edges2[:-1] + edges2[1:]) / 2
        ax.plot(centers2, counts2, drawstyle='steps', c='blue')
    else:
        plt.hist(arr1, bins='auto', histtype='stepfilled', ec='k', fc='yellow')
        plt.hist(arr2, bins='auto', histtype='stepfilled', ec='red', fc='blue')
    ax.set_xlabel(xLabel)
    ax.set_ylabel('n')
    ax.set_xlim(xMin, xMax)
    plt.show()
    if verbose:
        print('Min: ', np.min(arr1), np.min(arr2))
        print('Median: ', np.median(arr1), np.median(arr2))
        print('sigG: ', sigG(arr1), sigG(arr2))
        print('Max: ', np.max(arr1), np.max(arr2))
    return
In [5]:
## plots involving Gaia data
In [6]:
# NOTE(review): hardcoded absolute local path — consider a configurable DATA_DIR
ZIdataDir = "/Users/ivezic/Work/Science/CalibrationV2/Data"
# the original SDSS Stripe 82 standards catalog from 2007
sdssOldCat = ZIdataDir + "/" + "stripe82calibStars_v2.6.dat"
# Karun's new catalog from 2020
sdssNewCat = ZIdataDir + "/" + "N2020_stripe82calibStars.dat"
readFormat = 'csv'
## ZI version based on Karun's version
sdssNewCatRecalib = ZIdataDir + "/" + "stripe82calibStars_v3.1.dat"
# flip to if (1) to analyze the recalibrated catalog instead
if (0):
    # for testing new recalibrated catalog
    sdssNewCat = sdssNewCatRecalib
    readFormat = 'ascii'
# Gaia DR2 sources in the Stripe 82 footprint
GaiaDR2Cat = ZIdataDir + "/" + "Stripe82_GaiaDR2.dat"
# for quick testing, a 1% subsample of the Gaia file
GaiaDR2Cat1perc = ZIdataDir + "/" + "Stripe82_GaiaDR2_1percent.dat"
In [7]:
# both new and old files use identical data structure
# both new and old files use identical data structure:
# 7 astrometric/metadata columns, then 6 photometric columns per ugriz band
colnamesSDSS = ['calib_fla', 'ra', 'dec', 'raRMS', 'decRMS', 'nEpochs', 'AR_val']
for band in ('u', 'g', 'r', 'i', 'z'):
    for stat in ('Nobs', 'mMed', 'mMean', 'mErr', 'rms_scatt', 'chi2'):
        colnamesSDSS.append(band + '_' + stat)
In [8]:
%%time
# old catalog: read the 2007 standards file (whitespace-delimited ASCII)
sdssOld = Table.read(sdssOldCat, format='ascii', names=colnamesSDSS)
# number of rows, shown as the cell output
np.size(sdssOld)
Out[8]:
In [9]:
%%time
# new catalog: read Karun's 2020 file with the same column layout
sdssNewRaw = Table.read(sdssNewCat, format='ascii', names=colnamesSDSS)
# sdssNewRaw = Table.read(sdssNewCat, format='csv', names=colnamesSDSS)
# reject objects with bad declination; sample size drops from 1007136 to 1005470
sdssNew = sdssNewRaw[sdssNewRaw['dec']<90]
# number of rows after the cut, shown as the cell output
np.size(sdssNew)
Out[9]:
In [10]:
sdssNewCat
Out[10]:
In [11]:
# build SkyCoord objects for positional cross-matching
sdssOld_coords = SkyCoord(ra = sdssOld['ra']*u.degree, dec= sdssOld['dec']*u.degree)
sdssNew_coords = SkyCoord(ra = sdssNew['ra']*u.degree, dec= sdssNew['dec']*u.degree)
# match each sdssNew source to its nearest sdssOld neighbor; the returned
# indices point into the sdssOld catalog (match_to_catalog_sky indexes the
# *argument* catalog — cf. sdssOld[idx] in the next cell; the original
# comment said "into sdssNew", which was incorrect)
# makes sense in this case since the sdssOld catalog is (a little bit) bigger
# than sdssNew (1006849 vs 1005470)
idx, d2d, d3d = sdssNew_coords.match_to_catalog_sky(sdssOld_coords)
In [12]:
# object separation is an object with units,
# I add that as a column so that one can
# select based on separation to the nearest matching object
# object separation is an object with units;
# add it as a plain (arcsec) column so that one can
# select based on separation to the nearest matching object
new_old = hstack([sdssNew, sdssOld[idx]], table_names = ['new', 'old'])
new_old['sep_2d_arcsec'] = d2d.arcsec
# keep only good matches between the old and new catalogs
MAX_DISTANCE_ARCSEC = 0.5
sdss = new_old[(new_old['sep_2d_arcsec'] < MAX_DISTANCE_ARCSEC)]
print(np.size(sdss))
In [13]:
colnamesGaia = ['ra', 'dec', 'nObs', 'Gmag', 'flux', 'fluxErr', 'pmra', 'pmdec']
# if (1): use the full Gaia DR2 sample; flip to the else branch for the 1% test sample
if (1):
    gaia = Table.read(GaiaDR2Cat, format='ascii', names=colnamesGaia)
else:
    gaia = Table.read(GaiaDR2Cat1perc, format='ascii', names=colnamesGaia)
# duplicate the coordinates under Gaia-specific names so they remain
# unambiguous after the hstack with the SDSS table below
gaia['raG'] = gaia['ra']
gaia['decG'] = gaia['dec']
In [14]:
# positions for the Gaia-to-SDSS cross-match (SDSS side uses old-catalog coords)
sdss_coords = SkyCoord(ra = sdss['ra_old']*u.degree, dec= sdss['dec_old']*u.degree)
gaia_coords = SkyCoord(ra = gaia['raG']*u.degree, dec= gaia['decG']*u.degree)
# this is matching gaia to sdss, so that indices are into sdss catalog
# makes sense in this case since the sdss catalog is bigger than gaia
idxG, d2dG, d3dG = gaia_coords.match_to_catalog_sky(sdss_coords)
# object separation is an object with units;
# add it as a plain (arcsec) column so that one can
# select based on separation to the nearest matching object
gaia_sdss = hstack([gaia, sdss[idxG]], table_names = ['gaia', 'sdss'])
gaia_sdss['sepSG_2d_arcsec'] = d2dG.arcsec
In [15]:
### code for generating new quantities, such as dra, ddec, colors, differences in mags, etc
def derivedColumns(matches):
    """Add derived columns to the matched table in place: astrometric offsets,
    fiducial ugriz magnitudes and colors from the old catalog, old-minus-new
    magnitude differences, offsets relative to Gaia positions, and G-r colors.
    """
    # new-minus-old positional offsets, in arcsec
    matches['dra'] = 3600 * (matches['ra_new'] - matches['ra_old'])
    matches['ddec'] = 3600 * (matches['dec_new'] - matches['dec_old'])
    # adopt the old-catalog position as fiducial
    matches['ra'] = matches['ra_old']
    ra = matches['ra']
    # RA wrapped to (-180, 180] for plotting across RA = 0
    matches['raW'] = np.where(ra > 180, ra - 360, ra)
    matches['dec'] = matches['dec_old']
    # fiducial ugriz magnitudes: the old-catalog medians
    for band in 'ugriz':
        matches[band] = matches[band + '_mMed_old']
    # colors from the old catalog
    for b1, b2 in (('u', 'g'), ('g', 'r'), ('r', 'i'), ('g', 'i')):
        matches[b1 + b2] = matches[b1 + '_mMed_old'] - matches[b2 + '_mMed_old']
    # old-minus-new magnitude differences
    for band in 'ugriz':
        matches['d' + band] = matches[band + '_mMed_old'] - matches[band + '_mMed_new']
    # offsets relative to Gaia positions (Gaia minus SDSS), in arcsec
    for coord in ('ra', 'dec'):
        for epoch in ('old', 'new'):
            key = 'd' + coord + 'G' + epoch
            matches[key] = -3600 * (matches[coord + '_' + epoch] - matches[coord + 'G'])
    # photometric: Gaia G minus SDSS r, for both catalogs
    for epoch in ('old', 'new'):
        matches['gGr_' + epoch] = matches['Gmag'] - matches['r_mMed_' + epoch]
    return
In [16]:
derivedColumns(gaia_sdss)
In [17]:
## first limit astrometric distance and
## require at least 4 epochs as in the old catalog
MAX_DISTANCE_ARCSEC = 0.5
m1 = gaia_sdss[(gaia_sdss['sepSG_2d_arcsec'] < MAX_DISTANCE_ARCSEC)]
a1 = m1['g_Nobs_new']
a2 = m1['r_Nobs_new']
a3 = m1['i_Nobs_new']
# ">3" means at least 4 epochs in each of g, r, and i
mOK = m1[(a1>3)&(a2>3)&(a3>3)]
# sample sizes after each successive cut
print(len(new_old))
print(len(m1))
print(len(mOK))
In [18]:
# doGaiaAll(mOK)
def doGaiaAll(d, Cstr):
    """Fit a 7th-degree polynomial to the (g-i, G-r) stellar locus and plot
    binned color residuals vs. declination and (wrapped) RA.

    Parameters
    ----------
    d : table-like
        Matched Gaia-SDSS table; must contain 'gi', 'Gmag', 'dec', 'raW'
        and the column named by Cstr. Columns 'gGrFit' and 'dgGr' are
        added to d in place.
    Cstr : str
        'gGr_old' or 'gGr_new' — which G-r color to model.

    Returns
    -------
    thetaCloc : ndarray
        Best-fit polynomial coefficients (degree 7).
    DcBok : table-like
        Cleaned subsample (0.4 < g-i < 3.0, 14.5 < G < 20.0, |dgGr| < 0.1)
        with a median-subtracted 'GrResid' column.
    """
    gi = d['gi']
    Gr = d[Cstr]
    # median G-r in 47 bins of g-i over [-0.7, 4.0]
    xBin, nPts, medianBin, sigGbin = fitMedians(gi, Gr, -0.7, 4.0, 47, 0)
    data = np.array([xBin, medianBin, sigGbin])
    # best-fit 7th-degree polynomial to the binned medians
    # (removed dead locals from the original: Gmag, Ndata, xfit, yfit were never used)
    thetaCloc = best_theta(data, 7)
    # per-object model color and residual
    d['gGrFit'] = polynomial_fit(thetaCloc, gi)
    d['dgGr'] = d[Cstr] - d['gGrFit']
    # restrict to the color and magnitude range where the fit is reliable
    Dc = d[(d['gi'] > 0.4) & (d['gi'] < 3.0)]
    DcB = Dc[(Dc['Gmag'] > 14.5) & (Dc['Gmag'] < 20.0)]
    DcB['GrResid'] = DcB['dgGr'] - np.median(DcB['dgGr'])
    # clip residual outliers beyond 0.1 mag
    DcBok = DcB[np.abs(DcB['dgGr']) < 0.1]
    # binned-median residuals vs. declination and wrapped RA
    qpBM(DcBok, 'dec', -1.3, 1.3, 'GrResid', -0.03, 0.03, 260)
    qpBM(DcBok, 'raW', -51.5, 60, 'GrResid', -0.03, 0.03, 112)
    return thetaCloc, DcBok
In [19]:
thetaC, mOKcBok = doGaiaAll(mOK, 'gGr_new')
In [20]:
## use 7-th degree polynomial and 0.4 < g-i < 3.0 for recalibration
mOK['gGrFit'] = polynomial_fit(thetaC, mOK['gi'])
mOK['dgGr'] = mOK['gGr_new'] - mOK['gGrFit']
# synthetic Gaia G magnitude predicted from SDSS r and the locus fit
mOK['Gsynth'] = mOK['gGrFit'] + mOK['r_mMed_new']
mOK['GmGsynth'] = mOK['Gmag'] - mOK['Gsynth']
# restrict to the color range where the polynomial fit is valid
mOKc = mOK[(mOK['gi']>0.4)&(mOK['gi']<3.0)]
In [21]:
## for zero point calibration, take 14.5 < G < 20.0
mOKcB = mOKc[(mOKc['Gmag']>14.5)&(mOKc['Gmag']<20.0)]
# residuals relative to the sample median
mOKcB['GrResid'] = mOKcB['dgGr'] - np.median(mOKcB['dgGr'])
# clip outliers beyond 0.1 mag
mOKcBok = mOKcB[np.abs(mOKcB['dgGr'])<0.1]
print(np.size(mOKcB), np.size(mOKcBok))
In [22]:
sigG(mOKcBok['GrResid'])
Out[22]:
In [23]:
# NOTE(review): this repeats the autoreload/import cell near the top of the
# notebook; harmless (load_ext warns if already loaded) but redundant
%load_ext autoreload
%autoreload 2
# importing ZI tools:
import ZItools as zit
In [ ]:
In [24]:
# plotting configuration for zit.plotdelMag: GmGsynth vs. declination
kwargs = {"Xstr":'dec', "Xmin":-1.3, "Xmax":1.3, "Xlabel":'Declination (deg)', \
"Ystr":'GmGsynth', "Ymin":-0.05, "Ymax":0.05, "Ylabel":'Gaia G - synth. SDSS G', \
"XminBin":-1.26, "XmaxBin":1.26, "nBin":252, \
"plotName":'myPlot.png', "Nsigma":3, "offset":0.01}
In [25]:
zit.plotdelMag(mOKcBok, kwargs)
In [26]:
# extract plain numpy arrays from the table for the array-based plot helpers
raArr = np.array(mOKcBok['raW'])
decArr = np.array(mOKcBok['dec'])
dmagArr = np.array(mOKcBok['GmGsynth'])
# subtract the overall zero point (sample median)
dmagArr = dmagArr - np.median(dmagArr)
In [27]:
# RA-direction zero-point residuals: bin dmagArr in wrapped R.A.
kwargsArr = {"Xstr":'Xstr', "Xmin":-52, "Xmax":60.5, "Xlabel":'R.A. (deg)', \
"Ystr":'Ystr', "Ymin":-0.06, "Ymax":0.06, "Ylabel":'Gaia G - synth. SDSS G', \
"XminBin":-51.5, "XmaxBin":60, "nBin":112, \
"plotName":'RAcorrections.png', "Nsigma":3, "offset":0.01}
zit.plotdelMagArr(raArr, dmagArr, kwargsArr)
In [28]:
# Dec-direction zero-point residuals: bin dmagArr in declination
kwargsArr = {"Xstr":'Xstr', "Xmin":-1.3, "Xmax":1.3, "Xlabel":'Declination (deg)', \
"Ystr":'Ystr', "Ymin":-0.06, "Ymax":0.06, "Ylabel":'Gaia G - synth. SDSS G', \
"XminBin":-1.26, "XmaxBin":1.26, "nBin":252, \
"plotName":'Deccorrections.png', "Nsigma":3, "offset":0.01}
zit.plotdelMagArr(decArr, dmagArr, kwargsArr)
In [ ]: