TAKEN FROM GITHUB ON APRIL 23, 2020 https://github.com/Karuntg/SDSS_SSC/raw/master/analysis/sdss_gaia_matching.ipynb
CHANGES DONE
In [1]:
    
# workhorse packages
import matplotlib.pyplot as plt 
import numpy as np
# Dask
import dask
from dask import compute, delayed
import dask.dataframe as dd
from dask.distributed import Client
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy import units as u
from astropy.table import hstack
# Scipy
from scipy.spatial import cKDTree as KDT
# for fits with log likelihood
import scipy
from scipy import stats
from scipy import optimize
    
In [2]:
    
SDSS_CAT = 'NEW_stripe82calibStars_prematch.dat'
GAIA_CAT = 'Stripe82_GaiaDR1.dat'
# MATCHED GAIA-SDSS OBJECTS
SDSS2GAIA = 'S82_newSDSS2GAIA_matchkln.csv'
# MAGS/ERRS OF MATCHED GAIA-SDSS OBJECTS
SDSS2GAIA_magerr = 'S82_newSDSS2GAIA_matchkln_magerr.csv'
    
In [3]:
    
# CONVERT IQD TO STD DEV
IQD2STD = 0.741
# GAIA ZEROPOINT
GAIA_ZP = 25.525
# DEFINE POLYNOMIAL DEGREES
deg1,deg2,deg3,deg5,deg7 = 1,2,3,5,7
    
In [4]:
    
%%time
colnames = ['calib_fla', 'ra', 'dec', 'raRMS', 'decRMS', 'nEpochs', 'AR_val', 
                'u_Nobs', 'u_mMed', 'u_mMean', 'u_mErr', 'u_rms_scatt', 'u_chi2',
                'g_Nobs', 'g_mMed', 'g_mMean', 'g_mErr', 'g_rms_scatt', 'g_chi2',
                'r_Nobs', 'r_mMed', 'r_mMean', 'r_mErr', 'r_rms_scatt', 'r_chi2',
                'i_Nobs', 'i_mMed', 'i_mMean', 'i_mErr', 'i_rms_scatt', 'i_chi2',
                'z_Nobs', 'z_mMed', 'z_mMean', 'z_mErr', 'z_rms_scatt', 'z_chi2']
# sdss = Table.read(SDSS_CAT, format='ascii', names=colnames)
sdss = Table.read(SDSS_CAT, format='csv', fast_reader=False, names=colnames)
# USING DASK DATAFRAME
# df_nSSC = dd.read_csv(SDSS_CAT,delimiter=",",header=None,names=colnames,
#                      assume_missing=True,low_memory=False,comment='#')
# sdss = df_nSSC.compute()
# sdss.head()
# Snrows,Sncols = sdss.shape
# print('SDSS, as read: num rows, cols: ',Snrows,Sncols)
#outlog.write('New df, as read: num rows %d, cols %d\n' % (nrows,ncols));
    
    
In [7]:
    
%%time
colnames = ['ra', 'dec', 'nObs', 'Gmag', 'flux', 'fluxErr']
# gaia = Table.read('Stripe82_GaiaDR1_small.dat', format='ascii', names=colnames)
gaia = Table.read(GAIA_CAT, format='ascii', names=colnames)
# USING DASK DATAFRAME
# df_gaia = dd.read_csv(GAIA_CAT,sep="\s+",
#                       header=None,names=colnames,
#                       assume_missing=True,low_memory=False,comment='#')
#df_gaia = dd.read_table(GAIA_CAT,sep="\s+",
#                        header=None,names=colnames,
#                        assume_missing=True,low_memory=False,comment='#')
#gaia = df_gaia.compute()
# gaia = gaia_table.to_pandas()
# gaia.head()
# Gnrows,Gncols = gaia.shape
# print('Gaia, as read: num rows, cols: ',Gnrows,Gncols)
    
    
In [8]:
    
# PRINT OUT NUMBER OF OBJ READ
print('Num 2007 cat obj: {0}'.format(len(sdss['ra'])))
print('Num gaia obj: {0}'.format(len(gaia['ra'])))
    
    
In [9]:
    
%%time
sdss_coords = SkyCoord(ra = sdss['ra']*u.degree, dec= sdss['dec']*u.degree) 
gaia_coords = SkyCoord(ra = gaia['ra']*u.degree, dec= gaia['dec']*u.degree) 
# this is matching gaia to sdss, so that indices are into sdss catalog
# makes sense in this case since the sdss catalog is bigger than gaia
idx, d2d, d3d = gaia_coords.match_to_catalog_sky(sdss_coords)
    
    
In [10]:
    
# THIS IS THE PRIMARY DF AFTER SDSS-GAIA MATCHING
# object separation is an object with units, 
# I add that as a column so that one can 
# select based on separation to the nearest matching object
# since it's matching gaia to sdss,
# the resulting catalog has the same length 
# as gaia ... 
gaia_sdss = hstack([gaia, sdss[idx]], table_names = ['gaia', 'sdss'])
gaia_sdss['sep_2d_arcsec'] = d2d.arcsec
print('Num gaia-SDSS 2007 matched: {0}'.format(len(idx)))
print(gaia_sdss.info())
    
    
In [11]:
    
r_all = gaia_sdss['r_mMed']
G_all = gaia_sdss['Gmag']
# sdss colors
gr_all = gaia_sdss['g_mMed'] - gaia_sdss['r_mMed']
gi_all = gaia_sdss['g_mMed'] - gaia_sdss['i_mMed']
gz_all = gaia_sdss['g_mMed'] - gaia_sdss['z_mMed']
ri_all = gaia_sdss['r_mMed'] - gaia_sdss['i_mMed']
# Gaia colors
Gg_all = gaia_sdss['Gmag'] - gaia_sdss['g_mMed']
Gr_all = gaia_sdss['Gmag'] - gaia_sdss['r_mMed']
Gi_all = gaia_sdss['Gmag'] - gaia_sdss['i_mMed']
ra_all = gaia_sdss['ra_gaia'] 
raW_all = np.where(ra_all > 180, ra_all-360, ra_all)
dec_all = gaia_sdss['dec_gaia']
    
In [12]:
    
# I would call good match to be within a certain limit 
# there is no built-in boundary - match_to_catalog_sky()
# will find the nearest match, regardless if it's an arcsecond
# or five degrees to the nearest one.
# gaia sources that have a good sdss match 
flag = (gaia_sdss['sep_2d_arcsec'] < 0.5)  # 486812 for <1 arcsec
gaia_matched = gaia_sdss[flag]
print('Num matched obj: %d' % len(gaia_sdss))
print('Num dist < 0.5 arc.sec: %d' % len(gaia_matched))
    
    
In [13]:
    
## MAGS AND COLORS
r_pk1 = gaia_matched['r_mMed']
G_pk1 = gaia_matched['Gmag']
# sdss colors
gr_pk1 = gaia_matched['g_mMed'] - gaia_matched['r_mMed']
gi_pk1 = gaia_matched['g_mMed'] - gaia_matched['i_mMed']
gz_pk1 = gaia_matched['g_mMed'] - gaia_matched['z_mMed']
ri_pk1 = gaia_matched['r_mMed'] - gaia_matched['i_mMed']
# gaia colors
Gg_pk1 = gaia_matched['Gmag'] - gaia_matched['g_mMed']
Gr_pk1 = gaia_matched['Gmag'] - gaia_matched['r_mMed']
Gi_pk1 = gaia_matched['Gmag'] - gaia_matched['i_mMed']
# POSITIONS BASED ON GAIA
ra_pk1 = gaia_matched['ra_gaia'] 
raW_pk1 = np.where(ra_pk1 > 180, ra_pk1-360, ra_pk1)
dec_pk1 = gaia_matched['dec_gaia']
    
In [14]:
    
# flagOK = ((raW > -10) & (raW < 50) & (rMed>15) & (rMed<20))
# flagOK = ((raW > -10) & (raW < 50) & (rMed>16) & (rMed<19))
flagOK = ((raW_pk1 > -10) & (raW_pk1 < 50) & (r_pk1>16) & (r_pk1<19) & (gi_pk1>0) & (gi_pk1<3.0))
gaia_matchedOK = gaia_matched[flagOK]
print('Num obj after select by RA, rMag, gi color: %d' % (len(gaia_matchedOK)))
    
    
In [15]:
    
g_pk2 = gaia_matchedOK['g_mMed']
r_pk2 = gaia_matchedOK['r_mMed']
i_pk2 = gaia_matchedOK['i_mMed']
z_pk2 = gaia_matchedOK['z_mMed']
G_pk2 = gaia_matchedOK['Gmag']
# sdss colors
gr_pk2 = gaia_matchedOK['g_mMed'] - gaia_matchedOK['r_mMed']
gi_pk2 = gaia_matchedOK['g_mMed'] - gaia_matchedOK['i_mMed']
gz_pk2 = gaia_matchedOK['g_mMed'] - gaia_matchedOK['z_mMed']
ri_pk2 = gaia_matchedOK['r_mMed'] - gaia_matchedOK['i_mMed']
# Gaia colors
Gg_pk2 = gaia_matchedOK['Gmag'] - gaia_matchedOK['g_mMed']
Gr_pk2 = gaia_matchedOK['Gmag'] - gaia_matchedOK['r_mMed']
Gi_pk2 = gaia_matchedOK['Gmag'] - gaia_matchedOK['i_mMed']
ra_pk2 = gaia_matchedOK['ra_gaia'] 
raW_pk2 = np.where(ra_pk2 > 180, ra_pk2-360, ra_pk2)
dec_pk2 = gaia_matchedOK['dec_gaia']
    
In [16]:
    
flagOK = ((raW_pk2 > -10) & (raW_pk2 < 50) & (r_pk2>16) & (r_pk2<19) & (gi_pk2>0) & (gi_pk2<3.0))
flagOK2 = (flagOK & (G_pk2 > 16) & (G_pk2 < 16.5))
gaia_matchedOK2 = gaia_matchedOK[flagOK2]
print('Num obj after select by RA, rMag, gi color: %d' % len(gaia_matchedOK))
print('Num obj after Gaia G-mag cut: %d' % len(gaia_matchedOK2))
    
    
In [17]:
    
print(len(flagOK2))
    
    
In [18]:
    
g_pk3 = gaia_matchedOK2['g_mMed']
r_pk3 = gaia_matchedOK2['r_mMed']
i_pk3 = gaia_matchedOK2['i_mMed']
z_pk3 = gaia_matchedOK2['z_mMed']
G_pk3 = gaia_matchedOK2['Gmag']
# also get mag errors for the next selection cut
# Gaia err
Gflux_pk3 = gaia_matchedOK2['flux']
Gfluxerr_pk3 = gaia_matchedOK2['fluxErr']
G_err_pk3 = -2.5*np.log10(1.+ (Gfluxerr_pk3/Gflux_pk3))
# sdss errs
g_err_pk3 = gaia_matchedOK2['g_mErr']
r_err_pk3 = gaia_matchedOK2['r_mErr']
i_err_pk3 = gaia_matchedOK2['i_mErr']
z_err_pk3 = gaia_matchedOK2['z_mErr']
# sdss colors
gr_pk3 = gaia_matchedOK2['g_mMed'] - gaia_matchedOK2['r_mMed']
gi_pk3 = gaia_matchedOK2['g_mMed'] - gaia_matchedOK2['i_mMed']
gz_pk3 = gaia_matchedOK2['g_mMed'] - gaia_matchedOK2['z_mMed']
ri_pk3 = gaia_matchedOK2['r_mMed'] - gaia_matchedOK2['i_mMed']
# Gaia colors
Gg_pk3 = gaia_matchedOK2['Gmag'] - gaia_matchedOK2['g_mMed']
Gr_pk3 = gaia_matchedOK2['Gmag'] - gaia_matchedOK2['r_mMed']
Gi_pk3 = gaia_matchedOK2['Gmag'] - gaia_matchedOK2['i_mMed']
ra_pk3 = gaia_matchedOK2['ra_gaia'] 
raW_pk3 = np.where(ra_pk3 > 180, ra_pk3-360, ra_pk3)
dec_pk3 = gaia_matchedOK2['dec_gaia']
    
In [19]:
    
flagOK3 = ((G_err_pk3 < 0.01) & (g_err_pk3 < 0.01) & (r_err_pk3 < 0.01) & (i_err_pk3 < 0.01) & (z_err_pk3 < 0.01))
gaia_matchedOK3 = gaia_matchedOK2[flagOK3]
print('Num obj after select by Gaia mags: %d' % len(gaia_matchedOK2))
print('Num obj after Gaia G, SDSS griz err cuts: %d' % len(gaia_matchedOK3))
    
    
In [20]:
    
g_pk4 = gaia_matchedOK3['g_mMed']
r_pk4 = gaia_matchedOK3['r_mMed']
i_pk4 = gaia_matchedOK3['i_mMed']
z_pk4 = gaia_matchedOK3['z_mMed']
G_pk4 = gaia_matchedOK3['Gmag']
# also get mag errors for the next selection cut
# Gaia errs
Gflux_pk4 = gaia_matchedOK3['flux']
Gfluxerr_pk4 = gaia_matchedOK3['fluxErr']
G_err_pk4 = -2.5*np.log10(1.+ (Gfluxerr_pk4/Gflux_pk4))
# sdss errs
g_err_pk4 = gaia_matchedOK3['g_mErr']
r_err_pk4 = gaia_matchedOK3['r_mErr']
i_err_pk4 = gaia_matchedOK3['i_mErr']
z_err_pk4 = gaia_matchedOK3['z_mErr']
# SDSS clrs
gr_pk4 = gaia_matchedOK3['g_mMed'] - gaia_matchedOK3['r_mMed']
gi_pk4 = gaia_matchedOK3['g_mMed'] - gaia_matchedOK3['i_mMed']
gz_pk4 = gaia_matchedOK3['g_mMed'] - gaia_matchedOK3['z_mMed']
ri_pk4 = gaia_matchedOK3['r_mMed'] - gaia_matchedOK3['i_mMed']
# Gaia-SDSS clrs
Gg_pk4 = gaia_matchedOK3['Gmag'] - gaia_matchedOK3['g_mMed']
Gr_pk4 = gaia_matchedOK3['Gmag'] - gaia_matchedOK3['r_mMed']
Gi_pk4 = gaia_matchedOK3['Gmag'] - gaia_matchedOK3['i_mMed']
ra_pk4 = gaia_matchedOK3['ra_gaia'] 
raW_pk4 = np.where(ra_pk4 > 180, ra_pk4-360, ra_pk4)
dec_pk4 = gaia_matchedOK3['dec_gaia']
    
In [21]:
    
%%time
paths = SDSS2GAIA  
gaia_matchedOK2.write(paths,format='ascii.csv',
                      delimiter=',', comment='#',overwrite=True)
    
    
In [22]:
    
# GET A SUBSET OF MAG/ERR COLS ONLY
cols_needed = ['ra_gaia','dec_gaia','Gmag','flux','fluxErr',
               'g_mMed','g_mErr','r_mMed','r_mErr','i_mMed','i_mErr','z_mMed','z_mErr']
gaia_matchedOK2_subset = gaia_matchedOK2[cols_needed]
# WRITE OUT A CSV FILE
paths = SDSS2GAIA_magerr
gaia_matchedOK2_subset.write(paths,format='ascii.csv',
                      delimiter=',', comment='#',overwrite=True)
    
In [23]:
    
# given vectors x and y, fit medians in bins from xMin to xMax, with Nbin steps,
# and return xBin, medianBin, medianErrBin 
def fitMedians(x, y, xMin, xMax, Nbin, verbose=1): 
    # first generate bins
    xEdge = np.linspace(xMin, xMax, (Nbin+1)) 
    xBin = np.linspace(0, 1, Nbin)
    nPts = 0*np.linspace(0, 1, Nbin)
    medianBin = 0*np.linspace(0, 1, Nbin)
    sigGbin = -1+0*np.linspace(0, 1, Nbin) 
    for i in range(0, Nbin): 
        xBin[i] = 0.5*(xEdge[i]+xEdge[i+1]) 
        yAux = y[(x>xEdge[i])&(x<=xEdge[i+1])]
        if (yAux.size > 0):
            nPts[i] = yAux.size
            medianBin[i] = np.median(yAux)
            # robust estimate of standard deviation: 0.741*(q75-q25)
            sigmaG = 0.741*(np.percentile(yAux,75)-np.percentile(yAux,25))
            # uncertainty of the median: sqrt(pi/2)*st.dev/sqrt(N)
            sigGbin[i] = np.sqrt(np.pi/2)*sigmaG/np.sqrt(nPts[i])
        else:
            nPts[i], medianBin[i], sigGBin[i] = 0
        
    if (verbose):
        print('median:', np.median(medianBin[Npts>0]))
    return xBin, nPts, medianBin, sigGbin
    
In [24]:
    
def mag2flx(SDSSmag):
    Flux = 10**(0.4*SDSSmag)
    return Flux
    
In [25]:
    
# this function computes polynomial models given some data x
# and parameters theta
def polynomial_fit(theta, x):
    """Polynomial model of degree (len(theta) - 1)"""
    return sum(t * x ** n for (n, t) in enumerate(theta))
# compute the data log-likelihood given a model
def logLv0(data, theta, model=polynomial_fit):
    """Gaussian log-likelihood of the model at theta"""
    x, y, sigma_y = data
    y_fit = model(theta, x)
    return sum(stats.norm.logpdf(*args)
               for args in zip(y, y_fit, sigma_y))
# a direct optimization approach is used to get best model 
# parameters (which minimize -logL)
def best_theta(data, degree, model=polynomial_fit):
    theta_0 = (degree + 1) * [0]
    neg_logLv0 = lambda theta: -logLv0(data, theta, model)
    return optimize.fmin_bfgs(neg_logLv0, theta_0, disp=False)
    
In [26]:
    
# this function computes a linear combination of 4 functions
# given parameters theta
def linear_fit(coeffs, x, w, y, z):
    ffit = coeffs[0]*x + coeffs[1]*w + coeffs[2]*y + coeffs[3]*z 
    return ffit
# compute the data log-likelihood given a model
def logLv1(dataL, coeffs, model=linear_fit):
    """Gaussian log-likelihood of the model at theta"""
    x, w, y, z, f, sigma_f = dataL
    f_fit = model(coeffs, x, w, y, z)
    return sum(stats.norm.logpdf(*args)
               for args in zip(f, f_fit, sigma_f))
# a direct optimization approach is used to get best model 
# parameters (which minimize -logL)
def best_lintheta(dataL, degree=4, model=linear_fit):
    coeffs_0 = degree * [0]
    neg_logLv1 = lambda coeffs: -logLv1(dataL, coeffs, model)
    return optimize.fmin_bfgs(neg_logLv1, coeffs_0, disp=False)
    
In [27]:
    
# FUNCTION TO PLOT THE CCD, FIT THE POLY, AND OVERPLOT THE FITTED VALUES
def CCDpltNfit(clr1a,clr1b,clr1c,clr1d,clr2a,clr2b,clr2c,clr2d,labls,
               xlim,ylim,ax_labl,plt_titl,fitclr1,fitclr2,Binz,degr=3):
    # clr1a,clr1b,clr1c,clr1d = the four cuts on clr on x-axis
    # clr2a,clr2b,clr2c,clr2d = the four cuts on clr on y-axis
    # labls = labels for the datasets given as a 4-element list
    # xlim, ylim = plot limits on x and y as 2-element lists
    # ax_labl,plt_titl = axes labels, and plot title
    # fitclr1,fitclr2 = cut on clr1, clr2 with which to comp medians and fit poly
    # Binz = minx, maxx and nbins for fitting range
    # degr = degree of fit poly, default = 3
    
    # plot the base CCD
    fig,ax = plt.subplots(1,1,figsize=(8,6))
    ax.scatter(clr1a,clr2a, s=0.01, c='orange',label=labls[0])
    ax.scatter(clr1b,clr2b, s=0.01, c='green',label=labls[1])
    ax.scatter(clr1c,clr2c, s=0.01, c='blue',label=labls[2])
    ax.scatter(clr1d,clr2d, s=0.01, c='red',label=labls[3])
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    ax.set_xlabel(ax_labl[0])
    ax.set_ylabel(ax_labl[1])
    ax.set_title(plt_titl,fontdict={'fontsize': 14, 'fontweight': 'bold'})
    ax.legend(loc='best')
    ax.grid(True)
    plt.show()
    
    # now fit the medians to the specified data set
    minx,maxx,nbin = Binz[0],Binz[1],Binz[2]
    xBin, nPts, medBin, sigBin = fitMedians(fitclr1,fitclr2,minx,maxx,nbin,0)
    
    # now fit the poly of spec degr
    data = np.array([xBin,medBin,sigBin])
    theta = best_theta(data,degr)
    return xBin, nPts, medBin, sigBin,theta
    
In [28]:
    
clr1a,clr1b,clr1c,clr1d = gr_pk1,gr_pk2,gr_pk3,gr_pk4
clr2a,clr2b,clr2c,clr2d = Gg_pk1,Gg_pk2,Gg_pk3,Gg_pk4
labls = ['pk1','pk2','pk3','pk4']
xlim = (-0.6,1.8)
ylim = (-3.0,0.5)
ax_labl = ['(g-r)','(G-g)']
plt_titl = 'CCD SDSS gr VS Gaia Gg'
fitclr1,fitclr2 = gr_pk4,Gg_pk4
Binz = [0.2,1.2,20]
xBin, nPts, medBin, sigBin,theta = CCDpltNfit(clr1a,clr1b,clr1c,clr1d,clr2a,clr2b,clr2c,clr2d,labls,
               xlim,ylim,ax_labl,plt_titl,fitclr1,fitclr2,Binz,degr=3)
print(theta)
    
    
    
In [29]:
    
clr1a,clr1b,clr1c,clr1d = gi_pk1,gi_pk2,gi_pk3,gi_pk4
clr2a,clr2b,clr2c,clr2d = Gg_pk1,Gg_pk2,Gg_pk3,Gg_pk4
labls = ['pk1','pk2','pk3','pk4']
xlim = (-1.,3.5)
ylim = (-3.2,0.4)
ax_labl = ['(g-i)','(G-g)']
plt_titl = 'CCD SDSS gi VS Gaia Gg'
fitclr1,fitclr2 = gi_pk4,Gg_pk4
Binz = [0.5,2.5,30]
xBin, nPts, medBin, sigBin,theta = CCDpltNfit(clr1a,clr1b,clr1c,clr1d,clr2a,clr2b,clr2c,clr2d,labls,
               xlim,ylim,ax_labl,plt_titl,fitclr1,fitclr2,Binz,degr=3)
print(theta)
    
    
    
In [30]:
    
clr1a,clr1b,clr1c,clr1d = gz_pk1,gz_pk2,gz_pk3,gz_pk4
clr2a,clr2b,clr2c,clr2d = Gg_pk1,Gg_pk2,Gg_pk3,Gg_pk4
labls = ['pk1','pk2','pk3','pk4']
xlim = (-1.5,5)
ylim = (-3.2,0.4)
ax_labl = ['(g-z)','(G-g)']
plt_titl = 'CCD SDSS gz VS Gaia Gg'
fitclr1,fitclr2 = gz_pk4,Gg_pk4
Binz = [0.5,3,35]
xBin, nPts, medBin, sigBin,theta = CCDpltNfit(clr1a,clr1b,clr1c,clr1d,clr2a,clr2b,clr2c,clr2d,labls,
               xlim,ylim,ax_labl,plt_titl,fitclr1,fitclr2,Binz,degr=3)
print(theta)
    
    
    
In [31]:
    
# plot
fig,ax = plt.subplots(1,1,figsize=(8,6))
ax.scatter(gr_all, ri_all, s=0.01, c='green')
ax.scatter(gr_pk1, ri_pk1, s=0.01, c='blue')
ax.scatter(gr_pk2, ri_pk2, s=0.01, c='red')
ax.set_xlim(-0.7,2.2)
ax.set_ylim(-0.7,2.4)
ax.set_xlabel('SDSS(g-r)')
ax.set_ylabel('SDSS(r-i)')
ax.set_title('CCD: gr vs ri all-pk1-pk2',fontdict={'fontsize': 14, 'fontweight': 'bold'})
    
    Out[31]:
    
In [32]:
    
# plot
fig,ax = plt.subplots(1,1,figsize=(8,6))
ax.scatter(gi_all, Gr_all, s=0.01, c='green')
ax.scatter(gi_pk1, Gr_pk1, s=0.01, c='blue')
ax.scatter(gi_pk2, Gr_pk2, s=0.01, c='red')
ax.set_xlim(-0.5,3.5)
ax.set_ylim(-1.5,1.)
ax.set_xlabel('SDSS(g-i)')
ax.set_ylabel('Gaia G - SDSS r')
ax.set_title('CCD: gi vs Gr all-pk1-pk2',fontdict={'fontsize': 14, 'fontweight': 'bold'})
    
    Out[32]:
    
In [33]:
    
%%time
# medians
#xBin, nPts, medianBin, sigGbin = fitMedians(gi, Gr, -0.7, 4.0, 47, 0)
#xBinOK, nPtsOK, medianBinOK, sigGbinOK = fitMedians(giOK, GrOK, -0.2, 3.2, 34, 0)
xBin, nPts, medianBin, sigGbin = fitMedians(gi_all, Gr_all, 0.0, 3.0, 30, 0)
xBinOK1, nPtsOK1, medianBinOK1, sigGbinOK1 = fitMedians(gi_pk1, Gr_pk1, 0.0, 3.0, 30, 0)
xBinOK2, nPtsOK2, medianBinOK2, sigGbinOK2 = fitMedians(gi_pk2, Gr_pk2, 0.0, 3.0, 30, 0)
xBinOK3, nPtsOK3, medianBinOK3, sigGbinOK3 = fitMedians(gi_pk3, Gr_pk3, 0.0, 3.0, 30, 0)
#print xBin, nPts, medianBin, sigGbin
medOK1 = medianBinOK1[(xBinOK1>2)&(xBinOK1<3)]
medOK2 = medianBinOK2[(xBinOK2>2)&(xBinOK2<3)]
medOK3 = medianBinOK3[(xBinOK3>2)&(xBinOK3<3)]
dmedOK21 = medOK2 - medOK1
dmedOK32 = medOK3 - medOK2
print('Med offset pk2 - pk1: ',dmedOK21)
print('Med offset pk3 - pk2: ',dmedOK32)
    
    
In [ ]:
    
%%time
data = np.array([xBinOK1, medianBinOK1, sigGbinOK1])
#x, y, sigma_y = data
theta1 = best_theta(data,deg1)
print('Fit values: ',theta1)
    
In [ ]:
    
%%time
data = np.array([xBinOK2, medianBinOK2, sigGbinOK2])
# x, y, sigma_y = data
Ndata = xBinOK2.size
# get best-fit parameters for linear, quadratic and cubic models
theta1 = best_theta(data,deg3)
theta2 = best_theta(data,deg5)
theta3 = best_theta(data,deg7)
    
In [ ]:
    
# generate best fit lines on a fine grid 
xfit = np.linspace(-1.1, 4.3, 1000)
yfit1 = polynomial_fit(theta1, xfit)
yfit2 = polynomial_fit(theta2, xfit)
yfit3 = polynomial_fit(theta3, xfit)
# and compute chi2 per degree of freedom: sum{[(y-yfit)/sigma_y]^2} 
x, y, sigma_y = xBinOK2, medianBinOK2, sigGbinOK2
chi21 = np.sum(((y-polynomial_fit(theta1, x))/sigma_y)**2) 
chi22 = np.sum(((y-polynomial_fit(theta2, x))/sigma_y)**2) 
chi23 = np.sum(((y-polynomial_fit(theta3, x))/sigma_y)**2) 
# the number of fitted parameters is 2, 3, 4
chi2dof1 = chi21/(Ndata - deg3)
chi2dof2 = chi22/(Ndata - deg5)
chi2dof3 = chi23/(Ndata - deg7)
print("CHI2:")
print('   Model deg, chi2 :', deg3,chi21)
print('Model deg, chi2 ::', deg5, chi22)
print('    Model deg, chi2 ::', deg7, chi23)
print("CHI2 per degree of freedom:")
print('   best model 1:', chi2dof1)
print('best model 2:', chi2dof2)
print('    best model 3:', chi2dof3)
    
In [ ]:
    
# Plot a (gaia - r)  vs (g-i)  for photometric transformation
%matplotlib inline
# plot
fig,ax = plt.subplots(1,1,figsize=(12,8))
ax.scatter(gi_pk1, Gr_pk1, s=0.01, c='green')
ax.scatter(gi_pk2, Gr_pk2, s=0.01, c='blue')
ax.scatter(gi_pk3, Gr_pk3, s=0.01, c='red')
# medians
ax.scatter(xBinOK1, medianBinOK1, s=30.0, c='black', alpha=0.5)
ax.scatter(xBinOK2, medianBinOK2, s=30.0, c='yellow', alpha=0.5)
ax.scatter(xBinOK2, medianBinOK2, s=30.0, c='green', alpha=0.5)
ax.set_xlim(-1,4)
ax.set_ylim(-2.0,0.5)
ax.set_xlabel('SDSS(g-i)')
ax.set_ylabel('Gaia G - SDSS r')
ax.errorbar(x, y, sigma_y, fmt='ok', ecolor='gray')
ax.plot(xfit, polynomial_fit(theta1, xfit), label='best P3 model')
ax.plot(xfit, polynomial_fit(theta2, xfit), label='best P5 model')
ax.plot(xfit, polynomial_fit(theta3, xfit), label='best P7 model')
ax.legend(loc='best', fontsize=14)
    
In [ ]:
    
print('Coeffts 3rd order fit: ',theta3)
    
In [ ]:
    
# GrModel = -0.06348 -0.03111*gi +0.08643*gi*gi -0.05593*gi*gi*gi
GrModel = sum(t * gi_pk2 ** n for (n, t) in enumerate(theta3))
GrResid = Gr_pk2 - GrModel
minx,maxx,nx = min(xBinOK2),max(xBinOK2),10*len(xBinOK2)
xBinM, nPtsM, medianBinM, sigGbinM = fitMedians(gi_pk2, GrResid,minx,maxx,nx, 0)
    
In [ ]:
    
fig,ax = plt.subplots(1,1,figsize=(12,8))
ax.scatter(gi_pk2, GrResid, s=0.01, c='blue')
# medians
ax.scatter(xBinM, medianBinM, s=30.0, c='black', alpha=0.8)
ax.scatter(xBinM, medianBinM, s=20.0, c='yellow', alpha=0.3)
TwoSigP = medianBinM + 2*sigGbinM
TwoSigM = medianBinM - 2*sigGbinM 
ax.plot(xBinM, TwoSigP, c='yellow')
ax.plot(xBinM, TwoSigM, c='yellow')
rmsBin = np.sqrt(nPtsM) / np.sqrt(np.pi/2) * sigGbinM
rmsP = medianBinM + rmsBin
rmsM = medianBinM - rmsBin
ax.plot(xBinM, rmsP, c='cyan')
ax.plot(xBinM, rmsM, c='cyan')
ax.set_xlim(-1,4.2)
ax.set_ylim(-0.14,0.14)
ax.set_xlabel('g-i')
ax.set_ylabel('residuals for (Gaia G - SDSS r)')
xL = np.linspace(-10,10)
ax.plot(xL, 0*xL+0.00, c='red')
ax.plot(xL, 0*xL+0.01, c='red')
ax.plot(xL, 0*xL-0.01, c='red')
ax.plot(0*xL+0.4, xL, c='yellow')
ax.plot(0*xL+2.0, xL, c='yellow')
    
In [ ]:
    
medOK = medianBinM[(xBinM>0.4)&(xBinM<2.0)]
    
In [ ]:
    
print('Median mag diff: ',medOK)
    
In [ ]:
    
np.median(medOK)
    
In [ ]:
    
np.std(medOK)
    
In [ ]:
    
np.max(medOK)
    
In [ ]:
    
np.min(medOK)
    
In [ ]:
    
residOK = GrResid[(gi_pk2>0.4)&(gi_pk2<2.0)&(raW_pk2>-10)&(raW_pk2<50)]
magOK = r_pk2[(gi_pk2>0.4)&(gi_pk2<2.0)&(raW_pk2>-10)&(raW_pk2<50)]
gaiaG = gaia_matched['Gmag']
GOK = gaiaG[(gi>0.4)&(gi<2.0)&(raW>-10)&(raW<50)]
print('Med of resid: ',np.median(residOK))
print('Std of resid: ',np.std(residOK))
    
In [ ]:
    
xBinMg, nPtsMg, medianBinMg, sigGbinMg = fitMedians(magOK, residOK, 14, 20.5, 65, 0)
    
In [ ]:
    
fig,ax = plt.subplots(1,1,figsize=(8,6))
ax.scatter(magOK, residOK, s=0.01, c='blue')
# medians
ax.scatter(xBinMg, medianBinMg, s=30.0, c='black', alpha=0.8)
ax.scatter(xBinMg, medianBinMg, s=20.0, c='yellow', alpha=0.3)
ax.set_xlim(13,23)
ax.set_ylim(-0.15,0.15)
ax.set_xlabel('SDSS r')
ax.set_ylabel('residuals for $G-G_{SDSS}$ ')
xL = np.linspace(-10,30)
ax.plot(xL, 0*xL+0.00, c='yellow')
ax.plot(xL, 0*xL+0.01, c='red')
ax.plot(xL, 0*xL-0.01, c='red')
    
In [ ]:
    
print medianBinMg
    
In [ ]:
    
fig,ax = plt.subplots(1,1,figsize=(8,6))
ax.scatter(magOK, residOK, s=0.01, c='blue')
# medians
ax.scatter(xBinMg, medianBinMg, s=30.0, c='black', alpha=0.8)
ax.scatter(xBinMg, medianBinMg, s=20.0, c='yellow', alpha=0.3)
ax.set_xlim(13,23)
ax.set_ylim(-0.05,0.05)
ax.set_xlabel('SDSS r')
ax.set_ylabel('residuals for $G-G_{SDSS}$ ')
xL = np.linspace(-10,30)
ax.plot(xL, 0*xL+0.00, c='yellow')
ax.plot(xL, 0*xL+0.01, c='red')
ax.plot(xL, 0*xL-0.01, c='red')
    
In [ ]:
    
print(magOK.size)
print(GOK.size)
xBinMg, nPtsMg, medianBinMg, sigGbinMg = fitMedians(GOK, residOK, 14, 20.5, 130, 0)
    
In [ ]:
    
fig,ax = plt.subplots(1,1,figsize=(8,6))
ax.scatter(GOK, residOK, s=0.01, c='blue')
# medians
ax.scatter(xBinMg, medianBinMg, s=30.0, c='black', alpha=0.8)
ax.scatter(xBinMg, medianBinMg, s=20.0, c='yellow', alpha=0.3)
TwoSigP = medianBinMg + 2*sigGbinMg 
TwoSigM = medianBinMg - 2*sigGbinMg 
ax.plot(xBinMg, TwoSigP, c='yellow')
ax.plot(xBinMg, TwoSigM, c='yellow')
rmsBin = np.sqrt(nPtsMg) / np.sqrt(np.pi/2) * sigGbinMg 
rmsP = medianBinMg + rmsBin
rmsM = medianBinMg - rmsBin
ax.plot(xBinMg, rmsP, c='cyan')
ax.plot(xBinMg, rmsM, c='cyan')
ax.set_xlim(13,23)
ax.set_ylim(-0.05,0.05)
ax.set_xlabel('Gaia G mag')
ax.set_ylabel('residuals for $G_{Gaia}-G_{SDSS}$ ')
xL = np.linspace(-10,30)
ax.plot(xL, 0*xL+0.00, c='cyan')
ax.plot(xL, 0*xL+0.01, c='red')
ax.plot(xL, 0*xL-0.01, c='red')
    
In [ ]:
    
gi = gaia_matched['g_mMed'] - gaia_matched['i_mMed']
ra = gaia_matched['ra_gaia'] 
raW = np.where(ra > 180, ra-360, ra)
flux = gaia_matched['flux']
fluxErr = gaia_matched['fluxErr']
fluxOK = flux[(gi>0.4)&(gi<2.0)&(raW>-10)&(raW<50)]
fluxErrOK = fluxErr[(gi>0.4)&(gi<2.0)&(raW>-10)&(raW<50)]
rBandErr = gaia_matched['r_mErr']
rBandErrOK = rBandErr[(gi>0.4)&(gi<2.0)&(raW>-10)&(raW<50)]
## Gaia's errors underestimated by a factor of ~2
sigma = np.sqrt((2*fluxErrOK/fluxOK)**2 + 1*rBandErrOK**2)
chi = residOK / sigma
xBinMg, nPtsMg, medianBinMg, sigGbinMg = fitMedians(GOK, chi, 14, 20.5, 130, 0)
fig,ax = plt.subplots(1,1,figsize=(8,6))
ax.scatter(GOK, chi, s=0.01, c='blue')
# medians
ax.scatter(xBinMg, medianBinMg, s=30.0, c='black', alpha=0.8)
ax.scatter(xBinMg, medianBinMg, s=20.0, c='yellow', alpha=0.3)
TwoSigP = medianBinMg + 2*sigGbinMg 
TwoSigM = medianBinMg - 2*sigGbinMg 
ax.plot(xBinMg, TwoSigP, c='yellow')
ax.plot(xBinMg, TwoSigM, c='yellow')
rmsBin = np.sqrt(nPtsMg) / np.sqrt(np.pi/2) * sigGbinMg 
rmsP = medianBinMg + rmsBin
rmsM = medianBinMg - rmsBin
ax.plot(xBinMg, rmsP, c='cyan')
ax.plot(xBinMg, rmsM, c='cyan')
ax.set_xlim(13,23)
ax.set_ylim(-5,5)
ax.set_xlabel('Gaia G mag')
ax.set_ylabel('($G_{Gaia}-G_{SDSS}$)/$\sigma$')
xL = np.linspace(-10,30)
ax.plot(xL, 0*xL+0.00, c='cyan')
ax.plot(xL, 0*xL+2, c='red')
ax.plot(xL, 0*xL-2, c='red')
    
In [ ]:
    
Gerr = fluxErrOK/fluxOK
print(np.median(Gerr))
print(np.median(rBandErrOK))
    
In [ ]:
    
residOK2 = residOK[(magOK>15)&(magOK<16)]
print np.median(residOK2)
mm = medianBinMg[(xBinMg>15)&(xBinMg<16)]
xx = xBinMg[(xBinMg>15)&(xBinMg<16)]
print(mm)
print(xx)
print("transition at G ~ 15.6")
    
In [ ]:
    
## conclusions
# 1) select:(-10 < RA < 50) & (16 < SDSSr < 19) & (0.4< g-i < 2.0)
thetaFinal = theta3
print(thetaFinal)
    
In [ ]:
    
rMed = gaia_matched['r_mMed'] 
gi = gaia_matched['g_mMed'] - gaia_matched['i_mMed'] 
ra = gaia_matched['ra_gaia'] 
raW = np.where(ra > 180, ra-360, ra)
flagOK = ((raW > -10) & (raW < 50) & (rMed>16) & (rMed<18) & (gi>0) & (gi<3.0))
# flagOK = ((raW > -10) & (raW < 50) & (rMed>16) & (rMed<19) & (gi>0) & (gi<3.0))
gaia_matchedOK = gaia_matched[flagOK]
print(len(gaia_matchedOK))
    
In [ ]:
    
giOK = gaia_matchedOK['g_mMed'] - gaia_matchedOK['i_mMed'] 
GrOK = gaia_matchedOK['Gmag'] - gaia_matchedOK['r_mMed'] 
GmagOK = gaia_matchedOK['Gmag'] 
GrModel = sum(t * giOK ** n for (n, t) in enumerate(theta3))
GrResid = GrOK - GrModel
print(np.median(GrResid))
print(np.std(GrResid))
print(np.min(GrResid))
print(np.max(GrResid))
    
In [ ]:
    
#xBinMg, nPtsMg, medianBinMg, sigGbinMg = fitMedians(GmagOK, GrResid, 16, 18.8, 14, 0)
xBinMg, nPtsMg, medianBinMg, sigGbinMg = fitMedians(GmagOK, GrResid, 16, 17.8, 14, 0)
    
In [ ]:
    
fig,ax = plt.subplots(1,1,figsize=(8,6))
ax.scatter(GmagOK, GrResid, s=0.01, c='blue')
# medians
ax.scatter(xBinMg, medianBinMg, s=30.0, c='black', alpha=0.9)
ax.scatter(xBinMg, medianBinMg, s=15.0, c='yellow', alpha=0.5)
ax.set_xlim(15.5,19)
ax.set_ylim(-0.07,0.07)
ax.set_xlabel('Gaia G mag')
ax.set_ylabel('Gr residuals')
xL = np.linspace(-10,30)
ax.plot(xL, 0*xL+0.00, c='yellow')
ax.plot(xL, 0*xL+0.01, c='red')
ax.plot(xL, 0*xL-0.01, c='red')
    
In [ ]:
    
print(np.median(medianBinMg))
print(np.std(medianBinMg))
    
In [ ]:
    
GrResidN = GrResid - np.median(medianBinMg)
    
In [ ]:
    
ra = gaia_matchedOK['ra_sdss'] 
raW = np.where(ra > 180, ra-360, ra)
fig,ax = plt.subplots(1,1,figsize=(8,6))
ax.scatter(raW, GrResidN, s=0.01, c='blue')
ax.set_xlim(-12,52)
ax.set_ylim(-0.06,0.06)
ax.set_xlabel('R.A.')
ax.set_ylabel('Gmag residual')
xBin, nPts, medianBin, sigGbin = fitMedians(raW, GrResidN, -10, 50, 60, 0)
ax.scatter(xBin, medianBin, s=30.0, c='black', alpha=0.9)
ax.scatter(xBin, medianBin, s=15.0, c='yellow', alpha=0.5)
TwoSigP = medianBin + 2*sigGbin
TwoSigM = medianBin - 2*sigGbin 
ax.plot(xBin, TwoSigP, c='yellow')
ax.plot(xBin, TwoSigM, c='yellow')
xL = np.linspace(-100,100)
ax.plot(xL, 0*xL+0.00, c='yellow')
ax.plot(xL, 0*xL+0.01, c='red')
ax.plot(xL, 0*xL-0.01, c='red')
    
In [ ]:
    
print(np.median(medianBin))
print(np.std(medianBin))
print(np.min(medianBin))
print(np.max(medianBin))
    
In [ ]:
    
dec = gaia_matchedOK['dec_sdss'] 
fig,ax = plt.subplots(1,1,figsize=(8,6))
ax.scatter(dec, GrResidN, s=0.01, c='blue')
ax.set_xlim(-1.3,1.3)
ax.set_ylim(-0.06,0.06)
ax.set_xlabel('Declination')
ax.set_ylabel('Gmag residual')
xBin, nPts, medianBin, sigGbin = fitMedians(dec, GrResidN, -1.2, 1.2, 120, 0)
ax.scatter(xBin, medianBin, s=30.0, c='black', alpha=0.9)
ax.scatter(xBin, medianBin, s=15.0, c='yellow', alpha=0.5)
TwoSigP = medianBin + 2*sigGbin
TwoSigM = medianBin - 2*sigGbin 
ax.plot(xBin, TwoSigP, c='yellow')
ax.plot(xBin, TwoSigM, c='yellow')
xL = np.linspace(-100,100)
ax.plot(xL, 0*xL+0.00, c='yellow')
ax.plot(xL, 0*xL+0.01, c='red')
ax.plot(xL, 0*xL-0.01, c='red')
for i in range(1,12):
    decCol = -1.2655 + i*0.2109
    ax.plot(0*xL+decCol, xL, c='red')
    
In [ ]:
    
print(np.median(medianBin))
print(np.std(medianBin))
print(np.min(medianBin))
print(np.max(medianBin))
    
In [ ]: