This notebook matches only the high-nEpoch stars in the new catalog against the old SSC.
Notes regarding the steps done:
NO COMPARISONS DONE HERE
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy import units as u
from astropy.table import hstack
import os
import glob
import tarfile
from urllib.request import urlretrieve
In [2]:
# Dask
import dask
from dask import compute, delayed
import dask.dataframe as dd
from dask.distributed import Client
# Scipy
from scipy.spatial import cKDTree as KDT
# astroML
from astroML.plotting import hist
In [3]:
import pyspherematch
In [4]:
# ---- Cleaning thresholds for the new SSC ----
# Objects with fewer than this many epochs are removed from the new SSC.
IZ_low_epoch = 5
# Largest magnitude error accepted by the photometric quality cuts.
max_MAGerr = 0.05

# ---- Positional matching (pyspherematch) ----
# Matching radius, given in arcseconds and converted to degrees.
tol_asec = 2.0
tol_deg = tol_asec / 3600.0

# ---- Catalogue files: new SSC ----
nSSC = 'NEW_stripe82calibStars_v0.dat'                  # input catalogue
nSSC_prematch = 'NEW_stripe82calibStars_prematch.csv'   # cleaned, before matching
nSSC_matched = 'NEW_stripe82calibStars_matched.csv'     # matched rows

# ---- Catalogue files: old SSC ----
oSSC = 'stripe82calibStars_v2.6.dat'                    # input catalogue
oSSC_matched = 'stripe82calibStars_matched.csv'         # matched rows
new_df = new catalog (KT 2020), as read in
new_df1 = new_df after removing nulls and low-nEpoch objects
new_df2 = new_df1 restricted to good r-band magnitude and error
new_df3 = new_df2 restricted to good g-band magnitude and error
new_df4 = new_df3 restricted to good i-band magnitude and error
old_df = old catalog (ZI 2007), as read in
new_prematch_df = cleaned 2020 catalog, prior to matching
old_prematch_df = 2007 catalog as read, prior to matching
new_matched_df = matched rows of the new catalog
old_matched_df = matched rows of the old catalog
The following dataframes are written out to CSV files:
new_prematch_df
new_matched_df
old_matched_df
In [5]:
from datetime import date

# Run log: opened for writing here and left open so that the following cells
# can append to it; the last cell of the notebook closes it.
log_fil = 'Match_NvsO_SSCv2.log'
outlog = open(log_fil, mode='w')

# First log line records the date of this run.
# NOTE(review): the message says "SSCv1" while the log file name says "SSCv2" -
# confirm which version label is intended.
today = date.today()
outlog.write("Running Match_NvsO_SSCv1 on %s\n" % today);
In [6]:
%%time
# Column names for the new SSC: seven per-object columns, then six statistics
# (Nobs, mMed, mMean, mErr, rms_scatt, chi2) for each of the bands u, g, r, i, z.
# Every name carries the prefix 'N', e.g. 'Nr_mMed'; 37 names in total.
Ncolnames = ['Ncalib_fla', 'Nra', 'Ndec', 'NraRMS', 'NdecRMS', 'NnEpochs', 'NAR_val']
Ncolnames += [
    'N%s_%s' % (band, stat)
    for band in ('u', 'g', 'r', 'i', 'z')
    for stat in ('Nobs', 'mMed', 'mMean', 'mErr', 'rms_scatt', 'chi2')
]

# Read the comma-separated catalogue through dask, then materialise it as an
# in-memory dataframe with compute().
df_nSSC = dd.read_csv(
    nSSC,
    delimiter=",",
    header=None,
    names=Ncolnames,
    assume_missing=True,
    low_memory=False,
    comment='#',
)
new_df = df_nSSC.compute()
new_df.head()  # return value is not used here

# Report the size of the table as read, on screen and in the log.
nrows = new_df.shape[0]
ncols = new_df.shape[1]
print('New df, as read: num rows, cols: ', nrows, ncols)
outlog.write('New df, as read: num rows %d, cols %d\n' % (nrows, ncols));
Out[6]:
In [7]:
# Discard every row that has a missing value in any column
# (axis=0 / how='any' are the dropna defaults, spelled out here).
new_df = new_df.dropna(axis=0, how='any')

# Report the size of the null-free table, on screen and in the log.
nrows = new_df.shape[0]
ncols = new_df.shape[1]
print('New df, no NULLS: num rows, cols: ', nrows, ncols)
outlog.write('*************************\n')
outlog.write('Dropping all nulls and nans\n')
outlog.write('New df, no NULLS: num rows %d, cols %d \n' % (nrows, ncols));
In [8]:
%%time
# Census of the new SSC by number of epochs (NnEpochs cast to int).
#
# The three groups are selected with vectorised boolean masks rather than
# per-element ``filter(lambda ...)`` passes over the whole column; ``tolist()``
# yields the same plain-int lists, so ``no_nepochs`` / ``lo_nepochs`` /
# ``hi_nepochs`` keep their original type and contents.
new_nepochs = new_df['NnEpochs'].astype(int)

# nEpochs == 0 (original note: 0 obs = no matches)
no_nepochs = new_nepochs[new_nepochs == 0].tolist()
# 0 < nEpochs < IZ_low_epoch (original note: will have matches)
lo_nepochs = new_nepochs[(new_nepochs > 0) & (new_nepochs < IZ_low_epoch)].tolist()
# nEpochs >= IZ_low_epoch, threshold itself included (original note: will have close matches)
hi_nepochs = new_nepochs[new_nepochs >= IZ_low_epoch].tolist()

# Report the three counts on screen ...
print('Nepoch threshold used: ', IZ_low_epoch)
print('Num no nepoch obj: ', len(no_nepochs))
print('Num lo nepoch obj: ', len(lo_nepochs))
print('Num hi nepoch obj: ', len(hi_nepochs))

# ... and in the run log.
outlog.write('*************************\n')
outlog.write('Finding low nepoch objects\n')
outlog.write('Num no nepoch obj: %d\n' % len(no_nepochs))
outlog.write('Num lo nepoch obj: %d\n' % len(lo_nepochs))
outlog.write('Num hi nepoch obj: %d\n' % len(hi_nepochs));
Out[8]:
In [9]:
%%time
# Keep only the rows whose epoch count (cast to int) reaches the threshold;
# the comparison is inclusive (>=).
new_df1 = new_df.loc[new_df['NnEpochs'].astype(int).ge(IZ_low_epoch)]

# Report the size of the surviving sample, on screen and in the log.
nrows1 = new_df1.shape[0]
ncols1 = new_df1.shape[1]
print('New df, nEpochs > threshold: num rows, cols: ', nrows1, ncols1)
outlog.write('*************************\n')
outlog.write('New df, nEpochs > threshold: num rows %d, cols %d\n' % (nrows1, ncols1));
Out[9]:
In [10]:
# r-band photometric quality cuts, applied to the nEpoch-selected sample.

# Cut 1: median r magnitude must be positive.
new_df2 = new_df1.loc[new_df1['Nr_mMed'].gt(0)]
nrows2, ncols2 = new_df2.shape
print((nrows2, ncols2))
print('New df, RMAG > 0: num rows, cols: ', nrows2, ncols2)
outlog.write('*************************\n')
outlog.write('New df, RMAG > 0: num rows %d, cols %d\n' % (nrows2, ncols2));

# Cut 2: r magnitude error must not exceed max_MAGerr (inclusive).
new_df2 = new_df2.loc[new_df2['Nr_mErr'].le(max_MAGerr)]
nrows2, ncols2 = new_df2.shape
print((nrows2, ncols2))
print('New df, RERR < MAX ERR: num rows, cols: ', nrows2, ncols2)
outlog.write('*************************\n')
outlog.write('New df, RERR < MAX ERR: num rows %d, cols %d\n' % (nrows2, ncols2));
In [11]:
# g-band photometric quality cuts, applied to the r-selected sample.

# Cut 1: median g magnitude must be positive.
new_df3 = new_df2.loc[new_df2['Ng_mMed'].gt(0)]
nrows3, ncols3 = new_df3.shape
print((nrows3, ncols3))
print('New df, GMAG > 0: num rows, cols: ', nrows3, ncols3)
outlog.write('*************************\n')
outlog.write('New df, GMAG > 0: num rows %d, cols %d\n' % (nrows3, ncols3));

# Cut 2: g magnitude error must not exceed max_MAGerr (inclusive).
new_df3 = new_df3.loc[new_df3['Ng_mErr'].le(max_MAGerr)]
nrows3, ncols3 = new_df3.shape
print((nrows3, ncols3))
print('New df, GERR < MAX ERR: num rows, cols: ', nrows3, ncols3)
outlog.write('*************************\n')
outlog.write('New df, GERR < MAX ERR: num rows %d, cols %d\n' % (nrows3, ncols3));
In [12]:
# i-band photometric quality cuts, applied to the g-selected sample.

# Cut 1: median i magnitude must be positive.
new_df4 = new_df3.loc[new_df3['Ni_mMed'].gt(0)]
nrows4, ncols4 = new_df4.shape
print((nrows4, ncols4))
print('New df, IMAG > 0: num rows, cols: ', nrows4, ncols4)
outlog.write('*************************\n')
outlog.write('New df, IMAG > 0: num rows %d, cols %d\n' % (nrows4, ncols4));

# Cut 2: i magnitude error must not exceed max_MAGerr (inclusive).
new_df4 = new_df4.loc[new_df4['Ni_mErr'].le(max_MAGerr)]
nrows4, ncols4 = new_df4.shape
print((nrows4, ncols4))
print('New df, IERR < MAX ERR: num rows, cols: ', nrows4, ncols4)
outlog.write('*************************\n')
outlog.write('New df, IERR < MAX ERR: num rows %d, cols %d\n' % (nrows4, ncols4));
In [13]:
%%time
# Read the old SSC into an in-memory dataframe.
#
# File layout, 37 columns:
#   Col 1 = junk str; Col 2-5 RA/Dec and errs; Col 6 nEpochs; Col 7 Ar_val
#   then nobs, mMed, mMean, mErr, mRMS, mChi2 for each band:
#   Col 8 - 13 = u, Col 14 - 19 = g, Col 20 - 25 = r, Col 26 - 31 = i, Col 32 - 37 = z
Ocolnames = ['Ocalib_fla', 'Ora', 'Odec', 'OraRMS', 'OdecRMS', 'OnEpochs', 'OAR_val',
             'Ou_Nobs', 'Ou_mMed', 'Ou_mMean', 'Ou_mErr', 'Ou_rms_scatt', 'Ou_chi2',
             'Og_Nobs', 'Og_mMed', 'Og_mMean', 'Og_mErr', 'Og_rms_scatt', 'Og_chi2',
             'Or_Nobs', 'Or_mMed', 'Or_mMean', 'Or_mErr', 'Or_rms_scatt', 'Or_chi2',
             'Oi_Nobs', 'Oi_mMed', 'Oi_mMean', 'Oi_mErr', 'Oi_rms_scatt', 'Oi_chi2',
             'Oz_Nobs', 'Oz_mMed', 'Oz_mMean', 'Oz_mErr', 'Oz_rms_scatt', 'Oz_chi2']

# USING DASK DATAFRAME
# The file is whitespace-delimited. ``sep=r'\s+'`` is the equivalent,
# non-deprecated spelling of ``delim_whitespace=True``, which pandas deprecated
# in 2.2 and plans to remove.
df_oSSC = dd.read_csv(oSSC, sep=r'\s+', comment='#', names=Ocolnames)
old_df = df_oSSC.compute()

# Report the size of the table as read, on screen and in the log.
orows, ocols = old_df.shape
print('Old df, as read: num rows, cols: ', orows, ocols)
outlog.write('*************************\n')
outlog.write('Old df, as read: num rows %d, cols %d\n' % (orows, ocols));
Out[13]:
In [14]:
# Inputs to the matching step. Both names are plain references (not copies):
# the fully cleaned new catalogue and the old catalogue exactly as read.
new_prematch_df, old_prematch_df = new_df4, old_df

# Size of the old catalogue going into the match.
prematch_orows = old_prematch_df.shape[0]
prematch_ocols = old_prematch_df.shape[1]
print('Old df, prematch: num rows, cols: ', prematch_orows, prematch_ocols)
outlog.write('*************************\n')
outlog.write('Old df, prematch: num rows %d, cols %d\n' % (prematch_orows, prematch_ocols));

# Size of the new catalogue going into the match.
prematch_nrows = new_prematch_df.shape[0]
prematch_ncols = new_prematch_df.shape[1]
print('New df, prematch: num rows, cols: ', prematch_nrows, prematch_ncols)
outlog.write('*************************\n')
outlog.write('New df, prematch: num rows %d, cols %d\n' % (prematch_nrows, prematch_ncols));
In [15]:
%%time
# Save the cleaned new catalogue before matching: no index column, and
# missing values (if any) written as -99.99.
paths = nSSC_prematch
prematch_csv = new_prematch_df.to_csv(
    path_or_buf=paths,
    index=False,
    na_rep=-99.99,
)

# Record the output file name on screen and in the log.
print('Prematch csv: ', nSSC_prematch)
outlog.write('*************************\n')
outlog.write('Prematch csv %s\n' % nSSC_prematch);
Out[15]:
In [16]:
%%time
# Positional cross-match of the cleaned new catalogue against the old one.
# Coordinates are handed over as fresh numpy arrays; tol_deg is the matching
# radius in degrees. idxs1 / idxs2 are used afterwards as positional row
# indices into the new / old dataframes respectively.
# NOTE(review): ds is presumably the per-pair separation - confirm against pyspherematch.
idxs1, idxs2, ds = pyspherematch.spherematch(
    new_prematch_df['Nra'].to_numpy(copy=True),
    new_prematch_df['Ndec'].to_numpy(copy=True),
    old_prematch_df['Ora'].to_numpy(copy=True),
    old_prematch_df['Odec'].to_numpy(copy=True),
    tol=tol_deg,
)

# Number of matched pairs, reported on screen and in the log.
nmatch = len(idxs1)
print('Num matched new - old:', nmatch)
outlog.write('*************************\n')
outlog.write('Num matched new - old: %d\n' % nmatch)
Out[16]:
In [17]:
# Pull the matched rows out of each catalogue by position, so that row k of
# new_matched_df and row k of old_matched_df come from the same matched pair.
new_matched_df = new_prematch_df.iloc[idxs1, :]
old_matched_df = old_prematch_df.iloc[idxs2, :]

# Size of the matched old catalogue.
matched_orows = old_matched_df.shape[0]
matched_ocols = old_matched_df.shape[1]
print('Old df, as matched: num rows, cols: ', matched_orows, matched_ocols)
outlog.write('*************************\n')
outlog.write('Old df, as matched: num rows %d, cols %d\n' % (matched_orows, matched_ocols));

# Size of the matched new catalogue.
matched_nrows = new_matched_df.shape[0]
matched_ncols = new_matched_df.shape[1]
print('New df, as matched: num rows, cols: ', matched_nrows, matched_ncols)
outlog.write('*************************\n')
outlog.write('New df, as matched: num rows %d, cols %d\n' % (matched_nrows, matched_ncols));
In [18]:
%%time
# Save both matched tables: no index column, missing values written as -99.99.

# Matched rows of the new catalogue.
paths = nSSC_matched
nmatched_csv = new_matched_df.to_csv(
    path_or_buf=paths,
    index=False,
    na_rep=-99.99,
)
print('New Matched csv: ', nSSC_matched)
outlog.write('*************************\n')
outlog.write('New Matched csv %s\n' % nSSC_matched);

# Matched rows of the old catalogue.
paths = oSSC_matched
omatched_csv = old_matched_df.to_csv(
    path_or_buf=paths,
    index=False,
    na_rep=-99.99,
)
print('Old Matched csv: ', oSSC_matched)
outlog.write('*************************\n')
outlog.write('Old Matched csv %s\n' % oSSC_matched);
Out[18]:
In [19]:
# CLOSE THE LOG FILE
# Final step of the notebook: closing the handle flushes any buffered log
# text to disk. outlog cannot be written to after this cell has run.
outlog.close()
In [ ]: