In [1]:
import pandas as pd

In [2]:
# Load the photometry source table from a gzip-compressed CSV
# (the file name indicates SDSS DR7 photometry).
sdss = pd.read_csv("../alasdair/data/sdss_dr7_photometry_source.csv.gz", compression="gzip")

In [3]:
# Preview the first four rows, transposed so every column is listed on its own line.
sdss.head(4).transpose()


Out[3]:
0 1 2 3
ra 189.43 189.454 189.469 196.237
dec -0.131042 -0.0973128 -0.0359997 0.412347
class Star Star Star Galaxy
subclass A0 F5 F5 null
redshift 0.000648475 5.90614e-07 0.000579112 0.617936
redshiftErr 7.36059e-06 9.35495e-06 1.1806e-05 0.000131875
zWarning 0 0 0 0
psfMag_u 17.8481 17.6663 17.2815 24.6692
psfMagErr_u 0.0170338 0.0165735 0.0158687 0.841852
psfMag_g 16.6671 16.6459 16.2146 23.0541
psfMagErr_g 0.0144116 0.0143999 0.0143403 0.206945
psfMag_r 16.8553 16.2893 15.7688 21.4753
psfMagErr_r 0.0174833 0.0170755 0.0170243 0.117695
psfMag_i 17.0429 16.173 15.5095 20.4132
psfMagErr_i 0.0145105 0.0116746 0.0116027 0.165401
psfMag_z 17.1667 16.1347 15.4771 20.0413
psfMagErr_z 0.0246265 0.0176829 0.0171779 0.246179
petroMag_u 17.8809 17.7051 17.3333 23.9766
petroMagErr_u 0.0111319 0.00992255 0.00803849 2.12046
petroMag_g 16.6631 16.6762 16.253 23.0009
petroMagErr_g 0.00316584 0.00286717 0.00243847 0.672942
petroMag_r 16.8756 16.3333 15.8059 20.8641
petroMagErr_r 0.00861086 0.00274989 0.00226355 0.250036
petroMag_i 17.0827 16.216 15.5625 19.8014
petroMagErr_i 0.0201462 0.00314024 0.00228773 0.379935
petroMag_z 17.1984 16.1933 15.5178 19.405
petroMagErr_z 0.0425377 0.00919845 0.00564683 0.474357
extinction_u 0.119657 0.115112 0.118604 0.102667
extinction_g 0.0880428 0.0846981 0.0872679 0.0755416
extinction_r 0.0638559 0.0614302 0.063294 0.0547891
extinction_i 0.04842 0.0465806 0.0479939 0.0415449
extinction_z 0.0343304 0.0330262 0.0340283 0.0294559
petroRad_r 1.287 1.26579 1.26503 2.01832
petroRadErr_r 0.0195374 0.017794 0.0178495 0.390549

In [4]:
def filter_raw(all_data, mag_type, slope=1.5, intercept=0.3):
    """Keep rows passing the colour cut (g-r) > slope*(r-i) + intercept.

    With the default ``slope`` and ``intercept`` this is the original cut
    (g-r) > 1.5(r-i) + 0.3, computed from the raw magnitude columns.

    Parameters
    ----------
    all_data : pandas.DataFrame
        Table containing the columns ``<mag_type>Mag_g``, ``<mag_type>Mag_r``
        and ``<mag_type>Mag_i``.
    mag_type : str
        Prefix of the magnitude columns, e.g. ``'psf'`` or ``'petro'``.
    slope : float, optional
        Coefficient multiplying (r-i). Defaults to 1.5.
    intercept : float, optional
        Constant offset of the cut. Defaults to 0.3.

    Returns
    -------
    pandas.DataFrame
        The rows of ``all_data`` that satisfy the cut. Rows with a missing
        magnitude never satisfy it (a comparison with NaN is False) and are
        therefore dropped.

    Raises
    ------
    KeyError
        If any of the three required magnitude columns is absent.
    """
    needed = [mag_type + 'Mag_' + band for band in ('g', 'r', 'i')]
    # Report every absent column at once instead of failing on the first lookup.
    missing = [col for col in needed if col not in all_data.columns]
    if missing:
        raise KeyError('missing magnitude columns: ' + ', '.join(missing))
    col_g, col_r, col_i = needed
    gr = all_data[col_g] - all_data[col_r]
    ri = all_data[col_r] - all_data[col_i]
    # Same arithmetic form as the original expression, so the default
    # thresholds give bit-identical results.
    to_keep = gr - slope*ri - intercept > 0
    return all_data[to_keep]

def filter_norm(all_data, mag_type, slope=1.5, intercept=0.3):
    """Keep rows passing (g-r) > slope*(r-i) + intercept on the ``_w14`` colours.

    With the default ``slope`` and ``intercept`` this is the original cut
    (g-r) > 1.5(r-i) + 0.3, evaluated on the precomputed colour columns
    ``<mag_type>_g_r_w14`` and ``<mag_type>_r_i_w14``.

    NOTE(review): the ``_w14`` columns appear to be rescaled relative to the
    raw magnitude differences, so the same numeric thresholds may not select
    the same objects as a cut on raw magnitudes -- confirm this is intended.

    Parameters
    ----------
    all_data : pandas.DataFrame
        Table containing the columns ``<mag_type>_g_r_w14`` and
        ``<mag_type>_r_i_w14``.
    mag_type : str
        Prefix of the colour columns, e.g. ``'psf'`` or ``'petro'``.
    slope : float, optional
        Coefficient multiplying the r-i colour. Defaults to 1.5.
    intercept : float, optional
        Constant offset of the cut. Defaults to 0.3.

    Returns
    -------
    pandas.DataFrame
        The rows of ``all_data`` that satisfy the cut. Rows with a missing
        colour never satisfy it (a comparison with NaN is False) and are
        therefore dropped.

    Raises
    ------
    KeyError
        If either required colour column is absent.
    """
    col_gr = mag_type + '_g_r_w14'
    col_ri = mag_type + '_r_i_w14'
    # Report every absent column at once instead of failing on the first lookup.
    missing = [col for col in (col_gr, col_ri) if col not in all_data.columns]
    if missing:
        raise KeyError('missing colour columns: ' + ', '.join(missing))
    # Same arithmetic form as the original expression, so the default
    # thresholds give bit-identical results.
    to_keep = all_data[col_gr] - slope*all_data[col_ri] - intercept > 0
    return all_data[to_keep]

def filter_galaxy(all_data):
    """Select the rows whose ``class`` column equals ``'Galaxy'``.

    Takes a DataFrame with a ``class`` column and returns the matching rows
    with all columns kept.
    """
    is_galaxy = all_data['class'].eq('Galaxy')
    return all_data[is_galaxy]

In [5]:
# Select the galaxies once and reuse the result for both magnitude types,
# instead of re-filtering the full table for each call.
sdss_galaxies = filter_galaxy(sdss)
julie_psf = filter_raw(sdss_galaxies, 'psf')
julie_petro = filter_raw(sdss_galaxies, 'petro')
# Compare the size of the full table with the two colour-cut galaxy samples.
print(sdss.shape, julie_psf.shape, julie_petro.shape)


(2801002, 34) (811096, 34) (813824, 34)

In [6]:
# Read the second table from an HDF5 file; its recorded preview shows magnitude
# and colour columns carrying a `_w14` suffix.
# NOTE(review): presumably a normalised version of `sdss` -- confirm how it was produced.
norm_sdss = pd.read_hdf('../alasdair/data/sdss.h5')
# Preview the first four rows, transposed so every column is listed on its own line.
norm_sdss.head(4).transpose()


Out[6]:
0 1 2 3
ra 189.43 189.454 189.469 196.237
dec -0.131042 -0.0973128 -0.0359997 0.412347
class Star Star Star Galaxy
psfMag_r_w14 -1.6622 -2.01257 -2.33694 1.21261
psf_u_g_w14 -0.241727 -0.43927 -0.382566 0.299037
psf_g_r_w14 -1.82226 -0.909222 -0.761093 1.1396
psf_r_i_w14 -1.68724 -0.912979 -0.550549 1.49573
psf_i_z_w14 -1.71165 -1.08084 -1.10478 0.218688
petroMag_r_w14 -1.11639 -1.42294 -1.72309 1.15091
petro_u_g_w14 -0.0706262 -0.203637 -0.167706 -0.239479
petro_g_r_w14 -1.59351 -0.830489 -0.688523 1.63325
petro_r_i_w14 -1.56011 -0.827023 -0.5433 1.30943
petro_i_z_w14 -0.898704 -0.549991 -0.495537 0.39127
petroRad_r -0.753122 -0.761758 -0.762067 -0.455298

In [7]:
# Select the galaxies once and reuse the result for both magnitude types,
# instead of re-filtering the full table for each call.
norm_sdss_galaxies = filter_galaxy(norm_sdss)
norm_julie_psf = filter_norm(norm_sdss_galaxies, 'psf')
norm_julie_petro = filter_norm(norm_sdss_galaxies, 'petro')

# Compare the size of the full table with the two colour-cut galaxy samples.
print(norm_sdss.shape, norm_julie_psf.shape, norm_julie_petro.shape)


(2801002, 14) (635708, 14) (509501, 14)

In [ ]: