Libraries


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Dataset


In [2]:
my_data = np.loadtxt('/home/mldantas/Google Drive/Doutorado/GAMAII/Catalogue/Match06_small_mags.csv', 
                     delimiter=',', dtype=str)

In [3]:
my_dictionary = {}
for i in range(len(my_data[0, :])):                                         # Converting numpy array into dictionary
    my_dictionary[my_data[0, i]] = np.array(my_data[0 + 1:, i], dtype=str)

In [4]:
redshift = my_dictionary['Z'].astype(float)
fuv_band = my_dictionary['MAG_AB_FUV'].astype(float)   
nuv_band = my_dictionary['MAG_AB_NUV'].astype(float)
r_band   = my_dictionary['MAG_AB_R'].astype(float)

In [5]:
print redshift[[fuv_band>0]].shape
print fuv_band[[fuv_band>0]].shape


(71783,)
(71783,)

Cleaning the catalogue

We need to remove all the spurious data here.


In [17]:
indexes = np.arange(redshift.size)
index_clean = indexes[(redshift>0.015)*(r_band>0)*(nuv_band>0)*(fuv_band>0)*((fuv_band-nuv_band)<50)]

In [18]:
print redshift[index_clean].size


70640

In [19]:
my_clean_data = my_data[index_clean].astype(str)

In [21]:
print my_clean_data.shape
print my_clean_data[0,:]  #checking if the header is ok!


(70640, 161)
['CATAID' 'OBJID_SDSSDR7' 'ALPHA_J2000' 'DELTA_J2000' 'KRON_RADIUS'
 'PETRO_RADIUS' 'MAG_PETRO_u' 'MAGERR_PETRO_u' 'FLAGS_u' 'MAG_PETRO_g'
 'MAGERR_PETRO_g' 'FLAGS_g' 'MAG_PETRO_r' 'MAGERR_PETRO_r' 'FLAGS_r'
 'MAG_PETRO_i' 'MAGERR_PETRO_i' 'FLAGS_i' 'MAG_PETRO_z' 'MAGERR_PETRO_z'
 'FLAGS_z' 'MAG_PETRO_X' 'MAGERR_PETRO_X' 'FLAGS_X' 'MAG_PETRO_Y'
 'MAGERR_PETRO_Y' 'FLAGS_Y' 'MAG_PETRO_J' 'MAGERR_PETRO_J' 'FLAGS_J'
 'MAG_PETRO_H' 'MAGERR_PETRO_H' 'FLAGS_H' 'MAG_PETRO_K' 'MAGERR_PETRO_K'
 'FLAGS_K' 'FLAGS' 'PSFMAG_R' 'FIBERMAG_R' 'FLAGS_R_SDSS' 'PETRORAD_R'
 'PETROR90_R' 'PETROR50_R' 'PETROMAG_R' 'MODELMAG_U' 'MODELMAG_G'
 'MODELMAG_R' 'MODELMAG_I' 'MODELMAG_Z' 'STATUS' 'BEST_MAG_NUV'
 'BEST_MAGERR_NUV' 'BEST_MAG_FUV' 'BEST_MAGERR_FUV' 'BEST_METHOD'
 'NMATCHUV' 'NMATCHOPT' 'NUVFLAG' 'FUVFLAG' 'NN_DIST' 'NN_NMATCH4'
 'NN_MANY2ONE' 'NN_SFLAGS_NUV' 'NN_SFLAGS_FUV' 'KCORR_FUV' 'KCORR_NUV'
 'KCORR_U' 'KCORR_G' 'KCORR_R' 'KCORR_I' 'KCORR_Z' 'KCORR_Y' 'KCORR_J'
 'KCORR_H' 'KCORR_K' 'CHI2' 'MASS' 'INTSFH' 'METS' 'B300' 'B1000'
 'SURVEY_CODE' 'SURVEY_CLASS' 'nbands' 'S2N' 'PPP' 'logmstar' 'dellogmstar'
 'logmoverl_i' 'dellogmoverl_i' 'logage' 'dellogage' 'logtau' 'dellogtau'
 'logmintsfh' 'dellogmintsfh' 'logmremnants' 'dellogmremnants' 'metal'
 'delmetal' 'extBV' 'delextBV' 'logLWage' 'dellogLWage' 'gminusi'
 'delgminusi' 'uminusr' 'deluminusr' 'gminusi_stars' 'uminusr_stars'
 'C_logM_ur' 'C_logM_gi' 'C_logM_eBV' 'SPECID' 'SURVEY_CODE_EMLINES' 'SN'
 'D4000N' 'D4000N_ERR' 'HB_FLUX' 'HB_FLUX_ERR' 'HB_EW' 'HB_EW_ERR'
 'OIIIR_FLUX' 'OIIIR_FLUX_ERR' 'OIIIR_EW' 'OIIIR_EW_ERR' 'HA_FLUX'
 'HA_FLUX_ERR' 'HA_EW' 'HA_EW_ERR' 'NIIR_FLUX' 'NIIR_FLUX_ERR' 'NIIR_EW'
 'NIIR_EW_ERR' 'SURVEY' 'Z' 'NQ' 'PROB' 'MAG_AB_FUV' 'MAG_AB_NUV'
 'MAG_AB_U' 'MAG_AB_G' 'MAG_AB_R' 'MAG_AB_I' 'MAG_AB_Z' 'MAG_AB_Y'
 'MAG_AB_J' 'MAG_AB_H' 'MAG_AB_K' 'MAG_ABSOLUTE_FUV' 'MAG_ABSOLUTE_NUV'
 'MAG_ABSOLUTE_U' 'MAG_ABSOLUTE_G' 'MAG_ABSOLUTE_R' 'MAG_ABSOLUTE_I'
 'MAG_ABSOLUTE_Z' 'MAG_ABSOLUTE_Y' 'MAG_ABSOLUTE_J' 'MAG_ABSOLUTE_H'
 'MAG_ABSOLUTE_K' 'UV_CLASS_YI2011']

In [22]:
my_df = pd.DataFrame(my_clean_data)

In [23]:
my_df.to_csv('../Catalogue/Match06_small_mags_clean.csv', sep=',', header=None, index=False)

In [ ]: