notebook.community

Edit and run



In [1]:

    
import splat
import wisps
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from os.path import expanduser
homedir = expanduser("~")
from astropy.io import ascii
%matplotlib inline
from scipy import stats
import ast



In [2]:

    
def strip_diction(s):
        #from string to dictionary
        return pd.Series(ast.literal_eval(((s.strip().replace('nan', "'nan'").replace('-inf', "'nan'")).replace('inf', "'nan'"))))
    
def reformat_index_table(df):
    #assign dictionary keys
    ids_list=[]
    snr_list=[]
    for k in df.columns:
        first=df[k].iloc[0]
        print (k)
        if isinstance(first, str):
            if first.strip().startswith("{"):
                if not k=='indices':
                    snr_list.append(df[k].apply(lambda x: strip_diction(x)))
                else:
                    ids_list.append(df[k].apply(lambda x: strip_diction(x)).applymap(tuple))
      
    
    return snr_list, ids_list

def replace(x):
    #print (x)
    if (isinstance(x, float)):
        if (np.isnan(x)):
            return tuple([np.nan, np.nan])
    else:
        return x
    
def comp_f_test(tupl):
    num=tupl[0]
    den=tupl[1]
    if den ==0: return np.nan
    else:
        x=num/den
        return stats.f.cdf(x, 2, 1, 0, scale=1)



In [3]:

    
#get photometry catalogs
w_phot=pd.read_csv(wisps.OUTPUT_FILES+'/wisp_photometry.csv')
hst_phot=pd.read_csv(wisps.OUTPUT_FILES+'/hst3d_photometry_all.csv')



In [4]:

    
hst_phot.columns









    Out[4]:





Index(['Unnamed: 0', 'phot_id_x', 'ra_x', 'dec_x', 'x_x', 'y_x', 'grism_id',
       'ifield', 'field_x', 'pointings',
       ...
       'y_y', 'z_spec_y', 'F160_mag', 'Faper160_mag', 'F160_mag_er',
       'Faper160_mag_er', 'F140_mag', 'Faper140_mag', 'F140_mag_er',
       'Faper140_mag_er'],
      dtype='object', length=435)



In [6]:

    
(hst_phot[['grism_id','ra_x', 'dec_x','field_x', 'phot_id_x', 'jh_mag', 'npoint','pointings', 'number', ]]).sample(n=100).head(10)









    Out[6]:







  
    
      
      grism_id
      ra_x
      dec_x
      field_x
      phot_id_x
      jh_mag
      npoint
      pointings
      number
    
  
  
    
      47098
      goodss-21-G141_07329
      53.165222
      -27.874525
      goodss
      7329
      23.9714
      1
      21
      7329
    
    
      23613
      cosmos-09-G141_16988
      150.195282
      2.352195
      cosmos
      16988
      25.4664
      1
      09
      16988
    
    
      4162
      aegis-07-G141_12964
      215.010742
      52.914192
      aegis
      12964
      25.3692
      2
      07,08
      12964
    
    
      57977
      goodss-11-G141_39715
      53.154591
      -27.725349
      goodss
      39715
      24.2746
      2
      11,33
      39715
    
    
      3317
      aegis-02-G141_10542
      214.908615
      52.831768
      aegis
      10542
      23.9244
      1
      02
      10542
    
    
      39125
      goodsn-36-G141_22396
      189.305695
      62.252220
      goodsn
      22396
      25.1177
      1
      36
      22396
    
    
      65199
      uds-03-G141_12237
      34.480518
      -5.235213
      uds
      12237
      24.7177
      2
      02,03
      12237
    
    
      20001
      cosmos-26-G141_11211
      150.057220
      2.293641
      cosmos
      11211
      23.7014
      1
      26
      11211
    
    
      21442
      cosmos-20-G141_13383
      150.155579
      2.315413
      cosmos
      13383
      25.3157
      1
      20
      13383
    
    
      8732
      aegis-08-G141_25679
      214.978973
      52.942734
      aegis
      25679
      25.0299
      1
      08
      25679



In [7]:

    
(w_phot[['RA_DEC_NAME', 'NUMBER', 'FIELD', 'grism_id']]).sample(n=100).head(10)









    Out[7]:







  
    
      
      RA_DEC_NAME
      NUMBER
      FIELD
      grism_id
    
  
  
    
      10192
      20_212.414169_26.35299
      609
      Par20
      Par20-00609
    
    
      137843
      358_357.770721_-43.393
      106
      Par358
      Par358-00106
    
    
      74990
      198_15.021750_-51.1405
      20
      Par198
      Par198-00020
    
    
      67635
      170_151.751099_-29.886
      99
      Par170
      Par170-00099
    
    
      14989
      32_196.347931_-25.6431
      285
      Par32
      Par32-00285
    
    
      126832
      337_165.576996_5.33420
      554
      Par337
      Par337-00554
    
    
      58809
      141_223.586304_16.1812
      278
      Par141
      Par141-00278
    
    
      104992
      299_138.669220_47.9024
      319
      Par299
      Par299-00319
    
    
      9319
      19_38.723774_-4.121420
      288
      Par19
      Par19-00288
    
    
      46443
      105_230.063873_-24.999
      226
      Par105
      Par105-00226



In [ ]:



In [9]:

    
#use the indices
hst_3d_indices=pd.read_pickle('/users/caganze/new_indices_measurements_october.pkl')
wisp_indices=pd.read_pickle('/users/caganze/new_wisps_indices_measurements_october.pkl')



In [18]:

    
#drop nan columns
hst_3d_indices=(hst_3d_indices[~ hst_3d_indices.isnull().all(1)])



In [20]:

    
#combine both
new_indices=pd.concat([wisp_indices,hst_3d_indices ])



In [21]:

    
new_indices=new_indices[~ new_indices.isnull().all(1)]



In [25]:

    
new_indices=new_indices.reset_index(drop=True)



In [26]:

    
new_indices.columns









    Out[26]:





Index(['CH_4/H-Cont', 'CH_4/H_2O-1', 'CH_4/H_2O-2', 'CH_4/J-Cont',
       'H-cont/H_2O-1', 'H-cont/H_2O-2', 'H-cont/J-Cont', 'H_2O-1/J-Cont',
       'H_2O-2/H_2O-1', 'H_2O-2/J-Cont', 'cdf_snr', 'f_test', 'line_chi',
       'name', 'snr1', 'snr2', 'snr3', 'snr4', 'spex_chi', 'spt'],
      dtype='object')



In [28]:

    
#cols=list(wisps.INDEX_NAMES)



In [29]:

    
def combined_wisp_hst_catalogs(hst3d_phot,wisp_phot, indices):
    """
    combine both hst-3d and wisps into one big file with all the information
    """
    #hst_3d does not have 110 photometry
    hst3d_phot['F110_mag']=np.nan
    hst3d_phot['F110_mag_er']=np.nan

    
    #combine flags into one flag
    flgs=hst3d_phot[['use_phot_x', 'f_cover', 'f_flagged', 'f_negative']].values
    hst3d_phot['phot_flags']= pd.Series([i for i in flgs])
    
    hst3d_phot['survey']='HST3D'
    wisp_phot['survey']='WISP'
    wisp_phot=wisp_phot.rename(columns={'EXTRACTION_FLAG':'phot_flags'})
   
    #rename some columns
    indices=indices.rename(columns={'name':'grism_id'})
    
    ##drop .ascii from hst_phot
    indices['grism_id']=indices['grism_id']
    
    #combined_photometry (the order matters: HST3D+WISPP
    comb_phot=pd.DataFrame()
    grism_ids=hst3d_phot['grism_id'].append(wisp_phot['grism_id'])
    star_flags=hst3d_phot['star_flag'].append(wisp_phot['star_flag'])
    comb_phot['grism_id']=grism_ids
    comb_phot['star_flag']=star_flags
    comb_phot['class_star']=hst3d_phot['class_star'].append(wisp_phot['star_flag'])
    
    print(comb_phot.columns)
    for flt in ['110', '140', '160']:
        mag_tuple1=hst3d_phot[['F'+flt+'_mag', 'F'+flt+'_mag_er']].apply(tuple, axis=1)
        mag_tuple2=wisp_phot[['F'+flt+'W', 'F'+flt+'W_ER']].apply(tuple, axis=1)
        mags=mag_tuple1.append(mag_tuple2)
        comb_phot['F'+flt]=mags
        
    ras=hst3d_phot['ra_x'].append(wisp_phot['RA'])
    decs=hst3d_phot['dec_x'].append(wisp_phot['DEC'])
        
    comb_phot['RA']=ras
    comb_phot['DEC']=decs
    comb_phot['survey']=hst3d_phot['survey'].append(wisp_phot['survey'])
    #comb_phot['flags']=hst3d_phot['flags'].append(wisp_phot['flags'])
    
    #strip white spaces from grism_ids #the combination might pose problems
    comb_phot['grism_id']=comb_phot['grism_id']
    indices['grism_id']=indices['grism_id']
    
    indices=indices.drop_duplicates(subset='grism_id')
    comb_phot=comb_phot.drop_duplicates(subset='grism_id')
    
    comb_phot.grism_id=comb_phot.grism_id.apply(lambda x: x.replace('.1D.ascii', ' ').strip().lower())
    indices.grism_id=indices.grism_id.apply(lambda x: x.replace('.1D.ascii', ' ').strip().lower())
    
    print (comb_phot.grism_id.iloc[0], indices.grism_id.iloc[0])
    master_table=pd.merge(indices, comb_phot, on='grism_id', validate='one_to_one')
    
    # I probably lost tons of objects with grism id ='0000'
    print (master_table.shape, comb_phot.shape, indices.shape)

    
    master_table[['spex_chi', 'line_chi']]=master_table[['spex_chi', 'line_chi']].applymap(np.float)
    master_table['x']=master_table.spex_chi/master_table.line_chi
    master_table['f_test']=master_table.f_test

    #save the result
    #master_table=master_table.join(df)

    #drop the spectrum column because it makes the file heavier
    #master_table=master_table.drop(columns='spectra')

    #make the cut 
    

    return master_table



In [36]:

    
w_phot.columns









    Out[36]:





Index(['Unnamed: 0', 'EXTRACTION_FLAG', 'NIMCOS_110W', 'NIMCOS_110W_ER',
       'NIMCOS_140W', 'NIMCOS_140W_ER', 'NIMCOS_160W', 'NIMCOS_160W_ER',
       'NUMBER', 'RA_DEC_NAME', 'index', 'star_flag', '2', '3', '4', '5', '6',
       '7', '8', '9', '10', '11', '12', '13', '14', 'FLAGS', 'RA', 'DEC',
       'FIELD', 'grism_id'],
      dtype='object')



In [61]:

    
(w_phot[w_phot.grism_id.isin(['Par32-00075', 'Par58-00112', 'Par130-00092'])])[['NIMCOS_110W', 'NIMCOS_110W_ER',
       'NIMCOS_140W', 'NIMCOS_140W_ER', 'NIMCOS_160W', 'NIMCOS_160W_ER',
       'NUMBER', 'RA_DEC_NAME']]









    Out[61]:







  
    
      
      NIMCOS_110W
      NIMCOS_110W_ER
      NIMCOS_140W
      NIMCOS_140W_ER
      NIMCOS_160W
      NIMCOS_160W_ER
      NUMBER
      RA_DEC_NAME
    
  
  
    
      14779
      23.12
      0.035
      23.045
      0.028
      22.653
      0.051
      75
      32_196.356232_-25.6413
    
    
      26643
      NaN
      NaN
      23.142
      0.045
      NaN
      NaN
      112
      58_188.176712_-0.55185
    
    
      54237
      NaN
      NaN
      NaN
      NaN
      22.726
      0.036
      92
      130_46.921608_-72.7326



In [30]:

    
hst_phot.shape, w_phot.shape









    Out[30]:





((79609, 435), (204078, 30))



In [39]:

    
mdf=combined_wisp_hst_catalogs(hst_phot, w_phot, new_indices)









    



Index(['grism_id', 'star_flag', 'class_star'], dtype='object')
aegis-26-g141_00002 par321-00514
(271915, 28) (279595, 9) (430098, 20)



In [40]:

    
mdf.shape









    Out[40]:





(271915, 29)



In [41]:

    
#get all the observation info for each field
obs=pd.read_csv(wisps.OUTPUT_FILES+'/observation_log.csv')
obs=obs.drop(columns=['Unnamed: 0']).drop_duplicates(subset='POINTING').reindex()



In [42]:

    
obs.columns=[x.lower() for x in obs.columns]



In [43]:

    
obs.columns









    Out[43]:





Index(['ra (deg)', 'dec(deg)', 'l (deg)', 'b (deg)', 'exposure (s)',
       'observation date (ut)', 'pointing'],
      dtype='object')



In [44]:

    
(obs[obs.pointing=='par1'])['exposure (s)'].values[0]









    Out[44]:





2609.0



In [45]:

    
def get_proper_pointing(grism_id):
    grism_id=grism_id.lower()
    if grism_id.startswith('par'):
        return grism_id.split('-')[0]
    else:
        return grism_id.split('-g141')[0]
    
def get_time(pntg):
    try:
        g=(obs[obs.pointing==pntg])[['exposure (s)', 'observation date (ut)']]
        return g.values[0]
    except:
        return (np.nan, np.nan)

def add_pointing_information(row):
    
    pntg=get_proper_pointing(row.grism_id)
    obst,   obsdate = get_time(pntg)
    s3 = pd.Series({'pointing':pntg, 'exposure_time': obst, 'observation_date':obsdate})
    row=row.append(s3)
    return row



In [46]:

    
obs.columns=obs.columns.str.lower()



In [47]:

    
mt=mdf.reset_index(drop=True).apply(add_pointing_information, axis=1)



In [48]:

    
mt.exposure_time.values, mt.shape









    Out[48]:





(array([4915., 4915., 4915., ..., 3812., 3812., 3812.]), (271915, 32))



In [67]:

    
mt.to_hdf(wisps.COMBINED_PHOTO_SPECTRO_FILE, key='all_phot_spec_data')



In [50]:

    
obs.columns









    Out[50]:





Index(['ra (deg)', 'dec(deg)', 'l (deg)', 'b (deg)', 'exposure (s)',
       'observation date (ut)', 'pointing'],
      dtype='object')



In [51]:

    
stars=mt[mt.class_star !=0.]



In [52]:

    
stars.shape









    Out[52]:





(110930, 32)



In [53]:

    
stars.to_hdf(wisps.COMBINED_PHOTO_SPECTRO_FILE, key='stars')



In [54]:

    
st=wisps.Annotator.reformat_table(mt)



In [64]:

    
snrs=(st[st.F140.between(15.0, 26.)]).snr1.values
mags_F140=(st[st.F140.between(15.0, 26.)]).F140.values
mags_F110=(st[st.F140.between(15.0, 26.)]).F110.values
mags_F160=(st[st.F140.between(15.0, 26.)]).F160.values
xs=(st[st.F140.between(15.0, 26.)]).class_star



In [56]:

    
idx=np.logical_and(snrs<200., snrs>0.1)



In [57]:

    
import seaborn as sns



In [58]:

    
cmap=sns.diverging_palette(150, 275, s=80, l=55, n=9, as_cmap=True)



In [59]:

    
fig, ax=plt.subplots(figsize=(8, 6))
c=plt.scatter(np.log10(snrs[idx]), mags_140[idx], marker='+', alpha=0.3, c=xs[idx], cmap=cmap)
plt.xlabel('Log SNR', fontsize=18)
plt.ylabel('F140W', fontsize=18)
#plt.axvline(np.log10(3.), c='k')
plt.minorticks_on()

cbar=plt.colorbar()
plt.tight_layout()
cbar.ax.set_title('CLASS STAR FLAG', fontsize=18)









    Out[59]:





Text(0.5, 1, 'CLASS STAR FLAG')



In [65]:

    
fig, ax=plt.subplots(figsize=(8, 6))
c=plt.scatter(np.log10(snrs[idx]), mags_F110[idx], marker='+', alpha=0.3, c=xs[idx], cmap=cmap)
plt.xlabel('Log SNR', fontsize=18)
plt.ylabel('F110W', fontsize=18)
#plt.axvline(np.log10(3.), c='k')
plt.minorticks_on()

cbar=plt.colorbar()
plt.tight_layout()
cbar.ax.set_title('CLASS STAR FLAG', fontsize=18)









    Out[65]:





Text(0.5, 1, 'CLASS STAR FLAG')



In [66]:

    
fig, ax=plt.subplots(figsize=(8, 6))
c=plt.scatter(np.log10(snrs[idx]), mags_F160[idx], marker='+', alpha=0.3, c=xs[idx], cmap=cmap)
plt.xlabel('Log SNR', fontsize=18)
plt.ylabel('F110W', fontsize=18)
#plt.axvline(np.log10(3.), c='k')
plt.minorticks_on()

cbar=plt.colorbar()
plt.tight_layout()
cbar.ax.set_title('CLASS STAR FLAG', fontsize=18)









    Out[66]:





Text(0.5, 1, 'CLASS STAR FLAG')



In [ ]:

	grism_id	ra_x	dec_x	field_x	phot_id_x	jh_mag	npoint	pointings	number
47098	goodss-21-G141_07329	53.165222	-27.874525	goodss	7329	23.9714	1	21	7329
23613	cosmos-09-G141_16988	150.195282	2.352195	cosmos	16988	25.4664	1	09	16988
4162	aegis-07-G141_12964	215.010742	52.914192	aegis	12964	25.3692	2	07,08	12964
57977	goodss-11-G141_39715	53.154591	-27.725349	goodss	39715	24.2746	2	11,33	39715
3317	aegis-02-G141_10542	214.908615	52.831768	aegis	10542	23.9244	1	02	10542
39125	goodsn-36-G141_22396	189.305695	62.252220	goodsn	22396	25.1177	1	36	22396
65199	uds-03-G141_12237	34.480518	-5.235213	uds	12237	24.7177	2	02,03	12237
20001	cosmos-26-G141_11211	150.057220	2.293641	cosmos	11211	23.7014	1	26	11211
21442	cosmos-20-G141_13383	150.155579	2.315413	cosmos	13383	25.3157	1	20	13383
8732	aegis-08-G141_25679	214.978973	52.942734	aegis	25679	25.0299	1	08	25679

	RA_DEC_NAME	NUMBER	FIELD	grism_id
10192	20_212.414169_26.35299	609	Par20	Par20-00609
137843	358_357.770721_-43.393	106	Par358	Par358-00106
74990	198_15.021750_-51.1405	20	Par198	Par198-00020
67635	170_151.751099_-29.886	99	Par170	Par170-00099
14989	32_196.347931_-25.6431	285	Par32	Par32-00285
126832	337_165.576996_5.33420	554	Par337	Par337-00554
58809	141_223.586304_16.1812	278	Par141	Par141-00278
104992	299_138.669220_47.9024	319	Par299	Par299-00319
9319	19_38.723774_-4.121420	288	Par19	Par19-00288
46443	105_230.063873_-24.999	226	Par105	Par105-00226

	NIMCOS_110W	NIMCOS_110W_ER	NIMCOS_140W	NIMCOS_140W_ER	NIMCOS_160W	NIMCOS_160W_ER	NUMBER	RA_DEC_NAME
14779	23.12	0.035	23.045	0.028	22.653	0.051	75	32_196.356232_-25.6413
26643	NaN	NaN	23.142	0.045	NaN	NaN	112	58_188.176712_-0.55185
54237	NaN	NaN	NaN	NaN	22.726	0.036	92	130_46.921608_-72.7326