In [1]:
import splat
import wisps
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from os.path import expanduser
homedir = expanduser("~")
from astropy.io import ascii
%matplotlib inline
from scipy import stats
import ast
In [2]:
def strip_diction(s):
    """Parse the text form of a dictionary into a ``pandas.Series``.

    Bare ``nan``, ``inf`` and ``-inf`` tokens are not Python literals, so each
    one is rewritten to the string ``'nan'`` before the text is handed to
    ``ast.literal_eval``.

    Only standalone tokens are rewritten. Text that is already quoted, or that
    merely contains those letters (for example a key called ``'info'``), is
    left untouched; a plain substring replacement would corrupt such entries
    and make the literal unparsable.

    Parameters
    ----------
    s : str
        Text of a dict literal, e.g. ``"{'a': 1.0, 'b': nan}"``.

    Returns
    -------
    pandas.Series
        One entry per dictionary key. Non-finite values appear as the string
        ``'nan'`` (not as a float).

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ``ast.literal_eval`` when the cleaned text is still
        not a valid literal.
    """
    import re  # local import: this helper is the only user

    # Optional leading minus, then nan/inf, not glued to a word character or a quote.
    cleaned = re.sub(r"(?<![\w'\"])-?(?:nan|inf)(?![\w'\"])", "'nan'", s.strip())
    return pd.Series(ast.literal_eval(cleaned))
def reformat_index_table(df):
    """Expand the columns of ``df`` that store dict literals as text.

    A column qualifies when its first value is a string that starts with
    ``{`` (after stripping whitespace). Every qualifying column is parsed row
    by row with ``strip_diction``. For the column named ``'indices'`` each
    parsed cell is additionally converted to a tuple.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose dict-like columns should be expanded.

    Returns
    -------
    (snr_list, ids_list) : tuple of list
        ``snr_list`` holds one parsed result per qualifying column other than
        ``'indices'``; ``ids_list`` holds the parsed ``'indices'`` column with
        tuple cells. Both lists are empty when ``df`` has no rows or no
        qualifying columns.
    """
    snr_list = []
    ids_list = []

    # With zero rows there is no first value to inspect; nothing to expand.
    if len(df) == 0:
        return snr_list, ids_list

    for k in df.columns:
        first = df[k].iloc[0]
        if not (isinstance(first, str) and first.strip().startswith("{")):
            continue
        parsed = df[k].apply(strip_diction)
        if k == 'indices':
            # Column-wise Series.map is the element-wise equivalent of
            # DataFrame.applymap and is available on every pandas version.
            ids_list.append(parsed.apply(lambda col: col.map(tuple)))
        else:
            snr_list.append(parsed)
    return snr_list, ids_list
def replace(x):
    """Swap a missing scalar for a ``(nan, nan)`` pair.

    Parameters
    ----------
    x : object
        Any value.

    Returns
    -------
    object
        ``(nan, nan)`` when ``x`` is a float NaN; otherwise ``x`` itself.
        Every input yields an explicit result, so no caller receives an
        accidental ``None``.
    """
    if isinstance(x, float) and np.isnan(x):
        return (np.nan, np.nan)
    return x
def comp_f_test(tupl, dfn=2, dfd=1):
    """Evaluate the F-distribution CDF at the ratio of two values.

    Parameters
    ----------
    tupl : sequence
        ``tupl[0]`` is the numerator and ``tupl[1]`` the denominator of the
        ratio.
    dfn, dfd : number, optional
        Numerator and denominator degrees of freedom passed to
        ``scipy.stats.f.cdf``. The defaults (2 and 1) are the values that were
        previously hard-coded.

    Returns
    -------
    float
        ``stats.f.cdf(num / den, dfn, dfd)`` with ``loc=0`` and ``scale=1``,
        or NaN when the denominator equals zero.
    """
    num, den = tupl[0], tupl[1]
    if den == 0:
        # The ratio is undefined; report NaN rather than dividing.
        return np.nan
    return stats.f.cdf(num / den, dfn, dfd, 0, scale=1)
In [3]:
#get photometry catalogs
# Both tables are read from CSV files located under wisps.OUTPUT_FILES:
# w_phot from wisp_photometry.csv and hst_phot from hst3d_photometry_all.csv.
w_phot=pd.read_csv(wisps.OUTPUT_FILES+'/wisp_photometry.csv')
hst_phot=pd.read_csv(wisps.OUTPUT_FILES+'/hst3d_photometry_all.csv')
In [4]:
# Display the column names of the table read from hst3d_photometry_all.csv.
hst_phot.columns
Out[4]:
In [6]:
# Preview ten rows of selected hst_phot columns.
# NOTE(review): sample(n=100) is unseeded, so the preview differs between runs,
# and it raises if the table has fewer than 100 rows.
(hst_phot[['grism_id','ra_x', 'dec_x','field_x', 'phot_id_x', 'jh_mag', 'npoint','pointings', 'number', ]]).sample(n=100).head(10)
Out[6]:
In [7]:
# Preview ten rows of the identifier columns of w_phot.
# NOTE(review): sample(n=100) is unseeded, so the preview differs between runs,
# and it raises if the table has fewer than 100 rows.
(w_phot[['RA_DEC_NAME', 'NUMBER', 'FIELD', 'grism_id']]).sample(n=100).head(10)
Out[7]:
In [ ]:
In [9]:
#use the indices
# Load the pickled index-measurement tables for both data sets.
# NOTE(review): both paths are absolute and user-specific, so this cell only
# runs on a machine with that home directory; consider a configurable data dir.
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
hst_3d_indices=pd.read_pickle('/users/caganze/new_indices_measurements_october.pkl')
wisp_indices=pd.read_pickle('/users/caganze/new_wisps_indices_measurements_october.pkl')
In [18]:
# Discard rows of hst_3d_indices in which every column is null
# (rows are removed, not columns).
hst_3d_indices = hst_3d_indices.dropna(how="all")
In [20]:
#combine both
# Stack the WISP rows on top of the 3D-HST rows; the original row labels are
# kept here and only replaced by a later reset_index call.
new_indices=pd.concat([wisp_indices,hst_3d_indices ])
In [21]:
# Discard rows of the combined table in which every column is null.
new_indices = new_indices.dropna(how="all")
In [25]:
# Replace the row labels left over from the concatenation with a fresh 0..n-1 index.
new_indices=new_indices.reset_index(drop=True)
In [26]:
# Display the column names of the combined index-measurement table.
new_indices.columns
Out[26]:
In [28]:
#cols=list(wisps.INDEX_NAMES)
In [29]:
def _stack_series(first, second):
    """Concatenate two Series end to end under a fresh 0..n-1 index."""
    return pd.concat([first, second], ignore_index=True)


def _normalize_grism_id(ids):
    """Remove '.1D.ascii', trim whitespace and lower-case a Series of ids."""
    return ids.str.replace('.1D.ascii', ' ', regex=False).str.strip().str.lower()


def _paired_columns(frame, value_col, error_col):
    """Return a Series of (value, error) tuples taken from two columns of ``frame``."""
    return pd.Series(list(zip(frame[value_col], frame[error_col])), index=frame.index)


def combined_wisp_hst_catalogs(hst3d_phot, wisp_phot, indices):
    """
    combine both hst-3d and wisps into one big file with all the information

    The two photometry tables are stacked (HST3D rows first, then WISP rows)
    into a common set of columns and then inner-joined to ``indices`` on a
    normalised ``grism_id``.

    Parameters
    ----------
    hst3d_phot : pandas.DataFrame
        3D-HST photometry. Columns read: ``grism_id``, ``star_flag``,
        ``class_star``, ``use_phot_x``, ``f_cover``, ``f_flagged``,
        ``f_negative``, ``F140_mag``, ``F140_mag_er``, ``F160_mag``,
        ``F160_mag_er``, ``ra_x``, ``dec_x``.
    wisp_phot : pandas.DataFrame
        WISP photometry. Columns read: ``grism_id``, ``star_flag``,
        ``F110W``/``F140W``/``F160W`` and their ``_ER`` companions, ``RA``,
        ``DEC``.
    indices : pandas.DataFrame
        Index measurements. Its ``name`` column is renamed to ``grism_id``;
        ``spex_chi`` and ``line_chi`` must be convertible to float.

    Returns
    -------
    pandas.DataFrame
        One row per ``grism_id`` present in both ``indices`` and the stacked
        photometry. Magnitudes are stored as ``(value, error)`` tuples in
        ``F110``, ``F140`` and ``F160``; ``x`` is ``spex_chi / line_chi``.

    Notes
    -----
    * Side effects kept from the earlier implementation: ``F110_mag``,
      ``F110_mag_er``, ``phot_flags`` and ``survey`` are added to the caller's
      ``hst3d_phot`` in place, and ``survey`` to the caller's ``wisp_phot``.
    * Ids are normalised *before* duplicates are dropped, so two spellings of
      the same id (different case, trailing ``.1D.ascii``, stray whitespace)
      can no longer trip the ``one_to_one`` merge validation. Rows whose id is
      missing are discarded.
    * A one-line shape summary (merged, photometry, indices) is printed.
    """
    #hst_3d does not have 110 photometry
    hst3d_phot['F110_mag'] = np.nan
    hst3d_phot['F110_mag_er'] = np.nan

    #combine flags into one flag (one array of the four flags per row); the
    #explicit index keeps rows aligned when the frame has a non-default index
    flag_rows = hst3d_phot[['use_phot_x', 'f_cover', 'f_flagged', 'f_negative']].values
    hst3d_phot['phot_flags'] = pd.Series(list(flag_rows), index=hst3d_phot.index)

    hst3d_phot['survey'] = 'HST3D'
    wisp_phot['survey'] = 'WISP'
    # The renamed flag column is not propagated into the combined table yet.
    wisp_phot = wisp_phot.rename(columns={'EXTRACTION_FLAG': 'phot_flags'})

    #combined photometry (the order matters: HST3D rows first, then WISP)
    comb_phot = pd.DataFrame({
        'grism_id': _stack_series(hst3d_phot['grism_id'], wisp_phot['grism_id']),
        'star_flag': _stack_series(hst3d_phot['star_flag'], wisp_phot['star_flag']),
        # NOTE(review): the WISP half is filled from 'star_flag', not from a
        # 'class_star' column; kept as found, confirm this is intended.
        'class_star': _stack_series(hst3d_phot['class_star'], wisp_phot['star_flag']),
    })
    for flt in ['110', '140', '160']:
        comb_phot['F' + flt] = _stack_series(
            _paired_columns(hst3d_phot, 'F' + flt + '_mag', 'F' + flt + '_mag_er'),
            _paired_columns(wisp_phot, 'F' + flt + 'W', 'F' + flt + 'W_ER'),
        )
    comb_phot['RA'] = _stack_series(hst3d_phot['ra_x'], wisp_phot['RA'])
    comb_phot['DEC'] = _stack_series(hst3d_phot['dec_x'], wisp_phot['DEC'])
    comb_phot['survey'] = _stack_series(hst3d_phot['survey'], wisp_phot['survey'])

    #rename some columns, then bring both id columns to the same spelling
    indices = indices.rename(columns={'name': 'grism_id'})
    indices = indices.assign(grism_id=_normalize_grism_id(indices['grism_id']))
    comb_phot['grism_id'] = _normalize_grism_id(comb_phot['grism_id'])

    # De-duplicate only after normalisation so the merge keys are truly unique.
    indices = indices.dropna(subset=['grism_id']).drop_duplicates(subset='grism_id')
    comb_phot = comb_phot.dropna(subset=['grism_id']).drop_duplicates(subset='grism_id')

    master_table = pd.merge(indices, comb_phot, on='grism_id', validate='one_to_one')
    # I probably lost tons of objects with grism id ='0000'
    print(master_table.shape, comb_phot.shape, indices.shape)

    chi_cols = ['spex_chi', 'line_chi']
    master_table[chi_cols] = master_table[chi_cols].astype(float)
    master_table['x'] = master_table.spex_chi / master_table.line_chi
    return master_table
In [36]:
# Display the column names of the table read from wisp_photometry.csv.
w_phot.columns
Out[36]:
In [61]:
# Show the NIMCOS_* magnitude columns, NUMBER and RA_DEC_NAME for three
# hand-picked grism ids.
(w_phot[w_phot.grism_id.isin(['Par32-00075', 'Par58-00112', 'Par130-00092'])])[['NIMCOS_110W', 'NIMCOS_110W_ER',
'NIMCOS_140W', 'NIMCOS_140W_ER', 'NIMCOS_160W', 'NIMCOS_160W_ER',
'NUMBER', 'RA_DEC_NAME']]
Out[61]:
In [30]:
# (rows, columns) of the two photometry tables.
hst_phot.shape, w_phot.shape
Out[30]:
In [39]:
# Merge both photometry tables with the index measurements into one table.
mdf=combined_wisp_hst_catalogs(hst_phot, w_phot, new_indices)
In [40]:
# (rows, columns) of the merged table.
mdf.shape
Out[40]:
In [41]:
#get all the observation info for each field
# Read the observation log, drop the 'Unnamed: 0' column and keep the first
# row for each POINTING value.
# NOTE(review): .reindex() without arguments leaves the row labels unchanged;
# .reset_index(drop=True) may have been intended — confirm.
obs=pd.read_csv(wisps.OUTPUT_FILES+'/observation_log.csv')
obs=obs.drop(columns=['Unnamed: 0']).drop_duplicates(subset='POINTING').reindex()
In [42]:
# Lower-case every column name; get_time looks columns up by their lower-case names.
obs.columns=[x.lower() for x in obs.columns]
In [43]:
# Display the (now lower-case) column names of the observation log.
obs.columns
Out[43]:
In [44]:
# Spot check: the 'exposure (s)' value of the first row whose pointing is 'par1'.
(obs[obs.pointing=='par1'])['exposure (s)'].values[0]
Out[44]:
In [45]:
def get_proper_pointing(grism_id):
    """Derive the pointing name from a grism id.

    The id is lower-cased first. Ids that begin with ``par`` are cut at the
    first ``-``; every other id is cut at the first ``-g141``. An id that does
    not contain the separator is returned whole (lower-cased).

    Parameters
    ----------
    grism_id : str

    Returns
    -------
    str
        The text preceding the separator.
    """
    lowered = grism_id.lower()
    separator = '-' if lowered.startswith('par') else '-g141'
    return lowered.split(separator)[0]
def get_time(pntg, obs_table=None):
    """Look up the exposure time and observation date of a pointing.

    Parameters
    ----------
    pntg : str
        Pointing name, compared for equality with the ``pointing`` column.
    obs_table : pandas.DataFrame, optional
        Table providing the columns ``pointing``, ``exposure (s)`` and
        ``observation date (ut)``. When omitted, the notebook-level ``obs``
        table is used.

    Returns
    -------
    sequence of length 2
        ``(exposure, observation date)`` taken from the first matching row, or
        ``(nan, nan)`` when no row matches.

    Raises
    ------
    KeyError, AttributeError
        When the table lacks one of the required columns. A blanket
        ``except`` used to turn this into silent NaNs for every row, which hid
        problems such as column names that had not been lower-cased yet.
    """
    table = obs if obs_table is None else obs_table
    matches = table.loc[table.pointing == pntg, ['exposure (s)', 'observation date (ut)']]
    if matches.empty:
        # Unknown pointing: best-effort fallback.
        return (np.nan, np.nan)
    return matches.values[0]
def add_pointing_information(row):
    """Extend a table row with its pointing, exposure time and observation date.

    Parameters
    ----------
    row : pandas.Series
        A row exposing a ``grism_id`` entry.

    Returns
    -------
    pandas.Series
        The entries of ``row`` followed by ``pointing``, ``exposure_time`` and
        ``observation_date``.
    """
    pntg = get_proper_pointing(row.grism_id)
    obst, obsdate = get_time(pntg)
    extra = pd.Series({'pointing': pntg, 'exposure_time': obst, 'observation_date': obsdate})
    # Series.append was removed in pandas 2.0; pd.concat is the supported equivalent.
    return pd.concat([row, extra])
In [46]:
# Lower-case the column names of obs (an earlier cell already does the same
# with a list comprehension, so this has no further effect when run in order).
obs.columns=obs.columns.str.lower()
In [47]:
# Apply add_pointing_information to each row, which appends the pointing,
# exposure_time and observation_date entries.
mt=mdf.reset_index(drop=True).apply(add_pointing_information, axis=1)
In [48]:
# Inspect the looked-up exposure times and the size of the extended table.
mt.exposure_time.values, mt.shape
Out[48]:
In [67]:
# Store the full table in the HDF file at wisps.COMBINED_PHOTO_SPECTRO_FILE
# under the key 'all_phot_spec_data'.
mt.to_hdf(wisps.COMBINED_PHOTO_SPECTRO_FILE, key='all_phot_spec_data')
In [50]:
# Display the column names of the observation log again.
obs.columns
Out[50]:
In [51]:
# Keep the rows whose class_star differs from 0. Rows with a NaN class_star
# also pass this test, because NaN != 0 evaluates to True.
stars=mt[mt.class_star !=0.]
In [52]:
# (rows, columns) of the selection.
stars.shape
Out[52]:
In [53]:
# Store the selection in the same HDF file under the key 'stars'.
stars.to_hdf(wisps.COMBINED_PHOTO_SPECTRO_FILE, key='stars')
In [54]:
# Pass the table through wisps.Annotator.reformat_table. The following cells
# read F110, F140, F160, snr1 and class_star from the result.
# NOTE(review): presumably this unpacks the (value, error) tuples into plain
# magnitudes, since F140 is compared numerically below — confirm.
st=wisps.Annotator.reformat_table(mt)
In [64]:
# Restrict to rows with F140 between 15.0 and 26.0, then pull out the
# quantities used by the plots. The mask is evaluated once and reused.
_in_range = st.F140.between(15.0, 26.)
_selected = st[_in_range]
snrs = _selected.snr1.values
mags_F140 = _selected.F140.values
mags_F110 = _selected.F110.values
mags_F160 = _selected.F160.values
# Kept as a Series (not .values), exactly as before.
xs = _selected.class_star
In [56]:
# Boolean mask of entries with 0.1 < SNR < 200 (both bounds exclusive).
idx = (snrs > 0.1) & (snrs < 200.)
In [57]:
import seaborn as sns
In [58]:
# Diverging seaborn palette returned as a matplotlib colormap (as_cmap=True);
# used to colour the scatter plots below.
cmap=sns.diverging_palette(150, 275, s=80, l=55, n=9, as_cmap=True)
In [59]:
# F140W magnitude against log10(SNR), coloured by the class_star value.
fig, ax = plt.subplots(figsize=(8, 6))
# The magnitudes are stored in `mags_F140`; the previously used name
# `mags_140` is not defined anywhere in the notebook.
c = ax.scatter(np.log10(snrs[idx]), mags_F140[idx], marker='+', alpha=0.3, c=xs[idx], cmap=cmap)
ax.set_xlabel('Log SNR', fontsize=18)
ax.set_ylabel('F140W', fontsize=18)
ax.minorticks_on()
cbar = fig.colorbar(c, ax=ax)
fig.tight_layout()
cbar.ax.set_title('CLASS STAR FLAG', fontsize=18)
Out[59]:
In [65]:
# F110W magnitude against log10(SNR), coloured by the class_star value.
fig, ax = plt.subplots(figsize=(8, 6))
c = ax.scatter(np.log10(snrs[idx]), mags_F110[idx], marker='+', alpha=0.3, c=xs[idx], cmap=cmap)
ax.set_xlabel('Log SNR', fontsize=18)
ax.set_ylabel('F110W', fontsize=18)
ax.minorticks_on()
cbar = fig.colorbar(c, ax=ax)
fig.tight_layout()
cbar.ax.set_title('CLASS STAR FLAG', fontsize=18)
Out[65]:
In [66]:
# F160W magnitude against log10(SNR), coloured by the class_star value.
fig, ax = plt.subplots(figsize=(8, 6))
c = ax.scatter(np.log10(snrs[idx]), mags_F160[idx], marker='+', alpha=0.3, c=xs[idx], cmap=cmap)
ax.set_xlabel('Log SNR', fontsize=18)
# The plotted values are the F160 magnitudes, so the axis is labelled F160W
# (it previously read F110W).
ax.set_ylabel('F160W', fontsize=18)
ax.minorticks_on()
cbar = fig.colorbar(c, ax=ax)
fig.tight_layout()
cbar.ax.set_title('CLASS STAR FLAG', fontsize=18)
Out[66]:
In [ ]: