This notebook demonstrates every step of my candidate selection that is carried out before visual inspection.
In [1]:
import numpy as np
import splat
import wisps.data_analysis as wispd
from wisps.data_analysis import selection_criteria as sel_crt
import wispshapes
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import wisps
import matplotlib as mpl
import random
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
stars=wisps.Annotator.reformat_table(pd.read_hdf(wisps.COMBINED_PHOTO_SPECTRO_FILE, key='stars'))
In [3]:
# Cast the columns listed in wisps.INDEX_NAMES to float and replace them with their log10 values.
# NOTE(review): this overwrites `stars` in place, so re-running the cell applies log10 a second time.
stars[wisps.INDEX_NAMES]=(stars[wisps.INDEX_NAMES].applymap(float)).applymap(np.log10)
In [4]:
cands=wisps.datasets['candidates']
In [5]:
#
In [6]:
# F-test value against log10(snr1) for the candidates, coloured by numeric spectral type.
log_snr = cands.snr1.apply(np.log10)
spt_number = cands.spt.apply(splat.typeToNum).values
plt.scatter(log_snr, cands.f_test.values, c=spt_number, cmap='viridis', label='cdf')
plt.xlabel("log snr")
plt.ylabel(" F-test values")
plt.legend()
Out[6]:
In [7]:
cands.f_test.max()
Out[7]:
In [8]:
dt=stars[(stars.f_test<0.4) & (stars.snr1>=3.)].reset_index(drop=True)
In [9]:
len(dt), len(stars[stars.f_test< 0.4])
Out[9]:
In [10]:
len(dt[dt.survey=='WISP']), len(dt[dt.survey !='WISP'])
Out[10]:
In [11]:
#(wisps.datasets['schneider'])[wisps.INDEX_NAMES]
In [12]:
gbhio=sel_crt.save_criteria(conts=dt)
In [13]:
crts=sel_crt.crts_from_file()
In [14]:
#crts
In [15]:
#for idx in crts.values(): idx.plot(save=True)
In [16]:
# One row per selection criterion, labelled by the criterion's name:
# `compl` holds the completeness values and `cont` the contamination values.
criteria = list(crts.values())
criteria_names = [crt.name for crt in criteria]
compl = pd.DataFrame([crt.completeness for crt in criteria], index=criteria_names)
cont = pd.DataFrame([crt.contamination for crt in criteria], index=criteria_names)
In [17]:
def return_best_index(spt_range, tolerance=0.002):
    """
    Return the lowest-contamination entries for one column of ``cont``.

    Reads the notebook-level ``cont`` table (rows: index pairs, columns:
    spectral-type ranges) and keeps every row whose contamination lies
    within ``tolerance`` of the column minimum.

    Parameters
    ----------
    spt_range : hashable
        Column label of ``cont`` to inspect.
    tolerance : float, optional
        Margin above the column minimum that still counts as "best".
        Defaults to 0.002, the value previously hard-coded here.

    Returns
    -------
    pandas.Series
        Contamination values of the retained rows, indexed like ``cont``.
    """
    contamination = cont[spt_range]
    return contamination[contamination <= contamination.min() + tolerance]
In [18]:
# Show the lowest-contamination rows for every column of `cont` in turn.
for spt_col in cont.columns:
    best_rows = return_best_index(spt_col)
    print (best_rows, " \n")
In [19]:
# Chosen selection criteria: each key is looked up in `crts` (an index-pair name),
# and each value is the `shape_name` of the box to apply for that index pair.
to_use={'H_2O-1/J-Cont CH_4/H-Cont':'L0-L5',
'CH_4/H_2O-1 CH_4/H-Cont':'L5-T0' ,
'H_2O-2/J-Cont CH_4/J-Cont':'M7-L0',
'H_2O-2/H_2O-1 H-cont/H_2O-1': 'T0-T5',
'H_2O-2/J-Cont CH_4/H-Cont': 'T5-T9',
'H_2O-1/J-Cont CH_4/J-Cont': 'Y dwarfs'}
In [20]:
good_indices=[crts[k] for k in to_use.keys()]
In [21]:
[x.shape_name for x in good_indices[4].shapes]
Out[21]:
In [22]:
def box_parameters(idx, spt_range):
    """
    Print a one-line summary of the box named ``spt_range`` in ``idx``.

    The line lists the box's first two coefficients and its scatter (rounded
    to two decimals), followed by the completeness (two decimals) and the
    contamination (three decimals) that ``idx`` records for ``spt_range``.

    Raises IndexError when ``idx.shapes`` has no shape with that name.
    """
    matching = [shape for shape in idx.shapes if shape.shape_name == spt_range]
    box = matching[0]
    coeff_m = round(box.coeffs[0], 2)
    coeff_b = round(box.coeffs[1], 2)
    scatter = round(box.scatter, 2)
    completeness = round(idx.completeness[spt_range], 2)
    contamination = round(idx.contamination[spt_range], 3)
    template = '{} {} m: {} b: {} s:{}, comp : {}, cont: {}'
    print (template.format(spt_range, idx, coeff_m, coeff_b, scatter, completeness, contamination))
In [23]:
#for idx, k in zip(good_indices, to_use.keys()):
# spt_range=to_use[k][0]
# box_parameters(idx, spt_range)
In [24]:
import matplotlib as mpl
mpl.rcParams['legend.fontsize'] = 'small'
In [25]:
#idx=good_indices[0]
In [26]:
#idx.contaminants[[idx.ykey, idx.ykey]].applymap(float).round(2)
In [27]:
#dft
In [28]:
# Reformat the three comparison datasets and renumber their rows from zero.
_reference_tables = [
    wisps.Annotator.reformat_table(wisps.datasets[dataset_key]).reset_index(drop=True)
    for dataset_key in ('spex', 'manjavacas', 'schneider')
]
spex_df, manj, schn = _reference_tables
In [29]:
subdwarfs=wisps.Annotator.reformat_table(good_indices[0]._subdwarfs).reset_index(drop=True)
In [30]:
good_indices[0].shapes
Out[30]:
In [31]:
dft=dt.rename(columns={'grism_id':'Names'})
In [32]:
import itertools
In [33]:
# For every chosen index pair, take the box whose shape_name matches its
# spectral-type range, run the box selection on the two index columns of
# `dft`, and record both the box and the Names of the selected rows.
cands=[]
boxes_used={}
for idx, (k, spt_range) in zip(good_indices, to_use.items()):
    bx=[shape for shape in idx.shapes if shape.shape_name==spt_range][0]
    index_columns=dft[[idx.xkey, idx.ykey]]
    df_to_use=wisps.Annotator.reformat_table(index_columns)
    selec_indx_array=bx.select(df_to_use).index
    ls=dft.iloc[selec_indx_array].Names
    boxes_used["{}".format(spt_range)]=[bx, ls.values]
    cands.append(ls)
In [34]:
to_use
Out[34]:
In [35]:
merged = np.array(list(itertools.chain(*cands)))
In [36]:
final_cands=np.unique(merged.flatten())
In [37]:
# Keep the rows of `dt` whose grism_id is among the selected candidates.
# `final_cands` already contains every name collected by the selection loop
# (including its last iteration), so no loop-leftover variable is needed here.
dfn=dt[dt.grism_id.isin(final_cands)]
In [38]:
dfn=dfn[dfn.spt.apply(splat.typeToNum)>=19]
In [39]:
dfn.to_pickle(wisps.OUTPUT_FILES+'/selected_by_indices.pkl')
In [40]:
dframe=wisps.datasets['candidates']
In [41]:
real_ls=dframe[dframe.spt.apply(splat.typeToNum)>19.]
In [42]:
slsctdddd=dft[dft.Names.isin(dframe.grism_id)]
In [43]:
#slsctdddd[slsctdddd.spt.apply(wisps.make_spt_number).between(20, 25)]
In [44]:
boxes_used.keys(), len( boxes_used.keys()), to_use.keys()
Out[44]:
In [45]:
dft.columns, dft.shape
Out[45]:
In [46]:
#fig, ax=plt.subplots(ncols=3, nrows=2, figsize=(12*1.5, 8*1.5))
for k in boxes_used.keys():
pass
#plt.minorticks_on()
#plt.tight_layout()
#plt.savefig(wisps.OUTPUT_FIGURES+'/index_index_plots.pdf', bbox_inches='tight', dpi=200)
In [47]:
import seaborn as sns
cmap=sns.light_palette((260, 75, 60), input="husl", as_cmap=True)
In [48]:
sel_crt.plot_cont_compl(cmap=cmap)
In [49]:
# Contamination and completeness tables rounded to two decimals, each with an
# extra 'index-space' column carrying the criterion names.
crt_vals=crts.values()
round_2dp = lambda value: round(value, 2)
index_space_names = [crt.name for crt in crt_vals]
conts=pd.DataFrame([crt.contamination for crt in crt_vals]).applymap(round_2dp)
compls=pd.DataFrame([crt.completeness for crt in crt_vals]).applymap(round_2dp)
conts['index-space']=index_space_names
compls['index-space']=index_space_names
In [50]:
# Export the rounded tables to LaTeX. The completeness file must be written
# from `compls`; it was previously written from `conts`, so both .tex files
# ended up containing the contamination table.
compls.to_latex(wisps.LIBRARIES+'/completeness.tex', index=False, na_rep=' ')
conts.to_latex(wisps.LIBRARIES+'/contamination.tex', index=False, na_rep=' ')
In [51]:
#save the indices used here
In [52]:
import pickle
# Pickle the `to_use` mapping (index pair -> box name) to disk.
output_file=wisps.OUTPUT_FILES+'/best_indices_to_use.pkl'
with open(output_file, 'wb') as pkl_handle:
    pickle.dump(to_use, pkl_handle)
In [53]:
df=wisps.datasets['candidates']
In [54]:
df.f_test.plot(kind='hist')
Out[54]:
In [55]:
# Reformatted copies of the datasets read by plot_index_box.
spex_df1, cands_df2, manj_df3 = (
    wisps.Annotator.reformat_table(wisps.datasets[dataset_key])
    for dataset_key in ('spex', 'candidates', 'manjavacas')
)
# Rows of the 'manjavacas' table whose numeric spectral type is above 38.
manj_spt_numbers=manj_df3.spt.apply(wisps.make_spt_number)
ydwarf_df=manj_df3[manj_spt_numbers>38]
In [56]:
#spex_df1.spt
In [57]:
def plot_index_box(index_name, box_name, ax):
    """
    Draw one index-index panel on ``ax``.

    The panel shows all ``stars`` as faint open circles, the reference objects
    selected by the box, the candidates selected by the box, and the box
    outline. Axis limits are set from the selected reference objects.

    Relies on the notebook-level objects ``crts``, ``stars``, ``spex_df1``,
    ``cands_df2`` and ``ydwarf_df``.

    Parameters
    ----------
    index_name : key of ``crts`` identifying the index pair to plot.
    box_name : ``shape_name`` of the box to use; also used as the panel title.
    ax : matplotlib axes to draw on.
    """
    # look up the criterion and the requested box
    idx = crts[index_name]
    bx = [shape for shape in idx.shapes if shape.shape_name == box_name][0]
    xkey, ykey = idx.xkey, idx.ykey

    # reference objects inside the box: spex_df1 by default, replaced by the
    # criterion's subdwarfs for 'sub*' boxes and by ydwarf_df for 'y*' boxes
    spex_slctd = bx.select(spex_df1[[xkey, ykey]].applymap(float))
    box_kind = box_name.lower()
    if box_kind.startswith('sub'):
        subdwarfs_df = wisps.Annotator.reformat_table(idx._subdwarfs).reset_index(drop=True)
        spex_slctd = bx.select(subdwarfs_df[[xkey, ykey]].applymap(float))
    if box_kind.startswith('y'):
        spex_slctd = bx.select(ydwarf_df[[xkey, ykey]].applymap(float))

    # candidates inside the box, plus a per-candidate flag from
    # wisps.is_in_that_classification for this box name
    cand_points = np.array([cands_df2[xkey].values, cands_df2[ykey].values])
    cands_slctd, cands_bools = bx._select(cand_points)
    spts_df = cands_df2.spt[cands_bools]
    cand_bools2 = [wisps.is_in_that_classification(s, box_name) for s in spts_df.values]

    # background stars, reference objects, then the two candidate layers
    star_x = stars[xkey].apply(float).values
    star_y = stars[ykey].apply(float).values
    ax.scatter(star_x, star_y, marker='o', facecolors='none', alpha=0.1, edgecolors='#AAAAAA')
    ax.scatter(spex_slctd.x, spex_slctd.y, s=5, label='SpeX')
    cand_x, cand_y = cands_slctd[0], cands_slctd[1]
    ax.scatter(cand_x, cand_y, marker='x', facecolors='#111111', edgecolors='#2ECC40', label='candidates')
    ax.scatter(cand_x[cand_bools2], cand_y[cand_bools2], marker='x', facecolors='#FF851B', edgecolors='#2ECC40', label='candidates')

    # box outline
    bx.color = '#2ECC40'
    bx.alpha = .1
    bx.plot(ax=ax, only_shape=True, highlight=False)

    # labels come from the two space-separated parts of the criterion name
    name_parts = idx.name.split(' ')
    ax.set_xlabel(r'$'+str(name_parts[0])+'$', fontsize=18)
    ax.set_ylabel(r'$'+str(name_parts[1])+'$', fontsize=18)
    ax.set_title(box_name, fontsize=18)

    # pad the limits by the standard deviation of all selected reference values
    buffer = np.nanstd(spex_slctd.values)
    ref_x, ref_y = spex_slctd.x.values, spex_slctd.y.values
    ax.set_xlim([ref_x.min()-buffer, ref_x.max()+buffer])
    ax.set_ylim([ref_y.min()-buffer, ref_y.max()+buffer])
In [58]:
# Names of the chosen index pairs, in the insertion order of `to_use`.
ks=list(to_use.keys())
In [59]:
ks
Out[59]:
In [67]:
# One panel per chosen index pair on a 2x3 grid, filled row by row, then saved as a PDF.
fig, ax=plt.subplots(ncols=3, nrows=2, figsize=(12, 8))
panel_slots=[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]
for panel_number, (row, col) in enumerate(panel_slots):
    index_name=ks[panel_number]
    plot_index_box(index_name, to_use[index_name], ax[row][col])
plt.tight_layout()
plt.savefig(wisps.OUTPUT_FIGURES+'/index_index_plots.pdf')
In [61]:
def round_tuple(tpl, n=2):
    """Round the first two entries of ``tpl`` to ``n`` decimals and return them as a 2-tuple."""
    first, second = tpl[0], tpl[1]
    return (round(first, n), round(second, n))
In [68]:
# Print one LaTeX table row per chosen index pair: box name, x and y index
# names, the four box vertices (rounded), completeness and contamination.
# The row terminator is written as "\\\\" (two literal backslashes); the
# previous "\\\ " relied on the invalid escape sequence "\ ", which newer
# Python versions warn about, while producing the same printed text.
for idx, k in zip(good_indices, to_use.keys()):
    spt_range=to_use[k]
    bs=idx.shapes
    bx=[x for x in bs if x.shape_name==spt_range][0]
    print (" {} & {} & {} & {} & {} & {} & {} & {} & {} \\\\ ".format(spt_range,idx.xkey, idx.ykey,
                                                     round_tuple(bx.vertices[0]), round_tuple(bx.vertices[1])
                                                     , round_tuple(bx.vertices[2]), round_tuple(bx.vertices[3]),
                                                     round(idx.completeness[spt_range], 2), round(idx.contamination[spt_range], 3)))
In [63]:
# LaTeX template for one continued figure holding eight spectrum images (two per row).
# `<` and `>` stand in for literal braces so that str.format only fills the eight `{}`
# slots; the cell that prints the template swaps them for `{` and `}` after formatting.
j=r"""
\begin<figure>\ContinuedFloat
\begin<tabular><cc>
\includegraphics[width=0.5\linewidth]<\figfolder spectrum{}.jpeg> &
\includegraphics[width=0.5\linewidth]<\figfolder spectrum{}.jpeg> \\
\includegraphics[width=0.5\linewidth]<\figfolder spectrum{}.jpeg> &
\includegraphics[width=0.5\linewidth]<\figfolder spectrum{}.jpeg> \\
\includegraphics[width=0.5\linewidth]<\figfolder spectrum{}.jpeg> &
\includegraphics[width=0.5\linewidth]<\figfolder spectrum{}.jpeg> \\
\includegraphics[width=0.5\linewidth]<\figfolder spectrum{}.jpeg> &
\includegraphics[width=0.5\linewidth]<\figfolder spectrum{}.jpeg> \\
\end<tabular>
\end<figure>
"""
In [64]:
# Emit one figure block per group of eight consecutive spectrum numbers
# (groups start at 0, 8, ..., 40), restoring the braces after formatting.
for first_id in range(0, 45, 8):
    figure_block = j.format(*range(first_id, first_id + 8))
    print (figure_block.replace("<", "{").replace(">", "}"))
In [65]:
to_use
Out[65]:
In [66]:
wisps.datasets.keys()
Out[66]:
In [25]:
wisps.datasets['stars'].grism_id
Out[25]:
In [1]:
import wisps
In [27]:
s=wisps.Source(name='par27-00036')
In [28]:
s_splat=s.splat_spectrum
In [29]:
s_splat.trim([1.15, 1.65])
In [30]:
import splat
import splat.plot as splot
In [40]:
# Call splat's plotSequence on `s_splat` with std_class='dwarf'; the figure is
# presumably written to `filename` (confirm against the splat documentation).
# NOTE(review): `filename` is an absolute, user-specific path; this cell will not
# run unchanged on another machine.
splot.plotSequence(s_splat, std_class='dwarf',type_range=2, fit_ranges=[1.2, 1.65],
xrange=[1.15, 1.65], scale=True, filename='/users/caganze/desktop/plot2.png')
Out[40]:
In [37]:
# Same call with std_class='subdwarf' and a wider type_range; note that fit_ranges
# and xrange differ from the 'dwarf' call.
# NOTE(review): `filename` is an absolute, user-specific path; this cell will not
# run unchanged on another machine.
splot.plotSequence(s_splat, std_class='subdwarf',type_range=3,fit_ranges=[1.15, 1.65], xrange=[1.2, 1.65], filename='/users/caganze/desktop/plot1.png')
Out[37]:
In [ ]: