This notebook demonstrates every step of my candidate selection, all of which are carried out before the visual inspection.
In [1]:
    
import numpy as np
import splat
import wisps.data_analysis as wispd
from wisps.data_analysis import selection_criteria as sel_crt
import wispshapes
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import wisps
import matplotlib as mpl
import random
import matplotlib.pyplot as plt
%matplotlib inline
    
    
In [2]:
    
# Read the 'stars' table from the HDF file named by wisps.COMBINED_PHOTO_SPECTRO_FILE
# and pass it through Annotator.reformat_table before any cuts are applied.
stars=wisps.Annotator.reformat_table(pd.read_hdf(wisps.COMBINED_PHOTO_SPECTRO_FILE, key='stars'))
    
In [3]:
    
# Replace every column listed in wisps.INDEX_NAMES by log10 of its float value.
# NOTE: this overwrites `stars` in place, so re-running the cell takes the
# logarithm a second time; re-run the loading cell first if it must be repeated.
stars[wisps.INDEX_NAMES]=(stars[wisps.INDEX_NAMES].applymap(float)).applymap(np.log10)
    
In [4]:
    
# Candidate table taken from wisps.datasets.
# NOTE: the name `cands` is rebound to a plain list by the selection loop further down.
cands=wisps.datasets['candidates']
    
In [5]:
    
#
    
In [6]:
    
# Scatter of log10(snr1) against the F-test value for the candidates,
# coloured by the numeric spectral type returned by splat.typeToNum.
plt.scatter(cands.snr1.apply(np.log10), cands.f_test.values, 
            c=cands.spt.apply(splat.typeToNum).values, cmap='viridis',label='cdf')
plt.xlabel("log snr")
plt.ylabel(" F-test values")
# NOTE(review): the only legend entry is the 'cdf' label given to the scatter — confirm that text is intended.
plt.legend()
    
    Out[6]:
    
In [7]:
    
# Largest F-test value among the candidates.
cands.f_test.max()
    
    Out[7]:
In [8]:
    
# Keep stars with f_test below 0.4 and snr1 of at least 3; the index is reset
# so that row labels run from 0 upwards without gaps.
dt=stars[(stars.f_test<0.4) & (stars.snr1>=3.)].reset_index(drop=True)
    
In [9]:
    
# Row counts with both cuts applied versus the F-test cut alone.
len(dt), len(stars[stars.f_test< 0.4])
    
    Out[9]:
In [10]:
    
# Number of retained rows with survey == 'WISP' versus any other survey value.
len(dt[dt.survey=='WISP']), len(dt[dt.survey !='WISP'])
    
    Out[10]:
In [11]:
    
#(wisps.datasets['schneider'])[wisps.INDEX_NAMES]
    
In [12]:
    
# Run save_criteria with the cut sample `dt` passed as `conts`.
# presumably this builds the selection criteria and stores them for
# crts_from_file() to read back — TODO confirm against selection_criteria.
# The return value `gbhio` is not referenced by any later cell shown here.
gbhio=sel_crt.save_criteria(conts=dt)
    
    
    
    
In [13]:
    
# Load the criterion objects; later cells use `crts` as a mapping keyed by
# criterion name (e.g. 'H_2O-1/J-Cont CH_4/H-Cont').
crts=sel_crt.crts_from_file()
    
In [14]:
    
#crts
    
In [15]:
    
#for idx in crts.values(): idx.plot(save=True)
    
In [16]:
    
# Completeness and contamination tables: one row per criterion (labelled by its
# name), built from each criterion's `completeness` / `contamination` attribute.
criteria = list(crts.values())
criterion_names = [criterion.name for criterion in criteria]
compl = pd.DataFrame([criterion.completeness for criterion in criteria], index=criterion_names)
cont = pd.DataFrame([criterion.contamination for criterion in criteria], index=criterion_names)
    
In [17]:
    
def return_best_index(spt_range):
    """Return the entries of ``cont[spt_range]`` within 0.002 of that column's minimum.

    ``spt_range`` is a column label of the module-level ``cont`` table; the
    result is the matching slice of that column, indexed by criterion name.
    """
    column = cont[spt_range]
    threshold = column.min() + 0.002
    return column[column <= threshold]
    
In [18]:
    
# For each column of `cont`, print the criteria whose contamination lies
# within 0.002 of that column's minimum.
for x in cont.columns:
    print (return_best_index(x), " \n")
    
    
In [19]:
    
# Index space chosen for each box: key = criterion name (a key of `crts`),
# value = shape_name of the box to apply in that index space.
# Insertion order matters: later cells zip this dict with `good_indices`
# and address its keys by position.
to_use={'H_2O-1/J-Cont CH_4/H-Cont':'L0-L5',
        'CH_4/H_2O-1 CH_4/H-Cont':'L5-T0' ,
        'H_2O-2/J-Cont CH_4/J-Cont':'M7-L0',
        'H_2O-2/H_2O-1 H-cont/H_2O-1': 'T0-T5',
        'H_2O-2/J-Cont CH_4/H-Cont': 'T5-T9',
        'H_2O-1/J-Cont CH_4/J-Cont': 'Y dwarfs'}
    
In [20]:
    
# Criterion objects for the chosen index spaces, in the same order as `to_use`.
good_indices = [crts[criterion_name] for criterion_name in to_use]
    
In [21]:
    
# Names of the boxes (shapes) defined on the fifth selected criterion.
[x.shape_name for x in good_indices[4].shapes]
    
    Out[21]:
In [22]:
    
def box_parameters(idx, spt_range):
    """Print a one-line summary of the box named ``spt_range`` on criterion ``idx``.

    The line lists the box's first two coefficients (labelled m and b) and its
    scatter, each rounded to 2 decimals, followed by the criterion's
    completeness (2 decimals) and contamination (3 decimals) for ``spt_range``.
    Returns None. Raises IndexError when ``idx.shapes`` has no shape with that name.
    """
    matching = [shape for shape in idx.shapes if shape.shape_name == spt_range]
    box = matching[0]

    m_coeff = round(box.coeffs[0], 2)
    b_coeff = round(box.coeffs[1], 2)
    scatter = round(box.scatter, 2)
    completeness = round(idx.completeness[spt_range], 2)
    contamination = round(idx.contamination[spt_range], 3)

    template = '{}  {} m: {} b: {} s:{}, comp : {}, cont: {}'
    print (template.format(spt_range, idx, m_coeff, b_coeff, scatter, completeness, contamination))
    
In [23]:
    
#for idx, k in zip(good_indices, to_use.keys()):
#    spt_range=to_use[k][0]
#    box_parameters(idx, spt_range)
    
In [24]:
    
import matplotlib as mpl
# Set matplotlib's global legend font size to 'small'.
mpl.rcParams['legend.fontsize'] = 'small'
    
In [25]:
    
#idx=good_indices[0]
    
In [26]:
    
#idx.contaminants[[idx.ykey, idx.ykey]].applymap(float).round(2)
    
In [27]:
    
#dft
    
In [28]:
    
# Reformatted copies of the 'spex', 'manjavacas' and 'schneider' datasets,
# each with its index reset.
spex_df=wisps.Annotator.reformat_table(wisps.datasets['spex']).reset_index(drop=True)
manj=wisps.Annotator.reformat_table(wisps.datasets['manjavacas']).reset_index(drop=True)
schn=wisps.Annotator.reformat_table(wisps.datasets['schneider']).reset_index(drop=True)
    
In [29]:
    
# Table stored on the first selected criterion's private `_subdwarfs`
# attribute, reformatted and with its index reset.
subdwarfs=wisps.Annotator.reformat_table(good_indices[0]._subdwarfs).reset_index(drop=True)
    
In [30]:
    
# Show the shapes attached to the first selected criterion.
good_indices[0].shapes
    
    Out[30]:
In [31]:
    
# Copy of `dt` with the grism_id column renamed to 'Names' (the selection loop
# below reads `.Names`); `dt` itself keeps its grism_id column.
dft=dt.rename(columns={'grism_id':'Names'})
    
In [32]:
    
import itertools
    
In [33]:
    
# Apply each chosen box to the cut sample and collect the names it selects.
#   cands      -> list with one Series of selected Names per box
#   boxes_used -> {box name: [box object, array of selected Names]}
cands = []
boxes_used = {}
for idx, k in zip(good_indices, to_use):
    spt_range = to_use[k]

    # the box defined for this spectral range in this index space
    matching_boxes = [shape for shape in idx.shapes if shape.shape_name == spt_range]
    bx = matching_boxes[0]

    index_columns = [idx.xkey, idx.ykey]
    df_to_use = wisps.Annotator.reformat_table(dft[index_columns])
    selec_indx_array = bx.select(df_to_use).index

    # the selected index values are used as row positions into `dft`
    # NOTE: the name `ls` is read again by a later cell, so it is kept as is
    ls = dft.iloc[selec_indx_array].Names

    boxes_used[str(spt_range)] = [bx, ls.values]
    cands.append(ls)
    
In [34]:
    
# Show the chosen index spaces again for reference.
to_use
    
    Out[34]:
In [35]:
    
# Concatenate the per-box name lists into a single array.
merged = np.array(list(itertools.chain.from_iterable(cands)))
    
In [36]:
    
# Sorted, de-duplicated names selected by at least one box.
final_cands=np.unique(merged.flatten())
    
In [37]:
    
# Rows of `dt` whose grism_id was selected by at least one box.
# `final_cands` already holds the de-duplicated names from every box, so it is
# used directly; no leftover loop variable from the selection cell is needed.
dfn=dt[dt.grism_id.isin(final_cands)]
    
In [38]:
    
# Keep rows whose numeric spectral type (splat.typeToNum) is at least 19.
# NOTE(review): this cut is >=19 while the `real_ls` cut in a later cell is >19 — confirm which is intended.
dfn=dfn[dfn.spt.apply(splat.typeToNum)>=19]
    
In [39]:
    
# Pickle the index-selected sample under the wisps.OUTPUT_FILES directory.
dfn.to_pickle(wisps.OUTPUT_FILES+'/selected_by_indices.pkl')
    
In [40]:
    
# Candidate table from wisps.datasets, bound to a second name.
dframe=wisps.datasets['candidates']
    
In [41]:
    
# Candidates whose numeric spectral type (splat.typeToNum) is strictly above 19.
real_ls=dframe[dframe.spt.apply(splat.typeToNum)>19.]
    
In [42]:
    
# Rows of the cut sample `dft` whose Names entry also appears as a grism_id
# in the candidate table.
slsctdddd=dft[dft.Names.isin(dframe.grism_id)]
    
In [43]:
    
#slsctdddd[slsctdddd.spt.apply(wisps.make_spt_number).between(20, 25)]
    
In [44]:
    
# Compare the boxes actually used (and their count) with the keys of `to_use`.
boxes_used.keys(), len( boxes_used.keys()), to_use.keys()
    
    Out[44]:
In [45]:
    
# Column labels and (rows, columns) shape of the cut sample.
dft.columns, dft.shape
    
    Out[45]:
In [46]:
    
# Placeholder cell: the figure set-up and save calls are commented out and the
# loop body is `pass`, so nothing is drawn or written here.
#fig, ax=plt.subplots(ncols=3, nrows=2, figsize=(12*1.5, 8*1.5))
for k in boxes_used.keys():
    pass
#plt.minorticks_on()
#plt.tight_layout()
#plt.savefig(wisps.OUTPUT_FIGURES+'/index_index_plots.pdf', bbox_inches='tight', dpi=200)
    
In [47]:
    
import seaborn as sns 
# Light colormap built from the HUSL colour (260, 75, 60); passed to
# plot_cont_compl in the next cell.
cmap=sns.light_palette((260, 75, 60), input="husl", as_cmap=True)
    
In [48]:
    
# Call the project's plot_cont_compl helper with the colormap defined above.
# presumably this draws the contamination/completeness summary — TODO confirm against selection_criteria.
sel_crt.plot_cont_compl(cmap=cmap)
    
    
In [49]:
    
# Contamination and completeness tables rounded to 2 decimals, one row per
# criterion, each tagged with the criterion name in an 'index-space' column.
crt_vals = crts.values()
conts = pd.DataFrame([crt.contamination for crt in crt_vals]).applymap(lambda value: round(value, 2))
compls = pd.DataFrame([crt.completeness for crt in crt_vals]).applymap(lambda value: round(value, 2))
for table in (conts, compls):
    table['index-space'] = [crt.name for crt in crt_vals]
    
In [50]:
    
# Write the rounded tables as LaTeX, one file each: the completeness table goes
# to completeness.tex and the contamination table to contamination.tex.
# Missing values are rendered as a blank and the row index is omitted.
compls.to_latex(wisps.LIBRARIES+'/completeness.tex', index=False, na_rep=' ')
conts.to_latex(wisps.LIBRARIES+'/contamination.tex', index=False, na_rep=' ')
    
In [51]:
    
#save the indices used here
    
In [52]:
    
import pickle
# Persist the `to_use` mapping (criterion name -> box name) under
# wisps.OUTPUT_FILES so the chosen index spaces can be reloaded elsewhere.
output_file=wisps.OUTPUT_FILES+'/best_indices_to_use.pkl'
with open(output_file, 'wb') as file:
    pickle.dump(to_use,file)
    
In [53]:
    
# Candidate table from wisps.datasets, used for the histogram in the next cell.
df=wisps.datasets['candidates']
    
In [54]:
    
# Histogram of the candidates' F-test values.
df.f_test.plot(kind='hist')
    
    Out[54]:
    
In [55]:
    
# Reformatted tables read as module-level names by plot_index_box.
spex_df1=wisps.Annotator.reformat_table(wisps.datasets['spex'])
cands_df2=wisps.Annotator.reformat_table(wisps.datasets['candidates'])
manj_df3=wisps.Annotator.reformat_table(wisps.datasets['manjavacas'])
# Rows of the 'manjavacas' table whose wisps.make_spt_number value exceeds 38;
# plot_index_box uses them for boxes whose name starts with 'y'.
ydwarf_df=manj_df3[manj_df3.spt.apply(wisps.make_spt_number)>38]
    
In [56]:
    
#spex_df1.spt
    
In [57]:
    
def plot_index_box(index_name, box_name, ax):
    """Draw one index-index panel: background stars, a selection box and its contents.

    Parameters
    ----------
    index_name : key of ``crts`` naming the index space to plot.
    box_name : ``shape_name`` of the box to draw in that index space.
    ax : matplotlib axes to draw on.

    Reads the module-level ``crts``, ``stars``, ``spex_df1``, ``cands_df2`` and
    ``ydwarf_df``. Returns None; raises IndexError if the criterion has no
    shape called ``box_name``.
    """
    criterion = crts[index_name]
    box = [shape for shape in criterion.shapes if shape.shape_name == box_name][0]

    xkey, ykey = criterion.xkey, criterion.ykey
    index_columns = [xkey, ykey]

    # Reference sample inside the box: the SpeX table by default ...
    reference_in_box = box.select(spex_df1[index_columns].applymap(float))

    lowered_name = box_name.lower()
    # ... replaced by the criterion's own subdwarf table for 'sub*' boxes ...
    if lowered_name.startswith('sub'):
        subdwarfs_df = wisps.Annotator.reformat_table(criterion._subdwarfs).reset_index(drop=True)
        reference_in_box = box.select(subdwarfs_df[index_columns].applymap(float))

    # ... and by the Y-dwarf table for 'y*' boxes.
    if lowered_name.startswith('y'):
        reference_in_box = box.select(ydwarf_df[index_columns].applymap(float))

    # _select takes a 2 x N array; its two return values are used below as the
    # selected points and as an indexer into the candidate table.
    candidate_points = np.array([cands_df2[xkey].values, cands_df2[ykey].values])
    cands_in_box, in_box_flags = box._select(candidate_points)

    spts_in_box = cands_df2.spt[in_box_flags]
    type_matches = [wisps.is_in_that_classification(spt, box_name) for spt in spts_in_box.values]

    # Drawing order: all stars (faint open circles), reference objects, every
    # candidate in the box, then the candidates whose type matches the box.
    ax.scatter(stars[xkey].apply(float).values, stars[ykey].apply(float).values,
               marker='o', facecolors='none', alpha=0.1, edgecolors='#AAAAAA')
    ax.scatter(reference_in_box.x, reference_in_box.y, s=5, label='SpeX')
    ax.scatter(cands_in_box[0], cands_in_box[1], marker='x',
               facecolors='#111111', edgecolors='#2ECC40', label='candidates')
    ax.scatter(cands_in_box[0][type_matches], cands_in_box[1][type_matches], marker='x',
               facecolors='#FF851B', edgecolors='#2ECC40', label='candidates')

    # Outline the box itself (these attributes are set on the shared box object).
    box.color = '#2ECC40'
    box.alpha = .1
    box.plot(ax=ax, only_shape=True, highlight=False)

    # The criterion name is "<x index> <y index>"; each half becomes a math-mode label.
    name_parts = criterion.name.split(' ')
    ax.set_xlabel(r'$' + str(name_parts[0]) + '$', fontsize=18)
    ax.set_ylabel(r'$' + str(name_parts[1]) + '$', fontsize=18)

    ax.set_title(box_name, fontsize=18)

    # Frame the reference objects, padded by the NaN-ignoring standard
    # deviation of all their values.
    buffer = np.nanstd(reference_in_box.values)
    x_values = reference_in_box.x.values
    ax.set_xlim([x_values.min() - buffer, x_values.max() + buffer])
    y_values = reference_in_box.y.values
    ax.set_ylim([y_values.min() - buffer, y_values.max() + buffer])
    
In [58]:
    
#fig, ax=plt.subplots(nolcs)
#for k in to_use.keys():
# Criterion names in `to_use` order, so they can be addressed by position.
ks = list(to_use.keys())
    
In [59]:
    
# Show the ordered criterion names.
ks
    
    Out[59]:
In [67]:
    
# 2 x 3 grid with one panel per selected index space, filled row by row in
# the order of `ks`, then saved as a PDF under wisps.OUTPUT_FIGURES.
n_rows, n_cols = 2, 3
fig, ax = plt.subplots(ncols=n_cols, nrows=n_rows, figsize=(12, 8))
for position in range(n_rows * n_cols):
    row, col = divmod(position, n_cols)
    index_name = ks[position]
    plot_index_box(index_name, to_use[index_name], ax[row][col])
plt.tight_layout()
plt.savefig(wisps.OUTPUT_FIGURES+'/index_index_plots.pdf')
    
    
In [61]:
    
def round_tuple(tpl, n=2):
    """Return the first two entries of ``tpl``, each rounded to ``n`` decimals, as a 2-tuple.

    Any entries beyond the second are ignored.
    """
    first = round(tpl[0], n)
    second = round(tpl[1], n)
    return (first, second)
    
In [68]:
    
# Print one LaTeX table row per selected index space: box name, x and y index
# keys, the box's four vertices (rounded to 2 decimals), completeness (2
# decimals) and contamination (3 decimals).
for idx, k in zip(good_indices, to_use.keys()):
    spt_range=to_use[k]
    bs=idx.shapes
    
    bx=[x for x in bs if x.shape_name==spt_range][0]
    # The row ends with a literal double backslash plus a space (the LaTeX row
    # terminator); every backslash is escaped so no invalid escape sequence is used.
    print (" {} & {} &  {} & {} & {} & {} & {} & {} & {} \\\\ ".format(spt_range,idx.xkey, idx.ykey, 
                                                                           round_tuple(bx.vertices[0]), round_tuple(bx.vertices[1])
                                                                           , round_tuple(bx.vertices[2]), round_tuple(bx.vertices[3]),
                                                                          round(idx.completeness[spt_range], 2), round(idx.contamination[spt_range], 3)))
    
    
In [63]:
    
# LaTeX template for one continued-figure page holding eight spectra (four
# rows of two images). '<' and '>' stand in for '{' and '}' so that str.format
# only sees the eight '{}' slots; the cell that prints the template swaps
# them back.
j=r"""
\begin<figure>\ContinuedFloat
\begin<tabular><cc>
  \includegraphics[width=0.5\linewidth]<\figfolder spectrum{}.jpeg> &  
  \includegraphics[width=0.5\linewidth]<\figfolder spectrum{}.jpeg> \\
 \includegraphics[width=0.5\linewidth]<\figfolder spectrum{}.jpeg> &  
  \includegraphics[width=0.5\linewidth]<\figfolder spectrum{}.jpeg> \\
\includegraphics[width=0.5\linewidth]<\figfolder spectrum{}.jpeg> &  
  \includegraphics[width=0.5\linewidth]<\figfolder spectrum{}.jpeg> \\
  
\includegraphics[width=0.5\linewidth]<\figfolder spectrum{}.jpeg> &  
  \includegraphics[width=0.5\linewidth]<\figfolder spectrum{}.jpeg> \\
\end<tabular>
\end<figure>
"""
    
In [64]:
    
# Print one filled-in figure page per block of eight consecutive spectrum
# numbers (0-7, 8-15, ... starting at 0, 8, ..., 40), restoring the real
# braces after formatting.
for idx in np.arange(0, 45, 8):
    ids = range(idx, idx + 8)
    latex_page = j.format(*ids)
    print (latex_page.replace("<", "{").replace(">", "}"))
    
    
In [65]:
    
# Show the chosen index spaces.
to_use
    
    Out[65]:
In [66]:
    
# List the dataset names available through wisps.datasets.
wisps.datasets.keys()
    
    Out[66]:
In [25]:
    
# Show the grism_id column of the 'stars' dataset.
wisps.datasets['stars'].grism_id
    
    Out[25]:
In [1]:
    
import wisps
    
In [27]:
    
# Build a wisps Source for the object named 'par27-00036'.
s=wisps.Source(name='par27-00036')
    
In [28]:
    
# The source's spectrum as exposed by its `splat_spectrum` attribute.
s_splat=s.splat_spectrum
    
In [29]:
    
# Trim the spectrum to the 1.15-1.65 range.
# presumably wavelengths in microns, modified in place — TODO confirm against splat.
s_splat.trim([1.15, 1.65])
    
In [30]:
    
import splat
import splat.plot as splot
    
In [40]:
    
# Plot the trimmed spectrum against the 'dwarf' standard sequence with splat's
# plotSequence. The figure is written under the project's wisps.OUTPUT_FIGURES
# directory rather than a user-specific absolute path, so the cell runs on any machine.
splot.plotSequence(s_splat, std_class='dwarf',type_range=2, fit_ranges=[1.2, 1.65],
                   xrange=[1.15, 1.65], scale=True, filename=wisps.OUTPUT_FIGURES+'/plot2.png')
    
    Out[40]:
    
In [37]:
    
# Same comparison against the 'subdwarf' standard sequence. The figure is
# written under the project's wisps.OUTPUT_FIGURES directory rather than a
# user-specific absolute path, so the cell runs on any machine.
# NOTE(review): fit_ranges and xrange are swapped relative to the dwarf-sequence call — confirm that is intended.
splot.plotSequence(s_splat, std_class='subdwarf',type_range=3,fit_ranges=[1.15, 1.65], xrange=[1.2, 1.65], filename=wisps.OUTPUT_FIGURES+'/plot1.png')
    
    Out[37]:
    
In [ ]: