In [1]:
import wisps
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import numba
%matplotlib inline

In [2]:
# Glob pattern matching every .jpeg file in the "trash" spectra folder.
# NOTE(review): this is an absolute, user-specific path; on a machine where it
# does not exist the glob below simply matches nothing.
fold='/users/caganze/research/wisps/spectra/trash/*.jpeg'

In [3]:
# Paths of all files matching the pattern (glob returns them in arbitrary order).
spect=glob.glob(fold)

In [4]:
# Number of files matched by the glob pattern.
len(spect)


Out[4]:
7659

In [5]:
# Keep-mask aligned with `spect`: True for paths containing neither 'AM' nor 'PM'.
# NOTE(review): presumably this drops time-stamped duplicates/screenshots — confirm.
bools=['AM' not in x and 'PM' not in x for x in spect]

In [6]:
# Select the paths flagged True in `bools`. The mask dtype is forced to bool so
# that an empty `bools` (no files matched) gives an empty selection; without it
# NumPy builds a float64 array from [] and rejects it as an index (IndexError).
spectrs=np.array(spect)[np.array(bools, dtype=bool)]

In [7]:
# For each path: take the file name after the last '/', cut it at the first
# '.jpeg', then keep only the text before the first space.
trash_ids=[x.rsplit('/', 1)[-1].partition('.jpeg')[0].partition(' ')[0] for x in spectrs]

In [8]:
# Drop duplicate IDs; np.unique returns the distinct values sorted, as a NumPy array.
trash_ids=np.unique(trash_ids)

In [9]:
# Number of distinct IDs left after de-duplication.
len(trash_ids)


Out[9]:
5862

In [10]:
def proper_grism_id(n):
    """Return the lower-cased ID with its leading underscores turned into hyphens.

    Names beginning with the literal prefix ``'par'`` (tested case-sensitively,
    before lower-casing) have only their first underscore replaced; every other
    name has its first two underscores replaced.

    Parameters
    ----------
    n : str
        Identifier as extracted from a file name.

    Returns
    -------
    str
        The converted, lower-case identifier.
    """
    # One prefix test is enough: swapping '_' for '-' can never alter a leading 'par'.
    underscores_to_swap = 1 if n.startswith('par') else 2
    return n.replace('_', '-', underscores_to_swap).lower()

In [11]:
# Convert every trash ID with proper_grism_id, keeping the order of trash_ids.
propids=list(map(proper_grism_id, trash_ids))

In [12]:
# One converted ID per entry of trash_ids, so the two lengths match.
len(propids)


Out[12]:
5862

In [13]:
# Load the table stored under key 'all_phot_spec_data' from the HDF file that
# wisps.COMBINED_PHOTO_SPECTRO_FILE points to.
df=pd.read_hdf(wisps.COMBINED_PHOTO_SPECTRO_FILE, key='all_phot_spec_data')

In [14]:
# The 'candidates' entry of wisps.datasets.
# NOTE(review): presumably this is the object held by wisps, not a copy — confirm.
df2=wisps.datasets['candidates']

In [15]:
# Expose column 0 under the name 'grism_id' so it can be compared with df.grism_id.
# NOTE(review): this assigns onto the object obtained from wisps.datasets; if that
# is a shared DataFrame the new column is added there too — confirm this is intended.
df2['grism_id']=df2[0].values

In [16]:
# Size of the candidates table.
len(df2)


Out[16]:
364

In [17]:
# Rows of df whose grism_id is one of the converted trash IDs and is not
# present in the candidates table.
df3=df.loc[
    df.grism_id.isin(propids)
    & ~df.grism_id.isin(df2.grism_id)
]

In [18]:
# Number of rows that survived the trash / non-candidate selection.
len(df3)


Out[18]:
5798

In [19]:
#

In [20]:
#lowsnr.shape

In [21]:
# dfn is just another name for df3 (no copy is made); the alternative that
# concatenated `lowsnr` onto df3 is commented out in the following cell.
dfn=df3

In [22]:
#dfn=pd.concat([df3, lowsnr]).reset_index(drop=True)

In [23]:
# Number of rows that will be written to disk.
len(dfn)


Out[23]:
5798

In [24]:
# Persist the selection as 'trash.pkl' under the location given by wisps.OUTPUT_FILES.
dfn.to_pickle(wisps.OUTPUT_FILES+'/trash.pkl')

In [25]:
#wisps.datasets['stars']

In [ ]: