In [91]:
import splat
import wisps
import numpy as np
import matplotlib.pyplot as plt
from wisps import Annotator as an
from wisps import datasets
%matplotlib inline

In [92]:
# Load the SpeX template set: rename columns to match the HST-3D naming
# (f_x -> f_test, l_snr -> t_snr) and drop rows with any missing values.
spex = an.reformat_table(wisps.datasets['spex_data_set'])
spex = spex.rename(columns={'f_x': 'f_test', 'l_snr': 't_snr'})
spex = spex.dropna(how='any')

# Load the HST-3D (AEGIS + COSMOS) sample; treat infinities as missing
# so the subsequent dropna removes them too.
hst3d = an.reformat_table(wisps.datasets['aegis_cosmos'])
hst3d = hst3d.replace(np.inf, np.nan)
hst3d = hst3d.dropna(how='any')

In [93]:
#label spex
# Attach a grouped spectral-type label to each SpeX template.
# NOTE(review): assumes group_by_spt writes/reads the 'spt' column and that
# assign_from_one controls label numbering — confirm in the wisps package.
spex=an.group_by_spt(spex, spt_label='spt', assign_from_one=True)

In [110]:
#only look at things that fit better to a spectral standard than a line
# Named thresholds instead of inline magic numbers so the cuts are easy to
# find and tune; values unchanged from the original analysis.
F_TEST_MIN = 0.7   # minimum F-test statistic (standard fits better than a line)
T_SNR_MIN = 5.0    # minimum signal-to-noise ratio
hst3d = hst3d[(hst3d.f_test > F_TEST_MIN) & (hst3d.t_snr > T_SNR_MIN)]

Create a training set, a test set, and a set to predict on.


In [94]:
# Feature list: every hst3d column except the object identifier.
# (index/del keeps the original semantics: first occurrence removed,
# ValueError raised if 'name' is absent.)
features = list(hst3d.columns)
del features[features.index('name')]

Inspect the features. I know these features (at least the spectral indices) are correlated but also have high variance; I could pick my favorite features and use those instead.


In [95]:
# NOTE(review): import is scattered mid-notebook; consider moving it to the
# top imports cell so a fresh-kernel run doesn't depend on cell order.
import seaborn as sns
#plt.xscale('log')  # dead/commented-out experiment — consider removing
# Pairwise scatter matrix of the SpeX templates over all features, to
# eyeball correlations and spreads before choosing a reduced feature set.
sns.pairplot(spex[features], hue=None)


Out[95]:
<seaborn.axisgrid.PairGrid at 0x1c2b3f72b0>

In [96]:
good_features=['H_2O-1/J-Cont', 'CH_4/H-Cont', 'H_2O-2/J-Cont']

In [112]:
from sklearn.decomposition import PCA

# Fit a 2-component PCA on the SpeX index space, then project both the
# templates and the HST-3D sample onto that same plane.
template_matrix = spex[good_features].values
pca = PCA(n_components=2, svd_solver='full')
pca.fit(template_matrix)
spex_pcaed = pca.transform(template_matrix)
proj_sample = pca.transform(hst3d[good_features].values)

In [113]:
colors=an.color_from_spts(spex.spt.values, cmap='viridis')

In [117]:
# Overlay the projected HST-3D sample (black) on the SpeX templates
# (colored by spectral type) in the 2-D PCA plane; explicit Axes interface.
fig, ax = plt.subplots()
ax.scatter(proj_sample[:, 0], proj_sample[:, 1], alpha=0.6, color='k')
ax.scatter(spex_pcaed[:, 0], spex_pcaed[:, 1], color=colors)

ax.set_xlabel('axis-1', fontsize=18)
ax.set_ylabel('axis-2', fontsize=18)
ax.set_xlim([-1.5, 1.5])
ax.set_ylim([-0.3, 1.5])


Out[117]:
(-0.3, 1.5)

In [123]:
sns.distplot(spex.spt)


Out[123]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c5a8444a8>

In [ ]: