In [91]:
import splat
import wisps
import numpy as np
import matplotlib.pyplot as plt
from wisps import Annotator as an
from wisps import datasets
%matplotlib inline

In [92]:
# Load the SpeX template set: rename columns to match the HST-3D naming
# (f_x -> f_test, l_snr -> t_snr) and drop rows with any missing values.
spex = an.reformat_table(wisps.datasets['spex_data_set'])
spex = spex.rename(columns={'f_x': 'f_test', 'l_snr': 't_snr'})
spex = spex.dropna(how='any')

# Load the HST-3D (AEGIS + COSMOS) sample; treat infinities as missing
# so the subsequent dropna removes them too.
hst3d = an.reformat_table(wisps.datasets['aegis_cosmos'])
hst3d = hst3d.replace(np.inf, np.nan)
hst3d = hst3d.dropna(how='any')

In [93]:
#label spex
# Attach a grouped spectral-type label to each SpeX template.
# NOTE(review): assumes group_by_spt writes/reads the 'spt' column and that
# assign_from_one controls label numbering — confirm in the wisps package.
spex=an.group_by_spt(spex, spt_label='spt', assign_from_one=True)

In [110]:
#only look at things that fit better to a spectral standard than a line
# Named thresholds instead of inline magic numbers so the cuts are easy to
# find and tune; values unchanged from the original analysis.
F_TEST_MIN = 0.7   # minimum F-test statistic (standard fits better than a line)
T_SNR_MIN = 5.0    # minimum signal-to-noise ratio
hst3d = hst3d[(hst3d.f_test > F_TEST_MIN) & (hst3d.t_snr > T_SNR_MIN)]

Create a training set, a test set, and a set to predict on.


In [94]:
# Feature list: every hst3d column except the object identifier.
# (index/del keeps the original semantics: first occurrence removed,
# ValueError raised if 'name' is absent.)
features = list(hst3d.columns)
del features[features.index('name')]

Inspect the features. I know these features (at least the spectral indices) are correlated but also have high variance; I could pick my favorite features and use those instead.


In [95]:
# NOTE(review): import is scattered mid-notebook; consider moving it to the
# top imports cell so a fresh-kernel run doesn't depend on cell order.
import seaborn as sns
#plt.xscale('log')  # dead/commented-out experiment — consider removing
# Pairwise scatter matrix of the SpeX templates over all features, to
# eyeball correlations and spreads before choosing a reduced feature set.
sns.pairplot(spex[features], hue=None)


Out[95]:
<seaborn.axisgrid.PairGrid at 0x1c2b3f72b0>

In [96]:
good_features=['H_2O-1/J-Cont', 'CH_4/H-Cont', 'H_2O-2/J-Cont']

In [112]:
from sklearn.decomposition import PCA

# Fit a 2-component PCA on the SpeX index space, then project both the
# templates and the HST-3D sample onto that same plane.
template_matrix = spex[good_features].values
pca = PCA(n_components=2, svd_solver='full')
pca.fit(template_matrix)
spex_pcaed = pca.transform(template_matrix)
proj_sample = pca.transform(hst3d[good_features].values)

In [113]:
colors=an.color_from_spts(spex.spt.values, cmap='viridis')

In [117]:
# Overlay the projected HST-3D sample (black) on the SpeX templates
# (colored by spectral type) in the 2-D PCA plane; explicit Axes interface.
fig, ax = plt.subplots()
ax.scatter(proj_sample[:, 0], proj_sample[:, 1], alpha=0.6, color='k')
ax.scatter(spex_pcaed[:, 0], spex_pcaed[:, 1], color=colors)

ax.set_xlabel('axis-1', fontsize=18)
ax.set_ylabel('axis-2', fontsize=18)
ax.set_xlim([-1.5, 1.5])
ax.set_ylim([-0.3, 1.5])


Out[117]:
(-0.3, 1.5)

In [123]:
sns.distplot(spex.spt)


Out[123]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c5a8444a8>

In [ ]: