In [ ]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
In [ ]:
import matplotlib.pyplot as plt
import h5py
import spectraldl.preprocessing as preprocessing
import tensorflow.contrib.keras as keras
import numpy as np
import sklearn.metrics
import spectraldl.lamost as lamost
import spectraldl.plot as plot
import astropy.io.fits as fits
import random
In [ ]:
# Load the pre-built training set: samples `X_` and their labels `y_`.
# The file is opened read-only explicitly: older h5py releases default to a
# read/write "append" mode, which creates the file when the path is wrong
# and allows accidental modification of the data set.
with h5py.File('data/data.hdf5', 'r') as f:
    # `[...]` reads the whole dataset into an in-memory NumPy array, so the
    # data stays usable after the file is closed.
    X_ = f['X'][...]
    y_ = f['y'][...]
In [ ]:
# Preallocate the training arrays: `size` samples of 140 values each, plus
# one integer label per sample.  Everything starts out as zeros.
size = 1870
X = np.zeros(shape=(size, 140), dtype=np.float64)
y = np.zeros(shape=size, dtype=np.int8)
In [ ]:
# Copy the samples loaded from the HDF5 file into the leading rows of the
# preallocated arrays; rows past len(X_) are left untouched.
X[:len(X_)] = X_
y[:len(X_)] = y_
In [ ]:
# Fill X/y with LAMOST spectra.  Each CSV file lists one FITS path per line
# and all spectra listed in a file share that file's class label.
# Writing starts at row 0, so the training set consists of LAMOST data only
# if the previous cell was skipped.
# NOTE(review): if the previous cell HAS been run, starting at row 0
# overwrites the rows it filled -- confirm whether `i` should start at
# X_.shape[0] instead.
i = 0
for cl, label in [
    ('data/emission.csv', 0),
    ('data/absorption.csv', 1),
    ('data/double-peak.csv', 2),
    ('data/noise.csv', 3),
]:
    with open(cl, newline='') as f:
        for path in f:
            path = path.strip()
            if not path:
                # Skip blank lines (e.g. a trailing empty line) instead of
                # handing an empty file name to fits.open.
                continue
            with fits.open(path) as hdulist:
                fluxes = lamost.get_fluxes(hdulist)
                waves = lamost.get_waves(hdulist)
                # Resample while the FITS file is still open so the flux and
                # wavelength data are guaranteed to be readable.
                X[i, :] = preprocessing.resample_spectrum(waves, fluxes)
            y[i] = label
            i += 1
In [ ]:
# Scale the samples, then over-sample with SMOTE (4 classes); both steps are
# implemented in spectraldl.preprocessing.
X, y = preprocessing.smote_over_sample(
    preprocessing.scale_samples(X), y, n_classes=4
)
# One-hot encoded labels for the categorical cross-entropy loss.
y_oh = keras.utils.to_categorical(y, num_classes=4)
# Last expression of the cell: display the shape of the resulting data set.
X.shape
In [ ]:
# Visual sanity check: draw 8 distinct random samples and plot each of them,
# titled with its label, on a 4x2 grid of axes.
idx = random.sample(range(len(X)), 8)
fig, axs = plt.subplots(nrows=4, ncols=2)
for ax, label, x in zip(axs.flat, y[idx], X[idx]):
    ax.plot(x.reshape(-1))
    ax.set_title('label: ' + str(label))
fig.tight_layout()
In [ ]:
# Shape of a single sample as passed to the 2-D convolution layers:
# a 1 x 140 "image" with one channel.
HEIGHT, WIDTH, DEPTH = 1, 140, 1
In [ ]:
# Give every sample the (HEIGHT, WIDTH, DEPTH) layout expected by Conv2D.
X = X.reshape(-1, HEIGHT, WIDTH, DEPTH)
input_shape = X.shape[1:]


def _conv_block(filters, **first_layer_kwargs):
    """Return two (1, 3) ReLU convolutions followed by (1, 2) max-pooling.

    Extra keyword arguments are forwarded to the first convolution only;
    this is how the network's very first layer receives `input_shape`.
    """
    return [
        keras.layers.Conv2D(filters, (1, 3), activation='relu',
                            **first_layer_kwargs),
        keras.layers.Conv2D(filters, (1, 3), activation='relu'),
        keras.layers.MaxPooling2D(pool_size=(1, 2)),
    ]


# Three convolutional blocks with 64, 128 and 256 filters, then two
# 512-unit dense layers with dropout and a 4-way softmax output.
model = keras.models.Sequential(
    _conv_block(64, input_shape=input_shape)
    + _conv_block(128)
    + _conv_block(256)
    + [
        keras.layers.Flatten(),
        keras.layers.Dense(512, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(512, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(4, activation='softmax'),
    ]
)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
The Keras model is saved in the data/convnet.hdf5 file.
In [ ]:
%%time
callback = keras.callbacks.EarlyStopping(
monitor='loss',
min_delta=10e-5,
patience=50,
verbose=2
)
hist = model.fit(
X, y_oh,
epochs=1000, batch_size=256, verbose=2,
callbacks=[callback]
)
plt.plot(hist.epoch, hist.history['loss'])
# model.save('data/convnet.hdf5')
# model = keras.models.load_model('data/convnet.hdf5')
In [ ]:
# Predict the class of every sample in X (the same data the model was fitted
# on) and build a boolean mask of the samples whose prediction is wrong.
y_pred = model.predict_classes(X)
idx = np.not_equal(y_pred, y)
# Last expression of the cell: the number of misclassified samples.
np.count_nonzero(idx)
In [ ]:
# Plot misclassified spectra with their true and predicted labels.  Only the
# first 8 are shown: zip() stops as soon as the 4x2 grid of axes runs out.
fig, axs = plt.subplots(nrows=4, ncols=2)
for ax, true, pred, x in zip(axs.flat, y[idx], y_pred[idx], X[idx]):
    ax.plot(x.reshape(-1))
    ax.set_title('true: ' + str(true) + ' pred: ' + str(pred))
fig.tight_layout()
In [ ]:
# Confusion matrix of true vs. predicted labels; the class names are listed
# in label order (0..3).
cm = sklearn.metrics.confusion_matrix(y_true=y, y_pred=y_pred)
plot.plot_confusion_matrix(
    cm,
    ['emission', 'absorption', 'double-peak', 'noise'],
)
In [ ]:
%%time
with h5py.File('data/data.hdf5') as f:
X_lam = f['X_lam']
id_lam = f['id_lam']
size = X_lam.shape[0]
labels = np.zeros((size, ), dtype=np.int8)
batch = 256
for start in range(0, size, batch):
end = start + batch if start + batch <= size else size
X_tmp = preprocessing.scale_samples(X_lam[start:end])
X_tmp = X_tmp.reshape(-1, HEIGHT, WIDTH, DEPTH)
labels[start:end] = model.predict_classes(X_tmp, batch_size=X_tmp.shape[0], verbose=0)
In [ ]:
# Number of spectra assigned to each of the classes 0..3, as a 4-tuple.
tuple(np.count_nonzero(labels == cls) for cls in range(4))
In [ ]:
# Class whose predicted members should be inspected (2 = double-peak).
cl = 2
# Read-only access is sufficient; state it explicitly because older h5py
# releases default to a read/write mode.
with h5py.File('data/data.hdf5', 'r') as f:
    # The boolean mask selects the identifiers of spectra predicted as `cl`.
    # The selection is read into memory, so the iterator remains usable
    # after the file is closed.
    id_lam = iter(f['id_lam'][labels == cl])
In [ ]:
# Show the next spectrum of the selected class; re-run the cell to step
# through the candidates (next() raises StopIteration when none are left).
path = next(id_lam)
with fits.open(path) as hdulist:
    waves = lamost.get_waves(hdulist)
    fluxes = lamost.get_fluxes(hdulist)
    # Restrict the plot to the 6400-6700 Angstrom window around H-alpha.
    idx = (6400 < waves) & (waves < 6700)
    fig, ax = plt.subplots(figsize=(13, 5))
    # Individual data points as '+' markers, joined by a line.
    ax.scatter(waves[idx], fluxes[idx], marker='+')
    ax.plot(waves[idx], fluxes[idx])
    # Dashed vertical line marking H-alpha.
    ax.axvline(x=6564.6, color='black', linestyle='dashed', label='H-alpha')
    ax.grid()
    ax.legend()
    ax.set_xlabel('wavelength (Angstrom)')
    ax.set_ylabel('flux')
    # The title is read from the primary header while the file is open.
    ax.set_title(hdulist[0].header['FILENAME'])
In [ ]:
# Disabled export step: uncomment to write (identifier, predicted label)
# rows for spectra predicted as class 0 or 2 to data/candidates.csv.
# import csv
# with h5py.File('data/data.hdf5') as f, open('data/candidates.csv', 'w', newline='') as ofile:
# id_lam = f['id_lam']
# writer = csv.writer(ofile)
# writer.writerows(filter(lambda x: x[1] == 0 or x[1] == 2, zip(id_lam, labels)))