In [1]:
# remove after testing
%load_ext autoreload
%autoreload 2
In [2]:
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import LogNorm
from sklearn.ensemble import RandomForestClassifier
from mclearn.preprocessing import balanced_train_test_split
from mclearn.performance import (get_beta_parameters,
beta_avg_inv_cdf)
from mclearn.viz import (plot_hex_map,
plot_balanced_accuracy_violin)
from mclearn.photometry import (reddening_correction_sfd98,
reddening_correction_sf11,
reddening_correction_w14,
correct_magnitudes,
compute_colours,
clean_up_subclasses)
from mclearn.classifier import train_classifier
from mclearn.tools import results_exist, load_results
%matplotlib inline
sns.set_style('ticks')
In [3]:
fig_dir = '../thesis/figures/'
target_col = 'class'
features = ['psfMag_r', 'psf_u_g', 'psf_g_r', 'psf_r_i', 'psf_i_z',
'petroMag_r', 'petro_u_g', 'petro_g_r', 'petro_r_i', 'petro_i_z',
'petroRad_r']
high_memory = False
In [3]:
sdss = pd.read_csv("../data/sdss_dr7_photometry_source.csv.gz", compression="gzip")
In [4]:
# compute the three sets of reddening correction
A_u_sfd98, A_g_sfd98, A_r_sfd98, A_i_sfd98, A_z_sfd98 \
= reddening_correction_sfd98(sdss['extinction_r'])
A_u_sf11, A_g_sf11, A_r_sf11, A_i_sf11, A_z_sf11 \
= reddening_correction_sf11(sdss['extinction_r'])
A_u_w14, A_g_w14, A_r_w14, A_i_w14, A_z_w14 \
= reddening_correction_w14(sdss['extinction_r'])
# useful variables
psf_magnitudes = ['psfMag_u', 'psfMag_g', 'psfMag_r', 'psfMag_i', 'psfMag_z']
petro_magnitudes = ['petroMag_u', 'petroMag_g', 'petroMag_r', 'petroMag_i', 'petroMag_z']
sfd98_corrections = [A_u_sfd98, A_g_sfd98, A_r_sfd98, A_i_sfd98, A_z_sfd98]
sf11_corrections = [A_u_sf11, A_g_sf11, A_r_sf11, A_i_sf11, A_z_sf11]
w14_corrections = [A_u_w14, A_g_w14, A_r_w14, A_i_w14, A_z_w14]
colours = [('psfMag_u', 'psfMag_g'), ('psfMag_g', 'psfMag_r'), ('psfMag_r', 'psfMag_i'), ('psfMag_i', 'psfMag_z'),
('petroMag_u', 'petroMag_g'), ('petroMag_g', 'petroMag_r'), ('petroMag_r', 'petroMag_i'), ('petroMag_i', 'petroMag_z'),]
# calculate the corrected magnitudes
correct_magnitudes(sdss, psf_magnitudes, sfd98_corrections, '_sfd98')
correct_magnitudes(sdss, petro_magnitudes, sfd98_corrections, '_sfd98')
correct_magnitudes(sdss, psf_magnitudes, sf11_corrections, '_sf11')
correct_magnitudes(sdss, petro_magnitudes, sf11_corrections, '_sf11')
correct_magnitudes(sdss, psf_magnitudes, w14_corrections, '_w14')
correct_magnitudes(sdss, petro_magnitudes, w14_corrections, '_w14')
# calculate the corrected magnitudes
compute_colours(sdss, colours, '')
compute_colours(sdss, colours, '_sfd98')
compute_colours(sdss, colours, '_sf11')
compute_colours(sdss, colours, '_w14')
In [4]:
(sdss['extinction_r'] / 2.751).describe()
Out[4]:
In [6]:
fig = plt.figure(figsize=(10,5))
ax = plot_hex_map(sdss['ra'], sdss['dec'], C=sdss['extinction_r'] / 2.751, reduce_C_function=np.mean,
origin=180, vmin=0.001, vmax=2, mincnt=1, milky_way=True, norm=LogNorm(), cmap=plt.cm.pink_r)
fig.savefig(fig_dir + '2_astro/ebv_map.png', bbox_inches='tight', dpi=300)
In [ ]:
if high_memory:
ebv = pd.read_hdf('../data/sdss_ebv.h5', 'sdss_ebv')
fig = plt.figure(figsize=(10,5))
ax = plot_hex_map(ebv['ra'], ebv['dec'], C=ebv['ebv'], reduce_C_function=np.mean,
origin=180, vmin=0.001, vmax=2, mincnt=1, milky_way=True, norm=LogNorm(), cmap=plt.cm.pink_r)
fig.savefig('../ebv_map_all.png', bbox_inches='tight', dpi=300)
In [5]:
appendix_dir = fig_dir + 'appendix/'
train_size = 600000
test_size = 300000
uncorrected_features = ['ra', 'dec', 'psfMag_r', 'psf_u_g', 'psf_g_r', 'psf_r_i',
'psf_i_z', 'petroMag_r', 'petro_u_g', 'petro_g_r',
'petro_r_i', 'petro_i_z', 'petroRad_r']
sfd98_features = ['ra', 'dec', 'psfMag_r_sfd98', 'psf_u_g_sfd98', 'psf_g_r_sfd98', 'psf_r_i_sfd98',
'psf_i_z_sfd98', 'petroMag_r_sfd98', 'petro_u_g_sfd98', 'petro_g_r_sfd98',
'petro_r_i_sfd98', 'petro_i_z_sfd98', 'petroRad_r']
sf11_features = ['ra', 'dec', 'psfMag_r_sf11', 'psf_u_g_sf11', 'psf_g_r_sf11', 'psf_r_i_sf11',
'psf_i_z_sf11', 'petroMag_r_sf11', 'petro_u_g_sf11', 'petro_g_r_sf11',
'petro_r_i_sf11', 'petro_i_z_sf11', 'petroRad_r']
w14_features = ['ra', 'dec', 'psfMag_r_w14', 'psf_u_g_w14', 'psf_g_r_w14', 'psf_r_i_w14',
'psf_i_z_w14', 'petroMag_r_w14', 'petro_u_g_w14', 'petro_g_r_w14',
'petro_r_i_w14', 'petro_i_z_w14', 'petroRad_r']
In [7]:
correct_baseline, confusion_baseline = train_classifier(sdss, uncorrected_features,
target_col, train_size, test_size, 'uncorrected', fig_dir=appendix_dir, random_state=19)
In [8]:
correct_sfd98, confusion_sfd98 = train_classifier(sdss, sfd98_features, target_col,
train_size, test_size, 'sfd98', random_state=19, fig_dir=appendix_dir, correct_baseline=correct_baseline)
In [9]:
correct_sf11, confusion_sf11 = train_classifier(sdss, sf11_features, target_col,
train_size, test_size, 'sf11', random_state=19, fig_dir=appendix_dir, correct_baseline=correct_baseline)
In [10]:
correct_w14, confusion_w14 = train_classifier(sdss, w14_features, target_col,
train_size, test_size, 'w14', random_state=19, fig_dir=appendix_dir, correct_baseline=correct_baseline)
In [4]:
pickle_file = '../pickle/03_dust_extinction/accuracy_samples.pickle'
if not results_exist(pickle_file):
confusions = [confusion_baseline, confusion_sfd98, confusion_sf11, confusion_w14]
accuracy_samples = []
rs = np.random.RandomState(29)
for confusion in confusions:
parameters = get_beta_parameters(confusion)
samples = [beta_avg_inv_cdf(rs.uniform(0, 1), parameters) for _ in range(100)]
accuracy_samples.append(samples)
with open(pickle_file, 'wb') as f:
pickle.dump(accuracy_samples, f, protocol=4)
accuracy_samples = load_results(pickle_file)
In [5]:
columns = ['Uncorrected', 'SFD98', 'SF11', 'W14']
accuracy_df = pd.DataFrame(accuracy_samples, index=columns).transpose()
fig = plt.figure(figsize=(8,3))
ax = plot_balanced_accuracy_violin(accuracy_df)
ax.set_ylabel('PBA')
fig.savefig(fig_dir + '4_expt1/violin_reddening_correction.pdf', bbox_inches='tight')
In [22]:
for sample in accuracy_samples:
print(pd.Series(sample).describe())