In [1]:
import sys
# Make the local cr-composition checkout importable. The membership check keeps
# this cell idempotent: re-running it no longer appends the same entry again.
# NOTE(review): absolute, user-specific path -- presumably where the
# `composition` package imported in the next cell lives; consider making it
# configurable instead of hard-coding it.
repo_path = '/home/jbourbeau/cr-composition'
if repo_path not in sys.path:
    sys.path.append(repo_path)
sys.path
Out[1]:
In [2]:
import numpy as np
import pandas as pd
# pandas moved its plotting helpers from `pandas.tools.plotting` to
# `pandas.plotting` (0.20) and later removed the old module. Try the current
# location first and fall back to the legacy one for old installations.
try:
    from pandas.plotting import radviz
except ImportError:
    from pandas.tools.plotting import radviz
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn.apionly as sns
from sklearn.metrics import accuracy_score
from sklearn.model_selection import validation_curve, GridSearchCV, cross_val_score, ParameterGrid
from sklearn.neighbors import KNeighborsClassifier
import composition as comp
%matplotlib inline
In [3]:
# Use seaborn's 'muted' palette as the default matplotlib color cycle.
sns.set_palette('muted')
# Remap matplotlib's single-letter color codes ('b', 'g', 'r', ...) to seaborn
# palette colors; the radviz cells further down pass color=['b', 'g', 'r'].
sns.set_color_codes()
In [4]:
# Load the simulation DataFrame together with its dictionary of cut masks.
df, cut_dict = comp.load_sim(return_cut_dict=True)

# Begin with every event selected, then AND in each standard cut in turn.
selection_mask = np.ones(len(df), dtype=bool)
standard_cut_keys = [
    'lap_reco_success', 'lap_zenith', 'num_hits_1_30', 'IT_signal',
    'StationDensity', 'max_qfrac_1_30', 'lap_containment', 'energy_range_lap',
]
for key in standard_cut_keys:
    selection_mask &= cut_dict[key]
df = df[selection_mask]

# Training feature names/labels come from the project helper; split the
# selected events into train and test sets using those features.
feature_list, feature_labels = comp.get_training_features()
print(feature_list)
X_train, X_test, y_train, y_test, le = comp.get_train_test_sets(df, feature_list)
print('number training events = ' + str(y_train.shape[0]))
In [15]:
# NOTE(review): leftover interactive docstring lookup for radviz; its execution
# count (15) is out of sequence with the surrounding cells -- consider removing
# this cell before sharing the notebook.
radviz?
In [6]:
# RadViz projection of five features, with points colored by the 'MC_comp'
# column (shown in the legend as the true composition).
plt.figure()
feature_list = ['lap_log_energy', 'InIce_log_charge_1_30', 'lap_cos_zenith', 'lap_chi2', 'log_NChannels_1_30']
tmp = df[feature_list + ['MC_comp']]
# Raw strings: '\c' and '\m' are not valid Python escape sequences, so the
# non-raw literals emit invalid-escape warnings on modern Python. The resulting
# label text is unchanged.
tmp.columns = ['energy', 'charge', 'zenith', r'$\chi^2$', r'$\mathrm{N}_{\mathrm{channels}}$', 'comp']
opts = {'alpha': 0.5}
# Fixed random_state so the plotted 2000-event subsample (and therefore the
# figure) is identical on every run.
radviz(tmp.sample(2000, random_state=2), 'comp', color=['b', 'g', 'r'], **opts)
plt.legend(title='True composition')
Out[6]:
In [17]:
# RadViz projection of four features (no zenith), with points colored by the
# 'MC_comp' column and the legend laid out in a single row above the plot.
fig, ax = plt.subplots()
feature_list = ['lap_log_energy', 'InIce_log_charge_1_30', 'lap_chi2', 'log_NChannels_1_30']
tmp = df[feature_list + ['MC_comp']]
# Raw strings: '\c' and '\m' are not valid Python escape sequences, so the
# non-raw literals emit invalid-escape warnings on modern Python. The resulting
# label text is unchanged.
tmp.columns = ['energy', 'charge', r'$\chi^2$', r'$\mathrm{N}_{\mathrm{channels}}$', 'comp']
opts = {'alpha': 0.5}
# Fixed random_state so the plotted 5000-event subsample is reproducible.
radviz(tmp.sample(5000, random_state=2), 'comp', color=['b', 'g', 'r'], ax=ax, **opts)
# Draw the legend on the explicit axes rather than via the pyplot state machine.
leg = ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05),
                ncol=3, fancybox=False, shadow=False)
# Widen the x-range beyond the unit circle that radviz draws.
ax.set_xlim([-1.5, 1.5])
plt.show()
In [22]:
# RadViz projection of three features (charge, chi-squared, number of
# channels), with points colored by the 'MC_comp' column.
plt.figure()
feature_list = ['InIce_log_charge_1_30', 'lap_chi2', 'log_NChannels_1_30']
tmp = df[feature_list + ['MC_comp']]
tmp.columns = ['charge', 'chisquared', 'nchannels', 'comp']
opts = {'alpha': 0.75}
# Fixed random_state so the plotted 10000-event subsample (and therefore the
# figure) is identical on every run instead of changing with each execution.
radviz(tmp.sample(10000, random_state=2), 'comp', color=['b', 'g', 'r'], **opts)
Out[22]:
In [24]:
# Select five features plus the 'MC_comp' label and give them short names.
feature_list = [
    'lap_log_energy',
    'InIce_log_charge_1_30',
    'lap_cos_zenith',
    'lap_chi2',
    'log_NChannels_1_30',
]
selected_columns = feature_list + ['MC_comp']
short_names = ['energy', 'charge', 'zenith', 'chisquared', 'nchannels', 'comp']
tmp = df[selected_columns]
tmp.columns = short_names
# Display all rows in random order (frac=1 samples every row exactly once).
tmp.sample(frac=1)
Out[24]:
In [15]:
# NOTE(review): `small` is not defined in any visible cell, and this cell's
# execution count (15) is out of sequence. It appears to rely on hidden kernel
# state and would raise NameError on a fresh kernel unless `small` is defined
# elsewhere -- TODO confirm where `small` comes from.
small.columns = ['energy', 'charge', 'zenith', 'chisquared', 'nchannels', 'comp']
In [16]:
# NOTE(review): displays `small`, which no visible cell defines -- confirm its
# origin, and consider showing only a few rows rather than the whole frame.
small
Out[16]:
In [ ]: