In [1]:
from __future__ import division
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn.apionly as sns
from sklearn.metrics import confusion_matrix
from composition.analysis.load_sim import load_sim
from composition.analysis.preprocessing import get_train_test_sets
from composition.analysis.features import get_training_features
from composition.analysis.pipelines import get_pipeline
%matplotlib inline
In [2]:
# Plot styling for the whole notebook: make seaborn's 'muted' palette the
# default color cycle for subsequent matplotlib figures.
sns.set_palette('muted')
# Called with default arguments; presumably remaps matplotlib's single-letter
# color codes ('b', 'g', 'r', ...) to seaborn colors - confirm in seaborn docs.
sns.set_color_codes()
In [3]:
# Load the simulation DataFrame together with a dictionary of named cuts.
# Assumes every cut_dict entry is a boolean array with one value per row of
# `df` (it is combined element-wise with a mask of length len(df)) - TODO confirm.
df, cut_dict = load_sim(return_cut_dict=True)

# Names of the cuts an event has to pass to be kept for training/testing.
standard_cut_keys = ['reco_exists', 'reco_zenith', 'num_hits', 'IT_signal',
                     'StationDensity', 'max_charge_frac', 'reco_containment',
                     'min_energy', 'max_energy']

# Start from an all-True boolean mask and AND every cut into it, so an event
# survives only if it passes all of the listed cuts.
selection_mask = np.ones(len(df), dtype=bool)
for key in standard_cut_keys:
    selection_mask &= cut_dict[key]
df = df[selection_mask]

# Build the train/test split from the selected events; `le` is the label
# encoder returned by the helper and is used later to map class names to codes.
feature_list = get_training_features()
X_train, X_test, y_train, y_test, le = get_train_test_sets(df, feature_list)
# Reports the size of the training set only (test events are not counted).
print('events = ' + str(y_train.shape[0]))
In [4]:
# Build the classifier pipeline through the project helper. The 'KN' key
# presumably selects a k-nearest-neighbours model - TODO confirm in get_pipeline.
pipeline = get_pipeline('KN')
In [12]:
# Train on the training split, then predict labels for the held-out test split.
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

# Raw confusion-matrix counts (rows: true label, columns: predicted label),
# divided by the number of test predictions so each entry is a fraction of
# the whole test sample.
counts = confusion_matrix(y_test, y_pred)
confmat = counts / len(y_pred)
print(confmat)

# Show which integer codes the label encoder assigns to the two classes.
print(le.transform(['P', 'Fe']))
In [17]:
# Draw the normalized confusion matrix as a shaded grid with each cell
# annotated by its value expressed as a percentage of all test events.
fig, ax = plt.subplots(figsize=(2.5, 2.5))
ax.matshow(confmat, cmap=plt.cm.Blues, alpha=0.3)
for (i, j), fraction in np.ndenumerate(confmat):
    # Raw string: '\%' is not a valid Python escape sequence, so a plain
    # literal triggers an invalid-escape warning on newer interpreters. The
    # rendered text is unchanged (backslash followed by '%'); the backslash is
    # presumably there for LaTeX text rendering - TODO confirm.
    ax.text(x=j, y=i, s=r'{:0.2f} \%'.format(100 * fraction),
            va='center', ha='center')
ax.set_xlabel('predicted label')
ax.set_ylabel('true label')
fig.tight_layout()
# plt.savefig('./figures/confusion_matrix.png', dpi=300)
plt.show()
In [ ]: