In [1]:
from __future__ import division
import argparse
import numpy as np
import pandas as pd
from pandas.tools.plotting import radviz
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn.apionly as sns
from sklearn.metrics import accuracy_score
from sklearn.model_selection import validation_curve, GridSearchCV, cross_val_score, ParameterGrid
from sklearn.neighbors import KNeighborsClassifier
from load_sim import load_sim
from preprocessing import get_train_test_sets
from features import get_training_features
from pipelines import get_pipeline
import plotting_functions as plotting
import data_functions as data_functions
%matplotlib inline
In [2]:
# Global plot styling: use seaborn's 'muted' palette as the default colour cycle.
sns.set_palette(palette='muted')
# Remap matplotlib's single-letter colour shorthands (e.g. 'b', 'g', as used by
# the radviz cells) to seaborn colours; 'deep' is the library default.
sns.set_color_codes(palette='deep')
In [3]:
# Load the simulation frame together with the dictionary of cuts.
df, cut_dict = load_sim(return_cut_dict=True)

# Cuts that an event must pass to be kept.
standard_cut_keys = ['reco_exists', 'reco_zenith', 'num_hits', 'IT_signal',
                     'StationDensity', 'max_charge_frac', 'reco_containment',
                     'energy_range']

# Start from an all-True mask and AND in each cut. Allocating the mask with an
# explicit bool dtype keeps it boolean even when the frame is empty
# (np.array([]) would silently become float64) and avoids building a
# throwaway Python list.
# Assumes every cut_dict entry is a boolean array with one value per row of
# df -- TODO confirm against load_sim.
selection_mask = np.ones(len(df), dtype=bool)
for key in standard_cut_keys:
    selection_mask &= cut_dict[key]
df = df[selection_mask]

# Feature columns (and their labels) used for training.
feature_list, feature_labels = get_training_features()
print(feature_list)

# Split the selected events into train / test sets; `le` is returned by
# get_train_test_sets alongside the arrays.
X_train, X_test, y_train, y_test, le = get_train_test_sets(df, feature_list)
print('number training events = ' + str(y_train.shape[0]))
In [25]:
# Leftover interactive help lookup (`radviz?`) removed: it is IPython-only
# syntax, produces no analysis output, and was executed out of order.
# Run `radviz?` ad hoc in a scratch cell when the docstring is needed.
In [4]:
# Radviz projection of five reconstructed features, coloured by the 'comp'
# class column (MC_comp).
plt.figure()

# NOTE: this rebinds the notebook-level `feature_list` that was set from
# get_training_features() in the data-loading cell.
feature_list = ['reco_log_energy', 'InIce_log_charge', 'reco_cos_zenith',
                'lap_chi2', 'log_NChannels']

# Short axis labels keyed by source column, so relabelling cannot drift out of
# sync with the column order (unlike a positional `.columns = [...]`).
short_labels = {
    'reco_log_energy': 'energy',
    'InIce_log_charge': 'charge',
    'reco_cos_zenith': 'zenith',
    'lap_chi2': 'chisquared',
    'log_NChannels': 'nchannels',
    'MC_comp': 'comp',
}
tmp = df[feature_list + ['MC_comp']].rename(columns=short_labels)

opts = {'alpha': 0.75}

# Plot a random subset to keep the figure readable. The size is capped at the
# number of available rows (sampling without replacement raises otherwise) and
# the seed is fixed so the figure is reproducible on re-run.
n_points = min(3000, len(tmp))
radviz(tmp.sample(n=n_points, random_state=2), 'comp', color=['b', 'g'], **opts)
Out[4]:
In [8]:
# Radviz projection of four reconstructed features (no zenith), coloured by
# the 'comp' class column (MC_comp).
plt.figure()

# NOTE: this rebinds the notebook-level `feature_list` that was set from
# get_training_features() in the data-loading cell.
feature_list = ['reco_log_energy', 'InIce_log_charge', 'lap_chi2',
                'log_NChannels']

# Short axis labels keyed by source column, so relabelling cannot drift out of
# sync with the column order (unlike a positional `.columns = [...]`).
short_labels = {
    'reco_log_energy': 'energy',
    'InIce_log_charge': 'charge',
    'lap_chi2': 'chisquared',
    'log_NChannels': 'nchannels',
    'MC_comp': 'comp',
}
tmp = df[feature_list + ['MC_comp']].rename(columns=short_labels)

opts = {'alpha': 0.75}

# Plot a random subset. The size is capped at the number of available rows
# (sampling without replacement raises otherwise) and the seed is fixed so
# the figure is reproducible on re-run.
n_points = min(10000, len(tmp))
radviz(tmp.sample(n=n_points, random_state=2), 'comp', color=['b', 'g'], **opts)
Out[8]:
In [29]:
# Radviz projection of three features over every selected event, coloured by
# the 'comp' class column (MC_comp).
plt.figure()

feature_list = ['InIce_log_charge', 'lap_chi2', 'NChannels']
selected_columns = feature_list + ['MC_comp']
plot_labels = ['charge', 'chisquared', 'nchannels', 'comp']

# Pair each source column with its short label and relabel in one step.
tmp = df[selected_columns].rename(columns=dict(zip(selected_columns, plot_labels)))

opts = dict(alpha=0.75)
radviz(tmp, 'comp', color=['b', 'g'], **opts)
Out[29]:
In [21]:
# Preview the first 10000 selected events with short column labels.
#
# The columns are chosen explicitly here instead of reusing `feature_list`:
# that name is rebound by the plotting cells above, and with its last value
# (three features) the six-label assignment fails with a length mismatch when
# the notebook is run top to bottom.
table_labels = {
    'reco_log_energy': 'energy',
    'InIce_log_charge': 'charge',
    'reco_cos_zenith': 'zenith',
    'lap_chi2': 'chisquared',
    'log_NChannels': 'nchannels',
    'MC_comp': 'comp',
}
table_columns = ['reco_log_energy', 'InIce_log_charge', 'reco_cos_zenith',
                 'lap_chi2', 'log_NChannels', 'MC_comp']
tmp = df[table_columns].iloc[:10000].rename(columns=table_labels)

# Show only the first few rows; `tmp` still holds the full 10000-row slice.
tmp.head()
Out[21]:
In [15]:
# Relabel the columns of `small` with short names (six labels, so `small` must
# have exactly six columns).
# NOTE(review): `small` is not assigned in any visible cell, so this raises
# NameError on a fresh kernel (Restart & Run All) -- TODO define it or drop
# this cell.
small.columns = ['energy', 'charge', 'zenith', 'chisquared', 'nchannels', 'comp']
In [16]:
# Display `small` as the cell output.
# NOTE(review): `small` is not assigned in any visible cell (presumably a
# DataFrame left over from a deleted cell) -- confirm where it comes from.
small
Out[16]:
In [ ]: