In [1]:
from __future__ import division
import argparse
import numpy as np
import pandas as pd
from pandas.tools.plotting import radviz
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn.apionly as sns

from sklearn.metrics import accuracy_score
from sklearn.model_selection import validation_curve, GridSearchCV, cross_val_score, ParameterGrid
from sklearn.neighbors import KNeighborsClassifier

from load_sim import load_sim
from preprocessing import get_train_test_sets
from features import get_training_features
from pipelines import get_pipeline
import plotting_functions as plotting
import data_functions as data_functions

%matplotlib inline

In [2]:
# Notebook-wide plot styling.
# Make seaborn's 'muted' palette the default colour cycle for subsequent plots.
sns.set_palette('muted')
# Re-map matplotlib's single-letter colour shorthands (such as the 'b' and 'g'
# passed to radviz further down) to seaborn palette colours.
sns.set_color_codes()

In [3]:
# Load the simulation table together with the dictionary of per-event cut masks.
df, cut_dict = load_sim(return_cut_dict=True)

# Cuts that are combined to form the event selection used in this notebook.
standard_cut_keys = [
    'reco_exists', 'reco_zenith', 'num_hits', 'IT_signal',
    'StationDensity', 'max_charge_frac', 'reco_containment',
    'energy_range',
]

# Begin with every event selected, then fold in each cut in turn; the in-place
# product of the masks keeps only events that pass every listed cut.
selection_mask = np.array([True] * len(df))
for cut_name in standard_cut_keys:
    selection_mask *= cut_dict[cut_name]

# Keep only the selected events.
df = df[selection_mask]

# Training features (column names plus their labels) and the train/test split.
feature_list, feature_labels = get_training_features()
print(feature_list)
X_train, X_test, y_train, y_test, le = get_train_test_sets(df, feature_list)

n_train_events = y_train.shape[0]
print('number training events = ' + str(n_train_events))


load_sim.py:70: RuntimeWarning: divide by zero encountered in log10
  df['reco_log_energy'] = np.nan_to_num(np.log10(df['reco_energy']))
load_sim.py:76: RuntimeWarning: divide by zero encountered in log10
  df['log_NChannels'] = np.log10(df['NChannels'])
['reco_log_energy', 'InIce_log_charge_half', 'reco_cos_zenith', 'lap_chi2', 'NChannels_half']
number training events = 36125

In [25]:
# IPython help lookup (shows the radviz docstring); exploratory only, it defines
# nothing that later cells use.
radviz?

In [4]:
# Radviz projection of five feature columns, coloured by the 'MC_comp' column.
#
# Cell-local names are used deliberately: re-binding the notebook-global
# `feature_list` here would overwrite the training feature list created in the
# data-loading cell, which other cells use to index `df`.
plt.figure()
radviz_features = ['reco_log_energy', 'InIce_log_charge', 'reco_cos_zenith', 'lap_chi2', 'log_NChannels']
radviz_df = df[radviz_features + ['MC_comp']].copy()
# Short display names; their order must match radviz_features + ['MC_comp'].
radviz_df.columns = ['energy', 'charge', 'zenith', 'chisquared', 'nchannels', 'comp']
opts = {'alpha': 0.75}
# Plot a fixed-seed subsample so the figure is reproducible across runs. The
# sample size is capped at the frame length because sampling without
# replacement raises when asked for more rows than exist.
n_points = min(3000, len(radviz_df))
radviz(radviz_df.sample(n_points, random_state=0), 'comp', color=['b', 'g'], **opts)


Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x66c8a10>

In [8]:
# Radviz projection of four feature columns (no zenith), coloured by the
# 'MC_comp' column.
#
# Cell-local names are used deliberately: re-binding the notebook-global
# `feature_list` here would overwrite the training feature list created in the
# data-loading cell, which other cells use to index `df`.
plt.figure()
radviz_features = ['reco_log_energy', 'InIce_log_charge', 'lap_chi2', 'log_NChannels']
radviz_df = df[radviz_features + ['MC_comp']].copy()
# Short display names; their order must match radviz_features + ['MC_comp'].
radviz_df.columns = ['energy', 'charge', 'chisquared', 'nchannels', 'comp']
opts = {'alpha': 0.75}
# Plot a fixed-seed subsample so the figure is reproducible across runs. The
# sample size is capped at the frame length because sampling without
# replacement raises when asked for more rows than exist.
n_points = min(10000, len(radviz_df))
radviz(radviz_df.sample(n_points, random_state=0), 'comp', color=['b', 'g'], **opts)


Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x9f67a10>

In [29]:
# Radviz projection of three feature columns over all selected events,
# coloured by the 'MC_comp' column.
#
# Cell-local names are used deliberately: re-binding the notebook-global
# `feature_list` to this three-entry list would break any later cell that
# expects the five training features when indexing `df`.
plt.figure()
radviz_features = ['InIce_log_charge', 'lap_chi2', 'NChannels']
radviz_df = df[radviz_features + ['MC_comp']].copy()
# Short display names; their order must match radviz_features + ['MC_comp'].
radviz_df.columns = ['charge', 'chisquared', 'nchannels', 'comp']
opts = {'alpha': 0.75}
radviz(radviz_df, 'comp', color=['b', 'g'], **opts)


Out[29]:
<matplotlib.axes._subplots.AxesSubplot at 0xd476490>

In [21]:
# Preview of the training-feature columns plus the 'MC_comp' label.
#
# Fetch the training features explicitly rather than reading the mutable
# notebook-global `feature_list`, whose value depends on which cell ran last.
# The six display names below only fit the five training features + 'MC_comp';
# any other list length makes the column assignment raise.
training_features, _ = get_training_features()
tmp = df[training_features + ['MC_comp']][:10000].copy()
tmp.columns = ['energy', 'charge', 'zenith', 'chisquared', 'nchannels', 'comp']
tmp


Out[21]:
energy charge zenith chisquared nchannels comp
1 7.725 3.051990 0.964200 0.561476 109 P
9 7.675 3.295126 0.962817 0.384772 176 P
19 7.525 3.579235 0.928059 0.518567 103 P
27 7.575 3.681950 0.928684 0.517789 185 P
49 6.975 2.839009 0.917719 0.615560 99 P
73 6.375 2.062633 0.945604 0.451978 47 P
75 6.425 2.406748 0.947448 0.498216 72 P
77 6.275 2.059584 0.944470 0.557761 68 P
78 6.325 2.519856 0.946402 0.300512 69 P
81 6.275 2.757371 0.945170 1.078451 83 P
84 6.325 2.263880 0.946007 0.592236 66 P
86 6.275 2.055534 0.947078 0.593084 58 P
88 6.325 2.489739 0.947001 0.550288 65 P
90 6.325 2.201566 0.950838 0.735060 76 P
136 7.225 3.014547 0.928905 0.494618 142 P
142 7.325 3.038665 0.923595 0.510882 70 P
145 7.275 2.966847 0.921612 0.489856 69 P
151 7.275 2.542038 0.922848 0.504945 65 P
160 7.275 3.140900 0.922350 0.474093 125 P
178 7.175 3.217519 0.960754 0.418504 110 P
179 7.175 2.824425 0.961044 0.740394 111 P
195 6.875 2.293906 0.937275 0.459018 57 P
198 6.925 2.542890 0.936863 0.624998 27 P
201 6.875 2.574011 0.934358 0.427254 63 P
204 6.925 2.607750 0.935271 0.547687 73 P
209 7.125 2.415006 0.929247 0.328844 76 P
228 6.575 2.241281 0.896567 0.460350 56 P
239 6.225 2.238046 0.987381 0.326708 62 P
240 6.225 1.854647 0.987758 0.443949 36 P
257 6.675 2.610629 0.906273 0.593782 30 P
... ... ... ... ... ... ...
65077 6.375 1.977145 0.989144 0.547386 55 P
65078 6.375 2.437647 0.989724 0.522169 41 P
65079 6.425 2.198833 0.990251 0.576218 34 P
65080 6.325 2.462100 0.987495 0.554505 43 P
65083 6.275 2.597586 0.988937 0.353583 128 P
65084 6.325 1.952352 0.989393 0.578420 53 P
65085 6.325 2.229628 0.988370 0.579989 70 P
65093 7.925 3.458767 0.940536 0.375130 186 P
65096 7.975 3.451657 0.940499 0.514386 152 P
65101 7.975 3.584253 0.938845 0.333840 152 P
65109 6.575 1.851615 0.972366 0.526746 40 P
65111 6.675 1.847947 0.972700 0.393357 41 P
65112 6.625 1.738855 0.971674 0.447196 28 P
65113 6.575 2.054635 0.974112 0.516976 41 P
65116 6.575 2.040496 0.972259 0.518562 33 P
65118 6.625 1.851046 0.970901 0.328098 37 P
65132 7.075 2.831762 0.955688 0.509426 140 P
65134 7.125 3.252748 0.957473 0.385422 132 P
65137 7.475 3.143384 0.914588 0.423921 149 P
65139 7.475 3.000453 0.911318 0.381900 46 P
65150 6.275 2.571009 0.953694 1.321801 69 P
65151 6.325 2.372965 0.955008 0.707625 78 P
65152 6.275 2.476368 0.954392 0.465552 82 P
65153 6.275 2.670046 0.958332 0.699299 72 P
65159 6.225 2.477131 0.956653 0.662634 63 P
65160 6.225 2.448866 0.958939 0.245831 71 P
65165 6.325 2.734199 0.954546 0.530654 72 P
65166 6.225 2.636894 0.957139 0.542227 101 P
65169 6.475 2.443929 0.957201 0.834468 89 P
65172 6.375 1.949254 0.955760 0.880012 34 P

10000 rows × 6 columns


In [15]:
# `small` was never defined in the notebook as saved, so this cell raised
# NameError on a fresh kernel. It is rebuilt here.
# NOTE(review): the recorded output of the following cell (ten rows, six
# columns, matching the head of the larger preview table) suggests `small` was
# the first ten rows of the training-feature frame -- confirm against the
# author's intent.
training_features, _ = get_training_features()
small = df[training_features + ['MC_comp']][:10].copy()
small.columns = ['energy', 'charge', 'zenith', 'chisquared', 'nchannels', 'comp']

In [16]:
# Bare last expression: render the `small` frame with the notebook's rich display.
small


Out[16]:
energy charge zenith chisquared nchannels comp
1 7.725 3.051990 0.964200 0.561476 109 P
9 7.675 3.295126 0.962817 0.384772 176 P
19 7.525 3.579235 0.928059 0.518567 103 P
27 7.575 3.681950 0.928684 0.517789 185 P
49 6.975 2.839009 0.917719 0.615560 99 P
73 6.375 2.062633 0.945604 0.451978 47 P
75 6.425 2.406748 0.947448 0.498216 72 P
77 6.275 2.059584 0.944470 0.557761 68 P
78 6.325 2.519856 0.946402 0.300512 69 P
81 6.275 2.757371 0.945170 1.078451 83 P

In [ ]: