In [1]:
import sys

# Location of the cr-composition checkout that provides the `composition` package.
# NOTE(review): absolute, user-specific path -- adjust when running elsewhere.
REPO_DIR = '/home/jbourbeau/cr-composition'

# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if REPO_DIR not in sys.path:
    sys.path.append(REPO_DIR)
sys.path


Out[1]:
['',
 '/cvmfs/icecube.opensciencegrid.org/py2-v1/RHEL_6_x86_64/lib/python2.7/site-packages/setuptools-15.2-py2.7.egg',
 '/cvmfs/icecube.opensciencegrid.org/py2-v1/RHEL_6_x86_64/lib/python2.7/site-packages/setuptools-15.2-py2.7.egg',
 '/home/jbourbeau/.local/lib/python2.7/site-packages',
 '/cvmfs/icecube.opensciencegrid.org/py2-v1/RHEL_6_x86_64/i3ports/root-v5.34.18/lib',
 '/cvmfs/icecube.opensciencegrid.org/py2-v1/RHEL_6_x86_64/lib/python2.7/site-packages',
 '/cvmfs/icecube.opensciencegrid.org/py2-v1/RHEL_6_x86_64/i3ports/lib/python2.7/site-packages',
 '/data/user/jbourbeau/metaprojects/icerec/V05-00-00/build/lib',
 '/home/jbourbeau/cr-composition/analysis',
 '/home/jbourbeau',
 '/home/jbourbeau/useful',
 '/home/jbourbeau/anisotropy',
 '/home/jbourbeau/ShowerLLH_scripts',
 '/cvmfs/icecube.opensciencegrid.org/py2-v1/RHEL_6_x86_64/lib/python27.zip',
 '/cvmfs/icecube.opensciencegrid.org/py2-v1/RHEL_6_x86_64/lib/python2.7',
 '/cvmfs/icecube.opensciencegrid.org/py2-v1/RHEL_6_x86_64/lib/python2.7/plat-linux2',
 '/cvmfs/icecube.opensciencegrid.org/py2-v1/RHEL_6_x86_64/lib/python2.7/lib-tk',
 '/cvmfs/icecube.opensciencegrid.org/py2-v1/RHEL_6_x86_64/lib/python2.7/lib-old',
 '/cvmfs/icecube.opensciencegrid.org/py2-v1/RHEL_6_x86_64/lib/python2.7/lib-dynload',
 '/home/jbourbeau/.local/lib/python2.7/site-packages/IPython/extensions',
 '/home/jbourbeau/.ipython',
 '/home/jbourbeau/cr-composition']

In [2]:
import numpy as np
import pandas as pd
from pandas.tools.plotting import radviz
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn.apionly as sns

from sklearn.metrics import accuracy_score
from sklearn.model_selection import validation_curve, GridSearchCV, cross_val_score, ParameterGrid
from sklearn.neighbors import KNeighborsClassifier

import composition as comp

%matplotlib inline


/home/jbourbeau/.local/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')

In [3]:
# Make seaborn's 'muted' palette the default colour cycle for later plots.
sns.set_palette('muted')
# Remap matplotlib's single-letter colour codes (the plotting cells pass
# 'b', 'g', 'r') to seaborn colours.
sns.set_color_codes()

In [4]:
# Load the simulation DataFrame together with a dict of cuts keyed by name.
# (presumably each entry is a per-event boolean mask -- confirm in comp.load_sim)
df, cut_dict = comp.load_sim(return_cut_dict=True)

# Names of the cuts every event must pass to be kept.
standard_cut_keys = ['lap_reco_success', 'lap_zenith', 'num_hits_1_30', 'IT_signal',
                     'StationDensity', 'max_qfrac_1_30', 'lap_containment', 'energy_range_lap']

# Start from "keep everything" and AND in each cut in turn.
# np.ones(..., dtype=bool) stays boolean even for an empty frame, where
# np.array([True] * 0) would silently become a float64 array.
selection_mask = np.ones(len(df), dtype=bool)
for key in standard_cut_keys:
    selection_mask &= cut_dict[key]

df = df[selection_mask]

# Feature names (and their labels) used for training, then the train/test split.
feature_list, feature_labels = comp.get_training_features()
print(feature_list)
X_train, X_test, y_train, y_test, le = comp.get_train_test_sets(
    df, feature_list)

print('number training events = ' + str(y_train.shape[0]))


/home/jbourbeau/cr-composition/composition/load_sim.py:105: RuntimeWarning: divide by zero encountered in log10
  df['log_NChannels_1_30'] = np.nan_to_num(np.log10(df['NChannels_1_30']))
['lap_log_energy', 'InIce_log_charge_1_30', 'lap_cos_zenith', 'NChannels_1_30', 'log_s125']
number training events = 73257

In [15]:
# Interactive IPython help lookup for radviz (trailing `?` shows the docstring).
# NOTE(review): leftover exploration cell, executed out of order -- consider
# removing it before sharing the notebook.
radviz?

In [6]:
# Radviz projection of five features, coloured by the 'MC_comp' column.
plt.figure()
feature_list = ['lap_log_energy', 'InIce_log_charge_1_30', 'lap_cos_zenith', 'lap_chi2', 'log_NChannels_1_30']
tmp = df[feature_list + ['MC_comp']]
# Raw strings: the LaTeX labels contain backslashes (\chi, \mathrm) that must
# not be interpreted as string escape sequences.
tmp.columns = ['energy', 'charge', 'zenith', r'$\chi^2$', r'$\mathrm{N}_{\mathrm{channels}}$', 'comp']
opts = {'alpha': 0.5}
# random_state fixes which 2000 events are drawn, so the figure is reproducible.
# Sorting by class keeps the colour <-> composition mapping stable: radviz hands
# out colours in order of first appearance of each class in the frame.
radviz(tmp.sample(2000, random_state=2).sort_values('comp'), 'comp',
       color=['b', 'g', 'r'], **opts)
plt.legend(title='True composition')


Out[6]:
<matplotlib.legend.Legend at 0xf313f10>

In [17]:
# Radviz projection of four features (zenith dropped), coloured by 'MC_comp'.
fig, ax = plt.subplots()
feature_list = ['lap_log_energy', 'InIce_log_charge_1_30', 'lap_chi2', 'log_NChannels_1_30']
tmp = df[feature_list + ['MC_comp']]
# Raw strings: the LaTeX labels contain backslashes (\chi, \mathrm) that must
# not be interpreted as string escape sequences.
tmp.columns = ['energy', 'charge', r'$\chi^2$', r'$\mathrm{N}_{\mathrm{channels}}$', 'comp']
opts = {'alpha': 0.5}
# random_state fixes which 5000 events are drawn, so the figure is reproducible.
# Sorting by class keeps the colour <-> composition mapping stable: radviz hands
# out colours in order of first appearance of each class in the frame.
radviz(tmp.sample(5000, random_state=2).sort_values('comp'), 'comp',
       color=['b', 'g', 'r'], ax=ax, **opts)
# Legend goes on the explicit axes, centred just above the plot in one row.
leg = ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05),
                ncol=3, fancybox=False, shadow=False)
ax.set_xlim([-1.5, 1.5])
plt.show()



In [22]:
# Radviz projection of three features, coloured by the 'MC_comp' column.
plt.figure()
feature_list = ['InIce_log_charge_1_30', 'lap_chi2', 'log_NChannels_1_30']
tmp = df[feature_list + ['MC_comp']]
tmp.columns = ['charge', 'chisquared', 'nchannels', 'comp']
opts = {'alpha': 0.75}
# random_state fixes which 10000 events are drawn, so the figure is reproducible.
# Sorting by class keeps the colour <-> composition mapping stable: radviz hands
# out colours in order of first appearance of each class in the frame.
radviz(tmp.sample(10000, random_state=2).sort_values('comp'), 'comp',
       color=['b', 'g', 'r'], **opts)


Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0xd134d90>

In [24]:
# Select five features plus the composition column and give them short names.
feature_list = ['lap_log_energy', 'InIce_log_charge_1_30', 'lap_cos_zenith', 'lap_chi2', 'log_NChannels_1_30']
tmp = df[feature_list + ['MC_comp']].rename(columns={
    'lap_log_energy': 'energy',
    'InIce_log_charge_1_30': 'charge',
    'lap_cos_zenith': 'zenith',
    'lap_chi2': 'chisquared',
    'log_NChannels_1_30': 'nchannels',
    'MC_comp': 'comp',
})
# frac=1 returns every row in random order; the result is displayed only.
tmp.sample(frac=1)


Out[24]:
energy charge zenith chisquared nchannels comp
410625 6.397034 2.582136 0.903370 0.018033 1.556303 He
150495 6.989937 3.040066 0.998077 0.459173 2.212188 Fe
168491 6.375964 2.810805 0.876818 1.077457 1.612784 Fe
310862 6.777805 2.582845 0.977905 0.018323 1.662758 He
477876 7.157385 3.282042 0.930907 0.015347 2.120574 He
202219 7.828750 3.303651 0.905371 1.051695 2.041393 Fe
65381 6.451769 2.437444 0.974462 0.536076 1.954243 P
9738 6.248129 2.613261 0.977961 0.962385 1.819544 P
460497 7.750908 3.392241 0.967225 0.004413 1.991226 He
225685 6.417087 3.057016 0.909740 0.529079 2.000000 Fe
288323 6.259881 2.782538 0.997396 1.296459 1.892095 Fe
312338 7.021274 3.385866 0.936394 0.008081 2.008600 He
682676 7.943206 3.759114 0.941402 0.005726 2.367356 He
395070 7.206418 2.981299 0.971848 0.005997 2.190332 He
230632 6.354967 2.553778 0.889854 0.621153 1.851258 Fe
335190 7.808167 3.407286 0.925869 0.004090 2.064458 He
165148 6.935387 3.874148 0.998741 0.544677 2.264818 Fe
666833 7.469054 3.509535 0.997292 0.007213 2.206826 He
439029 7.485090 3.095054 0.948641 0.006108 1.892095 He
359011 7.409304 3.031690 0.901930 0.011204 1.919078 He
203077 7.591679 4.049980 0.989314 0.424587 2.397940 Fe
226766 6.279322 2.531278 0.999118 0.848731 1.954243 Fe
478497 6.409206 2.348780 0.933751 0.020602 1.913814 He
751112 7.378921 3.698391 0.979064 0.006050 2.255273 He
179042 6.578899 2.714567 0.990158 0.875237 2.060698 Fe
407339 7.246759 3.466579 0.995805 0.008744 2.107210 He
35980 6.439591 2.274554 0.985640 0.572963 1.924279 P
418578 7.737861 3.888848 0.982745 0.006232 2.551450 He
472418 7.029702 2.966702 0.984996 0.012064 2.309630 He
72605 6.616210 1.863344 0.939302 0.908468 1.662758 P
... ... ... ... ... ... ...
540438 7.635235 3.344949 0.916680 0.371357 2.068186 P
745381 7.224881 3.543076 0.946783 0.008685 2.209515 He
636034 7.694195 3.208489 0.875337 0.563716 1.845098 Fe
689572 7.887085 3.582302 0.986074 0.004891 2.357935 He
197066 6.389286 2.821031 0.991168 0.871667 2.220108 Fe
538707 7.120678 3.652542 0.953604 0.720714 2.193125 P
266672 6.606214 2.834531 0.886157 0.701748 1.832509 Fe
319847 6.294831 1.875603 0.910515 0.016761 1.146128 He
141390 7.361120 2.967779 0.929718 0.549327 2.012837 P
340039 6.457139 2.406038 0.983427 0.015175 1.913814 He
426059 7.891147 4.143046 0.943648 0.005663 2.264818 He
591808 7.727805 3.578130 0.990121 0.726145 2.245513 Fe
704250 7.197488 3.609758 0.967279 0.012840 2.285557 He
315139 6.868255 2.913922 0.983329 0.010590 2.049218 He
280997 6.213838 2.408053 0.926214 0.666286 1.799341 Fe
354127 6.984879 3.164351 0.937214 0.010250 1.944483 He
201615 6.630084 2.876483 0.957543 0.649181 2.152288 Fe
702213 7.741076 3.590199 0.970695 0.006664 2.326336 He
236064 7.929628 3.901189 0.930726 0.389490 2.232996 Fe
177427 7.312445 3.582322 0.987446 0.460195 2.324282 Fe
767748 7.319944 3.342337 0.945847 0.009085 1.857332 He
128234 7.164379 3.139925 0.998584 0.418169 2.075547 P
750497 7.413582 3.334821 0.999974 0.006774 2.336460 He
35284 6.260259 2.113491 0.954701 0.476327 1.755875 P
154305 6.629496 2.921224 0.985331 0.775961 2.096910 Fe
136074 7.209143 2.876873 0.893183 0.667331 1.863323 P
656771 7.184678 3.439246 0.984420 0.014684 2.396199 He
54070 7.944360 3.829021 0.991293 0.451271 2.534026 P
484171 6.335288 2.416516 0.997545 0.017997 1.954243 He
5286 6.993968 2.093272 0.860772 0.583192 1.278754 P

89659 rows × 6 columns


In [15]:
# Give the columns of `small` short, plain-text names.
# NOTE(review): `small` is not defined in any visible cell -- this relies on
# leftover kernel state and will fail on Restart & Run All; confirm its origin.
small.columns = ['energy', 'charge', 'zenith', 'chisquared', 'nchannels', 'comp']

In [16]:
# Display `small`.
# NOTE(review): `small` is not defined in any visible cell -- confirm its origin.
small


Out[16]:
energy charge zenith chisquared nchannels comp
1 7.725 3.051990 0.964200 0.561476 109 P
9 7.675 3.295126 0.962817 0.384772 176 P
19 7.525 3.579235 0.928059 0.518567 103 P
27 7.575 3.681950 0.928684 0.517789 185 P
49 6.975 2.839009 0.917719 0.615560 99 P
73 6.375 2.062633 0.945604 0.451978 47 P
75 6.425 2.406748 0.947448 0.498216 72 P
77 6.275 2.059584 0.944470 0.557761 68 P
78 6.325 2.519856 0.946402 0.300512 69 P
81 6.275 2.757371 0.945170 1.078451 83 P

In [ ]: