In [1]:
%load_ext watermark
%watermark -u -d -v -p numpy,matplotlib,scipy,pandas,sklearn,mlxtend


last updated: 2017-04-27 

CPython 2.7.10
IPython 5.3.0

numpy 1.12.1
matplotlib 2.0.0
scipy 0.19.0
pandas 0.19.2
sklearn 0.18.1
mlxtend 0.6.0

In [1]:
%matplotlib notebook
from __future__ import division, print_function
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn.apionly as sns
from mlxtend.plotting import plot_decision_regions

import comptools as comp
import comptools.analysis.plotting as plotting
    
color_dict = comp.analysis.get_color_dict()


/home/jbourbeau/.virtualenvs/composition/lib/python2.7/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)

In [3]:
df_sim = comp.load_dataframe(datatype='sim')


sim quality cut event flow:
             IceTopQualityCuts:    1.0    1.0
         lap_InIce_containment:  0.776  0.776
              InIceQualityCuts:  0.786   0.75
                 num_hits_1_60:  0.999   0.75
             reco_energy_range:  0.837  0.623
                 IceTop_charge:  0.896  0.562


/home/jbourbeau/cr-composition/comptools/dataframe_functions.py:145: RuntimeWarning: divide by zero encountered in log10
  df['log_IceTop_charge_175m'] = np.log10(df['IceTop_charge_175m'])

In [6]:
xkey = 'log_dEdX'
ykey = 'avg_inice_radius_1_60'
xres = 0.5
yres = 0.5

Get plotting feature array for xkey and ykey values


In [23]:
x_min = df_sim[xkey].min()
x_max = df_sim[xkey].max()
y_min = df_sim[ykey].min()
y_max = df_sim[ykey].max()
x_array = np.arange(x_min, x_max, xres)
y_array = np.arange(y_min, y_max, yres)
xx1, xx2 = np.meshgrid(x_array, y_array)
print(xx1.shape)
X = np.array([xx1.ravel(), xx2.ravel()]).T


(396, 9)

In [16]:
X


Out[16]:
array([[ -5.76903219e-01,   1.74552948e+01],
       [ -7.69032188e-02,   1.74552948e+01],
       [  4.23096781e-01,   1.74552948e+01],
       ..., 
       [  2.42309678e+00,   2.14955295e+02],
       [  2.92309678e+00,   2.14955295e+02],
       [  3.42309678e+00,   2.14955295e+02]])

In [17]:
df_temp = pd.DataFrame({xkey: X[:, 0], ykey: X[:, 1]}, columns=[xkey, ykey])
df_temp


Out[17]:
log_dEdX avg_inice_radius_1_60
0 -0.576903 17.455295
1 -0.076903 17.455295
2 0.423097 17.455295
3 0.923097 17.455295
4 1.423097 17.455295
5 1.923097 17.455295
6 2.423097 17.455295
7 2.923097 17.455295
8 3.423097 17.455295
9 -0.576903 17.955295
10 -0.076903 17.955295
11 0.423097 17.955295
12 0.923097 17.955295
13 1.423097 17.955295
14 1.923097 17.955295
15 2.423097 17.955295
16 2.923097 17.955295
17 3.423097 17.955295
18 -0.576903 18.455295
19 -0.076903 18.455295
20 0.423097 18.455295
21 0.923097 18.455295
22 1.423097 18.455295
23 1.923097 18.455295
24 2.423097 18.455295
25 2.923097 18.455295
26 3.423097 18.455295
27 -0.576903 18.955295
28 -0.076903 18.955295
29 0.423097 18.955295
... ... ...
3534 2.423097 213.455295
3535 2.923097 213.455295
3536 3.423097 213.455295
3537 -0.576903 213.955295
3538 -0.076903 213.955295
3539 0.423097 213.955295
3540 0.923097 213.955295
3541 1.423097 213.955295
3542 1.923097 213.955295
3543 2.423097 213.955295
3544 2.923097 213.955295
3545 3.423097 213.955295
3546 -0.576903 214.455295
3547 -0.076903 214.455295
3548 0.423097 214.455295
3549 0.923097 214.455295
3550 1.423097 214.455295
3551 1.923097 214.455295
3552 2.423097 214.455295
3553 2.923097 214.455295
3554 3.423097 214.455295
3555 -0.576903 214.955295
3556 -0.076903 214.955295
3557 0.423097 214.955295
3558 0.923097 214.955295
3559 1.423097 214.955295
3560 1.923097 214.955295
3561 2.423097 214.955295
3562 2.923097 214.955295
3563 3.423097 214.955295

3564 rows × 2 columns


In [18]:
training_features = ['lap_log_energy', 'log_dEdX', 'avg_inice_radius_1_60', 'lap_cos_zenith']
feature_dict =  {'lap_log_energy': 7.5, 'lap_cos_zenith': 9.5}

In [19]:
for key, value in feature_dict.iteritems():
    df_temp[key] = value
df_temp


Out[19]:
log_dEdX avg_inice_radius_1_60 lap_cos_zenith lap_log_energy
0 -0.576903 17.455295 9.5 7.5
1 -0.076903 17.455295 9.5 7.5
2 0.423097 17.455295 9.5 7.5
3 0.923097 17.455295 9.5 7.5
4 1.423097 17.455295 9.5 7.5
5 1.923097 17.455295 9.5 7.5
6 2.423097 17.455295 9.5 7.5
7 2.923097 17.455295 9.5 7.5
8 3.423097 17.455295 9.5 7.5
9 -0.576903 17.955295 9.5 7.5
10 -0.076903 17.955295 9.5 7.5
11 0.423097 17.955295 9.5 7.5
12 0.923097 17.955295 9.5 7.5
13 1.423097 17.955295 9.5 7.5
14 1.923097 17.955295 9.5 7.5
15 2.423097 17.955295 9.5 7.5
16 2.923097 17.955295 9.5 7.5
17 3.423097 17.955295 9.5 7.5
18 -0.576903 18.455295 9.5 7.5
19 -0.076903 18.455295 9.5 7.5
20 0.423097 18.455295 9.5 7.5
21 0.923097 18.455295 9.5 7.5
22 1.423097 18.455295 9.5 7.5
23 1.923097 18.455295 9.5 7.5
24 2.423097 18.455295 9.5 7.5
25 2.923097 18.455295 9.5 7.5
26 3.423097 18.455295 9.5 7.5
27 -0.576903 18.955295 9.5 7.5
28 -0.076903 18.955295 9.5 7.5
29 0.423097 18.955295 9.5 7.5
... ... ... ... ...
3534 2.423097 213.455295 9.5 7.5
3535 2.923097 213.455295 9.5 7.5
3536 3.423097 213.455295 9.5 7.5
3537 -0.576903 213.955295 9.5 7.5
3538 -0.076903 213.955295 9.5 7.5
3539 0.423097 213.955295 9.5 7.5
3540 0.923097 213.955295 9.5 7.5
3541 1.423097 213.955295 9.5 7.5
3542 1.923097 213.955295 9.5 7.5
3543 2.423097 213.955295 9.5 7.5
3544 2.923097 213.955295 9.5 7.5
3545 3.423097 213.955295 9.5 7.5
3546 -0.576903 214.455295 9.5 7.5
3547 -0.076903 214.455295 9.5 7.5
3548 0.423097 214.455295 9.5 7.5
3549 0.923097 214.455295 9.5 7.5
3550 1.423097 214.455295 9.5 7.5
3551 1.923097 214.455295 9.5 7.5
3552 2.423097 214.455295 9.5 7.5
3553 2.923097 214.455295 9.5 7.5
3554 3.423097 214.455295 9.5 7.5
3555 -0.576903 214.955295 9.5 7.5
3556 -0.076903 214.955295 9.5 7.5
3557 0.423097 214.955295 9.5 7.5
3558 0.923097 214.955295 9.5 7.5
3559 1.423097 214.955295 9.5 7.5
3560 1.923097 214.955295 9.5 7.5
3561 2.423097 214.955295 9.5 7.5
3562 2.923097 214.955295 9.5 7.5
3563 3.423097 214.955295 9.5 7.5

3564 rows × 4 columns


In [28]:
df_temp = df_temp[training_features]
df_temp


Out[28]:
lap_log_energy log_dEdX avg_inice_radius_1_60 lap_cos_zenith
0 7.5 -0.576903 17.455295 9.5
1 7.5 -0.076903 17.455295 9.5
2 7.5 0.423097 17.455295 9.5
3 7.5 0.923097 17.455295 9.5
4 7.5 1.423097 17.455295 9.5
5 7.5 1.923097 17.455295 9.5
6 7.5 2.423097 17.455295 9.5
7 7.5 2.923097 17.455295 9.5
8 7.5 3.423097 17.455295 9.5
9 7.5 -0.576903 17.955295 9.5
10 7.5 -0.076903 17.955295 9.5
11 7.5 0.423097 17.955295 9.5
12 7.5 0.923097 17.955295 9.5
13 7.5 1.423097 17.955295 9.5
14 7.5 1.923097 17.955295 9.5
15 7.5 2.423097 17.955295 9.5
16 7.5 2.923097 17.955295 9.5
17 7.5 3.423097 17.955295 9.5
18 7.5 -0.576903 18.455295 9.5
19 7.5 -0.076903 18.455295 9.5
20 7.5 0.423097 18.455295 9.5
21 7.5 0.923097 18.455295 9.5
22 7.5 1.423097 18.455295 9.5
23 7.5 1.923097 18.455295 9.5
24 7.5 2.423097 18.455295 9.5
25 7.5 2.923097 18.455295 9.5
26 7.5 3.423097 18.455295 9.5
27 7.5 -0.576903 18.955295 9.5
28 7.5 -0.076903 18.955295 9.5
29 7.5 0.423097 18.955295 9.5
... ... ... ... ...
3534 7.5 2.423097 213.455295 9.5
3535 7.5 2.923097 213.455295 9.5
3536 7.5 3.423097 213.455295 9.5
3537 7.5 -0.576903 213.955295 9.5
3538 7.5 -0.076903 213.955295 9.5
3539 7.5 0.423097 213.955295 9.5
3540 7.5 0.923097 213.955295 9.5
3541 7.5 1.423097 213.955295 9.5
3542 7.5 1.923097 213.955295 9.5
3543 7.5 2.423097 213.955295 9.5
3544 7.5 2.923097 213.955295 9.5
3545 7.5 3.423097 213.955295 9.5
3546 7.5 -0.576903 214.455295 9.5
3547 7.5 -0.076903 214.455295 9.5
3548 7.5 0.423097 214.455295 9.5
3549 7.5 0.923097 214.455295 9.5
3550 7.5 1.423097 214.455295 9.5
3551 7.5 1.923097 214.455295 9.5
3552 7.5 2.423097 214.455295 9.5
3553 7.5 2.923097 214.455295 9.5
3554 7.5 3.423097 214.455295 9.5
3555 7.5 -0.576903 214.955295 9.5
3556 7.5 -0.076903 214.955295 9.5
3557 7.5 0.423097 214.955295 9.5
3558 7.5 0.923097 214.955295 9.5
3559 7.5 1.423097 214.955295 9.5
3560 7.5 1.923097 214.955295 9.5
3561 7.5 2.423097 214.955295 9.5
3562 7.5 2.923097 214.955295 9.5
3563 7.5 3.423097 214.955295 9.5

3564 rows × 4 columns


In [29]:
df_temp['predicted'] = 3.14159
df_temp


Out[29]:
lap_log_energy log_dEdX avg_inice_radius_1_60 lap_cos_zenith predicted
0 7.5 -0.576903 17.455295 9.5 3.14159
1 7.5 -0.076903 17.455295 9.5 3.14159
2 7.5 0.423097 17.455295 9.5 3.14159
3 7.5 0.923097 17.455295 9.5 3.14159
4 7.5 1.423097 17.455295 9.5 3.14159
5 7.5 1.923097 17.455295 9.5 3.14159
6 7.5 2.423097 17.455295 9.5 3.14159
7 7.5 2.923097 17.455295 9.5 3.14159
8 7.5 3.423097 17.455295 9.5 3.14159
9 7.5 -0.576903 17.955295 9.5 3.14159
10 7.5 -0.076903 17.955295 9.5 3.14159
11 7.5 0.423097 17.955295 9.5 3.14159
12 7.5 0.923097 17.955295 9.5 3.14159
13 7.5 1.423097 17.955295 9.5 3.14159
14 7.5 1.923097 17.955295 9.5 3.14159
15 7.5 2.423097 17.955295 9.5 3.14159
16 7.5 2.923097 17.955295 9.5 3.14159
17 7.5 3.423097 17.955295 9.5 3.14159
18 7.5 -0.576903 18.455295 9.5 3.14159
19 7.5 -0.076903 18.455295 9.5 3.14159
20 7.5 0.423097 18.455295 9.5 3.14159
21 7.5 0.923097 18.455295 9.5 3.14159
22 7.5 1.423097 18.455295 9.5 3.14159
23 7.5 1.923097 18.455295 9.5 3.14159
24 7.5 2.423097 18.455295 9.5 3.14159
25 7.5 2.923097 18.455295 9.5 3.14159
26 7.5 3.423097 18.455295 9.5 3.14159
27 7.5 -0.576903 18.955295 9.5 3.14159
28 7.5 -0.076903 18.955295 9.5 3.14159
29 7.5 0.423097 18.955295 9.5 3.14159
... ... ... ... ... ...
3534 7.5 2.423097 213.455295 9.5 3.14159
3535 7.5 2.923097 213.455295 9.5 3.14159
3536 7.5 3.423097 213.455295 9.5 3.14159
3537 7.5 -0.576903 213.955295 9.5 3.14159
3538 7.5 -0.076903 213.955295 9.5 3.14159
3539 7.5 0.423097 213.955295 9.5 3.14159
3540 7.5 0.923097 213.955295 9.5 3.14159
3541 7.5 1.423097 213.955295 9.5 3.14159
3542 7.5 1.923097 213.955295 9.5 3.14159
3543 7.5 2.423097 213.955295 9.5 3.14159
3544 7.5 2.923097 213.955295 9.5 3.14159
3545 7.5 3.423097 213.955295 9.5 3.14159
3546 7.5 -0.576903 214.455295 9.5 3.14159
3547 7.5 -0.076903 214.455295 9.5 3.14159
3548 7.5 0.423097 214.455295 9.5 3.14159
3549 7.5 0.923097 214.455295 9.5 3.14159
3550 7.5 1.423097 214.455295 9.5 3.14159
3551 7.5 1.923097 214.455295 9.5 3.14159
3552 7.5 2.423097 214.455295 9.5 3.14159
3553 7.5 2.923097 214.455295 9.5 3.14159
3554 7.5 3.423097 214.455295 9.5 3.14159
3555 7.5 -0.576903 214.955295 9.5 3.14159
3556 7.5 -0.076903 214.955295 9.5 3.14159
3557 7.5 0.423097 214.955295 9.5 3.14159
3558 7.5 0.923097 214.955295 9.5 3.14159
3559 7.5 1.423097 214.955295 9.5 3.14159
3560 7.5 1.923097 214.955295 9.5 3.14159
3561 7.5 2.423097 214.955295 9.5 3.14159
3562 7.5 2.923097 214.955295 9.5 3.14159
3563 7.5 3.423097 214.955295 9.5 3.14159

3564 rows × 5 columns


In [30]:
X_predict = df_temp.values
X_predict.shape


Out[30]:
(3564, 5)

In [31]:
Z = df_temp['predicted'].values
Z


Out[31]:
array([ 3.14159,  3.14159,  3.14159, ...,  3.14159,  3.14159,  3.14159])

In [33]:
Z.reshape(xx1.shape)


Out[33]:
(396, 9)

In [34]:
from matplotlib.colors import ListedColormap

In [35]:
ListedColormap(['C0', 'C1'])


Out[35]:
<matplotlib.colors.ListedColormap at 0x7f88eb804510>

In [ ]: