In [1]:
%load_ext watermark
%watermark -u -d -v -p numpy,matplotlib,scipy,pandas,sklearn,mlxtend
last updated: 2017-04-27
CPython 2.7.10
IPython 5.3.0
numpy 1.12.1
matplotlib 2.0.0
scipy 0.19.0
pandas 0.19.2
sklearn 0.18.1
mlxtend 0.6.0
In [1]:
%matplotlib notebook
from __future__ import division, print_function
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn.apionly as sns
from mlxtend.plotting import plot_decision_regions
import comptools as comp
import comptools.analysis.plotting as plotting
color_dict = comp.analysis.get_color_dict()
/home/jbourbeau/.virtualenvs/composition/lib/python2.7/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
"This module will be removed in 0.20.", DeprecationWarning)
In [3]:
df_sim = comp.load_dataframe(datatype='sim')
sim quality cut event flow:
IceTopQualityCuts: 1.0 1.0
lap_InIce_containment: 0.776 0.776
InIceQualityCuts: 0.786 0.75
num_hits_1_60: 0.999 0.75
reco_energy_range: 0.837 0.623
IceTop_charge: 0.896 0.562
/home/jbourbeau/cr-composition/comptools/dataframe_functions.py:145: RuntimeWarning: divide by zero encountered in log10
df['log_IceTop_charge_175m'] = np.log10(df['IceTop_charge_175m'])
In [6]:
xkey = 'log_dEdX'
ykey = 'avg_inice_radius_1_60'
xres = 0.5
yres = 0.5
Get plotting feature array for xkey and ykey values
In [23]:
x_min = df_sim[xkey].min()
x_max = df_sim[xkey].max()
y_min = df_sim[ykey].min()
y_max = df_sim[ykey].max()
x_array = np.arange(x_min, x_max, xres)
y_array = np.arange(y_min, y_max, yres)
xx1, xx2 = np.meshgrid(x_array, y_array)
print(xx1.shape)
X = np.array([xx1.ravel(), xx2.ravel()]).T
(396, 9)
In [16]:
X
Out[16]:
array([[ -5.76903219e-01, 1.74552948e+01],
[ -7.69032188e-02, 1.74552948e+01],
[ 4.23096781e-01, 1.74552948e+01],
...,
[ 2.42309678e+00, 2.14955295e+02],
[ 2.92309678e+00, 2.14955295e+02],
[ 3.42309678e+00, 2.14955295e+02]])
In [17]:
df_temp = pd.DataFrame({xkey: X[:, 0], ykey: X[:, 1]}, columns=[xkey, ykey])
df_temp
Out[17]:
log_dEdX
avg_inice_radius_1_60
0
-0.576903
17.455295
1
-0.076903
17.455295
2
0.423097
17.455295
3
0.923097
17.455295
4
1.423097
17.455295
5
1.923097
17.455295
6
2.423097
17.455295
7
2.923097
17.455295
8
3.423097
17.455295
9
-0.576903
17.955295
10
-0.076903
17.955295
11
0.423097
17.955295
12
0.923097
17.955295
13
1.423097
17.955295
14
1.923097
17.955295
15
2.423097
17.955295
16
2.923097
17.955295
17
3.423097
17.955295
18
-0.576903
18.455295
19
-0.076903
18.455295
20
0.423097
18.455295
21
0.923097
18.455295
22
1.423097
18.455295
23
1.923097
18.455295
24
2.423097
18.455295
25
2.923097
18.455295
26
3.423097
18.455295
27
-0.576903
18.955295
28
-0.076903
18.955295
29
0.423097
18.955295
...
...
...
3534
2.423097
213.455295
3535
2.923097
213.455295
3536
3.423097
213.455295
3537
-0.576903
213.955295
3538
-0.076903
213.955295
3539
0.423097
213.955295
3540
0.923097
213.955295
3541
1.423097
213.955295
3542
1.923097
213.955295
3543
2.423097
213.955295
3544
2.923097
213.955295
3545
3.423097
213.955295
3546
-0.576903
214.455295
3547
-0.076903
214.455295
3548
0.423097
214.455295
3549
0.923097
214.455295
3550
1.423097
214.455295
3551
1.923097
214.455295
3552
2.423097
214.455295
3553
2.923097
214.455295
3554
3.423097
214.455295
3555
-0.576903
214.955295
3556
-0.076903
214.955295
3557
0.423097
214.955295
3558
0.923097
214.955295
3559
1.423097
214.955295
3560
1.923097
214.955295
3561
2.423097
214.955295
3562
2.923097
214.955295
3563
3.423097
214.955295
3564 rows × 2 columns
In [18]:
training_features = ['lap_log_energy', 'log_dEdX', 'avg_inice_radius_1_60', 'lap_cos_zenith']
feature_dict = {'lap_log_energy': 7.5, 'lap_cos_zenith': 9.5}
In [19]:
for key, value in feature_dict.iteritems():
df_temp[key] = value
df_temp
Out[19]:
log_dEdX
avg_inice_radius_1_60
lap_cos_zenith
lap_log_energy
0
-0.576903
17.455295
9.5
7.5
1
-0.076903
17.455295
9.5
7.5
2
0.423097
17.455295
9.5
7.5
3
0.923097
17.455295
9.5
7.5
4
1.423097
17.455295
9.5
7.5
5
1.923097
17.455295
9.5
7.5
6
2.423097
17.455295
9.5
7.5
7
2.923097
17.455295
9.5
7.5
8
3.423097
17.455295
9.5
7.5
9
-0.576903
17.955295
9.5
7.5
10
-0.076903
17.955295
9.5
7.5
11
0.423097
17.955295
9.5
7.5
12
0.923097
17.955295
9.5
7.5
13
1.423097
17.955295
9.5
7.5
14
1.923097
17.955295
9.5
7.5
15
2.423097
17.955295
9.5
7.5
16
2.923097
17.955295
9.5
7.5
17
3.423097
17.955295
9.5
7.5
18
-0.576903
18.455295
9.5
7.5
19
-0.076903
18.455295
9.5
7.5
20
0.423097
18.455295
9.5
7.5
21
0.923097
18.455295
9.5
7.5
22
1.423097
18.455295
9.5
7.5
23
1.923097
18.455295
9.5
7.5
24
2.423097
18.455295
9.5
7.5
25
2.923097
18.455295
9.5
7.5
26
3.423097
18.455295
9.5
7.5
27
-0.576903
18.955295
9.5
7.5
28
-0.076903
18.955295
9.5
7.5
29
0.423097
18.955295
9.5
7.5
...
...
...
...
...
3534
2.423097
213.455295
9.5
7.5
3535
2.923097
213.455295
9.5
7.5
3536
3.423097
213.455295
9.5
7.5
3537
-0.576903
213.955295
9.5
7.5
3538
-0.076903
213.955295
9.5
7.5
3539
0.423097
213.955295
9.5
7.5
3540
0.923097
213.955295
9.5
7.5
3541
1.423097
213.955295
9.5
7.5
3542
1.923097
213.955295
9.5
7.5
3543
2.423097
213.955295
9.5
7.5
3544
2.923097
213.955295
9.5
7.5
3545
3.423097
213.955295
9.5
7.5
3546
-0.576903
214.455295
9.5
7.5
3547
-0.076903
214.455295
9.5
7.5
3548
0.423097
214.455295
9.5
7.5
3549
0.923097
214.455295
9.5
7.5
3550
1.423097
214.455295
9.5
7.5
3551
1.923097
214.455295
9.5
7.5
3552
2.423097
214.455295
9.5
7.5
3553
2.923097
214.455295
9.5
7.5
3554
3.423097
214.455295
9.5
7.5
3555
-0.576903
214.955295
9.5
7.5
3556
-0.076903
214.955295
9.5
7.5
3557
0.423097
214.955295
9.5
7.5
3558
0.923097
214.955295
9.5
7.5
3559
1.423097
214.955295
9.5
7.5
3560
1.923097
214.955295
9.5
7.5
3561
2.423097
214.955295
9.5
7.5
3562
2.923097
214.955295
9.5
7.5
3563
3.423097
214.955295
9.5
7.5
3564 rows × 4 columns
In [28]:
df_temp = df_temp[training_features]
df_temp
Out[28]:
lap_log_energy
log_dEdX
avg_inice_radius_1_60
lap_cos_zenith
0
7.5
-0.576903
17.455295
9.5
1
7.5
-0.076903
17.455295
9.5
2
7.5
0.423097
17.455295
9.5
3
7.5
0.923097
17.455295
9.5
4
7.5
1.423097
17.455295
9.5
5
7.5
1.923097
17.455295
9.5
6
7.5
2.423097
17.455295
9.5
7
7.5
2.923097
17.455295
9.5
8
7.5
3.423097
17.455295
9.5
9
7.5
-0.576903
17.955295
9.5
10
7.5
-0.076903
17.955295
9.5
11
7.5
0.423097
17.955295
9.5
12
7.5
0.923097
17.955295
9.5
13
7.5
1.423097
17.955295
9.5
14
7.5
1.923097
17.955295
9.5
15
7.5
2.423097
17.955295
9.5
16
7.5
2.923097
17.955295
9.5
17
7.5
3.423097
17.955295
9.5
18
7.5
-0.576903
18.455295
9.5
19
7.5
-0.076903
18.455295
9.5
20
7.5
0.423097
18.455295
9.5
21
7.5
0.923097
18.455295
9.5
22
7.5
1.423097
18.455295
9.5
23
7.5
1.923097
18.455295
9.5
24
7.5
2.423097
18.455295
9.5
25
7.5
2.923097
18.455295
9.5
26
7.5
3.423097
18.455295
9.5
27
7.5
-0.576903
18.955295
9.5
28
7.5
-0.076903
18.955295
9.5
29
7.5
0.423097
18.955295
9.5
...
...
...
...
...
3534
7.5
2.423097
213.455295
9.5
3535
7.5
2.923097
213.455295
9.5
3536
7.5
3.423097
213.455295
9.5
3537
7.5
-0.576903
213.955295
9.5
3538
7.5
-0.076903
213.955295
9.5
3539
7.5
0.423097
213.955295
9.5
3540
7.5
0.923097
213.955295
9.5
3541
7.5
1.423097
213.955295
9.5
3542
7.5
1.923097
213.955295
9.5
3543
7.5
2.423097
213.955295
9.5
3544
7.5
2.923097
213.955295
9.5
3545
7.5
3.423097
213.955295
9.5
3546
7.5
-0.576903
214.455295
9.5
3547
7.5
-0.076903
214.455295
9.5
3548
7.5
0.423097
214.455295
9.5
3549
7.5
0.923097
214.455295
9.5
3550
7.5
1.423097
214.455295
9.5
3551
7.5
1.923097
214.455295
9.5
3552
7.5
2.423097
214.455295
9.5
3553
7.5
2.923097
214.455295
9.5
3554
7.5
3.423097
214.455295
9.5
3555
7.5
-0.576903
214.955295
9.5
3556
7.5
-0.076903
214.955295
9.5
3557
7.5
0.423097
214.955295
9.5
3558
7.5
0.923097
214.955295
9.5
3559
7.5
1.423097
214.955295
9.5
3560
7.5
1.923097
214.955295
9.5
3561
7.5
2.423097
214.955295
9.5
3562
7.5
2.923097
214.955295
9.5
3563
7.5
3.423097
214.955295
9.5
3564 rows × 4 columns
In [29]:
df_temp['predicted'] = 3.14159
df_temp
Out[29]:
lap_log_energy
log_dEdX
avg_inice_radius_1_60
lap_cos_zenith
predicted
0
7.5
-0.576903
17.455295
9.5
3.14159
1
7.5
-0.076903
17.455295
9.5
3.14159
2
7.5
0.423097
17.455295
9.5
3.14159
3
7.5
0.923097
17.455295
9.5
3.14159
4
7.5
1.423097
17.455295
9.5
3.14159
5
7.5
1.923097
17.455295
9.5
3.14159
6
7.5
2.423097
17.455295
9.5
3.14159
7
7.5
2.923097
17.455295
9.5
3.14159
8
7.5
3.423097
17.455295
9.5
3.14159
9
7.5
-0.576903
17.955295
9.5
3.14159
10
7.5
-0.076903
17.955295
9.5
3.14159
11
7.5
0.423097
17.955295
9.5
3.14159
12
7.5
0.923097
17.955295
9.5
3.14159
13
7.5
1.423097
17.955295
9.5
3.14159
14
7.5
1.923097
17.955295
9.5
3.14159
15
7.5
2.423097
17.955295
9.5
3.14159
16
7.5
2.923097
17.955295
9.5
3.14159
17
7.5
3.423097
17.955295
9.5
3.14159
18
7.5
-0.576903
18.455295
9.5
3.14159
19
7.5
-0.076903
18.455295
9.5
3.14159
20
7.5
0.423097
18.455295
9.5
3.14159
21
7.5
0.923097
18.455295
9.5
3.14159
22
7.5
1.423097
18.455295
9.5
3.14159
23
7.5
1.923097
18.455295
9.5
3.14159
24
7.5
2.423097
18.455295
9.5
3.14159
25
7.5
2.923097
18.455295
9.5
3.14159
26
7.5
3.423097
18.455295
9.5
3.14159
27
7.5
-0.576903
18.955295
9.5
3.14159
28
7.5
-0.076903
18.955295
9.5
3.14159
29
7.5
0.423097
18.955295
9.5
3.14159
...
...
...
...
...
...
3534
7.5
2.423097
213.455295
9.5
3.14159
3535
7.5
2.923097
213.455295
9.5
3.14159
3536
7.5
3.423097
213.455295
9.5
3.14159
3537
7.5
-0.576903
213.955295
9.5
3.14159
3538
7.5
-0.076903
213.955295
9.5
3.14159
3539
7.5
0.423097
213.955295
9.5
3.14159
3540
7.5
0.923097
213.955295
9.5
3.14159
3541
7.5
1.423097
213.955295
9.5
3.14159
3542
7.5
1.923097
213.955295
9.5
3.14159
3543
7.5
2.423097
213.955295
9.5
3.14159
3544
7.5
2.923097
213.955295
9.5
3.14159
3545
7.5
3.423097
213.955295
9.5
3.14159
3546
7.5
-0.576903
214.455295
9.5
3.14159
3547
7.5
-0.076903
214.455295
9.5
3.14159
3548
7.5
0.423097
214.455295
9.5
3.14159
3549
7.5
0.923097
214.455295
9.5
3.14159
3550
7.5
1.423097
214.455295
9.5
3.14159
3551
7.5
1.923097
214.455295
9.5
3.14159
3552
7.5
2.423097
214.455295
9.5
3.14159
3553
7.5
2.923097
214.455295
9.5
3.14159
3554
7.5
3.423097
214.455295
9.5
3.14159
3555
7.5
-0.576903
214.955295
9.5
3.14159
3556
7.5
-0.076903
214.955295
9.5
3.14159
3557
7.5
0.423097
214.955295
9.5
3.14159
3558
7.5
0.923097
214.955295
9.5
3.14159
3559
7.5
1.423097
214.955295
9.5
3.14159
3560
7.5
1.923097
214.955295
9.5
3.14159
3561
7.5
2.423097
214.955295
9.5
3.14159
3562
7.5
2.923097
214.955295
9.5
3.14159
3563
7.5
3.423097
214.955295
9.5
3.14159
3564 rows × 5 columns
In [30]:
X_predict = df_temp.values
X_predict.shape
Out[30]:
(3564, 5)
In [31]:
Z = df_temp['predicted'].values
Z
Out[31]:
array([ 3.14159, 3.14159, 3.14159, ..., 3.14159, 3.14159, 3.14159])
In [33]:
Z.reshape(xx1.shape)
Out[33]:
(396, 9)
In [34]:
from matplotlib.colors import ListedColormap
In [35]:
ListedColormap(['C0', 'C1'])
Out[35]:
<matplotlib.colors.ListedColormap at 0x7f88eb804510>
In [ ]:
Content source: jrbourbeau/cr-composition
Similar notebooks: