In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
/Users/kou2k/.pyenv/versions/anaconda3-5.3.0/lib/python3.6/site-packages/matplotlib/__init__.py:886: MatplotlibDeprecationWarning:
examples.directory is deprecated; in the future, examples will be found relative to the 'datapath' directory.
"found relative to the 'datapath' directory.".format(key))
In [3]:
# Load the training set; the path is relative to the notebook's working directory.
train_csv_path = "data/train.csv"
data = pd.read_csv(train_csv_path)
In [4]:
# Preview the first five rows to see which columns the frame contains.
data.head(n=5)
Out[4]:
crew
experiment
time
seat
eeg_fp1
eeg_f7
eeg_f8
eeg_t4
eeg_t6
eeg_t5
...
eeg_c4
eeg_p4
eeg_poz
eeg_c3
eeg_cz
eeg_o2
ecg
r
gsr
event
0
1
CA
0.011719
1
-5.28545
26.775801
-9.527310
-12.793200
16.717800
33.737499
...
37.368999
17.437599
19.201900
20.5968
-3.95115
14.507600
-4520.0
817.705994
388.829987
A
1
1
CA
0.015625
1
-2.42842
28.430901
-9.323510
-3.757230
15.969300
30.443600
...
31.170799
19.399700
19.689501
21.3547
1.33212
17.750200
-4520.0
817.705994
388.829987
A
2
1
CA
0.019531
1
10.67150
30.420200
15.350700
24.724001
16.143101
32.142799
...
-12.012600
19.396299
23.171700
22.4076
1.53786
22.247000
-4520.0
817.705994
388.829987
A
3
1
CA
0.023438
1
11.45250
25.609800
2.433080
12.412500
20.533300
31.494101
...
18.574100
23.156401
22.641199
19.3367
2.54492
18.998600
-4520.0
817.705994
388.829987
A
4
1
CA
0.027344
1
7.28321
25.942600
0.113564
5.748000
19.833599
28.753599
...
6.555440
22.754700
22.670300
20.2932
1.69962
22.812799
-4520.0
817.705994
388.829987
A
5 rows × 28 columns
In [5]:
# Summary statistics per column; by default describe() reports only the
# numeric columns of a mixed-dtype frame.
numeric_summary = data.describe()
numeric_summary
Out[5]:
crew
time
seat
eeg_fp1
eeg_f7
eeg_f8
eeg_t4
eeg_t6
eeg_t5
eeg_t3
...
eeg_f4
eeg_c4
eeg_p4
eeg_poz
eeg_c3
eeg_cz
eeg_o2
ecg
r
gsr
count
4.867421e+06
4.867421e+06
4.867421e+06
4.867421e+06
4.867421e+06
4.867421e+06
4.867421e+06
4.867421e+06
4.867421e+06
4.867421e+06
...
4.867421e+06
4.867421e+06
4.867421e+06
4.867421e+06
4.867421e+06
4.867421e+06
4.867421e+06
4.867421e+06
4.867421e+06
4.867421e+06
mean
5.538783e+00
1.782358e+02
4.999531e-01
3.746336e+00
1.360002e+00
1.213644e+00
7.350926e-02
7.845481e-02
8.675488e-02
2.299909e-01
...
1.208597e+00
6.050047e-01
2.413972e-01
1.947635e-01
6.243715e-01
4.429119e-01
2.393738e-01
5.285460e+03
7.376090e+02
8.518467e+02
std
3.409353e+00
1.039592e+02
5.000000e-01
4.506763e+01
3.518923e+01
3.519242e+01
2.431472e+01
1.803932e+01
1.832606e+01
2.531132e+01
...
4.205516e+01
2.052105e+01
1.660196e+01
1.833801e+01
1.975695e+01
1.974815e+01
2.351859e+01
1.214126e+04
8.187979e+01
5.039324e+02
min
1.000000e+00
3.000000e-03
0.000000e+00
-1.361360e+03
-1.581330e+03
-1.643950e+03
-1.516640e+03
-1.220510e+03
-1.266430e+03
-1.279940e+03
...
-2.333830e+03
-1.212030e+03
-1.228030e+03
-1.229130e+03
-1.230480e+03
-6.962790e+02
-1.176370e+03
-1.858570e+04
4.820600e+02
0.000000e+00
25%
3.000000e+00
8.808100e+01
0.000000e+00
-9.200250e+00
-8.325150e+00
-8.767610e+00
-7.367240e+00
-6.102000e+00
-6.007260e+00
-6.904030e+00
...
-9.306430e+00
-7.495970e+00
-6.713860e+00
-6.774840e+00
-7.161160e+00
-7.817650e+00
-6.526950e+00
-2.550070e+03
6.631430e+02
5.241140e+02
50%
5.000000e+00
1.769297e+02
0.000000e+00
3.819020e-01
4.264100e-02
1.140390e-01
0.000000e+00
0.000000e+00
0.000000e+00
0.000000e+00
...
5.667500e-02
0.000000e+00
0.000000e+00
0.000000e+00
0.000000e+00
0.000000e+00
0.000000e+00
5.920510e+03
7.434380e+02
7.701970e+02
75%
7.000000e+00
2.683398e+02
1.000000e+00
1.030610e+01
8.753340e+00
9.282560e+00
7.437780e+00
6.176630e+00
6.086460e+00
7.071460e+00
...
9.775770e+00
7.765670e+00
6.831320e+00
6.905020e+00
7.466520e+00
8.025190e+00
6.615180e+00
1.029010e+04
8.134120e+02
1.197120e+03
max
1.300000e+01
3.603711e+02
1.000000e+00
1.972240e+03
2.048790e+03
2.145710e+03
1.731880e+03
9.009370e+02
1.176540e+03
1.514820e+03
...
2.034170e+03
8.917290e+02
9.080890e+02
1.435800e+03
9.284070e+02
6.136690e+02
2.443550e+03
3.418830e+04
8.401840e+02
1.999860e+03
8 rows × 26 columns
In [9]:
# Count the missing values in each column.
# Result: there is no missing data.
missing_per_column = data.isnull().sum()
missing_per_column
Out[9]:
crew 0
experiment 0
time 0
seat 0
eeg_fp1 0
eeg_f7 0
eeg_f8 0
eeg_t4 0
eeg_t6 0
eeg_t5 0
eeg_t3 0
eeg_fp2 0
eeg_o1 0
eeg_p3 0
eeg_pz 0
eeg_f3 0
eeg_fz 0
eeg_f4 0
eeg_c4 0
eeg_p4 0
eeg_poz 0
eeg_c3 0
eeg_cz 0
eeg_o2 0
ecg 0
r 0
gsr 0
event 0
dtype: int64
In [14]:
# crew is a unique id for a pair of pilots; count the rows recorded per crew.
data.groupby("crew")[["event"]].count()
Out[14]:
event
crew
1
447652
2
552868
3
552795
4
552881
5
552815
6
552958
7
552769
8
549959
13
552724
In [16]:
# Count the rows recorded per experiment type.
# Author's notes (translated from Japanese):
#   - states of event
#   - CA is, simply put, a good state
#   - SS is a very startled state
# NOTE(review): DA is not described above, and these meanings are the
# author's reading -- TODO confirm against the dataset documentation.
data.groupby("experiment")[["event"]].count()
Out[16]:
event
experiment
CA
1658376
DA
1658393
SS
1550652
In [21]:
# Apparently `time` indicates the experiment time.
crew_one_mask = data["crew"].eq(1)
data.loc[crew_one_mask, "time"]
Out[21]:
0 0.011719
1 0.015625
2 0.019531
3 0.023438
4 0.027344
5 0.031250
6 0.035156
7 0.039062
8 0.042969
9 0.046875
10 0.050781
11 0.054688
12 0.058594
13 0.062500
14 0.066406
15 0.070312
16 0.074219
17 0.078125
18 0.082031
19 0.085938
20 0.089844
21 0.093750
22 0.097656
23 0.101562
24 0.105469
25 0.109375
26 0.113281
27 0.117188
28 0.121094
29 0.125000
...
447622 99.941406
447623 99.941406
447624 99.945312
447625 99.945312
447626 99.949219
447627 99.949219
447628 99.953125
447629 99.953125
447630 99.957031
447631 99.957031
447632 99.960938
447633 99.960938
447634 99.964844
447635 99.964844
447636 99.968750
447637 99.968750
447638 99.972656
447639 99.972656
447640 99.976562
447641 99.976562
447642 99.980469
447643 99.980469
447644 99.984375
447645 99.984375
447646 99.988281
447647 99.988281
447648 99.992188
447649 99.992188
447650 99.996094
447651 99.996094
Name: time, Length: 447652, dtype: float64
In [ ]:
Content source: kousukekikuchi1984/kaggle
Similar notebooks: