In [1]:
import pandas as pd
import numpy as np
import seaborn as sns


/Users/kou2k/.pyenv/versions/anaconda3-5.3.0/lib/python3.6/site-packages/matplotlib/__init__.py:886: MatplotlibDeprecationWarning: 
examples.directory is deprecated; in the future, examples will be found relative to the 'datapath' directory.
  "found relative to the 'datapath' directory.".format(key))

In [3]:
# Load the training set from the project-relative CSV into a DataFrame.
data = pd.read_csv(filepath_or_buffer="data/train.csv")

In [4]:
# Preview the first five rows to get a feel for the columns.
data.head(n=5)


Out[4]:
crew experiment time seat eeg_fp1 eeg_f7 eeg_f8 eeg_t4 eeg_t6 eeg_t5 ... eeg_c4 eeg_p4 eeg_poz eeg_c3 eeg_cz eeg_o2 ecg r gsr event
0 1 CA 0.011719 1 -5.28545 26.775801 -9.527310 -12.793200 16.717800 33.737499 ... 37.368999 17.437599 19.201900 20.5968 -3.95115 14.507600 -4520.0 817.705994 388.829987 A
1 1 CA 0.015625 1 -2.42842 28.430901 -9.323510 -3.757230 15.969300 30.443600 ... 31.170799 19.399700 19.689501 21.3547 1.33212 17.750200 -4520.0 817.705994 388.829987 A
2 1 CA 0.019531 1 10.67150 30.420200 15.350700 24.724001 16.143101 32.142799 ... -12.012600 19.396299 23.171700 22.4076 1.53786 22.247000 -4520.0 817.705994 388.829987 A
3 1 CA 0.023438 1 11.45250 25.609800 2.433080 12.412500 20.533300 31.494101 ... 18.574100 23.156401 22.641199 19.3367 2.54492 18.998600 -4520.0 817.705994 388.829987 A
4 1 CA 0.027344 1 7.28321 25.942600 0.113564 5.748000 19.833599 28.753599 ... 6.555440 22.754700 22.670300 20.2932 1.69962 22.812799 -4520.0 817.705994 388.829987 A

5 rows × 28 columns


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data.describe(percentiles=[0.25, 0.5, 0.75])


Out[5]:
crew time seat eeg_fp1 eeg_f7 eeg_f8 eeg_t4 eeg_t6 eeg_t5 eeg_t3 ... eeg_f4 eeg_c4 eeg_p4 eeg_poz eeg_c3 eeg_cz eeg_o2 ecg r gsr
count 4.867421e+06 4.867421e+06 4.867421e+06 4.867421e+06 4.867421e+06 4.867421e+06 4.867421e+06 4.867421e+06 4.867421e+06 4.867421e+06 ... 4.867421e+06 4.867421e+06 4.867421e+06 4.867421e+06 4.867421e+06 4.867421e+06 4.867421e+06 4.867421e+06 4.867421e+06 4.867421e+06
mean 5.538783e+00 1.782358e+02 4.999531e-01 3.746336e+00 1.360002e+00 1.213644e+00 7.350926e-02 7.845481e-02 8.675488e-02 2.299909e-01 ... 1.208597e+00 6.050047e-01 2.413972e-01 1.947635e-01 6.243715e-01 4.429119e-01 2.393738e-01 5.285460e+03 7.376090e+02 8.518467e+02
std 3.409353e+00 1.039592e+02 5.000000e-01 4.506763e+01 3.518923e+01 3.519242e+01 2.431472e+01 1.803932e+01 1.832606e+01 2.531132e+01 ... 4.205516e+01 2.052105e+01 1.660196e+01 1.833801e+01 1.975695e+01 1.974815e+01 2.351859e+01 1.214126e+04 8.187979e+01 5.039324e+02
min 1.000000e+00 3.000000e-03 0.000000e+00 -1.361360e+03 -1.581330e+03 -1.643950e+03 -1.516640e+03 -1.220510e+03 -1.266430e+03 -1.279940e+03 ... -2.333830e+03 -1.212030e+03 -1.228030e+03 -1.229130e+03 -1.230480e+03 -6.962790e+02 -1.176370e+03 -1.858570e+04 4.820600e+02 0.000000e+00
25% 3.000000e+00 8.808100e+01 0.000000e+00 -9.200250e+00 -8.325150e+00 -8.767610e+00 -7.367240e+00 -6.102000e+00 -6.007260e+00 -6.904030e+00 ... -9.306430e+00 -7.495970e+00 -6.713860e+00 -6.774840e+00 -7.161160e+00 -7.817650e+00 -6.526950e+00 -2.550070e+03 6.631430e+02 5.241140e+02
50% 5.000000e+00 1.769297e+02 0.000000e+00 3.819020e-01 4.264100e-02 1.140390e-01 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 ... 5.667500e-02 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 5.920510e+03 7.434380e+02 7.701970e+02
75% 7.000000e+00 2.683398e+02 1.000000e+00 1.030610e+01 8.753340e+00 9.282560e+00 7.437780e+00 6.176630e+00 6.086460e+00 7.071460e+00 ... 9.775770e+00 7.765670e+00 6.831320e+00 6.905020e+00 7.466520e+00 8.025190e+00 6.615180e+00 1.029010e+04 8.134120e+02 1.197120e+03
max 1.300000e+01 3.603711e+02 1.000000e+00 1.972240e+03 2.048790e+03 2.145710e+03 1.731880e+03 9.009370e+02 1.176540e+03 1.514820e+03 ... 2.034170e+03 8.917290e+02 9.080890e+02 1.435800e+03 9.284070e+02 6.136690e+02 2.443550e+03 3.418830e+04 8.401840e+02 1.999860e+03

8 rows × 26 columns


In [9]:
# Count missing values per column.
# Finding: every column reports 0, i.e. there is no missing data.
data.isnull().sum()


Out[9]:
crew          0
experiment    0
time          0
seat          0
eeg_fp1       0
eeg_f7        0
eeg_f8        0
eeg_t4        0
eeg_t6        0
eeg_t5        0
eeg_t3        0
eeg_fp2       0
eeg_o1        0
eeg_p3        0
eeg_pz        0
eeg_f3        0
eeg_fz        0
eeg_f4        0
eeg_c4        0
eeg_p4        0
eeg_poz       0
eeg_c3        0
eeg_cz        0
eeg_o2        0
ecg           0
r             0
gsr           0
event         0
dtype: int64

In [14]:
# Number of rows recorded for each crew.
# Author's note: `crew` is a unique id for a pair of pilots.
data.groupby("crew")[["event"]].count()


Out[14]:
event
crew
1 447652
2 552868
3 552795
4 552881
5 552815
6 552958
7 552769
8 549959
13 552724

In [16]:
# Number of rows recorded for each experiment type.
# Author's notes (translated from the original Japanese):
#   - CA: put simply, a "good" state
#   - (one note line was left blank in the original)
#   - SS: a very startled / surprised state
# NOTE(review): the original heading calls these the states of `event`,
# but the codes counted here come from the `experiment` column - confirm.
data.groupby("experiment")[["event"]].count()


Out[16]:
event
experiment
CA 1658376
DA 1658393
SS 1550652

In [21]:
# Inspect the `time` column for crew 1 only.
# Author's inference: `time` appears to be the elapsed time within an experiment.
# NOTE(review): the tail of the output shows each timestamp twice - possibly
# one row per seat; confirm before relying on `time` as a unique key.
data.loc[data["crew"].eq(1), "time"]


Out[21]:
0          0.011719
1          0.015625
2          0.019531
3          0.023438
4          0.027344
5          0.031250
6          0.035156
7          0.039062
8          0.042969
9          0.046875
10         0.050781
11         0.054688
12         0.058594
13         0.062500
14         0.066406
15         0.070312
16         0.074219
17         0.078125
18         0.082031
19         0.085938
20         0.089844
21         0.093750
22         0.097656
23         0.101562
24         0.105469
25         0.109375
26         0.113281
27         0.117188
28         0.121094
29         0.125000
            ...    
447622    99.941406
447623    99.941406
447624    99.945312
447625    99.945312
447626    99.949219
447627    99.949219
447628    99.953125
447629    99.953125
447630    99.957031
447631    99.957031
447632    99.960938
447633    99.960938
447634    99.964844
447635    99.964844
447636    99.968750
447637    99.968750
447638    99.972656
447639    99.972656
447640    99.976562
447641    99.976562
447642    99.980469
447643    99.980469
447644    99.984375
447645    99.984375
447646    99.988281
447647    99.988281
447648    99.992188
447649    99.992188
447650    99.996094
447651    99.996094
Name: time, Length: 447652, dtype: float64

In [ ]: