In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')
plt.rcParams['font.family']='SimHei' # CJK-capable font (for Mac)
plt.rcParams['axes.unicode_minus']=False # render minus signs correctly
pd.set_option("display.max_columns",80) # let pandas display up to 80 columns
In [2]:
X = pd.read_csv("./input/train.csv",header=None)
y = pd.read_csv("./input/trainLabels.csv",header=None)
test = pd.read_csv("./input/test.csv",header=None)
In [3]:
X.head(3)
Out[3]:
          0         1         2         3         4         5         6         7         8         9  \
0  0.299403 -1.226624  1.498425 -1.176150  5.289853  0.208297  2.404498  1.594506 -0.051608  0.663234
1 -1.174176  0.332157  0.949919 -1.285328  2.199061 -0.151268 -0.427039  2.619246 -0.765884 -0.093780
2  1.192222 -0.414371  0.067054 -2.233568  3.658881  0.089007  0.203439 -4.219054 -1.184919 -1.240310

         10        11          12        13        14        15        16        17        18        19  \
0 -1.408370  1.114744    0.910415  2.218110  4.305643  0.088924  0.169149  0.413448  1.513862  2.662967
1  0.935347  1.057796   -0.539275 -0.172662 -0.679051  0.607362  1.148635  2.437077 -0.313069  0.528104
2 -0.890270  0.909969  -11.851312  3.352420 -4.862125 -0.903317 -1.824344  0.045446 -2.126474  1.161563

         20        21        22        23        24        25        26        27        28        29  \
0 -1.072765  0.149111  0.559579  4.378885 -0.463603 -0.063959  0.544930  0.712772 -1.494050 -2.636169
1 -0.513476  0.766221 -1.466939 -2.318885  1.647223 -1.556443 -1.645581 -0.198467 -1.472066 -1.906147
2 -1.027912 -1.078792  1.848525  3.758918  0.623649 -0.091044 -1.033094 -0.254151 -4.377542 -1.196298

         30        31        32        33        34        35        36        37        38        39
0 -0.850465 -0.622990 -1.833057  0.293024  3.552681  0.717611  3.305972 -2.715559 -2.682409  0.101050
1 -0.819750  0.012037  2.038836  0.468579 -0.517657  0.422326  0.803699  1.213219  1.382932 -1.817761
2 -0.604501  0.750054 -3.360521  0.856988 -2.751451 -1.582735  1.672246  0.656438 -0.932473  2.987436
In [4]:
y.head(3)
Out[4]:
   0
0  1
1  0
2  0
In [5]:
test.head(3)
Out[5]:
          0         1         2         3         4         5         6         7         8         9  \
0  2.808909 -0.242894 -0.546421  0.255162  1.749736 -0.030458 -1.322071  3.578071 -0.667578 -0.884257
1 -0.374101  0.537669  0.081063  0.756773  0.915231  2.557282  3.703187  1.673835 -0.764122 -1.228040
2 -0.088370  0.154743  0.380716 -1.176126  1.699867 -0.258627 -1.384999  1.093584  1.596633  0.230631

         10        11        12        13        14        15        16        17        18        19  \
0 -0.247797  1.527068  3.359963  1.172500  1.823865  0.032365 -0.506474  0.116775  0.302796 -0.069677
1 -0.392141  0.396406 -6.404201  0.892782 -0.277077 -2.277002  0.020142  0.244746 -3.129533  0.646215
2 -1.152022  0.219019 -1.660207  1.830836 -2.055934  0.361885  0.406346  1.473729  1.697615  0.212823

         20        21        22        23        24        25        26        27        28        29  \
0 -1.067652  0.268855 -2.114816 -2.829966 -0.379520  2.498289 -0.943095  0.826023  0.738353 -1.250244
1  0.596033 -0.183858 -1.724840 -4.744641  0.735539  1.987149  0.308611  0.191700 -3.309266 -1.474186
2 -0.126665 -0.267433 -2.318000 -2.978056  0.122586  0.411849 -0.591939 -0.112360 -3.033888 -2.880890

         30        31        32        33        34        35        36        37        38        39
0 -0.261688 -0.224375 -1.675606 -0.479584 -0.244388 -0.672355  0.517860  0.010665 -0.419214  2.818387
1 -0.969463  0.574154 -2.200519 -1.612240  0.179031 -2.924596  0.643610 -1.470939 -0.067408 -0.976265
2 -0.769885 -0.005143  1.467490  0.483803 -3.542981  0.814561 -1.652948  1.265866 -1.749248  1.773784
In [6]:
len(test)
Out[6]:
9000
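The test set is nine times larger than the training set. A quick shape check (an addition, not in the original notebook, but consistent with its outputs: 40 columns above and the 300-row validation split below) makes the sizes explicit:

# X: 1000 rows x 40 anonymous features, y: 1000 labels, test: 9000 rows.
print(X.shape, y.shape, test.shape)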
In [7]:
corr = X.corr()
print(corr)
0 1 2 3 4 5 6 \
0 1.000000 -0.029022 0.033437 0.012054 -0.022404 0.037209 0.019134
1 -0.029022 1.000000 0.014202 0.009663 0.044758 -0.034173 -0.008474
2 0.033437 0.014202 1.000000 0.007579 0.019658 -0.013532 -0.054348
3 0.012054 0.009663 0.007579 1.000000 0.023827 0.021305 -0.028903
4 -0.022404 0.044758 0.019658 0.023827 1.000000 0.027705 -0.338883
5 0.037209 -0.034173 -0.013532 0.021305 0.027705 1.000000 -0.068367
6 0.019134 -0.008474 -0.054348 -0.028903 -0.338883 -0.068367 1.000000
7 0.015277 0.006568 -0.008825 -0.044407 0.206964 0.036041 -0.000915
8 -0.045568 0.039895 0.019443 0.001253 0.063638 0.031510 -0.031153
9 -0.023777 0.008368 -0.015920 -0.049892 0.079723 -0.023893 0.065493
10 -0.037427 0.022105 0.011994 -0.031417 -0.051509 -0.004594 -0.006394
11 0.012477 -0.034953 0.023328 0.052135 -0.019005 -0.058050 0.036516
12 -0.004410 -0.065364 0.026965 -0.048933 -0.184640 0.043318 -0.419642
13 0.044999 -0.039695 -0.003345 -0.031286 0.050186 -0.003975 0.001454
14 0.002984 -0.069600 -0.024474 -0.044111 0.209115 0.016991 -0.184330
15 0.006890 -0.001228 0.014957 -0.046394 0.011283 0.002038 -0.004506
16 0.023151 -0.033658 0.037882 0.038765 -0.010417 -0.004314 0.059714
17 0.039499 0.044709 0.054775 0.015674 -0.001717 -0.000589 -0.043495
18 -0.051614 0.018864 0.001878 -0.017277 0.080150 0.039427 -0.107176
19 0.004408 0.009298 -0.005757 0.025663 0.025250 -0.042694 -0.004120
20 -0.001347 0.021246 0.053421 -0.002158 0.004765 0.044357 0.024973
21 0.016485 0.020263 0.004037 -0.008345 0.009427 -0.041301 -0.030886
22 0.021047 0.044744 -0.010850 -0.012444 0.449976 -0.024654 0.116646
23 -0.020664 0.051050 0.051547 0.013371 0.568959 0.003663 -0.217741
24 -0.028256 0.020862 0.001834 -0.034712 -0.059516 -0.003748 0.070511
25 -0.014671 0.019272 -0.006478 -0.017270 0.024766 -0.022389 0.054907
26 0.047728 0.010341 -0.017693 0.040604 -0.016014 0.029581 -0.013347
27 -0.060396 -0.002228 -0.009868 0.021951 0.049695 -0.006412 -0.014378
28 0.026154 -0.067760 0.005040 -0.033828 -0.522373 -0.021571 0.048105
29 0.031174 -0.010763 -0.005418 0.032422 -0.465130 0.010774 0.069492
30 0.012446 -0.000782 0.034527 0.025678 -0.039205 0.011408 -0.050016
31 0.023033 -0.052864 -0.002177 -0.006878 0.007981 -0.050032 -0.020219
32 -0.004701 0.005583 -0.038702 -0.030960 -0.325592 0.016181 -0.105353
33 0.005180 0.009125 0.046611 -0.018130 0.031156 0.017013 -0.025403
34 0.012961 -0.026617 0.008141 -0.055653 -0.212497 0.019377 0.171086
35 0.047455 0.053087 0.014150 0.024811 0.015455 -0.034101 -0.003786
36 -0.026327 0.039265 0.020865 0.048042 0.016657 -0.009464 0.090158
37 0.038844 0.019537 0.030127 0.016295 0.021133 -0.046527 0.075050
38 0.016098 -0.046295 -0.004390 0.041164 -0.084677 0.032984 -0.141340
39 -0.045840 -0.005667 0.020101 0.021217 -0.227957 0.013334 -0.233928
7 8 9 10 11 12 13 \
0 0.015277 -0.045568 -0.023777 -0.037427 0.012477 -0.004410 0.044999
1 0.006568 0.039895 0.008368 0.022105 -0.034953 -0.065364 -0.039695
2 -0.008825 0.019443 -0.015920 0.011994 0.023328 0.026965 -0.003345
3 -0.044407 0.001253 -0.049892 -0.031417 0.052135 -0.048933 -0.031286
4 0.206964 0.063638 0.079723 -0.051509 -0.019005 -0.184640 0.050186
5 0.036041 0.031510 -0.023893 -0.004594 -0.058050 0.043318 -0.003975
6 -0.000915 -0.031153 0.065493 -0.006394 0.036516 -0.419642 0.001454
7 1.000000 0.026522 0.040061 -0.021951 -0.003977 0.056134 0.012564
8 0.026522 1.000000 0.016758 -0.022337 -0.033289 0.051579 0.028123
9 0.040061 0.016758 1.000000 0.046935 0.019443 -0.076660 0.014033
10 -0.021951 -0.022337 0.046935 1.000000 -0.014517 0.030087 0.011825
11 -0.003977 -0.033289 0.019443 -0.014517 1.000000 -0.055474 -0.009768
12 0.056134 0.051579 -0.076660 0.030087 -0.055474 1.000000 -0.014580
13 0.012564 0.028123 0.014033 0.011825 -0.009768 -0.014580 1.000000
14 -0.021095 0.017060 0.004008 -0.030630 -0.117767 0.362282 -0.004561
15 -0.008298 0.016582 0.020943 0.053711 -0.015528 -0.017225 -0.011772
16 -0.011443 0.036039 0.010802 -0.021356 -0.034338 -0.024656 0.004645
17 -0.056082 -0.004586 -0.087833 0.027280 -0.037096 0.018956 0.018460
18 0.130023 0.008089 -0.010817 0.039504 -0.001982 0.412329 -0.020628
19 -0.044051 0.000207 0.010940 0.002963 0.032569 0.012603 0.010127
20 0.056280 0.059946 0.030396 -0.021865 0.063568 -0.041725 0.053751
21 -0.002344 -0.062792 -0.000048 0.038276 -0.018037 0.028953 -0.012551
22 0.069903 0.027927 0.059458 -0.008752 -0.045922 -0.384888 0.020854
23 -0.113899 0.090151 0.015708 -0.010862 -0.019819 0.232464 0.038501
24 -0.058485 0.001375 -0.013757 0.028362 0.030544 -0.036451 0.035203
25 0.007029 0.004733 0.020467 -0.021192 -0.013193 -0.005000 -0.008517
26 0.013849 -0.020321 0.057604 0.008756 0.027331 0.026252 -0.012335
27 -0.036551 -0.013502 0.002832 0.010650 -0.001312 -0.007732 0.002863
28 0.045134 0.032840 -0.045896 0.023255 -0.036131 0.616709 -0.022731
29 -0.025301 -0.031475 -0.034611 0.008099 -0.015197 0.050795 0.002666
30 -0.037795 0.016460 -0.039496 -0.005073 -0.018678 0.025155 -0.006837
31 0.000762 0.011961 0.055498 0.018423 -0.000034 0.034641 0.003951
32 0.049853 -0.076989 -0.037393 0.038048 0.020030 -0.095718 -0.047729
33 -0.040604 -0.012986 0.010608 0.019366 0.020090 -0.052491 0.002338
34 0.115860 0.016908 -0.058954 0.011873 -0.036264 0.453173 -0.002142
35 -0.014349 0.005058 0.001854 0.021806 -0.001962 -0.004232 0.048044
36 0.124554 -0.007465 -0.004522 -0.045234 0.038142 -0.182658 0.000486
37 0.017283 0.015940 -0.034966 -0.011955 -0.019648 -0.046350 0.001191
38 -0.049654 -0.019059 -0.020446 -0.013516 0.040810 0.090493 0.017037
39 -0.150962 -0.041355 -0.070172 0.026616 -0.020987 0.110447 -0.057128
14 15 16 17 18 19 20 \
0 0.002984 0.006890 0.023151 0.039499 -0.051614 0.004408 -0.001347
1 -0.069600 -0.001228 -0.033658 0.044709 0.018864 0.009298 0.021246
2 -0.024474 0.014957 0.037882 0.054775 0.001878 -0.005757 0.053421
3 -0.044111 -0.046394 0.038765 0.015674 -0.017277 0.025663 -0.002158
4 0.209115 0.011283 -0.010417 -0.001717 0.080150 0.025250 0.004765
5 0.016991 0.002038 -0.004314 -0.000589 0.039427 -0.042694 0.044357
6 -0.184330 -0.004506 0.059714 -0.043495 -0.107176 -0.004120 0.024973
7 -0.021095 -0.008298 -0.011443 -0.056082 0.130023 -0.044051 0.056280
8 0.017060 0.016582 0.036039 -0.004586 0.008089 0.000207 0.059946
9 0.004008 0.020943 0.010802 -0.087833 -0.010817 0.010940 0.030396
10 -0.030630 0.053711 -0.021356 0.027280 0.039504 0.002963 -0.021865
11 -0.117767 -0.015528 -0.034338 -0.037096 -0.001982 0.032569 0.063568
12 0.362282 -0.017225 -0.024656 0.018956 0.412329 0.012603 -0.041725
13 -0.004561 -0.011772 0.004645 0.018460 -0.020628 0.010127 0.053751
14 1.000000 -0.012944 -0.027140 -0.012015 0.111663 0.060125 -0.058008
15 -0.012944 1.000000 -0.010347 0.010681 -0.015560 0.019584 0.068294
16 -0.027140 -0.010347 1.000000 0.012879 -0.019432 -0.001441 0.021596
17 -0.012015 0.010681 0.012879 1.000000 -0.027898 0.036028 -0.057326
18 0.111663 -0.015560 -0.019432 -0.027898 1.000000 -0.018719 0.008588
19 0.060125 0.019584 -0.001441 0.036028 -0.018719 1.000000 0.011194
20 -0.058008 0.068294 0.021596 -0.057326 0.008588 0.011194 1.000000
21 0.052579 -0.000982 -0.022916 -0.004039 -0.013545 -0.031750 -0.056689
22 0.166519 0.040514 -0.001622 -0.050048 0.009677 0.011854 0.007039
23 0.055610 0.018240 -0.008523 0.011325 0.321297 0.011392 0.013588
24 -0.030331 -0.012557 0.066975 -0.050565 -0.029749 0.004969 0.005406
25 -0.011636 0.003095 -0.029033 -0.011279 0.004478 -0.021433 -0.034250
26 0.010856 0.043180 -0.004819 -0.001844 0.045385 -0.006371 -0.014428
27 0.039229 0.018744 0.026556 -0.035613 -0.018485 -0.018536 0.024719
28 0.168620 -0.006029 0.003740 -0.018427 0.232997 0.020285 -0.002046
29 0.055592 -0.025886 0.016282 -0.008814 0.166979 -0.032372 0.012035
30 -0.023788 -0.043673 -0.019440 0.037146 -0.029637 0.042964 -0.030868
31 0.007467 0.012588 -0.011451 -0.016741 0.016622 -0.067189 -0.056389
32 -0.031780 0.043055 -0.073987 -0.028353 -0.003343 -0.021787 0.020425
33 -0.000320 -0.016423 0.065442 0.003116 0.006133 0.038550 -0.018514
34 0.148442 -0.004044 0.025378 0.013375 -0.033309 -0.003289 -0.026723
35 -0.009477 -0.021116 -0.063987 -0.014939 0.028044 -0.027955 0.010036
36 -0.114092 0.001448 -0.006681 0.008701 -0.042455 0.001016 0.078414
37 0.029476 0.001919 -0.012295 0.028183 0.006534 -0.032087 -0.026363
38 -0.164538 -0.006399 0.041778 0.040681 -0.282431 0.027369 -0.028073
39 0.092740 -0.042175 0.013661 0.026468 0.300155 0.009386 -0.036576
21 22 23 24 25 26 27 \
0 0.016485 0.021047 -0.020664 -0.028256 -0.014671 0.047728 -0.060396
1 0.020263 0.044744 0.051050 0.020862 0.019272 0.010341 -0.002228
2 0.004037 -0.010850 0.051547 0.001834 -0.006478 -0.017693 -0.009868
3 -0.008345 -0.012444 0.013371 -0.034712 -0.017270 0.040604 0.021951
4 0.009427 0.449976 0.568959 -0.059516 0.024766 -0.016014 0.049695
5 -0.041301 -0.024654 0.003663 -0.003748 -0.022389 0.029581 -0.006412
6 -0.030886 0.116646 -0.217741 0.070511 0.054907 -0.013347 -0.014378
7 -0.002344 0.069903 -0.113899 -0.058485 0.007029 0.013849 -0.036551
8 -0.062792 0.027927 0.090151 0.001375 0.004733 -0.020321 -0.013502
9 -0.000048 0.059458 0.015708 -0.013757 0.020467 0.057604 0.002832
10 0.038276 -0.008752 -0.010862 0.028362 -0.021192 0.008756 0.010650
11 -0.018037 -0.045922 -0.019819 0.030544 -0.013193 0.027331 -0.001312
12 0.028953 -0.384888 0.232464 -0.036451 -0.005000 0.026252 -0.007732
13 -0.012551 0.020854 0.038501 0.035203 -0.008517 -0.012335 0.002863
14 0.052579 0.166519 0.055610 -0.030331 -0.011636 0.010856 0.039229
15 -0.000982 0.040514 0.018240 -0.012557 0.003095 0.043180 0.018744
16 -0.022916 -0.001622 -0.008523 0.066975 -0.029033 -0.004819 0.026556
17 -0.004039 -0.050048 0.011325 -0.050565 -0.011279 -0.001844 -0.035613
18 -0.013545 0.009677 0.321297 -0.029749 0.004478 0.045385 -0.018485
19 -0.031750 0.011854 0.011392 0.004969 -0.021433 -0.006371 -0.018536
20 -0.056689 0.007039 0.013588 0.005406 -0.034250 -0.014428 0.024719
21 1.000000 0.015132 -0.001050 -0.042485 0.029820 -0.024140 -0.017697
22 0.015132 1.000000 0.358961 -0.059243 -0.009635 0.007524 0.024709
23 -0.001050 0.358961 1.000000 -0.055337 0.025651 -0.015144 0.014318
24 -0.042485 -0.059243 -0.055337 1.000000 0.018229 -0.049296 0.021148
25 0.029820 -0.009635 0.025651 0.018229 1.000000 0.020201 -0.004100
26 -0.024140 0.007524 -0.015144 -0.049296 0.020201 1.000000 0.017038
27 -0.017697 0.024709 0.014318 0.021148 -0.004100 0.017038 1.000000
28 0.034840 -0.098051 -0.055651 -0.030025 0.003924 0.046134 -0.047810
29 -0.002580 -0.065422 -0.160173 0.021535 -0.039607 0.018766 -0.049897
30 -0.006735 -0.002532 -0.022266 -0.024620 -0.011454 -0.049255 -0.031830
31 -0.027499 -0.008018 0.032428 0.011665 -0.027106 -0.000785 -0.031253
32 0.010885 -0.075722 -0.453138 -0.014389 -0.040435 0.013379 -0.055318
33 0.044612 0.045062 0.008407 -0.002748 -0.019240 -0.011437 -0.051371
34 -0.017645 -0.006324 0.257133 -0.012643 0.029309 -0.010916 0.030509
35 0.012711 -0.014390 0.012386 0.008924 -0.015760 -0.036908 0.009340
36 -0.021266 -0.135267 0.204730 -0.024168 0.039201 -0.013289 -0.040914
37 0.037789 0.045044 0.003117 -0.023394 -0.027665 0.021426 -0.003187
38 0.001307 -0.279579 -0.306711 0.007385 0.039480 0.010005 0.018008
39 -0.012076 -0.109855 -0.310490 -0.000780 -0.031934 0.045605 0.052344
28 29 30 31 32 33 34 \
0 0.026154 0.031174 0.012446 0.023033 -0.004701 0.005180 0.012961
1 -0.067760 -0.010763 -0.000782 -0.052864 0.005583 0.009125 -0.026617
2 0.005040 -0.005418 0.034527 -0.002177 -0.038702 0.046611 0.008141
3 -0.033828 0.032422 0.025678 -0.006878 -0.030960 -0.018130 -0.055653
4 -0.522373 -0.465130 -0.039205 0.007981 -0.325592 0.031156 -0.212497
5 -0.021571 0.010774 0.011408 -0.050032 0.016181 0.017013 0.019377
6 0.048105 0.069492 -0.050016 -0.020219 -0.105353 -0.025403 0.171086
7 0.045134 -0.025301 -0.037795 0.000762 0.049853 -0.040604 0.115860
8 0.032840 -0.031475 0.016460 0.011961 -0.076989 -0.012986 0.016908
9 -0.045896 -0.034611 -0.039496 0.055498 -0.037393 0.010608 -0.058954
10 0.023255 0.008099 -0.005073 0.018423 0.038048 0.019366 0.011873
11 -0.036131 -0.015197 -0.018678 -0.000034 0.020030 0.020090 -0.036264
12 0.616709 0.050795 0.025155 0.034641 -0.095718 -0.052491 0.453173
13 -0.022731 0.002666 -0.006837 0.003951 -0.047729 0.002338 -0.002142
14 0.168620 0.055592 -0.023788 0.007467 -0.031780 -0.000320 0.148442
15 -0.006029 -0.025886 -0.043673 0.012588 0.043055 -0.016423 -0.004044
16 0.003740 0.016282 -0.019440 -0.011451 -0.073987 0.065442 0.025378
17 -0.018427 -0.008814 0.037146 -0.016741 -0.028353 0.003116 0.013375
18 0.232997 0.166979 -0.029637 0.016622 -0.003343 0.006133 -0.033309
19 0.020285 -0.032372 0.042964 -0.067189 -0.021787 0.038550 -0.003289
20 -0.002046 0.012035 -0.030868 -0.056389 0.020425 -0.018514 -0.026723
21 0.034840 -0.002580 -0.006735 -0.027499 0.010885 0.044612 -0.017645
22 -0.098051 -0.065422 -0.002532 -0.008018 -0.075722 0.045062 -0.006324
23 -0.055651 -0.160173 -0.022266 0.032428 -0.453138 0.008407 0.257133
24 -0.030025 0.021535 -0.024620 0.011665 -0.014389 -0.002748 -0.012643
25 0.003924 -0.039607 -0.011454 -0.027106 -0.040435 -0.019240 0.029309
26 0.046134 0.018766 -0.049255 -0.000785 0.013379 -0.011437 -0.010916
27 -0.047810 -0.049897 -0.031830 -0.031253 -0.055318 -0.051371 0.030509
28 1.000000 0.322438 0.030054 0.018346 -0.086240 -0.059175 0.293913
29 0.322438 1.000000 0.027909 0.019592 -0.110933 0.008248 0.137749
30 0.030054 0.027909 1.000000 -0.024786 0.005798 -0.026837 -0.016300
31 0.018346 0.019592 -0.024786 1.000000 -0.027278 -0.003418 0.006635
32 -0.086240 -0.110933 0.005798 -0.027278 1.000000 0.015406 -0.259151
33 -0.059175 0.008248 -0.026837 -0.003418 0.015406 1.000000 -0.049568
34 0.293913 0.137749 -0.016300 0.006635 -0.259151 -0.049568 1.000000
35 -0.025472 -0.023510 -0.038520 -0.031574 -0.009889 -0.047772 -0.035899
36 0.018826 0.241248 -0.073321 -0.005343 -0.050540 -0.019554 0.107606
37 -0.013701 0.010129 -0.061697 0.003566 -0.038393 0.009121 -0.022208
38 0.180924 0.049550 0.028444 -0.004915 -0.197690 -0.030572 0.145990
39 0.283127 0.287253 0.018338 -0.016312 -0.110049 0.024840 -0.147988
35 36 37 38 39
0 0.047455 -0.026327 0.038844 0.016098 -0.045840
1 0.053087 0.039265 0.019537 -0.046295 -0.005667
2 0.014150 0.020865 0.030127 -0.004390 0.020101
3 0.024811 0.048042 0.016295 0.041164 0.021217
4 0.015455 0.016657 0.021133 -0.084677 -0.227957
5 -0.034101 -0.009464 -0.046527 0.032984 0.013334
6 -0.003786 0.090158 0.075050 -0.141340 -0.233928
7 -0.014349 0.124554 0.017283 -0.049654 -0.150962
8 0.005058 -0.007465 0.015940 -0.019059 -0.041355
9 0.001854 -0.004522 -0.034966 -0.020446 -0.070172
10 0.021806 -0.045234 -0.011955 -0.013516 0.026616
11 -0.001962 0.038142 -0.019648 0.040810 -0.020987
12 -0.004232 -0.182658 -0.046350 0.090493 0.110447
13 0.048044 0.000486 0.001191 0.017037 -0.057128
14 -0.009477 -0.114092 0.029476 -0.164538 0.092740
15 -0.021116 0.001448 0.001919 -0.006399 -0.042175
16 -0.063987 -0.006681 -0.012295 0.041778 0.013661
17 -0.014939 0.008701 0.028183 0.040681 0.026468
18 0.028044 -0.042455 0.006534 -0.282431 0.300155
19 -0.027955 0.001016 -0.032087 0.027369 0.009386
20 0.010036 0.078414 -0.026363 -0.028073 -0.036576
21 0.012711 -0.021266 0.037789 0.001307 -0.012076
22 -0.014390 -0.135267 0.045044 -0.279579 -0.109855
23 0.012386 0.204730 0.003117 -0.306711 -0.310490
24 0.008924 -0.024168 -0.023394 0.007385 -0.000780
25 -0.015760 0.039201 -0.027665 0.039480 -0.031934
26 -0.036908 -0.013289 0.021426 0.010005 0.045605
27 0.009340 -0.040914 -0.003187 0.018008 0.052344
28 -0.025472 0.018826 -0.013701 0.180924 0.283127
29 -0.023510 0.241248 0.010129 0.049550 0.287253
30 -0.038520 -0.073321 -0.061697 0.028444 0.018338
31 -0.031574 -0.005343 0.003566 -0.004915 -0.016312
32 -0.009889 -0.050540 -0.038393 -0.197690 -0.110049
33 -0.047772 -0.019554 0.009121 -0.030572 0.024840
34 -0.035899 0.107606 -0.022208 0.145990 -0.147988
35 1.000000 -0.016145 -0.007901 -0.038879 0.014993
36 -0.016145 1.000000 -0.009176 0.000086 -0.046425
37 -0.007901 -0.009176 1.000000 -0.059341 -0.021385
38 -0.038879 0.000086 -0.059341 1.000000 0.223299
39 0.014993 -0.046425 -0.021385 0.223299 1.000000
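A 40x40 printout is hard to scan. A small helper (an addition, not part of the original notebook) ranks the off-diagonal pairs, surfacing the large entries already visible above, e.g. features 12/28 at 0.62, 4/23 at 0.57, and 4/28 at -0.52:

# Rank the absolute off-diagonal correlations, keeping each pair once.
pairs = corr.abs().unstack()
pairs = pairs[pairs.index.get_level_values(0) < pairs.index.get_level_values(1)]
print(pairs.sort_values(ascending=False).head(10))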
In [8]:
plt.rcParams['font.family']='DFKai-SB' # CJK-capable font
plt.figure(figsize=(40,40))
sns.heatmap(corr, square=True, annot=True, cmap="RdBu_r") #center=0, cmap="YlGnBu"
Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0xc154198>
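With annot=True on a 40x40 grid the figure is dense. An optional variant (an assumption, not run in the original) masks the redundant upper triangle, since the matrix is symmetric, and drops the annotations:

# Hide the upper triangle; no information is lost by symmetry.
mask = np.triu(np.ones_like(corr, dtype=bool))
plt.figure(figsize=(20, 20))
sns.heatmap(corr, mask=mask, square=True, cmap="RdBu_r", center=0)
plt.show()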
In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3) # 30% for testing, 70% for training
X_train.head(2)
Out[9]:
            0         1         2         3         4         5         6         7         8         9  \
836  0.102779 -0.199372  0.463495 -0.847312 -1.445708 -0.326604 -0.293438 -2.202522  1.078323 -0.865261
724  0.039585  0.723498  0.912530  1.090686  6.182949 -0.035000 -2.967893 -2.513158 -0.666391 -2.285819

           10        11        12        13        14        15        16        17        18        19  \
836  0.502683 -0.009290  6.049494  0.456662  3.487071 -1.272826 -0.430860 -0.780380 -0.420346 -1.741490
724  0.657216  0.145245 -3.275642 -0.226957  0.417443 -0.651984 -0.346036  1.029841 -2.062241 -0.356049

           20        21        22        23        24        25        26        27        28        29  \
836  1.207195 -0.820815 -3.329656  2.868839 -0.498860  1.246022 -0.016919  0.719957  2.912453  3.552154
724  1.054455 -0.281530  1.020549  3.602082 -0.631751  0.664629 -0.457342 -0.448230 -4.007717 -1.569231

           30        31        32        33        34        35        36        37        38        39
836 -0.488331  0.367702 -5.031359 -2.016386  1.605913  0.012293  3.582551 -0.388994  1.078361  1.384469
724  0.117934 -0.229665 -1.945187  0.227123 -0.676655 -0.051030 -0.277648  1.707033 -0.693985  1.528447
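Note that train_test_split is called without random_state, so the exact split (and every score below) varies from run to run. A reproducible variant, with an arbitrary seed not taken from the original run:

# Pin the shuffle so repeated runs yield the same 700/300 split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)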
In [10]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
knn = KNeighborsClassifier(n_neighbors=3, weights='uniform')
knn.fit(X_train, y_train)
print(metrics.classification_report(y_test, knn.predict(X_test)))
print(metrics.confusion_matrix(y_test, knn.predict(X_test)))
             precision    recall  f1-score   support

          0       0.94      0.86      0.90       149
          1       0.87      0.95      0.91       151

avg / total       0.91      0.90      0.90       300

[[128  21]
 [  8 143]]
C:\Anaconda3\lib\site-packages\ipykernel_launcher.py:5: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
"""
In [11]:
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier(n_estimators=500, criterion='gini', max_features='auto', oob_score=True)
rfc.fit(X_train, y_train) # fit on unscaled features; trees don't need standardization
print("oob_score (accuracy):", rfc.oob_score_)
print(metrics.classification_report(y_test, rfc.predict(X_test)))
C:\Anaconda3\lib\site-packages\ipykernel_launcher.py:4: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
after removing the cwd from sys.path.
oob_score (accuracy): 0.86
             precision    recall  f1-score   support

          0       0.86      0.91      0.89       149
          1       0.91      0.85      0.88       151

avg / total       0.88      0.88      0.88       300
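As a side benefit of the fitted forest (an addition, not in the original notebook), rfc.feature_importances_ shows which of the 40 anonymous features the trees actually split on:

# Ten most important features according to the random forest.
importances = pd.Series(rfc.feature_importances_)
print(importances.sort_values(ascending=False).head(10))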
In [12]:
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(X_train, y_train)
print(metrics.classification_report(y_test, gnb.predict(X_test)))
print(metrics.confusion_matrix(y_test, gnb.predict(X_test)))
             precision    recall  f1-score   support

          0       0.81      0.87      0.84       149
          1       0.86      0.79      0.83       151

avg / total       0.84      0.83      0.83       300

[[130  19]
 [ 31 120]]
C:\Anaconda3\lib\site-packages\sklearn\utils\validation.py:547: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
y = column_or_1d(y, warn=True)
In [13]:
from sklearn.svm import SVC
svc = SVC(C=1.0, kernel="rbf", probability=True)
svc.fit(X_train, y_train)
print(metrics.classification_report(y_test, svc.predict(X_test)))
print(metrics.confusion_matrix(y_test, svc.predict(X_test)))
C:\Anaconda3\lib\site-packages\sklearn\utils\validation.py:547: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
y = column_or_1d(y, warn=True)
             precision    recall  f1-score   support

          0       0.90      0.94      0.92       149
          1       0.94      0.90      0.92       151

avg / total       0.92      0.92      0.92       300

[[140   9]
 [ 15 136]]
In [14]:
#from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from mlxtend.classifier import StackingClassifier
import xgboost as xgb
clf1 = KNeighborsClassifier(n_neighbors=3, weights='uniform')
clf2 = RandomForestClassifier(n_estimators=500, criterion='gini', max_features='auto', oob_score=True)
#clf3 = GaussianNB()
clf4 = SVC(C=1.0, kernel="rbf", probability=True)
meta_clf = xgb.XGBClassifier(n_estimators= 2000, max_depth= 4)
stacking_clf = StackingClassifier(classifiers=[clf1, clf2, clf4], meta_classifier=meta_clf)
clf1.fit(X_train, y_train)
clf2.fit(X_train, y_train)
#clf3.fit(X_train, y_train)
clf4.fit(X_train, y_train)
stacking_clf.fit(X_train, y_train)
print('KNN Score:',clf1.score(X_test, y_test))
print('RF Score:',clf2.score(X_test, y_test))
#print('GNB Score:',clf3.score(X_test, y_test))
print('SVC Score:',clf4.score(X_test, y_test))
print('Stacking Score:',stacking_clf.score(X_test, y_test))
C:\Anaconda3\lib\site-packages\sklearn\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
"This module will be removed in 0.20.", DeprecationWarning)
C:\Anaconda3\lib\site-packages\ipykernel_launcher.py:16: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
app.launch_new_instance()
C:\Anaconda3\lib\site-packages\ipykernel_launcher.py:17: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
C:\Anaconda3\lib\site-packages\sklearn\utils\validation.py:547: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
y = column_or_1d(y, warn=True)
C:\Anaconda3\lib\site-packages\mlxtend\classifier\stacking_classification.py:115: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
clf.fit(X, y)
C:\Anaconda3\lib\site-packages\mlxtend\classifier\stacking_classification.py:115: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
clf.fit(X, y)
C:\Anaconda3\lib\site-packages\sklearn\utils\validation.py:547: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
y = column_or_1d(y, warn=True)
C:\Anaconda3\lib\site-packages\sklearn\preprocessing\label.py:95: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
y = column_or_1d(y, warn=True)
C:\Anaconda3\lib\site-packages\sklearn\preprocessing\label.py:128: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
y = column_or_1d(y, warn=True)
KNN Score: 0.903333333333
RF Score: 0.876666666667
SVC Score: 0.92
Stacking Score: 0.88
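With only 300 validation rows, these single-split scores are noisy, so the stacker trailing SVC here is not conclusive. A sanity check (not run in the original notebook) is to cross-validate each model on the full training set:

from sklearn.model_selection import cross_val_score

# Compare the base models and the stacked model on the same 5 folds.
for name, model in [('KNN', clf1), ('RF', clf2), ('SVC', clf4),
                    ('Stacking', stacking_clf)]:
    scores = cross_val_score(model, X, np.ravel(y), cv=5)
    print(name, round(scores.mean(), 4))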
In [15]:
row_id = list(range(1, 9001)) # submission ids run 1..9000
In [16]:
pred = stacking_clf.predict(test)
# Generate Submission File
StackingSubmission = pd.DataFrame({ 'Id': row_id, 'Solution': pred })[["Id","Solution"]]
StackingSubmission.to_csv("submission_xgb_3m_stk.csv", index=False)
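A quick check before uploading (an addition for safety, not in the original run): the file should hold exactly 9000 predictions with Id running 1..9000:

# Verify the shape and eyeball the first few rows of the submission.
assert StackingSubmission.shape == (9000, 2)
print(StackingSubmission.head())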
In [17]:
## Kaggle scores with the xgb meta-learner:
## 4 base models: 0.86810
## 3 base models (GNB dropped): 0.87071