In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline
plt.style.use('ggplot')
plt.rcParams['font.family'] = 'SimHei'      # CJK font so Chinese labels render (for Mac)
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly with a CJK font

pd.set_option("display.max_columns", 80)    # show up to 80 columns in DataFrame output

In [2]:
# the competition CSVs have no header row, hence header=None
X = pd.read_csv("./input/train.csv", header=None)
y = pd.read_csv("./input/trainLabels.csv", header=None)
test = pd.read_csv("./input/test.csv", header=None)

In [3]:
X.head(3)


Out[3]:
          0         1         2  ...        37        38        39
0  0.299403 -1.226624  1.498425  ... -2.715559 -2.682409  0.101050
1 -1.174176  0.332157  0.949919  ...  1.213219  1.382932 -1.817761
2  1.192222 -0.414371  0.067054  ...  0.656438 -0.932473  2.987436

[3 rows x 40 columns]

In [4]:
y.head(3)


Out[4]:
0
0 1
1 0
2 0

In [5]:
test.head(3)


Out[5]:
          0         1         2  ...        37        38        39
0  2.808909 -0.242894 -0.546421  ...  0.010665 -0.419214  2.818387
1 -0.374101  0.537669  0.081063  ... -1.470939 -0.067408 -0.976265
2 -0.088370  0.154743  0.380716  ...  1.265866 -1.749248  1.773784

[3 rows x 40 columns]

In [6]:
len(test)


Out[6]:
9000

In [7]:
corr = X.corr()
print(corr)


          0         1         2  ...        37        38        39
0   1.000000 -0.029022  0.033437  ...  0.038844  0.016098 -0.045840
1  -0.029022  1.000000  0.014202  ...  0.019537 -0.046295 -0.005667
2   0.033437  0.014202  1.000000  ...  0.030127 -0.004390  0.020101
..       ...       ...       ...  ...       ...       ...       ...
38  0.016098 -0.046295 -0.004390  ... -0.059341  1.000000  0.223299
39 -0.045840 -0.005667  0.020101  ... -0.021385  0.223299  1.000000

[40 rows x 40 columns]
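Most off-diagonal correlations are small in magnitude; the strongest pairs include (12, 28) ≈ 0.62, (4, 23) ≈ 0.57, and (4, 28) ≈ −0.52. Rather than scanning the full 40×40 printout, a small helper (not part of the original notebook) can rank them:

# Sketch: rank the strongest off-diagonal correlations.
import numpy as np

upper = corr.where(np.triu(np.ones(corr.shape, dtype=bool), k=1))  # keep each pair once
print(upper.stack().abs().sort_values(ascending=False).head(5))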

In [8]:
plt.rcParams['font.family'] = 'DFKai-SB'  # CJK font so Chinese labels render
plt.figure(figsize=(40, 40))
sns.heatmap(corr, square=True, annot=True, cmap="RdBu_r")  # alternatives: center=0, cmap="YlGnBu"


Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0xc154198>

In [9]:
from sklearn.model_selection import train_test_split

# ravel y to a 1-D array to avoid sklearn's DataConversionWarning
X_train, X_test, y_train, y_test = train_test_split(X, y.values.ravel(), test_size=0.3)  # 70% train / 30% test
X_train.head(2)


Out[9]:
            0         1         2  ...        37        38        39
836  0.102779 -0.199372  0.463495  ... -0.388994  1.078361  1.384469
724  0.039585  0.723498  0.912530  ...  1.707033 -0.693985  1.528447

[2 rows x 40 columns]
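A single 70/30 split on 1,000 labeled rows gives a fairly noisy estimate; a minimal sketch (not run in the original notebook) of a steadier k-fold alternative:

from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

# Sketch: 10-fold CV accuracy for one of the classifiers tried below.
cv_scores = cross_val_score(KNeighborsClassifier(n_neighbors=3), X, y.values.ravel(), cv=10)
print(cv_scores.mean(), cv_scores.std())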

In [10]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

knn = KNeighborsClassifier(n_neighbors=3, weights='uniform')
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)  # predict once, reuse for both reports
print(metrics.classification_report(y_test, y_pred))
print(metrics.confusion_matrix(y_test, y_pred))


             precision    recall  f1-score   support

          0       0.94      0.86      0.90       149
          1       0.87      0.95      0.91       151

avg / total       0.91      0.90      0.90       300

[[128  21]
 [  8 143]]
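The choice of n_neighbors=3 is ad hoc; a hedged sketch of a small grid search over k and the weighting scheme (not run in the original notebook):

from sklearn.model_selection import GridSearchCV

# Sketch: tune k and weights with 5-fold CV.
param_grid = {'n_neighbors': list(range(1, 16)), 'weights': ['uniform', 'distance']}
grid = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5, scoring='accuracy')
grid.fit(X_train, y_train)
print(grid.best_params_, grid.best_score_)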

In [11]:
from sklearn.ensemble import RandomForestClassifier

rfc = RandomForestClassifier(n_estimators=500, criterion='gini', max_features='auto', oob_score=True)
rfc.fit(X_train, y_train)  # no feature scaling (trees don't need it)

print("oob_score (accuracy):", rfc.oob_score_)
print(metrics.classification_report(y_test, rfc.predict(X_test)))


oob_score (accuracy): 0.86
             precision    recall  f1-score   support

          0       0.86      0.91      0.89       149
          1       0.91      0.85      0.88       151

avg / total       0.88      0.88      0.88       300
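
Because the 40 features are anonymous, the forest's importances are one of the few windows into which columns matter. A short sketch (not part of the original run):

# Sketch: rank the anonymous features by how much the forest uses them.
importances = pd.Series(rfc.feature_importances_, index=X.columns)
print(importances.sort_values(ascending=False).head(10))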


In [12]:
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()
gnb.fit(X_train, y_train)

y_pred = gnb.predict(X_test)
print(metrics.classification_report(y_test, y_pred))
print(metrics.confusion_matrix(y_test, y_pred))


             precision    recall  f1-score   support

          0       0.81      0.87      0.84       149
          1       0.86      0.79      0.83       151

avg / total       0.84      0.83      0.83       300

[[130  19]
 [ 31 120]]

In [13]:
from sklearn.svm import SVC

svc = SVC(C=1.0, kernel="rbf", probability=True)
svc.fit(X_train, y_train)

y_pred = svc.predict(X_test)
print(metrics.classification_report(y_test, y_pred))
print(metrics.confusion_matrix(y_test, y_pred))


             precision    recall  f1-score   support

          0       0.90      0.94      0.92       149
          1       0.94      0.90      0.92       151

avg / total       0.92      0.92      0.92       300

[[140   9]
 [ 15 136]]
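RBF SVMs are sensitive to feature scale, so a standardization pipeline is often worth a try; a hedged sketch (not run in the original notebook):

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Sketch: standardize features before the RBF SVM.
svc_scaled = make_pipeline(StandardScaler(), SVC(C=1.0, kernel="rbf"))
svc_scaled.fit(X_train, y_train)
print(svc_scaled.score(X_test, y_test))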

In [14]:
#from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from mlxtend.classifier import StackingClassifier
import xgboost as xgb

# base learners; GaussianNB (clf3) is left out after scoring worst above
clf1 = KNeighborsClassifier(n_neighbors=3, weights='uniform')
clf2 = RandomForestClassifier(n_estimators=500, criterion='gini', max_features='auto', oob_score=True)
#clf3 = GaussianNB()
clf4 = SVC(C=1.0, kernel="rbf", probability=True)
# the base learners' predictions become input features for the xgboost meta-learner
meta_clf = xgb.XGBClassifier(n_estimators=2000, max_depth=4)
stacking_clf = StackingClassifier(classifiers=[clf1, clf2, clf4], meta_classifier=meta_clf)

clf1.fit(X_train, y_train)
clf2.fit(X_train, y_train)
#clf3.fit(X_train, y_train)
clf4.fit(X_train, y_train)
stacking_clf.fit(X_train, y_train)

print('KNN Score:',clf1.score(X_test, y_test))
print('RF Score:',clf2.score(X_test, y_test))
#print('GNB Score:',clf3.score(X_test, y_test))
print('SVC Score:',clf4.score(X_test, y_test))
print('Stacking Score:',stacking_clf.score(X_test, y_test))


KNN Score: 0.903333333333
RF Score: 0.876666666667
SVC Score: 0.92
Stacking Score: 0.88
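As configured, the stack passes the base learners' predicted class labels to the meta-classifier. mlxtend's StackingClassifier can pass predicted probabilities instead, which sometimes gives the meta-learner more signal; a hedged variant (not run here):

# Sketch: feed class probabilities, not labels, to the meta-learner.
stacking_proba = StackingClassifier(classifiers=[clf1, clf2, clf4],
                                    meta_classifier=meta_clf,
                                    use_probas=True,
                                    average_probas=False)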

In [15]:
row_id = list(range(1, 9001))  # submission IDs 1..9000

In [16]:
pred = stacking_clf.predict(test)

# Generate Submission File 
StackingSubmission = pd.DataFrame({ 'Id': row_id, 'Solution': pred })[["Id","Solution"]]
StackingSubmission.to_csv("submission_xgb_3m_stk.csv", index=False)
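Note that stacking_clf above was fit on only the 70% training split; refitting on all 1,000 labeled rows before predicting the 9,000 test rows would usually help a little. A hedged sketch:

# Sketch: refit the stack on all labeled data for the final submission.
stacking_clf.fit(X, y.values.ravel())
pred = stacking_clf.predict(test)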

In [17]:
## submission score, xgb meta-learner, 4 base models: 0.86810
## submission score, xgb meta-learner, 3 base models (GNB dropped): 0.87071