In [1]:
%matplotlib inline
import numpy as np
from sklearn.datasets import make_hastie_10_2
import pandas as pd
X, Y = make_hastie_10_2(n_samples=500,random_state=1)
data = pd.DataFrame(X)
control_index = pd.core.series.Series(Y==-1)
patient_index = pd.core.series.Series(Y==1)
variables = data.columns
# First of all we show the generated data.
data
Out[1]:
0
1
2
3
4
5
6
7
8
9
0
1.624345
-0.611756
-0.528172
-1.072969
0.865408
-2.301539
1.744812
-0.761207
0.319039
-0.249370
1
1.462108
-2.060141
-0.322417
-0.384054
1.133769
-1.099891
-0.172428
-0.877858
0.042214
0.582815
2
-1.100619
1.144724
0.901591
0.502494
0.900856
-0.683728
-0.122890
-0.935769
-0.267888
0.530355
3
-0.691661
-0.396754
-0.687173
-0.845206
-0.671246
-0.012665
-1.117310
0.234416
1.659802
0.742044
4
-0.191836
-0.887629
-0.747158
1.692455
0.050808
-0.636996
0.190915
2.100255
0.120159
0.617203
5
0.300170
-0.352250
-1.142518
-0.349343
-0.208894
0.586623
0.838983
0.931102
0.285587
0.885141
6
-0.754398
1.252868
0.512930
-0.298093
0.488518
-0.075572
1.131629
1.519817
2.185575
-1.396496
7
-1.444114
-0.504466
0.160037
0.876169
0.315635
-2.022201
-0.306204
0.827975
0.230095
0.762011
8
-0.222328
-0.200758
0.186561
0.410052
0.198300
0.119009
-0.670662
0.377564
0.121821
1.129484
9
1.198918
0.185156
-0.375285
-0.638730
0.423494
0.077340
-0.343854
0.043597
-0.620001
0.698032
10
-0.447129
1.224508
0.403492
0.593579
-1.094912
0.169382
0.740556
-0.953701
-0.266219
0.032615
11
-1.373117
0.315159
0.846161
-0.859516
0.350546
-1.312283
-0.038696
-1.615772
1.121418
0.408901
12
-0.024617
-0.775162
1.273756
1.967102
-1.857982
1.236164
1.627651
0.338012
-1.199268
0.863345
13
-0.180920
-0.603921
-1.230058
0.550537
0.792807
-0.623531
0.520576
-1.144341
0.801861
0.046567
14
-0.186570
-0.101746
0.868886
0.750412
0.529465
0.137701
0.077821
0.618380
0.232495
0.682551
15
-0.310117
-2.434838
1.038825
2.186980
0.441364
-0.100155
-0.136445
-0.119054
0.017409
-1.122019
16
-0.517094
-0.997027
0.248799
-0.296641
0.495211
-0.174703
0.986335
0.213534
2.190700
-1.896361
17
-0.646917
0.901487
2.528326
-0.248635
0.043669
-0.226314
1.331457
-0.287308
0.680070
-0.319802
18
-1.272559
0.313548
0.503185
1.293226
-0.110447
-0.617362
0.562761
0.240737
0.280665
-0.073113
19
1.160339
0.369493
1.904659
1.111057
0.659050
-1.627438
0.602319
0.420282
0.810952
1.044442
20
-0.400878
0.824006
-0.562305
1.954878
-1.331952
-1.760689
-1.650721
-0.890556
-1.119115
1.956079
21
-0.326499
-1.342676
1.114383
-0.586524
-1.236853
0.875839
0.623362
-0.434957
1.407540
0.129102
22
1.616950
0.502741
1.558806
0.109403
-1.219744
2.449369
-0.545774
-0.198838
-0.700399
-0.203394
23
0.242669
0.201830
0.661020
1.792158
-0.120465
-1.233121
-1.182318
-0.665755
-1.674196
0.825030
24
-0.498214
-0.310985
-0.001891
-1.396620
-0.861316
0.674712
0.618539
-0.443172
1.810535
-1.305727
25
-0.344987
-0.230840
-2.793085
1.937529
0.366332
-1.044589
2.051173
0.585662
0.429526
-0.606998
26
0.106223
-1.525680
0.795026
-0.374438
0.134048
1.202055
0.284748
0.262467
0.276499
-0.733272
27
0.836005
1.543359
0.758806
0.884909
-0.877282
-0.867787
-1.440876
1.232253
-0.254180
1.399844
28
-0.781912
-0.437509
0.095425
0.921450
0.060750
0.211125
0.016528
0.177188
-1.116470
0.080927
29
-0.186579
-0.056824
0.492337
-0.680678
-0.084508
-0.297362
0.417302
0.784771
-0.955425
0.585910
...
...
...
...
...
...
...
...
...
...
...
470
1.274644
-0.251978
0.488770
0.390650
0.793533
-0.038972
1.085841
-0.525425
1.050737
1.222600
471
-0.033376
0.453018
0.092786
0.598285
0.676390
0.260472
-0.224019
0.825005
0.462314
2.382766
472
-1.077009
1.946349
0.419161
0.805295
-0.614908
-0.401759
0.363118
0.031152
-1.379569
0.080526
473
0.088642
-0.457645
1.031118
1.222996
-0.814204
-0.301463
-0.555928
0.179861
1.030308
0.443613
474
-0.164995
-0.506947
1.524929
1.306188
0.494037
-1.209679
0.986078
0.147101
-0.974137
1.413984
475
0.541875
-1.341885
-0.251552
-0.144955
-0.530613
0.629411
1.051341
-0.365471
2.032605
0.267639
476
1.818012
0.480226
1.039288
1.457876
-0.576148
1.117410
-0.821370
-0.035256
0.123906
0.857984
477
-1.730940
0.185582
-0.019720
-1.621492
-1.729204
-0.274889
-1.501111
0.303395
-1.078250
-0.438778
478
-0.607886
0.238707
0.414893
-2.095658
-2.105616
0.788023
-1.031113
0.441734
0.350267
0.184748
479
0.051395
0.426512
-1.159493
0.130236
0.425973
0.794730
0.680195
-0.181871
0.199017
0.208783
480
-0.713199
0.139410
0.229994
-0.382562
0.134935
1.170489
-0.569885
2.360613
0.163634
0.631239
481
1.004047
0.337559
-0.619752
-0.165303
0.686348
0.478878
-2.719938
1.700912
-0.294362
-0.632691
482
0.590651
-0.746269
0.484516
-0.891281
-0.075059
-0.008983
0.959094
0.662568
-0.294168
-2.096333
483
-0.786384
0.430063
0.748611
-1.035397
0.141397
-0.266128
1.473576
1.235114
0.422512
-1.962186
484
-0.214058
-0.418429
0.055025
-1.440749
1.183870
1.565740
-0.218949
-0.250793
-0.404506
-0.002312
485
0.600634
1.534580
0.050447
0.004432
1.406048
-1.169348
0.999195
1.006266
0.656831
-0.617617
486
0.620002
0.589806
1.740310
-0.169440
0.668278
-1.108418
0.254955
-0.029205
-0.593111
-0.323822
487
1.919382
0.496884
0.118606
-1.103974
-0.497031
0.312941
0.226165
1.463109
0.025264
-0.138792
488
-0.016996
0.097042
-0.061843
0.167654
0.653907
1.302428
-0.410169
-1.331084
0.617350
1.569187
489
-0.082087
0.681881
-0.725774
0.204400
-0.443207
-0.316081
-0.611369
-0.406946
-0.369148
0.180657
490
0.135075
0.968622
-0.094317
-0.947628
-0.234292
-1.091390
0.997176
1.086481
-1.299026
-1.304266
491
1.182556
-0.429521
0.082419
-2.139513
1.246868
1.805312
0.316882
-0.971564
1.646764
0.146397
492
-0.242427
2.023996
-0.915662
0.646477
-1.199921
0.783797
-0.537908
-1.217081
2.012857
0.098533
493
-0.942571
-1.525285
-0.751174
1.153165
-0.092815
-1.111537
1.553357
-1.027565
-0.071713
0.445525
494
-0.121859
0.190849
-2.272159
-2.914499
0.390935
-1.165738
0.811856
-0.962849
-1.567960
-0.151947
495
-0.432381
-0.035763
1.512820
-0.775995
-0.271769
1.019646
-0.435244
-1.015457
0.185910
0.563153
496
-2.418973
-0.716820
0.575042
0.480736
0.370113
-0.162448
-1.302508
-0.192115
1.660739
0.868712
497
-2.015168
-0.996905
-1.034251
0.879521
0.712505
1.772187
1.246123
-1.163626
0.061574
-0.576460
498
-1.592936
0.715686
0.924852
-0.366147
1.521629
1.297955
-0.253514
-0.382463
0.973071
1.069714
499
-0.544415
-0.063071
0.893819
1.837916
0.584679
1.604546
0.566613
-0.775988
1.084889
2.241989
500 rows × 10 columns
In [2]:
from bamboo.stats import do_ttest
# Run bamboo's t-test for each variable, comparing the control group
# against the patient group; the result (see Out[2]) is a table with one
# p-value per variable.
do_ttest(data,control_index, patient_index,variables)
Out[2]:
P-Value
0
0.901654
1
0.997262
2
0.352764
3
0.005924
4
0.847878
5
0.791415
6
0.532488
7
0.355828
8
0.451349
9
0.777758
In [3]:
# We can also restrict the output to variables with a statistically
# significant between-group difference (the bamboo API spells the
# keyword 'only_significative'). Per Out[3], only variable 3 qualifies.
do_ttest(data,control_index, patient_index,variables, only_significative=True)
Out[3]:
P-Value
3
0.005924
In [4]:
from bamboo.stats import var_group_means
# Per-variable summary statistics (mean and variance, per Out[4]) for
# each of the two groups; group1_name / group2_name label the output
# columns for the control and patient masks respectively.
var_group_means(data, variables, control_index, patient_index, group1_name='First group', group2_name='Second group')
Out[4]:
First group (mean)
First group (var)
Second group (mean)
Second group (var)
0
0.009456
0.676415
-0.001105
-0.001105
1
0.008824
0.637848
0.009123
0.009123
2
0.053015
0.639056
-0.031731
-0.031731
3
-0.044687
0.651155
0.206211
0.206211
4
-0.032501
0.632060
-0.049900
-0.049900
5
-0.012368
0.595146
-0.034861
-0.034861
6
0.104961
0.591871
0.049414
0.049414
7
0.080120
0.725397
-0.002234
-0.002234
8
0.090273
0.682330
0.023162
0.023162
9
0.018938
0.700555
0.044163
0.044163
In [5]:
from bamboo.plot import plot_feature_importances
# Plot feature importances for separating the two groups; plot_title sets
# the figure title. The returned object is kept in example_plot so the
# figure can be saved later.
example_plot = plot_feature_importances(data, variables, control_index, patient_index, plot_title='Example')
In [6]:
from bamboo.classification import perform_experiments
# Run bamboo's classification experiments on the dataset. Per Out[6] the
# result compares Decision Tree, Random Forest, and RBF/linear SVMs on
# accuracy, sensitivity, specificity, and area under the ROC curve.
perform_experiments(data, control_index, patient_index)
Out[6]:
Decision Tree
Random Forest
SVM (RBF)
SVM (Linear)
Accuracy
61.20
70.60
92.80
46.80
Sensitivity
64.73
75.10
89.63
38.17
Specificity
57.92
66.41
95.75
54.83
Area Under Roc Curve
0.61
0.71
0.93
0.46
Content source: Neurita/bamboo
Similar notebooks: