In [1]:
import make_dataset as data
import HBA_analysis as hba
import pandas as pd
In [2]:
# Render matplotlib figures inline, directly below the cell that produces them
%matplotlib inline
In [3]:
# Load the HBA expression matrix via the project helper. As the message printed below shows,
# the helper loads the processed TSV under ./data/processed/ when it is already present locally.
# NOTE(review): reannotate=True appears to select the "_w_reannotations" file — confirm in make_dataset.
exp = data.get_HBA_dataset(reannotate=True)
Processed HBA brain dataset found locally. Loading from ./data/processed/brainarea_vs_genes_exp_w_reannotations.tsv
In [4]:
# Preview the matrix: rows are indexed by gene_symbol, columns are brain areas
# (pandas truncates the rendered table, so only the corners are shown)
exp
Out[4]:
CA1 field
CA2 field
CA3 field
CA4 field
Crus I, lateral hemisphere
Crus I, paravermis
Crus II, lateral hemisphere
Crus II, paravermis
Edinger-Westphal nucleus
Heschl's gyrus
...
temporal pole, inferior aspect
temporal pole, medial aspect
temporal pole, superior aspect
transverse gyri
trochlear nucleus
tuberomammillary nucleus
ventral tegmental area
ventromedial hypothalamic nucleus
vestibular nuclei
zona incerta
gene_symbol
A1BG
0.856487
-1.773695
-0.678679
-0.986914
0.826986
0.948039
0.935427
1.120774
-1.018554
0.170282
...
0.277830
0.514923
0.733368
-0.104286
-0.910245
1.039610
-0.155167
-0.444398
-0.901361
-0.236790
A1BG-AS1
0.257664
-1.373085
-0.619923
-0.636275
0.362799
0.353296
0.422766
0.346853
-0.812015
0.903358
...
1.074116
0.821031
1.219272
0.901213
-1.522431
0.598719
-1.709745
-0.054156
-1.695843
-1.155961
A1CF
-0.089614
-0.546903
0.282914
-0.528926
0.507916
0.577696
0.647671
0.306824
0.089958
0.149820
...
-0.030265
-0.187367
-0.428358
-0.465863
-0.136936
1.229487
-0.110680
-0.118175
-0.139776
0.123829
A2M
0.552415
-0.635485
-0.954995
-0.259745
-1.687391
-1.756847
-1.640242
-1.733110
-0.091695
0.003428
...
-0.058505
0.207109
-0.161808
0.183630
0.948098
-0.977692
0.911896
-0.499357
1.469386
0.557998
A2ML1
0.758031
1.549857
1.262225
1.338780
-0.289888
-0.407026
-0.358798
-0.589988
0.944684
-0.466327
...
-0.472908
-0.598317
-0.247797
-0.282673
1.396365
0.945043
0.158202
0.572771
0.073088
-0.886780
A3GALT2
0.435751
1.125971
1.479726
0.010520
0.877256
0.617134
0.797456
0.724969
0.692655
-0.691691
...
-0.339939
-0.628313
-0.246285
0.049359
0.223667
1.164986
-0.102258
-0.849872
-0.354230
-0.421125
A4GALT
1.277084
0.710266
0.272435
0.452544
0.127867
-0.423357
0.222945
-0.453995
-0.919359
0.714507
...
0.100831
0.322003
0.622643
0.698657
0.060055
-1.546518
-0.578289
-1.117115
-0.442883
-0.352982
A4GNT
-0.330903
-0.477462
0.267584
-0.426862
0.223556
0.260993
0.632328
0.097173
-0.173487
-0.023084
...
0.555680
-0.462533
0.752745
0.648017
1.639433
0.353052
0.249200
0.934437
-0.165556
-0.715761
AAAS
-1.257280
-0.888406
-1.124108
-1.118677
1.234268
1.421483
1.429758
1.603573
-0.827011
-0.006078
...
0.073441
-0.372438
-0.564808
-0.011762
-0.351065
0.150541
-0.238958
-1.075925
-0.066461
0.145102
AACS
0.018061
0.611623
-0.300200
-0.821343
-0.486892
-0.306668
-0.465894
-0.297026
-1.019541
1.262075
...
1.072971
1.265018
1.176459
0.958009
-0.074185
-0.419782
-0.959281
-1.004142
-1.073246
-1.126886
AADAC
-0.418754
-0.743635
-0.433086
-0.462726
0.188694
0.494526
0.012282
0.040732
1.210733
-0.456389
...
-0.399180
1.371592
-0.169385
0.365338
0.411701
0.005770
-0.116276
0.206295
0.035387
-0.204272
AADACL2
-0.739850
-0.416179
-0.475723
-0.068475
1.110947
1.685804
1.306960
1.253008
0.705721
0.080000
...
-0.408245
-0.708654
-0.159180
-0.547126
-0.781150
-0.628516
-0.050861
-0.779897
0.485490
0.935686
AADACL3
1.108194
1.386945
1.477108
1.200111
-0.274527
-0.678899
-0.403163
-0.774954
2.124794
-0.670346
...
-0.876347
-0.774968
-0.795357
-0.913619
2.347311
0.021025
0.820876
0.378004
0.552325
-0.287383
AADACL4
0.673841
1.032418
0.743477
0.742370
-0.291613
0.021451
-0.554058
-0.536587
0.040467
0.319437
...
-0.424853
0.046653
0.290214
-0.180625
-0.823943
-0.002756
-0.390497
0.044369
-0.110865
0.457199
AADAT
-0.097061
-0.396935
-0.415740
-0.480333
-2.471918
-2.365847
-2.409060
-2.221568
0.063529
0.691914
...
0.737076
0.710800
0.724051
0.736833
-0.195001
0.508776
0.400334
0.373558
0.180442
0.474810
AAED1
0.080014
0.236678
0.133051
-0.707775
0.338703
0.237322
0.404090
0.174832
0.194069
-0.688665
...
-0.549908
-0.181569
0.136963
-0.763695
0.562343
0.608038
0.421273
0.074200
0.526496
0.509921
AAGAB
0.136759
0.916251
1.132288
1.080193
-2.252801
-2.302349
-2.315404
-2.522820
1.019805
0.008384
...
-0.035198
-0.230523
0.009387
-0.106312
0.844723
0.967473
0.715529
0.920956
0.409426
0.116037
AAK1
1.375904
2.559039
2.906553
2.548207
-0.602239
-0.552184
-0.579009
-0.653592
0.906213
0.301835
...
-0.023674
-0.211661
-0.220104
0.457623
1.430953
-1.193619
-0.580699
-0.780286
0.081041
-0.847606
AAMDC
-1.703243
-3.118140
-3.202097
-2.447309
-0.546011
-0.439274
-0.550264
-0.454309
0.349273
-0.227842
...
-0.222259
0.161765
-0.275787
-0.448079
0.334789
0.918820
1.164607
-0.308308
1.237187
1.689524
AAMP
-0.138176
-0.160149
-0.845647
-1.056648
0.642338
0.845547
0.709152
1.053754
-0.165156
0.324537
...
0.695031
1.142455
1.112718
0.241136
-1.334684
0.260117
-0.816841
-0.707583
-1.133472
-0.053468
AANAT
0.626314
1.286435
1.109016
0.019730
1.541146
1.181566
1.286602
1.342582
1.533386
-0.437644
...
-0.679131
-0.837788
-0.135181
-0.725092
-0.041818
-0.299137
-0.491085
0.033633
-1.015544
-0.550629
AAR2
-0.637436
-0.491603
0.462139
0.504746
0.316785
0.532390
0.306266
0.564711
0.419222
-0.409236
...
-0.382095
-1.107220
-0.674133
-0.101266
-1.095007
0.353233
0.311098
0.484539
-0.477158
0.857170
AARD
0.780613
-0.369988
-0.522566
-0.862950
1.258964
1.028636
1.199312
1.023481
-0.449502
0.498611
...
-0.332064
-0.852406
-0.673939
0.266307
-1.623119
0.805689
-0.637187
-0.408195
-0.952082
-0.984320
AARS
-1.387741
-0.580060
-0.287489
-0.283949
-0.614049
-0.356330
-0.702986
-0.200804
-0.677430
0.845559
...
0.419986
0.019013
0.319323
0.784200
0.269346
-0.320917
-0.162518
-1.567622
-0.288255
-0.448679
AARS2
0.058378
0.135328
0.104272
-0.034123
2.414833
2.447712
2.561560
2.520044
-0.364720
-0.111054
...
-0.160882
-0.064053
-0.231928
0.056706
-0.279352
-1.146123
-0.690496
-1.254763
-0.332982
-0.709474
AARSD1
0.390367
0.217131
-0.665606
-0.959893
0.832267
0.853926
0.783592
0.967144
-0.541098
0.735554
...
0.976104
0.960404
0.929054
0.459339
-1.242252
0.101320
-1.353801
-0.363769
-1.233360
-0.751560
AASDH
-0.745896
-1.253315
-1.431641
-1.841098
1.904299
1.911990
1.792869
1.972364
0.253791
-0.162710
...
-0.205595
0.144856
-0.091575
0.025782
-0.096413
-0.543086
-0.492317
0.046842
-0.608590
-0.306944
AASDHPPT
1.131527
1.488091
1.533886
1.277502
-0.940444
-0.973751
-1.139535
-1.059888
-0.271924
0.332693
...
0.231368
-0.157624
0.244976
0.283954
-0.477352
1.330813
-0.830671
1.088126
-0.546691
-0.267114
AASS
0.088031
-0.730388
-0.737429
0.077760
-1.461933
-1.424646
-1.383434
-1.443019
0.160156
-0.311445
...
-0.207375
-0.243300
-0.424924
-0.434480
0.427726
-0.172836
1.230589
0.933177
1.188380
1.721663
AATF
-1.413886
-1.417889
-1.986543
-2.040064
1.136925
1.161923
1.158211
1.153190
1.713408
-0.769378
...
-0.530795
-0.701973
-0.844786
-0.395790
-0.082904
0.881439
0.201334
-1.192327
0.877924
1.158489
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
ZSCAN26
-0.750389
-1.462827
-1.592350
-1.565792
1.778130
1.931377
1.836888
1.870250
-1.724138
0.274401
...
-0.133950
0.087962
0.162166
0.164266
-1.473008
-0.500173
-0.635597
0.185856
-0.808220
-0.434089
ZSCAN29
-1.433257
-0.477716
-0.818885
-0.766524
0.344280
0.363606
0.494910
0.299088
-1.087825
0.985920
...
-0.101508
-0.286872
-0.798553
0.468023
-0.678084
0.120120
0.174372
-2.536420
0.089184
0.802768
ZSCAN30
-0.174396
0.362413
0.448133
0.284722
1.575286
1.568597
1.757976
1.656865
0.352766
-0.700548
...
-0.826004
-1.003294
-0.808904
-0.613195
0.787276
-0.381542
0.774398
-0.303855
0.368903
0.732406
ZSCAN31
-0.833916
-2.190571
-1.780334
-1.221047
0.826766
0.973273
0.639237
0.792751
-0.900787
0.895136
...
0.509417
0.023122
0.233225
0.893815
0.602918
-0.756294
-0.164419
-1.846738
-0.561152
-0.593170
ZSCAN32
-1.113376
-0.586503
-1.067393
-0.999727
0.963417
0.788529
0.852762
0.866603
-1.138847
0.230437
...
0.289061
0.081792
0.162084
-0.012883
-1.604187
0.032852
0.362911
-1.012352
0.007778
0.989420
ZSCAN4
-0.606993
0.299429
-0.667870
-0.342348
0.522595
1.011503
0.368341
1.292830
1.332640
0.332605
...
-0.171931
0.145878
-0.246947
0.005249
0.795520
0.952491
-0.249030
0.458810
-0.150910
-0.826341
ZSCAN5A
1.828036
1.469749
0.682074
-0.511445
-0.606073
-0.638284
-0.523978
-0.805322
1.128197
-0.430763
...
-0.085916
0.628877
-0.136047
-0.497448
3.023357
1.014005
0.199544
-0.475723
0.339935
-0.371764
ZSCAN5B
0.153966
-1.039581
-0.884949
-0.762964
-0.438961
-0.503989
-0.412100
-0.310373
-0.840239
1.315756
...
0.966943
0.709617
1.011692
1.140827
-0.272171
-0.269364
-0.682772
-0.349789
-0.786178
-0.820169
ZSCAN9
-1.833808
-1.783650
-1.814708
-1.455654
1.845276
1.834143
1.890968
1.865571
-0.494760
-0.219693
...
-0.764778
-0.839148
-0.808930
-0.197488
0.370678
0.258238
0.293474
-0.286349
0.362314
0.739464
ZSWIM1
0.310692
1.586376
1.361255
0.601085
1.319903
1.438609
1.393511
1.422923
-0.159378
0.299359
...
0.266781
0.231151
0.531764
0.296167
-0.627288
0.000852
-1.335572
-0.790924
-0.629643
-0.846488
ZSWIM2
0.751703
0.331662
-0.350502
-0.290929
-0.470608
-0.395837
-0.066218
0.050482
1.358787
-0.413461
...
0.595547
-0.208666
0.078206
-0.187196
0.104178
1.564844
-0.216184
1.803188
-0.583961
-0.534671
ZSWIM3
0.875307
1.582899
1.476825
0.869885
1.276476
1.186219
1.303560
1.315013
1.336625
0.036904
...
0.082470
-0.665672
0.223621
-0.231535
-0.438534
0.830633
-0.997245
0.237536
-1.000210
-0.961062
ZSWIM4
0.393512
1.255511
1.062102
1.465903
-0.877055
-0.925123
-0.935640
-0.934302
2.521815
-0.433944
...
-1.338186
-1.255525
-1.246488
-0.363806
2.047569
0.571628
1.200225
0.071680
0.574713
0.640109
ZSWIM5
0.005656
0.373029
0.821921
1.351311
1.299190
1.183455
1.416636
1.329408
0.954076
-0.914544
...
-0.786825
-1.241399
-0.746524
-0.353784
1.302197
0.301077
-0.169702
0.704482
0.774260
-0.017853
ZSWIM6
-0.164698
-0.416140
-0.343751
-0.817253
0.578371
0.700177
0.550485
0.635327
0.856563
-0.693457
...
-0.121243
-0.281921
-0.325248
0.018470
0.064166
-0.478725
-0.201862
0.219078
0.043061
0.245409
ZSWIM7
-0.126135
-0.282466
-1.039411
-0.905026
-1.047481
-0.874176
-1.178496
-0.854338
0.639093
0.407007
...
0.944817
1.124877
0.973801
0.461125
-0.285708
0.674971
0.269460
-0.102437
0.071874
0.733805
ZSWIM8
0.212115
-0.004018
0.828299
0.668425
1.124810
0.799217
1.382162
0.945715
0.330958
-0.315954
...
-1.206422
-0.957529
-0.680614
-0.607566
-0.790710
-1.364014
0.436602
0.138183
0.525957
0.902036
ZSWIM8-AS1
-0.345071
-0.555438
0.141335
0.027159
0.067171
0.083667
-0.035536
0.135157
1.188639
0.070069
...
-0.622128
-0.685330
-0.234216
-0.181988
0.050139
-0.782298
0.543359
-0.165537
0.545716
0.183376
ZUFSP
0.345643
0.105403
0.073821
-0.204641
1.857305
1.775845
1.807027
1.762088
-1.144257
-0.020741
...
0.124343
0.332051
0.317355
-0.172142
-1.531111
0.007284
-0.525445
0.157817
-1.459855
0.069972
ZW10
-0.219718
-0.918331
-0.143605
-0.290296
0.539863
0.631755
0.581768
0.589174
-1.490887
-0.126129
...
-0.096162
-0.374694
0.346708
0.455669
1.709321
0.393079
0.167503
-0.886841
-0.004854
-0.425424
ZWILCH
1.555528
0.600544
-0.097465
-0.758729
-0.193265
-0.193110
-0.211693
-0.247740
-1.591464
0.476850
...
0.606949
0.840376
0.726342
0.446371
-0.385893
1.004661
-1.283622
1.251733
-1.337677
-0.860070
ZWINT
3.154188
3.326311
2.876940
1.813612
-1.054751
-1.103545
-1.080372
-1.171860
-1.682247
0.463577
...
0.605577
0.920985
1.071082
0.519482
1.304839
1.645788
-0.402724
-1.210645
-0.818001
-0.473278
ZXDA
-0.126906
0.826386
0.550746
0.249874
1.530419
1.389272
1.666272
1.274833
-0.339106
-0.417740
...
-0.328058
-1.353879
-1.234595
-0.572824
2.454783
0.436573
-0.291151
-0.083075
-0.798016
-0.787461
ZXDB
-0.317512
0.596062
0.279705
-0.192733
1.710448
1.462793
1.701114
1.368309
-0.057427
-0.401245
...
-0.154834
-1.568411
-1.042027
-0.481521
3.044441
0.318120
-0.201213
-0.500049
-0.808466
-0.953015
ZXDC
-0.240672
0.373022
0.360301
0.151329
1.871988
1.690685
1.907057
1.586018
0.135540
-0.574079
...
-0.678735
-0.439606
-0.615279
-0.537236
0.918003
-0.857600
0.115913
-0.441675
0.003835
-0.041147
ZYG11A
-0.496398
-0.408362
0.325555
-0.399866
0.158885
-0.094342
0.463648
-0.274908
0.461833
-0.112904
...
0.193362
0.529334
-0.337553
0.200088
1.856857
0.799540
-0.229960
1.313573
-0.187878
0.124338
ZYG11B
-0.856866
0.363752
0.701878
0.225195
0.337138
0.263296
0.194983
0.230569
0.035351
0.666866
...
0.613598
0.402680
0.625637
0.606332
0.203002
0.320450
-0.828416
0.596003
-0.777892
-1.464067
ZYX
-1.941816
-1.413384
-0.681255
-0.698285
0.872683
0.596888
0.844219
0.549523
-0.431510
0.566219
...
-0.289916
-0.376545
-0.691721
0.499645
1.475544
-1.015530
-0.411662
-1.925270
0.814704
-0.577433
ZZEF1
-0.015748
0.782454
0.743609
0.746774
1.108376
0.741788
1.013920
0.783854
0.730573
-0.470114
...
-1.410528
-1.645797
-1.399790
-0.238882
1.732430
-0.997546
0.807997
-0.759040
1.303571
0.961459
ZZZ3
-0.924901
-0.168036
0.108320
-0.289186
-1.591413
-1.766489
-1.738419
-1.816359
0.057552
0.018605
...
0.238555
-0.297488
0.059393
-0.186241
-0.710683
0.825393
0.694799
1.379935
0.469622
1.504594
20869 rows × 232 columns
In [5]:
deg_list = pd.read_csv('./data/gene_lists/deg_genelist.csv')
In [6]:
deg_list = deg_list.Symbol
In [7]:
# Sanity check: number of submitted gene symbols, then the first few entries
print(deg_list.shape)
deg_list.head()
(361,)
Out[7]:
0 TRPC6P
1 RNU6-1095P
2 THRSP
3 NUPR1L
4 RPL23AP69
Name: Symbol, dtype: object
In [8]:
# Build the per-brain-area statistics table for the gene list (columns seen in the output of
# the following cell: raw p_values, corrected_p, AUC). The call also prints how many of the
# submitted genes are present in the HBA dataset and which ones were not found.
results = hba.generate_stats_table(exp, gene_list=deg_list)
You submitted a gene list with 361 genes.
275 of those genes are present in the HBA dataset.
Genes not found in our reference data: ['TRPC6P' 'RNU6-1095P' 'RPL23AP69' 'CALML3-AS1' 'RN7SKP234' 'RNU6-476P'
'MIR4458HG' 'ITM2BP1' 'RNA5SP323' 'RPSAP53' 'TPT1P10' 'MICF' 'TTC6'
'CDC42-IT1' 'TIMM9P2' 'VTRNA1-3' 'RNA5SP195' 'ZSWIM5P2' 'PPIEL' 'OR7E12P'
'MRPS31P2' 'PARP4P1' 'PSMD10P1' 'PGBD4P3' 'RNU6-703P' 'RPL21P43'
'RNU6-125P' 'ACTG1P11' 'FAM27E4' 'RP4-747G18.5' 'GRIK1-AS1' 'RPL39P5'
'OXCT1-AS1' 'CLRN1-AS1' 'ALMS1-IT1' 'LINC01123' 'UBE2Q2P6' 'MIR4712'
'KRR1P1' 'LINC00441' 'HMGN2P30' 'VCAN-AS1' 'KCTD21-AS1' 'HOXB-AS2'
'LINC00106' 'MRPL23-AS1' 'LINC01252' 'LMNTD2' 'FOXD4L1' 'C16orf47'
'RNF144A-AS1' 'LDHAP7' 'KLF7-IT1' 'LURAP1L-AS1' 'LINC00565' 'C4orf50'
'LINC00920' 'ARMC4P1' 'SLC9A7P1' 'OGFRP1' 'ERICH5' 'KRT8P50' 'MIR4435-2HG'
'ADGRE4P' 'SNAI1P1' 'LINC01340' 'MTND4P24' 'SETP17' 'LDHAP3' 'PSPHP1'
'LINC00941' 'NEFHP1' 'HCG24' 'CASC16' 'LMNTD1' 'RASGRF2-AS1' 'LINC01549'
'ROR1-AS1' 'AGAP1-IT1' 'HLA-U' 'EEF1DP5' 'MYO5BP2' 'ADGRF5P1' 'SLC10A5P1'
'AC093850.2' 'IMPA1P']
In [9]:
# Keep the first 10 rows of the stats table (one row per brain area) and display them.
# NOTE(review): "top" relies on `results` already being ordered by the helper — confirm in HBA_analysis.
top_10_brain_areas = results.iloc[:10]
top_10_brain_areas
Out[9]:
raw p_values
corrected_p
AUC
lateral parabrachial nucleus
0.000016
0.000117
0.575688
superior colliculus
0.000028
0.000189
0.573451
abducens nucleus
0.000036
0.000236
0.572450
pontine raphe nucleus
0.000039
0.000249
0.572082
medial parabrachial nucleus
0.000040
0.000249
0.572008
ventral tegmental area
0.000116
0.000706
0.567559
superior olivary complex
0.000143
0.000845
0.566641
motor nucleus of trigeminal nerve
0.000146
0.000845
0.566562
vestibular nuclei
0.000163
0.000900
0.566074
facial motor nucleus
0.000172
0.000930
0.565827
In [10]:
# Plot the gene list against the expression matrix for a single brain area; the area used here
# is the first row of the top-10 table above. The call returns a matplotlib Axes (see Out[10]).
hba.make_ROC_plot(exp_df=exp, gene_list=deg_list, brain_area='lateral parabrachial nucleus')
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x10da9d400>
In [11]:
import plotly
# Initialise plotly's offline notebook mode so the interactive figures below render in the notebook
plotly.offline.init_notebook_mode()
In [12]:
# Interactive distribution plot for the same brain area as the ROC plot above.
# NOTE(review): `disease_label` is presumably used only to label the gene-list trace — confirm in HBA_analysis.
hba.interactive_distplot(exp_df=exp, brain_area='lateral parabrachial nucleus', disease_label='Anorexia', gene_list=deg_list)
In [13]:
from genometools.expression import ExpMatrix
from genometools.expression.cluster import bicluster
from plotly.offline import iplot
# NOTE(review): `_plot_html` is an underscore-prefixed (private) plotly name and is not referenced
# in any visible cell; `go` below is likewise unused here. Confirm both are still needed — a private
# import like this may break on a different plotly version.
from plotly.offline.offline import _plot_html
import plotly.graph_objs as go
In [14]:
# Load the reference expression matrix, keep only the rows whose gene symbol is in our
# gene list, then bicluster that subset.
expression = ExpMatrix.read_tsv('./data/processed/brainarea_vs_genes_exp_w_reannotations.tsv')
in_gene_list = expression.index.isin(deg_list)
AN_exp = bicluster(expression[in_gene_list])
In [16]:
# Heatmap of the biclustered gene-list expression, restricted to the top-10 brain-area columns
top_area_exp = AN_exp.loc[:, top_10_brain_areas.index]
colorbar_opts = {'colorbar_label': 'Centered Expression<br>(log<sub>2</sub>-scale)'}
fig = top_area_exp.get_figure(
    height=800,
    width=900,
    emin=-3.0,
    emax=3.0,
    margin_bottom=100,
    heatmap_kw=colorbar_opts,
)
iplot(fig)
In [ ]:
Content source: derekhoward/HBAsets
Similar notebooks: