In [1]:
import pandas as pd
import numpy as np
import scipy as sp
In [2]:
ls
Feb1_majority_label.py Jan27.py eluted_peptide_prediction.py
Feb5_hamming_isomap.py Jan28.py elution_compact.csv
Feb5_mds.py Jan30_exclude_hla_a2.py eval_dataset.py
Feb7_tumor_specific_antigens.py Jan31_bigram.py iedb.py
Feb7_tumor_vs_self.py LICENSE imma.py
IEDB_TCELL_HUMAN_IMM.txt README.md immuno_enhance.py
IEDB_TCELL_HUMAN_NON.txt Toxin_Protein_Table.txt mds_9mer_cytotoxicity.png
IEDB_duplicates.png Tumor_Mutant_Antigens_HLA_I.txt mds_cytotoxicity_9mer.png
IEDB_noisy_labels.png Tumor_Self_Antigens_HLA_I.txt pipeline/
IMMA2_imm.txt Untitled0.ipynb pipeline2/
IMMA2_non.txt amino_acid.py prop_of_mhc.py
Jan17.py amino_acid_properties.txt reduced_alphabet.py
Jan18.py conv.py s1.csv
Jan21_toxin.py danafarber_verified_antigens.txt s2.csv
Jan21_toxin_positional.py data/ seq_feature_tests.py
Jan22.py data.py toxin.py
Jan23.py df_tumor_antigens.py toxins.txt
Jan24.py dimensionality_reduction.py viz/
In [3]:
df = pd.read_csv("elution_compact.csv", skipinitialspace=True)
/usr/local/Cellar/python/2.7.6/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/io/parsers.py:1070: DtypeWarning: Columns (5,11,12,20,21,22,23,24,25,30,31,33,34,35,36,37,38,43,44,46,47) have mixed types. Specify dtype option on import or set low_memory=False.
data = self._reader.read(nrows)
In [8]:
df.count()
Out[8]:
Elution ID 267658
Reference ID 267658
Reference Type 267658
PubMed ID 87865
Author 267658
Journal 87865
Year 267658
Epitope ID 267658
Epitope Object Type 267658
Epitope Object Description 267658
Epitope Linear Sequence 267364
Epitope Modification 1371
Epitope Modified Residues 1371
Epitope Starting Position 241740
Epitope Ending Position 241740
Epitope Source Molecule Accession 253700
Epitope Source Molecule Name 253700
Epitope Source Organism ID 253765
Epitope Source Organism Name 253765
Host Organism ID 11374
Host Organism Name 11374
Host Geolocation 36
In Vivo 1 Process Type 10831
In Vivo 1 Immunogen Object Type 352
In Vivo 1 Immunogen Object Description 352
In Vivo 1 Immunogen Linear Sequence 3
In Vivo 1 Immunogen Modification 0
In Vivo 1 Immunogen Modified Residues 0
In Vivo 1 Immunogen Starting Position 3
In Vivo 1 Immunogen Ending Position 3
In Vivo 1 Immunogen Source Molecule Accession 3
In Vivo 1 Immunogen Source Molecule Name 3
In Vivo 1 Immunogen Source Organism ID 352
In Vivo 1 Immunogen Source Organism Name 352
In Vivo 1 Immunogen Epitope Relation 354
In Vitro Process Type 1319
In Vitro Immunogen Object Type 1313
In Vitro Immunogen Object Description 1313
In Vitro Immunogen Linear Sequence 71
In Vitro Immunogen Modification 0
In Vitro Immunogen Modified Residues 0
In Vitro Immunogen Starting Position 65
In Vitro Immunogen Ending Position 65
In Vitro Immunogen Source Molecule Accession 65
In Vitro Immunogen Source Molecule Name 65
In Vitro Immunogen Source Organism ID 1307
In Vitro Immunogen Source Organism Name 1307
In Vitro Immunogen Epitope Relation 1313
MHC Allele ID 267119
MHC Allele Name 267119
Method/Technique ID 267658
Method/Technique 267658
Assay Group 267658
Qualitative Measure 267651
Unnamed: 54 0
Length: 55, dtype: int64
In [9]:
df.head()
Out[9]:
Elution ID
Reference ID
Reference Type
PubMed ID
Author
Journal
Year
Epitope ID
Epitope Object Type
Epitope Object Description
Epitope Linear Sequence
Epitope Modification
Epitope Modified Residues
Epitope Starting Position
Epitope Ending Position
Epitope Source Molecule Accession
Epitope Source Molecule Name
Epitope Source Organism ID
Epitope Source Organism Name
Host Organism ID
0
26
274
Literature
15448372
Yi-Hsiang Huang; Mi-Hua Tao; Cheng-po Hu; Wan-...
J Gen Virol
2004
31803
Linear peptide
KLEDLERDL
KLEDLERDL
NaN
NaN
26
34
11022742
large delta antigen
10000523
Hepatitis delta virus TW2667
NaN
...
1
115
299
Literature
15140958
Yue-Dan Wang; Wan-Yee Fion Sin; Guo-Bing Xu; H...
J Virol
2004
36724
Linear peptide
LITGRLQSL
LITGRLQSL
NaN
NaN
978
986
30173397
Spike glycoprotein precursor
227859
SARS coronavirus
NaN
...
2
143
304
Literature
15102821
Alberto Diaz-Qui�onez; Natalia Martin-Orozco; ...
Infect Immun
2004
66114
Linear peptide
TRVAFAGL
TRVAFAGL
NaN
NaN
94
101
7428872
outer membrane porin C precursor - Salmonella ...
90371
Salmonella enterica subsp. enterica serovar Ty...
NaN
...
3
144
304
Literature
15102821
Alberto Diaz-Qui�onez; Natalia Martin-Orozco; ...
Infect Immun
2004
55063
Linear peptide
RNTDFFGL
RNTDFFGL
NaN
NaN
153
160
7428872
outer membrane porin C precursor - Salmonella ...
90371
Salmonella enterica subsp. enterica serovar Ty...
NaN
...
4
247
329
Literature
15104671
C Sylvester-Hvid; M Nielsen; K Lamberth; G R�d...
Tissue Antigens
2004
14829
Linear peptide
EVMPVSMAK
EVMPVSMAK
NaN
NaN
707
715
30173397
Spike glycoprotein precursor
227859
SARS coronavirus
NaN
...
5 rows × 55 columns
In [10]:
df['Host Organism Name'].value_counts()
Out[10]:
Homo sapiens 7847
Mus musculus NOD 1095
Mus musculus C57BL/6 852
Mus musculus 714
Mus musculus C57BL/6N 587
B6.ERAAP null 160
Mus musculus B6.P 56
Gallus gallus 34
Mus musculus BALB/c 10
Mus musculus BALB.B 3
Mus musculus SNF1 3
Sus scrofa 2
Mus musculus B10 X 129 2
Mus musculus C3H 2
Mus musculus SV40 Tg 2
Mus musculus C57BL/10 2
Rattus norvegicus Lewis 1
Mus musculus NOD/Lt 1
Pan troglodytes 1
dtype: int64
In [11]:
df["Epitope Source Organism Name"].value_counts()
Out[11]:
Vaccinia virus WR 35120
Homo sapiens 22361
Phleum pratense 17819
Mycobacterium tuberculosis 9989
SARS coronavirus Tor2 9290
Zaire ebolavirus 7069
Mus musculus 4992
Lymphocytic choriomeningitis virus (strain Armstrong) 4251
Giardia lamblia ATCC 50803 4204
SARS coronavirus 3914
Vaccinia virus Copenhagen 3035
Sabia virus 2881
Junin virus 2852
Guanarito virus 2759
Hepatitis B virus 2656
...
Influenza A virus (A/Chicken/Nanchang/2-220/2001(H3N6)) 1
Hepatitis C virus genotype 4 1
Hepatitis C virus genotype 5 1
Plasmodium cynomolgi strain Berok 1
Influenza A virus (A/New York/348/2003(H1N1)) 1
Mumps virus 1
Influenza A virus (A/Weiss/1943(H1N1)) 1
Zea mays 1
Moorella thermoacetica ATCC 39073 1
Influenza A virus (A/turkey/Wisconsin/1968(H5N9)) 1
Orcinus orca 1
Encephalomyocarditis virus 1
Autographa californica nucleopolyhedrovirus 1
Influenza A virus (A/Kitakyushu/93(H3N2)) 1
Human herpesvirus 5 strain Merlin 1
Length: 1456, dtype: int64
In [28]:
df["MHC Allele Name"].value_counts()
Out[28]:
HLA-A*02:01 23550
HLA-DRB1*01:01 9635
HLA-A*03:01 7921
HLA-A*11:01 7214
HLA-A*68:02 6455
HLA-A*02:03 6312
HLA-B*15:01 6029
HLA-A*02:06 5774
HLA-B*07:02 5601
HLA-A*31:01 5539
HLA-A*01:01 5485
H-2-Db 5295
H-2-Kb 5215
HLA-A*02:02 5121
HLA-DRB1*04:01 5005
...
HLA-DQ5 1
HLA-Cw3 1
HLA-DQ9 1
bMR1 1
chCD1-2 1
HLA-DP E69K mutant 1
HLA-A*02:01 W167A mutant 1
HLA-A*02:01 T163A mutant 1
H-2-Dbm13 1
HLA-B53 1
BoLA-DQ 1
RT1-Ac 1
HLA-DQA1*01:01/DQB1*05:03 1
HLA-B*35:02 1
HLA-DRB1*13:05 1
Length: 472, dtype: int64
In [22]:
df2 = pd.read_csv("tcell_compact.csv", skipinitialspace=True)
In [452]:
reload(iedb)
import iedb
min_count = 0
hla_type = None
In [453]:
tcell = iedb.load_tcell(min_count=min_count, hla_type = hla_type)
Class I MHC Entries 60202
Class II MHC Entries 95524
Human entries 133735
Human Class I MHCs 54432
Human Class II MHCs 65320
Dropping 3824 null sequences
Dropping 83 bad sequences
Filtered sequences epitope sequences 132503
In [454]:
mhc = iedb.load_mhc(min_count=min_count, hla_type = hla_type)
Class I MHC Entries 166713
Class II MHC Entries 72612
Human entries 239619
Human Class I MHCs 166708
Human Class II MHCs 72612
Dropping 294 null sequences
Dropping 45 bad sequences
Filtered sequences epitope sequences 239475
In [455]:
df = pd.DataFrame({'mhc':mhc, 'tcell':tcell})
df.index.name = 'epitope'
In [456]:
both = ~(df.mhc.isnull() | df.tcell.isnull())
both.sum()
Out[456]:
9493
In [457]:
df[both]
Out[457]:
mhc
tcell
epitope
AAAGAEAGKATTEEQ
0.240000
0.000000
AAAGLAAAAPLESRQ
0.828571
0.500000
AAALGIGTDSVILIKCDERG
0.000000
0.000000
AAASVPAADKFKTFE
0.840000
0.000000
AAATATATAAVGAAT
0.400000
0.000000
AAAWYLWEV
1.000000
1.000000
AACIVGCENV
0.000000
0.000000
AADHAAPEDKYEAFV
0.400000
0.000000
AADHCPVVEVNGVTI
0.000000
0.000000
AAEAMEVA
1.000000
0.000000
AAEKLLEKVPSDVLEMYKAI
0.857143
1.000000
AAESSSKAALTSKLD
0.600000
0.000000
AAFDRKSDAK
1.000000
0.666667
AAFEDLRVL
1.000000
1.000000
AAFKIAATAANSAPA
1.000000
1.000000
AAFNNAIKAGTGGAY
0.600000
0.125000
AAFTSSSKAATAKAP
0.800000
0.000000
AAGAATTAAGAASGA
0.480000
0.000000
AAGAAVKGV
0.166667
0.000000
AAGGHNAVFNFPPNG
0.000000
0.333333
AAGIGILTV
0.909091
0.882353
AAGLQDCTMLV
0.000000
0.000000
AAGTAAQAAVVRFQE
0.666667
0.750000
AAGVPPADKYRTFVA
0.600000
0.000000
AAHARFVAA
1.000000
1.000000
AAIGLSMAGSSAMILAAYHP
1.000000
0.666667
AAKDASIPTATIRRH
0.000000
0.000000
AAKEDFLGCLVKEIP
0.760000
0.000000
AAKPAAAATATATAA
0.320000
0.000000
AALAAAAGVPPADKY
0.720000
0.125000
AALFYTHRFNASGCS
0.000000
1.000000
AALGLWLSV
1.000000
0.000000
AALGVATAAQITAGI
0.000000
1.000000
AALLVVAVGLRV
0.500000
1.000000
AALLVVAVGLRVVCAKYALA
0.800000
1.000000
AANIRALNVPPSLDCRY
0.000000
0.000000
AANKQKQELDEISTN
0.074074
0.333333
AANPHATFGV
1.000000
0.000000
AANWILRGTSFVYVP
0.769231
1.000000
AAPAAGYTPATPAAP
0.440000
0.000000
AAPANDKFTVFEAAF
0.920000
0.250000
AAPANPGLIIGA
0.400000
1.000000
AAPGAGYTPATPAAP
0.400000
0.000000
AAPLSWSKDIYNYME
0.880000
0.000000
AAQNRFTAIATTQQAGSNNL
1.000000
1.000000
AARDRFPGL
1.000000
1.000000
AARLFKAFILDGDKL
0.971429
1.000000
AARVTAILSSLTVTQLLRRL
1.000000
0.000000
AASGAATVAAGGYKV
0.360000
0.000000
AASGADGTYDITKLG
0.280000
0.000000
AASTLLYATV
1.000000
0.500000
AATAAAAAAVDRGDP
0.333333
0.000000
AATAVMAASASAQSVPASRQ
1.000000
1.000000
AATEVELKERKHRIEDAVRN
0.000000
0.000000
AATGAATAATGGYKV
0.400000
0.055556
AAVDLSHFL
0.000000
0.000000
AAVEELKAL
1.000000
0.000000
AAVGATPEAKFDSFV
0.640000
0.000000
AAVLFAATAAAAAAV
0.777778
0.000000
AAVLLLVTHY
1.000000
1.000000
...
...
9493 rows × 2 columns
In [458]:
dfb = df[both]
In [459]:
pylab.scatter(dfb.tcell, dfb.mhc)
Out[459]:
<matplotlib.collections.PathCollection at 0x116e91550>
In [460]:
heatmap, xedges, yedges = np.histogram2d(dfb.tcell, dfb.mhc, bins=10)
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
plt.clf()
plt.imshow(heatmap.T, extent=extent, origin='lower')
plt.colorbar()
plt.show()
In [461]:
plt.hexbin(dfb.tcell, dfb.mhc, gridsize=15)
Out[461]:
<matplotlib.collections.PolyCollection at 0x12a1d09d0>
In [393]:
x = iedb.load_tcell(min_count=5, hla_type = 1)
y = iedb.load_mhc(min_count=5, hla_type = 1)
df = pd.DataFrame({'x':x, 'y':y})
df.index.name = 'epitope'
both = ~(df.x.isnull() | df.y.isnull())
print "COUNT", both.sum()
dfb = df[both]
Class I MHC Entries 60202
Class II MHC Entries 95524
Human entries 133735
Human Class I MHCs 54432
Human Class II MHCs 65320
Dropping 3824 null sequences
Dropping 83 bad sequences
HLA A-2 count: 3709
Filtered sequences epitope sequences 54315
Class I MHC Entries 166713
Class II MHC Entries 72612
Human entries 239619
Human Class I MHCs 166708
Human Class II MHCs 72612
Dropping 294 null sequences
Dropping 45 bad sequences
HLA A-2 count: 2254
Filtered sequences epitope sequences 166669
COUNT 308
In [394]:
plt.scatter(dfb.x, dfb.y)
Out[394]:
<matplotlib.collections.PathCollection at 0x114cd6ed0>
In [395]:
plt.hexbin(dfb.x, dfb.y, gridsize=6)
plt.xlabel("t-cell response")
plt.ylabel("mhc binding")
plt.title("6+ samples, MHC I")
plt.colorbar()
Out[395]:
<matplotlib.colorbar.Colorbar instance at 0x10f7d27e8>
In [429]:
reload(iedb)
import iedb
In [430]:
tcell2 = iedb.load_tcell(min_count=2, key_by_allele=True)
Class I MHC Entries 60202
Class II MHC Entries 95524
Human entries 133735
Human Class I MHCs 54432
Human Class II MHCs 65320
Dropping 3824 null sequences
Dropping 83 bad sequences
Filtered sequences epitope sequences 132503
In [431]:
mhc2 = iedb.load_mhc(min_count=2, key_by_allele=True)
Class I MHC Entries 166713
Class II MHC Entries 72612
Human entries 239619
Human Class I MHCs 166708
Human Class II MHCs 72612
Dropping 294 null sequences
Dropping 45 bad sequences
Filtered sequences epitope sequences 239475
In [432]:
df_combined = pd.DataFrame({'mhc':mhc2, 'tcell':tcell2})
both = ~(df_combined.mhc.isnull() | df_combined.tcell.isnull())
print "COUNT", both.sum()
df_combined_filt = df_combined[both]
COUNT 369
In [433]:
#df_combined_filt.to_csv("mhc_vs_tcell_allele.csv")
In [434]:
df_combined_filt
Out[434]:
mhc
tcell
Epitope Linear Sequence
MHC Allele Name
AAGIGILTV
HLA-A*02:01
0.900000
0.909091
AGFKGEQGPKGEP
HLA-DR4
1.000000
1.000000
AIMDKNIIL
HLA-A*02:01
1.000000
0.963636
ALFGIKLPAL
HLA-A*02:01
1.000000
0.000000
ALMPLYACI
HLA-A*02:01
1.000000
0.400000
ALNIALVAV
HLA-A*02:01
1.000000
1.000000
ALPHIIDEV
HLA-A*02:01
1.000000
0.636364
ALSTGLIHL
HLA-A*02:01
1.000000
0.727273
ALWGFFPVL
HLA-A*02:01
1.000000
1.000000
ALWGPDPAAA
HLA-A*02:01
1.000000
0.909091
AMASTEGNV
HLA-A*02:01
1.000000
1.000000
AMDSNTLEL
HLA-A*02:01
1.000000
1.000000
AMPGVLSYV
HLA-A*02:01
1.000000
1.000000
AMSTTDLEA
HLA-A*02:01
0.750000
0.250000
APASSLLPAL
HLA-B*07:02
1.000000
0.250000
ARKLLLDNL
HLA-B*27:05
1.000000
1.000000
AVFDRKSDAK
HLA-A*11:01
1.000000
0.866667
HLA-A11
1.000000
0.852941
AVYGNIKHK
HLA-A*11:01
0.857143
1.000000
AVYNFATCGI
HLA-A*02:01
1.000000
0.800000
CINGVCWTV
HLA-A*02:01
0.600000
0.960000
CLFKDWEEL
HLA-A*02:01
1.000000
0.750000
CLGGLLTMV
HLA-A*02:01
1.000000
0.958333
HLA-A2
0.857143
0.913043
CPSQEPMSIYVY
HLA-B*35:08
1.000000
1.000000
CTELKLSDY
HLA-A*01:01
1.000000
0.800000
HLA-A1
1.000000
0.764706
CVNGVCWTV
HLA-A*02:01
0.666667
0.863636
DLKPDNILL
HLA-A*02:01
0.750000
0.666667
DLMGYIPLV
HLA-A*02:01
1.000000
0.880952
DMWEHAFYL
HLA-A*02:01
1.000000
0.888889
DSNIMNSINNVMDEIDFFEK
HLA-DQA1*03:01/DQB1*03:02
1.000000
1.000000
DVMNILLQYVVKSFDRSTKV
HLA-DRB1*04:01
1.000000
0.875000
EAAGIGILTV
HLA-A*02:01
0.857143
1.000000
EENLLDFVRF
HLA-B*44:05
1.000000
1.000000
EEVDMTPADALDDFD
HLA-DQB1*03:02
1.000000
1.000000
ELAGIGILTV
HLA-A*02:01
1.000000
0.981481
ELRSRYWAI
HLA-B8
1.000000
0.925926
ENPVVHFFANIVTPR
HLA-DRB1*15:01
1.000000
1.000000
ENPVVHFFKNIVTPR
HLA-DR2
1.000000
0.971429
HLA-DRB1*15:01
1.000000
0.971429
EPLITKLIL
HLA-B*07:02
0.666667
0.000000
EPLPQGQLTAY
HLA-B*35:01
1.000000
0.923077
ETLLRAVESYLLAHS
HLA-DRB1*01:01
1.000000
1.000000
HLA-DRB1*04:01
1.000000
1.000000
HLA-DRB1*15:01
1.000000
1.000000
EYLVSFGVW
HLA-A*24:02
1.000000
0.538462
FAPGFFPYL
HLA-A*02:01
1.000000
0.666667
FATGIGIITV
HLA-A*02:01
1.000000
1.000000
FIDSYICQV
HLA-A*02:01
1.000000
0.809524
FILGIIITV
HLA-A*02:01
1.000000
0.900000
FIVVATAAV
HLA-A*02:01
1.000000
0.250000
FLCKQYLNL
HLA-A*02:01
1.000000
0.333333
FLFWFLKSGA
HLA-A*02:01
1.000000
0.000000
FLIVSLCPT
HLA-A*02:01
1.000000
0.750000
FLLLADARV
HLA-A*02:01
1.000000
0.750000
FLLPLTSLV
HLA-A*02:01
1.000000
0.400000
FLLPLTSLVI
HLA-A*02:01
1.000000
0.200000
FLLSLGIHL
HLA-A*02:01
1.000000
0.833333
FLLTRILTI
HLA-A*02:01
1.000000
0.961538
...
...
369 rows × 2 columns
In [435]:
plt.hexbin(df_combined_filt.tcell, df_combined_filt.mhc, gridsize=6)
Out[435]:
<matplotlib.collections.PolyCollection at 0x1171d9710>
In [437]:
reload(iedb)
df_combined = iedb.load_tcell_vs_mhc(hla_type=1, min_count = 3, key_by_allele=True)
df_combined
Class I MHC Entries 166713
Class II MHC Entries 72612
Human entries 239619
Human Class I MHCs 166708
Human Class II MHCs 72612
Dropping 294 null sequences
Dropping 45 bad sequences
Filtered sequences epitope sequences 166669
Class I MHC Entries 60202
Class II MHC Entries 95524
Human entries 133735
Human Class I MHCs 54432
Human Class II MHCs 65320
Dropping 3824 null sequences
Dropping 83 bad sequences
Filtered sequences epitope sequences 54315
Out[437]:
mhc
tcell
Epitope Linear Sequence
MHC Allele Name
AAGIGILTV
HLA-A*02:01
0.900000
0.909091
AIMDKNIIL
HLA-A*02:01
1.000000
0.963636
ALMPLYACI
HLA-A*02:01
1.000000
0.400000
ALNIALVAV
HLA-A*02:01
1.000000
1.000000
ALSTGLIHL
HLA-A*02:01
1.000000
0.727273
ALWGFFPVL
HLA-A*02:01
1.000000
1.000000
ALWGPDPAAA
HLA-A*02:01
1.000000
0.909091
AMASTEGNV
HLA-A*02:01
1.000000
1.000000
AMPGVLSYV
HLA-A*02:01
1.000000
1.000000
AMSTTDLEA
HLA-A*02:01
0.750000
0.250000
AVYNFATCGI
HLA-A*02:01
1.000000
0.800000
CINGVCWTV
HLA-A*02:01
0.600000
0.960000
CLFKDWEEL
HLA-A*02:01
1.000000
0.750000
CLGGLLTMV
HLA-A*02:01
1.000000
0.958333
HLA-A2
0.857143
0.913043
CPSQEPMSIYVY
HLA-B*35:08
1.000000
1.000000
CTELKLSDY
HLA-A*01:01
1.000000
0.800000
HLA-A1
1.000000
0.764706
DLMGYIPLV
HLA-A*02:01
1.000000
0.880952
EAAGIGILTV
HLA-A*02:01
0.857143
1.000000
ELAGIGILTV
HLA-A*02:01
1.000000
0.981481
ELRSRYWAI
HLA-B8
1.000000
0.925926
EPLPQGQLTAY
HLA-B*35:01
1.000000
0.923077
FIDSYICQV
HLA-A*02:01
1.000000
0.809524
FILGIIITV
HLA-A*02:01
1.000000
0.900000
FLIVSLCPT
HLA-A*02:01
1.000000
0.750000
FLLLADARV
HLA-A*02:01
1.000000
0.750000
FLLSLGIHL
HLA-A*02:01
1.000000
0.833333
FLLTRILTI
HLA-A*02:01
1.000000
0.961538
FLPIIFDAFL
HLA-A*02:01
1.000000
0.250000
FLPSDFFPSI
HLA-A*02:01
1.000000
0.769231
FLPSDFFPSV
HLA-A*02:01
1.000000
0.915254
HLA-A*02:06
1.000000
0.500000
FLRGRAYGL
HLA-B8
1.000000
0.990991
FLTSVINRV
HLA-A*02:01
1.000000
0.888889
FLVIAINAM
HLA-A*02:01
0.600000
0.250000
FMYSDFHFI
HLA-A*02:01
1.000000
0.909091
FPTKDVAL
HLA-B*35:08
1.000000
1.000000
FPYEGGKVF
HLA-B*07:02
0.875000
0.750000
FTASLFLHL
HLA-A*02:01
0.500000
0.428571
FVDTMSIYI
HLA-A*02:01
1.000000
0.000000
FVDYNFTIV
HLA-A*02:01
1.000000
1.000000
GILGFVFTL
HLA-A*02:01
1.000000
0.978102
HLA-A2
0.833333
1.000000
GLCTLVAML
HLA-A2
1.000000
0.978723
GLFDFVNFV
HLA-A*02:01
1.000000
0.857143
GLLDRLYDL
HLA-A*02:01
1.000000
0.250000
GLMWLSYFV
HLA-A*02:01
0.857143
1.000000
GLNDYLHSV
HLA-A*02:01
1.000000
0.888889
GLSPTVWLSV
HLA-A*02:01
1.000000
0.875000
GLSRYVARL
HLA-A*02:01
1.000000
0.857143
GLYSSTVPV
HLA-A*02:01
1.000000
0.500000
GMSRIGMEV
HLA-A*02:01
0.833333
1.000000
HLSLRGLPV
HLA-A*02:01
0.600000
0.750000
HLVEALYLV
HLA-A*02:01
1.000000
0.769231
HLYSHPIIL
HLA-A*02:01
1.000000
0.800000
HMWNFISGI
HLA-A*02:01
1.000000
0.750000
HVDGKILFV
HLA-A*02:01
1.000000
0.800000
IIIPFIAYFV
HLA-A*02:01
1.000000
1.000000
ILAGYGAGV
HLA-A*02:01
1.000000
0.565217
...
...
160 rows × 2 columns
In [451]:
plt.hexbin(df_combined.tcell, df_combined.mhc, gridsize=10)
Out[451]:
<matplotlib.collections.PolyCollection at 0x12a11a1d0>
In [443]:
plt.hexbin?
In [ ]:
Content source: hammerlab/immuno_research
Similar notebooks: