In [1]:
from pandas import *
import os, os.path
import csv

os.chdir('/home/will/HIVSystemsBio/')

In [3]:
cocaine_genes = read_csv('CocaineGeneList.csv')
hiv_genes = read_csv('HIVGeneList.csv', sep = '\t')
biomart_conv = read_csv('mart_export.txt', sep = '\t')

In [11]:
hiv_genes = merge(hiv_genes, biomart_conv,
                    left_on = 'Gene identifier',
                    right_on = 'Ensembl Gene ID',
                    how = 'inner')

In [24]:
cocaine_genes


Out[24]:
ID Species Gene Name
0 3725 Homo sapiens jun oncogene
1 805 Homo sapiens calmodulin 3 (phosphorylase kinase, delta); ca...
2 6347 Homo sapiens chemokine (C-C motif) ligand 2
3 5535 Homo sapiens protein phosphatase 3 (formerly 2B), regulator...
4 1390 Homo sapiens cAMP responsive element modulator
5 1387 Homo sapiens CREB binding protein
6 4311 Homo sapiens membrane metallo-endopeptidase
7 5747 Homo sapiens PTK2 protein tyrosine kinase 2
8 1385 Homo sapiens cAMP responsive element binding protein 1
9 348 Homo sapiens hypothetical LOC100129500; apolipoprotein E
10 161 Homo sapiens adaptor-related protein complex 2, alpha 2 sub...
11 2 Homo sapiens alpha-2-macroglobulin
12 6879 Homo sapiens TAF7 RNA polymerase II, TATA box binding prote...
13 3688 Homo sapiens integrin, beta 1 (fibronectin receptor, beta p...
14 7342 Homo sapiens upstream binding protein 1 (LBP-1a)
15 3689 Homo sapiens integrin, beta 2 (complement component 3 recep...
16 1984 Homo sapiens eukaryotic translation initiation factor 5A; e...
17 2023 Homo sapiens enolase 1, (alpha)
18 1278 Homo sapiens collagen, type I, alpha 2
19 4790 Homo sapiens nuclear factor of kappa light polypeptide gene...
20 5141 Homo sapiens phosphodiesterase 4A, cAMP-specific (phosphodi...
21 7514 Homo sapiens exportin 1 (CRM1 homolog, yeast)
22 5144 Homo sapiens phosphodiesterase 4D, cAMP-specific (phosphodi...
23 8945 Homo sapiens beta-transducin repeat containing
24 23198 Homo sapiens proteasome (prosome, macropain) activator subu...
25 51196 Homo sapiens phospholipase C, epsilon 1
26 108 Homo sapiens adenylate cyclase 2 (brain)
27 64919 Homo sapiens B-cell CLL/lymphoma 11B (zinc finger protein)
28 5137 Homo sapiens phosphodiesterase 1C, calmodulin-dependent 70kDa
29 1267 Homo sapiens 2',3'-cyclic nucleotide 3' phosphodiesterase
30 1445 Homo sapiens c-src tyrosine kinase
31 5153 Homo sapiens phosphodiesterase 1B, calmodulin-dependent
32 7529 Homo sapiens tyrosine 3-monooxygenase/tryptophan 5-monooxyg...
33 815 Homo sapiens calcium/calmodulin-dependent protein kinase II...
34 8850 Homo sapiens K(lysine) acetyltransferase 2B
35 3836 Homo sapiens karyopherin alpha 1 (importin alpha 5)
36 5567 Homo sapiens protein kinase, cAMP-dependent, catalytic, beta
37 915 Homo sapiens CD3d molecule, delta (CD3-TCR complex)
38 916 Homo sapiens CD3e molecule, epsilon (CD3-TCR complex)
39 917 Homo sapiens CD3g molecule, gamma (CD3-TCR complex)
40 3014 Homo sapiens H2A histone family, member X
41 2775 Homo sapiens guanine nucleotide binding protein (G protein)...
42 5813 Homo sapiens purine-rich element binding protein A
43 6387 Homo sapiens chemokine (C-X-C motif) ligand 12 (stromal cel...
44 54205 Homo sapiens cytochrome c, somatic
45 5335 Homo sapiens phospholipase C, gamma 1
46 6383 Homo sapiens syndecan 2
47 5573 Homo sapiens protein kinase, cAMP-dependent, regulatory, ty...
48 5575 Homo sapiens protein kinase, cAMP-dependent, regulatory, ty...
49 808 Homo sapiens calmodulin 3 (phosphorylase kinase, delta); ca...
50 7533 Homo sapiens tyrosine 3-monooxygenase/tryptophan 5-monooxyg...
51 5430 Homo sapiens polymerase (RNA) II (DNA directed) polypeptide...
52 2185 Homo sapiens PTK2B protein tyrosine kinase 2 beta
53 7532 Homo sapiens tyrosine 3-monooxygenase/tryptophan 5-monooxyg...
54 7531 Homo sapiens similar to 14-3-3 protein epsilon (14-3-3E) (M...
55 7057 Homo sapiens thrombospondin 1
56 2967 Homo sapiens general transcription factor IIH, polypeptide ...
57 3122 Homo sapiens major histocompatibility complex, class II, DR...
58 114 Homo sapiens adenylate cyclase 8 (brain)
59 3627 Homo sapiens chemokine (C-X-C motif) ligand 10
60 23236 Homo sapiens phospholipase C, beta 1 (phosphoinositide-spec...
61 112 Homo sapiens adenylate cyclase 6
62 7846 Homo sapiens tubulin, alpha 1a
63 5894 Homo sapiens v-raf-1 murine leukemia viral oncogene homolog 1
64 5578 Homo sapiens protein kinase C, alpha
65 111 Homo sapiens adenylate cyclase 5
66 7124 Homo sapiens tumor necrosis factor (TNF superfamily, member 2)
67 10399 Homo sapiens guanine nucleotide binding protein (G protein)...
68 5685 Homo sapiens proteasome (prosome, macropain) subunit, alpha...
69 1051 Homo sapiens CCAAT/enhancer binding protein (C/EBP), beta
70 2247 Homo sapiens fibroblast growth factor 2 (basic)
71 3107 Homo sapiens major histocompatibility complex, class I, C; ...
72 7852 Homo sapiens chemokine (C-X-C motif) receptor 4
73 6921 Homo sapiens similar to elongin C; transcription elongation...
74 5582 Homo sapiens protein kinase C, gamma
75 708 Homo sapiens complement component 1, q subcomponent binding...
76 5328 Homo sapiens plasminogen activator, urokinase
77 5581 Homo sapiens protein kinase C, epsilon
78 2902 Homo sapiens glutamate receptor, ionotropic, N-methyl D-asp...
79 2903 Homo sapiens glutamate receptor, ionotropic, N-methyl D-asp...
80 5880 Homo sapiens ras-related C3 botulinum toxin substrate 2 (rh...
81 2905 Homo sapiens glutamate receptor, ionotropic, N-methyl D-asp...
82 637 Homo sapiens BH3 interacting domain death agonist
83 2906 Homo sapiens glutamate receptor, ionotropic, N-methyl D-asp...
84 3303 Homo sapiens heat shock 70kDa protein 1A; heat shock 70kDa ...
85 5604 Homo sapiens mitogen-activated protein kinase kinase 1
86 2908 Homo sapiens nuclear receptor subfamily 3, group C, member ...
87 5690 Homo sapiens proteasome (prosome, macropain) subunit, beta ...
88 5925 Homo sapiens retinoblastoma 1
89 5692 Homo sapiens proteasome (prosome, macropain) subunit, beta ...
90 196883 Homo sapiens adenylate cyclase 4
91 5695 Homo sapiens proteasome (prosome, macropain) subunit, beta ...
92 5694 Homo sapiens proteasome (prosome, macropain) subunit, beta ...
93 836 Homo sapiens caspase 3, apoptosis-related cysteine peptidase
94 5590 Homo sapiens protein kinase C, zeta
95 375 Homo sapiens ADP-ribosylation factor 1
96 2885 Homo sapiens growth factor receptor-bound protein 2
97 5501 Homo sapiens protein phosphatase 1, catalytic subunit, gamm...
98 5499 Homo sapiens protein phosphatase 1, catalytic subunit, alph...
99 5595 Homo sapiens hypothetical LOC100271831; mitogen-activated p...
100 5594 Homo sapiens mitogen-activated protein kinase 1
101 5717 Homo sapiens proteasome (prosome, macropain) 26S subunit, n...
102 10971 Homo sapiens tyrosine 3-monooxygenase/tryptophan 5-monooxyg...
103 5599 Homo sapiens mitogen-activated protein kinase 8
104 1958 Homo sapiens early growth response 1
105 5105 Homo sapiens phosphoenolpyruvate carboxykinase 1 (soluble)
106 11103 Homo sapiens KRR1, small subunit (SSU) processome component...
107 867 Homo sapiens Cas-Br-M (murine) ecotropic retroviral transfo...
108 4734 Homo sapiens neural precursor cell expressed, developmental...
109 1026 Homo sapiens cyclin-dependent kinase inhibitor 1A (p21, Cip1)
110 7278 Homo sapiens tubulin, alpha 3d; tubulin, alpha 3c
111 5516 Homo sapiens protein phosphatase 2 (formerly 2A), catalytic...
112 5515 Homo sapiens protein phosphatase 2 (formerly 2A), catalytic...
113 1803 Homo sapiens dipeptidyl-peptidase 4
114 7277 Homo sapiens tubulin, alpha 4a
115 10262 Homo sapiens splicing factor 3b, subunit 4, 49kDa
116 60 Homo sapiens actin, beta
117 1175 Homo sapiens adaptor-related protein complex 2, sigma 1 sub...
118 143 Homo sapiens poly (ADP-ribose) polymerase family, member 4
119 6432 Homo sapiens splicing factor, arginine/serine-rich 7, 35kDa
120 5532 Homo sapiens protein phosphatase 3 (formerly 2B), catalytic...
121 5901 Homo sapiens RAN, member RAS oncogene family
122 7157 Homo sapiens tumor protein p53
123 351 Homo sapiens amyloid beta (A4) precursor protein
124 5530 Homo sapiens protein phosphatase 3 (formerly 2B), catalytic...
125 5524 Homo sapiens protein phosphatase 2A activator, regulatory s...
126 5522 Homo sapiens protein phosphatase 2 (formerly 2A), regulator...
127 5527 Homo sapiens protein phosphatase 2, regulatory subunit B', ...
128 8337 Homo sapiens histone cluster 2, H2aa3; histone cluster 2, H...
129 2534 Homo sapiens FYN oncogene related to SRC, FGR, YES
130 2547 Homo sapiens X-ray repair complementing defective repair in...
131 2353 Homo sapiens v-fos FBJ murine osteosarcoma viral oncogene h...
132 6426 Homo sapiens splicing factor, arginine/serine-rich 1

In [26]:
printed = set()
with open('out_gene_list.tsv', 'w') as handle:
    writer = csv.writer(handle, delimiter = '\t')
    for gene, direc in hiv_genes[['EntrezGene ID', 'Expression']].dropna().values:
        geneid = int(gene)
        group = 'HIV-' + direc
        tup = (geneid, group)
        if tup not in printed:
            writer.writerow(tup)
            printed.add(tup)
            
    for gene in cocaine_genes['ID'].values:
        geneid = int(gene)
        writer.writerow((geneid, 'Cocaine'))

In [27]:
both_genes = merge(hiv_genes, cocaine_genes,
                    left_on = 'Gene identifier',
                    right_on = 'ID',
                    how = 'inner')

In [28]:
both_genes


Out[28]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 0 entries
Data columns:
Gene symbol               0  non-null values
Gene identifier           0  non-null values
Organism                  0  non-null values
Experimental factor       0  non-null values
Factor value              0  non-null values
Experiment accession      0  non-null values
Array Design accession    0  non-null values
Expression                0  non-null values
P-value                   0  non-null values
Ensembl Gene ID           0  non-null values
Ensembl Transcript ID     0  non-null values
EntrezGene ID             0  non-null values
ID                        0  non-null values
Species                   0  non-null values
Gene Name                 0  non-null values
dtypes: float64(2), int64(1), object(12)

In [ ]: