In [1]:
from pandas import *
import os, os.path
import csv
os.chdir('/home/will/HIVSystemsBio/')
In [3]:
cocaine_genes = read_csv('CocaineGeneList.csv')
hiv_genes = read_csv('HIVGeneList.csv', sep = '\t')
biomart_conv = read_csv('mart_export.txt', sep = '\t')
In [11]:
hiv_genes = merge(hiv_genes, biomart_conv,
left_on = 'Gene identifier',
right_on = 'Ensembl Gene ID',
how = 'inner')
In [24]:
cocaine_genes
Out[24]:
ID
Species
Gene Name
0
3725
Homo sapiens
jun oncogene
1
805
Homo sapiens
calmodulin 3 (phosphorylase kinase, delta); ca...
2
6347
Homo sapiens
chemokine (C-C motif) ligand 2
3
5535
Homo sapiens
protein phosphatase 3 (formerly 2B), regulator...
4
1390
Homo sapiens
cAMP responsive element modulator
5
1387
Homo sapiens
CREB binding protein
6
4311
Homo sapiens
membrane metallo-endopeptidase
7
5747
Homo sapiens
PTK2 protein tyrosine kinase 2
8
1385
Homo sapiens
cAMP responsive element binding protein 1
9
348
Homo sapiens
hypothetical LOC100129500; apolipoprotein E
10
161
Homo sapiens
adaptor-related protein complex 2, alpha 2 sub...
11
2
Homo sapiens
alpha-2-macroglobulin
12
6879
Homo sapiens
TAF7 RNA polymerase II, TATA box binding prote...
13
3688
Homo sapiens
integrin, beta 1 (fibronectin receptor, beta p...
14
7342
Homo sapiens
upstream binding protein 1 (LBP-1a)
15
3689
Homo sapiens
integrin, beta 2 (complement component 3 recep...
16
1984
Homo sapiens
eukaryotic translation initiation factor 5A; e...
17
2023
Homo sapiens
enolase 1, (alpha)
18
1278
Homo sapiens
collagen, type I, alpha 2
19
4790
Homo sapiens
nuclear factor of kappa light polypeptide gene...
20
5141
Homo sapiens
phosphodiesterase 4A, cAMP-specific (phosphodi...
21
7514
Homo sapiens
exportin 1 (CRM1 homolog, yeast)
22
5144
Homo sapiens
phosphodiesterase 4D, cAMP-specific (phosphodi...
23
8945
Homo sapiens
beta-transducin repeat containing
24
23198
Homo sapiens
proteasome (prosome, macropain) activator subu...
25
51196
Homo sapiens
phospholipase C, epsilon 1
26
108
Homo sapiens
adenylate cyclase 2 (brain)
27
64919
Homo sapiens
B-cell CLL/lymphoma 11B (zinc finger protein)
28
5137
Homo sapiens
phosphodiesterase 1C, calmodulin-dependent 70kDa
29
1267
Homo sapiens
2',3'-cyclic nucleotide 3' phosphodiesterase
30
1445
Homo sapiens
c-src tyrosine kinase
31
5153
Homo sapiens
phosphodiesterase 1B, calmodulin-dependent
32
7529
Homo sapiens
tyrosine 3-monooxygenase/tryptophan 5-monooxyg...
33
815
Homo sapiens
calcium/calmodulin-dependent protein kinase II...
34
8850
Homo sapiens
K(lysine) acetyltransferase 2B
35
3836
Homo sapiens
karyopherin alpha 1 (importin alpha 5)
36
5567
Homo sapiens
protein kinase, cAMP-dependent, catalytic, beta
37
915
Homo sapiens
CD3d molecule, delta (CD3-TCR complex)
38
916
Homo sapiens
CD3e molecule, epsilon (CD3-TCR complex)
39
917
Homo sapiens
CD3g molecule, gamma (CD3-TCR complex)
40
3014
Homo sapiens
H2A histone family, member X
41
2775
Homo sapiens
guanine nucleotide binding protein (G protein)...
42
5813
Homo sapiens
purine-rich element binding protein A
43
6387
Homo sapiens
chemokine (C-X-C motif) ligand 12 (stromal cel...
44
54205
Homo sapiens
cytochrome c, somatic
45
5335
Homo sapiens
phospholipase C, gamma 1
46
6383
Homo sapiens
syndecan 2
47
5573
Homo sapiens
protein kinase, cAMP-dependent, regulatory, ty...
48
5575
Homo sapiens
protein kinase, cAMP-dependent, regulatory, ty...
49
808
Homo sapiens
calmodulin 3 (phosphorylase kinase, delta); ca...
50
7533
Homo sapiens
tyrosine 3-monooxygenase/tryptophan 5-monooxyg...
51
5430
Homo sapiens
polymerase (RNA) II (DNA directed) polypeptide...
52
2185
Homo sapiens
PTK2B protein tyrosine kinase 2 beta
53
7532
Homo sapiens
tyrosine 3-monooxygenase/tryptophan 5-monooxyg...
54
7531
Homo sapiens
similar to 14-3-3 protein epsilon (14-3-3E) (M...
55
7057
Homo sapiens
thrombospondin 1
56
2967
Homo sapiens
general transcription factor IIH, polypeptide ...
57
3122
Homo sapiens
major histocompatibility complex, class II, DR...
58
114
Homo sapiens
adenylate cyclase 8 (brain)
59
3627
Homo sapiens
chemokine (C-X-C motif) ligand 10
60
23236
Homo sapiens
phospholipase C, beta 1 (phosphoinositide-spec...
61
112
Homo sapiens
adenylate cyclase 6
62
7846
Homo sapiens
tubulin, alpha 1a
63
5894
Homo sapiens
v-raf-1 murine leukemia viral oncogene homolog 1
64
5578
Homo sapiens
protein kinase C, alpha
65
111
Homo sapiens
adenylate cyclase 5
66
7124
Homo sapiens
tumor necrosis factor (TNF superfamily, member 2)
67
10399
Homo sapiens
guanine nucleotide binding protein (G protein)...
68
5685
Homo sapiens
proteasome (prosome, macropain) subunit, alpha...
69
1051
Homo sapiens
CCAAT/enhancer binding protein (C/EBP), beta
70
2247
Homo sapiens
fibroblast growth factor 2 (basic)
71
3107
Homo sapiens
major histocompatibility complex, class I, C; ...
72
7852
Homo sapiens
chemokine (C-X-C motif) receptor 4
73
6921
Homo sapiens
similar to elongin C; transcription elongation...
74
5582
Homo sapiens
protein kinase C, gamma
75
708
Homo sapiens
complement component 1, q subcomponent binding...
76
5328
Homo sapiens
plasminogen activator, urokinase
77
5581
Homo sapiens
protein kinase C, epsilon
78
2902
Homo sapiens
glutamate receptor, ionotropic, N-methyl D-asp...
79
2903
Homo sapiens
glutamate receptor, ionotropic, N-methyl D-asp...
80
5880
Homo sapiens
ras-related C3 botulinum toxin substrate 2 (rh...
81
2905
Homo sapiens
glutamate receptor, ionotropic, N-methyl D-asp...
82
637
Homo sapiens
BH3 interacting domain death agonist
83
2906
Homo sapiens
glutamate receptor, ionotropic, N-methyl D-asp...
84
3303
Homo sapiens
heat shock 70kDa protein 1A; heat shock 70kDa ...
85
5604
Homo sapiens
mitogen-activated protein kinase kinase 1
86
2908
Homo sapiens
nuclear receptor subfamily 3, group C, member ...
87
5690
Homo sapiens
proteasome (prosome, macropain) subunit, beta ...
88
5925
Homo sapiens
retinoblastoma 1
89
5692
Homo sapiens
proteasome (prosome, macropain) subunit, beta ...
90
196883
Homo sapiens
adenylate cyclase 4
91
5695
Homo sapiens
proteasome (prosome, macropain) subunit, beta ...
92
5694
Homo sapiens
proteasome (prosome, macropain) subunit, beta ...
93
836
Homo sapiens
caspase 3, apoptosis-related cysteine peptidase
94
5590
Homo sapiens
protein kinase C, zeta
95
375
Homo sapiens
ADP-ribosylation factor 1
96
2885
Homo sapiens
growth factor receptor-bound protein 2
97
5501
Homo sapiens
protein phosphatase 1, catalytic subunit, gamm...
98
5499
Homo sapiens
protein phosphatase 1, catalytic subunit, alph...
99
5595
Homo sapiens
hypothetical LOC100271831; mitogen-activated p...
100
5594
Homo sapiens
mitogen-activated protein kinase 1
101
5717
Homo sapiens
proteasome (prosome, macropain) 26S subunit, n...
102
10971
Homo sapiens
tyrosine 3-monooxygenase/tryptophan 5-monooxyg...
103
5599
Homo sapiens
mitogen-activated protein kinase 8
104
1958
Homo sapiens
early growth response 1
105
5105
Homo sapiens
phosphoenolpyruvate carboxykinase 1 (soluble)
106
11103
Homo sapiens
KRR1, small subunit (SSU) processome component...
107
867
Homo sapiens
Cas-Br-M (murine) ecotropic retroviral transfo...
108
4734
Homo sapiens
neural precursor cell expressed, developmental...
109
1026
Homo sapiens
cyclin-dependent kinase inhibitor 1A (p21, Cip1)
110
7278
Homo sapiens
tubulin, alpha 3d; tubulin, alpha 3c
111
5516
Homo sapiens
protein phosphatase 2 (formerly 2A), catalytic...
112
5515
Homo sapiens
protein phosphatase 2 (formerly 2A), catalytic...
113
1803
Homo sapiens
dipeptidyl-peptidase 4
114
7277
Homo sapiens
tubulin, alpha 4a
115
10262
Homo sapiens
splicing factor 3b, subunit 4, 49kDa
116
60
Homo sapiens
actin, beta
117
1175
Homo sapiens
adaptor-related protein complex 2, sigma 1 sub...
118
143
Homo sapiens
poly (ADP-ribose) polymerase family, member 4
119
6432
Homo sapiens
splicing factor, arginine/serine-rich 7, 35kDa
120
5532
Homo sapiens
protein phosphatase 3 (formerly 2B), catalytic...
121
5901
Homo sapiens
RAN, member RAS oncogene family
122
7157
Homo sapiens
tumor protein p53
123
351
Homo sapiens
amyloid beta (A4) precursor protein
124
5530
Homo sapiens
protein phosphatase 3 (formerly 2B), catalytic...
125
5524
Homo sapiens
protein phosphatase 2A activator, regulatory s...
126
5522
Homo sapiens
protein phosphatase 2 (formerly 2A), regulator...
127
5527
Homo sapiens
protein phosphatase 2, regulatory subunit B', ...
128
8337
Homo sapiens
histone cluster 2, H2aa3; histone cluster 2, H...
129
2534
Homo sapiens
FYN oncogene related to SRC, FGR, YES
130
2547
Homo sapiens
X-ray repair complementing defective repair in...
131
2353
Homo sapiens
v-fos FBJ murine osteosarcoma viral oncogene h...
132
6426
Homo sapiens
splicing factor, arginine/serine-rich 1
In [26]:
printed = set()
with open('out_gene_list.tsv', 'w') as handle:
writer = csv.writer(handle, delimiter = '\t')
for gene, direc in hiv_genes[['EntrezGene ID', 'Expression']].dropna().values:
geneid = int(gene)
group = 'HIV-' + direc
tup = (geneid, group)
if tup not in printed:
writer.writerow(tup)
printed.add(tup)
for gene in cocaine_genes['ID'].values:
geneid = int(gene)
writer.writerow((geneid, 'Cocaine'))
In [27]:
both_genes = merge(hiv_genes, cocaine_genes,
left_on = 'Gene identifier',
right_on = 'ID',
how = 'inner')
In [28]:
both_genes
Out[28]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 0 entries
Data columns:
Gene symbol 0 non-null values
Gene identifier 0 non-null values
Organism 0 non-null values
Experimental factor 0 non-null values
Factor value 0 non-null values
Experiment accession 0 non-null values
Array Design accession 0 non-null values
Expression 0 non-null values
P-value 0 non-null values
Ensembl Gene ID 0 non-null values
Ensembl Transcript ID 0 non-null values
EntrezGene ID 0 non-null values
ID 0 non-null values
Species 0 non-null values
Gene Name 0 non-null values
dtypes: float64(2), int64(1), object(12)
In [ ]:
Content source: JudoWill/ResearchNotebooks
Similar notebooks: