In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import os
# from small_script.myFunctions import *
from Bio.PDB.Polypeptide import three_to_one
%matplotlib inline
%load_ext autoreload
%autoreload 2
In [2]:
plt.rcParams['figure.figsize'] = [16.18033, 10] #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
In [3]:
data = pd.read_csv("/Users/weilu/Research/database/membrane_training_set/proteins-2019-05-01.csv")
data.pdbid = data.pdbid.apply(lambda x: x[2:-1])
In [4]:
info = pd.read_csv("/Users/weilu/Research/database/membrane_contact_dtabase/for_iter0_training_complete_jun06.csv", index_col=0)
info = info.drop_duplicates().reset_index(drop=True)
In [5]:
data["Protein"] = data["pdbid"]
In [6]:
info.shape
Out[6]:
(1560, 3)
In [7]:
len(info.Protein.unique())
Out[7]:
1560
In [8]:
d = data.query("classtype_id == 1").reset_index(drop=True)
d = d.drop_duplicates(subset="Protein").reset_index(drop=True)
In [9]:
d.shape
Out[9]:
(1592, 32)
In [10]:
d = d.merge(info, on="Protein")
In [11]:
d.shape
Out[11]:
(1560, 34)
In [24]:
d.query("Protein == '5mg3'").T
Out[24]:
926
id
3306
ordering
560
family_name_cache
Protein translocase
species_name_cache
Escherichia coli
membrane_name_cache
Gram-neg. inner
name
Holo-translocon
description
NaN
comments
NaN
pdbid
5mg3
resolution
14.0
topology_subunit
Y
topology_show_in
True
thickness
30.6
thicknesserror
0.8
subunit_segments
29
tilt
5
tilterror
0
gibbs
-123.2
tau
NaN
verification
NaN
membrane_id
2
species_id
9
family_id
27
superfamily_id
19
classtype_id
1
type_id
1
secondary_representations_count
1
structure_subunits_count
6
citations_count
0
created_at
2018-08-13 03:54:54 UTC
updated_at
2018-08-13 03:54:54 UTC
Protein
5mg3
Length
1698
InMembraneRatio
0.36808
In [13]:
d.query("Length > 500").shape
Out[13]:
(1065, 34)
In [19]:
d.columns
Out[19]:
Index(['id', 'ordering', 'family_name_cache', 'species_name_cache',
'membrane_name_cache', 'name', 'description', 'comments', 'pdbid',
'resolution', 'topology_subunit', 'topology_show_in', 'thickness',
'thicknesserror', 'subunit_segments', 'tilt', 'tilterror', 'gibbs',
'tau', 'verification', 'membrane_id', 'species_id', 'family_id',
'superfamily_id', 'classtype_id', 'type_id',
'secondary_representations_count', 'structure_subunits_count',
'citations_count', 'created_at', 'updated_at', 'Protein', 'Length',
'InMembraneRatio'],
dtype='object')
In [18]:
d.query("Length > 500").hist("InMembraneRatio", bins=50)
Out[18]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a1643ae48>]],
dtype=object)
In [17]:
d.hist("InMembraneRatio", bins=50)
Out[17]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a17106630>]],
dtype=object)
In [22]:
d.query("structure_subunits_count == 1 and InMembraneRatio < 0.46 and InMembraneRatio > 0.2 and Length > 500").sort_values("InMembraneRatio")[['pdbid', 'subunit_segments'
, 'type_id','secondary_representations_count', 'structure_subunits_count']]
Out[22]:
pdbid
subunit_segments
type_id
secondary_representations_count
structure_subunits_count
513
4ksd
12
1
0
1
1132
6bhu
17
1
0
1
334
3vg9
7
1
1
1
910
5uar
12
1
0
1
1151
6c0v
12
1
0
1
1039
5w81
12
1
0
1
1431
6msm
12
1
0
1
951
5uja
17
1
0
1
1163
6fn4
12
1
0
1
1462
6q81
12
1
0
1
1201
5ywd
12
1
0
1
1235
6gdi
12
1
0
1
510
4lsg
12
1
0
1
919
5uak
12
1
0
1
950
5uj9
17
1
0
1
1379
4xwk
12
1
0
1
1200
5yw7
12
1
0
1
546
4m1m
12
1
2
1
1162
6fn1
12
1
0
1
1270
6dmy
12
1
0
1
906
5ko2
12
1
2
1
907
5kpi
12
1
0
1
1354
6d3r
12
1
1
1
704
4q9l
12
1
0
1
594
3wmg
6
1
0
1
512
4ksc
12
1
2
1
908
5kpj
12
1
0
1
63
2zbd
10
1
1
1
706
4q9j
12
1
0
1
178
3g5u
12
1
2
1
...
...
...
...
...
...
637
4umv
8
1
0
1
1069
5wo7
6
1
1
1
850
5l7i
7
1
0
1
269
4lde
7
1
4
1
1234
6d32
7
1
1
1
1140
5yqz
7
1
0
1
552
4nab
10
1
0
1
1362
6mxt
7
1
0
1
1089
5u74
12
1
0
1
1078
5u73
12
1
2
1
812
4zjc
7
1
1
1
1455
6ajf
12
1
1
1
1456
6ajg
12
1
2
1
816
5ejz
8
1
2
1
1385
5gmy
13
1
0
1
313
3tt3
12
1
0
1
992
5xap
12
1
0
1
991
5xan
12
1
0
1
536
3waj
13
1
1
1
290
3aqp
12
1
0
1
1153
5yhf
12
1
0
1
990
5xam
12
1
0
1
1495
6n3t
12
1
1
1
1091
5ogl
13
1
1
1
61
1wpg
10
1
0
1
297
3rce
13
1
0
1
1029
5n6h
8
1
1
1
335
3ayf
14
1
1
1
1168
6fwf
14
1
0
1
1511
6nt4
24
1
0
1
106 rows × 5 columns
In [20]:
d.query("InMembraneRatio < 0.46 and InMembraneRatio > 0.2 and Length > 500").sort_values("InMembraneRatio")[['pdbid', 'type_id','secondary_representations_count', 'structure_subunits_count']]
Out[20]:
pdbid
type_id
secondary_representations_count
structure_subunits_count
1116
6bpq
1
0
4
306
3sya
1
3
4
528
3wgu
1
1
3
164
5aji
1
4
7
1387
6hco
1
0
2
921
5tji
1
0
4
982
5x41
1
0
2
1215
6f0k
1
0
4
513
4ksd
1
0
1
1158
6be1
1
0
5
220
3kdp
1
0
3
581
4mrs
1
4
2
1187
5ylv
1
1
2
1395
6cvl
1
0
2
915
5tj6
1
0
4
1132
6bhu
1
0
1
527
4hqj
1
0
3
1388
6his
1
0
5
1392
6hiq
1
1
5
1505
6a6m
1
0
2
334
3vg9
1
1
1
910
5uar
1
0
1
479
4huq
1
0
2
1216
6btm
1
0
4
391
2yn9
1
0
2
1151
6c0v
1
0
1
187
2zxe
1
21
3
815
4xe5
1
0
3
274
4hyt
1
2
3
689
4res
1
0
3
...
...
...
...
...
1008
5mdx
1
0
48
533
3j45
1
0
3
335
3ayf
1
1
1
1328
5z1f
1
0
2
555
4chw
1
0
4
1502
6igz
1
0
20
15
1lgh
1
0
16
38
1yce
1
1
11
1180
6c96
1
0
2
1551
6ijo
1
0
20
1129
6bgi
1
0
2
540
4jcb
1
1
31
128
2bhw
1
0
3
541
4jc9
1
0
32
354
2yev
1
0
3
20
1m56
1
11
4
97
3pjs
1
0
4
1457
6h8k
1
0
28
1181
6c9a
1
0
2
1211
5yq7
1
0
32
204
3h90
1
0
2
1330
6e1m
1
1
2
23
1fft
1
0
3
1168
6fwf
1
0
1
490
3zk1
1
1
11
1067
5v8k
1
0
2
556
4chv
1
0
4
1318
6c70
1
0
4
186
3eff
1
0
4
1511
6nt4
1
0
1
551 rows × 4 columns
In [14]:
d.query("InMembraneRatio < 0.46 and InMembraneRatio > 0.2 and Length > 500").sort_values("InMembraneRatio")
Out[14]:
id
ordering
family_name_cache
species_name_cache
membrane_name_cache
name
description
comments
pdbid
resolution
...
classtype_id
type_id
secondary_representations_count
structure_subunits_count
citations_count
created_at
updated_at
Protein
Length
InMembraneRatio
1116
3626
790.0
Polycystin cation channel
Ficedula albicollis
Eykaryo. plasma
TRPM8 channel
NaN
NaN
6bpq
4.10
...
1
1
0
4
0
2018-08-13 03:55:09 UTC
2018-08-13 03:55:09 UTC
6bpq
3072
0.201172
306
1343
700.0
Inward rectifier potassium channels
Mus musculus
Eykaryo. plasma
G protein-activated inward rectifier potassium...
NaN
NaN
3sya
2.98
...
1
1
3
4
0
2018-08-13 03:51:42 UTC
2018-08-13 03:51:42 UTC
3sya
1312
0.201220
528
2279
452.0
P-ATPase
Sus scrofa
Eykaryo. plasma
Sodium-potassium pump, Na+ bound E1P preceedin...
NaN
NaN
3wgu
2.80
...
1
1
1
3
0
2018-08-13 03:53:49 UTC
2018-08-13 03:53:49 UTC
3wgu
1331
0.201352
164
813
1006.0
Small conductance mechanosensitive ion channel...
Escherichia coli
Gram-neg. inner
Mechanosensitive channel protein MscS, open state
NaN
NaN
5aji
2.99
...
1
1
4
7
0
2018-08-13 03:50:49 UTC
2018-08-13 03:50:49 UTC
5aji
1802
0.201443
1387
4138
529.0
ABC transporter G family
Homo sapiens
Eykaryo. plasma
ABC transporter ABCG2, structure 6
NaN
NaN
6hco
3.58
...
1
1
0
2
0
2018-11-07 23:56:06 UTC
2018-11-07 23:56:06 UTC
6hco
1576
0.201777
921
3298
618.0
Slo potassium channels
Aplysia californica
Eykaryo. plasma
High conductance calcium-activated potassium c...
NaN
NaN
5tji
3.80
...
1
1
0
4
0
2018-08-13 03:54:54 UTC
2018-08-13 03:54:54 UTC
5tji
3528
0.202381
982
3412
1315.0
Cobalt uptake transporter
Rhodobacter capsulatus
Gram-neg. inner
CbiMQO-complex, structure 2
NaN
Tilt angles were calculated as for non-TM subu...
5x41
3.47
...
1
1
0
2
0
2018-08-13 03:55:00 UTC
2018-08-13 03:55:00 UTC
5x41
988
0.202429
1215
3850
298.0
Polysulfide reductase
Rhodothermus marinus
Gram-neg. inner
Polysulphide reductase complex
NaN
NaN
6f0k
3.87
...
1
1
0
4
0
2018-08-13 03:55:16 UTC
2018-08-13 03:55:16 UTC
6f0k
2504
0.204473
513
2240
515.0
Multidrug resistance exporter (MDR)
Mus musculus
Eykaryo. plasma
P-glycoprotein, inward-facing conformation 2c
NaN
NaN
4ksd
4.10
...
1
1
0
1
0
2018-08-13 03:53:44 UTC
2018-08-13 03:53:44 UTC
4ksd
1300
0.204615
1158
3705
925.0
Ligand-gated ion channel of neurotransmitter r...
Mus musculus
Eykaryo. plasma
5-hydroxytryptamine receptor 3A, structure 2
NaN
NaN
6be1
4.31
...
1
1
0
5
0
2018-08-13 03:55:11 UTC
2018-08-13 03:55:11 UTC
6be1
1995
0.205013
220
973
446.0
P-ATPase
Sus scrofa
Eykaryo. plasma
Sodium-potassium pump, E2P state, conformation 3
NaN
NaN
3kdp
3.50
...
1
1
0
3
0
2018-08-13 03:51:04 UTC
2018-08-13 03:51:04 UTC
3kdp
1311
0.205187
581
2393
478.0
ABC transporter B family (ABCB)
Novosphingobium aromaticivorans
Gram-neg. inner
ABC transporter related protein
NaN
NaN
4mrs
2.35
...
1
1
4
2
0
2018-08-13 03:53:55 UTC
2018-08-13 03:53:55 UTC
4mrs
1174
0.205281
1187
3806
450.0
P-ATPase
Sus scrofa
Eykaryo. plasma
Sodium-potassium pump, E2P state, conformation 7
NaN
NaN
5ylv
2.80
...
1
1
1
2
0
2018-08-13 03:55:14 UTC
2018-08-13 03:55:14 UTC
5ylv
1247
0.205293
1395
4148
474.0
Binding-protein-dependent transport system
Escherichia coli
Gram-neg. inner
Methionine importer MetNI, with MetQ
NaN
NaN
6cvl
2.95
...
1
1
0
2
0
2018-11-16 01:53:03 UTC
2018-11-16 01:53:03 UTC
6cvl
1343
0.205510
915
3286
617.0
Slo potassium channels
Aplysia californica
Eykaryo. plasma
High conductance calcium-activated potassium c...
NaN
NaN
5tj6
3.50
...
1
1
0
4
0
2018-08-13 03:54:53 UTC
2018-08-13 03:54:53 UTC
5tj6
3560
0.205618
1132
3650
539.0
Drug conjugate transporter (ABC C family)
Bos taurus
Eykaryo. plasma
Multidrug resistance protein 1 (MRP1), outward...
NaN
NaN
6bhu
3.14
...
1
1
0
1
0
2018-08-13 03:55:09 UTC
2018-11-13 03:56:08 UTC
6bhu
1208
0.206126
527
2274
453.0
P-ATPase
Sus scrofa
Eykaryo. plasma
Sodium-potassium pump, Na+ bound state
NaN
NaN
4hqj
4.30
...
1
1
0
3
0
2018-08-13 03:53:49 UTC
2018-08-13 03:53:49 UTC
4hqj
1297
0.206631
1388
4139
926.0
Ligand-gated ion channel of neurotransmitter r...
Mus musculus
Eykaryo. plasma
5-hydroxytryptamine receptor 3A, structure 3
NaN
NaN
6his
4.50
...
1
1
0
5
0
2018-11-08 00:05:13 UTC
2018-11-08 00:12:33 UTC
6his
1935
0.206718
1392
4143
930.0
Ligand-gated ion channel of neurotransmitter r...
Mus musculus
Eykaryo. plasma
5-hydroxytryptamine receptor 3A, structure 7
NaN
NaN
6hiq
4.50
...
1
1
1
5
0
2018-11-08 00:14:47 UTC
2018-11-08 00:14:47 UTC
6hiq
1925
0.206753
1505
4291
485.0
ABC transporter B family (ABCB)
Cyanidoschyzon merolae
Eykaryo. plasma
ATP-binding transporter CmABCB1, outward-open ...
NaN
NaN
6a6m
1.90
...
1
1
0
2
0
2019-02-23 21:39:15 UTC
2019-02-23 21:39:15 UTC
6a6m
1178
0.207131
334
1925
69.0
G-protein coupled receptors, family A
Homo sapiens
Eykaryo. plasma
Adenosine receptor A2a, inactive, with antibody
NaN
NaN
3vg9
2.70
...
1
1
1
1
0
2018-08-13 03:53:17 UTC
2018-11-08 23:11:07 UTC
3vg9
733
0.207367
910
3279
535.0
Drug conjugate transporter (ABC C family)
Danio rerio
Endosome
Cystic fibrosis transmembrane conductance regu...
NaN
NaN
5uar
3.73
...
1
1
0
1
0
2018-08-13 03:54:53 UTC
2018-08-13 03:54:53 UTC
5uar
1184
0.208615
479
2173
1311.0
Energy-coupling factor transporters
Lactobacillus brevis
Gram-pos. inner
Energy-coupling factor transporter EcfA, confo...
NaN
NaN
4huq
3.00
...
1
1
0
2
0
2018-08-13 03:53:38 UTC
2018-08-13 03:53:38 UTC
4huq
968
0.208678
1216
3851
297.0
Polysulfide reductase
Flavobacterium johnsoniae
Gram-neg. inner
Polysulphide reductase complex
NaN
NaN
6btm
3.40
...
1
1
0
4
0
2018-08-13 03:55:16 UTC
2018-08-13 03:55:16 UTC
6btm
2361
0.208810
391
2034
448.0
P-ATPase
Sus scrofa
Eykaryo. plasma
Sodium-potassium pump, E2P state, conformation 5
NaN
NaN
2yn9
8.0
...
1
1
0
2
0
2018-08-13 03:53:26 UTC
2018-08-13 03:53:26 UTC
2yn9
1239
0.209040
1151
3684
542.0
Drug conjugate transporter (ABC C family)
Homo sapiens
Eykaryo. plasma
Multidrug resistance protein 1, outward-facing...
NaN
NaN
6c0v
3.40
...
1
1
0
1
0
2018-08-13 03:55:10 UTC
2018-11-13 03:51:35 UTC
6c0v
1154
0.209705
187
864
443.0
P-ATPase
Squalus acanthias
Eykaryo. plasma
Sodium-potassium pump, E2P state
NaN
NaN
2zxe
2.40
...
1
1
21
3
0
2018-08-13 03:50:53 UTC
2018-08-13 03:50:53 UTC
2zxe
1296
0.209877
815
3019
451.0
P-ATPase
Bos taurus
Eykaryo. plasma
Sodium-potassium pump, E2 state
NaN
NaN
4xe5
3.90
...
1
1
0
3
0
2018-08-13 03:54:34 UTC
2018-08-13 03:54:34 UTC
4xe5
1307
0.210406
274
1107
445.0
P-ATPase
Sus scrofa
Eykaryo. plasma
Sodium-potassium pump, E2P state, conformation 2
NaN
NaN
4hyt
3.40
...
1
1
2
3
0
2018-08-13 03:51:22 UTC
2018-11-06 18:47:05 UTC
4hyt
1316
0.210486
689
2745
447.0
P-ATPase
Sus scrofa
Eykaryo. plasma
Sodium-potassium pump, E2P state, conformation 4
NaN
NaN
4res
3.41
...
1
1
0
3
0
2018-08-13 03:54:17 UTC
2018-08-13 03:54:17 UTC
4res
1315
0.211407
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
1008
3461
252.0
Photosystem II
Arabidopsis thaliana
Thylakoid
PSII-LHCII supercomplex
NaN
NaN
5mdx
5.3
...
1
1
0
48
0
2018-08-13 03:55:03 UTC
2018-08-13 03:55:03 UTC
5mdx
8271
0.415186
533
2286
587.0
Protein translocase
Escherichia coli
Gram-neg. inner
Ribosome-SecYE complex, structure 2
NaN
NaN
3j45
9.9
...
1
1
0
3
0
2018-08-13 03:53:50 UTC
2018-08-13 03:53:50 UTC
3j45
820
0.415854
335
1926
355.0
Cytochrome c oxidases
Bacillus stearothermophilus
Gram-pos. inner
Nitric oxide reductase
NaN
NaN
3ayf
2.50
...
1
1
1
1
0
2018-08-13 03:53:17 UTC
2018-08-13 03:53:17 UTC
3ayf
754
0.417772
1328
4004
1057.0
Mechanosensitive OSCA channels
Arabidopsis thaliana
Eykaryo. plasma
OSCA3.1 channel
NaN
NaN
5z1f
4.80
...
1
1
0
2
0
2018-09-14 20:57:18 UTC
2018-11-16 02:15:46 UTC
5z1f
1202
0.418469
555
2324
621.0
Cyclic nucleotide-gated ion channel
Rhizobium loti
Gram-neg. inner
Bacterial cyclic nucleotide regulated ion chan...
NaN
NaN
4chw
7.0
...
1
1
0
4
0
2018-08-13 03:53:51 UTC
2018-08-13 03:53:51 UTC
4chw
1372
0.419825
1502
4288
234.0
Photosystem I
Bryopsis corticulans
Thylakoid
Photosystem I of algae
NaN
The hydrophobic thickness, transfer energy and...
6igz
3.49
...
1
1
0
20
0
2019-02-23 21:28:11 UTC
2019-02-23 21:31:20 UTC
6igz
4518
0.420761
15
45
258.0
Light-harvesting complexes from bacteria
Rhodospirillum molischianum
Gram-neg. inner
Light-harvesting complex
NaN
NaN
1lgh
2.40
...
1
1
0
16
0
2018-08-13 03:49:49 UTC
2018-08-13 03:49:49 UTC
1lgh
792
0.424242
38
69
389.0
V-type and F-type ATPases
Ilyobacter tartaricus
Gram-neg. inner
F-type Sodium ATPase
NaN
Hydrophobic boundaries expand when calculated ...
1yce
2.40
...
1
1
1
11
2
2018-08-13 03:49:51 UTC
2018-08-13 03:49:51 UTC
1yce
979
0.426966
1180
3794
645.0
Two pore Ca2+ channels
Mus musculus
Endosome
Two pore calcium channel TPC1, structure 1
NaN
NaN
6c96
3.40
...
1
1
0
2
0
2018-08-13 03:55:14 UTC
2018-08-13 03:55:14 UTC
6c96
1446
0.427386
1551
4376
246.0
Photosystem I
Chlamydomonas reinhardtii
Thylakoid
Photosystem I, with light-harvesting complex I...
NaN
NaN
6ijo
3.3
...
1
1
0
20
0
2019-03-30 03:11:48 UTC
2019-03-30 03:11:48 UTC
6ijo
4382
0.428571
1129
3647
1044.0
Apoctamin (TMEM16) family
Mus musculus
Eykaryo. plasma
Chloride channel TMEM16A, structure 3
NaN
NaN
6bgi
3.80
...
1
1
0
2
0
2018-08-13 03:55:09 UTC
2018-08-13 03:55:09 UTC
6bgi
1020
0.430392
540
2296
256.0
Bacterial photosystems
Rhodobacter sphaeroides
Gram-neg. inner
Reaction center-LH1-PufX dimer complex, unit 1
NaN
NaN
4jcb
7.78
...
1
1
1
31
0
2018-08-13 03:53:50 UTC
2018-08-13 03:53:50 UTC
4jcb
2152
0.430762
128
675
266.0
Light-harvesting complexes from chloroplasts
Pisum sativum
Thylakoid
Light-Harvesting Complex II
NaN
NaN
2bhw
2.5
...
1
1
0
3
0
2018-08-13 03:50:40 UTC
2018-08-13 03:50:40 UTC
2bhw
669
0.434978
541
2297
257.0
Bacterial photosystems
Rhodobacter sphaeroides
Gram-neg. inner
Reaction center-LH1-PufX dimer complex, unit 2
NaN
NaN
4jc9
7.78
...
1
1
0
32
0
2018-08-13 03:53:50 UTC
2018-08-13 03:53:50 UTC
4jc9
2152
0.436803
354
1959
345.0
Cytochrome c oxidases
Thermus thermophilus
Gram-neg. inner
Bacterial cytochrome c oxidase, caa3-type
NaN
NaN
2yev
2.36
...
1
1
0
3
0
2018-08-13 03:53:19 UTC
2018-08-13 03:53:19 UTC
2yev
1162
0.438038
20
50
348.0
Cytochrome c oxidases
Rhodobacter sphaeroides
Gram-neg. inner
Bacterial cytochrome c oxidase, with C subunit
NaN
NaN
1m56
2.30
...
1
1
11
4
0
2018-08-13 03:49:50 UTC
2018-11-06 16:10:56 UTC
1m56
1114
0.438061
97
315
603.0
KcsA voltage-gated K+ channels
Streptomyces lividans
Gram-pos. inner
Potassium channel KcsA, full length, open
NaN
NaN
3pjs
3.8
...
1
1
0
4
0
2018-08-13 03:50:09 UTC
2018-08-13 03:50:09 UTC
3pjs
556
0.438849
1457
4239
292.0
H+ or Na+ translocating NADH dehydrogenase
Yarrowia lipolytica
Mitochon. inner
Respiratory complex I, structure 3
NaN
There are problems with defining names of subu...
6h8k
3.79
...
1
1
0
28
0
2019-01-30 14:56:17 UTC
2019-01-30 14:59:03 UTC
6h8k
2442
0.438984
1181
3795
647.0
Two pore Ca2+ channels
Mus musculus
Endosome
Two pore calcium channel TPC1, structure 2
NaN
NaN
6c9a
3.20
...
1
1
0
2
0
2018-08-13 03:55:14 UTC
2018-08-13 03:55:14 UTC
6c9a
1446
0.439834
1211
3845
255.0
Bacterial photosystems
Roseiflexus castenholzii
Gram-neg. inner
LH-RC complex
NaN
NaN
5yq7
4.1
...
1
1
0
32
0
2018-08-13 03:55:16 UTC
2018-08-13 03:55:16 UTC
5yq7
2107
0.440911
204
923
1177.0
Bacterial zinc transporters
Escherichia coli
Gram-neg. inner
Ferrous-iron efflux pump fieF
NaN
NaN
3h90
2.90
...
1
1
0
2
0
2018-08-13 03:50:58 UTC
2018-08-13 03:50:58 UTC
3h90
566
0.441696
1330
4012
646.0
Two pore Ca2+ channels
Arabidopsis thaliana
Vacuole
Two pore calcium channel TPC1, structure 2
NaN
NaN
6e1m
3.30
...
1
1
1
2
0
2018-10-02 18:22:55 UTC
2018-10-18 00:10:23 UTC
6e1m
1066
0.443715
23
53
357.0
Cytochrome c oxidases
Escherichia coli
Gram-neg. inner
Ubiquinol Oxidase
NaN
NaN
1fft
3.50
...
1
1
0
3
0
2018-08-13 03:49:50 UTC
2018-08-13 03:49:50 UTC
1fft
943
0.446448
1168
3735
352.0
Cytochrome c oxidases
Neisseria meningitidis
Gram-neg. inner
Nitric-oxide reductase
NaN
NaN
6fwf
4.20
...
1
1
0
1
0
2018-08-13 03:55:11 UTC
2018-08-13 03:55:11 UTC
6fwf
712
0.448034
490
2191
373.0
V-type and F-type ATPases
Fusobacterium nucleatum
Gram-neg. inner
F0 ATP synthase
NaN
NaN
3zk1
2.20
...
1
1
1
11
0
2018-08-13 03:53:40 UTC
2018-08-13 03:53:40 UTC
3zk1
979
0.449438
1067
3545
231.0
Photosynthetic reaction centers from bacteria
Heliobacterium modesticaldum
Gram-pos. inner
Photosynthetic reaction center-photosystem
NaN
The arrangement in membrane was calculated wit...
5v8k
2.20
...
1
1
0
2
0
2018-08-13 03:55:06 UTC
2018-08-13 03:55:06 UTC
5v8k
625
0.449600
556
2325
622.0
Cyclic nucleotide-gated ion channel
Rhizobium loti
Gram-neg. inner
Bacterial cyclic nucleotide regulated ion chan...
NaN
NaN
4chv
7.0
...
1
1
0
4
0
2018-08-13 03:53:51 UTC
2018-08-13 03:53:51 UTC
4chv
1372
0.451895
1318
3982
1578.0
Odorant receptor channel
Apocrypta bakeri
Mitochon. inner
Odorant receptor
NaN
NaN
6c70
3.50
...
1
1
0
4
0
2018-09-04 22:41:14 UTC
2018-09-04 22:41:14 UTC
6c70
1552
0.453608
186
857
602.0
KcsA voltage-gated K+ channels
Streptomyces lividans
Gram-pos. inner
Potassium channel KcsA, full-length, closed
NaN
NaN
3eff
3.80
...
1
1
0
4
0
2018-08-13 03:50:53 UTC
2018-08-13 03:50:53 UTC
3eff
556
0.455036
1511
4297
669.0
Voltage-sensitive Na+ channel Nav
Homo sapiens
Eykaryo. plasma
Nav channel, human-cockroach hybrid, with alph...
NaN
NaN
6nt4
3.5
...
1
1
0
1
0
2019-02-23 22:04:26 UTC
2019-02-23 22:04:26 UTC
6nt4
1406
0.455903
551 rows × 34 columns
In [ ]:
["4nv6", "4p79", "5dsg", "6g7o", "6a93"]
["4zyo", "5n6m"]
In [ ]:
# two chains
["4rws", "4xt3", "6iu3", "6iu4"]
In [ ]:
# many, seems like two chain
["5uig"]
In [26]:
d.query("InMembraneRatio < 0.5 and InMembraneRatio > 0.4 and Length < 500")
Out[26]:
id
ordering
family_name_cache
species_name_cache
membrane_name_cache
name
description
comments
pdbid
resolution
...
classtype_id
type_id
secondary_representations_count
structure_subunits_count
citations_count
created_at
updated_at
Protein
Length
InMembraneRatio
59
93
171.0
G-protein coupled receptors, family A
Bos taurus
Eykaryo. plasma
Rhodopsin, inactive, with 11-cis retinal
NaN
Structures of intermediate states: bathorhodop...
1gzm
2.70
...
1
1
4
1
6
2018-08-13 03:49:53 UTC
2018-11-09 00:12:32 UTC
1gzm
328
0.481707
133
704
173.0
G-protein coupled receptors, family A
Bos taurus
Eykaryo. plasma
Rhodopsin, partially active, photobleached
NaN
This is probably a photobleached state, not me...
2i37
4.15
...
1
1
0
1
0
2018-08-13 03:50:42 UTC
2018-08-13 03:50:42 UTC
2i37
317
0.492114
219
972
1362.0
Vitamin K epoxide reductase
Synechococcus sp.
Gram-neg. inner
Vitamin K epoxide reductase, structure 2
NaN
NaN
4nv6
4.19
...
1
1
0
1
0
2018-08-13 03:51:04 UTC
2018-11-06 17:09:22 UTC
4nv6
264
0.405303
247
1023
177.0
G-protein coupled receptors, family A
Todarodes pacificus
Eykaryo. plasma
Squid rhodopsin, inactive, with 11-cis retinal
NaN
NaN
2ziy
3.70
...
1
1
2
1
0
2018-08-13 03:51:10 UTC
2018-11-09 00:14:27 UTC
2ziy
370
0.435135
268
1101
169.0
G-protein coupled receptors, family A
Bos taurus
Eykaryo. plasma
Rhodopsin, inactive, structure 2 (with beta-io...
NaN
NaN
3oax
2.6
...
1
1
2
1
0
2018-08-13 03:51:21 UTC
2018-08-13 03:51:21 UTC
3oax
348
0.456897
270
1103
165.0
G-protein coupled receptors, family A
Bos taurus
Eykaryo. plasma
Rhodopsin, active, with transducin peptide
NaN
NaN
4x1h
2.29
...
1
1
4
1
0
2018-08-13 03:51:21 UTC
2018-11-09 00:19:01 UTC
4x1h
337
0.465875
279
1119
259.0
Light-harvesting complexes from bacteria
Rhodospirillum rubrum
Gram-neg. inner
Light-harvesting complex LH1, alpha chain
NaN
Structure in chloroform/methanol.
1xrd
NMR
...
1
1
0
1
0
2018-08-13 03:51:23 UTC
2018-08-13 03:51:23 UTC
1xrd
52
0.442308
291
1242
63.0
G-protein coupled receptors, family A
Homo sapiens
Eykaryo. plasma
Adenosine receptor A2a, engineered, intermedia...
NaN
NaN
2ydv
2.6
...
1
1
1
1
0
2018-08-13 03:51:35 UTC
2018-11-08 23:06:48 UTC
2ydv
315
0.479365
368
1988
1449.0
Peptidase family M48
Homo sapiens
Endoplasm. reticulum
CAAX prenyl protease 1 homolog, structure 2
NaN
NaN
4aw6
3.4
...
1
1
1
1
0
2018-08-13 03:53:22 UTC
2018-08-13 03:53:22 UTC
4aw6
427
0.402810
410
2066
1450.0
Peptidase family M48
Saccharomyces mikatae
Endoplasm. reticulum
CaaX Protease Ste24p
NaN
NaN
4il3
3.10
...
1
1
0
1
0
2018-08-13 03:53:29 UTC
2018-08-13 03:53:29 UTC
4il3
422
0.424171
417
2090
53.0
G-protein coupled receptors, family A
Homo sapiens
Eykaryo. plasma
5-hydroxytryptamine receptor 2B, structure 1, ...
NaN
NaN
4ib4
2.70
...
1
1
3
1
0
2018-08-13 03:53:32 UTC
2018-11-08 23:01:37 UTC
4ib4
375
0.418667
418
2091
48.0
G-protein coupled receptors, family A
Homo sapiens
Eykaryo. plasma
5-hydroxytryptamine receptor 1B, structure 1, ...
NaN
NaN
4iar
2.70
...
1
1
0
1
0
2018-08-13 03:53:32 UTC
2018-11-08 23:00:00 UTC
4iar
379
0.408971
419
2092
49.0
G-protein coupled receptors, family A
Homo sapiens
Eykaryo. plasma
5-hydroxytryptamine receptor 1B, structure 2, ...
NaN
NaN
4iaq
2.80
...
1
1
0
1
0
2018-08-13 03:53:32 UTC
2018-11-08 23:00:41 UTC
4iaq
367
0.430518
421
2094
170.0
G-protein coupled receptors, family A
Bos taurus
Eykaryo. plasma
Rhodopsin, inactive, structure 3
NaN
NaN
1u19
2.20
...
1
1
4
1
0
2018-08-13 03:53:32 UTC
2018-08-13 03:53:32 UTC
1u19
348
0.459770
562
2334
132.0
G-protein coupled receptors, family A
Rattus norvegicus
Eykaryo. plasma
Neurotensin receptor type 1, structure 2, inac...
NaN
NaN
4buo
2.75
...
1
1
3
1
0
2018-08-13 03:53:52 UTC
2018-11-08 23:58:04 UTC
4buo
310
0.496774
571
2346
1363.0
Vitamin K epoxide reductase
Synechococcus sp.
Gram-neg. inner
Vitamin K epoxide reductase, structure 3
NaN
NaN
4nv2
3.61
...
1
1
1
1
0
2018-08-13 03:53:53 UTC
2018-11-06 17:08:44 UTC
4nv2
264
0.424242
601
2447
1013.0
Claudins
Mus musculus
Eykaryo. plasma
Claudin-15
NaN
NaN
4p79
2.40
...
1
1
0
1
0
2018-08-13 03:53:56 UTC
2018-08-13 03:53:56 UTC
4p79
173
0.497110
655
2535
145.0
G-protein coupled receptors, family A
Bos taurus
Eykaryo. plasma
Opsin, active, complex with arrestin peptide
NaN
NaN
4pxf
2.75
...
1
1
1
1
0
2018-08-13 03:54:02 UTC
2018-11-09 00:20:41 UTC
4pxf
332
0.478916
696
2753
103.0
G-protein coupled receptors, family A
Homo sapiens
Eykaryo. plasma
C-X-C chemokine receptor type 4, inactive stat...
NaN
NaN
4rws
3.10
...
1
1
0
1
0
2018-08-13 03:54:18 UTC
2018-11-08 23:35:42 UTC
4rws
346
0.447977
700
2762
190.0
G-protein coupled receptors, family A
Human herpesvirus
Eykaryo. plasma
Viral GPCR US28, active, with fractalkine
NaN
NaN
4xt3
3.80
...
1
1
0
1
0
2018-08-13 03:54:19 UTC
2018-11-09 00:27:17 UTC
4xt3
356
0.426966
717
2808
70.0
G-protein coupled receptors, family A
Homo sapiens
Eykaryo. plasma
Adenosine receptor A2a, intermediate state, wi...
NaN
NaN
4uhr
2.60
...
1
1
1
1
0
2018-08-13 03:54:21 UTC
2018-11-08 23:12:24 UTC
4uhr
310
0.490323
735
2848
1517.0
Fatty acid desaturase
Homo sapiens
Endoplasm. reticulum
Acyl-CoA desaturase
NaN
NaN
4zyo
3.25
...
1
1
0
1
0
2018-08-13 03:54:25 UTC
2018-08-13 03:54:25 UTC
4zyo
298
0.476510
746
2890
1519.0
Fatty acid hydroxylase
Saccharomyces cerevisiae
Endoplasm. reticulum
Ceramide fatty acid hydroxylase SCS7
NaN
NaN
4zr1
2.60
...
1
1
1
1
0
2018-08-13 03:54:27 UTC
2018-08-13 03:54:27 UTC
4zr1
274
0.489051
758
2914
1521.0
Fluoride exporter
Bordetella pertussis
Gram-neg. inner
Fluoride ion transporter CrcB, structure 1
NaN
This is a "dual topology" antiparallel dimer.
5a40
3.60
...
1
1
1
2
0
2018-08-13 03:54:28 UTC
2018-08-13 03:54:28 UTC
5a40
429
0.475524
772
2936
1518.0
Fatty acid desaturase
Mus musculus
Endoplasm. reticulum
Acyl-CoA desaturase
NaN
NaN
4ymk
2.61
...
1
1
0
1
0
2018-08-13 03:54:29 UTC
2018-08-13 03:54:29 UTC
4ymk
322
0.422360
810
3013
128.0
G-protein coupled receptors, family A
Homo sapiens
Eykaryo. plasma
Muscarinic acetylcholine receptor M4, inactive...
NaN
NaN
5dsg
2.6
...
1
1
0
1
0
2018-08-13 03:54:34 UTC
2018-11-08 23:56:13 UTC
5dsg
392
0.408163
827
3046
1398.0
Gamma-secretase
Homo sapiens
Endoplasm. reticulum
Nicastrin, TM helix
NaN
This is structure in DPC micelles; 2n7q is str...
2n7r
NMR
...
1
1
1
1
0
2018-08-13 03:54:37 UTC
2019-04-19 16:21:00 UTC
2n7r
46
0.434783
897
3253
1541.0
Tetraspanin
Homo sapiens
Eykaryo. plasma
CD81 antigen
NaN
NaN
5tcx
2.95
...
1
1
0
1
0
2018-08-13 03:54:51 UTC
2018-08-13 03:54:51 UTC
5tcx
206
0.412621
935
3317
54.0
G-protein coupled receptors, family A
Homo sapiens
Eykaryo. plasma
5-hydroxytryptamine receptor 2B, structure 2, ...
NaN
NaN
5tvn
2.90
...
1
1
1
1
0
2018-08-13 03:54:55 UTC
2018-11-08 23:02:41 UTC
5tvn
393
0.402036
936
3318
68.0
G-protein coupled receptors, family A
Homo sapiens
Eykaryo. plasma
Adenosine receptor A2a, inactive, with antagonist
NaN
NaN
5uig
3.50
...
1
1
0
1
0
2018-08-13 03:54:55 UTC
2018-11-08 23:14:23 UTC
5uig
387
0.410853
954
3353
1543.0
CD36 glycoprotein
Mus musculus
Eykaryo. plasma
Scavenger receptor B-1
NaN
The protein also has uncleaved N-terminal tran...
5ktf
NMR
...
1
1
0
1
0
2018-08-13 03:54:57 UTC
2018-08-13 03:54:57 UTC
5ktf
73
0.410959
958
3365
172.0
G-protein coupled receptors, family A
Bos taurus
Eykaryo. plasma
Rhodopsin, inactive, with cyclic retinal analog
NaN
NaN
5te5
4.01
...
1
1
0
1
0
2018-08-13 03:54:58 UTC
2018-11-09 00:23:20 UTC
5te5
348
0.462644
964
3389
188.0
G-protein coupled receptors, family A
Homo sapiens
Eykaryo. plasma
Type-2 angiotensin II receptor, active state, ...
NaN
NaN
5ung
2.80
...
1
1
1
1
0
2018-08-13 03:54:59 UTC
2018-11-08 23:19:16 UTC
5ung
310
0.483871
997
3439
203.0
GPCR Secretin (B) family
Homo sapiens
Eykaryo. plasma
Glucagon receptor, inactive state, full-length...
NaN
NaN
5xez
3.00
...
1
1
1
1
0
2018-08-13 03:55:01 UTC
2018-11-09 01:04:56 UTC
5xez
389
0.401028
1006
3449
72.0
G-protein coupled receptors, family A
Homo sapiens
Eykaryo. plasma
Apelin receptor, inactive state
NaN
See 2lot, 2low and 2lov for peptides in micell...
5vbl
2.60
...
1
1
0
1
0
2018-08-13 03:55:02 UTC
2019-04-20 17:21:24 UTC
5vbl
311
0.482315
1015
3468
96.0
G-protein coupled receptors, family A
Homo sapiens
Eykaryo. plasma
C-C chemokine receptor type 5, inactive state,...
NaN
NaN
5uiw
2.20
...
1
1
0
1
0
2018-08-13 03:55:03 UTC
2018-11-08 23:33:44 UTC
5uiw
365
0.452055
1022
3477
200.0
GPCR Secretin (B) family
Homo sapiens
Eykaryo. plasma
Glucagon-like peptide 1 receptor, intermediate...
NaN
NaN
5nx2
3.70
...
1
1
0
1
0
2018-08-13 03:55:03 UTC
2018-11-09 01:02:48 UTC
5nx2
394
0.428934
1030
3488
1549.0
Carbon-nitrogen hydrolase
Escherichia coli
Gram-neg. inner
Apolipoprotein N-acyl transferase, structure 2
NaN
NaN
5n6m
3.10
...
1
1
0
1
0
2018-08-13 03:55:04 UTC
2018-08-13 03:55:04 UTC
5n6m
491
0.419552
1175
3745
1567.0
Designed TM alpha-hairpin proteins
Designed proteins
Undefined
Protein TMHC2_E
NaN
NaN
6b87
2.95
...
1
1
0
2
0
2018-08-13 03:55:12 UTC
2018-08-13 03:55:12 UTC
6b87
200
0.420000
1179
3793
161.0
G-protein coupled receptors, family A
Bos taurus
Eykaryo. plasma
Rhodopsin, active, open channel structure
NaN
NaN
6fk6
2.36
...
1
1
7
1
0
2018-08-13 03:55:14 UTC
2018-11-09 00:24:18 UTC
6fk6
326
0.475460
1228
3865
1352.0
Sulfate transporter (CysZ)
Pseudomonas fragi
Gram-neg. inner
Sulfate transporter CysZ
NaN
A dimer with dual topology was suggested for C...
6d79
3.50
...
1
1
0
2
0
2018-08-13 03:55:16 UTC
2018-08-13 03:55:16 UTC
6d79
409
0.447433
1243
3884
86.0
G-protein coupled receptors, family A
Homo sapiens
Eykaryo. plasma
C5a anaphylatoxin chemotactic receptor 1, inac...
NaN
NaN
6c1r
2.20
...
1
1
1
1
0
2018-08-13 03:55:17 UTC
2018-11-08 23:22:54 UTC
6c1r
374
0.411765
1317
3981
208.0
Frizzled/Smoothened family
Homo sapiens
Eykaryo. plasma
Frizzled-4 receptor, inactive state
NaN
NaN
6bd4
2.40
...
1
1
0
1
0
2018-09-04 22:21:55 UTC
2018-11-09 01:08:36 UTC
6bd4
325
0.480000
1376
4126
176.0
G-protein coupled receptors, family A
Todarodes pacificus
Eykaryo. plasma
Squid rhodopsin, inactive, lumi intermediate
NaN
NaN
4ww3
2.80
...
1
1
0
1
0
2018-11-06 16:34:02 UTC
2018-11-09 00:21:29 UTC
4ww3
347
0.472622
1382
4132
1361.0
Vitamin K epoxide reductase
Synechococcus sp.
Gram-neg. inner
Vitamin K epoxide reductase, structure 1
NaN
NaN
3kp9
3.60
...
1
1
0
1
0
2018-11-06 17:10:58 UTC
2018-11-06 17:10:58 UTC
3kp9
259
0.424710
1408
4170
110.0
G-protein coupled receptors, family A
Homo sapiens
Eykaryo. plasma
Endothelin B receptor, intermediate state 2
NaN
NaN
6igk
2.00
...
1
1
0
1
0
2018-12-25 05:23:32 UTC
2018-12-25 05:23:32 UTC
6igk
332
0.463855
1409
4171
111.0
G-protein coupled receptors, family A
Homo sapiens
Eykaryo. plasma
Endothelin B receptor, intermediate state 3
NaN
NaN
6igl
2.70
...
1
1
0
1
0
2018-12-25 05:24:30 UTC
2018-12-25 05:24:30 UTC
6igl
323
0.482972
1449
4228
225.0
Ceramidase
Homo sapiens
Endoplasm. reticulum
Alkaline ceramidase 3
NaN
NaN
6g7o
2.70
...
1
1
0
1
0
2019-01-30 01:33:11 UTC
2019-01-30 01:34:07 UTC
6g7o
350
0.431429
1493
4277
1585.0
Vacuolar iron transporter
Eucalyptus grandis
Vacuole
Iron transporter VIT1, structure 1
NaN
Similar to subunit Y ( Ndufa11) of respiratory...
6iu3
2.7
...
1
1
0
2
0
2019-02-10 17:03:08 UTC
2019-02-11 04:04:11 UTC
6iu3
448
0.491071
1494
4278
1586.0
Vacuolar iron transporter
Eucalyptus grandis
Vacuole
Iron transporter VIT1, structure 2
NaN
Similar to subunit Y ( Ndufa11) of respiratory...
6iu4
3.5
...
1
1
0
2
0
2019-02-10 17:20:20 UTC
2019-02-11 04:04:11 UTC
6iu4
450
0.497778
1497
4282
51.0
G-protein coupled receptors, family A
Homo sapiens
Eykaryo. plasma
5-hydroxytryptamine receptor 2A, structure 1
NaN
NaN
6a93
3.0
...
1
1
0
1
0
2019-02-23 00:38:47 UTC
2019-02-23 00:38:47 UTC
6a93
370
0.410811
1498
4283
52.0
G-protein coupled receptors, family A
Homo sapiens
Eykaryo. plasma
5-hydroxytryptamine receptor 2A, structure 2
NaN
NaN
6a94
2.90
...
1
1
0
1
0
2019-02-23 00:39:56 UTC
2019-02-23 00:39:56 UTC
6a94
359
0.420613
52 rows × 34 columns
In [48]:
chosen = info.query("structure_subunits_count == 1 and InMembraneRatio < 0.5 and Length < 500")
In [50]:
chosen.shape
Out[50]:
(102, 34)
In [57]:
picked = chosen.groupby("superfamily_id").apply(pd.DataFrame.sample, 1)
In [62]:
picked = picked.reset_index(drop=True)
In [63]:
picked.to_csv("/Users/weilu/Research/database/hybrid_prediction_database/picked.csv")
In [52]:
chosen["superfamily_id"].unique()
Out[52]:
array([ 6, 173, 18, 2, 394, 63, 194, 202, 431, 406, 244, 327, 456,
466, 470, 476])
In [68]:
chosen = info.query("structure_subunits_count == 1 and InMembraneRatio < 0.6 and InMembraneRatio > 0.4 and Length < 500")
chosen.shape
Out[68]:
(165, 34)
In [74]:
picked2 = chosen.sort_values("Length").groupby("superfamily_id").head(1).reset_index(drop=True)
In [75]:
picked2.to_csv("/Users/weilu/Research/database/hybrid_prediction_database/picked2.csv")
In [69]:
chosen["superfamily_id"].unique()
Out[69]:
array([ 15, 6, 14, 220, 21, 173, 18, 2, 267, 159, 64, 392, 394,
63, 218, 396, 8, 202, 194, 409, 431, 244, 327, 456, 466, 415,
493, 92])
In [ ]:
In [66]:
a = pd.read_csv("/Users/weilu/Research/database/hybrid_prediction_database/picked.csv", index_col=0)
pdb_list = a["Protein"].to_list()
Out[66]:
['1xrd',
'5uiw',
'3kp9',
'2mi2',
'3e9j',
'6akg',
'5y83',
'2n7r',
'5tcx',
'4aw6',
'5d91',
'4zyo',
'5ktf',
'5vrh',
'5mm0',
'6bms']
In [67]:
picked
Out[67]:
Protein
Length
InMembraneRatio
id
ordering
family_name_cache
species_name_cache
membrane_name_cache
name
description
...
species_id
family_id
superfamily_id
classtype_id
type_id
secondary_representations_count
structure_subunits_count
citations_count
created_at
updated_at
0
1xrd
52
0.442308
1119
259.0
Light-harvesting complexes from bacteria
Rhodospirillum rubrum
Gram-neg. inner
Light-harvesting complex LH1, alpha chain
NaN
...
321
1
2
1
1
0
1
0
2018-08-13 03:51:23 UTC
2018-08-13 03:51:23 UTC
1
5uiw
365
0.452055
3468
96.0
G-protein coupled receptors, family A
Homo sapiens
Eykaryo. plasma
C-C chemokine receptor type 5, inactive state,...
NaN
...
14
14
6
1
1
0
1
0
2018-08-13 03:55:03 UTC
2018-11-08 23:33:44 UTC
2
3kp9
259
0.424710
4132
1361.0
Vitamin K epoxide reductase
Synechococcus sp.
Gram-neg. inner
Vitamin K epoxide reductase, structure 1
NaN
...
133
307
18
1
1
0
1
0
2018-11-06 17:10:58 UTC
2018-11-06 17:10:58 UTC
3
2mi2
104
0.317308
2443
1470.0
TatB protein
Escherichia coli
Gram-neg. inner
Sec-independent protein translocase protein TatB
NaN
...
9
699
63
1
1
0
1
0
2018-08-13 03:53:56 UTC
2018-08-13 03:53:56 UTC
4
3e9j
322
0.270186
2136
1341.0
Disulfide bond oxidoreductase-B (DsbB)
Escherichia coli
Gram-neg. inner
DsbB - DsbA complex, conformation 4
NaN
...
9
247
173
1
1
0
1
0
2018-08-13 03:53:35 UTC
2018-08-13 03:53:35 UTC
5
6akg
301
0.285714
4289
1015.0
Claudins
Mus musculus
Eykaryo. plasma
Claudin-3, structure 1
NaN
...
52
702
194
1
1
0
1
0
2019-02-23 21:34:55 UTC
2019-02-23 21:34:55 UTC
6
5y83
342
0.327485
3927
1226.0
OxaA/YidC
Thermotoga maritima
Gram-neg. inner
Membrane protein insertase YidC
NaN
...
61
293
202
1
1
0
1
0
2018-08-13 03:55:19 UTC
2018-08-13 03:55:19 UTC
7
2n7r
46
0.434783
3046
1398.0
Gamma-secretase
Homo sapiens
Endoplasm. reticulum
Nicastrin, TM helix
NaN
...
14
767
244
1
1
1
1
0
2018-08-13 03:54:37 UTC
2019-04-19 16:21:00 UTC
8
5tcx
206
0.412621
3253
1541.0
Tetraspanin
Homo sapiens
Eykaryo. plasma
CD81 antigen
NaN
...
14
567
327
1
1
0
1
0
2018-08-13 03:54:51 UTC
2018-08-13 03:54:51 UTC
9
4aw6
427
0.402810
1988
1449.0
Peptidase family M48
Homo sapiens
Endoplasm. reticulum
CAAX prenyl protease 1 homolog, structure 2
NaN
...
14
646
394
1
1
1
1
0
2018-08-13 03:53:22 UTC
2018-08-13 03:53:22 UTC
10
5d91
335
0.364179
2951
1501.0
Choline/ethanolamine phosphotransferase 1
Renibacterium salmoninarum
Gram-pos. inner
Phosphatidylinositolphosphate synthase, struct...
NaN
...
648
701
406
1
1
0
1
0
2018-08-13 03:54:31 UTC
2018-08-13 03:54:31 UTC
11
4zyo
298
0.476510
2848
1517.0
Fatty acid desaturase
Homo sapiens
Endoplasm. reticulum
Acyl-CoA desaturase
NaN
...
14
768
431
1
1
0
1
0
2018-08-13 03:54:25 UTC
2018-08-13 03:54:25 UTC
12
5ktf
73
0.410959
3353
1543.0
CD36 glycoprotein
Mus musculus
Eykaryo. plasma
Scavenger receptor B-1
NaN
...
52
840
456
1
1
0
1
0
2018-08-13 03:54:57 UTC
2018-08-13 03:54:57 UTC
13
5vrh
490
0.397959
3489
1550.0
Carbon-nitrogen hydrolase
Escherichia coli
Gram-neg. inner
Apolipoprotein N-acyl transferase, structure 3
NaN
...
9
866
466
1
1
1
1
0
2018-08-13 03:55:04 UTC
2018-11-06 22:23:30 UTC
14
5mm0
355
0.292958
3516
1554.0
Dolichyl-phosphate beta-glucosyltransferase
Pyrococcus furiosus
Archaebac.
Dolichyl phosphate mannose synthase, structure 2
NaN
...
305
870
470
1
1
0
1
0
2018-08-13 03:55:05 UTC
2018-08-13 03:55:05 UTC
15
6bms
282
0.368794
3652
1566.0
DHHC palmitoyltransferase
Danio rerio
Golgi
Palmitoyltransferase
NaN
...
129
886
476
1
1
0
1
0
2018-08-13 03:55:09 UTC
2018-08-13 03:55:09 UTC
16 rows × 34 columns
In [76]:
picked2
Out[76]:
Protein
Length
InMembraneRatio
id
ordering
family_name_cache
species_name_cache
membrane_name_cache
name
description
...
species_id
family_id
superfamily_id
classtype_id
type_id
secondary_representations_count
structure_subunits_count
citations_count
created_at
updated_at
0
2n7r
46
0.434783
3046
1398.0
Gamma-secretase
Homo sapiens
Endoplasm. reticulum
Nicastrin, TM helix
NaN
...
14
767
244
1
1
1
1
0
2018-08-13 03:54:37 UTC
2019-04-19 16:21:00 UTC
1
1jo5
48
0.520833
1320
263.0
Light-harvesting complexes from bacteria
Rhodobacter sphaeroides
Gram-neg. inner
Light-harvesting protein B-875, beta chain
NaN
...
31
1
2
1
1
1
1
0
2018-08-13 03:51:42 UTC
2018-08-13 03:51:42 UTC
2
5ktf
73
0.410959
3353
1543.0
CD36 glycoprotein
Mus musculus
Eykaryo. plasma
Scavenger receptor B-1
NaN
...
52
840
456
1
1
0
1
0
2018-08-13 03:54:57 UTC
2018-08-13 03:54:57 UTC
3
2moz
81
0.506173
2498
1347.0
MerF Mercuric ion uptake family
Morganella morganii
Gram-neg. inner
MerF bacterial mercury uptake transporter, str...
NaN
...
300
322
218
1
1
0
1
0
2018-08-13 03:53:59 UTC
2018-08-13 03:53:59 UTC
4
3zd0
85
0.517647
2254
1479.0
Nucleocapsid P7 protein
Hepatitis C virus
Endoplasm. reticulum
P7 protein (747-809), structure 1
NaN
...
328
674
396
1
1
0
1
0
2018-08-13 03:53:45 UTC
2018-08-13 03:53:45 UTC
5
2lor
97
0.525773
1962
1444.0
Transmembrane protein 141
Homo sapiens
Undefined
Transmembrane protein 141
NaN
...
14
641
392
1
1
0
1
0
2018-08-13 03:53:19 UTC
2018-08-13 03:53:19 UTC
6
2ksr
140
0.592857
321
956.0
Ligand-gated ion channel of neurotransmitter r...
Homo sapiens
Eykaryo. plasma
Nicotinic acetylcholine receptor, beta-2, in h...
NaN
...
14
22
14
1
1
0
1
0
2018-08-13 03:50:09 UTC
2018-08-13 03:50:09 UTC
7
3wkv
140
0.571429
2381
805.0
Voltage-sensing proton channel
Mus musculus
Eykaryo. plasma
Voltage-gated sensor domain of proton channel Hv1
NaN
...
52
806
8
1
1
0
1
0
2018-08-13 03:53:54 UTC
2018-08-13 03:53:54 UTC
8
4p79
173
0.497110
2447
1013.0
Claudins
Mus musculus
Eykaryo. plasma
Claudin-15
NaN
...
52
702
194
1
1
0
1
0
2018-08-13 03:53:56 UTC
2018-08-13 03:53:56 UTC
9
2k74
183
0.530055
819
1335.0
Disulfide bond oxidoreductase-B (DsbB)
Escherichia coli
Gram-neg. inner
Disulfide bond formation protein B, conformati...
NaN
...
9
247
173
1
1
1
1
0
2018-08-13 03:50:49 UTC
2018-08-13 03:50:49 UTC
10
4a2n
192
0.567708
1655
1374.0
Isoprenylcysteine carboxyl methyltransferase (...
Methanosarcina acetivorans
Archaebac.
Integral Membrane Methyltransferase
NaN
...
252
471
159
1
1
0
1
0
2018-08-13 03:52:00 UTC
2018-08-13 03:52:00 UTC
11
5tcx
206
0.412621
3253
1541.0
Tetraspanin
Homo sapiens
Eykaryo. plasma
CD81 antigen
NaN
...
14
567
327
1
1
0
1
0
2018-08-13 03:54:51 UTC
2018-08-13 03:54:51 UTC
12
4b4a
221
0.588235
2040
1465.0
TatC
Aquifex aeolicus
Gram-neg. inner
Sec-independent protein translocase TatC
NaN
...
57
484
63
1
1
2
1
0
2018-08-13 03:53:26 UTC
2018-08-13 03:53:26 UTC
13
3wo7
224
0.575893
2426
1227.0
OxaA/YidC
Bacillus halodurans
Gram-pos. inner
Membrane protein insertase YidC
NaN
...
550
293
202
1
1
1
1
0
2018-08-13 03:53:56 UTC
2018-08-13 03:53:56 UTC
14
3tx3
227
0.519824
1355
1351.0
Sulfate transporter (CysZ)
Idiomarina loihiensis
Gram-neg. inner
Putative sulfate permease CysZ
NaN
...
356
389
267
1
1
0
1
0
2018-08-13 03:51:45 UTC
2018-08-13 03:51:45 UTC
15
3ddl
252
0.587302
823
46.0
Microbial and algal rhodopsins
Salinibacter ruber
Gram-neg. inner
Xanthorhodopsin
NaN
...
256
13
6
1
1
0
1
0
2018-08-13 03:50:49 UTC
2018-08-13 03:50:49 UTC
16
5jwy
254
0.535433
2453
1505.0
Transmembrane lipid phosphatase
Escherichia coli
Gram-neg. inner
Phosphatidylglycerophosphatase
NaN
...
9
704
409
1
1
1
1
0
2018-08-13 03:53:57 UTC
2018-10-28 03:10:41 UTC
17
3kp9
259
0.424710
4132
1361.0
Vitamin K epoxide reductase
Synechococcus sp.
Gram-neg. inner
Vitamin K epoxide reductase, structure 1
NaN
...
133
307
18
1
1
0
1
0
2018-11-06 17:10:58 UTC
2018-11-06 17:10:58 UTC
18
5xpd
269
0.539033
3548
1326.0
Eukaryotic SWEET transporters
Arabidopsis thaliana
Eykaryo. plasma
Bidirectional sugar transporter SWEET13, inwar...
NaN
...
246
789
415
1
1
0
1
0
2018-08-13 03:55:06 UTC
2018-11-13 03:35:09 UTC
19
4zr1
274
0.489051
2890
1519.0
Fatty acid hydroxylase
Saccharomyces cerevisiae
Endoplasm. reticulum
Ceramide fatty acid hydroxylase SCS7
NaN
...
36
779
431
1
1
1
1
0
2018-08-13 03:54:27 UTC
2018-08-13 03:54:27 UTC
20
6gci
292
0.582192
4231
1248.0
ADP/ATP carrier
Myceliophthora thermophila
Mitochon. inner
Mitochondrial ADP-ATP carrier
NaN
...
814
29
21
1
1
0
1
0
2019-01-30 13:47:49 UTC
2019-01-30 13:47:49 UTC
21
6a2j
309
0.576052
4201
338.0
Heme A synthase
Bacillus subtilis
Gram-pos. inner
Heme A synthase
NaN
...
89
971
92
1
1
1
1
0
2018-12-25 17:30:58 UTC
2018-12-25 17:30:58 UTC
22
4jr9
409
0.599022
2187
1116.0
Nitrate/nitrite porter
Escherichia coli
Gram-neg. inner
Nitrate/nitrite exchanger NarK, partially occl...
NaN
...
9
666
15
1
1
1
1
0
2018-08-13 03:53:39 UTC
2018-08-13 03:53:39 UTC
23
4il3
422
0.424171
2066
1450.0
Peptidase family M48
Saccharomyces mikatae
Endoplasm. reticulum
CaaX Protease Ste24p
NaN
...
517
646
394
1
1
0
1
0
2018-08-13 03:53:29 UTC
2018-08-13 03:53:29 UTC
24
6ids
438
0.586758
4254
1292.0
Multi antimicrobial extrusion (MATE) family
Vibrio cholerae
Gram-neg. inner
MATE transporter VcmN, structure 3
NaN
...
45
327
220
1
1
0
1
0
2019-01-30 18:13:02 UTC
2019-01-30 18:13:02 UTC
25
5n6m
491
0.419552
3488
1549.0
Carbon-nitrogen hydrolase
Escherichia coli
Gram-neg. inner
Apolipoprotein N-acyl transferase, structure 2
NaN
...
9
866
466
1
1
0
1
0
2018-08-13 03:55:04 UTC
2018-08-13 03:55:04 UTC
26
6bug
492
0.585366
4049
1579.0
Membrane-bound O-acyltransferase
Streptococcus thermophilus
Gram-neg. outer
D-alanyl transfer protein DltB, with D-alanyl ...
NaN
...
565
924
493
1
1
2
1
0
2018-10-12 16:46:37 UTC
2018-10-18 16:48:56 UTC
27
4dji
493
0.598377
1938
1153.0
Amino acid-Polyamine-Organocation (APC) family
Escherichia coli
Gram-neg. inner
Glutamate/gamma-aminobutyrate antiporter
NaN
...
9
281
64
1
1
1
1
0
2018-08-13 03:53:18 UTC
2018-08-13 03:53:18 UTC
28 rows × 34 columns
In [ ]:
Content source: luwei0917/awsemmd_script
Similar notebooks: