In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import os
# from small_script.myFunctions import *
from Bio.PDB.Polypeptide import three_to_one


%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
plt.rcParams['figure.figsize'] = [16.18033, 10]    #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100

In [3]:
data = pd.read_csv("/Users/weilu/Research/database/membrane_training_set/proteins-2019-05-01.csv")
data.pdbid = data.pdbid.apply(lambda x: x[2:-1])

In [4]:
info = pd.read_csv("/Users/weilu/Research/database/membrane_contact_dtabase/for_iter0_training_complete_jun06.csv", index_col=0)
info = info.drop_duplicates().reset_index(drop=True)

In [5]:
data["Protein"] = data["pdbid"]

In [6]:
info.shape


Out[6]:
(1560, 3)

In [7]:
len(info.Protein.unique())


Out[7]:
1560

In [8]:
d = data.query("classtype_id == 1").reset_index(drop=True)
d = d.drop_duplicates(subset="Protein").reset_index(drop=True)

In [9]:
d.shape


Out[9]:
(1592, 32)

In [10]:
d = d.merge(info, on="Protein")

In [11]:
d.shape


Out[11]:
(1560, 34)

In [24]:
d.query("Protein == '5mg3'").T


Out[24]:
926
id 3306
ordering 560
family_name_cache Protein translocase
species_name_cache Escherichia coli
membrane_name_cache Gram-neg. inner
name Holo-translocon
description NaN
comments NaN
pdbid 5mg3
resolution 14.0
topology_subunit Y
topology_show_in True
thickness 30.6
thicknesserror 0.8
subunit_segments 29
tilt 5
tilterror 0
gibbs -123.2
tau NaN
verification NaN
membrane_id 2
species_id 9
family_id 27
superfamily_id 19
classtype_id 1
type_id 1
secondary_representations_count 1
structure_subunits_count 6
citations_count 0
created_at 2018-08-13 03:54:54 UTC
updated_at 2018-08-13 03:54:54 UTC
Protein 5mg3
Length 1698
InMembraneRatio 0.36808

In [13]:
d.query("Length > 500").shape


Out[13]:
(1065, 34)

In [19]:
d.columns


Out[19]:
Index(['id', 'ordering', 'family_name_cache', 'species_name_cache',
       'membrane_name_cache', 'name', 'description', 'comments', 'pdbid',
       'resolution', 'topology_subunit', 'topology_show_in', 'thickness',
       'thicknesserror', 'subunit_segments', 'tilt', 'tilterror', 'gibbs',
       'tau', 'verification', 'membrane_id', 'species_id', 'family_id',
       'superfamily_id', 'classtype_id', 'type_id',
       'secondary_representations_count', 'structure_subunits_count',
       'citations_count', 'created_at', 'updated_at', 'Protein', 'Length',
       'InMembraneRatio'],
      dtype='object')

In [18]:
d.query("Length > 500").hist("InMembraneRatio", bins=50)


Out[18]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a1643ae48>]],
      dtype=object)

In [17]:
d.hist("InMembraneRatio", bins=50)


Out[17]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a17106630>]],
      dtype=object)

In [22]:
d.query("structure_subunits_count == 1 and InMembraneRatio < 0.46 and InMembraneRatio > 0.2 and Length > 500").sort_values("InMembraneRatio")[['pdbid', 'subunit_segments'
                                                                                                                                               , 'type_id','secondary_representations_count', 'structure_subunits_count']]


Out[22]:
pdbid subunit_segments type_id secondary_representations_count structure_subunits_count
513 4ksd 12 1 0 1
1132 6bhu 17 1 0 1
334 3vg9 7 1 1 1
910 5uar 12 1 0 1
1151 6c0v 12 1 0 1
1039 5w81 12 1 0 1
1431 6msm 12 1 0 1
951 5uja 17 1 0 1
1163 6fn4 12 1 0 1
1462 6q81 12 1 0 1
1201 5ywd 12 1 0 1
1235 6gdi 12 1 0 1
510 4lsg 12 1 0 1
919 5uak 12 1 0 1
950 5uj9 17 1 0 1
1379 4xwk 12 1 0 1
1200 5yw7 12 1 0 1
546 4m1m 12 1 2 1
1162 6fn1 12 1 0 1
1270 6dmy 12 1 0 1
906 5ko2 12 1 2 1
907 5kpi 12 1 0 1
1354 6d3r 12 1 1 1
704 4q9l 12 1 0 1
594 3wmg 6 1 0 1
512 4ksc 12 1 2 1
908 5kpj 12 1 0 1
63 2zbd 10 1 1 1
706 4q9j 12 1 0 1
178 3g5u 12 1 2 1
... ... ... ... ... ...
637 4umv 8 1 0 1
1069 5wo7 6 1 1 1
850 5l7i 7 1 0 1
269 4lde 7 1 4 1
1234 6d32 7 1 1 1
1140 5yqz 7 1 0 1
552 4nab 10 1 0 1
1362 6mxt 7 1 0 1
1089 5u74 12 1 0 1
1078 5u73 12 1 2 1
812 4zjc 7 1 1 1
1455 6ajf 12 1 1 1
1456 6ajg 12 1 2 1
816 5ejz 8 1 2 1
1385 5gmy 13 1 0 1
313 3tt3 12 1 0 1
992 5xap 12 1 0 1
991 5xan 12 1 0 1
536 3waj 13 1 1 1
290 3aqp 12 1 0 1
1153 5yhf 12 1 0 1
990 5xam 12 1 0 1
1495 6n3t 12 1 1 1
1091 5ogl 13 1 1 1
61 1wpg 10 1 0 1
297 3rce 13 1 0 1
1029 5n6h 8 1 1 1
335 3ayf 14 1 1 1
1168 6fwf 14 1 0 1
1511 6nt4 24 1 0 1

106 rows × 5 columns


In [20]:
d.query("InMembraneRatio < 0.46 and InMembraneRatio > 0.2 and Length > 500").sort_values("InMembraneRatio")[['pdbid', 'type_id','secondary_representations_count', 'structure_subunits_count']]


Out[20]:
pdbid type_id secondary_representations_count structure_subunits_count
1116 6bpq 1 0 4
306 3sya 1 3 4
528 3wgu 1 1 3
164 5aji 1 4 7
1387 6hco 1 0 2
921 5tji 1 0 4
982 5x41 1 0 2
1215 6f0k 1 0 4
513 4ksd 1 0 1
1158 6be1 1 0 5
220 3kdp 1 0 3
581 4mrs 1 4 2
1187 5ylv 1 1 2
1395 6cvl 1 0 2
915 5tj6 1 0 4
1132 6bhu 1 0 1
527 4hqj 1 0 3
1388 6his 1 0 5
1392 6hiq 1 1 5
1505 6a6m 1 0 2
334 3vg9 1 1 1
910 5uar 1 0 1
479 4huq 1 0 2
1216 6btm 1 0 4
391 2yn9 1 0 2
1151 6c0v 1 0 1
187 2zxe 1 21 3
815 4xe5 1 0 3
274 4hyt 1 2 3
689 4res 1 0 3
... ... ... ... ...
1008 5mdx 1 0 48
533 3j45 1 0 3
335 3ayf 1 1 1
1328 5z1f 1 0 2
555 4chw 1 0 4
1502 6igz 1 0 20
15 1lgh 1 0 16
38 1yce 1 1 11
1180 6c96 1 0 2
1551 6ijo 1 0 20
1129 6bgi 1 0 2
540 4jcb 1 1 31
128 2bhw 1 0 3
541 4jc9 1 0 32
354 2yev 1 0 3
20 1m56 1 11 4
97 3pjs 1 0 4
1457 6h8k 1 0 28
1181 6c9a 1 0 2
1211 5yq7 1 0 32
204 3h90 1 0 2
1330 6e1m 1 1 2
23 1fft 1 0 3
1168 6fwf 1 0 1
490 3zk1 1 1 11
1067 5v8k 1 0 2
556 4chv 1 0 4
1318 6c70 1 0 4
186 3eff 1 0 4
1511 6nt4 1 0 1

551 rows × 4 columns


In [14]:
d.query("InMembraneRatio < 0.46 and InMembraneRatio > 0.2 and Length > 500").sort_values("InMembraneRatio")


Out[14]:
id ordering family_name_cache species_name_cache membrane_name_cache name description comments pdbid resolution ... classtype_id type_id secondary_representations_count structure_subunits_count citations_count created_at updated_at Protein Length InMembraneRatio
1116 3626 790.0 Polycystin cation channel Ficedula albicollis Eykaryo. plasma TRPM8 channel NaN NaN 6bpq 4.10 ... 1 1 0 4 0 2018-08-13 03:55:09 UTC 2018-08-13 03:55:09 UTC 6bpq 3072 0.201172
306 1343 700.0 Inward rectifier potassium channels Mus musculus Eykaryo. plasma G protein-activated inward rectifier potassium... NaN NaN 3sya 2.98 ... 1 1 3 4 0 2018-08-13 03:51:42 UTC 2018-08-13 03:51:42 UTC 3sya 1312 0.201220
528 2279 452.0 P-ATPase Sus scrofa Eykaryo. plasma Sodium-potassium pump, Na+ bound E1P preceedin... NaN NaN 3wgu 2.80 ... 1 1 1 3 0 2018-08-13 03:53:49 UTC 2018-08-13 03:53:49 UTC 3wgu 1331 0.201352
164 813 1006.0 Small conductance mechanosensitive ion channel... Escherichia coli Gram-neg. inner Mechanosensitive channel protein MscS, open state NaN NaN 5aji 2.99 ... 1 1 4 7 0 2018-08-13 03:50:49 UTC 2018-08-13 03:50:49 UTC 5aji 1802 0.201443
1387 4138 529.0 ABC transporter G family Homo sapiens Eykaryo. plasma ABC transporter ABCG2, structure 6 NaN NaN 6hco 3.58 ... 1 1 0 2 0 2018-11-07 23:56:06 UTC 2018-11-07 23:56:06 UTC 6hco 1576 0.201777
921 3298 618.0 Slo potassium channels Aplysia californica Eykaryo. plasma High conductance calcium-activated potassium c... NaN NaN 5tji 3.80 ... 1 1 0 4 0 2018-08-13 03:54:54 UTC 2018-08-13 03:54:54 UTC 5tji 3528 0.202381
982 3412 1315.0 Cobalt uptake transporter Rhodobacter capsulatus Gram-neg. inner CbiMQO-complex, structure 2 NaN Tilt angles were calculated as for non-TM subu... 5x41 3.47 ... 1 1 0 2 0 2018-08-13 03:55:00 UTC 2018-08-13 03:55:00 UTC 5x41 988 0.202429
1215 3850 298.0 Polysulfide reductase Rhodothermus marinus Gram-neg. inner Polysulphide reductase complex NaN NaN 6f0k 3.87 ... 1 1 0 4 0 2018-08-13 03:55:16 UTC 2018-08-13 03:55:16 UTC 6f0k 2504 0.204473
513 2240 515.0 Multidrug resistance exporter (MDR) Mus musculus Eykaryo. plasma P-glycoprotein, inward-facing conformation 2c NaN NaN 4ksd 4.10 ... 1 1 0 1 0 2018-08-13 03:53:44 UTC 2018-08-13 03:53:44 UTC 4ksd 1300 0.204615
1158 3705 925.0 Ligand-gated ion channel of neurotransmitter r... Mus musculus Eykaryo. plasma 5-hydroxytryptamine receptor 3A, structure 2 NaN NaN 6be1 4.31 ... 1 1 0 5 0 2018-08-13 03:55:11 UTC 2018-08-13 03:55:11 UTC 6be1 1995 0.205013
220 973 446.0 P-ATPase Sus scrofa Eykaryo. plasma Sodium-potassium pump, E2P state, conformation 3 NaN NaN 3kdp 3.50 ... 1 1 0 3 0 2018-08-13 03:51:04 UTC 2018-08-13 03:51:04 UTC 3kdp 1311 0.205187
581 2393 478.0 ABC transporter B family (ABCB) Novosphingobium aromaticivorans Gram-neg. inner ABC transporter related protein NaN NaN 4mrs 2.35 ... 1 1 4 2 0 2018-08-13 03:53:55 UTC 2018-08-13 03:53:55 UTC 4mrs 1174 0.205281
1187 3806 450.0 P-ATPase Sus scrofa Eykaryo. plasma Sodium-potassium pump, E2P state, conformation 7 NaN NaN 5ylv 2.80 ... 1 1 1 2 0 2018-08-13 03:55:14 UTC 2018-08-13 03:55:14 UTC 5ylv 1247 0.205293
1395 4148 474.0 Binding-protein-dependent transport system Escherichia coli Gram-neg. inner Methionine importer MetNI, with MetQ NaN NaN 6cvl 2.95 ... 1 1 0 2 0 2018-11-16 01:53:03 UTC 2018-11-16 01:53:03 UTC 6cvl 1343 0.205510
915 3286 617.0 Slo potassium channels Aplysia californica Eykaryo. plasma High conductance calcium-activated potassium c... NaN NaN 5tj6 3.50 ... 1 1 0 4 0 2018-08-13 03:54:53 UTC 2018-08-13 03:54:53 UTC 5tj6 3560 0.205618
1132 3650 539.0 Drug conjugate transporter (ABC C family) Bos taurus Eykaryo. plasma Multidrug resistance protein 1 (MRP1), outward... NaN NaN 6bhu 3.14 ... 1 1 0 1 0 2018-08-13 03:55:09 UTC 2018-11-13 03:56:08 UTC 6bhu 1208 0.206126
527 2274 453.0 P-ATPase Sus scrofa Eykaryo. plasma Sodium-potassium pump, Na+ bound state NaN NaN 4hqj 4.30 ... 1 1 0 3 0 2018-08-13 03:53:49 UTC 2018-08-13 03:53:49 UTC 4hqj 1297 0.206631
1388 4139 926.0 Ligand-gated ion channel of neurotransmitter r... Mus musculus Eykaryo. plasma 5-hydroxytryptamine receptor 3A, structure 3 NaN NaN 6his 4.50 ... 1 1 0 5 0 2018-11-08 00:05:13 UTC 2018-11-08 00:12:33 UTC 6his 1935 0.206718
1392 4143 930.0 Ligand-gated ion channel of neurotransmitter r... Mus musculus Eykaryo. plasma 5-hydroxytryptamine receptor 3A, structure 7 NaN NaN 6hiq 4.50 ... 1 1 1 5 0 2018-11-08 00:14:47 UTC 2018-11-08 00:14:47 UTC 6hiq 1925 0.206753
1505 4291 485.0 ABC transporter B family (ABCB) Cyanidoschyzon merolae Eykaryo. plasma ATP-binding transporter CmABCB1, outward-open ... NaN NaN 6a6m 1.90 ... 1 1 0 2 0 2019-02-23 21:39:15 UTC 2019-02-23 21:39:15 UTC 6a6m 1178 0.207131
334 1925 69.0 G-protein coupled receptors, family A Homo sapiens Eykaryo. plasma Adenosine receptor A2a, inactive, with antibody NaN NaN 3vg9 2.70 ... 1 1 1 1 0 2018-08-13 03:53:17 UTC 2018-11-08 23:11:07 UTC 3vg9 733 0.207367
910 3279 535.0 Drug conjugate transporter (ABC C family) Danio rerio Endosome Cystic fibrosis transmembrane conductance regu... NaN NaN 5uar 3.73 ... 1 1 0 1 0 2018-08-13 03:54:53 UTC 2018-08-13 03:54:53 UTC 5uar 1184 0.208615
479 2173 1311.0 Energy-coupling factor transporters Lactobacillus brevis Gram-pos. inner Energy-coupling factor transporter EcfA, confo... NaN NaN 4huq 3.00 ... 1 1 0 2 0 2018-08-13 03:53:38 UTC 2018-08-13 03:53:38 UTC 4huq 968 0.208678
1216 3851 297.0 Polysulfide reductase Flavobacterium johnsoniae Gram-neg. inner Polysulphide reductase complex NaN NaN 6btm 3.40 ... 1 1 0 4 0 2018-08-13 03:55:16 UTC 2018-08-13 03:55:16 UTC 6btm 2361 0.208810
391 2034 448.0 P-ATPase Sus scrofa Eykaryo. plasma Sodium-potassium pump, E2P state, conformation 5 NaN NaN 2yn9 8.0 ... 1 1 0 2 0 2018-08-13 03:53:26 UTC 2018-08-13 03:53:26 UTC 2yn9 1239 0.209040
1151 3684 542.0 Drug conjugate transporter (ABC C family) Homo sapiens Eykaryo. plasma Multidrug resistance protein 1, outward-facing... NaN NaN 6c0v 3.40 ... 1 1 0 1 0 2018-08-13 03:55:10 UTC 2018-11-13 03:51:35 UTC 6c0v 1154 0.209705
187 864 443.0 P-ATPase Squalus acanthias Eykaryo. plasma Sodium-potassium pump, E2P state NaN NaN 2zxe 2.40 ... 1 1 21 3 0 2018-08-13 03:50:53 UTC 2018-08-13 03:50:53 UTC 2zxe 1296 0.209877
815 3019 451.0 P-ATPase Bos taurus Eykaryo. plasma Sodium-potassium pump, E2 state NaN NaN 4xe5 3.90 ... 1 1 0 3 0 2018-08-13 03:54:34 UTC 2018-08-13 03:54:34 UTC 4xe5 1307 0.210406
274 1107 445.0 P-ATPase Sus scrofa Eykaryo. plasma Sodium-potassium pump, E2P state, conformation 2 NaN NaN 4hyt 3.40 ... 1 1 2 3 0 2018-08-13 03:51:22 UTC 2018-11-06 18:47:05 UTC 4hyt 1316 0.210486
689 2745 447.0 P-ATPase Sus scrofa Eykaryo. plasma Sodium-potassium pump, E2P state, conformation 4 NaN NaN 4res 3.41 ... 1 1 0 3 0 2018-08-13 03:54:17 UTC 2018-08-13 03:54:17 UTC 4res 1315 0.211407
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1008 3461 252.0 Photosystem II Arabidopsis thaliana Thylakoid PSII-LHCII supercomplex NaN NaN 5mdx 5.3 ... 1 1 0 48 0 2018-08-13 03:55:03 UTC 2018-08-13 03:55:03 UTC 5mdx 8271 0.415186
533 2286 587.0 Protein translocase Escherichia coli Gram-neg. inner Ribosome-SecYE complex, structure 2 NaN NaN 3j45 9.9 ... 1 1 0 3 0 2018-08-13 03:53:50 UTC 2018-08-13 03:53:50 UTC 3j45 820 0.415854
335 1926 355.0 Cytochrome c oxidases Bacillus stearothermophilus Gram-pos. inner Nitric oxide reductase NaN NaN 3ayf 2.50 ... 1 1 1 1 0 2018-08-13 03:53:17 UTC 2018-08-13 03:53:17 UTC 3ayf 754 0.417772
1328 4004 1057.0 Mechanosensitive OSCA channels Arabidopsis thaliana Eykaryo. plasma OSCA3.1 channel NaN NaN 5z1f 4.80 ... 1 1 0 2 0 2018-09-14 20:57:18 UTC 2018-11-16 02:15:46 UTC 5z1f 1202 0.418469
555 2324 621.0 Cyclic nucleotide-gated ion channel Rhizobium loti Gram-neg. inner Bacterial cyclic nucleotide regulated ion chan... NaN NaN 4chw 7.0 ... 1 1 0 4 0 2018-08-13 03:53:51 UTC 2018-08-13 03:53:51 UTC 4chw 1372 0.419825
1502 4288 234.0 Photosystem I Bryopsis corticulans Thylakoid Photosystem I of algae NaN The hydrophobic thickness, transfer energy and... 6igz 3.49 ... 1 1 0 20 0 2019-02-23 21:28:11 UTC 2019-02-23 21:31:20 UTC 6igz 4518 0.420761
15 45 258.0 Light-harvesting complexes from bacteria Rhodospirillum molischianum Gram-neg. inner Light-harvesting complex NaN NaN 1lgh 2.40 ... 1 1 0 16 0 2018-08-13 03:49:49 UTC 2018-08-13 03:49:49 UTC 1lgh 792 0.424242
38 69 389.0 V-type and F-type ATPases Ilyobacter tartaricus Gram-neg. inner F-type Sodium ATPase NaN Hydrophobic boundaries expand when calculated ... 1yce 2.40 ... 1 1 1 11 2 2018-08-13 03:49:51 UTC 2018-08-13 03:49:51 UTC 1yce 979 0.426966
1180 3794 645.0 Two pore Ca2+ channels Mus musculus Endosome Two pore calcium channel TPC1, structure 1 NaN NaN 6c96 3.40 ... 1 1 0 2 0 2018-08-13 03:55:14 UTC 2018-08-13 03:55:14 UTC 6c96 1446 0.427386
1551 4376 246.0 Photosystem I Chlamydomonas reinhardtii Thylakoid Photosystem I, with light-harvesting complex I... NaN NaN 6ijo 3.3 ... 1 1 0 20 0 2019-03-30 03:11:48 UTC 2019-03-30 03:11:48 UTC 6ijo 4382 0.428571
1129 3647 1044.0 Apoctamin (TMEM16) family Mus musculus Eykaryo. plasma Chloride channel TMEM16A, structure 3 NaN NaN 6bgi 3.80 ... 1 1 0 2 0 2018-08-13 03:55:09 UTC 2018-08-13 03:55:09 UTC 6bgi 1020 0.430392
540 2296 256.0 Bacterial photosystems Rhodobacter sphaeroides Gram-neg. inner Reaction center-LH1-PufX dimer complex, unit 1 NaN NaN 4jcb 7.78 ... 1 1 1 31 0 2018-08-13 03:53:50 UTC 2018-08-13 03:53:50 UTC 4jcb 2152 0.430762
128 675 266.0 Light-harvesting complexes from chloroplasts Pisum sativum Thylakoid Light-Harvesting Complex II NaN NaN 2bhw 2.5 ... 1 1 0 3 0 2018-08-13 03:50:40 UTC 2018-08-13 03:50:40 UTC 2bhw 669 0.434978
541 2297 257.0 Bacterial photosystems Rhodobacter sphaeroides Gram-neg. inner Reaction center-LH1-PufX dimer complex, unit 2 NaN NaN 4jc9 7.78 ... 1 1 0 32 0 2018-08-13 03:53:50 UTC 2018-08-13 03:53:50 UTC 4jc9 2152 0.436803
354 1959 345.0 Cytochrome c oxidases Thermus thermophilus Gram-neg. inner Bacterial cytochrome c oxidase, caa3-type NaN NaN 2yev 2.36 ... 1 1 0 3 0 2018-08-13 03:53:19 UTC 2018-08-13 03:53:19 UTC 2yev 1162 0.438038
20 50 348.0 Cytochrome c oxidases Rhodobacter sphaeroides Gram-neg. inner Bacterial cytochrome c oxidase, with C subunit NaN NaN 1m56 2.30 ... 1 1 11 4 0 2018-08-13 03:49:50 UTC 2018-11-06 16:10:56 UTC 1m56 1114 0.438061
97 315 603.0 KcsA voltage-gated K+ channels Streptomyces lividans Gram-pos. inner Potassium channel KcsA, full length, open NaN NaN 3pjs 3.8 ... 1 1 0 4 0 2018-08-13 03:50:09 UTC 2018-08-13 03:50:09 UTC 3pjs 556 0.438849
1457 4239 292.0 H+ or Na+ translocating NADH dehydrogenase Yarrowia lipolytica Mitochon. inner Respiratory complex I, structure 3 NaN There are problems with defining names of subu... 6h8k 3.79 ... 1 1 0 28 0 2019-01-30 14:56:17 UTC 2019-01-30 14:59:03 UTC 6h8k 2442 0.438984
1181 3795 647.0 Two pore Ca2+ channels Mus musculus Endosome Two pore calcium channel TPC1, structure 2 NaN NaN 6c9a 3.20 ... 1 1 0 2 0 2018-08-13 03:55:14 UTC 2018-08-13 03:55:14 UTC 6c9a 1446 0.439834
1211 3845 255.0 Bacterial photosystems Roseiflexus castenholzii Gram-neg. inner LH-RC complex NaN NaN 5yq7 4.1 ... 1 1 0 32 0 2018-08-13 03:55:16 UTC 2018-08-13 03:55:16 UTC 5yq7 2107 0.440911
204 923 1177.0 Bacterial zinc transporters Escherichia coli Gram-neg. inner Ferrous-iron efflux pump fieF NaN NaN 3h90 2.90 ... 1 1 0 2 0 2018-08-13 03:50:58 UTC 2018-08-13 03:50:58 UTC 3h90 566 0.441696
1330 4012 646.0 Two pore Ca2+ channels Arabidopsis thaliana Vacuole Two pore calcium channel TPC1, structure 2 NaN NaN 6e1m 3.30 ... 1 1 1 2 0 2018-10-02 18:22:55 UTC 2018-10-18 00:10:23 UTC 6e1m 1066 0.443715
23 53 357.0 Cytochrome c oxidases Escherichia coli Gram-neg. inner Ubiquinol Oxidase NaN NaN 1fft 3.50 ... 1 1 0 3 0 2018-08-13 03:49:50 UTC 2018-08-13 03:49:50 UTC 1fft 943 0.446448
1168 3735 352.0 Cytochrome c oxidases Neisseria meningitidis Gram-neg. inner Nitric-oxide reductase NaN NaN 6fwf 4.20 ... 1 1 0 1 0 2018-08-13 03:55:11 UTC 2018-08-13 03:55:11 UTC 6fwf 712 0.448034
490 2191 373.0 V-type and F-type ATPases Fusobacterium nucleatum Gram-neg. inner F0 ATP synthase NaN NaN 3zk1 2.20 ... 1 1 1 11 0 2018-08-13 03:53:40 UTC 2018-08-13 03:53:40 UTC 3zk1 979 0.449438
1067 3545 231.0 Photosynthetic reaction centers from bacteria Heliobacterium modesticaldum Gram-pos. inner Photosynthetic reaction center-photosystem NaN The arrangement in membrane was calculated wit... 5v8k 2.20 ... 1 1 0 2 0 2018-08-13 03:55:06 UTC 2018-08-13 03:55:06 UTC 5v8k 625 0.449600
556 2325 622.0 Cyclic nucleotide-gated ion channel Rhizobium loti Gram-neg. inner Bacterial cyclic nucleotide regulated ion chan... NaN NaN 4chv 7.0 ... 1 1 0 4 0 2018-08-13 03:53:51 UTC 2018-08-13 03:53:51 UTC 4chv 1372 0.451895
1318 3982 1578.0 Odorant receptor channel Apocrypta bakeri Mitochon. inner Odorant receptor NaN NaN 6c70 3.50 ... 1 1 0 4 0 2018-09-04 22:41:14 UTC 2018-09-04 22:41:14 UTC 6c70 1552 0.453608
186 857 602.0 KcsA voltage-gated K+ channels Streptomyces lividans Gram-pos. inner Potassium channel KcsA, full-length, closed NaN NaN 3eff 3.80 ... 1 1 0 4 0 2018-08-13 03:50:53 UTC 2018-08-13 03:50:53 UTC 3eff 556 0.455036
1511 4297 669.0 Voltage-sensitive Na+ channel Nav Homo sapiens Eykaryo. plasma Nav channel, human-cockroach hybrid, with alph... NaN NaN 6nt4 3.5 ... 1 1 0 1 0 2019-02-23 22:04:26 UTC 2019-02-23 22:04:26 UTC 6nt4 1406 0.455903

551 rows × 34 columns


In [ ]:
["4nv6", "4p79", "5dsg", "6g7o", "6a93"]
["4zyo", "5n6m"]

In [ ]:
# two chains
["4rws", "4xt3", "6iu3", "6iu4"]

In [ ]:
# many, seems like two chain
["5uig"]

In [26]:
d.query("InMembraneRatio < 0.5 and InMembraneRatio > 0.4 and Length < 500")


Out[26]:
id ordering family_name_cache species_name_cache membrane_name_cache name description comments pdbid resolution ... classtype_id type_id secondary_representations_count structure_subunits_count citations_count created_at updated_at Protein Length InMembraneRatio
59 93 171.0 G-protein coupled receptors, family A Bos taurus Eykaryo. plasma Rhodopsin, inactive, with 11-cis retinal NaN Structures of intermediate states: bathorhodop... 1gzm 2.70 ... 1 1 4 1 6 2018-08-13 03:49:53 UTC 2018-11-09 00:12:32 UTC 1gzm 328 0.481707
133 704 173.0 G-protein coupled receptors, family A Bos taurus Eykaryo. plasma Rhodopsin, partially active, photobleached NaN This is probably a photobleached state, not me... 2i37 4.15 ... 1 1 0 1 0 2018-08-13 03:50:42 UTC 2018-08-13 03:50:42 UTC 2i37 317 0.492114
219 972 1362.0 Vitamin K epoxide reductase Synechococcus sp. Gram-neg. inner Vitamin K epoxide reductase, structure 2 NaN NaN 4nv6 4.19 ... 1 1 0 1 0 2018-08-13 03:51:04 UTC 2018-11-06 17:09:22 UTC 4nv6 264 0.405303
247 1023 177.0 G-protein coupled receptors, family A Todarodes pacificus Eykaryo. plasma Squid rhodopsin, inactive, with 11-cis retinal NaN NaN 2ziy 3.70 ... 1 1 2 1 0 2018-08-13 03:51:10 UTC 2018-11-09 00:14:27 UTC 2ziy 370 0.435135
268 1101 169.0 G-protein coupled receptors, family A Bos taurus Eykaryo. plasma Rhodopsin, inactive, structure 2 (with beta-io... NaN NaN 3oax 2.6 ... 1 1 2 1 0 2018-08-13 03:51:21 UTC 2018-08-13 03:51:21 UTC 3oax 348 0.456897
270 1103 165.0 G-protein coupled receptors, family A Bos taurus Eykaryo. plasma Rhodopsin, active, with transducin peptide NaN NaN 4x1h 2.29 ... 1 1 4 1 0 2018-08-13 03:51:21 UTC 2018-11-09 00:19:01 UTC 4x1h 337 0.465875
279 1119 259.0 Light-harvesting complexes from bacteria Rhodospirillum rubrum Gram-neg. inner Light-harvesting complex LH1, alpha chain NaN Structure in chloroform/methanol. 1xrd NMR ... 1 1 0 1 0 2018-08-13 03:51:23 UTC 2018-08-13 03:51:23 UTC 1xrd 52 0.442308
291 1242 63.0 G-protein coupled receptors, family A Homo sapiens Eykaryo. plasma Adenosine receptor A2a, engineered, intermedia... NaN NaN 2ydv 2.6 ... 1 1 1 1 0 2018-08-13 03:51:35 UTC 2018-11-08 23:06:48 UTC 2ydv 315 0.479365
368 1988 1449.0 Peptidase family M48 Homo sapiens Endoplasm. reticulum CAAX prenyl protease 1 homolog, structure 2 NaN NaN 4aw6 3.4 ... 1 1 1 1 0 2018-08-13 03:53:22 UTC 2018-08-13 03:53:22 UTC 4aw6 427 0.402810
410 2066 1450.0 Peptidase family M48 Saccharomyces mikatae Endoplasm. reticulum CaaX Protease Ste24p NaN NaN 4il3 3.10 ... 1 1 0 1 0 2018-08-13 03:53:29 UTC 2018-08-13 03:53:29 UTC 4il3 422 0.424171
417 2090 53.0 G-protein coupled receptors, family A Homo sapiens Eykaryo. plasma 5-hydroxytryptamine receptor 2B, structure 1, ... NaN NaN 4ib4 2.70 ... 1 1 3 1 0 2018-08-13 03:53:32 UTC 2018-11-08 23:01:37 UTC 4ib4 375 0.418667
418 2091 48.0 G-protein coupled receptors, family A Homo sapiens Eykaryo. plasma 5-hydroxytryptamine receptor 1B, structure 1, ... NaN NaN 4iar 2.70 ... 1 1 0 1 0 2018-08-13 03:53:32 UTC 2018-11-08 23:00:00 UTC 4iar 379 0.408971
419 2092 49.0 G-protein coupled receptors, family A Homo sapiens Eykaryo. plasma 5-hydroxytryptamine receptor 1B, structure 2, ... NaN NaN 4iaq 2.80 ... 1 1 0 1 0 2018-08-13 03:53:32 UTC 2018-11-08 23:00:41 UTC 4iaq 367 0.430518
421 2094 170.0 G-protein coupled receptors, family A Bos taurus Eykaryo. plasma Rhodopsin, inactive, structure 3 NaN NaN 1u19 2.20 ... 1 1 4 1 0 2018-08-13 03:53:32 UTC 2018-08-13 03:53:32 UTC 1u19 348 0.459770
562 2334 132.0 G-protein coupled receptors, family A Rattus norvegicus Eykaryo. plasma Neurotensin receptor type 1, structure 2, inac... NaN NaN 4buo 2.75 ... 1 1 3 1 0 2018-08-13 03:53:52 UTC 2018-11-08 23:58:04 UTC 4buo 310 0.496774
571 2346 1363.0 Vitamin K epoxide reductase Synechococcus sp. Gram-neg. inner Vitamin K epoxide reductase, structure 3 NaN NaN 4nv2 3.61 ... 1 1 1 1 0 2018-08-13 03:53:53 UTC 2018-11-06 17:08:44 UTC 4nv2 264 0.424242
601 2447 1013.0 Claudins Mus musculus Eykaryo. plasma Claudin-15 NaN NaN 4p79 2.40 ... 1 1 0 1 0 2018-08-13 03:53:56 UTC 2018-08-13 03:53:56 UTC 4p79 173 0.497110
655 2535 145.0 G-protein coupled receptors, family A Bos taurus Eykaryo. plasma Opsin, active, complex with arrestin peptide NaN NaN 4pxf 2.75 ... 1 1 1 1 0 2018-08-13 03:54:02 UTC 2018-11-09 00:20:41 UTC 4pxf 332 0.478916
696 2753 103.0 G-protein coupled receptors, family A Homo sapiens Eykaryo. plasma C-X-C chemokine receptor type 4, inactive stat... NaN NaN 4rws 3.10 ... 1 1 0 1 0 2018-08-13 03:54:18 UTC 2018-11-08 23:35:42 UTC 4rws 346 0.447977
700 2762 190.0 G-protein coupled receptors, family A Human herpesvirus Eykaryo. plasma Viral GPCR US28, active, with fractalkine NaN NaN 4xt3 3.80 ... 1 1 0 1 0 2018-08-13 03:54:19 UTC 2018-11-09 00:27:17 UTC 4xt3 356 0.426966
717 2808 70.0 G-protein coupled receptors, family A Homo sapiens Eykaryo. plasma Adenosine receptor A2a, intermediate state, wi... NaN NaN 4uhr 2.60 ... 1 1 1 1 0 2018-08-13 03:54:21 UTC 2018-11-08 23:12:24 UTC 4uhr 310 0.490323
735 2848 1517.0 Fatty acid desaturase Homo sapiens Endoplasm. reticulum Acyl-CoA desaturase NaN NaN 4zyo 3.25 ... 1 1 0 1 0 2018-08-13 03:54:25 UTC 2018-08-13 03:54:25 UTC 4zyo 298 0.476510
746 2890 1519.0 Fatty acid hydroxylase Saccharomyces cerevisiae Endoplasm. reticulum Ceramide fatty acid hydroxylase SCS7 NaN NaN 4zr1 2.60 ... 1 1 1 1 0 2018-08-13 03:54:27 UTC 2018-08-13 03:54:27 UTC 4zr1 274 0.489051
758 2914 1521.0 Fluoride exporter Bordetella pertussis Gram-neg. inner Fluoride ion transporter CrcB, structure 1 NaN This is a "dual topology" antiparallel dimer. 5a40 3.60 ... 1 1 1 2 0 2018-08-13 03:54:28 UTC 2018-08-13 03:54:28 UTC 5a40 429 0.475524
772 2936 1518.0 Fatty acid desaturase Mus musculus Endoplasm. reticulum Acyl-CoA desaturase NaN NaN 4ymk 2.61 ... 1 1 0 1 0 2018-08-13 03:54:29 UTC 2018-08-13 03:54:29 UTC 4ymk 322 0.422360
810 3013 128.0 G-protein coupled receptors, family A Homo sapiens Eykaryo. plasma Muscarinic acetylcholine receptor M4, inactive... NaN NaN 5dsg 2.6 ... 1 1 0 1 0 2018-08-13 03:54:34 UTC 2018-11-08 23:56:13 UTC 5dsg 392 0.408163
827 3046 1398.0 Gamma-secretase Homo sapiens Endoplasm. reticulum Nicastrin, TM helix NaN This is structure in DPC micelles; 2n7q is str... 2n7r NMR ... 1 1 1 1 0 2018-08-13 03:54:37 UTC 2019-04-19 16:21:00 UTC 2n7r 46 0.434783
897 3253 1541.0 Tetraspanin Homo sapiens Eykaryo. plasma CD81 antigen NaN NaN 5tcx 2.95 ... 1 1 0 1 0 2018-08-13 03:54:51 UTC 2018-08-13 03:54:51 UTC 5tcx 206 0.412621
935 3317 54.0 G-protein coupled receptors, family A Homo sapiens Eykaryo. plasma 5-hydroxytryptamine receptor 2B, structure 2, ... NaN NaN 5tvn 2.90 ... 1 1 1 1 0 2018-08-13 03:54:55 UTC 2018-11-08 23:02:41 UTC 5tvn 393 0.402036
936 3318 68.0 G-protein coupled receptors, family A Homo sapiens Eykaryo. plasma Adenosine receptor A2a, inactive, with antagonist NaN NaN 5uig 3.50 ... 1 1 0 1 0 2018-08-13 03:54:55 UTC 2018-11-08 23:14:23 UTC 5uig 387 0.410853
954 3353 1543.0 CD36 glycoprotein Mus musculus Eykaryo. plasma Scavenger receptor B-1 NaN The protein also has uncleaved N-terminal tran... 5ktf NMR ... 1 1 0 1 0 2018-08-13 03:54:57 UTC 2018-08-13 03:54:57 UTC 5ktf 73 0.410959
958 3365 172.0 G-protein coupled receptors, family A Bos taurus Eykaryo. plasma Rhodopsin, inactive, with cyclic retinal analog NaN NaN 5te5 4.01 ... 1 1 0 1 0 2018-08-13 03:54:58 UTC 2018-11-09 00:23:20 UTC 5te5 348 0.462644
964 3389 188.0 G-protein coupled receptors, family A Homo sapiens Eykaryo. plasma Type-2 angiotensin II receptor, active state, ... NaN NaN 5ung 2.80 ... 1 1 1 1 0 2018-08-13 03:54:59 UTC 2018-11-08 23:19:16 UTC 5ung 310 0.483871
997 3439 203.0 GPCR Secretin (B) family Homo sapiens Eykaryo. plasma Glucagon receptor, inactive state, full-length... NaN NaN 5xez 3.00 ... 1 1 1 1 0 2018-08-13 03:55:01 UTC 2018-11-09 01:04:56 UTC 5xez 389 0.401028
1006 3449 72.0 G-protein coupled receptors, family A Homo sapiens Eykaryo. plasma Apelin receptor, inactive state NaN See 2lot, 2low and 2lov for peptides in micell... 5vbl 2.60 ... 1 1 0 1 0 2018-08-13 03:55:02 UTC 2019-04-20 17:21:24 UTC 5vbl 311 0.482315
1015 3468 96.0 G-protein coupled receptors, family A Homo sapiens Eykaryo. plasma C-C chemokine receptor type 5, inactive state,... NaN NaN 5uiw 2.20 ... 1 1 0 1 0 2018-08-13 03:55:03 UTC 2018-11-08 23:33:44 UTC 5uiw 365 0.452055
1022 3477 200.0 GPCR Secretin (B) family Homo sapiens Eykaryo. plasma Glucagon-like peptide 1 receptor, intermediate... NaN NaN 5nx2 3.70 ... 1 1 0 1 0 2018-08-13 03:55:03 UTC 2018-11-09 01:02:48 UTC 5nx2 394 0.428934
1030 3488 1549.0 Carbon-nitrogen hydrolase Escherichia coli Gram-neg. inner Apolipoprotein N-acyl transferase, structure 2 NaN NaN 5n6m 3.10 ... 1 1 0 1 0 2018-08-13 03:55:04 UTC 2018-08-13 03:55:04 UTC 5n6m 491 0.419552
1175 3745 1567.0 Designed TM alpha-hairpin proteins Designed proteins Undefined Protein TMHC2_E NaN NaN 6b87 2.95 ... 1 1 0 2 0 2018-08-13 03:55:12 UTC 2018-08-13 03:55:12 UTC 6b87 200 0.420000
1179 3793 161.0 G-protein coupled receptors, family A Bos taurus Eykaryo. plasma Rhodopsin, active, open channel structure NaN NaN 6fk6 2.36 ... 1 1 7 1 0 2018-08-13 03:55:14 UTC 2018-11-09 00:24:18 UTC 6fk6 326 0.475460
1228 3865 1352.0 Sulfate transporter (CysZ) Pseudomonas fragi Gram-neg. inner Sulfate transporter CysZ NaN A dimer with dual topology was suggested for C... 6d79 3.50 ... 1 1 0 2 0 2018-08-13 03:55:16 UTC 2018-08-13 03:55:16 UTC 6d79 409 0.447433
1243 3884 86.0 G-protein coupled receptors, family A Homo sapiens Eykaryo. plasma C5a anaphylatoxin chemotactic receptor 1, inac... NaN NaN 6c1r 2.20 ... 1 1 1 1 0 2018-08-13 03:55:17 UTC 2018-11-08 23:22:54 UTC 6c1r 374 0.411765
1317 3981 208.0 Frizzled/Smoothened family Homo sapiens Eykaryo. plasma Frizzled-4 receptor, inactive state NaN NaN 6bd4 2.40 ... 1 1 0 1 0 2018-09-04 22:21:55 UTC 2018-11-09 01:08:36 UTC 6bd4 325 0.480000
1376 4126 176.0 G-protein coupled receptors, family A Todarodes pacificus Eykaryo. plasma Squid rhodopsin, inactive, lumi intermediate NaN NaN 4ww3 2.80 ... 1 1 0 1 0 2018-11-06 16:34:02 UTC 2018-11-09 00:21:29 UTC 4ww3 347 0.472622
1382 4132 1361.0 Vitamin K epoxide reductase Synechococcus sp. Gram-neg. inner Vitamin K epoxide reductase, structure 1 NaN NaN 3kp9 3.60 ... 1 1 0 1 0 2018-11-06 17:10:58 UTC 2018-11-06 17:10:58 UTC 3kp9 259 0.424710
1408 4170 110.0 G-protein coupled receptors, family A Homo sapiens Eykaryo. plasma Endothelin B receptor, intermediate state 2 NaN NaN 6igk 2.00 ... 1 1 0 1 0 2018-12-25 05:23:32 UTC 2018-12-25 05:23:32 UTC 6igk 332 0.463855
1409 4171 111.0 G-protein coupled receptors, family A Homo sapiens Eykaryo. plasma Endothelin B receptor, intermediate state 3 NaN NaN 6igl 2.70 ... 1 1 0 1 0 2018-12-25 05:24:30 UTC 2018-12-25 05:24:30 UTC 6igl 323 0.482972
1449 4228 225.0 Ceramidase Homo sapiens Endoplasm. reticulum Alkaline ceramidase 3 NaN NaN 6g7o 2.70 ... 1 1 0 1 0 2019-01-30 01:33:11 UTC 2019-01-30 01:34:07 UTC 6g7o 350 0.431429
1493 4277 1585.0 Vacuolar iron transporter Eucalyptus grandis Vacuole Iron transporter VIT1, structure 1 NaN Similar to subunit Y ( Ndufa11) of respiratory... 6iu3 2.7 ... 1 1 0 2 0 2019-02-10 17:03:08 UTC 2019-02-11 04:04:11 UTC 6iu3 448 0.491071
1494 4278 1586.0 Vacuolar iron transporter Eucalyptus grandis Vacuole Iron transporter VIT1, structure 2 NaN Similar to subunit Y ( Ndufa11) of respiratory... 6iu4 3.5 ... 1 1 0 2 0 2019-02-10 17:20:20 UTC 2019-02-11 04:04:11 UTC 6iu4 450 0.497778
1497 4282 51.0 G-protein coupled receptors, family A Homo sapiens Eykaryo. plasma 5-hydroxytryptamine receptor 2A, structure 1 NaN NaN 6a93 3.0 ... 1 1 0 1 0 2019-02-23 00:38:47 UTC 2019-02-23 00:38:47 UTC 6a93 370 0.410811
1498 4283 52.0 G-protein coupled receptors, family A Homo sapiens Eykaryo. plasma 5-hydroxytryptamine receptor 2A, structure 2 NaN NaN 6a94 2.90 ... 1 1 0 1 0 2019-02-23 00:39:56 UTC 2019-02-23 00:39:56 UTC 6a94 359 0.420613

52 rows × 34 columns


In [48]:
chosen = info.query("structure_subunits_count == 1 and InMembraneRatio < 0.5 and Length < 500")

In [50]:
chosen.shape


Out[50]:
(102, 34)

In [57]:
picked = chosen.groupby("superfamily_id").apply(pd.DataFrame.sample, 1)

In [62]:
picked = picked.reset_index(drop=True)

In [63]:
picked.to_csv("/Users/weilu/Research/database/hybrid_prediction_database/picked.csv")

In [52]:
chosen["superfamily_id"].unique()


Out[52]:
array([  6, 173,  18,   2, 394,  63, 194, 202, 431, 406, 244, 327, 456,
       466, 470, 476])

In [68]:
chosen = info.query("structure_subunits_count == 1 and InMembraneRatio < 0.6 and InMembraneRatio > 0.4 and Length < 500")

chosen.shape


Out[68]:
(165, 34)

In [74]:
picked2 = chosen.sort_values("Length").groupby("superfamily_id").head(1).reset_index(drop=True)

In [75]:
picked2.to_csv("/Users/weilu/Research/database/hybrid_prediction_database/picked2.csv")

In [69]:
chosen["superfamily_id"].unique()


Out[69]:
array([ 15,   6,  14, 220,  21, 173,  18,   2, 267, 159,  64, 392, 394,
        63, 218, 396,   8, 202, 194, 409, 431, 244, 327, 456, 466, 415,
       493,  92])

In [ ]:


In [66]:
a = pd.read_csv("/Users/weilu/Research/database/hybrid_prediction_database/picked.csv", index_col=0)
pdb_list = a["Protein"].to_list()


Out[66]:
['1xrd',
 '5uiw',
 '3kp9',
 '2mi2',
 '3e9j',
 '6akg',
 '5y83',
 '2n7r',
 '5tcx',
 '4aw6',
 '5d91',
 '4zyo',
 '5ktf',
 '5vrh',
 '5mm0',
 '6bms']

In [67]:
picked


Out[67]:
Protein Length InMembraneRatio id ordering family_name_cache species_name_cache membrane_name_cache name description ... species_id family_id superfamily_id classtype_id type_id secondary_representations_count structure_subunits_count citations_count created_at updated_at
0 1xrd 52 0.442308 1119 259.0 Light-harvesting complexes from bacteria Rhodospirillum rubrum Gram-neg. inner Light-harvesting complex LH1, alpha chain NaN ... 321 1 2 1 1 0 1 0 2018-08-13 03:51:23 UTC 2018-08-13 03:51:23 UTC
1 5uiw 365 0.452055 3468 96.0 G-protein coupled receptors, family A Homo sapiens Eykaryo. plasma C-C chemokine receptor type 5, inactive state,... NaN ... 14 14 6 1 1 0 1 0 2018-08-13 03:55:03 UTC 2018-11-08 23:33:44 UTC
2 3kp9 259 0.424710 4132 1361.0 Vitamin K epoxide reductase Synechococcus sp. Gram-neg. inner Vitamin K epoxide reductase, structure 1 NaN ... 133 307 18 1 1 0 1 0 2018-11-06 17:10:58 UTC 2018-11-06 17:10:58 UTC
3 2mi2 104 0.317308 2443 1470.0 TatB protein Escherichia coli Gram-neg. inner Sec-independent protein translocase protein TatB NaN ... 9 699 63 1 1 0 1 0 2018-08-13 03:53:56 UTC 2018-08-13 03:53:56 UTC
4 3e9j 322 0.270186 2136 1341.0 Disulfide bond oxidoreductase-B (DsbB) Escherichia coli Gram-neg. inner DsbB - DsbA complex, conformation 4 NaN ... 9 247 173 1 1 0 1 0 2018-08-13 03:53:35 UTC 2018-08-13 03:53:35 UTC
5 6akg 301 0.285714 4289 1015.0 Claudins Mus musculus Eykaryo. plasma Claudin-3, structure 1 NaN ... 52 702 194 1 1 0 1 0 2019-02-23 21:34:55 UTC 2019-02-23 21:34:55 UTC
6 5y83 342 0.327485 3927 1226.0 OxaA/YidC Thermotoga maritima Gram-neg. inner Membrane protein insertase YidC NaN ... 61 293 202 1 1 0 1 0 2018-08-13 03:55:19 UTC 2018-08-13 03:55:19 UTC
7 2n7r 46 0.434783 3046 1398.0 Gamma-secretase Homo sapiens Endoplasm. reticulum Nicastrin, TM helix NaN ... 14 767 244 1 1 1 1 0 2018-08-13 03:54:37 UTC 2019-04-19 16:21:00 UTC
8 5tcx 206 0.412621 3253 1541.0 Tetraspanin Homo sapiens Eykaryo. plasma CD81 antigen NaN ... 14 567 327 1 1 0 1 0 2018-08-13 03:54:51 UTC 2018-08-13 03:54:51 UTC
9 4aw6 427 0.402810 1988 1449.0 Peptidase family M48 Homo sapiens Endoplasm. reticulum CAAX prenyl protease 1 homolog, structure 2 NaN ... 14 646 394 1 1 1 1 0 2018-08-13 03:53:22 UTC 2018-08-13 03:53:22 UTC
10 5d91 335 0.364179 2951 1501.0 Choline/ethanolamine phosphotransferase 1 Renibacterium salmoninarum Gram-pos. inner Phosphatidylinositolphosphate synthase, struct... NaN ... 648 701 406 1 1 0 1 0 2018-08-13 03:54:31 UTC 2018-08-13 03:54:31 UTC
11 4zyo 298 0.476510 2848 1517.0 Fatty acid desaturase Homo sapiens Endoplasm. reticulum Acyl-CoA desaturase NaN ... 14 768 431 1 1 0 1 0 2018-08-13 03:54:25 UTC 2018-08-13 03:54:25 UTC
12 5ktf 73 0.410959 3353 1543.0 CD36 glycoprotein Mus musculus Eykaryo. plasma Scavenger receptor B-1 NaN ... 52 840 456 1 1 0 1 0 2018-08-13 03:54:57 UTC 2018-08-13 03:54:57 UTC
13 5vrh 490 0.397959 3489 1550.0 Carbon-nitrogen hydrolase Escherichia coli Gram-neg. inner Apolipoprotein N-acyl transferase, structure 3 NaN ... 9 866 466 1 1 1 1 0 2018-08-13 03:55:04 UTC 2018-11-06 22:23:30 UTC
14 5mm0 355 0.292958 3516 1554.0 Dolichyl-phosphate beta-glucosyltransferase Pyrococcus furiosus Archaebac. Dolichyl phosphate mannose synthase, structure 2 NaN ... 305 870 470 1 1 0 1 0 2018-08-13 03:55:05 UTC 2018-08-13 03:55:05 UTC
15 6bms 282 0.368794 3652 1566.0 DHHC palmitoyltransferase Danio rerio Golgi Palmitoyltransferase NaN ... 129 886 476 1 1 0 1 0 2018-08-13 03:55:09 UTC 2018-08-13 03:55:09 UTC

16 rows × 34 columns


In [76]:
picked2


Out[76]:
Protein Length InMembraneRatio id ordering family_name_cache species_name_cache membrane_name_cache name description ... species_id family_id superfamily_id classtype_id type_id secondary_representations_count structure_subunits_count citations_count created_at updated_at
0 2n7r 46 0.434783 3046 1398.0 Gamma-secretase Homo sapiens Endoplasm. reticulum Nicastrin, TM helix NaN ... 14 767 244 1 1 1 1 0 2018-08-13 03:54:37 UTC 2019-04-19 16:21:00 UTC
1 1jo5 48 0.520833 1320 263.0 Light-harvesting complexes from bacteria Rhodobacter sphaeroides Gram-neg. inner Light-harvesting protein B-875, beta chain NaN ... 31 1 2 1 1 1 1 0 2018-08-13 03:51:42 UTC 2018-08-13 03:51:42 UTC
2 5ktf 73 0.410959 3353 1543.0 CD36 glycoprotein Mus musculus Eykaryo. plasma Scavenger receptor B-1 NaN ... 52 840 456 1 1 0 1 0 2018-08-13 03:54:57 UTC 2018-08-13 03:54:57 UTC
3 2moz 81 0.506173 2498 1347.0 MerF Mercuric ion uptake family Morganella morganii Gram-neg. inner MerF bacterial mercury uptake transporter, str... NaN ... 300 322 218 1 1 0 1 0 2018-08-13 03:53:59 UTC 2018-08-13 03:53:59 UTC
4 3zd0 85 0.517647 2254 1479.0 Nucleocapsid P7 protein Hepatitis C virus Endoplasm. reticulum P7 protein (747-809), structure 1 NaN ... 328 674 396 1 1 0 1 0 2018-08-13 03:53:45 UTC 2018-08-13 03:53:45 UTC
5 2lor 97 0.525773 1962 1444.0 Transmembrane protein 141 Homo sapiens Undefined Transmembrane protein 141 NaN ... 14 641 392 1 1 0 1 0 2018-08-13 03:53:19 UTC 2018-08-13 03:53:19 UTC
6 2ksr 140 0.592857 321 956.0 Ligand-gated ion channel of neurotransmitter r... Homo sapiens Eykaryo. plasma Nicotinic acetylcholine receptor, beta-2, in h... NaN ... 14 22 14 1 1 0 1 0 2018-08-13 03:50:09 UTC 2018-08-13 03:50:09 UTC
7 3wkv 140 0.571429 2381 805.0 Voltage-sensing proton channel Mus musculus Eykaryo. plasma Voltage-gated sensor domain of proton channel Hv1 NaN ... 52 806 8 1 1 0 1 0 2018-08-13 03:53:54 UTC 2018-08-13 03:53:54 UTC
8 4p79 173 0.497110 2447 1013.0 Claudins Mus musculus Eykaryo. plasma Claudin-15 NaN ... 52 702 194 1 1 0 1 0 2018-08-13 03:53:56 UTC 2018-08-13 03:53:56 UTC
9 2k74 183 0.530055 819 1335.0 Disulfide bond oxidoreductase-B (DsbB) Escherichia coli Gram-neg. inner Disulfide bond formation protein B, conformati... NaN ... 9 247 173 1 1 1 1 0 2018-08-13 03:50:49 UTC 2018-08-13 03:50:49 UTC
10 4a2n 192 0.567708 1655 1374.0 Isoprenylcysteine carboxyl methyltransferase (... Methanosarcina acetivorans Archaebac. Integral Membrane Methyltransferase NaN ... 252 471 159 1 1 0 1 0 2018-08-13 03:52:00 UTC 2018-08-13 03:52:00 UTC
11 5tcx 206 0.412621 3253 1541.0 Tetraspanin Homo sapiens Eykaryo. plasma CD81 antigen NaN ... 14 567 327 1 1 0 1 0 2018-08-13 03:54:51 UTC 2018-08-13 03:54:51 UTC
12 4b4a 221 0.588235 2040 1465.0 TatC Aquifex aeolicus Gram-neg. inner Sec-independent protein translocase TatC NaN ... 57 484 63 1 1 2 1 0 2018-08-13 03:53:26 UTC 2018-08-13 03:53:26 UTC
13 3wo7 224 0.575893 2426 1227.0 OxaA/YidC Bacillus halodurans Gram-pos. inner Membrane protein insertase YidC NaN ... 550 293 202 1 1 1 1 0 2018-08-13 03:53:56 UTC 2018-08-13 03:53:56 UTC
14 3tx3 227 0.519824 1355 1351.0 Sulfate transporter (CysZ) Idiomarina loihiensis Gram-neg. inner Putative sulfate permease CysZ NaN ... 356 389 267 1 1 0 1 0 2018-08-13 03:51:45 UTC 2018-08-13 03:51:45 UTC
15 3ddl 252 0.587302 823 46.0 Microbial and algal rhodopsins Salinibacter ruber Gram-neg. inner Xanthorhodopsin NaN ... 256 13 6 1 1 0 1 0 2018-08-13 03:50:49 UTC 2018-08-13 03:50:49 UTC
16 5jwy 254 0.535433 2453 1505.0 Transmembrane lipid phosphatase Escherichia coli Gram-neg. inner Phosphatidylglycerophosphatase NaN ... 9 704 409 1 1 1 1 0 2018-08-13 03:53:57 UTC 2018-10-28 03:10:41 UTC
17 3kp9 259 0.424710 4132 1361.0 Vitamin K epoxide reductase Synechococcus sp. Gram-neg. inner Vitamin K epoxide reductase, structure 1 NaN ... 133 307 18 1 1 0 1 0 2018-11-06 17:10:58 UTC 2018-11-06 17:10:58 UTC
18 5xpd 269 0.539033 3548 1326.0 Eukaryotic SWEET transporters Arabidopsis thaliana Eykaryo. plasma Bidirectional sugar transporter SWEET13, inwar... NaN ... 246 789 415 1 1 0 1 0 2018-08-13 03:55:06 UTC 2018-11-13 03:35:09 UTC
19 4zr1 274 0.489051 2890 1519.0 Fatty acid hydroxylase Saccharomyces cerevisiae Endoplasm. reticulum Ceramide fatty acid hydroxylase SCS7 NaN ... 36 779 431 1 1 1 1 0 2018-08-13 03:54:27 UTC 2018-08-13 03:54:27 UTC
20 6gci 292 0.582192 4231 1248.0 ADP/ATP carrier Myceliophthora thermophila Mitochon. inner Mitochondrial ADP-ATP carrier NaN ... 814 29 21 1 1 0 1 0 2019-01-30 13:47:49 UTC 2019-01-30 13:47:49 UTC
21 6a2j 309 0.576052 4201 338.0 Heme A synthase Bacillus subtilis Gram-pos. inner Heme A synthase NaN ... 89 971 92 1 1 1 1 0 2018-12-25 17:30:58 UTC 2018-12-25 17:30:58 UTC
22 4jr9 409 0.599022 2187 1116.0 Nitrate/nitrite porter Escherichia coli Gram-neg. inner Nitrate/nitrite exchanger NarK, partially occl... NaN ... 9 666 15 1 1 1 1 0 2018-08-13 03:53:39 UTC 2018-08-13 03:53:39 UTC
23 4il3 422 0.424171 2066 1450.0 Peptidase family M48 Saccharomyces mikatae Endoplasm. reticulum CaaX Protease Ste24p NaN ... 517 646 394 1 1 0 1 0 2018-08-13 03:53:29 UTC 2018-08-13 03:53:29 UTC
24 6ids 438 0.586758 4254 1292.0 Multi antimicrobial extrusion (MATE) family Vibrio cholerae Gram-neg. inner MATE transporter VcmN, structure 3 NaN ... 45 327 220 1 1 0 1 0 2019-01-30 18:13:02 UTC 2019-01-30 18:13:02 UTC
25 5n6m 491 0.419552 3488 1549.0 Carbon-nitrogen hydrolase Escherichia coli Gram-neg. inner Apolipoprotein N-acyl transferase, structure 2 NaN ... 9 866 466 1 1 0 1 0 2018-08-13 03:55:04 UTC 2018-08-13 03:55:04 UTC
26 6bug 492 0.585366 4049 1579.0 Membrane-bound O-acyltransferase Streptococcus thermophilus Gram-neg. outer D-alanyl transfer protein DltB, with D-alanyl ... NaN ... 565 924 493 1 1 2 1 0 2018-10-12 16:46:37 UTC 2018-10-18 16:48:56 UTC
27 4dji 493 0.598377 1938 1153.0 Amino acid-Polyamine-Organocation (APC) family Escherichia coli Gram-neg. inner Glutamate/gamma-aminobutyrate antiporter NaN ... 9 281 64 1 1 1 1 0 2018-08-13 03:53:18 UTC 2018-08-13 03:53:18 UTC

28 rows × 34 columns


In [ ]: