In [1]:
%autosave 0
from __future__ import print_function
Autosave disabled
This notebook shows how to extract all fragments with a given sequence from a PDB file (trajectories are NOT supported). The extraction is performed with the function
bb.snippet(fname,sequence,outdir=outdir)
The PDB fragments are written to the directory `outdir`. In the example below, we extract all fragments with sequence GNRA (N = any nucleotide, R = A or G) from the structure of the large ribosomal subunit (PDB ID 1S72).
In [2]:
import barnaba as bb
import os
# Input structure: a single PDB file (trajectories are not supported by bb.snippet).
fname = "../test/data/1S72.pdb"
# Query sequence; the fragments listed further down show any nucleotide at
# position N and A/G at position R.
sequence = "GNRA"
# Directory that receives one PDB file per matching fragment.
outdir = "snippet"

# Create the output directory from Python instead of shelling out to
# `mkdir -p`: this works on every platform and raises on failure, whereas the
# exit status of os.system() was silently ignored.
if not os.path.isdir(outdir):
    os.makedirs(outdir)

bb.snippet(fname, sequence, outdir=outdir)
# Initializing file ../test/data/1S72.pdb
# Writing PDB 1S72_64_G_0_0_00000.pdb
# Writing PDB 1S72_77_G_0_0_00001.pdb
# Writing PDB 1S72_92_G_0_0_00002.pdb
# Writing PDB 1S72_116_G_0_0_00003.pdb
# Writing PDB 1S72_142_G_0_0_00004.pdb
# Writing PDB 1S72_149_G_0_0_00005.pdb
# Writing PDB 1S72_157_G_0_0_00006.pdb
# Writing PDB 1S72_164_G_0_0_00007.pdb
# Writing PDB 1S72_180_G_0_0_00008.pdb
# Writing PDB 1S72_184_G_0_0_00009.pdb
# Writing PDB 1S72_190_G_0_0_00010.pdb
# Writing PDB 1S72_196_G_0_0_00011.pdb
# Writing PDB 1S72_201_G_0_0_00012.pdb
# Writing PDB 1S72_213_G_0_0_00013.pdb
# Writing PDB 1S72_219_G_0_0_00014.pdb
# Writing PDB 1S72_223_G_0_0_00015.pdb
# Writing PDB 1S72_229_G_0_0_00016.pdb
# Writing PDB 1S72_259_G_0_0_00017.pdb
# Writing PDB 1S72_314_G_0_0_00018.pdb
# Writing PDB 1S72_324_G_0_0_00019.pdb
# Writing PDB 1S72_334_G_0_0_00020.pdb
# Writing PDB 1S72_351_G_0_0_00021.pdb
# Writing PDB 1S72_404_G_0_0_00022.pdb
# Writing PDB 1S72_426_G_0_0_00023.pdb
# Writing PDB 1S72_446_G_0_0_00024.pdb
# Writing PDB 1S72_456_G_0_0_00025.pdb
# Writing PDB 1S72_469_G_0_0_00026.pdb
# Writing PDB 1S72_482_G_0_0_00027.pdb
# Writing PDB 1S72_499_G_0_0_00028.pdb
# Writing PDB 1S72_504_G_0_0_00029.pdb
# Writing PDB 1S72_506_G_0_0_00030.pdb
# Writing PDB 1S72_518_G_0_0_00031.pdb
# Writing PDB 1S72_529_G_0_0_00032.pdb
# Writing PDB 1S72_537_G_0_0_00033.pdb
# Writing PDB 1S72_577_G_0_0_00034.pdb
# Writing PDB 1S72_588_G_0_0_00035.pdb
# Writing PDB 1S72_599_G_0_0_00036.pdb
# Writing PDB 1S72_600_G_0_0_00037.pdb
# Writing PDB 1S72_627_G_0_0_00038.pdb
# Writing PDB 1S72_640_G_0_0_00039.pdb
# Writing PDB 1S72_657_G_0_0_00040.pdb
# Writing PDB 1S72_679_G_0_0_00041.pdb
# Writing PDB 1S72_689_G_0_0_00042.pdb
# Writing PDB 1S72_690_G_0_0_00043.pdb
# Writing PDB 1S72_691_G_0_0_00044.pdb
# Writing PDB 1S72_743_G_0_0_00045.pdb
# Writing PDB 1S72_805_G_0_0_00046.pdb
# Writing PDB 1S72_816_G_0_0_00047.pdb
# Writing PDB 1S72_854_G_0_0_00048.pdb
# Writing PDB 1S72_871_G_0_0_00049.pdb
# Writing PDB 1S72_873_G_0_0_00050.pdb
# Writing PDB 1S72_892_G_0_0_00051.pdb
# Writing PDB 1S72_911_G_0_0_00052.pdb
# Writing PDB 1S72_948_G_0_0_00053.pdb
# Writing PDB 1S72_958_G_0_0_00054.pdb
# Writing PDB 1S72_1037_G_0_0_00055.pdb
# Writing PDB 1S72_1055_G_0_0_00056.pdb
# Writing PDB 1S72_1075_G_0_0_00057.pdb
# Writing PDB 1S72_1076_G_0_0_00058.pdb
# Writing PDB 1S72_1087_G_0_0_00059.pdb
# Writing PDB 1S72_1121_G_0_0_00060.pdb
# Writing PDB 1S72_1158_G_0_0_00061.pdb
# Writing PDB 1S72_1163_G_0_0_00062.pdb
# Writing PDB 1S72_1190_G_0_0_00063.pdb
# Writing PDB 1S72_1197_G_0_0_00064.pdb
# Writing PDB 1S72_1239_G_0_0_00065.pdb
# Writing PDB 1S72_1258_G_0_0_00066.pdb
# Writing PDB 1S72_1284_G_0_0_00067.pdb
# Writing PDB 1S72_1315_G_0_0_00068.pdb
# Writing PDB 1S72_1325_G_0_0_00069.pdb
# Writing PDB 1S72_1327_G_0_0_00070.pdb
# Writing PDB 1S72_1349_G_0_0_00071.pdb
# Writing PDB 1S72_1354_G_0_0_00072.pdb
# Writing PDB 1S72_1376_G_0_0_00073.pdb
# Writing PDB 1S72_1387_G_0_0_00074.pdb
# Writing PDB 1S72_1389_G_0_0_00075.pdb
# Writing PDB 1S72_1468_G_0_0_00076.pdb
# Writing PDB 1S72_1484_G_0_0_00077.pdb
# Writing PDB 1S72_1489_G_0_0_00078.pdb
# Writing PDB 1S72_1490_G_0_0_00079.pdb
# Writing PDB 1S72_1491_G_0_0_00080.pdb
# Writing PDB 1S72_1523_G_0_0_00081.pdb
# Writing PDB 1S72_1525_G_0_0_00082.pdb
# Writing PDB 1S72_1588_G_0_0_00083.pdb
# Writing PDB 1S72_1595_G_0_0_00084.pdb
# Writing PDB 1S72_1604_G_0_0_00085.pdb
# Writing PDB 1S72_1627_G_0_0_00086.pdb
# Writing PDB 1S72_1628_G_0_0_00087.pdb
# Writing PDB 1S72_1629_G_0_0_00088.pdb
# Writing PDB 1S72_1634_G_0_0_00089.pdb
# Writing PDB 1S72_1655_G_0_0_00090.pdb
# Writing PDB 1S72_1681_G_0_0_00091.pdb
# Writing PDB 1S72_1707_G_0_0_00092.pdb
# Writing PDB 1S72_1709_G_0_0_00093.pdb
# Writing PDB 1S72_1726_G_0_0_00094.pdb
# Writing PDB 1S72_1730_G_0_0_00095.pdb
# Writing PDB 1S72_1743_G_0_0_00096.pdb
# Writing PDB 1S72_1744_G_0_0_00097.pdb
# Writing PDB 1S72_1752_G_0_0_00098.pdb
# Writing PDB 1S72_1773_G_0_0_00099.pdb
# Writing PDB 1S72_1780_G_0_0_00100.pdb
# Writing PDB 1S72_1794_G_0_0_00101.pdb
# Writing PDB 1S72_1812_G_0_0_00102.pdb
# Writing PDB 1S72_1819_G_0_0_00103.pdb
# Writing PDB 1S72_1837_G_0_0_00104.pdb
# Writing PDB 1S72_1849_G_0_0_00105.pdb
# Writing PDB 1S72_1855_G_0_0_00106.pdb
# Writing PDB 1S72_1863_G_0_0_00107.pdb
# Writing PDB 1S72_2051_G_0_0_00108.pdb
# Writing PDB 1S72_2080_G_0_0_00109.pdb
# Writing PDB 1S72_2092_G_0_0_00110.pdb
# Writing PDB 1S72_2093_G_0_0_00111.pdb
# Writing PDB 1S72_2097_G_0_0_00112.pdb
# Writing PDB 1S72_2249_G_0_0_00113.pdb
# Writing PDB 1S72_2299_G_0_0_00114.pdb
# Warning: no P atom in residue 2344_G_0_0
# Writing PDB 1S72_2337_G_0_0_00115.pdb
# Writing PDB 1S72_2350_G_0_0_00116.pdb
# Writing PDB 1S72_2359_G_0_0_00117.pdb
# Writing PDB 1S72_2365_G_0_0_00118.pdb
# Writing PDB 1S72_2399_G_0_0_00119.pdb
# Writing PDB 1S72_2410_G_0_0_00120.pdb
# Writing PDB 1S72_2412_G_0_0_00121.pdb
# Writing PDB 1S72_2426_G_0_0_00122.pdb
# Writing PDB 1S72_2453_G_0_0_00123.pdb
# Writing PDB 1S72_2466_G_0_0_00124.pdb
# Writing PDB 1S72_2480_G_0_0_00125.pdb
# Writing PDB 1S72_2501_G_0_0_00126.pdb
# Writing PDB 1S72_2574_G_0_0_00127.pdb
# Writing PDB 1S72_2580_G_0_0_00128.pdb
# Writing PDB 1S72_2609_G_0_0_00129.pdb
# Writing PDB 1S72_2630_G_0_0_00130.pdb
# Writing PDB 1S72_2632_G_0_0_00131.pdb
# Writing PDB 1S72_2696_G_0_0_00132.pdb
# Writing PDB 1S72_2700_G_0_0_00133.pdb
# Writing PDB 1S72_2738_G_0_0_00134.pdb
# Writing PDB 1S72_2740_G_0_0_00135.pdb
# Writing PDB 1S72_2773_G_0_0_00136.pdb
# Writing PDB 1S72_2798_G_0_0_00137.pdb
# Writing PDB 1S72_2809_G_0_0_00138.pdb
# Writing PDB 1S72_2810_G_0_0_00139.pdb
# Writing PDB 1S72_2815_G_0_0_00140.pdb
# Writing PDB 1S72_2877_G_0_0_00141.pdb
# Writing PDB 1S72_2882_G_0_0_00142.pdb
# Writing PDB 1S72_49_G_9_0_00143.pdb
# Writing PDB 1S72_74_G_9_0_00144.pdb
# Writing PDB 1S72_90_G_9_0_00145.pdb
# Writing PDB 1S72_100_G_9_0_00146.pdb
# Writing PDB 1S72_101_G_9_0_00147.pdb
# Writing PDB 1S72_102_G_9_0_00148.pdb
We now list the extracted files and print the sequence of each fragment:
In [3]:
import glob
def read_sequence(pdb_path):
    """Return the sequence of a PDB file as a single string.

    One residue name is collected for every line whose atom-name field
    (characters 12-16 of the line, zero-based slice) equals ``C2``; the
    residue name is read from characters 17-20 of the same line.
    NOTE(review): this presumably relies on every nucleotide having exactly
    one C2 atom, so that each residue contributes one letter -- confirm for
    modified residues.

    Parameters
    ----------
    pdb_path : str
        Path to the PDB file to read.

    Returns
    -------
    str
        Concatenated residue names, in file order. Empty if no C2 atom
        is found.
    """
    # The context manager closes the file deterministically; the bare
    # open() inside a comprehension leaked one handle per fragment.
    with open(pdb_path) as handle:
        residues = [
            line[17:20].strip()
            for line in handle
            if line[12:16].strip() == "C2"
        ]
    return "".join(residues)


# glob returns paths in arbitrary, filesystem-dependent order; sort them so
# the listing is reproducible from run to run.
flist = sorted(glob.glob("snippet/*.pdb"))
for f in flist:
    print("%-40s %4s" % (f, read_sequence(f)))
print("Total number of fragments with sequence GNRA:", len(flist))
snippet/1S72_100_G_9_0_00146.pdb GGGA
snippet/1S72_101_G_9_0_00147.pdb GGAA
snippet/1S72_102_G_9_0_00148.pdb GAAA
snippet/1S72_1037_G_0_0_00055.pdb GGGA
snippet/1S72_1055_G_0_0_00056.pdb GUAA
snippet/1S72_1075_G_0_0_00057.pdb GGGA
snippet/1S72_1076_G_0_0_00058.pdb GGAA
snippet/1S72_1087_G_0_0_00059.pdb GAGA
snippet/1S72_1121_G_0_0_00060.pdb GUAA
snippet/1S72_1158_G_0_0_00061.pdb GGGA
snippet/1S72_1163_G_0_0_00062.pdb GUGA
snippet/1S72_116_G_0_0_00003.pdb GAGA
snippet/1S72_1190_G_0_0_00063.pdb GAAA
snippet/1S72_1197_G_0_0_00064.pdb GUAA
snippet/1S72_1239_G_0_0_00065.pdb GGGA
snippet/1S72_1258_G_0_0_00066.pdb GAGA
snippet/1S72_1284_G_0_0_00067.pdb GUAA
snippet/1S72_1315_G_0_0_00068.pdb GGAA
snippet/1S72_1325_G_0_0_00069.pdb GUGA
snippet/1S72_1327_G_0_0_00070.pdb GAAA
snippet/1S72_1349_G_0_0_00071.pdb GUGA
snippet/1S72_1354_G_0_0_00072.pdb GAAA
snippet/1S72_1376_G_0_0_00073.pdb GCGA
snippet/1S72_1387_G_0_0_00074.pdb GUGA
snippet/1S72_1389_G_0_0_00075.pdb GAGA
snippet/1S72_142_G_0_0_00004.pdb GCAA
snippet/1S72_1468_G_0_0_00076.pdb GCAA
snippet/1S72_1484_G_0_0_00077.pdb GAAA
snippet/1S72_1489_G_0_0_00078.pdb GGGA
snippet/1S72_1490_G_0_0_00079.pdb GGAA
snippet/1S72_1491_G_0_0_00080.pdb GAAA
snippet/1S72_149_G_0_0_00005.pdb GGAA
snippet/1S72_1523_G_0_0_00081.pdb GUGA
snippet/1S72_1525_G_0_0_00082.pdb GAAA
snippet/1S72_157_G_0_0_00006.pdb GAGA
snippet/1S72_1588_G_0_0_00083.pdb GGAA
snippet/1S72_1595_G_0_0_00084.pdb GUAA
snippet/1S72_1604_G_0_0_00085.pdb GGAA
snippet/1S72_1627_G_0_0_00086.pdb GGGA
snippet/1S72_1628_G_0_0_00087.pdb GGAA
snippet/1S72_1629_G_0_0_00088.pdb GAAA
snippet/1S72_1634_G_0_0_00089.pdb GUGA
snippet/1S72_164_G_0_0_00007.pdb GAAA
snippet/1S72_1655_G_0_0_00090.pdb GAAA
snippet/1S72_1681_G_0_0_00091.pdb GAGA
snippet/1S72_1707_G_0_0_00092.pdb GCGA
snippet/1S72_1709_G_0_0_00093.pdb GAAA
snippet/1S72_1726_G_0_0_00094.pdb GGGA
snippet/1S72_1730_G_0_0_00095.pdb GCAA
snippet/1S72_1743_G_0_0_00096.pdb GGGA
snippet/1S72_1744_G_0_0_00097.pdb GGAA
snippet/1S72_1752_G_0_0_00098.pdb GCAA
snippet/1S72_1773_G_0_0_00099.pdb GGAA
snippet/1S72_1780_G_0_0_00100.pdb GGGA
snippet/1S72_1794_G_0_0_00101.pdb GGAA
snippet/1S72_180_G_0_0_00008.pdb GGGA
snippet/1S72_1812_G_0_0_00102.pdb GUGA
snippet/1S72_1819_G_0_0_00103.pdb GGAA
snippet/1S72_1837_G_0_0_00104.pdb GUAA
snippet/1S72_1849_G_0_0_00105.pdb GUGA
snippet/1S72_184_G_0_0_00009.pdb GGAA
snippet/1S72_1855_G_0_0_00106.pdb GCAA
snippet/1S72_1863_G_0_0_00107.pdb GCAA
snippet/1S72_190_G_0_0_00010.pdb GAAA
snippet/1S72_196_G_0_0_00011.pdb GCAA
snippet/1S72_201_G_0_0_00012.pdb GUGA
snippet/1S72_2051_G_0_0_00108.pdb GUGA
snippet/1S72_2080_G_0_0_00109.pdb GAGA
snippet/1S72_2092_G_0_0_00110.pdb GGGA
snippet/1S72_2093_G_0_0_00111.pdb GGAA
snippet/1S72_2097_G_0_0_00112.pdb GCGA
snippet/1S72_213_G_0_0_00013.pdb GUAA
snippet/1S72_219_G_0_0_00014.pdb GCGA
snippet/1S72_223_G_0_0_00015.pdb GUGA
snippet/1S72_2249_G_0_0_00113.pdb GGGA
snippet/1S72_2299_G_0_0_00114.pdb GAAA
snippet/1S72_229_G_0_0_00016.pdb GCGA
snippet/1S72_2337_G_0_0_00115.pdb GGGA
snippet/1S72_2350_G_0_0_00116.pdb GCGA
snippet/1S72_2359_G_0_0_00117.pdb GCAA
snippet/1S72_2365_G_0_0_00118.pdb GCAA
snippet/1S72_2399_G_0_0_00119.pdb GGAA
snippet/1S72_2410_G_0_0_00120.pdb GCGA
snippet/1S72_2412_G_0_0_00121.pdb GAAA
snippet/1S72_2426_G_0_0_00122.pdb GCGA
snippet/1S72_2453_G_0_0_00123.pdb GCAA
snippet/1S72_2466_G_0_0_00124.pdb GAAA
snippet/1S72_2480_G_0_0_00125.pdb GGGA
snippet/1S72_2501_G_0_0_00126.pdb GCAA
snippet/1S72_2574_G_0_0_00127.pdb GCAA
snippet/1S72_2580_G_0_0_00128.pdb GUGA
snippet/1S72_259_G_0_0_00017.pdb GCAA
snippet/1S72_2609_G_0_0_00129.pdb GUGA
snippet/1S72_2630_G_0_0_00130.pdb GUGA
snippet/1S72_2632_G_0_0_00131.pdb GAGA
snippet/1S72_2696_G_0_0_00132.pdb GAGA
snippet/1S72_2700_G_0_0_00133.pdb GGAA
snippet/1S72_2738_G_0_0_00134.pdb GAGA
snippet/1S72_2740_G_0_0_00135.pdb GAGA
snippet/1S72_2773_G_0_0_00136.pdb GUAA
snippet/1S72_2798_G_0_0_00137.pdb GAAA
snippet/1S72_2809_G_0_0_00138.pdb GGAA
snippet/1S72_2810_G_0_0_00139.pdb GAAA
snippet/1S72_2815_G_0_0_00140.pdb GAGA
snippet/1S72_2877_G_0_0_00141.pdb GUAA
snippet/1S72_2882_G_0_0_00142.pdb GAGA
snippet/1S72_314_G_0_0_00018.pdb GGAA
snippet/1S72_324_G_0_0_00019.pdb GUGA
snippet/1S72_334_G_0_0_00020.pdb GUGA
snippet/1S72_351_G_0_0_00021.pdb GAGA
snippet/1S72_404_G_0_0_00022.pdb GCGA
snippet/1S72_426_G_0_0_00023.pdb GCGA
snippet/1S72_446_G_0_0_00024.pdb GAGA
snippet/1S72_456_G_0_0_00025.pdb GUGA
snippet/1S72_469_G_0_0_00026.pdb GUGA
snippet/1S72_482_G_0_0_00027.pdb GCAA
snippet/1S72_499_G_0_0_00028.pdb GGGA
snippet/1S72_49_G_9_0_00143.pdb GGAA
snippet/1S72_504_G_0_0_00029.pdb GCGA
snippet/1S72_506_G_0_0_00030.pdb GAAA
snippet/1S72_518_G_0_0_00031.pdb GAAA
snippet/1S72_529_G_0_0_00032.pdb GCGA
snippet/1S72_537_G_0_0_00033.pdb GCGA
snippet/1S72_577_G_0_0_00034.pdb GCGA
snippet/1S72_588_G_0_0_00035.pdb GUAA
snippet/1S72_599_G_0_0_00036.pdb GGGA
snippet/1S72_600_G_0_0_00037.pdb GGAA
snippet/1S72_627_G_0_0_00038.pdb GAAA
snippet/1S72_640_G_0_0_00039.pdb GGGA
snippet/1S72_64_G_0_0_00000.pdb GCGA
snippet/1S72_657_G_0_0_00040.pdb GCAA
snippet/1S72_679_G_0_0_00041.pdb GGGA
snippet/1S72_689_G_0_0_00042.pdb GGGA
snippet/1S72_690_G_0_0_00043.pdb GGAA
snippet/1S72_691_G_0_0_00044.pdb GAAA
snippet/1S72_743_G_0_0_00045.pdb GGGA
snippet/1S72_74_G_9_0_00144.pdb GGGA
snippet/1S72_77_G_0_0_00001.pdb GGGA
snippet/1S72_805_G_0_0_00046.pdb GAAA
snippet/1S72_816_G_0_0_00047.pdb GGAA
snippet/1S72_854_G_0_0_00048.pdb GUGA
snippet/1S72_871_G_0_0_00049.pdb GUGA
snippet/1S72_873_G_0_0_00050.pdb GAAA
snippet/1S72_892_G_0_0_00051.pdb GCAA
snippet/1S72_90_G_9_0_00145.pdb GCGA
snippet/1S72_911_G_0_0_00052.pdb GAAA
snippet/1S72_92_G_0_0_00002.pdb GCGA
snippet/1S72_948_G_0_0_00053.pdb GUGA
snippet/1S72_958_G_0_0_00054.pdb GCGA
Total number of fragments with sequence GNRA: 149
The example_cluster notebook shows how to visualise and analyse these data.
Content source: srnas/barnaba
Similar notebooks: