In [1]:
%autosave 0
from __future__ import print_function


Autosave disabled

Snippet utility: extract fragments from structures with a given sequence

Here we show how to extract fragments with a given sequence from PDB files. Given a PDB file (trajectories are NOT supported), all fragments matching the sequence can be extracted with the function:

bb.snippet(fname,sequence,outdir=outdir)

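The PDB fragments are written to the directory outdir. In the example below we extract all fragments with sequence GNRA from the structure of the large ribosomal subunit (PDB entry 1S72). As the sequences printed further down illustrate, N matches any nucleotide and R matches a purine (A or G).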


In [2]:
import barnaba as bb
import os

# Input structure, query sequence and the directory receiving the fragments.
fname = "../test/data/1S72.pdb"
sequence = "GNRA"
outdir = "snippet"

# Create the output directory with the standard library instead of shelling
# out to `mkdir -p`: this does not depend on a POSIX shell being available and
# raises an exception on failure instead of silently returning a status code.
# (isdir + makedirs rather than exist_ok=True keeps the cell Python 2 friendly.)
if not os.path.isdir(outdir):
    os.makedirs(outdir)

# Write one PDB file per matching fragment into `outdir`.
bb.snippet(fname, sequence, outdir=outdir)


# Initializing file ../test/data/1S72.pdb
# Writing PDB 1S72_64_G_0_0_00000.pdb
# Writing PDB 1S72_77_G_0_0_00001.pdb
# Writing PDB 1S72_92_G_0_0_00002.pdb
# Writing PDB 1S72_116_G_0_0_00003.pdb
# Writing PDB 1S72_142_G_0_0_00004.pdb
# Writing PDB 1S72_149_G_0_0_00005.pdb
# Writing PDB 1S72_157_G_0_0_00006.pdb
# Writing PDB 1S72_164_G_0_0_00007.pdb
# Writing PDB 1S72_180_G_0_0_00008.pdb
# Writing PDB 1S72_184_G_0_0_00009.pdb
# Writing PDB 1S72_190_G_0_0_00010.pdb
# Writing PDB 1S72_196_G_0_0_00011.pdb
# Writing PDB 1S72_201_G_0_0_00012.pdb
# Writing PDB 1S72_213_G_0_0_00013.pdb
# Writing PDB 1S72_219_G_0_0_00014.pdb
# Writing PDB 1S72_223_G_0_0_00015.pdb
# Writing PDB 1S72_229_G_0_0_00016.pdb
# Writing PDB 1S72_259_G_0_0_00017.pdb
# Writing PDB 1S72_314_G_0_0_00018.pdb
# Writing PDB 1S72_324_G_0_0_00019.pdb
# Writing PDB 1S72_334_G_0_0_00020.pdb
# Writing PDB 1S72_351_G_0_0_00021.pdb
# Writing PDB 1S72_404_G_0_0_00022.pdb
# Writing PDB 1S72_426_G_0_0_00023.pdb
# Writing PDB 1S72_446_G_0_0_00024.pdb
# Writing PDB 1S72_456_G_0_0_00025.pdb
# Writing PDB 1S72_469_G_0_0_00026.pdb
# Writing PDB 1S72_482_G_0_0_00027.pdb
# Writing PDB 1S72_499_G_0_0_00028.pdb
# Writing PDB 1S72_504_G_0_0_00029.pdb
# Writing PDB 1S72_506_G_0_0_00030.pdb
# Writing PDB 1S72_518_G_0_0_00031.pdb
# Writing PDB 1S72_529_G_0_0_00032.pdb
# Writing PDB 1S72_537_G_0_0_00033.pdb
# Writing PDB 1S72_577_G_0_0_00034.pdb
# Writing PDB 1S72_588_G_0_0_00035.pdb
# Writing PDB 1S72_599_G_0_0_00036.pdb
# Writing PDB 1S72_600_G_0_0_00037.pdb
# Writing PDB 1S72_627_G_0_0_00038.pdb
# Writing PDB 1S72_640_G_0_0_00039.pdb
# Writing PDB 1S72_657_G_0_0_00040.pdb
# Writing PDB 1S72_679_G_0_0_00041.pdb
# Writing PDB 1S72_689_G_0_0_00042.pdb
# Writing PDB 1S72_690_G_0_0_00043.pdb
# Writing PDB 1S72_691_G_0_0_00044.pdb
# Writing PDB 1S72_743_G_0_0_00045.pdb
# Writing PDB 1S72_805_G_0_0_00046.pdb
# Writing PDB 1S72_816_G_0_0_00047.pdb
# Writing PDB 1S72_854_G_0_0_00048.pdb
# Writing PDB 1S72_871_G_0_0_00049.pdb
# Writing PDB 1S72_873_G_0_0_00050.pdb
# Writing PDB 1S72_892_G_0_0_00051.pdb
# Writing PDB 1S72_911_G_0_0_00052.pdb
# Writing PDB 1S72_948_G_0_0_00053.pdb
# Writing PDB 1S72_958_G_0_0_00054.pdb
# Writing PDB 1S72_1037_G_0_0_00055.pdb
# Writing PDB 1S72_1055_G_0_0_00056.pdb
# Writing PDB 1S72_1075_G_0_0_00057.pdb
# Writing PDB 1S72_1076_G_0_0_00058.pdb
# Writing PDB 1S72_1087_G_0_0_00059.pdb
# Writing PDB 1S72_1121_G_0_0_00060.pdb
# Writing PDB 1S72_1158_G_0_0_00061.pdb
# Writing PDB 1S72_1163_G_0_0_00062.pdb
# Writing PDB 1S72_1190_G_0_0_00063.pdb
# Writing PDB 1S72_1197_G_0_0_00064.pdb
# Writing PDB 1S72_1239_G_0_0_00065.pdb
# Writing PDB 1S72_1258_G_0_0_00066.pdb
# Writing PDB 1S72_1284_G_0_0_00067.pdb
# Writing PDB 1S72_1315_G_0_0_00068.pdb
# Writing PDB 1S72_1325_G_0_0_00069.pdb
# Writing PDB 1S72_1327_G_0_0_00070.pdb
# Writing PDB 1S72_1349_G_0_0_00071.pdb
# Writing PDB 1S72_1354_G_0_0_00072.pdb
# Writing PDB 1S72_1376_G_0_0_00073.pdb
# Writing PDB 1S72_1387_G_0_0_00074.pdb
# Writing PDB 1S72_1389_G_0_0_00075.pdb
# Writing PDB 1S72_1468_G_0_0_00076.pdb
# Writing PDB 1S72_1484_G_0_0_00077.pdb
# Writing PDB 1S72_1489_G_0_0_00078.pdb
# Writing PDB 1S72_1490_G_0_0_00079.pdb
# Writing PDB 1S72_1491_G_0_0_00080.pdb
# Writing PDB 1S72_1523_G_0_0_00081.pdb
# Writing PDB 1S72_1525_G_0_0_00082.pdb
# Writing PDB 1S72_1588_G_0_0_00083.pdb
# Writing PDB 1S72_1595_G_0_0_00084.pdb
# Writing PDB 1S72_1604_G_0_0_00085.pdb
# Writing PDB 1S72_1627_G_0_0_00086.pdb
# Writing PDB 1S72_1628_G_0_0_00087.pdb
# Writing PDB 1S72_1629_G_0_0_00088.pdb
# Writing PDB 1S72_1634_G_0_0_00089.pdb
# Writing PDB 1S72_1655_G_0_0_00090.pdb
# Writing PDB 1S72_1681_G_0_0_00091.pdb
# Writing PDB 1S72_1707_G_0_0_00092.pdb
# Writing PDB 1S72_1709_G_0_0_00093.pdb
# Writing PDB 1S72_1726_G_0_0_00094.pdb
# Writing PDB 1S72_1730_G_0_0_00095.pdb
# Writing PDB 1S72_1743_G_0_0_00096.pdb
# Writing PDB 1S72_1744_G_0_0_00097.pdb
# Writing PDB 1S72_1752_G_0_0_00098.pdb
# Writing PDB 1S72_1773_G_0_0_00099.pdb
# Writing PDB 1S72_1780_G_0_0_00100.pdb
# Writing PDB 1S72_1794_G_0_0_00101.pdb
# Writing PDB 1S72_1812_G_0_0_00102.pdb
# Writing PDB 1S72_1819_G_0_0_00103.pdb
# Writing PDB 1S72_1837_G_0_0_00104.pdb
# Writing PDB 1S72_1849_G_0_0_00105.pdb
# Writing PDB 1S72_1855_G_0_0_00106.pdb
# Writing PDB 1S72_1863_G_0_0_00107.pdb
# Writing PDB 1S72_2051_G_0_0_00108.pdb
# Writing PDB 1S72_2080_G_0_0_00109.pdb
# Writing PDB 1S72_2092_G_0_0_00110.pdb
# Writing PDB 1S72_2093_G_0_0_00111.pdb
# Writing PDB 1S72_2097_G_0_0_00112.pdb
# Writing PDB 1S72_2249_G_0_0_00113.pdb
# Writing PDB 1S72_2299_G_0_0_00114.pdb
# Warning: no P atom in residue 2344_G_0_0 
# Writing PDB 1S72_2337_G_0_0_00115.pdb
# Writing PDB 1S72_2350_G_0_0_00116.pdb
# Writing PDB 1S72_2359_G_0_0_00117.pdb
# Writing PDB 1S72_2365_G_0_0_00118.pdb
# Writing PDB 1S72_2399_G_0_0_00119.pdb
# Writing PDB 1S72_2410_G_0_0_00120.pdb
# Writing PDB 1S72_2412_G_0_0_00121.pdb
# Writing PDB 1S72_2426_G_0_0_00122.pdb
# Writing PDB 1S72_2453_G_0_0_00123.pdb
# Writing PDB 1S72_2466_G_0_0_00124.pdb
# Writing PDB 1S72_2480_G_0_0_00125.pdb
# Writing PDB 1S72_2501_G_0_0_00126.pdb
# Writing PDB 1S72_2574_G_0_0_00127.pdb
# Writing PDB 1S72_2580_G_0_0_00128.pdb
# Writing PDB 1S72_2609_G_0_0_00129.pdb
# Writing PDB 1S72_2630_G_0_0_00130.pdb
# Writing PDB 1S72_2632_G_0_0_00131.pdb
# Writing PDB 1S72_2696_G_0_0_00132.pdb
# Writing PDB 1S72_2700_G_0_0_00133.pdb
# Writing PDB 1S72_2738_G_0_0_00134.pdb
# Writing PDB 1S72_2740_G_0_0_00135.pdb
# Writing PDB 1S72_2773_G_0_0_00136.pdb
# Writing PDB 1S72_2798_G_0_0_00137.pdb
# Writing PDB 1S72_2809_G_0_0_00138.pdb
# Writing PDB 1S72_2810_G_0_0_00139.pdb
# Writing PDB 1S72_2815_G_0_0_00140.pdb
# Writing PDB 1S72_2877_G_0_0_00141.pdb
# Writing PDB 1S72_2882_G_0_0_00142.pdb
# Writing PDB 1S72_49_G_9_0_00143.pdb
# Writing PDB 1S72_74_G_9_0_00144.pdb
# Writing PDB 1S72_90_G_9_0_00145.pdb
# Writing PDB 1S72_100_G_9_0_00146.pdb
# Writing PDB 1S72_101_G_9_0_00147.pdb
# Writing PDB 1S72_102_G_9_0_00148.pdb

We now list the files and print the sequence:


In [3]:
import glob

def read_fragment_sequence(pdb_path):
    """Return the residue names found in a PDB file, one per atom named C2.

    For every line whose atom-name field (characters 12-15) is "C2", the
    residue-name field (characters 17-19) is collected, in file order.
    The file is opened in a `with` block so the handle is always closed.

    Parameters
    ----------
    pdb_path : str
        Path to the PDB file to read.

    Returns
    -------
    list of str
        Stripped residue names, one entry per C2 atom line.
    """
    with open(pdb_path) as handle:
        return [
            line[17:20].strip()
            for line in handle
            if line[12:16].strip() == "C2"
        ]


# glob.glob returns paths in arbitrary order; sort for a reproducible listing.
flist = sorted(glob.glob("snippet/*.pdb"))
for f in flist:
    seq = read_fragment_sequence(f)
    print("%-40s %4s" % (f, "".join(seq)))

print("Total number of fragments with sequence GNRA:", len(flist))


snippet/1S72_100_G_9_0_00146.pdb         GGGA
snippet/1S72_101_G_9_0_00147.pdb         GGAA
snippet/1S72_102_G_9_0_00148.pdb         GAAA
snippet/1S72_1037_G_0_0_00055.pdb        GGGA
snippet/1S72_1055_G_0_0_00056.pdb        GUAA
snippet/1S72_1075_G_0_0_00057.pdb        GGGA
snippet/1S72_1076_G_0_0_00058.pdb        GGAA
snippet/1S72_1087_G_0_0_00059.pdb        GAGA
snippet/1S72_1121_G_0_0_00060.pdb        GUAA
snippet/1S72_1158_G_0_0_00061.pdb        GGGA
snippet/1S72_1163_G_0_0_00062.pdb        GUGA
snippet/1S72_116_G_0_0_00003.pdb         GAGA
snippet/1S72_1190_G_0_0_00063.pdb        GAAA
snippet/1S72_1197_G_0_0_00064.pdb        GUAA
snippet/1S72_1239_G_0_0_00065.pdb        GGGA
snippet/1S72_1258_G_0_0_00066.pdb        GAGA
snippet/1S72_1284_G_0_0_00067.pdb        GUAA
snippet/1S72_1315_G_0_0_00068.pdb        GGAA
snippet/1S72_1325_G_0_0_00069.pdb        GUGA
snippet/1S72_1327_G_0_0_00070.pdb        GAAA
snippet/1S72_1349_G_0_0_00071.pdb        GUGA
snippet/1S72_1354_G_0_0_00072.pdb        GAAA
snippet/1S72_1376_G_0_0_00073.pdb        GCGA
snippet/1S72_1387_G_0_0_00074.pdb        GUGA
snippet/1S72_1389_G_0_0_00075.pdb        GAGA
snippet/1S72_142_G_0_0_00004.pdb         GCAA
snippet/1S72_1468_G_0_0_00076.pdb        GCAA
snippet/1S72_1484_G_0_0_00077.pdb        GAAA
snippet/1S72_1489_G_0_0_00078.pdb        GGGA
snippet/1S72_1490_G_0_0_00079.pdb        GGAA
snippet/1S72_1491_G_0_0_00080.pdb        GAAA
snippet/1S72_149_G_0_0_00005.pdb         GGAA
snippet/1S72_1523_G_0_0_00081.pdb        GUGA
snippet/1S72_1525_G_0_0_00082.pdb        GAAA
snippet/1S72_157_G_0_0_00006.pdb         GAGA
snippet/1S72_1588_G_0_0_00083.pdb        GGAA
snippet/1S72_1595_G_0_0_00084.pdb        GUAA
snippet/1S72_1604_G_0_0_00085.pdb        GGAA
snippet/1S72_1627_G_0_0_00086.pdb        GGGA
snippet/1S72_1628_G_0_0_00087.pdb        GGAA
snippet/1S72_1629_G_0_0_00088.pdb        GAAA
snippet/1S72_1634_G_0_0_00089.pdb        GUGA
snippet/1S72_164_G_0_0_00007.pdb         GAAA
snippet/1S72_1655_G_0_0_00090.pdb        GAAA
snippet/1S72_1681_G_0_0_00091.pdb        GAGA
snippet/1S72_1707_G_0_0_00092.pdb        GCGA
snippet/1S72_1709_G_0_0_00093.pdb        GAAA
snippet/1S72_1726_G_0_0_00094.pdb        GGGA
snippet/1S72_1730_G_0_0_00095.pdb        GCAA
snippet/1S72_1743_G_0_0_00096.pdb        GGGA
snippet/1S72_1744_G_0_0_00097.pdb        GGAA
snippet/1S72_1752_G_0_0_00098.pdb        GCAA
snippet/1S72_1773_G_0_0_00099.pdb        GGAA
snippet/1S72_1780_G_0_0_00100.pdb        GGGA
snippet/1S72_1794_G_0_0_00101.pdb        GGAA
snippet/1S72_180_G_0_0_00008.pdb         GGGA
snippet/1S72_1812_G_0_0_00102.pdb        GUGA
snippet/1S72_1819_G_0_0_00103.pdb        GGAA
snippet/1S72_1837_G_0_0_00104.pdb        GUAA
snippet/1S72_1849_G_0_0_00105.pdb        GUGA
snippet/1S72_184_G_0_0_00009.pdb         GGAA
snippet/1S72_1855_G_0_0_00106.pdb        GCAA
snippet/1S72_1863_G_0_0_00107.pdb        GCAA
snippet/1S72_190_G_0_0_00010.pdb         GAAA
snippet/1S72_196_G_0_0_00011.pdb         GCAA
snippet/1S72_201_G_0_0_00012.pdb         GUGA
snippet/1S72_2051_G_0_0_00108.pdb        GUGA
snippet/1S72_2080_G_0_0_00109.pdb        GAGA
snippet/1S72_2092_G_0_0_00110.pdb        GGGA
snippet/1S72_2093_G_0_0_00111.pdb        GGAA
snippet/1S72_2097_G_0_0_00112.pdb        GCGA
snippet/1S72_213_G_0_0_00013.pdb         GUAA
snippet/1S72_219_G_0_0_00014.pdb         GCGA
snippet/1S72_223_G_0_0_00015.pdb         GUGA
snippet/1S72_2249_G_0_0_00113.pdb        GGGA
snippet/1S72_2299_G_0_0_00114.pdb        GAAA
snippet/1S72_229_G_0_0_00016.pdb         GCGA
snippet/1S72_2337_G_0_0_00115.pdb        GGGA
snippet/1S72_2350_G_0_0_00116.pdb        GCGA
snippet/1S72_2359_G_0_0_00117.pdb        GCAA
snippet/1S72_2365_G_0_0_00118.pdb        GCAA
snippet/1S72_2399_G_0_0_00119.pdb        GGAA
snippet/1S72_2410_G_0_0_00120.pdb        GCGA
snippet/1S72_2412_G_0_0_00121.pdb        GAAA
snippet/1S72_2426_G_0_0_00122.pdb        GCGA
snippet/1S72_2453_G_0_0_00123.pdb        GCAA
snippet/1S72_2466_G_0_0_00124.pdb        GAAA
snippet/1S72_2480_G_0_0_00125.pdb        GGGA
snippet/1S72_2501_G_0_0_00126.pdb        GCAA
snippet/1S72_2574_G_0_0_00127.pdb        GCAA
snippet/1S72_2580_G_0_0_00128.pdb        GUGA
snippet/1S72_259_G_0_0_00017.pdb         GCAA
snippet/1S72_2609_G_0_0_00129.pdb        GUGA
snippet/1S72_2630_G_0_0_00130.pdb        GUGA
snippet/1S72_2632_G_0_0_00131.pdb        GAGA
snippet/1S72_2696_G_0_0_00132.pdb        GAGA
snippet/1S72_2700_G_0_0_00133.pdb        GGAA
snippet/1S72_2738_G_0_0_00134.pdb        GAGA
snippet/1S72_2740_G_0_0_00135.pdb        GAGA
snippet/1S72_2773_G_0_0_00136.pdb        GUAA
snippet/1S72_2798_G_0_0_00137.pdb        GAAA
snippet/1S72_2809_G_0_0_00138.pdb        GGAA
snippet/1S72_2810_G_0_0_00139.pdb        GAAA
snippet/1S72_2815_G_0_0_00140.pdb        GAGA
snippet/1S72_2877_G_0_0_00141.pdb        GUAA
snippet/1S72_2882_G_0_0_00142.pdb        GAGA
snippet/1S72_314_G_0_0_00018.pdb         GGAA
snippet/1S72_324_G_0_0_00019.pdb         GUGA
snippet/1S72_334_G_0_0_00020.pdb         GUGA
snippet/1S72_351_G_0_0_00021.pdb         GAGA
snippet/1S72_404_G_0_0_00022.pdb         GCGA
snippet/1S72_426_G_0_0_00023.pdb         GCGA
snippet/1S72_446_G_0_0_00024.pdb         GAGA
snippet/1S72_456_G_0_0_00025.pdb         GUGA
snippet/1S72_469_G_0_0_00026.pdb         GUGA
snippet/1S72_482_G_0_0_00027.pdb         GCAA
snippet/1S72_499_G_0_0_00028.pdb         GGGA
snippet/1S72_49_G_9_0_00143.pdb          GGAA
snippet/1S72_504_G_0_0_00029.pdb         GCGA
snippet/1S72_506_G_0_0_00030.pdb         GAAA
snippet/1S72_518_G_0_0_00031.pdb         GAAA
snippet/1S72_529_G_0_0_00032.pdb         GCGA
snippet/1S72_537_G_0_0_00033.pdb         GCGA
snippet/1S72_577_G_0_0_00034.pdb         GCGA
snippet/1S72_588_G_0_0_00035.pdb         GUAA
snippet/1S72_599_G_0_0_00036.pdb         GGGA
snippet/1S72_600_G_0_0_00037.pdb         GGAA
snippet/1S72_627_G_0_0_00038.pdb         GAAA
snippet/1S72_640_G_0_0_00039.pdb         GGGA
snippet/1S72_64_G_0_0_00000.pdb          GCGA
snippet/1S72_657_G_0_0_00040.pdb         GCAA
snippet/1S72_679_G_0_0_00041.pdb         GGGA
snippet/1S72_689_G_0_0_00042.pdb         GGGA
snippet/1S72_690_G_0_0_00043.pdb         GGAA
snippet/1S72_691_G_0_0_00044.pdb         GAAA
snippet/1S72_743_G_0_0_00045.pdb         GGGA
snippet/1S72_74_G_9_0_00144.pdb          GGGA
snippet/1S72_77_G_0_0_00001.pdb          GGGA
snippet/1S72_805_G_0_0_00046.pdb         GAAA
snippet/1S72_816_G_0_0_00047.pdb         GGAA
snippet/1S72_854_G_0_0_00048.pdb         GUGA
snippet/1S72_871_G_0_0_00049.pdb         GUGA
snippet/1S72_873_G_0_0_00050.pdb         GAAA
snippet/1S72_892_G_0_0_00051.pdb         GCAA
snippet/1S72_90_G_9_0_00145.pdb          GCGA
snippet/1S72_911_G_0_0_00052.pdb         GAAA
snippet/1S72_92_G_0_0_00002.pdb          GCGA
snippet/1S72_948_G_0_0_00053.pdb         GUGA
snippet/1S72_958_G_0_0_00054.pdb         GCGA
Total number of fragments with sequence GNRA: 149

See example_cluster for how to visualise and analyse these data.