Importing and loading

Python imports


In [1]:
import SOM2
import SOMTools
import SOMclust
import scipy
import IO
pylab.rcParams['figure.figsize'] = 10, 10  # that's default image size for this interactive session

Importing SOM data


In [11]:
smap = load('smap.npy')

In [12]:
inputmat = load('projections.npy')
som = SOM2.SOM(inputmat)
som.smap = smap

Importing trajectory (only C-alpha trace)


In [13]:
traj = IO.Trajectory('newtraj78_210521_ca.dcd', struct='2pgb_hydro_ca.pdb')

Computing the Best Matching Units: which structures are in each cells of the map


In [14]:
bmus = som.get_allbmus()

In [15]:
bmus.shape


Out[15]:
(9249, 2)

Computing the density


In [16]:
X,Y,Z = smap.shape
density = zeros((X,Y), dtype=int)
for i,j in bmus:
    density[i,j]+=1
matshow(density)
colorbar()


Out[16]:
<matplotlib.colorbar.Colorbar instance at 0x96c2290>

Computing U-matrix


In [17]:
umat = SOMTools.getUmatrix(smap)

In [18]:
matshow(umat)
colorbar()


Out[18]:
<matplotlib.colorbar.Colorbar instance at 0xa2f5b90>

Computing clusters from the U-matrix

Flooding of the U-matrix


In [52]:
clust = SOMclust.clusters(umat, bmus, smap)


0.96/100: flooding: 24/2499, 116.29, (113, 98)
1.92/100: flooding: 48/2499, 124.05, (110, 102)
2.88/100: flooding: 72/2499, 129.97, (116, 101)
3.84/100: flooding: 96/2499, 134.36, (117, 97)
4.80/100: flooding: 120/2499, 128.21, (116, 86)
5.76/100: flooding: 144/2499, 137.61, (114, 91)
6.72/100: flooding: 168/2499, 139.58, (117, 92)
7.68/100: flooding: 192/2499, 142.94, (115, 91)
8.64/100: flooding: 216/2499, 146.26, (114, 103)
9.60/100: flooding: 240/2499, 139.33, (117, 113)
10.56/100: flooding: 264/2499, 149.04, (107, 100)
11.52/100: flooding: 288/2499, 151.88, (113, 104)
12.48/100: flooding: 312/2499, 154.98, (105, 103)
13.45/100: flooding: 336/2499, 158.73, (118, 83)
14.41/100: flooding: 360/2499, 161.88, (105, 101)
15.37/100: flooding: 384/2499, 165.25, (121, 100)
16.33/100: flooding: 408/2499, 169.98, (100, 101)
17.29/100: flooding: 432/2499, 173.71, (113, 109)
18.25/100: flooding: 456/2499, 178.44, (107, 91)
19.21/100: flooding: 480/2499, 172.60, (114, 116)
20.17/100: flooding: 504/2499, 185.66, (110, 111)
21.13/100: flooding: 528/2499, 190.50, (109, 110)
22.09/100: flooding: 552/2499, 194.82, (105, 93)
23.05/100: flooding: 576/2499, 204.19, (116, 117)
24.01/100: flooding: 600/2499, 210.11, (122, 89)
24.97/100: flooding: 624/2499, 173.63, (114, 126)
25.93/100: flooding: 648/2499, 216.68, (122, 88)
26.89/100: flooding: 672/2499, 222.28, (121, 81)
27.85/100: flooding: 696/2499, 229.23, (118, 81)
28.81/100: flooding: 720/2499, 240.14, (123, 108)
29.77/100: flooding: 744/2499, 245.35, (117, 122)
30.73/100: flooding: 768/2499, 252.01, (124, 90)
31.69/100: flooding: 792/2499, 263.87, (112, 120)
32.65/100: flooding: 816/2499, 273.58, (118, 124)
33.61/100: flooding: 840/2499, 282.62, (128, 98)
34.57/100: flooding: 864/2499, 291.66, (124, 114)
35.53/100: flooding: 888/2499, 302.47, (102, 95)
36.49/100: flooding: 912/2499, 311.08, (125, 86)
37.45/100: flooding: 936/2499, 252.10, (88, 98)
38.42/100: flooding: 960/2499, 287.27, (89, 98)
39.38/100: flooding: 984/2499, 313.04, (98, 99)
40.34/100: flooding: 1008/2499, 323.90, (90, 99)
41.30/100: flooding: 1032/2499, 330.96, (125, 85)
42.26/100: flooding: 1056/2499, 337.82, (129, 105)
43.22/100: flooding: 1080/2499, 349.18, (85, 104)
44.18/100: flooding: 1104/2499, 359.31, (123, 126)
45.14/100: flooding: 1128/2499, 368.49, (131, 110)
46.10/100: flooding: 1152/2499, 377.94, (120, 121)
47.06/100: flooding: 1176/2499, 388.64, (124, 76)
48.02/100: flooding: 1200/2499, 397.82, (129, 91)
48.98/100: flooding: 1224/2499, 412.56, (133, 103)
49.94/100: flooding: 1248/2499, 425.51, (121, 121)
50.90/100: flooding: 1272/2499, 442.17, (133, 105)
51.86/100: flooding: 1296/2499, 455.20, (92, 100)
52.82/100: flooding: 1320/2499, 475.61, (89, 109)
53.78/100: flooding: 1344/2499, 494.71, (124, 119)
54.74/100: flooding: 1368/2499, 518.05, (128, 79)
55.70/100: flooding: 1392/2499, 546.59, (129, 115)
56.66/100: flooding: 1416/2499, 566.68, (131, 89)
57.62/100: flooding: 1440/2499, 602.25, (126, 73)
58.58/100: flooding: 1464/2499, 645.25, (129, 76)
59.54/100: flooding: 1488/2499, 689.67, (131, 79)
60.50/100: flooding: 1512/2499, 752.44, (129, 117)
61.46/100: flooding: 1536/2499, 828.33, (134, 91)
62.42/100: flooding: 1560/2499, 868.37, (105, 90)
63.39/100: flooding: 1584/2499, 919.92, (134, 120)
64.35/100: flooding: 1608/2499, 995.03, (136, 119)
65.31/100: flooding: 1632/2499, 731.25, (140, 87)
66.27/100: flooding: 1656/2499, 1029.42, (136, 118)
67.23/100: flooding: 1680/2499, 1078.60, (107, 113)
68.19/100: flooding: 1704/2499, 1162.14, (106, 89)
69.15/100: flooding: 1728/2499, 478.78, (144, 90)
70.11/100: flooding: 1752/2499, 1254.38, (135, 79)
71.07/100: flooding: 1776/2499, 1373.98, (131, 76)
72.03/100: flooding: 1800/2499, 1466.43, (134, 88)
72.99/100: flooding: 1824/2499, 1588.46, (101, 92)
73.95/100: flooding: 1848/2499, 1720.51, (135, 88)
74.91/100: flooding: 1872/2499, 1816.56, (147, 87)
75.87/100: flooding: 1896/2499, 2004.45, (145, 84)
76.83/100: flooding: 1920/2499, 2125.81, (98, 109)
77.79/100: flooding: 1944/2499, 2224.33, (139, 114)
78.75/100: flooding: 1968/2499, 2344.46, (101, 110)
79.71/100: flooding: 1992/2499, 340.81, (101, 116)
80.67/100: flooding: 2016/2499, 410.00, (99, 121)
81.63/100: flooding: 2040/2499, 493.18, (103, 116)
82.59/100: flooding: 2064/2499, 622.40, (96, 117)
83.55/100: flooding: 2088/2499, 822.66, (95, 118)
84.51/100: flooding: 2112/2499, 1032.77, (106, 120)
85.47/100: flooding: 2136/2499, 1374.94, (99, 112)
86.43/100: flooding: 2160/2499, 1786.49, (94, 112)
87.39/100: flooding: 2184/2499, 2205.17, (91, 115)
88.36/100: flooding: 2208/2499, 2395.97, (96, 95)
89.32/100: flooding: 2232/2499, 2478.52, (144, 81)
90.28/100: flooding: 2256/2499, 2562.94, (94, 95)
91.24/100: flooding: 2280/2499, 2615.99, (90, 116)
92.20/100: flooding: 2304/2499, 2689.31, (109, 83)
93.16/100: flooding: 2328/2499, 2796.57, (108, 121)
94.12/100: flooding: 2352/2499, 2978.45, (150, 80)
95.08/100: flooding: 2376/2499, 3290.29, (104, 88)
96.04/100: flooding: 2400/2499, 3157.62, (103, 130)
97.00/100: flooding: 2424/2499, 3620.29, (150, 88)
97.96/100: flooding: 2448/2499, 3850.62, (101, 135)
98.92/100: flooding: 2472/2499, 4077.51, (101, 136)
99.88/100: flooding: 2496/2499, 4505.36, (139, 76)

In [20]:
imshow(ma.masked_array(clust.umat_cont, clust.mask), interpolation='nearest')
colorbar()


Out[20]:
<matplotlib.colorbar.Colorbar instance at 0xaf56b00>

Computing clusters


In [53]:
clust.getclusters()


Out[53]:
(array([[19, 19, 19, ..., 20, 20, 20],
       [19, 19, 19, ..., 20, 20, 20],
       [19, 19, 19, ..., 20, 20, 20],
       ..., 
       [ 2,  2,  2, ...,  5,  5, 23],
       [ 2,  2,  2, ...,  5,  5,  5],
       [ 2,  2,  2, ...,  5,  5,  5]]),
 array([[19, 19,  0, ..., 20,  0,  0],
       [19, 19, 19, ..., 20, 20, 20],
       [19, 19, 19, ..., 20, 20, 20],
       ..., 
       [ 2,  2,  2, ...,  5,  0,  0],
       [ 2,  2,  2, ...,  5,  5,  0],
       [ 2,  2,  2, ...,  5,  5,  0]]))

In [22]:
clust.plotclusters()


Which structures are in each clusters?


In [23]:
nframes = clust.labels.size
clusters = []
for i in range(nframes):
    clusters.append([i, clust.labels[i]])
clusters = asarray(clusters)
clusters = clusters[clusters[:,1].argsort()]
savetxt('clusters', clusters, fmt='%d')

Projecting data onto the SOM

The RMSD

Structural alignment


In [24]:
traj.struct.atoms['coord'] = traj.struct.atoms['coord'] - traj.struct.atoms['coord'].mean(axis=0)

In [25]:
traj.align(templatestruct=(traj.struct.atoms['coord']).flatten())

In [26]:
i,j = traj.array.shape
print (i,j)


(9249, 168)

In [27]:
traj.array = traj.array.reshape((i, j/3, 3))
traj.array.shape


Out[27]:
(9249, 56, 3)

Computing the RMSD


In [28]:
rmsds = sqrt((( traj.array - traj.struct.atoms['coord']*numpy.ones_like(traj.array) )**2).sum(axis=2)).mean(axis=1)

In [29]:
histo = hist(rmsds, 100, range=[0,5])


Projecting the RMSD onto the SOM


In [30]:
def project(data):
    pmap = zeros((X,Y))
    for c,e in enumerate(bmus):
        i,j = e
        pmap[i,j]+=data[c]
    pmap = pmap / density
    pmap = clust.flood(pmap, clust.x_offset, clust.y_offset, clust.mask)[0]
    return pmap
rmsdmap = project(rmsds)


-c:6: RuntimeWarning: invalid value encountered in divide

In [31]:
clust.plotclusters(color='k', matrix=rmsdmap, cmap=cm.PRGn, vmax=2.)



In [32]:
mu_rmsd = []
std_rmsd = []
clustid = unique(clust.labels)
for e in clustid:
    clustrmsd = rmsds[nonzero(clust.labels == e)[0]]
    mu = clustrmsd.mean()
    mu_rmsd.append(mu)
    std = clustrmsd.std()
    std_rmsd.append(std)
    text(e, mu+std, clustrmsd.size, horizontalalignment='center')
bar(clustid, mu_rmsd, yerr=std_rmsd, align='center')
ylabel("rmsd in A")
xlabel("cluster id")
grid()


Saving clusters


In [55]:
for e in unique(clust.labels):
    clust.getTrajClust(traj, e)

In [ ]: