Script that analyzes the results from multifarious assembly

Load data

We start by loading the stored files. For this, we load the evaluation library


In [190]:
# General libraries
import os
import pickle
import numpy as np 

# Plot, in nb, only when .show() is called
import matplotlib.pyplot as plt
%matplotlib notebook
plt.ioff()

# Personal libraries
import tools.evaluation as ev
import tools.plot as pt

Before evaluating the simulaion results, we scan the folders and create a directory list. Note that each type of simulations has a five letters heading, MfErr.


In [ ]:
path = '/data/sartori/Data/multifarious'
dir_list = [ path + '/' + x for x in os.listdir(path) if x[0:15]=='Small_OscEa5_Eb']
dir_list.sort()

We can now read the files and store the errors over time. We store them in a dictionary which assigns to the tuple (m,M) a list with all the errors over time


In [14]:
errors_data = {}
sequence_data = {}
duration_data = {}
transition_data = {}

for d in dir_list:
    #if os.path.isfile(d + '/errors.npy'): # errors.npy
    try:
        print d
        # Load errors-data
        #errors = ev.errors_from_files(d)
        errors = np.load(d + '/errors.npy')
        # Create data-key
        key = (float(d.split('_')[2][2:]), float(d.split('_')[3][3:]))
        # Post-process data
        sequence = ev.sequence_from_files(path=d, to_file=True)
        duration = ev.durations_from_files(path=d, to_file=True)
        transition = ev.transition_matrix_from_files(path=d,m=3,to_file=False)  
        # Append
        errors_data.setdefault(key,[]).append(errors)
        sequence_data.setdefault(key,[]).append(sequence)
        duration_data.setdefault(key,[]).append(duration)
        transition_data.setdefault(key,[]).append(transition)
    except IOError:
        print 'error'
        
np.save('./tmp/Small_OscEa5_EbMu_errors.npy', errors_data)
np.save('./tmp/Small_OscEa5_EbMu_sequence.npy', sequence_data)
np.save('./tmp/Small_OscEa5_EbMu_duration.npy', duration_data)
np.save('./tmp/Small_OscEa5_EbMu_transition.npy', transition_data)

Alternatively, we can just load the file that we may have saved before


In [420]:
#with open(path +'/data.npy', 'wb') as f: np.save(f, data)
data_e = np.load('/Users/admin/Repos/dynamic_multifarious/tmp/Small_Ea5_EbMu_errors.npy')[()]
data_s = np.load('/Users/admin/Repos/dynamic_multifarious/tmp/Small_Ea5_EbMu_sequence.npy')[()]
#data_d = np.load('/Users/admin/Repos/dynamic_multifarious/tmp/Small_Ea5_EbMu_duration.npy')[()]
data_t = np.load('/Users/admin/Repos/dynamic_multifarious/tmp/Small_Ea5_EbMu_transition.npy')[()]

We can plot the error over time for one example, together with its filtered counter-part


In [425]:
# Choose data-set
key = (5., -9.)
n = 2
test_errors = data_e[key][n]
print(data_s[key][n])
print(data_t[key][n][1])
#print(data_d[key][n])

# Filter errors
k = .1
time = np.arange(np.size(test_errors[:,0]))
kernel = np.exp(-k*time)
convolved = np.array([np.convolve(x, kernel) for x in test_errors.transpose()])
convolved = convolved.transpose()[:np.size(test_errors[:,0]),:]
norms = test_errors.max(0)/convolved.max(0)

# Generate plots
plt.gca().set_color_cycle(['r', 'g', 'b'])
#plt.plot( (convolved*norms), linewidth=2)
plt.plot(test_errors, lw=2)
#plt.xlim(0, 3000)
plt.savefig('./tmp/trace.pdf')
plt.show()


[0 1 0 2 0 3 0 1 0 2 0 2 0 2 0 3 0 3 0 1 0 2 0 1 0 1 0 1 0 2 0 3]
[[ 2.  4.  0.]
 [ 1.  2.  3.]
 [ 2.  0.  1.]]

Analyze the retrievability of the dynamics

Create transition histograms


In [342]:
from numpy import linalg as LA

histograms = {}

for key in data_t.iterkeys():
    count = np.zeros((3,3))
    for NvecPmat in data_t[key]:
        if NvecPmat[0].sum()>1:
            count += NvecPmat[1]
    histograms[key] = count

We can now create a metric with the distance between the obtained histogram and the goal histogram


In [345]:
# The goal is the normalized allosteric metric
A = np.array([[0,1,0],[0,0,1],[1,0,0]]) / 3.

# We run over all histograms
xyz = []
for key in histograms.keys():
    if np.sum(histograms[key])==0:
        distance = 1.
    else:
        distance = LA.norm(histograms[key]/np.sum(histograms[key]) - A)#/np.sum(histograms[key])
    xyz.append( (key[0], key[1], np.abs(1-distance)) )
        
XYZ = np.asarray(xyz)

We can now create a metric with the distance between the obtained histogram and the goal histogram


In [410]:
import matplotlib.cm as cm
Eb, mu1, score = XYZ[:,0], -XYZ[:,1], XYZ[:,2]
energies = np.arange(0, 30, .1)

fig = plt.figure(1)
ax = fig.add_subplot(111)
heatmap = plt.tricontourf(mu1, Eb, score, 10, norm=plt.Normalize(vmax=1., vmin=0.), cmap=cm.inferno,alpha=.5)
ax.scatter(mu1, Eb, c=score,cmap=cm.inferno, s=30, lw = .5)
bound1 = ax.plot(energies, energies/2, 'k--',lw=2)
bound2 = ax.plot(energies, energies*0+5., 'k--',lw=2)

plt.colorbar(heatmap,alpha=1.)
plt.xlim(0, 20)
plt.ylim(0, 11)
plt.xlabel('Chemical potential, $\mu$',fontsize=16)
plt.ylabel('Binding energy, $E_B$',fontsize=16)
plt.savefig('./tmp/space_EB_MU_EA5.pdf')
plt.show()


Analyze the duration distribution // ADD ATP!!!


In [295]:
from numpy import linalg as LA

durations = {}

for key in data_d.iterkeys():
    for wow in data_d[key]:
        count = np.append(count,wow[0])
        #print count
    durations[key] = count

In [304]:
#[(7.0, -11.9), (7.0, -12.25), (7.0, -11.2), (7.0, -11.55)]
plt.hist(durations[(7,-12.25)], 30, normed=1, facecolor='orange')
plt.savefig('./tmp/Osc_peak.pdf')
plt.show()


BELOW BE DRAGONS!!!

Analysis of error dynamics


In [271]:
[x+1 for x in [10., 15., 20., 25., 30., 35., 40.]]


Out[271]:
[11.0, 16.0, 21.0, 26.0, 31.0, 36.0, 41.0]

In [13]:
reload(ev)
xyz = []
for key in data.iterkeys():
    my_error = 0.
    runs = len(data[key])
    for err_time in data[key]: my_error += err_time[-1,0] / runs
    xyz.append( (key[0], key[1], my_error) )

XYZ = np.asarray(xyz)


----------------------------------------------------------------
TypeError                      Traceback (most recent call last)
<ipython-input-13-6fd40b3ffc04> in <module>()
      4     my_error = 0.
      5     runs = len(data[key])
----> 6     for err_time in data[key]: my_error += err_time[-1,0] / runs
      7     xyz.append( (key[0], key[1], my_error) )
      8 

TypeError: tuple indices must be integers, not tuple

In [6]:
structures = np.split(XYZ[np.argsort(XYZ[:,1]),0], np.where(np.diff(XYZ[np.argsort(XYZ[:,1]),1]) != 0)[0]+1)
species = np.split(XYZ[np.argsort(XYZ[:,1]),1], np.where(np.diff(XYZ[np.argsort(XYZ[:,1]),1]) != 0)[0]+1)
errors = np.split(XYZ[np.argsort(XYZ[:,1]),2], np.where(np.diff(XYZ[np.argsort(XYZ[:,1]),1]) != 0)[0]+1)

We now generate a plot


In [88]:
fig = plt.figure()
ax1 = fig.add_subplot(111)

for sp, st, er in zip(species, structures, errors)[4::2]:
    ax1.scatter(st , er, s=35, c=str(min(1.,40./sp[0])), marker="o", label='$M=$'+str(int(sp[0])))

plt.xlabel('stored structures, $m$',fontdict={'fontsize':20})
plt.ylabel('retrieval error',fontdict={'fontsize':20})
plt.axis([0, 30, 0, 1])

plt.legend(loc='lower right');
plt.show()



In [17]:
species_single = [min(s) for s in species]
error_min = [min(e) for e in errors]

In [19]:
fig = plt.figure()
ax1 = fig.add_subplot(111)

ax1.scatter(species_single , error_min, s=35, c='k', marker="o")

plt.xlabel('species, $M$',fontdict={'fontsize':20})
plt.ylabel('minimal retrieval error ($m=1$)',fontdict={'fontsize':20})
plt.axis([0, 300, 0, 1])

plt.show()



In [89]:
x = [min(s) for s in species]
m_half = [st[np.argmin(np.abs(e-.5))] for e, st in zip(errors, structures)]
e_half = [e[np.argmin(np.abs(e-.5))] for e, st in zip(errors, structures)]

In [92]:
fig = plt.figure()
ax1 = fig.add_subplot(111)

ax1.scatter(x, m_half, s=35, c='k', marker="o")

plt.xlabel('species, $M$',fontdict={'fontsize':20})
plt.ylabel('retrievable structures, $m_{1/2}$',fontdict={'fontsize':20})
plt.axis([0, 305, 0, 30])

plt.show()