Testing clustering based on the MFPT


In [1]:
import sys
sys.path.append("../")
sys.path.append("../nmpath/")
from test.tools_for_notebook import *
%matplotlib inline
from nmpath.auxfunctions import *
from nmpath.mfpt import *
from nmpath.mappers import rectilinear_mapper
from nmpath.clustering import *
#from nmpath.mappers import voronoi_mapper

Toy model with two basins


In [2]:
plot_traj([],[],figsize=(6,5))


Generating MC trajectory

Continuos Ensemble


In [3]:
mc_traj = mc_simulation2D(500000)
my_ensemble = Ensemble([mc_traj])

Discrete Ensemble and Transition Matrix

The mapping funcion divides each dimension in 12. The total number of bins is 144.


In [4]:
discrete_ens = DiscreteEnsemble.from_ensemble(my_ensemble, mapping_function2D)

# Transition Matrix
K = discrete_ens._mle_transition_matrix(N*N,prior_counts=1e-6)

Agglomerative Clustering

The points with the same color belong to the same cluster, only the clusters with size > 1 are shown.


In [16]:
t_min_list=[]
t_max_list=[]
t_AB_list=[]
n_clusters = [135, 130, 125, 120, 115, 110, 105, 100, 95, 90, 85, 80, 75, 70]

for n in n_clusters:
    big_clusters=[]
    big_clusters_index =[]
    clusters, t_min, t_max, clustered_tmatrix = kinetic_clustering_from_tmatrix(K, n, verbose=False)
    t_min_list.append(t_min)
    t_max_list.append(t_max)
    
    for i, cluster in enumerate(clusters):
        if len(cluster) > 1:
            big_clusters.append(cluster)
            big_clusters_index.append(i)
            
    n_big = len(big_clusters)
    
    if n_big > 1:
        tAB = markov_commute_time(clustered_tmatrix,[big_clusters_index[0]],[big_clusters_index[1]] )
    else:
        tAB = 0.0
    t_AB_list.append(tAB)
    
    discrete = [True for i in range(n_big)]
    
    print("{} Clusters, t_cut: {:.2f}tau, t_max: {:.2e}tau, tAB: {:.2f}tau".format(n, t_min, t_max, tAB))
    plot_traj([ [big_clusters[i],[]] for i in range(n_big) ], 
              discrete, std = 0.00002, alpha=0.3, justpoints=True, figsize=(3,3))


135 Clusters, t_cut: 104.27tau, t_max: 6.95e+09tau, tAB: 6299.18tau
130 Clusters, t_cut: 226.32tau, t_max: 6.95e+09tau, tAB: 6262.79tau
125 Clusters, t_cut: 311.51tau, t_max: 6.95e+09tau, tAB: 6216.89tau
120 Clusters, t_cut: 581.75tau, t_max: 6.95e+09tau, tAB: 6173.23tau
115 Clusters, t_cut: 705.49tau, t_max: 6.95e+09tau, tAB: 6092.57tau
110 Clusters, t_cut: 939.86tau, t_max: 6.95e+09tau, tAB: 6014.91tau
105 Clusters, t_cut: 1479.61tau, t_max: 6.95e+09tau, tAB: 5898.42tau
100 Clusters, t_cut: 1718.03tau, t_max: 6.95e+09tau, tAB: 5804.86tau
95 Clusters, t_cut: 2529.20tau, t_max: 6.95e+09tau, tAB: 5689.15tau
90 Clusters, t_cut: 3020.00tau, t_max: 6.95e+09tau, tAB: 5516.75tau
85 Clusters, t_cut: 3320.08tau, t_max: 6.95e+09tau, tAB: 5346.85tau
80 Clusters, t_cut: 3799.59tau, t_max: 6.95e+09tau, tAB: 5208.42tau
75 Clusters, t_cut: 4565.67tau, t_max: 6.95e+09tau, tAB: 4942.05tau
70 Clusters, t_cut: 5294.46tau, t_max: 6.95e+09tau, tAB: 0.00tau

In [60]:
plt.plot(n_clusters, t_min_list, label="t_cut")
plt.plot(n_clusters, t_AB_list, label="t_AB")
plt.xlabel("Number of Clusters")
plt.ylabel("t (tau)")
#plt.text(110, 4000,"Clustering", fontsize=14)
plt.axis([70,135,0,9000])
#plt.arrow(125, 3600, -30, 0,shape='left', lw=2, length_includes_head=True)
plt.title("Commute times vs Number of Clusters")
plt.legend()
plt.show()



In [19]:
m_ratio = [t_AB_list[i]/t_min_list[i] for i in range(len(t_min_list))]

plt.plot(n_clusters, m_ratio, label="t_AB / t_cut", color="red")
plt.xlabel("Number of Clusters")
plt.ylabel("t_AB / t_cut")
plt.axis([70,135,0,65])
plt.legend()
plt.show()



In [20]:
m_ratio2 = [t_max_list[i]/t_min_list[i] for i in range(len(t_min_list))]

plt.plot(n_clusters, m_ratio2, label="t_max / t_cut", color="green")
plt.xlabel("Number of Clusters")
plt.ylabel("t_max / t_cut")
#plt.axis([70,135,0,1000])
plt.legend()
plt.show()



In [ ]: