In [117]:
import probabilisticgraph as pg
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import os
%matplotlib notebook

In [71]:
def graph_to_csv(graph):
    names = []
    entries = []
    columns = []
    for a in graph.alphabet:
        columns.extend([a, 'P(' + a + ')'])
    for s in graph.states:
        names.append(s.name)
        entry = []
        for letter in graph.alphabet:
            ns = s.next_state_from_edge(letter)
            n = ns.name if ns else 'None'
            entry.extend([n, s.prob_to_next_letter(letter)])
        entries.append(entry)
    df = pd.DataFrame(entries, index=names, columns=columns)
    return df

In [128]:
def partition(graph_table, labels, alphabet):
    idxs = [x for x in df.columns if 'P' not in x]
    det_graph = df.loc[:,idxs]
    det_graph['partitions'] = labels
    for a in alphabet:
        det_graph['to' + a] = [det_graph.loc[x,'partitions'] if x != 'None'
                               else 'None'
                               for x in det_graph[a]]
    return det_graph

In [185]:
def moore_iteration(p, alphabet):
    r = p['partitions'].unique()
    q = p.drop(labels=alphabet, axis=1)
    count = max(r) + 1
    to = ['to' + x for x in alphabet]
    for i in r:
        u = q[q['partitions'] == i]
        z = u.loc[(u[to] == u.ix[0,to]).apply(all, axis=1)==False,:]
        while not z.empty:
            p.loc[z.index, 'partition'] = count
            count += 1
            z = u.loc[(u[to] == u.ix[0,to]).apply(all, axis=1)==False,:]
    return p

In [188]:
def moore(p, alphabet):
    done = False
    while not done:
        old_p = p['partitions']
        p = moore_iteration(p, alphabet)
        if old_p == p['partitions']:
            done = True
    return p

In [122]:
graph_name = 'graphs/ternary10db/dmarkov_d8'
if graph_name + '.csv' in os.listdir('.'):
    df = pd.read_csv(graph_name + '.csv', index=[0])
else:
    g = pg.ProbabilisticGraph(path = graph_name + '.yaml')
    df = graph_to_csv(g)
    df.to_csv(graph_name + '.csv')
df.head(5)


Out[122]:
0 P(0) 1 P(1) 2 P(2)
12111112 21111120 0.346535 21111121 0.584158 21111122 0.069307
12111110 21111100 0.472813 21111101 0.446809 21111102 0.080378
12111111 21111110 0.481586 21111111 0.396601 21111112 0.121813
22210212 22102120 0.500000 22102121 0.500000 22102122 0.000000
22210210 22102100 0.454545 22102101 0.272727 22102102 0.272727

In [124]:
idxs = [x for x in df.columns if 'P' in x]
morphs = df.loc[:,idxs]
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
plt.scatter(morphs.iloc[:,0], morphs.iloc[:,1], morphs.iloc[:,2])
plt.show()



In [125]:
from sklearn.decomposition import PCA
pca = PCA(n_components = len(g.alphabet) - 1)
pca.fit(morphs)
T = pca.transform(morphs)
fig2 = plt.figure()
ax = fig2.add_subplot(111)
plt.scatter(T[:,0], T[:,1])
plt.show()



In [126]:
from sklearn.cluster import KMeans
k_means = KMeans(n_clusters=3)
k_means.fit(T)
labels = k_means.predict(T)
centroids = k_means.cluster_centers_

In [274]:
p = partition(df, labels, g.alphabet)
h = moore(p, g.alphabet)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-274-e80af58c2312> in <module>()
      1 p = partition(df, labels, g.alphabet)
----> 2 h = moore(p, g.alphabet)

<ipython-input-188-1ff6f3a8e1cd> in moore(p, alphabet)
      3     while not done:
      4         old_p = p['partitions']
----> 5         p = moore_iteration(p, alphabet)
      6         if old_p == p['partitions']:
      7             done = True

<ipython-input-185-0ed4e29614fd> in moore_iteration(p, alphabet)
      2     r = p['partitions'].unique
      3     q = p.drop(labels=alphabet, axis=1)
----> 4     count = max(r) + 1
      5     to = ['to' + x for x in g.alphabet]
      6     for i in r:

TypeError: 'instancemethod' object is not iterable

In [227]:


In [232]:



(3580, 4)
(2511, 4)

In [273]:



Out[273]:
True

In [ ]:


In [ ]: