In [117]:
import probabilisticgraph as pg
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import os
%matplotlib notebook
In [71]:
def graph_to_csv(graph):
    """Tabulate a graph's transitions as a DataFrame (nothing is written to disk).

    The result has one row per entry of ``graph.states``, indexed by the
    state's ``name``. For every letter ``a`` of ``graph.alphabet`` there are
    two adjacent columns:

    * ``a``      -- name of the state returned by ``next_state_from_edge(a)``,
                    or the string ``'None'`` when that result is falsy;
    * ``P(a)``   -- the value returned by ``prob_to_next_letter(a)``.

    Parameters
    ----------
    graph : object exposing ``alphabet`` (iterable of str) and ``states``.

    Returns
    -------
    pandas.DataFrame
    """
    # Interleave "<letter>" and "P(<letter>)" headers in alphabet order.
    header = [
        label
        for letter in graph.alphabet
        for label in (letter, 'P(' + letter + ')')
    ]

    state_names = []
    rows = []
    for state in graph.states:
        state_names.append(state.name)
        row = []
        for letter in graph.alphabet:
            target = state.next_state_from_edge(letter)
            if target:
                row.append(target.name)
            else:
                # Missing transition is stored as the literal string 'None'.
                row.append('None')
            row.append(state.prob_to_next_letter(letter))
        rows.append(row)

    return pd.DataFrame(rows, index=state_names, columns=header)
In [128]:
def partition(graph_table, labels, alphabet):
    """Build the deterministic transition table annotated with partition labels.

    Parameters
    ----------
    graph_table : pandas.DataFrame
        Table indexed by state name with, for each letter ``a`` of
        ``alphabet``, a column ``a`` (next-state name or the string
        ``'None'``) and a column ``P(a)``.
    labels : sequence
        One partition label per row of ``graph_table``, in row order.
    alphabet : iterable of str
        Letters whose columns are present in ``graph_table``.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) holding the non-probability
        columns, a ``'partitions'`` column with ``labels``, and one
        ``'to<a>'`` column per letter giving the partition label of the
        ``a``-successor (``'None'`` where there is no successor).
    """
    # Use the table that was passed in -- the previous version silently read
    # the notebook-global `df`, which made the argument meaningless.
    # Probability columns are identified by their exact name so that a letter
    # that happens to contain "P" is not discarded by mistake.
    prob_cols = {'P(' + a + ')' for a in alphabet}
    kept_cols = [c for c in graph_table.columns if c not in prob_cols]

    # Explicit copy: we are about to add columns and must not write through
    # to (or get a SettingWithCopyWarning from) the caller's frame.
    det_graph = graph_table.loc[:, kept_cols].copy()
    det_graph['partitions'] = labels

    for a in alphabet:
        det_graph['to' + a] = [
            det_graph.loc[x, 'partitions'] if x != 'None' else 'None'
            for x in det_graph[a]
        ]
    return det_graph
In [185]:
def _refresh_successor_blocks(table, alphabet):
    """Recompute every 'to<letter>' column of `table` in place from 'partitions'."""
    for letter in alphabet:
        table['to' + letter] = [
            table.loc[target, 'partitions'] if target != 'None' else 'None'
            for target in table[letter]
        ]


def moore_iteration(p, alphabet):
    """Perform one refinement step of Moore's partition-refinement algorithm.

    Within each current block, states are regrouped by the tuple of blocks
    their successors fall in (one entry per letter). The states sharing the
    signature of the block's first row keep the block's label; every other
    distinct signature receives a fresh label greater than all existing ones.

    Parameters
    ----------
    p : pandas.DataFrame
        Table indexed by (unique) state name containing a ``'partitions'``
        column and, for each letter ``a``, a column ``a`` holding the
        successor's state name or the string ``'None'``.
    alphabet : iterable of str

    Returns
    -------
    pandas.DataFrame
        A refined copy of ``p`` (the input is left untouched) whose
        ``'to<a>'`` columns are consistent with the new ``'partitions'``.
        Calling this again on an already-stable table returns equal labels.
    """
    refined = p.copy()
    if refined.empty:
        return refined

    # Signatures must reflect the *current* labels, so rebuild them rather
    # than trusting whatever 'to…' columns the caller passed in.
    _refresh_successor_blocks(refined, alphabet)
    to_cols = ['to' + letter for letter in alphabet]

    # Snapshot of the labels this step refines; splits are decided against
    # this snapshot even while 'partitions' is being rewritten below.
    old_labels = refined['partitions'].copy()
    blocks = old_labels.unique()
    next_label = max(blocks) + 1

    for label in blocks:
        members = old_labels[old_labels == label].index
        signatures = refined.loc[members, to_cols].itertuples(index=False, name=None)
        label_for_signature = {}
        for state, signature in zip(members, signatures):
            if signature not in label_for_signature:
                if label_for_signature:
                    # A signature not seen before in this block: split it off.
                    label_for_signature[signature] = next_label
                    next_label += 1
                else:
                    # First row of the block keeps the existing label.
                    label_for_signature[signature] = label
            refined.at[state, 'partitions'] = label_for_signature[signature]

    # Leave the returned table self-consistent for the next step.
    _refresh_successor_blocks(refined, alphabet)
    return refined
In [188]:
def moore(p, alphabet):
    """Refine a partition table with ``moore_iteration`` until it stops changing.

    Parameters
    ----------
    p : pandas.DataFrame
        Table with a ``'partitions'`` column, as accepted by
        ``moore_iteration``.
    alphabet : iterable of str

    Returns
    -------
    pandas.DataFrame
        The table returned by the first ``moore_iteration`` call that left
        the ``'partitions'`` column unchanged.
    """
    while True:
        # Copy the labels: if the iteration edits the frame in place, a plain
        # reference would alias the new labels and always compare equal.
        previous = p['partitions'].copy()
        p = moore_iteration(p, alphabet)
        # Series.equals yields a single bool; `==` yields an element-wise
        # Series whose truth value is ambiguous and raises inside `if`.
        if previous.equals(p['partitions']):
            return p
In [122]:
graph_name = 'graphs/ternary10db/dmarkov_d8'
csv_path = graph_name + '.csv'

# The graph object is always loaded because later cells read `g.alphabet`;
# only the (derived) transition table is cached on disk.
g = pg.ProbabilisticGraph(path=graph_name + '.yaml')

# `graph_name` contains directory components, so it can never appear in
# os.listdir('.'); test the path itself instead.
if os.path.exists(csv_path):
    # Read every cell as text: the literal 'None' marker written by
    # graph_to_csv must not be parsed as NaN, and numeric-looking state names
    # must not be reparsed as integers (assumes names are strings -- TODO confirm).
    raw = pd.read_csv(csv_path, dtype=str, keep_default_na=False)
    df = raw.set_index(raw.columns[0])
    df.index.name = None
    # Probability columns are the only numeric ones
    # (assumes prob_to_next_letter returns numbers -- TODO confirm).
    prob_cols = [c for c in df.columns if c.startswith('P(')]
    df[prob_cols] = df[prob_cols].apply(pd.to_numeric)
else:
    df = graph_to_csv(g)
    df.to_csv(csv_path)
df.head(5)
Out[122]:
In [124]:
# Probability columns only: one row per state, one column per letter.
idxs = [x for x in df.columns if x.startswith('P(')]
morphs = df.loc[:, idxs]

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# Plot on the 3D axes directly: plt.scatter(x, y, z) treats its third
# positional argument as the marker size `s`, which leaves every point at z=0.
# NOTE: this view uses the first three probability columns, so it needs an
# alphabet of at least three letters.
ax.scatter(morphs.iloc[:, 0], morphs.iloc[:, 1], morphs.iloc[:, 2])
ax.set_xlabel(idxs[0])
ax.set_ylabel(idxs[1])
ax.set_zlabel(idxs[2])
ax.set_title('Next-letter probabilities per state')
plt.show()
In [125]:
from sklearn.decomposition import PCA

# Project the per-state probability rows onto len(alphabet) - 1 principal
# components.
pca = PCA(n_components=len(g.alphabet) - 1)
pca.fit(morphs)
T = pca.transform(morphs)

# Scatter of the first two principal components.
fig2, ax = plt.subplots()
ax.scatter(T[:, 0], T[:, 1])
plt.show()
In [126]:
from sklearn.cluster import KMeans

# K-means starts from random centroids; without a fixed seed the cluster
# labels (and therefore the initial partition fed to `partition`/`moore`)
# change from run to run. `n_init` is pinned as well because its default
# differs between scikit-learn releases.
k_means = KMeans(n_clusters=3, n_init=10, random_state=0)
k_means.fit(T)
labels = k_means.predict(T)
centroids = k_means.cluster_centers_
In [274]:
# Attach the k-means cluster labels to the transition table as the initial
# partition of the states.
p = partition(df, labels, g.alphabet)
# Refine that initial partition with repeated Moore iterations.
h = moore(p, g.alphabet)
In [227]:
In [232]:
In [273]:
Out[273]:
In [ ]:
In [ ]: