In [46]:
import matplotlib.pyplot as plt
from nltk.corpus import words
from collections import defaultdict,Counter,OrderedDict
from itertools import combinations
import networkx as nx
word_list = words.words()
words4=[w for w in word_list if len(w)==4]
In [84]:
class Graph:
def __init__(self):
self.nodes={}
def add_node(self, node):
if node is None: return
if node not in self.nodes:
self.nodes[node]={}
def add_edge(self,node1, node2, weight=1):
if node1 is None or node2 is None: return
self.add_node(node1)
self.add_node(node2)
if node2 not in self.nodes[node1]:
self.nodes[node1][node2]=weight
if node1 not in self.nodes[node2]:
self.nodes[node2][node1]=weight
def get_neighbors(self,node):
return self.nodes[node]
def get_nodes(self):
return self.nodes.keys()
def build_word_buckets(words):
"""
For every word in the words
adds it into 4 different word buckets,
lebelled with labels with one letter missing.
For example "pool" goes into "_ool","p_ol","po_l" and "poo_" buckets.
Doing so we gather together all the words
that require just one modification to convert them to
each other. Words in 4 buckets will create edges of a graph
for word transformation.
Parameters
----------
- words: list of words of 4 letters each.
Returns
-------
Dictionary of 4 buckets for each word in the input list of words.
"""
buckets=defaultdict(list)
s=set(words)
for word in s:
for i in range(4):
w= word#.lower()
label=w[:i]+"_"+w[i+1:]
buckets[label].append(w)
return buckets
def build_graph(words):
"""
Returns a nx graph created from list of words.
Parameters
----------
- words: list of words of 4 letters each.
"""
bs=build_word_buckets(words)
g=nx.Graph()
# for label, w_list in bs.items():
for w_list in bs.values():
g.add_nodes_from(w_list)
g.add_edges_from(combinations(w_list,2))
return g
g=build_graph(words4)
print(nx.info(g))
nl=list(g.nodes)[:2]
# nx.draw_networkx_nodes(g, pos=nx.spring_layout(g),nodelist=nl)
# plt.draw()
print(list(g.neighbors('kobu')))
print(list(g['kobu']))
def bfs(graph, start_key, target_key):
visited=set()
#to_visit=[start_key]
to_visit=OrderedDict.fromkeys([start_key])
# to_visit[start_key]=1
while len(to_visit)>0:
k=to_visit.popitem(last=False)[0]
#k=to_visit.pop(0)
visited.add(k)
# prosess the key
if k == target_key:
return k
children=graph[k]
for c in children:
if c not in visited and c not in to_visit:
graph.nodes[c]['parent']=k
#to_visit.append(c)
to_visit[c]=1
def get_path(G, key):
if key is None: return
path=[key]
k=key
parent=G.nodes[k].get('parent',None)
while parent != None:
path.append(parent)
k=parent
parent=G.nodes[k].get('parent',None)
return path
In [110]:
# remove 'parent' attr
for k in g.nodes: g.nodes[k].pop('parent',None)
# check there is no attrs
nodes=[(n,d) for n,d in g.nodes(data=True) if len(d)>0]
print(nodes)
print('==========')
kk=bfs(g,'fool','tool')
# if kk: print(g.nodes[kk])
print(get_path(g,kk))
nodes=[(n) for n,d in g.nodes(data=True) if len(d)>0]
print(len(nodes))
print(nodes)
In [ ]: