In [1]:
%matplotlib inline
import re
import nltk
from textblob import TextBlob, Word
from nltk.corpus import wordnet as wn
import networkx as nx
import numpy as np
# Register the local 'nltk_data' directory as an extra NLTK data search location.
# The membership check keeps this cell idempotent: re-running it no longer appends a
# duplicate entry each time.
# NOTE(review): the path is relative, so it presumably resolves against the kernel's
# working directory — confirm the notebook is launched from the expected folder.
if 'nltk_data' not in nltk.data.path:
    nltk.data.path.append('nltk_data')
In [2]:
# Materialise every Listing object and collect the `message` attribute of each one.
# NOTE(review): `Listing` is not imported in any visible cell — presumably it is supplied
# by the environment the kernel was started from; confirm before a fresh-kernel run.
listings = np.array(list(Listing.objects.all()))
raw_text = np.array([listing.message for listing in listings])

# Drop every line that *starts* with an http:// or https:// URL, together with the line
# breaks that follow it. URLs that appear in the middle of a line are left untouched.
url_line = re.compile(r'^https?:\/\/.*[\r\n]*', flags=re.MULTILINE)
text = np.array([url_line.sub('', message) for message in raw_text])
In [3]:
# Inspect TextBlob's output for a single cleaned message: the entry at index 1 of `text`.
# NOTE(review): index 1 is hard-coded, so this cell needs `text` to hold at least two entries.
blob = TextBlob(text[1])
# Show the blob itself, then its `tags` and `noun_phrases` attributes.
print "blob:", blob
print "tags:", blob.tags
print "noun phrases:", blob.noun_phrases
In [4]:
# http://www.randomhacks.net/2009/12/29/visualizing-wordnet-relationships-as-graphs/
def closure_graph(synset, fn):
    """Build a directed graph of everything reachable from ``synset`` through ``fn``.

    Starting at ``synset``, ``fn`` is applied to each newly discovered item to obtain
    its neighbours; every (item -> neighbour) pair becomes an edge. Nodes are keyed by
    the string returned from each item's ``name()`` method.

    Parameters:
        synset: the starting item; must be hashable and provide ``name()``.
        fn: callable taking one item and returning an iterable of related items
            (each also hashable and providing ``name()``).

    Returns:
        An ``nx.DiGraph`` containing the starting node and all nodes/edges found.

    The traversal is an explicit-stack depth-first walk rather than a recursive one,
    so arbitrarily long relation chains cannot exhaust the interpreter's recursion
    limit. Nodes and edges are inserted in the same order a recursive pre-order walk
    would produce. ``fn`` is called exactly once per distinct item, which also makes
    cyclic relations terminate.
    """
    graph = nx.DiGraph()
    root_name = synset.name()
    # Added explicitly so that a root without any neighbours still appears in the graph.
    graph.add_node(root_name)
    seen = set([synset])

    # Each stack entry pairs a node's name with a *live* iterator over its neighbours,
    # so after finishing a child we resume the parent exactly where we left off.
    pending = [(root_name, iter(fn(synset)))]
    while pending:
        parent_name, neighbours = pending[-1]
        for neighbour in neighbours:
            neighbour_name = neighbour.name()
            # add_edge also creates either endpoint if it is not in the graph yet.
            graph.add_edge(parent_name, neighbour_name)
            if neighbour not in seen:
                seen.add(neighbour)
                pending.append((neighbour_name, iter(fn(neighbour))))
                break  # descend into the new node before visiting its siblings
        else:
            # Iterator exhausted: this node is fully expanded.
            pending.pop()
    return graph
In [5]:
# Hypernym closure of the WordNet synset 'electronics.n.01', drawn with node labels.
# NOTE(review): `nx.draw_graphviz` is not present in newer networkx releases — confirm
# the installed version still provides it.
graph = closure_graph(
    synset=wn.synset('electronics.n.01'),
    fn=lambda synset: synset.hypernyms(),
)
nx.draw_graphviz(graph, with_labels=True)
In [6]:
# Hypernym closure of the WordNet synset 'technology.n.01', drawn with node labels.
graph = closure_graph(
    synset=wn.synset('technology.n.01'),
    fn=lambda synset: synset.hypernyms(),
)
nx.draw_graphviz(graph, with_labels=True)
In [7]:
# Hypernym closure of the WordNet synset looked up as 'tech.n.01', drawn with node labels.
graph = closure_graph(
    synset=wn.synset('tech.n.01'),
    fn=lambda synset: synset.hypernyms(),
)
nx.draw_graphviz(graph, with_labels=True)
In [8]:
# Hypernym closure of the WordNet synset 'computer.n.01', drawn with node labels.
graph = closure_graph(
    synset=wn.synset('computer.n.01'),
    fn=lambda synset: synset.hypernyms(),
)
nx.draw_graphviz(graph, with_labels=True)
In [9]:
# Hypernym closure of the WordNet synset 'phone.n.01', drawn with node labels.
graph = closure_graph(
    synset=wn.synset('phone.n.01'),
    fn=lambda synset: synset.hypernyms(),
)
nx.draw_graphviz(graph, with_labels=True)
In [9]: