textblob



In [1]:
%matplotlib inline

import re
import nltk
from textblob import TextBlob, Word
from nltk.corpus import wordnet as wn
import networkx as nx
import numpy as np

# Register the relative 'nltk_data' directory as an extra location on
# NLTK's data search path.
nltk.data.path.append('nltk_data')

In [2]:
# Materialise every Listing row and pull out each listing's message.
# NOTE(review): `Listing` is not imported anywhere in the visible notebook --
# confirm where it comes from so the cell survives a fresh kernel.
listings = np.array(list(Listing.objects.all()))
raw_text = np.array([listing.message for listing in listings])

# Strip every line that begins with an http(s) URL, together with the line
# breaks that follow it; URLs appearing mid-line are left untouched.
text = np.array([
    re.sub(r'^https?:\/\/.*[\r\n]*', '', message, flags=re.MULTILINE)
    for message in raw_text
])

In [3]:
# Parse the entry at index 1 of `text` with TextBlob and print what it extracts.
blob = TextBlob(text[1])
print "blob:", blob
print "tags:", blob.tags  # (token, part-of-speech tag) pairs
print "noun phrases:", blob.noun_phrases


blob: Selling two tickets to Ingrid Michaelson. Please message me :)
tags: [(u'Selling', u'VBG'), (u'two', u'CD'), (u'tickets', u'NNS'), (u'to', u'TO'), (u'Ingrid', u'NNP'), (u'Michaelson', u'NNP'), (u'Please', u'VB'), (u'message', u'NN'), (u'me', u'PRP'), (u':)', u'SYM')]
noun phrases: [u'selling', u'ingrid michaelson', u'please']

In [4]:
# http://www.randomhacks.net/2009/12/29/visualizing-wordnet-relationships-as-graphs/
def closure_graph(synset, fn):
    """Build a directed graph of everything reachable from ``synset`` via ``fn``.

    Starting at ``synset``, ``fn`` is applied to each newly reached item and
    every (item, related item) pair becomes an edge. Nodes are keyed by the
    items' ``name()`` values.

    The walk uses an explicit stack instead of recursion, so long relation
    chains cannot exceed the interpreter's recursion limit.

    Args:
        synset: Starting item; must be hashable and expose ``name()``.
        fn: Callable taking one item and returning an iterable of related
            items, e.g. ``lambda s: s.hypernyms()``.

    Returns:
        A ``networkx.DiGraph`` with one node per reached item name and an edge
        from each item to every item ``fn`` returned for it.
    """
    graph = nx.DiGraph()
    graph.add_node(synset.name())
    seen = {synset}

    # Depth-first walk. Each stack frame pairs an item with the iterator over
    # its related items, so items are expanded in the same order a recursive
    # walk would visit them.
    exhausted = object()
    stack = [(synset, iter(fn(synset)))]
    while stack:
        current, related = stack[-1]
        neighbour = next(related, exhausted)
        if neighbour is exhausted:
            stack.pop()
            continue

        # add_edge creates the neighbour's node if it does not exist yet.
        graph.add_edge(current.name(), neighbour.name())
        if neighbour not in seen:
            seen.add(neighbour)
            stack.append((neighbour, iter(fn(neighbour))))

    return graph

In [5]:
# Hypernym closure of 'electronics.n.01', drawn with node labels.
# NOTE(review): `nx.draw_graphviz` appears to have been removed in
# NetworkX 2.0 -- confirm the pinned networkx version before re-running.
graph = closure_graph(
    wn.synset('electronics.n.01'),
    lambda synset: synset.hypernyms(),
)
nx.draw_graphviz(graph, with_labels=True)



In [6]:
# Hypernym closure of 'technology.n.01', drawn with node labels.
graph = closure_graph(
    wn.synset('technology.n.01'),
    lambda synset: synset.hypernyms(),
)
nx.draw_graphviz(graph, with_labels=True)



In [7]:
# Hypernym closure of 'tech.n.01', drawn with node labels.
graph = closure_graph(
    wn.synset('tech.n.01'),
    lambda synset: synset.hypernyms(),
)
nx.draw_graphviz(graph, with_labels=True)



In [8]:
# Hypernym closure of 'computer.n.01', drawn with node labels.
graph = closure_graph(
    wn.synset('computer.n.01'),
    lambda synset: synset.hypernyms(),
)
nx.draw_graphviz(graph, with_labels=True)



In [9]:
# Hypernym closure of 'phone.n.01', drawn with node labels.
graph = closure_graph(
    wn.synset('phone.n.01'),
    lambda synset: synset.hypernyms(),
)
nx.draw_graphviz(graph, with_labels=True)



In [9]: