In [22]:
import nltk
%matplotlib inline

In [23]:
# Split a raw sentence into tokens; the recorded output of this cell shows the
# ";" coming back as its own token rather than staying attached to "World".
nltk.word_tokenize("Hello World; this is a sentence")


Out[23]:
['Hello', 'World', ';', 'this', 'is', 'a', 'sentence']

In [24]:
from nltk.book import *

In [29]:
# Show which text text4 refers to (the recorded output of this cell reads
# "Inaugural Address Corpus"). text4 is expected to come from the earlier
# `from nltk.book import *`.
# print(...) with a single argument prints the same thing on Python 2 and
# Python 3, whereas the bare `print x` statement is a SyntaxError on Python 3.
print(text4.name)

# Plot where each of the listed words occurs across text4.
# NOTE(review): matching is presumably case-sensitive, so "god" may not pick
# up occurrences written as "God" -- confirm against the NLTK version in use.
text4.dispersion_plot(["citizens", "freedom", "black", "and", "the", "god"])


Inaugural Address Corpus

In [55]:
from nltk import load_parser
from nltk.sem import chat80

# Natural-language question that the feature grammar turns into SQL.
query = "What cities are located in China"

# Parse the whitespace-split question with the SQL feature grammar.
cp = load_parser("grammars/book_grammars/sql0.fcfg")
trees = list(cp.parse(query.split()))
if not trees:
    # Without this guard an unparseable question fails below with a bare
    # IndexError on trees[0], which hides the real cause.
    raise ValueError("No parse found for query: %r" % (query,))

# The SEM feature on the root of the first parse holds the SQL fragments;
# joining them with single spaces produces the statement printed below.
# NOTE(review): the runs of spaces in the recorded output suggest some
# fragments are empty strings -- harmless for SQL, confirm if it matters.
answer = trees[0].label()['SEM']
q = " ".join(answer)
# print(...) with a single argument behaves the same on Python 2 and Python 3;
# the bare `print q` statement is a SyntaxError on Python 3.
print(q)

# Run the generated statement against the city database. fetchall() is kept
# as the last expression so the result rows are displayed as the cell output.
rows = chat80.sql_query("corpora/city_database/city.db", q)
rows.fetchall()


SELECT City FROM city_table WHERE   Country="china"
Out[55]:
[(u'canton',),
 (u'chungking',),
 (u'dairen',),
 (u'harbin',),
 (u'kowloon',),
 (u'mukden',),
 (u'peking',),
 (u'shanghai',),
 (u'sian',),
 (u'tientsin',)]

In [ ]: