notebook.community

Edit and run



In [22]:

    
import nltk
%matplotlib inline



In [23]:

    
# Tokenize a sample sentence; the resulting token list is the cell's output.
sample_sentence = "Hello World; this is a sentence"
nltk.word_tokenize(sample_sentence)









    Out[23]:





['Hello', 'World', ';', 'this', 'is', 'a', 'sentence']



In [24]:

    
from nltk.book import *



In [29]:

    
# Print the name of the loaded text, then draw a dispersion plot for the
# listed words. The single-argument print(...) form produces the same output
# on Python 2 and Python 3, whereas the bare `print x` statement is a
# SyntaxError on Python 3.
print(text4.name)
text4.dispersion_plot(["citizens", "freedom", "black", "and", "the", "god"])









    



Inaugural Address Corpus



In [55]:

    
from nltk import load_parser
from nltk.sem import chat80

# Natural-language question to translate into SQL.
query = "What cities are located in China"

# Parse the whitespace-split question with the sql0 feature grammar and take
# the SEM feature of the first parse's root label; joined together, its
# pieces form the SQL statement.
cp = load_parser("grammars/book_grammars/sql0.fcfg")
trees = list(cp.parse(query.split()))
answer = trees[0].label()['SEM']
# NOTE(review): the joined string can contain runs of spaces (presumably from
# empty fragments in SEM -- confirm); the query still executes as printed.
q = " ".join(answer)
# Parenthesized single-argument print works on both Python 2 and Python 3.
print(q)

# Execute the generated SQL against the city database and show every row.
rows = chat80.sql_query("corpora/city_database/city.db", q)
rows.fetchall()









    



SELECT City FROM city_table WHERE   Country="china"






    Out[55]:





[(u'canton',),
 (u'chungking',),
 (u'dairen',),
 (u'harbin',),
 (u'kowloon',),
 (u'mukden',),
 (u'peking',),
 (u'shanghai',),
 (u'sian',),
 (u'tientsin',)]



In [ ]: