In [7]:
from IPython.lib import display
# Embed the NLTK homepage in the notebook output (800 x 600 pixels).
display.IFrame('http://www.nltk.org/', 800, 600)


Out[7]:
[embedded IFrame showing http://www.nltk.org/]
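In newer IPython/Jupyter releases the same widget is importable directly from IPython.display, which is its documented location today; a minimal equivalent sketch using the same URL and dimensions as above:

In [ ]:
from IPython.display import IFrame
# Embed the NLTK homepage, as in the cell above.
IFrame('http://www.nltk.org/', width=800, height=600)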
In [8]:
import nltk
sentence = """At eight o'clock on Thursday morning
... Arthur didn't feel very good."""
tokens = nltk.word_tokenize(sentence)
tokens
['At', 'eight', "o'clock", 'on', 'Thursday', 'morning',
'Arthur', 'did', "n't", 'feel', 'very', 'good', '.']
tagged = nltk.pos_tag(tokens)
tagged[0:6]
[('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
('Thursday', 'NNP'), ('morning', 'NN')]


---------------------------------------------------------------------------
LookupError                               Traceback (most recent call last)
<ipython-input-8-68e99985caaf> in <module>()
      2 sentence = """At eight o'clock on Thursday morning
      3 ... Arthur didn't feel very good."""
----> 4 tokens = nltk.word_tokenize(sentence)
      5 tokens
      6 ['At', 'eight', "o'clock", 'on', 'Thursday', 'morning',

/Users/Neuromancer/anaconda/envs/myenv3/lib/python3.4/site-packages/nltk/tokenize/__init__.py in word_tokenize(text, language)
     99     :param language: the model name in the Punkt corpus
    100     """
--> 101     return [token for sent in sent_tokenize(text, language)
    102             for token in _treebank_word_tokenize(sent)]
    103 

/Users/Neuromancer/anaconda/envs/myenv3/lib/python3.4/site-packages/nltk/tokenize/__init__.py in sent_tokenize(text, language)
     83     :param language: the model name in the Punkt corpus
     84     """
---> 85     tokenizer = load('tokenizers/punkt/{0}.pickle'.format(language))
     86     return tokenizer.tokenize(text)
     87 

/Users/Neuromancer/anaconda/envs/myenv3/lib/python3.4/site-packages/nltk/data.py in load(resource_url, format, cache, verbose, logic_parser, fstruct_reader, encoding)
    779 
    780     # Load the resource.
--> 781     opened_resource = _open(resource_url)
    782 
    783     if format == 'raw':

/Users/Neuromancer/anaconda/envs/myenv3/lib/python3.4/site-packages/nltk/data.py in _open(resource_url)
    893 
    894     if protocol is None or protocol.lower() == 'nltk':
--> 895         return find(path_, path + ['']).open()
    896     elif protocol.lower() == 'file':
    897         # urllib might not use mode='rb', so handle this one ourselves:

/Users/Neuromancer/anaconda/envs/myenv3/lib/python3.4/site-packages/nltk/data.py in find(resource_name, paths)
    622     sep = '*'*70
    623     resource_not_found = '\n%s\n%s\n%s' % (sep, msg, sep)
--> 624     raise LookupError(resource_not_found)
    625 
    626 def retrieve(resource_url, filename=None, verbose=True):

LookupError: 
**********************************************************************
  Resource 'tokenizers/punkt/PY3/english.pickle' not found.
  Please use the NLTK Downloader to obtain the resource:  >>>
  nltk.download()
  Searched in:
    - '/Users/Neuromancer/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
    - ''
**********************************************************************
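The cell above was pasted straight from the NLTK homepage example (including its expected-output lines and the "..." continuation prompt), and the LookupError simply means the Punkt sentence-tokenizer model that word_tokenize relies on has not been downloaded to this machine. The message itself points at the fix: fetch the data with the NLTK downloader and rerun. A minimal sketch, assuming the standard resource names ('punkt' for the tokenizer; pos_tag also needs a tagger model, shipped as 'averaged_perceptron_tagger' in recent NLTK releases, while the 3.0-era install used here ships an older maxent treebank tagger):

In [ ]:
import nltk

# Fetch the missing sentence-tokenizer model named in the traceback.
nltk.download('punkt')
# pos_tag needs a tagger model as well; the package name varies by NLTK version.
nltk.download('averaged_perceptron_tagger')

# Rerun the example (the "..." continuation prompt from the docs is dropped here,
# otherwise it ends up inside the string and gets tokenized too).
sentence = """At eight o'clock on Thursday morning
Arthur didn't feel very good."""
tokens = nltk.word_tokenize(sentence)
tagged = nltk.pos_tag(tokens)
tagged[0:6]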
