In [ ]:
# Inspect the texts available in the built-in Gutenberg corpus.
from nltk.corpus import gutenberg

# Identifiers of the files that make up the corpus.
fileids = gutenberg.fileids()
# Single-argument print(...) parses as a print statement with a parenthesised
# expression on Python 2 and as a function call on Python 3, so the output is
# the same on both ("<count> files", then the identifiers).
print('%d files' % len(fileids))
print(fileids)
In [ ]:
# Inspect the raw text of the "Alice in Wonderland" file.
alice_raw = gutenberg.raw(fileids=['carroll-alice.txt'])

# Show the type of the object returned by raw().
# The label keeps two spaces after the colon: the original print statement
# emitted 'Type: ' followed by its own separator space.
print('Type:  %s' % type(alice_raw))
# Blank separator line. print('') is used instead of print() because on
# Python 2 a bare print() would output "()" (an empty tuple).
print('')
# Look at the first 250 characters.
print(alice_raw[:250])