In [2]:
import nltk
In [3]:
# Inspect the corpus package; the repr below shows NLTK exposes it as a lazily loaded module.
nltk.corpus
Out[3]:
<LazyModule 'nltk.corpus'>
In [2]:
from nltk.corpus import stopwords
In [3]:
len(stopwords)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-3-92825649c2b0> in <module>()
----> 1 len(stopwords)
TypeError: object of type 'LazyCorpusLoader' has no len()
In [6]:
"is" in stopwords
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-6-35420da34507> in <module>()
----> 1 "is" in stopwords
TypeError: argument of type 'LazyCorpusLoader' is not iterable
In [4]:
# words("english") returns the English stop words as a plain Python list of strings.
stopwords.words("english")
Out[4]:
['i',
'me',
'my',
'myself',
'we',
'our',
'ours',
'ourselves',
'you',
'your',
'yours',
'yourself',
'yourselves',
'he',
'him',
'his',
'himself',
'she',
'her',
'hers',
'herself',
'it',
'its',
'itself',
'they',
'them',
'their',
'theirs',
'themselves',
'what',
'which',
'who',
'whom',
'this',
'that',
'these',
'those',
'am',
'is',
'are',
'was',
'were',
'be',
'been',
'being',
'have',
'has',
'had',
'having',
'do',
'does',
'did',
'doing',
'a',
'an',
'the',
'and',
'but',
'if',
'or',
'because',
'as',
'until',
'while',
'of',
'at',
'by',
'for',
'with',
'about',
'against',
'between',
'into',
'through',
'during',
'before',
'after',
'above',
'below',
'to',
'from',
'up',
'down',
'in',
'out',
'on',
'off',
'over',
'under',
'again',
'further',
'then',
'once',
'here',
'there',
'when',
'where',
'why',
'how',
'all',
'any',
'both',
'each',
'few',
'more',
'most',
'other',
'some',
'such',
'no',
'nor',
'not',
'only',
'own',
'same',
'so',
'than',
'too',
'very',
's',
't',
'can',
'will',
'just',
'don',
'should',
'now',
'd',
'll',
'm',
'o',
're',
've',
'y',
'ain',
'aren',
'couldn',
'didn',
'doesn',
'hadn',
'hasn',
'haven',
'isn',
'ma',
'mightn',
'mustn',
'needn',
'shan',
'shouldn',
'wasn',
'weren',
'won',
'wouldn']
In [1]:
# Import only the name this notebook calls, so it is clear where it comes from.
# `os` is imported explicitly because a later cell uses os.path and must not rely
# on names leaking in through a wildcard import.
import os

from feature import get_wordlist
In [2]:
# Build the word list via feature.get_wordlist(). The recorded run below was interrupted
# from the keyboard before it returned, so `doc` was never assigned in that session.
doc = get_wordlist()
Creating the initial Wordlist file. This will take some time
Creating sample list for MREX
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
/usr/lib/python3.4/genericpath.py in isfile(path)
29 try:
---> 30 st = os.stat(path)
31 except OSError:
FileNotFoundError: [Errno 2] No such file or directory: '/datastore/newspaperarchive/MREX/1884/08_08.json'
During handling of the above exception, another exception occurred:
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-2-3589a94d7db9> in <module>()
----> 1 doc = get_wordlist()
/home/hannah/feature.py in get_wordlist()
44 for newspaper in NEWSPAPERS:
45 print("Creating sample list for {0}".format(newspaper))
---> 46 samplelist = create_samplelist(newspaper)
47
48 # commit that list to a log? Maybe should.
/home/hannah/feature.py in create_samplelist(newspaper, samples)
27 while(samples>0):
28 date = get_random_day(samplelist)
---> 29 if n.exists(newspaper, *date):
30 samplelist.append(date)
31 samples -= 1
/home/hannah/newspaperaccess.py in exists(self, newspaper, year, month, day)
111 newspaper = self._guess_newspaper(newspaper)
112 ppath = papertopath(newspaper, year, month, day, archive = self.archive)
--> 113 return os.path.isfile(ppath)
114
115 def get(self, newspaper, year, month, day, page=None):
/usr/lib/python3.4/genericpath.py in isfile(path)
29 try:
30 st = os.stat(path)
---> 31 except OSError:
32 return False
33 return stat.S_ISREG(st.st_mode)
KeyboardInterrupt:
In [3]:
# Depends on the get_wordlist() cell having completed; if `doc` was never assigned,
# this raises NameError.
type(doc)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-3-74dd8dc0157c> in <module>()
----> 1 type(doc)
NameError: name 'doc' is not defined
In [ ]:
# Check whether feature.py exists as a regular file; the path is relative, so the
# answer depends on the kernel's current working directory.
os.path.isfile("feature.py")
In [ ]:
In [ ]:
Content source: BL-Labs/poetryhunt
Similar notebooks: