---------------------------------------------------------------------------
BadZipFile Traceback (most recent call last)
<ipython-input-15-7d4c656a93cb> in <module>()
1 for i, r in theme_df.iterrows():
----> 2 tmp = process_texts(r['body'])
3 theme_df.set_value(i, 'final', tmp)
<ipython-input-10-ec7d6600afd5> in process_texts(texts)
18 # texts = [bigram[line] for line in texts]
19 texts = [[word.split('/')[0] for word in lemmatize(' '.join(line),
---> 20 allowed_tags=re.compile('(NN)'), min_length=3)] for line in texts]
21 return texts
<ipython-input-10-ec7d6600afd5> in <listcomp>(.0)
18 # texts = [bigram[line] for line in texts]
19 texts = [[word.split('/')[0] for word in lemmatize(' '.join(line),
---> 20 allowed_tags=re.compile('(NN)'), min_length=3)] for line in texts]
21 return texts
~/anaconda3/lib/python3.6/site-packages/gensim/utils.py in lemmatize(content, allowed_tags, light, stopwords, min_length, max_length)
1061
1062 """
-> 1063 if not has_pattern():
1064 raise ImportError("Pattern library is not installed. Pattern library is needed in order to use lemmatize function")
1065 from pattern.en import parse
~/anaconda3/lib/python3.6/site-packages/gensim/utils.py in has_pattern()
1034 """
1035 try:
-> 1036 from pattern.en import parse # noqa:F401
1037 return True
1038 except ImportError:
~/anaconda3/lib/python3.6/site-packages/pattern/text/en/__init__.py in <module>()
77 )
78 # Import all submodules.
---> 79 from pattern.text.en import inflect
80 from pattern.text.en import wordnet
81 from pattern.text.en import wordlist
~/anaconda3/lib/python3.6/site-packages/pattern/text/en/__init__.py in <module>()
78 # Import all submodules.
79 from pattern.text.en import inflect
---> 80 from pattern.text.en import wordnet
81 from pattern.text.en import wordlist
82
~/anaconda3/lib/python3.6/site-packages/pattern/text/en/wordnet/__init__.py in <module>()
55 for token in ("wordnet", "wordnet_ic", "sentiwordnet"):
56 try:
---> 57 nltk.data.find("corpora/" + token)
58 except LookupError:
59 try:
~/anaconda3/lib/python3.6/site-packages/nltk/data.py in find(resource_name, paths)
638 [pieces[i] + '.zip'] + pieces[i:])
639 try:
--> 640 return find(modified_name, paths)
641 except LookupError:
642 pass
~/anaconda3/lib/python3.6/site-packages/nltk/data.py in find(resource_name, paths)
624 if os.path.exists(p):
625 try:
--> 626 return ZipFilePathPointer(p, zipentry)
627 except IOError:
628 # resource not in zipfile
~/anaconda3/lib/python3.6/site-packages/nltk/compat.py in _decorator(*args, **kwargs)
219 def _decorator(*args, **kwargs):
220 args = (args[0], add_py3_data(args[1])) + args[2:]
--> 221 return init_func(*args, **kwargs)
222 return wraps(init_func)(_decorator)
223
~/anaconda3/lib/python3.6/site-packages/nltk/data.py in __init__(self, zipfile, entry)
471 """
472 if isinstance(zipfile, string_types):
--> 473 zipfile = OpenOnDemandZipFile(os.path.abspath(zipfile))
474
475 # Normalize the entry string, it should be relative:
~/anaconda3/lib/python3.6/site-packages/nltk/compat.py in _decorator(*args, **kwargs)
219 def _decorator(*args, **kwargs):
220 args = (args[0], add_py3_data(args[1])) + args[2:]
--> 221 return init_func(*args, **kwargs)
222 return wraps(init_func)(_decorator)
223
~/anaconda3/lib/python3.6/site-packages/nltk/data.py in __init__(self, filename)
990 if not isinstance(filename, string_types):
991 raise TypeError('ReopenableZipFile filename must be a string')
--> 992 zipfile.ZipFile.__init__(self, filename)
993 assert self.filename == filename
994 self.close()
~/anaconda3/lib/python3.6/zipfile.py in __init__(self, file, mode, compression, allowZip64)
1106 try:
1107 if mode == 'r':
-> 1108 self._RealGetContents()
1109 elif mode in ('w', 'x'):
1110 # set the modified flag so central directory gets written
~/anaconda3/lib/python3.6/zipfile.py in _RealGetContents(self)
1173 raise BadZipFile("File is not a zip file")
1174 if not endrec:
-> 1175 raise BadZipFile("File is not a zip file")
1176 if self.debug > 1:
1177 print(endrec)
BadZipFile: File is not a zip file