---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-6-188f2713c61f> in <module>()
----> 1 texts_tokenized, counter = tokenize(texts)
<ipython-input-5-56ba90ca983f> in tokenize(texts)
3 counter = Counter()
4 for text in tqdm(texts):
----> 5 tokens = [x.lower() for x in nltk.word_tokenize(text)]
6 texts_tokenized.append(tokens)
7 counter.update(tokens)
~/.env/nipstimemachine/lib/python3.6/site-packages/nltk/tokenize/__init__.py in word_tokenize(text, language, preserve_line)
126 :type preserver_line: bool
127 """
--> 128 sentences = [text] if preserve_line else sent_tokenize(text, language)
129 return [token for sent in sentences
130 for token in _treebank_word_tokenizer.tokenize(sent)]
~/.env/nipstimemachine/lib/python3.6/site-packages/nltk/tokenize/__init__.py in sent_tokenize(text, language)
93 """
94 tokenizer = load('tokenizers/punkt/{0}.pickle'.format(language))
---> 95 return tokenizer.tokenize(text)
96
97 # Standard word tokenizer.
~/.env/nipstimemachine/lib/python3.6/site-packages/nltk/tokenize/punkt.py in tokenize(self, text, realign_boundaries)
1235 Given a text, returns a list of the sentences in that text.
1236 """
-> 1237 return list(self.sentences_from_text(text, realign_boundaries))
1238
1239 def debug_decisions(self, text):
~/.env/nipstimemachine/lib/python3.6/site-packages/nltk/tokenize/punkt.py in sentences_from_text(self, text, realign_boundaries)
1283 follows the period.
1284 """
-> 1285 return [text[s:e] for s, e in self.span_tokenize(text, realign_boundaries)]
1286
1287 def _slices_from_text(self, text):
~/.env/nipstimemachine/lib/python3.6/site-packages/nltk/tokenize/punkt.py in span_tokenize(self, text, realign_boundaries)
1274 if realign_boundaries:
1275 slices = self._realign_boundaries(text, slices)
-> 1276 return [(sl.start, sl.stop) for sl in slices]
1277
1278 def sentences_from_text(self, text, realign_boundaries=True):
~/.env/nipstimemachine/lib/python3.6/site-packages/nltk/tokenize/punkt.py in <listcomp>(.0)
1274 if realign_boundaries:
1275 slices = self._realign_boundaries(text, slices)
-> 1276 return [(sl.start, sl.stop) for sl in slices]
1277
1278 def sentences_from_text(self, text, realign_boundaries=True):
~/.env/nipstimemachine/lib/python3.6/site-packages/nltk/tokenize/punkt.py in _realign_boundaries(self, text, slices)
1314 """
1315 realign = 0
-> 1316 for sl1, sl2 in _pair_iter(slices):
1317 sl1 = slice(sl1.start + realign, sl1.stop)
1318 if not sl2:
~/.env/nipstimemachine/lib/python3.6/site-packages/nltk/tokenize/punkt.py in _pair_iter(it)
311 it = iter(it)
312 prev = next(it)
--> 313 for el in it:
314 yield (prev, el)
315 prev = el
~/.env/nipstimemachine/lib/python3.6/site-packages/nltk/tokenize/punkt.py in _slices_from_text(self, text)
1289 for match in self._lang_vars.period_context_re().finditer(text):
1290 context = match.group() + match.group('after_tok')
-> 1291 if self.text_contains_sentbreak(context):
1292 yield slice(last_break, match.end())
1293 if match.group('next_tok'):
~/.env/nipstimemachine/lib/python3.6/site-packages/nltk/tokenize/punkt.py in text_contains_sentbreak(self, text)
1335 """
1336 found = False # used to ignore last token
-> 1337 for t in self._annotate_tokens(self._tokenize_words(text)):
1338 if found:
1339 return True
~/.env/nipstimemachine/lib/python3.6/site-packages/nltk/tokenize/punkt.py in _annotate_second_pass(self, tokens)
1471 """
1472 for t1, t2 in _pair_iter(tokens):
-> 1473 self._second_pass_annotation(t1, t2)
1474 yield t1
1475
~/.env/nipstimemachine/lib/python3.6/site-packages/nltk/tokenize/punkt.py in _second_pass_annotation(self, aug_tok1, aug_tok2)
1507 # the token is an abbreviation or an ellipsis, then decide
1508 # whether we should *also* classify it as a sentbreak.
-> 1509 if ( (aug_tok1.abbr or aug_tok1.ellipsis) and
1510 (not tok_is_initial) ):
1511 # [4.1.1. Orthographic Heuristic] Check if there's
KeyboardInterrupt: