In [ ]:
import numpy as np
import json
import time
import argparse
import _pickle as pickle

from os import path
from tqdm import tqdm

import sys
sys.path.append('../..')
from utils import CoreNLP_path
#from stanford_corenlp_pywrapper import CoreNLP
from gensim.models import KeyedVectors
from tokenizer import CoreNLPTokenizer

import multiprocessing
from multiprocessing import Pool
from multiprocessing.util import Finalize
from functools import partial


Using TensorFlow backend.

In [2]:
def word2vec(word2vec_path):
    """Load word2vec-format embeddings and return a word -> vector lookup.

    Args:
        word2vec_path: path passed to ``KeyedVectors.load_word2vec_format``.

    Returns:
        A function taking a single word and returning its embedding; words
        for which the lookup raises ``KeyError`` get a zero vector of length
        ``vector_size`` instead.
    """
    embeddings = KeyedVectors.load_word2vec_format(word2vec_path)

    def get_word_vector(word):
        """Return the embedding of `word`, or zeros when the lookup fails."""
        try:
            vector = embeddings[word]
        except KeyError:
            # Lookup failed with KeyError: substitute an all-zero vector.
            vector = np.zeros(embeddings.vector_size)
        return vector

    return get_word_vector

In [3]:
# Load the parsed training samples from disk into `samples`.
# The encoding is passed explicitly: without it open() falls back to the
# platform's locale encoding, which is not guaranteed to be UTF-8.
print('Reading SQuAD data... ', end='')
with open('../../data/train_parsed.json', encoding='utf-8') as fd:
    samples = json.load(fd)
print('Done!')


Reading SQuAD data... Done!

In [4]:
# Create a module-level CoreNLPTokenizer instance and report progress.
# NOTE(review): the keyword below is spelled `annatators`; if CoreNLPTokenizer
# expects `annotators`, this argument may be silently ignored -- confirm against
# the tokenizer module before relying on pos/ner/lemma output.
# NOTE(review): the classpath is an absolute, machine-specific path.
print('Initiating CoreNLP service connection... ', end='')
tokenizer = CoreNLPTokenizer(classpath='/home/anatoly/stanford-corenlp-full-2017-06-09/*', annatators='pos, ner, lemma')
print('Done!')


Initiating CoreNLP service connection... Done!

In [5]:
# Determine how many CPUs are available for the worker pool.
try:
    cpus = multiprocessing.cpu_count()
except NotImplementedError:
    # cpu_count() could not determine the number of CPUs; use a fallback.
    cpus = 2   # arbitrary default

# Display the detected value as the cell output.
cpus


Out[5]:
4

In [6]:
def chunks(l, n):
    """Generate consecutive slices of `l`, each holding at most `n` items.

    The final slice is shorter when ``len(l)`` is not a multiple of `n`.
    """
    yield from (l[start:start + n] for start in range(0, len(l), n))

In [12]:
class Tokenizer(object):
    """Tokenize a list of texts in parallel using a pool of worker processes.

    The input is split into contiguous chunks, each chunk is tokenized in a
    worker process by its own CoreNLPTokenizer, and the per-chunk results are
    concatenated back into one flat list.
    """

    def __init__(self, cpus, classpath='/home/anatoly/stanford-corenlp-full-2017-06-09/*'):
        """
        Args:
            cpus: number of worker processes to start; also the maximum
                number of chunks the input is split into.
            classpath: classpath handed to CoreNLPTokenizer in every worker.
                Defaults to the path that used to be hard-coded in `worker`.
        """
        self.cpus = cpus
        self.classpath = classpath

    def worker(self, arr):
        """Tokenize every item of `arr` with a newly created CoreNLPTokenizer.

        Returns a list with one ``tokenize`` result per item, in order.
        """
        t = CoreNLPTokenizer(classpath=self.classpath)
        return [t.tokenize(sample) for sample in arr]

    def tokenize(self, arr):
        """Tokenize all items of `arr` across `self.cpus` processes.

        Args:
            arr: sliceable sequence of texts (must support ``len``).

        Returns:
            A flat list with one tokenizer result per input item, in the
            same order as `arr`. An empty input yields an empty list without
            starting any process.
        """
        if len(arr) == 0:
            return []
        # Ceiling division: the chunk size is always >= 1 (round() produced 0
        # for small inputs, which made range() raise ValueError) and the
        # number of chunks never exceeds the number of workers.
        chunk_size = -(-len(arr) // self.cpus)
        # The context manager shuts the pool down once map() has returned,
        # so worker processes are not left behind after each call.
        with Pool(self.cpus) as p:
            nested_list = p.map(self.worker, chunks(arr, chunk_size))
        return [val for sublist in nested_list for val in sublist]
            
# Try the parallel tokenizer on the contexts of the first ten samples;
# the result of the last expression is shown as the cell output.
t = Tokenizer(4)
t.tokenize([entry['context'] for entry in samples[:10]])


Out[12]:
[(['Architecturally',
   ',',
   'the',
   'school',
   'has',
   'a',
   'Catholic',
   'character',
   '.',
   'Atop',
   'the',
   'Main',
   'Building',
   "'s",
   'gold',
   'dome',
   'is',
   'a',
   'golden',
   'statue',
   'of',
   'the',
   'Virgin',
   'Mary',
   '.',
   'Immediately',
   'in',
   'front',
   'of',
   'the',
   'Main',
   'Building',
   'and',
   'facing',
   'it',
   ',',
   'is',
   'a',
   'copper',
   'statue',
   'of',
   'Christ',
   'with',
   'arms',
   'upraised',
   'with',
   'the',
   'legend',
   '``',
   'Venite',
   'Ad',
   'Me',
   'Omnes',
   "''",
   '.',
   'Next',
   'to',
   'the',
   'Main',
   'Building',
   'is',
   'the',
   'Basilica',
   'of',
   'the',
   'Sacred',
   'Heart',
   '.',
   'Immediately',
   'behind',
   'the',
   'basilica',
   'is',
   'the',
   'Grotto',
   ',',
   'a',
   'Marian',
   'place',
   'of',
   'prayer',
   'and',
   'reflection',
   '.',
   'It',
   'is',
   'a',
   'replica',
   'of',
   'the',
   'grotto',
   'at',
   'Lourdes',
   ',',
   'France',
   'where',
   'the',
   'Virgin',
   'Mary',
   'reputedly',
   'appeared',
   'to',
   'Saint',
   'Bernadette',
   'Soubirous',
   'in',
   '1858',
   '.',
   'At',
   'the',
   'end',
   'of',
   'the',
   'main',
   'drive',
   '(',
   'and',
   'in',
   'a',
   'direct',
   'line',
   'that',
   'connects',
   'through',
   '3',
   'statues',
   'and',
   'the',
   'Gold',
   'Dome',
   ')',
   ',',
   'is',
   'a',
   'simple',
   ',',
   'modern',
   'stone',
   'statue',
   'of',
   'Mary',
   '.'],
  [[0, 15],
   [15, 16],
   [17, 20],
   [21, 27],
   [28, 31],
   [32, 33],
   [34, 42],
   [43, 52],
   [52, 53],
   [54, 58],
   [59, 62],
   [63, 67],
   [68, 76],
   [76, 78],
   [79, 83],
   [84, 88],
   [89, 91],
   [92, 93],
   [94, 100],
   [101, 107],
   [108, 110],
   [111, 114],
   [115, 121],
   [122, 126],
   [126, 127],
   [128, 139],
   [140, 142],
   [143, 148],
   [149, 151],
   [152, 155],
   [156, 160],
   [161, 169],
   [170, 173],
   [174, 180],
   [181, 183],
   [183, 184],
   [185, 187],
   [188, 189],
   [190, 196],
   [197, 203],
   [204, 206],
   [207, 213],
   [214, 218],
   [219, 223],
   [224, 232],
   [233, 237],
   [238, 241],
   [242, 248],
   [249, 250],
   [250, 256],
   [257, 259],
   [260, 262],
   [263, 268],
   [268, 269],
   [269, 270],
   [271, 275],
   [276, 278],
   [279, 282],
   [283, 287],
   [288, 296],
   [297, 299],
   [300, 303],
   [304, 312],
   [313, 315],
   [316, 319],
   [320, 326],
   [327, 332],
   [332, 333],
   [334, 345],
   [346, 352],
   [353, 356],
   [357, 365],
   [366, 368],
   [369, 372],
   [373, 379],
   [379, 380],
   [381, 382],
   [383, 389],
   [390, 395],
   [396, 398],
   [399, 405],
   [406, 409],
   [410, 420],
   [420, 421],
   [422, 424],
   [425, 427],
   [428, 429],
   [430, 437],
   [438, 440],
   [441, 444],
   [445, 451],
   [452, 454],
   [455, 462],
   [462, 463],
   [464, 470],
   [471, 476],
   [477, 480],
   [481, 487],
   [488, 492],
   [493, 502],
   [503, 511],
   [512, 514],
   [515, 520],
   [521, 531],
   [532, 541],
   [542, 544],
   [545, 549],
   [549, 550],
   [551, 553],
   [554, 557],
   [558, 561],
   [562, 564],
   [565, 568],
   [569, 573],
   [574, 579],
   [580, 581],
   [581, 584],
   [585, 587],
   [588, 589],
   [590, 596],
   [597, 601],
   [602, 606],
   [607, 615],
   [616, 623],
   [624, 625],
   [626, 633],
   [634, 637],
   [638, 641],
   [642, 646],
   [647, 651],
   [651, 652],
   [652, 653],
   [654, 656],
   [657, 658],
   [659, 665],
   [665, 666],
   [667, 673],
   [674, 679],
   [680, 686],
   [687, 689],
   [690, 694],
   [694, 695]]),
 (['Architecturally',
   ',',
   'the',
   'school',
   'has',
   'a',
   'Catholic',
   'character',
   '.',
   'Atop',
   'the',
   'Main',
   'Building',
   "'s",
   'gold',
   'dome',
   'is',
   'a',
   'golden',
   'statue',
   'of',
   'the',
   'Virgin',
   'Mary',
   '.',
   'Immediately',
   'in',
   'front',
   'of',
   'the',
   'Main',
   'Building',
   'and',
   'facing',
   'it',
   ',',
   'is',
   'a',
   'copper',
   'statue',
   'of',
   'Christ',
   'with',
   'arms',
   'upraised',
   'with',
   'the',
   'legend',
   '``',
   'Venite',
   'Ad',
   'Me',
   'Omnes',
   "''",
   '.',
   'Next',
   'to',
   'the',
   'Main',
   'Building',
   'is',
   'the',
   'Basilica',
   'of',
   'the',
   'Sacred',
   'Heart',
   '.',
   'Immediately',
   'behind',
   'the',
   'basilica',
   'is',
   'the',
   'Grotto',
   ',',
   'a',
   'Marian',
   'place',
   'of',
   'prayer',
   'and',
   'reflection',
   '.',
   'It',
   'is',
   'a',
   'replica',
   'of',
   'the',
   'grotto',
   'at',
   'Lourdes',
   ',',
   'France',
   'where',
   'the',
   'Virgin',
   'Mary',
   'reputedly',
   'appeared',
   'to',
   'Saint',
   'Bernadette',
   'Soubirous',
   'in',
   '1858',
   '.',
   'At',
   'the',
   'end',
   'of',
   'the',
   'main',
   'drive',
   '(',
   'and',
   'in',
   'a',
   'direct',
   'line',
   'that',
   'connects',
   'through',
   '3',
   'statues',
   'and',
   'the',
   'Gold',
   'Dome',
   ')',
   ',',
   'is',
   'a',
   'simple',
   ',',
   'modern',
   'stone',
   'statue',
   'of',
   'Mary',
   '.'],
  [[0, 15],
   [15, 16],
   [17, 20],
   [21, 27],
   [28, 31],
   [32, 33],
   [34, 42],
   [43, 52],
   [52, 53],
   [54, 58],
   [59, 62],
   [63, 67],
   [68, 76],
   [76, 78],
   [79, 83],
   [84, 88],
   [89, 91],
   [92, 93],
   [94, 100],
   [101, 107],
   [108, 110],
   [111, 114],
   [115, 121],
   [122, 126],
   [126, 127],
   [128, 139],
   [140, 142],
   [143, 148],
   [149, 151],
   [152, 155],
   [156, 160],
   [161, 169],
   [170, 173],
   [174, 180],
   [181, 183],
   [183, 184],
   [185, 187],
   [188, 189],
   [190, 196],
   [197, 203],
   [204, 206],
   [207, 213],
   [214, 218],
   [219, 223],
   [224, 232],
   [233, 237],
   [238, 241],
   [242, 248],
   [249, 250],
   [250, 256],
   [257, 259],
   [260, 262],
   [263, 268],
   [268, 269],
   [269, 270],
   [271, 275],
   [276, 278],
   [279, 282],
   [283, 287],
   [288, 296],
   [297, 299],
   [300, 303],
   [304, 312],
   [313, 315],
   [316, 319],
   [320, 326],
   [327, 332],
   [332, 333],
   [334, 345],
   [346, 352],
   [353, 356],
   [357, 365],
   [366, 368],
   [369, 372],
   [373, 379],
   [379, 380],
   [381, 382],
   [383, 389],
   [390, 395],
   [396, 398],
   [399, 405],
   [406, 409],
   [410, 420],
   [420, 421],
   [422, 424],
   [425, 427],
   [428, 429],
   [430, 437],
   [438, 440],
   [441, 444],
   [445, 451],
   [452, 454],
   [455, 462],
   [462, 463],
   [464, 470],
   [471, 476],
   [477, 480],
   [481, 487],
   [488, 492],
   [493, 502],
   [503, 511],
   [512, 514],
   [515, 520],
   [521, 531],
   [532, 541],
   [542, 544],
   [545, 549],
   [549, 550],
   [551, 553],
   [554, 557],
   [558, 561],
   [562, 564],
   [565, 568],
   [569, 573],
   [574, 579],
   [580, 581],
   [581, 584],
   [585, 587],
   [588, 589],
   [590, 596],
   [597, 601],
   [602, 606],
   [607, 615],
   [616, 623],
   [624, 625],
   [626, 633],
   [634, 637],
   [638, 641],
   [642, 646],
   [647, 651],
   [651, 652],
   [652, 653],
   [654, 656],
   [657, 658],
   [659, 665],
   [665, 666],
   [667, 673],
   [674, 679],
   [680, 686],
   [687, 689],
   [690, 694],
   [694, 695]]),
 (['Architecturally',
   ',',
   'the',
   'school',
   'has',
   'a',
   'Catholic',
   'character',
   '.',
   'Atop',
   'the',
   'Main',
   'Building',
   "'s",
   'gold',
   'dome',
   'is',
   'a',
   'golden',
   'statue',
   'of',
   'the',
   'Virgin',
   'Mary',
   '.',
   'Immediately',
   'in',
   'front',
   'of',
   'the',
   'Main',
   'Building',
   'and',
   'facing',
   'it',
   ',',
   'is',
   'a',
   'copper',
   'statue',
   'of',
   'Christ',
   'with',
   'arms',
   'upraised',
   'with',
   'the',
   'legend',
   '``',
   'Venite',
   'Ad',
   'Me',
   'Omnes',
   "''",
   '.',
   'Next',
   'to',
   'the',
   'Main',
   'Building',
   'is',
   'the',
   'Basilica',
   'of',
   'the',
   'Sacred',
   'Heart',
   '.',
   'Immediately',
   'behind',
   'the',
   'basilica',
   'is',
   'the',
   'Grotto',
   ',',
   'a',
   'Marian',
   'place',
   'of',
   'prayer',
   'and',
   'reflection',
   '.',
   'It',
   'is',
   'a',
   'replica',
   'of',
   'the',
   'grotto',
   'at',
   'Lourdes',
   ',',
   'France',
   'where',
   'the',
   'Virgin',
   'Mary',
   'reputedly',
   'appeared',
   'to',
   'Saint',
   'Bernadette',
   'Soubirous',
   'in',
   '1858',
   '.',
   'At',
   'the',
   'end',
   'of',
   'the',
   'main',
   'drive',
   '(',
   'and',
   'in',
   'a',
   'direct',
   'line',
   'that',
   'connects',
   'through',
   '3',
   'statues',
   'and',
   'the',
   'Gold',
   'Dome',
   ')',
   ',',
   'is',
   'a',
   'simple',
   ',',
   'modern',
   'stone',
   'statue',
   'of',
   'Mary',
   '.'],
  [[0, 15],
   [15, 16],
   [17, 20],
   [21, 27],
   [28, 31],
   [32, 33],
   [34, 42],
   [43, 52],
   [52, 53],
   [54, 58],
   [59, 62],
   [63, 67],
   [68, 76],
   [76, 78],
   [79, 83],
   [84, 88],
   [89, 91],
   [92, 93],
   [94, 100],
   [101, 107],
   [108, 110],
   [111, 114],
   [115, 121],
   [122, 126],
   [126, 127],
   [128, 139],
   [140, 142],
   [143, 148],
   [149, 151],
   [152, 155],
   [156, 160],
   [161, 169],
   [170, 173],
   [174, 180],
   [181, 183],
   [183, 184],
   [185, 187],
   [188, 189],
   [190, 196],
   [197, 203],
   [204, 206],
   [207, 213],
   [214, 218],
   [219, 223],
   [224, 232],
   [233, 237],
   [238, 241],
   [242, 248],
   [249, 250],
   [250, 256],
   [257, 259],
   [260, 262],
   [263, 268],
   [268, 269],
   [269, 270],
   [271, 275],
   [276, 278],
   [279, 282],
   [283, 287],
   [288, 296],
   [297, 299],
   [300, 303],
   [304, 312],
   [313, 315],
   [316, 319],
   [320, 326],
   [327, 332],
   [332, 333],
   [334, 345],
   [346, 352],
   [353, 356],
   [357, 365],
   [366, 368],
   [369, 372],
   [373, 379],
   [379, 380],
   [381, 382],
   [383, 389],
   [390, 395],
   [396, 398],
   [399, 405],
   [406, 409],
   [410, 420],
   [420, 421],
   [422, 424],
   [425, 427],
   [428, 429],
   [430, 437],
   [438, 440],
   [441, 444],
   [445, 451],
   [452, 454],
   [455, 462],
   [462, 463],
   [464, 470],
   [471, 476],
   [477, 480],
   [481, 487],
   [488, 492],
   [493, 502],
   [503, 511],
   [512, 514],
   [515, 520],
   [521, 531],
   [532, 541],
   [542, 544],
   [545, 549],
   [549, 550],
   [551, 553],
   [554, 557],
   [558, 561],
   [562, 564],
   [565, 568],
   [569, 573],
   [574, 579],
   [580, 581],
   [581, 584],
   [585, 587],
   [588, 589],
   [590, 596],
   [597, 601],
   [602, 606],
   [607, 615],
   [616, 623],
   [624, 625],
   [626, 633],
   [634, 637],
   [638, 641],
   [642, 646],
   [647, 651],
   [651, 652],
   [652, 653],
   [654, 656],
   [657, 658],
   [659, 665],
   [665, 666],
   [667, 673],
   [674, 679],
   [680, 686],
   [687, 689],
   [690, 694],
   [694, 695]]),
 (['Architecturally',
   ',',
   'the',
   'school',
   'has',
   'a',
   'Catholic',
   'character',
   '.',
   'Atop',
   'the',
   'Main',
   'Building',
   "'s",
   'gold',
   'dome',
   'is',
   'a',
   'golden',
   'statue',
   'of',
   'the',
   'Virgin',
   'Mary',
   '.',
   'Immediately',
   'in',
   'front',
   'of',
   'the',
   'Main',
   'Building',
   'and',
   'facing',
   'it',
   ',',
   'is',
   'a',
   'copper',
   'statue',
   'of',
   'Christ',
   'with',
   'arms',
   'upraised',
   'with',
   'the',
   'legend',
   '``',
   'Venite',
   'Ad',
   'Me',
   'Omnes',
   "''",
   '.',
   'Next',
   'to',
   'the',
   'Main',
   'Building',
   'is',
   'the',
   'Basilica',
   'of',
   'the',
   'Sacred',
   'Heart',
   '.',
   'Immediately',
   'behind',
   'the',
   'basilica',
   'is',
   'the',
   'Grotto',
   ',',
   'a',
   'Marian',
   'place',
   'of',
   'prayer',
   'and',
   'reflection',
   '.',
   'It',
   'is',
   'a',
   'replica',
   'of',
   'the',
   'grotto',
   'at',
   'Lourdes',
   ',',
   'France',
   'where',
   'the',
   'Virgin',
   'Mary',
   'reputedly',
   'appeared',
   'to',
   'Saint',
   'Bernadette',
   'Soubirous',
   'in',
   '1858',
   '.',
   'At',
   'the',
   'end',
   'of',
   'the',
   'main',
   'drive',
   '(',
   'and',
   'in',
   'a',
   'direct',
   'line',
   'that',
   'connects',
   'through',
   '3',
   'statues',
   'and',
   'the',
   'Gold',
   'Dome',
   ')',
   ',',
   'is',
   'a',
   'simple',
   ',',
   'modern',
   'stone',
   'statue',
   'of',
   'Mary',
   '.'],
  [[0, 15],
   [15, 16],
   [17, 20],
   [21, 27],
   [28, 31],
   [32, 33],
   [34, 42],
   [43, 52],
   [52, 53],
   [54, 58],
   [59, 62],
   [63, 67],
   [68, 76],
   [76, 78],
   [79, 83],
   [84, 88],
   [89, 91],
   [92, 93],
   [94, 100],
   [101, 107],
   [108, 110],
   [111, 114],
   [115, 121],
   [122, 126],
   [126, 127],
   [128, 139],
   [140, 142],
   [143, 148],
   [149, 151],
   [152, 155],
   [156, 160],
   [161, 169],
   [170, 173],
   [174, 180],
   [181, 183],
   [183, 184],
   [185, 187],
   [188, 189],
   [190, 196],
   [197, 203],
   [204, 206],
   [207, 213],
   [214, 218],
   [219, 223],
   [224, 232],
   [233, 237],
   [238, 241],
   [242, 248],
   [249, 250],
   [250, 256],
   [257, 259],
   [260, 262],
   [263, 268],
   [268, 269],
   [269, 270],
   [271, 275],
   [276, 278],
   [279, 282],
   [283, 287],
   [288, 296],
   [297, 299],
   [300, 303],
   [304, 312],
   [313, 315],
   [316, 319],
   [320, 326],
   [327, 332],
   [332, 333],
   [334, 345],
   [346, 352],
   [353, 356],
   [357, 365],
   [366, 368],
   [369, 372],
   [373, 379],
   [379, 380],
   [381, 382],
   [383, 389],
   [390, 395],
   [396, 398],
   [399, 405],
   [406, 409],
   [410, 420],
   [420, 421],
   [422, 424],
   [425, 427],
   [428, 429],
   [430, 437],
   [438, 440],
   [441, 444],
   [445, 451],
   [452, 454],
   [455, 462],
   [462, 463],
   [464, 470],
   [471, 476],
   [477, 480],
   [481, 487],
   [488, 492],
   [493, 502],
   [503, 511],
   [512, 514],
   [515, 520],
   [521, 531],
   [532, 541],
   [542, 544],
   [545, 549],
   [549, 550],
   [551, 553],
   [554, 557],
   [558, 561],
   [562, 564],
   [565, 568],
   [569, 573],
   [574, 579],
   [580, 581],
   [581, 584],
   [585, 587],
   [588, 589],
   [590, 596],
   [597, 601],
   [602, 606],
   [607, 615],
   [616, 623],
   [624, 625],
   [626, 633],
   [634, 637],
   [638, 641],
   [642, 646],
   [647, 651],
   [651, 652],
   [652, 653],
   [654, 656],
   [657, 658],
   [659, 665],
   [665, 666],
   [667, 673],
   [674, 679],
   [680, 686],
   [687, 689],
   [690, 694],
   [694, 695]]),
 (['Architecturally',
   ',',
   'the',
   'school',
   'has',
   'a',
   'Catholic',
   'character',
   '.',
   'Atop',
   'the',
   'Main',
   'Building',
   "'s",
   'gold',
   'dome',
   'is',
   'a',
   'golden',
   'statue',
   'of',
   'the',
   'Virgin',
   'Mary',
   '.',
   'Immediately',
   'in',
   'front',
   'of',
   'the',
   'Main',
   'Building',
   'and',
   'facing',
   'it',
   ',',
   'is',
   'a',
   'copper',
   'statue',
   'of',
   'Christ',
   'with',
   'arms',
   'upraised',
   'with',
   'the',
   'legend',
   '``',
   'Venite',
   'Ad',
   'Me',
   'Omnes',
   "''",
   '.',
   'Next',
   'to',
   'the',
   'Main',
   'Building',
   'is',
   'the',
   'Basilica',
   'of',
   'the',
   'Sacred',
   'Heart',
   '.',
   'Immediately',
   'behind',
   'the',
   'basilica',
   'is',
   'the',
   'Grotto',
   ',',
   'a',
   'Marian',
   'place',
   'of',
   'prayer',
   'and',
   'reflection',
   '.',
   'It',
   'is',
   'a',
   'replica',
   'of',
   'the',
   'grotto',
   'at',
   'Lourdes',
   ',',
   'France',
   'where',
   'the',
   'Virgin',
   'Mary',
   'reputedly',
   'appeared',
   'to',
   'Saint',
   'Bernadette',
   'Soubirous',
   'in',
   '1858',
   '.',
   'At',
   'the',
   'end',
   'of',
   'the',
   'main',
   'drive',
   '(',
   'and',
   'in',
   'a',
   'direct',
   'line',
   'that',
   'connects',
   'through',
   '3',
   'statues',
   'and',
   'the',
   'Gold',
   'Dome',
   ')',
   ',',
   'is',
   'a',
   'simple',
   ',',
   'modern',
   'stone',
   'statue',
   'of',
   'Mary',
   '.'],
  [[0, 15],
   [15, 16],
   [17, 20],
   [21, 27],
   [28, 31],
   [32, 33],
   [34, 42],
   [43, 52],
   [52, 53],
   [54, 58],
   [59, 62],
   [63, 67],
   [68, 76],
   [76, 78],
   [79, 83],
   [84, 88],
   [89, 91],
   [92, 93],
   [94, 100],
   [101, 107],
   [108, 110],
   [111, 114],
   [115, 121],
   [122, 126],
   [126, 127],
   [128, 139],
   [140, 142],
   [143, 148],
   [149, 151],
   [152, 155],
   [156, 160],
   [161, 169],
   [170, 173],
   [174, 180],
   [181, 183],
   [183, 184],
   [185, 187],
   [188, 189],
   [190, 196],
   [197, 203],
   [204, 206],
   [207, 213],
   [214, 218],
   [219, 223],
   [224, 232],
   [233, 237],
   [238, 241],
   [242, 248],
   [249, 250],
   [250, 256],
   [257, 259],
   [260, 262],
   [263, 268],
   [268, 269],
   [269, 270],
   [271, 275],
   [276, 278],
   [279, 282],
   [283, 287],
   [288, 296],
   [297, 299],
   [300, 303],
   [304, 312],
   [313, 315],
   [316, 319],
   [320, 326],
   [327, 332],
   [332, 333],
   [334, 345],
   [346, 352],
   [353, 356],
   [357, 365],
   [366, 368],
   [369, 372],
   [373, 379],
   [379, 380],
   [381, 382],
   [383, 389],
   [390, 395],
   [396, 398],
   [399, 405],
   [406, 409],
   [410, 420],
   [420, 421],
   [422, 424],
   [425, 427],
   [428, 429],
   [430, 437],
   [438, 440],
   [441, 444],
   [445, 451],
   [452, 454],
   [455, 462],
   [462, 463],
   [464, 470],
   [471, 476],
   [477, 480],
   [481, 487],
   [488, 492],
   [493, 502],
   [503, 511],
   [512, 514],
   [515, 520],
   [521, 531],
   [532, 541],
   [542, 544],
   [545, 549],
   [549, 550],
   [551, 553],
   [554, 557],
   [558, 561],
   [562, 564],
   [565, 568],
   [569, 573],
   [574, 579],
   [580, 581],
   [581, 584],
   [585, 587],
   [588, 589],
   [590, 596],
   [597, 601],
   [602, 606],
   [607, 615],
   [616, 623],
   [624, 625],
   [626, 633],
   [634, 637],
   [638, 641],
   [642, 646],
   [647, 651],
   [651, 652],
   [652, 653],
   [654, 656],
   [657, 658],
   [659, 665],
   [665, 666],
   [667, 673],
   [674, 679],
   [680, 686],
   [687, 689],
   [690, 694],
   [694, 695]]),
 (['As',
   'at',
   'most',
   'other',
   'universities',
   ',',
   'Notre',
   'Dame',
   "'s",
   'students',
   'run',
   'a',
   'number',
   'of',
   'news',
   'media',
   'outlets',
   '.',
   'The',
   'nine',
   'student-run',
   'outlets',
   'include',
   'three',
   'newspapers',
   ',',
   'both',
   'a',
   'radio',
   'and',
   'television',
   'station',
   ',',
   'and',
   'several',
   'magazines',
   'and',
   'journals',
   '.',
   'Begun',
   'as',
   'a',
   'one-page',
   'journal',
   'in',
   'September',
   '1876',
   ',',
   'the',
   'Scholastic',
   'magazine',
   'is',
   'issued',
   'twice',
   'monthly',
   'and',
   'claims',
   'to',
   'be',
   'the',
   'oldest',
   'continuous',
   'collegiate',
   'publication',
   'in',
   'the',
   'United',
   'States',
   '.',
   'The',
   'other',
   'magazine',
   ',',
   'The',
   'Juggler',
   ',',
   'is',
   'released',
   'twice',
   'a',
   'year',
   'and',
   'focuses',
   'on',
   'student',
   'literature',
   'and',
   'artwork',
   '.',
   'The',
   'Dome',
   'yearbook',
   'is',
   'published',
   'annually',
   '.',
   'The',
   'newspapers',
   'have',
   'varying',
   'publication',
   'interests',
   ',',
   'with',
   'The',
   'Observer',
   'published',
   'daily',
   'and',
   'mainly',
   'reporting',
   'university',
   'and',
   'other',
   'news',
   ',',
   'and',
   'staffed',
   'by',
   'students',
   'from',
   'both',
   'Notre',
   'Dame',
   'and',
   'Saint',
   'Mary',
   "'s",
   'College',
   '.',
   'Unlike',
   'Scholastic',
   'and',
   'The',
   'Dome',
   ',',
   'The',
   'Observer',
   'is',
   'an',
   'independent',
   'publication',
   'and',
   'does',
   'not',
   'have',
   'a',
   'faculty',
   'advisor',
   'or',
   'any',
   'editorial',
   'oversight',
   'from',
   'the',
   'University',
   '.',
   'In',
   '1987',
   ',',
   'when',
   'some',
   'students',
   'believed',
   'that',
   'The',
   'Observer',
   'began',
   'to',
   'show',
   'a',
   'conservative',
   'bias',
   ',',
   'a',
   'liberal',
   'newspaper',
   ',',
   'Common',
   'Sense',
   'was',
   'published',
   '.',
   'Likewise',
   ',',
   'in',
   '2003',
   ',',
   'when',
   'other',
   'students',
   'believed',
   'that',
   'the',
   'paper',
   'showed',
   'a',
   'liberal',
   'bias',
   ',',
   'the',
   'conservative',
   'paper',
   'Irish',
   'Rover',
   'went',
   'into',
   'production',
   '.',
   'Neither',
   'paper',
   'is',
   'published',
   'as',
   'often',
   'as',
   'The',
   'Observer',
   ';',
   'however',
   ',',
   'all',
   'three',
   'are',
   'distributed',
   'to',
   'all',
   'students',
   '.',
   'Finally',
   ',',
   'in',
   'Spring',
   '2008',
   'an',
   'undergraduate',
   'journal',
   'for',
   'political',
   'science',
   'research',
   ',',
   'Beyond',
   'Politics',
   ',',
   'made',
   'its',
   'debut',
   '.'],
  [[0, 2],
   [3, 5],
   [6, 10],
   [11, 16],
   [17, 29],
   [29, 30],
   [31, 36],
   [37, 41],
   [41, 43],
   [44, 52],
   [53, 56],
   [57, 58],
   [59, 65],
   [66, 68],
   [69, 73],
   [74, 79],
   [80, 87],
   [87, 88],
   [89, 92],
   [93, 97],
   [98, 109],
   [110, 117],
   [118, 125],
   [126, 131],
   [132, 142],
   [142, 143],
   [144, 148],
   [149, 150],
   [151, 156],
   [157, 160],
   [161, 171],
   [172, 179],
   [179, 180],
   [181, 184],
   [185, 192],
   [193, 202],
   [203, 206],
   [207, 215],
   [215, 216],
   [217, 222],
   [223, 225],
   [226, 227],
   [228, 236],
   [237, 244],
   [245, 247],
   [248, 257],
   [258, 262],
   [262, 263],
   [264, 267],
   [268, 278],
   [279, 287],
   [288, 290],
   [291, 297],
   [298, 303],
   [304, 311],
   [312, 315],
   [316, 322],
   [323, 325],
   [326, 328],
   [329, 332],
   [333, 339],
   [340, 350],
   [351, 361],
   [362, 373],
   [374, 376],
   [377, 380],
   [381, 387],
   [388, 394],
   [394, 395],
   [396, 399],
   [400, 405],
   [406, 414],
   [414, 415],
   [416, 419],
   [420, 427],
   [427, 428],
   [429, 431],
   [432, 440],
   [441, 446],
   [447, 448],
   [449, 453],
   [454, 457],
   [458, 465],
   [466, 468],
   [469, 476],
   [477, 487],
   [488, 491],
   [492, 499],
   [499, 500],
   [501, 504],
   [505, 509],
   [510, 518],
   [519, 521],
   [522, 531],
   [532, 540],
   [540, 541],
   [542, 545],
   [546, 556],
   [557, 561],
   [562, 569],
   [570, 581],
   [582, 591],
   [591, 592],
   [593, 597],
   [598, 601],
   [602, 610],
   [611, 620],
   [621, 626],
   [627, 630],
   [631, 637],
   [638, 647],
   [648, 658],
   [659, 662],
   [663, 668],
   [669, 673],
   [673, 674],
   [675, 678],
   [679, 686],
   [687, 689],
   [690, 698],
   [699, 703],
   [704, 708],
   [709, 714],
   [715, 719],
   [720, 723],
   [724, 729],
   [730, 734],
   [734, 736],
   [737, 744],
   [744, 745],
   [746, 752],
   [753, 763],
   [764, 767],
   [768, 771],
   [772, 776],
   [776, 777],
   [778, 781],
   [782, 790],
   [791, 793],
   [794, 796],
   [797, 808],
   [809, 820],
   [821, 824],
   [825, 829],
   [830, 833],
   [834, 838],
   [839, 840],
   [841, 848],
   [849, 856],
   [857, 859],
   [860, 863],
   [864, 873],
   [874, 883],
   [884, 888],
   [889, 892],
   [893, 903],
   [903, 904],
   [905, 907],
   [908, 912],
   [912, 913],
   [914, 918],
   [919, 923],
   [924, 932],
   [933, 941],
   [942, 946],
   [947, 950],
   [951, 959],
   [960, 965],
   [966, 968],
   [969, 973],
   [974, 975],
   [976, 988],
   [989, 993],
   [993, 994],
   [995, 996],
   [997, 1004],
   [1005, 1014],
   [1014, 1015],
   [1016, 1022],
   [1023, 1028],
   [1029, 1032],
   [1033, 1042],
   [1042, 1043],
   [1044, 1052],
   [1052, 1053],
   [1054, 1056],
   [1057, 1061],
   [1061, 1062],
   [1063, 1067],
   [1068, 1073],
   [1074, 1082],
   [1083, 1091],
   [1092, 1096],
   [1097, 1100],
   [1101, 1106],
   [1107, 1113],
   [1114, 1115],
   [1116, 1123],
   [1124, 1128],
   [1128, 1129],
   [1130, 1133],
   [1134, 1146],
   [1147, 1152],
   [1153, 1158],
   [1159, 1164],
   [1165, 1169],
   [1170, 1174],
   [1175, 1185],
   [1185, 1186],
   [1187, 1194],
   [1195, 1200],
   [1201, 1203],
   [1204, 1213],
   [1214, 1216],
   [1217, 1222],
   [1223, 1225],
   [1226, 1229],
   [1230, 1238],
   [1238, 1239],
   [1240, 1247],
   [1247, 1248],
   [1249, 1252],
   [1253, 1258],
   [1259, 1262],
   [1263, 1274],
   [1275, 1277],
   [1278, 1281],
   [1282, 1290],
   [1290, 1291],
   [1292, 1299],
   [1299, 1300],
   [1301, 1303],
   [1304, 1310],
   [1311, 1315],
   [1316, 1318],
   [1319, 1332],
   [1333, 1340],
   [1341, 1344],
   [1345, 1354],
   [1355, 1362],
   [1363, 1371],
   [1371, 1372],
   [1373, 1379],
   [1380, 1388],
   [1388, 1389],
   [1390, 1394],
   [1395, 1398],
   [1399, 1404],
   [1404, 1405]]),
 (['As',
   'at',
   'most',
   'other',
   'universities',
   ',',
   'Notre',
   'Dame',
   "'s",
   'students',
   'run',
   'a',
   'number',
   'of',
   'news',
   'media',
   'outlets',
   '.',
   'The',
   'nine',
   'student-run',
   'outlets',
   'include',
   'three',
   'newspapers',
   ',',
   'both',
   'a',
   'radio',
   'and',
   'television',
   'station',
   ',',
   'and',
   'several',
   'magazines',
   'and',
   'journals',
   '.',
   'Begun',
   'as',
   'a',
   'one-page',
   'journal',
   'in',
   'September',
   '1876',
   ',',
   'the',
   'Scholastic',
   'magazine',
   'is',
   'issued',
   'twice',
   'monthly',
   'and',
   'claims',
   'to',
   'be',
   'the',
   'oldest',
   'continuous',
   'collegiate',
   'publication',
   'in',
   'the',
   'United',
   'States',
   '.',
   'The',
   'other',
   'magazine',
   ',',
   'The',
   'Juggler',
   ',',
   'is',
   'released',
   'twice',
   'a',
   'year',
   'and',
   'focuses',
   'on',
   'student',
   'literature',
   'and',
   'artwork',
   '.',
   'The',
   'Dome',
   'yearbook',
   'is',
   'published',
   'annually',
   '.',
   'The',
   'newspapers',
   'have',
   'varying',
   'publication',
   'interests',
   ',',
   'with',
   'The',
   'Observer',
   'published',
   'daily',
   'and',
   'mainly',
   'reporting',
   'university',
   'and',
   'other',
   'news',
   ',',
   'and',
   'staffed',
   'by',
   'students',
   'from',
   'both',
   'Notre',
   'Dame',
   'and',
   'Saint',
   'Mary',
   "'s",
   'College',
   '.',
   'Unlike',
   'Scholastic',
   'and',
   'The',
   'Dome',
   ',',
   'The',
   'Observer',
   'is',
   'an',
   'independent',
   'publication',
   'and',
   'does',
   'not',
   'have',
   'a',
   'faculty',
   'advisor',
   'or',
   'any',
   'editorial',
   'oversight',
   'from',
   'the',
   'University',
   '.',
   'In',
   '1987',
   ',',
   'when',
   'some',
   'students',
   'believed',
   'that',
   'The',
   'Observer',
   'began',
   'to',
   'show',
   'a',
   'conservative',
   'bias',
   ',',
   'a',
   'liberal',
   'newspaper',
   ',',
   'Common',
   'Sense',
   'was',
   'published',
   '.',
   'Likewise',
   ',',
   'in',
   '2003',
   ',',
   'when',
   'other',
   'students',
   'believed',
   'that',
   'the',
   'paper',
   'showed',
   'a',
   'liberal',
   'bias',
   ',',
   'the',
   'conservative',
   'paper',
   'Irish',
   'Rover',
   'went',
   'into',
   'production',
   '.',
   'Neither',
   'paper',
   'is',
   'published',
   'as',
   'often',
   'as',
   'The',
   'Observer',
   ';',
   'however',
   ',',
   'all',
   'three',
   'are',
   'distributed',
   'to',
   'all',
   'students',
   '.',
   'Finally',
   ',',
   'in',
   'Spring',
   '2008',
   'an',
   'undergraduate',
   'journal',
   'for',
   'political',
   'science',
   'research',
   ',',
   'Beyond',
   'Politics',
   ',',
   'made',
   'its',
   'debut',
   '.'],
  [[0, 2],
   [3, 5],
   [6, 10],
   [11, 16],
   [17, 29],
   [29, 30],
   [31, 36],
   [37, 41],
   [41, 43],
   [44, 52],
   [53, 56],
   [57, 58],
   [59, 65],
   [66, 68],
   [69, 73],
   [74, 79],
   [80, 87],
   [87, 88],
   [89, 92],
   [93, 97],
   [98, 109],
   [110, 117],
   [118, 125],
   [126, 131],
   [132, 142],
   [142, 143],
   [144, 148],
   [149, 150],
   [151, 156],
   [157, 160],
   [161, 171],
   [172, 179],
   [179, 180],
   [181, 184],
   [185, 192],
   [193, 202],
   [203, 206],
   [207, 215],
   [215, 216],
   [217, 222],
   [223, 225],
   [226, 227],
   [228, 236],
   [237, 244],
   [245, 247],
   [248, 257],
   [258, 262],
   [262, 263],
   [264, 267],
   [268, 278],
   [279, 287],
   [288, 290],
   [291, 297],
   [298, 303],
   [304, 311],
   [312, 315],
   [316, 322],
   [323, 325],
   [326, 328],
   [329, 332],
   [333, 339],
   [340, 350],
   [351, 361],
   [362, 373],
   [374, 376],
   [377, 380],
   [381, 387],
   [388, 394],
   [394, 395],
   [396, 399],
   [400, 405],
   [406, 414],
   [414, 415],
   [416, 419],
   [420, 427],
   [427, 428],
   [429, 431],
   [432, 440],
   [441, 446],
   [447, 448],
   [449, 453],
   [454, 457],
   [458, 465],
   [466, 468],
   [469, 476],
   [477, 487],
   [488, 491],
   [492, 499],
   [499, 500],
   [501, 504],
   [505, 509],
   [510, 518],
   [519, 521],
   [522, 531],
   [532, 540],
   [540, 541],
   [542, 545],
   [546, 556],
   [557, 561],
   [562, 569],
   [570, 581],
   [582, 591],
   [591, 592],
   [593, 597],
   [598, 601],
   [602, 610],
   [611, 620],
   [621, 626],
   [627, 630],
   [631, 637],
   [638, 647],
   [648, 658],
   [659, 662],
   [663, 668],
   [669, 673],
   [673, 674],
   [675, 678],
   [679, 686],
   [687, 689],
   [690, 698],
   [699, 703],
   [704, 708],
   [709, 714],
   [715, 719],
   [720, 723],
   [724, 729],
   [730, 734],
   [734, 736],
   [737, 744],
   [744, 745],
   [746, 752],
   [753, 763],
   [764, 767],
   [768, 771],
   [772, 776],
   [776, 777],
   [778, 781],
   [782, 790],
   [791, 793],
   [794, 796],
   [797, 808],
   [809, 820],
   [821, 824],
   [825, 829],
   [830, 833],
   [834, 838],
   [839, 840],
   [841, 848],
   [849, 856],
   [857, 859],
   [860, 863],
   [864, 873],
   [874, 883],
   [884, 888],
   [889, 892],
   [893, 903],
   [903, 904],
   [905, 907],
   [908, 912],
   [912, 913],
   [914, 918],
   [919, 923],
   [924, 932],
   [933, 941],
   [942, 946],
   [947, 950],
   [951, 959],
   [960, 965],
   [966, 968],
   [969, 973],
   [974, 975],
   [976, 988],
   [989, 993],
   [993, 994],
   [995, 996],
   [997, 1004],
   [1005, 1014],
   [1014, 1015],
   [1016, 1022],
   [1023, 1028],
   [1029, 1032],
   [1033, 1042],
   [1042, 1043],
   [1044, 1052],
   [1052, 1053],
   [1054, 1056],
   [1057, 1061],
   [1061, 1062],
   [1063, 1067],
   [1068, 1073],
   [1074, 1082],
   [1083, 1091],
   [1092, 1096],
   [1097, 1100],
   [1101, 1106],
   [1107, 1113],
   [1114, 1115],
   [1116, 1123],
   [1124, 1128],
   [1128, 1129],
   [1130, 1133],
   [1134, 1146],
   [1147, 1152],
   [1153, 1158],
   [1159, 1164],
   [1165, 1169],
   [1170, 1174],
   [1175, 1185],
   [1185, 1186],
   [1187, 1194],
   [1195, 1200],
   [1201, 1203],
   [1204, 1213],
   [1214, 1216],
   [1217, 1222],
   [1223, 1225],
   [1226, 1229],
   [1230, 1238],
   [1238, 1239],
   [1240, 1247],
   [1247, 1248],
   [1249, 1252],
   [1253, 1258],
   [1259, 1262],
   [1263, 1274],
   [1275, 1277],
   [1278, 1281],
   [1282, 1290],
   [1290, 1291],
   [1292, 1299],
   [1299, 1300],
   [1301, 1303],
   [1304, 1310],
   [1311, 1315],
   [1316, 1318],
   [1319, 1332],
   [1333, 1340],
   [1341, 1344],
   [1345, 1354],
   [1355, 1362],
   [1363, 1371],
   [1371, 1372],
   [1373, 1379],
   [1380, 1388],
   [1388, 1389],
   [1390, 1394],
   [1395, 1398],
   [1399, 1404],
   [1404, 1405]]),
 (['As',
   'at',
   'most',
   'other',
   'universities',
   ',',
   'Notre',
   'Dame',
   "'s",
   'students',
   'run',
   'a',
   'number',
   'of',
   'news',
   'media',
   'outlets',
   '.',
   'The',
   'nine',
   'student-run',
   'outlets',
   'include',
   'three',
   'newspapers',
   ',',
   'both',
   'a',
   'radio',
   'and',
   'television',
   'station',
   ',',
   'and',
   'several',
   'magazines',
   'and',
   'journals',
   '.',
   'Begun',
   'as',
   'a',
   'one-page',
   'journal',
   'in',
   'September',
   '1876',
   ',',
   'the',
   'Scholastic',
   'magazine',
   'is',
   'issued',
   'twice',
   'monthly',
   'and',
   'claims',
   'to',
   'be',
   'the',
   'oldest',
   'continuous',
   'collegiate',
   'publication',
   'in',
   'the',
   'United',
   'States',
   '.',
   'The',
   'other',
   'magazine',
   ',',
   'The',
   'Juggler',
   ',',
   'is',
   'released',
   'twice',
   'a',
   'year',
   'and',
   'focuses',
   'on',
   'student',
   'literature',
   'and',
   'artwork',
   '.',
   'The',
   'Dome',
   'yearbook',
   'is',
   'published',
   'annually',
   '.',
   'The',
   'newspapers',
   'have',
   'varying',
   'publication',
   'interests',
   ',',
   'with',
   'The',
   'Observer',
   'published',
   'daily',
   'and',
   'mainly',
   'reporting',
   'university',
   'and',
   'other',
   'news',
   ',',
   'and',
   'staffed',
   'by',
   'students',
   'from',
   'both',
   'Notre',
   'Dame',
   'and',
   'Saint',
   'Mary',
   "'s",
   'College',
   '.',
   'Unlike',
   'Scholastic',
   'and',
   'The',
   'Dome',
   ',',
   'The',
   'Observer',
   'is',
   'an',
   'independent',
   'publication',
   'and',
   'does',
   'not',
   'have',
   'a',
   'faculty',
   'advisor',
   'or',
   'any',
   'editorial',
   'oversight',
   'from',
   'the',
   'University',
   '.',
   'In',
   '1987',
   ',',
   'when',
   'some',
   'students',
   'believed',
   'that',
   'The',
   'Observer',
   'began',
   'to',
   'show',
   'a',
   'conservative',
   'bias',
   ',',
   'a',
   'liberal',
   'newspaper',
   ',',
   'Common',
   'Sense',
   'was',
   'published',
   '.',
   'Likewise',
   ',',
   'in',
   '2003',
   ',',
   'when',
   'other',
   'students',
   'believed',
   'that',
   'the',
   'paper',
   'showed',
   'a',
   'liberal',
   'bias',
   ',',
   'the',
   'conservative',
   'paper',
   'Irish',
   'Rover',
   'went',
   'into',
   'production',
   '.',
   'Neither',
   'paper',
   'is',
   'published',
   'as',
   'often',
   'as',
   'The',
   'Observer',
   ';',
   'however',
   ',',
   'all',
   'three',
   'are',
   'distributed',
   'to',
   'all',
   'students',
   '.',
   'Finally',
   ',',
   'in',
   'Spring',
   '2008',
   'an',
   'undergraduate',
   'journal',
   'for',
   'political',
   'science',
   'research',
   ',',
   'Beyond',
   'Politics',
   ',',
   'made',
   'its',
   'debut',
   '.'],
  [[0, 2],
   [3, 5],
   [6, 10],
   [11, 16],
   [17, 29],
   [29, 30],
   [31, 36],
   [37, 41],
   [41, 43],
   [44, 52],
   [53, 56],
   [57, 58],
   [59, 65],
   [66, 68],
   [69, 73],
   [74, 79],
   [80, 87],
   [87, 88],
   [89, 92],
   [93, 97],
   [98, 109],
   [110, 117],
   [118, 125],
   [126, 131],
   [132, 142],
   [142, 143],
   [144, 148],
   [149, 150],
   [151, 156],
   [157, 160],
   [161, 171],
   [172, 179],
   [179, 180],
   [181, 184],
   [185, 192],
   [193, 202],
   [203, 206],
   [207, 215],
   [215, 216],
   [217, 222],
   [223, 225],
   [226, 227],
   [228, 236],
   [237, 244],
   [245, 247],
   [248, 257],
   [258, 262],
   [262, 263],
   [264, 267],
   [268, 278],
   [279, 287],
   [288, 290],
   [291, 297],
   [298, 303],
   [304, 311],
   [312, 315],
   [316, 322],
   [323, 325],
   [326, 328],
   [329, 332],
   [333, 339],
   [340, 350],
   [351, 361],
   [362, 373],
   [374, 376],
   [377, 380],
   [381, 387],
   [388, 394],
   [394, 395],
   [396, 399],
   [400, 405],
   [406, 414],
   [414, 415],
   [416, 419],
   [420, 427],
   [427, 428],
   [429, 431],
   [432, 440],
   [441, 446],
   [447, 448],
   [449, 453],
   [454, 457],
   [458, 465],
   [466, 468],
   [469, 476],
   [477, 487],
   [488, 491],
   [492, 499],
   [499, 500],
   [501, 504],
   [505, 509],
   [510, 518],
   [519, 521],
   [522, 531],
   [532, 540],
   [540, 541],
   [542, 545],
   [546, 556],
   [557, 561],
   [562, 569],
   [570, 581],
   [582, 591],
   [591, 592],
   [593, 597],
   [598, 601],
   [602, 610],
   [611, 620],
   [621, 626],
   [627, 630],
   [631, 637],
   [638, 647],
   [648, 658],
   [659, 662],
   [663, 668],
   [669, 673],
   [673, 674],
   [675, 678],
   [679, 686],
   [687, 689],
   [690, 698],
   [699, 703],
   [704, 708],
   [709, 714],
   [715, 719],
   [720, 723],
   [724, 729],
   [730, 734],
   [734, 736],
   [737, 744],
   [744, 745],
   [746, 752],
   [753, 763],
   [764, 767],
   [768, 771],
   [772, 776],
   [776, 777],
   [778, 781],
   [782, 790],
   [791, 793],
   [794, 796],
   [797, 808],
   [809, 820],
   [821, 824],
   [825, 829],
   [830, 833],
   [834, 838],
   [839, 840],
   [841, 848],
   [849, 856],
   [857, 859],
   [860, 863],
   [864, 873],
   [874, 883],
   [884, 888],
   [889, 892],
   [893, 903],
   [903, 904],
   [905, 907],
   [908, 912],
   [912, 913],
   [914, 918],
   [919, 923],
   [924, 932],
   [933, 941],
   [942, 946],
   [947, 950],
   [951, 959],
   [960, 965],
   [966, 968],
   [969, 973],
   [974, 975],
   [976, 988],
   [989, 993],
   [993, 994],
   [995, 996],
   [997, 1004],
   [1005, 1014],
   [1014, 1015],
   [1016, 1022],
   [1023, 1028],
   [1029, 1032],
   [1033, 1042],
   [1042, 1043],
   [1044, 1052],
   [1052, 1053],
   [1054, 1056],
   [1057, 1061],
   [1061, 1062],
   [1063, 1067],
   [1068, 1073],
   [1074, 1082],
   [1083, 1091],
   [1092, 1096],
   [1097, 1100],
   [1101, 1106],
   [1107, 1113],
   [1114, 1115],
   [1116, 1123],
   [1124, 1128],
   [1128, 1129],
   [1130, 1133],
   [1134, 1146],
   [1147, 1152],
   [1153, 1158],
   [1159, 1164],
   [1165, 1169],
   [1170, 1174],
   [1175, 1185],
   [1185, 1186],
   [1187, 1194],
   [1195, 1200],
   [1201, 1203],
   [1204, 1213],
   [1214, 1216],
   [1217, 1222],
   [1223, 1225],
   [1226, 1229],
   [1230, 1238],
   [1238, 1239],
   [1240, 1247],
   [1247, 1248],
   [1249, 1252],
   [1253, 1258],
   [1259, 1262],
   [1263, 1274],
   [1275, 1277],
   [1278, 1281],
   [1282, 1290],
   [1290, 1291],
   [1292, 1299],
   [1299, 1300],
   [1301, 1303],
   [1304, 1310],
   [1311, 1315],
   [1316, 1318],
   [1319, 1332],
   [1333, 1340],
   [1341, 1344],
   [1345, 1354],
   [1355, 1362],
   [1363, 1371],
   [1371, 1372],
   [1373, 1379],
   [1380, 1388],
   [1388, 1389],
   [1390, 1394],
   [1395, 1398],
   [1399, 1404],
   [1404, 1405]]),
 (['As',
   'at',
   'most',
   'other',
   'universities',
   ',',
   'Notre',
   'Dame',
   "'s",
   'students',
   'run',
   'a',
   'number',
   'of',
   'news',
   'media',
   'outlets',
   '.',
   'The',
   'nine',
   'student-run',
   'outlets',
   'include',
   'three',
   'newspapers',
   ',',
   'both',
   'a',
   'radio',
   'and',
   'television',
   'station',
   ',',
   'and',
   'several',
   'magazines',
   'and',
   'journals',
   '.',
   'Begun',
   'as',
   'a',
   'one-page',
   'journal',
   'in',
   'September',
   '1876',
   ',',
   'the',
   'Scholastic',
   'magazine',
   'is',
   'issued',
   'twice',
   'monthly',
   'and',
   'claims',
   'to',
   'be',
   'the',
   'oldest',
   'continuous',
   'collegiate',
   'publication',
   'in',
   'the',
   'United',
   'States',
   '.',
   'The',
   'other',
   'magazine',
   ',',
   'The',
   'Juggler',
   ',',
   'is',
   'released',
   'twice',
   'a',
   'year',
   'and',
   'focuses',
   'on',
   'student',
   'literature',
   'and',
   'artwork',
   '.',
   'The',
   'Dome',
   'yearbook',
   'is',
   'published',
   'annually',
   '.',
   'The',
   'newspapers',
   'have',
   'varying',
   'publication',
   'interests',
   ',',
   'with',
   'The',
   'Observer',
   'published',
   'daily',
   'and',
   'mainly',
   'reporting',
   'university',
   'and',
   'other',
   'news',
   ',',
   'and',
   'staffed',
   'by',
   'students',
   'from',
   'both',
   'Notre',
   'Dame',
   'and',
   'Saint',
   'Mary',
   "'s",
   'College',
   '.',
   'Unlike',
   'Scholastic',
   'and',
   'The',
   'Dome',
   ',',
   'The',
   'Observer',
   'is',
   'an',
   'independent',
   'publication',
   'and',
   'does',
   'not',
   'have',
   'a',
   'faculty',
   'advisor',
   'or',
   'any',
   'editorial',
   'oversight',
   'from',
   'the',
   'University',
   '.',
   'In',
   '1987',
   ',',
   'when',
   'some',
   'students',
   'believed',
   'that',
   'The',
   'Observer',
   'began',
   'to',
   'show',
   'a',
   'conservative',
   'bias',
   ',',
   'a',
   'liberal',
   'newspaper',
   ',',
   'Common',
   'Sense',
   'was',
   'published',
   '.',
   'Likewise',
   ',',
   'in',
   '2003',
   ',',
   'when',
   'other',
   'students',
   'believed',
   'that',
   'the',
   'paper',
   'showed',
   'a',
   'liberal',
   'bias',
   ',',
   'the',
   'conservative',
   'paper',
   'Irish',
   'Rover',
   'went',
   'into',
   'production',
   '.',
   'Neither',
   'paper',
   'is',
   'published',
   'as',
   'often',
   'as',
   'The',
   'Observer',
   ';',
   'however',
   ',',
   'all',
   'three',
   'are',
   'distributed',
   'to',
   'all',
   'students',
   '.',
   'Finally',
   ',',
   'in',
   'Spring',
   '2008',
   'an',
   'undergraduate',
   'journal',
   'for',
   'political',
   'science',
   'research',
   ',',
   'Beyond',
   'Politics',
   ',',
   'made',
   'its',
   'debut',
   '.'],
  [[0, 2],
   [3, 5],
   [6, 10],
   [11, 16],
   [17, 29],
   [29, 30],
   [31, 36],
   [37, 41],
   [41, 43],
   [44, 52],
   [53, 56],
   [57, 58],
   [59, 65],
   [66, 68],
   [69, 73],
   [74, 79],
   [80, 87],
   [87, 88],
   [89, 92],
   [93, 97],
   [98, 109],
   [110, 117],
   [118, 125],
   [126, 131],
   [132, 142],
   [142, 143],
   [144, 148],
   [149, 150],
   [151, 156],
   [157, 160],
   [161, 171],
   [172, 179],
   [179, 180],
   [181, 184],
   [185, 192],
   [193, 202],
   [203, 206],
   [207, 215],
   [215, 216],
   [217, 222],
   [223, 225],
   [226, 227],
   [228, 236],
   [237, 244],
   [245, 247],
   [248, 257],
   [258, 262],
   [262, 263],
   [264, 267],
   [268, 278],
   [279, 287],
   [288, 290],
   [291, 297],
   [298, 303],
   [304, 311],
   [312, 315],
   [316, 322],
   [323, 325],
   [326, 328],
   [329, 332],
   [333, 339],
   [340, 350],
   [351, 361],
   [362, 373],
   [374, 376],
   [377, 380],
   [381, 387],
   [388, 394],
   [394, 395],
   [396, 399],
   [400, 405],
   [406, 414],
   [414, 415],
   [416, 419],
   [420, 427],
   [427, 428],
   [429, 431],
   [432, 440],
   [441, 446],
   [447, 448],
   [449, 453],
   [454, 457],
   [458, 465],
   [466, 468],
   [469, 476],
   [477, 487],
   [488, 491],
   [492, 499],
   [499, 500],
   [501, 504],
   [505, 509],
   [510, 518],
   [519, 521],
   [522, 531],
   [532, 540],
   [540, 541],
   [542, 545],
   [546, 556],
   [557, 561],
   [562, 569],
   [570, 581],
   [582, 591],
   [591, 592],
   [593, 597],
   [598, 601],
   [602, 610],
   [611, 620],
   [621, 626],
   [627, 630],
   [631, 637],
   [638, 647],
   [648, 658],
   [659, 662],
   [663, 668],
   [669, 673],
   [673, 674],
   [675, 678],
   [679, 686],
   [687, 689],
   [690, 698],
   [699, 703],
   [704, 708],
   [709, 714],
   [715, 719],
   [720, 723],
   [724, 729],
   [730, 734],
   [734, 736],
   [737, 744],
   [744, 745],
   [746, 752],
   [753, 763],
   [764, 767],
   [768, 771],
   [772, 776],
   [776, 777],
   [778, 781],
   [782, 790],
   [791, 793],
   [794, 796],
   [797, 808],
   [809, 820],
   [821, 824],
   [825, 829],
   [830, 833],
   [834, 838],
   [839, 840],
   [841, 848],
   [849, 856],
   [857, 859],
   [860, 863],
   [864, 873],
   [874, 883],
   [884, 888],
   [889, 892],
   [893, 903],
   [903, 904],
   [905, 907],
   [908, 912],
   [912, 913],
   [914, 918],
   [919, 923],
   [924, 932],
   [933, 941],
   [942, 946],
   [947, 950],
   [951, 959],
   [960, 965],
   [966, 968],
   [969, 973],
   [974, 975],
   [976, 988],
   [989, 993],
   [993, 994],
   [995, 996],
   [997, 1004],
   [1005, 1014],
   [1014, 1015],
   [1016, 1022],
   [1023, 1028],
   [1029, 1032],
   [1033, 1042],
   [1042, 1043],
   [1044, 1052],
   [1052, 1053],
   [1054, 1056],
   [1057, 1061],
   [1061, 1062],
   [1063, 1067],
   [1068, 1073],
   [1074, 1082],
   [1083, 1091],
   [1092, 1096],
   [1097, 1100],
   [1101, 1106],
   [1107, 1113],
   [1114, 1115],
   [1116, 1123],
   [1124, 1128],
   [1128, 1129],
   [1130, 1133],
   [1134, 1146],
   [1147, 1152],
   [1153, 1158],
   [1159, 1164],
   [1165, 1169],
   [1170, 1174],
   [1175, 1185],
   [1185, 1186],
   [1187, 1194],
   [1195, 1200],
   [1201, 1203],
   [1204, 1213],
   [1214, 1216],
   [1217, 1222],
   [1223, 1225],
   [1226, 1229],
   [1230, 1238],
   [1238, 1239],
   [1240, 1247],
   [1247, 1248],
   [1249, 1252],
   [1253, 1258],
   [1259, 1262],
   [1263, 1274],
   [1275, 1277],
   [1278, 1281],
   [1282, 1290],
   [1290, 1291],
   [1292, 1299],
   [1299, 1300],
   [1301, 1303],
   [1304, 1310],
   [1311, 1315],
   [1316, 1318],
   [1319, 1332],
   [1333, 1340],
   [1341, 1344],
   [1345, 1354],
   [1355, 1362],
   [1363, 1371],
   [1371, 1372],
   [1373, 1379],
   [1380, 1388],
   [1388, 1389],
   [1390, 1394],
   [1395, 1398],
   [1399, 1404],
   [1404, 1405]]),
 (['As',
   'at',
   'most',
   'other',
   'universities',
   ',',
   'Notre',
   'Dame',
   "'s",
   'students',
   'run',
   'a',
   'number',
   'of',
   'news',
   'media',
   'outlets',
   '.',
   'The',
   'nine',
   'student-run',
   'outlets',
   'include',
   'three',
   'newspapers',
   ',',
   'both',
   'a',
   'radio',
   'and',
   'television',
   'station',
   ',',
   'and',
   'several',
   'magazines',
   'and',
   'journals',
   '.',
   'Begun',
   'as',
   'a',
   'one-page',
   'journal',
   'in',
   'September',
   '1876',
   ',',
   'the',
   'Scholastic',
   'magazine',
   'is',
   'issued',
   'twice',
   'monthly',
   'and',
   'claims',
   'to',
   'be',
   'the',
   'oldest',
   'continuous',
   'collegiate',
   'publication',
   'in',
   'the',
   'United',
   'States',
   '.',
   'The',
   'other',
   'magazine',
   ',',
   'The',
   'Juggler',
   ',',
   'is',
   'released',
   'twice',
   'a',
   'year',
   'and',
   'focuses',
   'on',
   'student',
   'literature',
   'and',
   'artwork',
   '.',
   'The',
   'Dome',
   'yearbook',
   'is',
   'published',
   'annually',
   '.',
   'The',
   'newspapers',
   'have',
   'varying',
   'publication',
   'interests',
   ',',
   'with',
   'The',
   'Observer',
   'published',
   'daily',
   'and',
   'mainly',
   'reporting',
   'university',
   'and',
   'other',
   'news',
   ',',
   'and',
   'staffed',
   'by',
   'students',
   'from',
   'both',
   'Notre',
   'Dame',
   'and',
   'Saint',
   'Mary',
   "'s",
   'College',
   '.',
   'Unlike',
   'Scholastic',
   'and',
   'The',
   'Dome',
   ',',
   'The',
   'Observer',
   'is',
   'an',
   'independent',
   'publication',
   'and',
   'does',
   'not',
   'have',
   'a',
   'faculty',
   'advisor',
   'or',
   'any',
   'editorial',
   'oversight',
   'from',
   'the',
   'University',
   '.',
   'In',
   '1987',
   ',',
   'when',
   'some',
   'students',
   'believed',
   'that',
   'The',
   'Observer',
   'began',
   'to',
   'show',
   'a',
   'conservative',
   'bias',
   ',',
   'a',
   'liberal',
   'newspaper',
   ',',
   'Common',
   'Sense',
   'was',
   'published',
   '.',
   'Likewise',
   ',',
   'in',
   '2003',
   ',',
   'when',
   'other',
   'students',
   'believed',
   'that',
   'the',
   'paper',
   'showed',
   'a',
   'liberal',
   'bias',
   ',',
   'the',
   'conservative',
   'paper',
   'Irish',
   'Rover',
   'went',
   'into',
   'production',
   '.',
   'Neither',
   'paper',
   'is',
   'published',
   'as',
   'often',
   'as',
   'The',
   'Observer',
   ';',
   'however',
   ',',
   'all',
   'three',
   'are',
   'distributed',
   'to',
   'all',
   'students',
   '.',
   'Finally',
   ',',
   'in',
   'Spring',
   '2008',
   'an',
   'undergraduate',
   'journal',
   'for',
   'political',
   'science',
   'research',
   ',',
   'Beyond',
   'Politics',
   ',',
   'made',
   'its',
   'debut',
   '.'],
  [[0, 2],
   [3, 5],
   [6, 10],
   [11, 16],
   [17, 29],
   [29, 30],
   [31, 36],
   [37, 41],
   [41, 43],
   [44, 52],
   [53, 56],
   [57, 58],
   [59, 65],
   [66, 68],
   [69, 73],
   [74, 79],
   [80, 87],
   [87, 88],
   [89, 92],
   [93, 97],
   [98, 109],
   [110, 117],
   [118, 125],
   [126, 131],
   [132, 142],
   [142, 143],
   [144, 148],
   [149, 150],
   [151, 156],
   [157, 160],
   [161, 171],
   [172, 179],
   [179, 180],
   [181, 184],
   [185, 192],
   [193, 202],
   [203, 206],
   [207, 215],
   [215, 216],
   [217, 222],
   [223, 225],
   [226, 227],
   [228, 236],
   [237, 244],
   [245, 247],
   [248, 257],
   [258, 262],
   [262, 263],
   [264, 267],
   [268, 278],
   [279, 287],
   [288, 290],
   [291, 297],
   [298, 303],
   [304, 311],
   [312, 315],
   [316, 322],
   [323, 325],
   [326, 328],
   [329, 332],
   [333, 339],
   [340, 350],
   [351, 361],
   [362, 373],
   [374, 376],
   [377, 380],
   [381, 387],
   [388, 394],
   [394, 395],
   [396, 399],
   [400, 405],
   [406, 414],
   [414, 415],
   [416, 419],
   [420, 427],
   [427, 428],
   [429, 431],
   [432, 440],
   [441, 446],
   [447, 448],
   [449, 453],
   [454, 457],
   [458, 465],
   [466, 468],
   [469, 476],
   [477, 487],
   [488, 491],
   [492, 499],
   [499, 500],
   [501, 504],
   [505, 509],
   [510, 518],
   [519, 521],
   [522, 531],
   [532, 540],
   [540, 541],
   [542, 545],
   [546, 556],
   [557, 561],
   [562, 569],
   [570, 581],
   [582, 591],
   [591, 592],
   [593, 597],
   [598, 601],
   [602, 610],
   [611, 620],
   [621, 626],
   [627, 630],
   [631, 637],
   [638, 647],
   [648, 658],
   [659, 662],
   [663, 668],
   [669, 673],
   [673, 674],
   [675, 678],
   [679, 686],
   [687, 689],
   [690, 698],
   [699, 703],
   [704, 708],
   [709, 714],
   [715, 719],
   [720, 723],
   [724, 729],
   [730, 734],
   [734, 736],
   [737, 744],
   [744, 745],
   [746, 752],
   [753, 763],
   [764, 767],
   [768, 771],
   [772, 776],
   [776, 777],
   [778, 781],
   [782, 790],
   [791, 793],
   [794, 796],
   [797, 808],
   [809, 820],
   [821, 824],
   [825, 829],
   [830, 833],
   [834, 838],
   [839, 840],
   [841, 848],
   [849, 856],
   [857, 859],
   [860, 863],
   [864, 873],
   [874, 883],
   [884, 888],
   [889, 892],
   [893, 903],
   [903, 904],
   [905, 907],
   [908, 912],
   [912, 913],
   [914, 918],
   [919, 923],
   [924, 932],
   [933, 941],
   [942, 946],
   [947, 950],
   [951, 959],
   [960, 965],
   [966, 968],
   [969, 973],
   [974, 975],
   [976, 988],
   [989, 993],
   [993, 994],
   [995, 996],
   [997, 1004],
   [1005, 1014],
   [1014, 1015],
   [1016, 1022],
   [1023, 1028],
   [1029, 1032],
   [1033, 1042],
   [1042, 1043],
   [1044, 1052],
   [1052, 1053],
   [1054, 1056],
   [1057, 1061],
   [1061, 1062],
   [1063, 1067],
   [1068, 1073],
   [1074, 1082],
   [1083, 1091],
   [1092, 1096],
   [1097, 1100],
   [1101, 1106],
   [1107, 1113],
   [1114, 1115],
   [1116, 1123],
   [1124, 1128],
   [1128, 1129],
   [1130, 1133],
   [1134, 1146],
   [1147, 1152],
   [1153, 1158],
   [1159, 1164],
   [1165, 1169],
   [1170, 1174],
   [1175, 1185],
   [1185, 1186],
   [1187, 1194],
   [1195, 1200],
   [1201, 1203],
   [1204, 1213],
   [1214, 1216],
   [1217, 1222],
   [1223, 1225],
   [1226, 1229],
   [1230, 1238],
   [1238, 1239],
   [1240, 1247],
   [1247, 1248],
   [1249, 1252],
   [1253, 1258],
   [1259, 1262],
   [1263, 1274],
   [1275, 1277],
   [1278, 1281],
   [1282, 1290],
   [1290, 1291],
   [1292, 1299],
   [1299, 1300],
   [1301, 1303],
   [1304, 1310],
   [1311, 1315],
   [1316, 1318],
   [1319, 1332],
   [1333, 1340],
   [1341, 1344],
   [1345, 1354],
   [1355, 1362],
   [1363, 1371],
   [1371, 1372],
   [1373, 1379],
   [1380, 1388],
   [1388, 1389],
   [1390, 1394],
   [1395, 1398],
   [1399, 1404],
   [1404, 1405]])]

In [ ]:
# Tokenize the `context` field of the first ten samples and collect the
# results, in order, in `x`; tqdm renders a progress bar over the slice.
# NOTE(review): the log message below mentions a pool of workers, but this
# cell makes one `tokenizer.tokenize` call per sample in a plain sequential
# pass over a single `tokenizer` object — confirm whether the pooled
# `Tokenizer` helper was meant to be used here instead.
print("Tokenizing dataset with CoreNLP using pool of workers")
x = list(
    tokenizer.tokenize(entry['context'])
    for entry in tqdm(samples[:10])
)
print("Done!")

In [ ]:
# Echo the tokenization results stored in `x` as this cell's output.
# NOTE(review): this displays the whole list; the rendered output may be very
# long — consider inspecting a slice or a summary instead.
x

In [ ]: