In [1]:
from matcher import Text, Matcher
import json
import pandas as pd
from IPython.display import clear_output
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = [16, 6]
In [5]:
# Load the criticism corpus. The file is read as one JSON document per line.
# The encoding is pinned so decoding does not depend on the platform's default
# locale encoding.
# NOTE(review): assumes both input files are UTF-8 encoded -- confirm.
with open('../txt/middlemarch.json', encoding='utf-8') as f:
    rawCriticism = f.readlines()

# Parse each line into a Python object. Whitespace-only lines (for example a
# trailing blank line) are skipped, because json.loads raises on empty input.
data = [json.loads(line) for line in rawCriticism if line.strip()]

# Load the full text of the novel and wrap it in a Text object labelled
# 'Middlemarch' for the matcher.
with open('../middlemarch.txt', encoding='utf-8') as f:
    rawMM = f.read()
mm = Text(rawMM, 'Middlemarch')
In [6]:
# Match every article against the novel and store the three values returned by
# Matcher.match() on the article record itself. Articles that already have a
# 'numMatches' key are skipped, so re-running this cell only processes records
# that have not been matched yet.
for i, article in enumerate(data):
    # wait=True postpones clearing until the next output is ready, which keeps
    # the progress line from flickering between iterations.
    clear_output(wait=True)
    # enumerate() is zero-based; report a one-based position so the progress
    # line runs from "1 of N" to "N of N".
    print('\r', 'Matching article %s of %s' % (i + 1, len(data)), end='')
    if 'numMatches' not in article:
        # Text is built from the article's 'ocr' field and labelled with its 'id'.
        articleText = Text(article['ocr'], article['id'])
        article['numMatches'], article['Locations in A'], article['Locations in B'] = \
            Matcher(mm, articleText).match()
In [7]:
# Persist the article records (including any match results stored on them) as a
# single JSON document.
with open('../txt/e2a.json', 'w') as sink:
    json.dump(data, sink)