This notebook previews how CollateX processes variant texts, working with segments of the 1818, 1823, and 1831 editions of Frankenstein.
In [ ]:
from collatex import *
collation = Collation()

# Read the opening-letters segment of each edition.  The encoding is passed
# explicitly so that decoding does not depend on the platform's locale default.
# NOTE(review): assumes the three witness files are stored as UTF-8 — confirm.
with open('1818_openingLetters.txt', 'r', encoding='utf-8') as f1818, \
        open('1823_openingLetters.txt', 'r', encoding='utf-8') as f1823, \
        open('1831_openingLetters.txt', 'r', encoding='utf-8') as f1831:
    w1818 = f1818.read()
    w1823 = f1823.read()
    w1831 = f1831.read()

# Register each edition as a plain-text witness under its own siglum.
collation.add_plain_witness('w1818', w1818)
collation.add_plain_witness('w1823', w1823)
collation.add_plain_witness('w1831', w1831)

# outputs colored HTML in Jupyter notebook
collate(collation, segmentation=True, output='html2', layout='vertical')

# Alternatives (uncomment one and comment out the call above):
#
# creates SVG in Jupyter notebook (try also svg2)
# collate(collation, segmentation=True, output='svg')
#
# outputs TEI into a file called collation.xml
# tei = collate(collation, segmentation=True, output='tei')
# with open('collation.xml', 'w', encoding='utf-8') as output:
#     output.write(tei)
In [ ]:
from collatex import *
collation = Collation()

# Read the opening-letters segment of each edition.  The encoding is passed
# explicitly so that decoding does not depend on the platform's locale default.
# NOTE(review): assumes the three witness files are stored as UTF-8 — confirm.
with open('1818_openingLetters.txt', 'r', encoding='utf-8') as f1818, \
        open('1823_openingLetters.txt', 'r', encoding='utf-8') as f1823, \
        open('1831_openingLetters.txt', 'r', encoding='utf-8') as f1831:
    w1818 = f1818.read()
    w1823 = f1823.read()
    w1831 = f1831.read()

# Register each edition as a plain-text witness under its own siglum.
collation.add_plain_witness('w1818', w1818)
collation.add_plain_witness('w1823', w1823)
collation.add_plain_witness('w1831', w1831)

# outputs colored HTML in Jupyter notebook
# collate(collation, segmentation=True, output='html2', layout='vertical')

# creates SVG in Jupyter notebook (try also svg2)
collate(collation, segmentation=True, output='svg')

# outputs TEI into a file called collation.xml
# tei = collate(collation, segmentation=True, output='tei')
# with open('collation.xml', 'w', encoding='utf-8') as output:
#     output.write(tei)
In [ ]:
%load_ext autoreload
%autoreload 2
from collatex import *
# Toy example: three one-sentence witnesses, with the last word spelled out
# letter by letter so that each letter is separated by whitespace.
collation = Collation()
for siglum, text in (
    ('w1818', 'The big pink c o c k a t o o.'),
    ('w1823', 'The big yellow c o c k a t o o.'),
    ('w1831', 'The big yellow c o c k a t i e l.'),
):
    collation.add_plain_witness(siglum, text)

# Render with the 'svg_simple' output; the plain 'svg' variant is kept
# below for comparison.
collate(collation, segmentation=True, output='svg_simple')
# collate(collation, segmentation=True, output='svg')
In [ ]:
from collatex import *
import re
def tokenize(input):
words = re.split(r'\s+', input) # split on whitespace
tokens_by_word = [re.findall(r'.+\w|\W+$', word) for word in words] # break off final punctuation
tokens = []
for item in tokens_by_word:
tokens.extend(item)
token_list = [{"t": token} for token in tokens] # create dictionaries for each token
return token_list
# Tokenize two toy witnesses ourselves and pass CollateX pre-tokenized
# input (a dict of witnesses) instead of plain text.
input_a = "Peter's cat."
input_b = "Peter's dog."

tokens_a = tokenize(input_a)
print('tokens_a =', tokens_a)
tokens_b = tokenize(input_b)
print('tokens_b =', tokens_b)

# Each witness is a dict holding its "id" and its list of token dicts.
witness_a = {"id": "A", "tokens": tokens_a}
print('witness_a = ', witness_a)
witness_b = {"id": "B", "tokens": tokens_b}

# Named `json_input` rather than `input`: rebinding `input` at notebook
# scope would hide the builtin input() from every later cell.
json_input = {"witnesses": [witness_a, witness_b]}
table = collate(json_input, segmentation=False)
print(table)
In [ ]:
%load_ext autoreload
%autoreload 2
from collatex import *
# Toy example: two short witnesses that differ in spelling ("gray"/"grey")
# and in one extra word, collated with near matching switched on.
collation = Collation()
for siglum, text in [('A', 'The big gray koala'), ('B', 'The grey koala')]:
    collation.add_plain_witness(siglum, text)

# Both renderings below share the same alignment settings.
options = {'segmentation': False, 'near_match': True}

# First the default output, printed as text ...
table = collate(collation, **options)
print(table)

# ... then the same collation rendered as SVG.
collate(collation, output="svg", **options)
In [ ]: