notebook.community

Edit and run

This notebook previews how CollateX processes variant texts, using segments of the 1818, 1823, and 1831 editions of Frankenstein.



In [ ]:

    
from collatex import *
collation = Collation()
# Witness id -> plain-text file holding that edition's opening letters.
edition_files = [
    ('w1818', '1818_openingLetters.txt'),
    ('w1823', '1823_openingLetters.txt'),
    ('w1831', '1831_openingLetters.txt'),
]
# Read every file first, so no witness is added unless all three are readable.
witness_texts = []
for siglum, filename in edition_files:
    with open(filename, 'r') as edition_file:
        witness_texts.append((siglum, edition_file.read()))
for siglum, text in witness_texts:
    collation.add_plain_witness(siglum, text)
# Colored HTML shown in the Jupyter notebook.
collate(collation, segmentation=True, output='html2', layout='vertical')
# Alternative outputs -- uncomment to try them.
# SVG shown in the Jupyter notebook (try also svg2):
# collate(collation, segmentation=True, output='svg')
# TEI written to a file called collation.xml:
# tei = collate(collation, segmentation=True, output='tei')
# with open('collation.xml', 'w') as output:
#     output.write(tei)



In [ ]:

    
from collatex import *
collation = Collation()
# Witness id -> plain-text file holding that edition's opening letters.
edition_files = [
    ('w1818', '1818_openingLetters.txt'),
    ('w1823', '1823_openingLetters.txt'),
    ('w1831', '1831_openingLetters.txt'),
]
# Read every file first, so no witness is added unless all three are readable.
witness_texts = []
for siglum, filename in edition_files:
    with open(filename, 'r') as edition_file:
        witness_texts.append((siglum, edition_file.read()))
for siglum, text in witness_texts:
    collation.add_plain_witness(siglum, text)
# Alternative output -- colored HTML shown in the Jupyter notebook:
# collate(collation, segmentation=True, output='html2', layout='vertical')
# SVG shown in the Jupyter notebook (try also svg2).
collate(collation, segmentation=True, output='svg')
# Alternative output -- TEI written to a file called collation.xml:
# tei = collate(collation, segmentation=True, output='tei')
# with open('collation.xml', 'w') as output:
#     output.write(tei)



In [ ]:

    
%load_ext autoreload
%autoreload 2
from collatex import *
collation = Collation()
# Three short toy witnesses, keyed by witness id, added in this order.
toy_witnesses = [
    ('w1818', 'The big pink c o c k a t o o.'),
    ('w1823', 'The big yellow c o c k a t o o.'),
    ('w1831', 'The big yellow c o c k a t i e l.'),
]
for siglum, reading in toy_witnesses:
    collation.add_plain_witness(siglum, reading)
# Last expression of the cell, so its result is what the notebook shows.
collate(collation, segmentation=True, output='svg_simple')
# Alternative output:
# collate(collation, segmentation=True, output='svg')



In [ ]:

    
from collatex import *
import re

def tokenize(input):
    """Split a text string into a list of token dictionaries.

    The text is split on runs of whitespace; within each resulting word,
    trailing punctuation is broken off as a separate token.

    Args:
        input: The text to tokenize (a str).

    Returns:
        A list of ``{"t": token}`` dicts in reading order. Empty or
        whitespace-only text yields an empty list.
    """
    words = re.split(r'\s+', input)  # split on whitespace
    # `.*\w` captures everything up to the last word character and `\W+$`
    # captures the trailing punctuation. The earlier `.+\w` required at least
    # two characters, so one-character words ("a", "I") were silently dropped.
    word_or_final_punct = r'.*\w|\W+$'
    return [
        {"t": token}  # one dictionary per token
        for word in words
        for token in re.findall(word_or_final_punct, word)
    ]

# Two one-sentence witnesses that differ only in their last word.
input_a = "Peter's cat."
input_b = "Peter's dog."

# Tokenize each witness and show the resulting token dictionaries.
tokens_a = tokenize(input_a)
print('tokens_a =', tokens_a)
tokens_b = tokenize(input_b)
print('tokens_b =', tokens_b)

# Wrap each token list in a witness dictionary carrying its id.
witness_a = { "id": "A", "tokens": tokens_a }
print('witness_a = ', witness_a)
witness_b = { "id": "B", "tokens": tokens_b }

# Named `collation_input` rather than `input`: assigning to `input` at
# notebook scope would shadow the builtin input() for every later cell.
collation_input = { "witnesses": [ witness_a, witness_b ] }
table = collate(collation_input, segmentation=False)
print(table)



In [ ]:

    
%load_ext autoreload
%autoreload 2
from collatex import *
collation = Collation()
for siglum, reading in (('A', 'The big gray koala'), ('B', 'The grey koala')):
    collation.add_plain_witness(siglum, reading)
# Both collate calls below share these options; only the output differs.
near_match_options = dict(segmentation=False, near_match=True)
# First pass: default output, printed as text.
table = collate(collation, **near_match_options)
print(table)
# Second pass: the same collation requested as SVG.
collate(collation, output="svg", **near_match_options)



In [ ]: