In this notebook, you can explore and analyze the collation of the Declamations of Calpurnius Flaccus obtained with CollateX.
There are six different documents compared in this collation: four manuscripts and two editions. Including corrections by second hands in the manuscripts, this makes a total of ten witnesses.
In [1]:
#ipywidgets modules
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
from ipywidgets import Button, Layout, ButtonStyle
import ipywidgets as widgets
#python modules
import json
import sys
import re
from IPython.display import display,HTML,clear_output
from datetime import datetime #for unique filenames
import base64 #for download links
In [2]:
#location of the JSON file holding the collation results
#(uncorrected version: 'json-collations/calpurnius-collation-joint-BCMNPH.json')
path = 'json-collations/calpurnius-collation-joint-BCMNPH-corr.json'

#read the file and turn the JSON structure (arrays, objects)
#into the matching python structure (lists, dictionaries)
with open(path, encoding='utf-8') as jsonfile:
    data = json.load(jsonfile)

#witnesses: list of the witnesses' sigla
#collation: table of the aligned text versions (one entry per row)
witnesses, collation = data["witnesses"], data["table"]
#print(witnesses)

#base text: the witness whose readings are treated as true readings (in green).
#for Calpurnius, the most recent edition of Hakanson is used as the base text.
#set it to an empty string '' to work without a base text.
base_text = 'LH'

#the index of a witness is its position in the witness list:
#for instance B1 has position 0, and P1594 has position 9.
In [3]:
#original text
def cell_to_string(c):
    """Return the original text of cell `c`.

    The 't' value of every token is collected in order and the values are
    joined with a single space. An empty cell gives an empty string.
    """
    words = []
    for tok in c:
        words.append(tok['t'])
    return ' '.join(words)
#text with normalized tokens
def cell_to_string_norm(c):
    """Return the normalized text of cell `c`.

    For each token the 'n' value is used when present, otherwise the 't'
    value; tokens with neither key contribute nothing. The pieces are
    concatenated WITHOUT spaces, because word division is not taken into
    account when normalized texts are compared.
    """
    pieces = (
        tok['n'] if 'n' in tok else tok['t']
        for tok in c
        if 'n' in tok or 't' in tok
    )
    return ''.join(pieces)
In [4]:
#compare two cells, original text
def compare_cell(c1,c2):
    """Return True when both cells give the same string through cell_to_string."""
    left = cell_to_string(c1)
    right = cell_to_string(c2)
    return left == right
#compare two cells, normalized text
def compare_cell_norm(c1,c2):
    """Return True when both cells give the same string through cell_to_string_norm."""
    left = cell_to_string_norm(c1)
    right = cell_to_string_norm(c2)
    return left == right
#compare a list of cells, original text
def compare_multiple_cell(cell_list):
    """Return True if all the cells are equivalent (same original text).

    Each cell is compared to the next one with compare_cell, stopping at the
    first mismatch.

    A list with fewer than two cells has no pair to compare and is reported
    as True. The previous loop only assigned its result inside the loop body,
    so such a list could end with the result variable never being set.
    """
    #all() stops at the first pair that differs, and is True when there are no pairs
    return all(compare_cell(c1, c2) for c1, c2 in zip(cell_list, cell_list[1:]))
#compare a list of cells, normalized text
def compare_multiple_cell_norm(cell_list):
    """Return True if all the cells are equivalent (same normalized text).

    Each cell is compared to the next one with compare_cell_norm, stopping at
    the first mismatch.

    A list with fewer than two cells has no pair to compare and is reported
    as True. The previous loop only assigned its result inside the loop body,
    so such a list (for example a single selected witness) could end with the
    result variable never being set.
    """
    #all() stops at the first pair that differs, and is True when there are no pairs
    return all(compare_cell_norm(c1, c2) for c1, c2 in zip(cell_list, cell_list[1:]))
In [5]:
#rows of the collation table where a group of witnesses share one reading
#while at least one witness outside the group reads differently.
def find_agreements(table, witlist):
    """Return the rows of `table` where the witnesses of `witlist` agree.

    Only variant locations are kept: a row is returned when the selected
    witnesses have the same normalized text AND at least one of the remaining
    witnesses differs from them. Rows where every witness agrees are left out.

    table   -- list of collation rows
    witlist -- sigla of the witnesses that must agree (a widget tuple is accepted)
    """
    #widget selections arrive as tuples
    selected = list(witlist)
    #sigla -> positions in the witness list
    inside = [witnesses.index(siglum) for siglum in selected]
    outside = [witnesses.index(siglum) for siglum in witnesses if siglum not in selected]

    matches = []
    for row in table:
        group_cells = [row[pos] for pos in inside]
        #the selected witnesses must agree with each other (normalized tokens)
        if compare_multiple_cell_norm(group_cells) is not True:
            continue
        #keep the row as soon as one outside witness disagrees with the group
        if any(compare_cell_norm(row[inside[0]], row[pos]) is False for pos in outside):
            matches.append(row)
    return matches
In [6]:
#Similar to find_agreements, but the agreeing group is set against a second group.
def compare_witnesses(table, witlist, nonwitlist=[]):
    """Return the rows where `witlist` agree together against `nonwitlist`.

    A row is returned when the witnesses of `witlist` share the same
    normalized text and none of the witnesses of `nonwitlist` has that text.
    When `nonwitlist` is empty (the default), the group is compared against
    all the witnesses that are not in `witlist`.

    table      -- list of collation rows
    witlist    -- sigla of the x witnesses which agree together
    nonwitlist -- sigla of the y witnesses they must differ from
    """
    #first list: the x witnesses which agree together
    agreeing = [witnesses.index(siglum) for siglum in witlist]
    if nonwitlist:
        #a second list of y witnesses was specified
        opposing = [witnesses.index(siglum) for siglum in nonwitlist]
    else:
        #otherwise: against all the other witnesses
        opposing = [witnesses.index(siglum) for siglum in witnesses if siglum not in witlist]

    matches = []
    #go through the collation table, row by row
    for row in table:
        group_cells = [row[pos] for pos in agreeing]
        #there must be agreement of the x witnesses (normalised tokens)
        if compare_multiple_cell_norm(group_cells) is not True:
            continue
        #the row is dropped as soon as one of the y witnesses shares the reading
        shared = any(compare_cell_norm(row[agreeing[0]], row[pos]) is True for pos in opposing)
        if not shared:
            matches.append(row)
    return matches
In [7]:
def view_variants(table):
    """Return the rows of `table` that contain a variant.

    A row has a variant when compare_multiple_cell_norm reports that its
    cells are not all equivalent (normalized form).
    """
    return [row for row in table if compare_multiple_cell_norm(row) is False]
In [8]:
#this function returns a minimal HTML table, to display in the notebook.
def table_to_html(collation,table):
    """Build a minimal HTML table for the rows in `table`.

    collation -- the complete collation table, used to find each row's ID
    table     -- the rows to display (a subset of `collation`)

    One column is produced per witness, plus a last column with the row ID
    (the position of the row in `collation`). Cells show the original tokens.
    When the module-level `base_text` is set, a cell whose normalized text
    equals the base text cell is green ("true reading"); any other cell is
    red ("error"). Without a base text every cell is white.

    Returns the HTML as a string.
    Raises ValueError if `base_text` is set but is not in `witnesses`, or if
    a row is not part of `collation`.
    """
    #Row IDs are looked up by object identity. list.index() compares rows by
    #value, which is slow on a long table and returns the first *equal* row,
    #i.e. the wrong ID when two rows have the same content (for instance
    #several blank rows).
    row_ids = {}
    for pos, known_row in enumerate(collation):
        row_ids.setdefault(id(known_row), pos)
    #position of the base text column (same for every row)
    base_index = witnesses.index(base_text) if base_text else None

    parts = []
    #div is for a better slides view. For notebook use, leave it commented out
    #parts.append('<div style="overflow: scroll; width:960; height:417px; word-break: break-all;">')
    parts.append('<table border="1" style="width: 100%; border: 1px solid #000000; border-collapse: collapse;" cellpadding="4">')
    #header: one column for each witness and one for the row ID
    parts.append('<tr>')
    for wit in witnesses:
        parts.append('<th>'+wit+'</th>')
    parts.append('<th>ID</th>')
    parts.append('</tr>')
    for row in table:
        parts.append('<tr>')
        for cell in row:
            #we display the original tokens, not the normalized form
            token = cell_to_string(cell)
            if not base_text:
                #no base text selected: white background
                bg = "white"
            elif compare_cell_norm(cell, row[base_index]):
                #same as the base text (normalized form): "true reading", green
                bg = "d9ead3"
            else:
                #otherwise it is displayed as an "error", red
                bg = "ffb1b1"
            parts.append('<td bgcolor="'+bg+'">'+token+'</td>')
        #row ID: identity first, equality as a fallback for rows that were copied
        row_id = row_ids.get(id(row))
        if row_id is None:
            row_id = collation.index(row)
        parts.append('<td>'+str(row_id)+'</td>')
        parts.append('</tr>')
    parts.append('</table>')
    #parts.append('</div>')
    return ''.join(parts)
In [9]:
#this function returns a fancier HTML, but can't be displayed in the notebook (yet)
def table_to_html_fancy(collation,table):
    """Build the HTML table inserted into the saved alignment-table page.

    collation -- the complete collation table, used to find each row's ID
    table     -- the rows to display (a subset of `collation`)

    Each cell holds the original tokens in a paragraph. Its class is "green"
    when its normalized text equals the base text cell, "orange" otherwise,
    and "foo" (an arbitrary class) when no `base_text` is set. Hidden
    paragraphs carry the token notes and, when the first token has a 'locus',
    the locus (as a link if the token also has a 'link'). The last column
    holds the row ID, i.e. the position of the row in `collation`.

    Returns the HTML as a string.
    Raises ValueError if `base_text` is set but is not in `witnesses`, or if
    a row is not part of `collation`.
    """
    #Row IDs are looked up by object identity. list.index() compares rows by
    #value, which is slow on a long table and returns the first *equal* row,
    #i.e. the wrong ID when two rows have the same content.
    row_ids = {}
    for pos, known_row in enumerate(collation):
        row_ids.setdefault(id(known_row), pos)
    #position of the base text column (same for every row)
    base_index = witnesses.index(base_text) if base_text else None

    parts = ['<table>']
    #header: a column for each witness, then a column for the row id
    parts.append('<thead><tr>')
    for wit in witnesses:
        parts.append('<th>'+'<p>'+wit+'</p>'+'</th>')
    parts.append('<th><p>ID</p></th>')
    parts.append('</tr></thead><tbody>')
    for row in table:
        parts.append('<tr>')
        for cell in row:
            #original tokens as a string
            token = cell_to_string(cell)
            if not base_text:
                #arbitrary class for the HTML cells. It will have no effect on the result.
                cl = "foo"
            elif compare_cell_norm(cell, row[base_index]):
                #"true reading": cell with a green left border
                cl = "green"
            else:
                #"error": cell with an orange left border
                cl = "orange"
            #token in a text paragraph
            parts.append('<td class="'+cl+'">'+'<p>'+token)
            #one info indicator if at least one token has a note
            #(even if there are several notes, only one indicator is displayed)
            if any('note' in t for t in cell):
                parts.append(' <a href="#" class="expander right"><i class="fa fa-info-circle"></i></a>')
            parts.append('</p>')
            #hidden paragraphs (notes; other content such as the normalized form could be added)
            for t in cell:
                if 'note' in t:
                    parts.append('<p class="expandable hidden more-info">Note: '+t['note']+'</p>')
            #when the cell is not empty, hidden info of page/line numbers ('locus' is optional)
            if len(cell)>0 and 'locus' in cell[0]:
                if 'link' in cell[0]:
                    #link to images when possible; the attribute value is quoted
                    #so that the URL cannot break out of the href attribute
                    url = cell[0]['link']
                    parts.append('<p class="expandable-row hidden more-info"><a target="blank" href="'+url+'">'+cell[0]['locus']+'</a></p>')
                else:
                    parts.append('<p class="expandable-row hidden more-info">'+cell[0]['locus']+'</p>')
            parts.append('</td>')
        #row ID with indicator of hidden content
        row_id = row_ids.get(id(row))
        if row_id is None:
            row_id = collation.index(row)
        parts.append('<td>'+'<p>'+str(row_id)+' <a href="#" class="expander-row right"><i class="fa fa-ellipsis-v"></i></a></p>'+'</td>')
        parts.append('</tr>')
    parts.append('</tbody></table>')
    return ''.join(parts)
In [10]:
def print_witnesses_text(table):
    """Print, for each witness, its running text in `table` followed by its siglum.

    The cells of a witness are read from the first row to the last and their
    original tokens are separated by single spaces; empty cells are skipped.
    Each printed line has the form "<text>, <siglum>".

    The start of the text is detected with the accumulated string only. The
    former extra test `row.index(cell) == 0` looked the cell up by value, so
    any later cell equal to the first one was glued to the previous word
    without a space.
    """
    for index, siglum in enumerate(witnesses):
        text = ''
        for row in table:
            cell = row[index]
            #an empty cell adds nothing to the text
            if not cell:
                continue
            if text == '':
                #the text starts with a token, not a space
                text += cell_to_string(cell)
            else:
                text += ' '+cell_to_string(cell)
        text += ', '+str(siglum)
        print(text)
In [11]:
def print_info(rowID, wit):
    """Print every token of one cell with its position and feature values.

    rowID -- position of the row in `collation`
    wit   -- siglum of the witness (must be in `witnesses`)

    An empty cell prints '-'. Otherwise one line is printed per token:
    "<position> : <value>, <value>, ...", with the values of all the token's
    features. Values are converted with str() so that non-text features do
    not make the join fail, and the position comes from enumerate() so that
    two identical tokens in a cell are not both reported at the first position.
    """
    #select cell
    cell = collation[rowID][witnesses.index(wit)]
    if len(cell) == 0:
        #no token in this cell
        print('-')
    else:
        for position, token in enumerate(cell):
            print(position, ':', ', '.join(str(value) for value in token.values()))
In [12]:
def get_pos(row):
    """Return the position of `row` in the collation table (list.index lookup)."""
    position = collation.index(row)
    return position
In [13]:
def move_token_up(rowID, wit):
    """Move the first token of a cell to the end of the same witness's cell in the previous row.

    rowID -- position of the row holding the token; it cannot be the first row
    wit   -- siglum of the witness

    Prints a confirmation, or "There is no token to move." when the row is
    the first one or out of range, the witness is unknown, or the cell is
    empty. Nothing is modified in those cases.

    The row check used to be a bare expression with no effect, so row 0
    silently moved its token to the LAST row (index -1).
    """
    #the token cannot be in the first row, and the row must exist
    if not 0 < rowID < len(collation) or wit not in witnesses:
        print("There is no token to move.")
        return
    column = witnesses.index(wit)
    source = collation[rowID][column]
    if not source:
        print("There is no token to move.")
        return
    #take the first token and append it at the end of the cell in the previous row
    token = source.pop(0)
    collation[rowID-1][column].append(token)
    print("Token '"+str(token.get('t', ''))+"' moved up!")
def move_token_down(rowID, wit):
    """Move the last token of a cell to the start of the same witness's cell in the next row.

    rowID -- position of the row holding the token; it cannot be the last row
    wit   -- siglum of the witness

    Prints a confirmation, or "There is no token to move." when the row is
    the last one or out of range, the witness is unknown, or the cell is
    empty. Nothing is modified in those cases.

    The row check used to be a bare expression with no effect: on the last
    row the token was popped first and then lost, because inserting it into
    the non-existent next row failed.
    """
    #the token cannot be in the last row, and the row must exist
    if not 0 <= rowID < len(collation)-1 or wit not in witnesses:
        print("There is no token to move.")
        return
    column = witnesses.index(wit)
    source = collation[rowID][column]
    if not source:
        print("There is no token to move.")
        return
    #take the last token and add it at the beginning of the cell in the next row
    token = source.pop()
    collation[rowID+1][column].insert(0, token)
    print("Token '"+str(token.get('t', ''))+"' moved down!")
In [14]:
def add_row_after(rowID):
    """Insert a blank row into the collation right after row `rowID`.

    The new row has one empty token list per witness. If `rowID` is outside
    the collation table, a message is printed and nothing is inserted.
    """
    #rowID must be within collation table
    if rowID < 0 or rowID > len(collation)-1:
        print('Row '+str(rowID)+' does not exist.')
        return
    #one empty list of tokens for each witness in the collation
    blank_row = [[] for _ in witnesses]
    #insert after the row passed in argument (+1)
    collation.insert(rowID+1, blank_row)
    print('Row added!')
def delete_row(rowID):
    """Remove row `rowID` from the collation.

    If `rowID` is outside the collation table, a message is printed and
    nothing is removed.
    """
    #rowID must be within collation table
    if rowID < 0 or rowID > len(collation)-1:
        print('Row '+str(rowID)+' does not exist.')
        return
    collation.pop(rowID)
    print('Row deleted!')
In [15]:
#add or modify a note
def add_note(wit, rowID, token, note):
    """Add `note` to a token, or append it to the token's existing note.

    wit   -- siglum of the witness
    rowID -- position of the row in `collation`
    token -- position of the token in the cell; an int, or a string holding
             an int (the value of a text widget is a string)
    note  -- text of the note

    Prints 'Your note is empty.' for an empty note and 'This token is not
    valid.' when the witness, row or token position cannot be resolved.

    A string position is converted to int first: indexing the cell with the
    raw string always failed, so no note could be added from the text widget.
    """
    try:
        #text input gives '3', not 3
        position = int(token) if isinstance(token, str) else token
        #select token
        t = collation[rowID][witnesses.index(wit)][position]
        #compare by value; `is ''` tested object identity
        if note == '':
            print('Your note is empty.')
        elif 'note' in t:
            #add comment to an already existing note
            t['note'] += ' '+note
        else:
            #or create a new note
            t['note'] = note
    except (ValueError, IndexError, TypeError, KeyError):
        #unknown witness, position that is not a number, or out of range
        print('This token is not valid.')
#delete completely a token's note
def del_note(wit, rowID, token):
    """Remove the note of a token.

    wit   -- siglum of the witness
    rowID -- position of the row in `collation`
    token -- position of the token in the cell; an int, or a string holding
             an int (the value of a text widget is a string)

    Prints 'There is no note to delete' when the token has no note and 'This
    token is not valid.' when the witness, row or token position cannot be
    resolved.

    A string position is converted to int first: indexing the cell with the
    raw string always failed, so no note could be deleted from the text widget.
    """
    try:
        #text input gives '3', not 3
        position = int(token) if isinstance(token, str) else token
        #select token
        t = collation[rowID][witnesses.index(wit)][position]
        if 'note' in t:
            #delete note
            t.pop('note')
        else:
            #or print error message
            print('There is no note to delete')
    except (ValueError, IndexError, TypeError, KeyError):
        #unknown witness, position that is not a number, or out of range
        print('This token is not valid.')
In [16]:
def search(table,text):
    """Return the rows of `table` in which at least one cell contains `text`.

    `text` is looked for as a substring of the normalized form of the cell
    first, then of its original form. A row is kept once, whatever the number
    of matching cells. When nothing matches, a "was not found!" message is
    printed and the empty list is returned.
    """
    hits = [
        row for row in table
        if any(text in cell_to_string_norm(cell) or text in cell_to_string(cell)
               for cell in row)
    ]
    #no row kept: the text was not found in the collation
    if not hits:
        print(text+" was not found!")
    return hits
In [17]:
#write the (updated) collation table back to a json file
def save_json(path, table):
    """Save `table` together with the witness list as JSON in the file `path`.

    The file content is an object with two keys, 'witnesses' and 'table'.
    The file is opened in write mode, so an existing file is replaced.
    """
    #one data variable combining the witnesses and the new collation table
    payload = {'witnesses':witnesses, 'table':table}
    with open(path, 'w') as outfile:
        json.dump(payload, outfile)
In [18]:
#save a subset of the collation table into fancy HTML version, with a small text description
def save_table(descr, table, path):
    """Write an alignment-table page to `path` and return a download link.

    descr -- short text description of the table
    table -- HTML of the table (as produced by table_to_html_fancy)
    path  -- path of the HTML file to write

    The page is the template 'alignment-tables/template.html' in which the
    comment <!--descr--> is replaced with the description and the comment
    <!--table--> with the table. When a base text is set, a sentence about
    the colour marking is added to the description.

    Returns an IPython HTML object holding a link that downloads the page
    (embedded in the link as a base64 data URI) under the name "table.html".
    """
    #path to template
    template_path = 'alignment-tables/template.html'
    #load the text of the template
    with open(template_path, 'r', encoding='utf-8') as infile:
        html_table = infile.read()
    #the description always exists; it used to be defined only when a base
    #text was set, which made the function fail with base_text = ''
    description = descr
    if base_text:
        description += '<br>Agreement with the base text '+base_text+' is marked in green.'
        description += ' Variation from '+base_text+' is marked in red.'
    #plain string replacement: with re.sub, backslashes in the description or
    #in the table would be interpreted as escapes / group references
    html_table = html_table.replace('<!--descr-->', description)
    html_table = html_table.replace('<!--table-->', table)
    #save
    with open(path, 'w', encoding='utf-8') as outfile:
        outfile.write(html_table)
    #generate download link
    b64 = base64.b64encode(html_table.encode())
    payload = b64.decode()
    title = "Download HTML file - "+descr
    filename = "table.html"
    #the payload is an HTML page encoded as UTF-8, declared as such (was text/csv)
    html = '<a download="{filename}" href="data:text/html;charset=utf-8;base64,{payload}" target="_blank">{title}</a>'
    html = html.format(payload=payload,title=title,filename=filename)
    return HTML(html)
In [19]:
#select an extract of the collation table with interactive widgets
#HTML widget in which the extract is rendered (filled by collation_extract)
w1_html = widgets.HTML(value="")
#first row of the extract; bounded to the row positions of the collation
#as it is when this cell runs (0 to len(collation)-1)
w_from = widgets.BoundedIntText(
    value=6,
    min=0,
    max=len(collation)-1,
    description='From:',
    continuous_update=True,
)
#last row of the extract.
#because of python list slicing, the last number is not included in the result.
#to make it more intuitive, collation_extract adds 1 to the "to" number,
#so the row entered here is part of the extract
w_to = widgets.BoundedIntText(
    value=11,
    min=0,
    max=len(collation)-1,
    description='To:',
    continuous_update=True,
)
#link between the From/To widgets and table_to_html
def collation_extract(a, b):
    """Show rows `a` to `b` (both included) of the collation in w1_html.

    When `b` is lower than `a`, a message is printed; the HTML widget is
    updated in every case (with a table that has no rows in that situation).
    """
    #python slices exclude their end, hence the +1
    start, stop = a, b+1
    if stop <= start:
        print("The table you have requested does not exist.")
    w1_html.value = table_to_html(collation,collation[start:stop])
#uncomment the next lines to see the widgets
##interactive selection of a collation table extract
#interact(collation_extract, a=w_from, b=w_to)
##display HTML widget (rows 6-11)
#display(w1_html)
In [20]:
#Widgets for:
#move tokens up/down
#add/delete rows
#add/delete notes to a specific token
#widget to select a witness
w_wit = widgets.Dropdown(
    options = witnesses,
    description = 'Witness:',
)
#widget to select a row; bounded to the row positions of the collation
#as it is when this cell runs
w_rowID = widgets.BoundedIntText(
    min=0,
    max=len(collation)-1,
    description='ID:',
)
#widget to select a specific token.
#a Text widget has no `min` setting, so the unsupported `min=0` argument was
#dropped. Its value is a string: code reading it must convert it to a number.
w_token = widgets.Text(
    description = 'Token position:',
)
#widget to enter text note
w_note = widgets.Text(
    description = 'Note:',
)
#output area receiving the messages printed by the button handlers
out = widgets.Output()
#one click handler shared by the four modification buttons;
#what it prints is captured by the `out` widget, which is cleared on each click
@out.capture(clear_output=True)#wait=True, clear_output=True
def modif_on_click(b):
    """Run the modification matching the description of the clicked button `b`.

    The row and the witness are read from the w_rowID and w_wit widgets.
    A button with any other description does nothing.
    """
    label = b.description
    if label == 'add row after':
        add_row_after(rowID=w_rowID.value)
    elif label == 'delete row':
        delete_row(rowID=w_rowID.value)
    elif label == 'move token down':
        move_token_down(rowID=w_rowID.value, wit=w_wit.value)
    elif label == 'move token up':
        move_token_up(rowID=w_rowID.value, wit=w_wit.value)
In [21]:
#"add row after" button.
#modif_on_click recognises the button by its description text,
#so the description must stay exactly "add row after"
b1 = widgets.Button(description="add row after",
                    style=ButtonStyle(button_color='#fae58b'))
b1.on_click(modif_on_click)
#standalone alternative, left disabled.
#NOTE: as written the call below is not valid Python (a positional argument
#follows a keyword argument); it has to be corrected before being uncommented
#interact_manual(add_row_after, rowID=w_rowID, {'manual': True, 'manual_name': 'add row after'})
In [22]:
#"delete row" button; it reuses the style object of b1.
#modif_on_click recognises the button by its description text,
#so the description must stay exactly "delete row"
b2 = widgets.Button(description="delete row",
                    style=b1.style)
b2.on_click(modif_on_click)
#standalone alternative, left disabled.
#NOTE: as written the call below is not valid Python (a positional argument
#follows a keyword argument); it has to be corrected before being uncommented
#interact_manual(delete_row, rowID=w_rowID, {'manual': True, 'manual_name': 'delete row'})
In [23]:
#"move token down" button; it reuses the style object of b1.
#modif_on_click recognises the button by its description text,
#so the description must stay exactly "move token down"
b3 = widgets.Button(description="move token down",
                    style=b1.style)
b3.on_click(modif_on_click)
#standalone alternative, left disabled.
#NOTE: as written the call below is not valid Python (a positional argument
#follows a keyword argument); it has to be corrected before being uncommented
#interact_manual(move_token_down, rowID=w_rowID, wit=w_wit, {'manual': True, 'manual_name': 'move token down'})
In [24]:
#"move token up" button; it reuses the style object of b1.
#modif_on_click recognises the button by its description text,
#so the description must stay exactly "move token up"
b4 = widgets.Button(description="move token up",
                    style=b1.style)
b4.on_click(modif_on_click)
#standalone alternative, left disabled.
#NOTE: as written the call below is not valid Python (a positional argument
#follows a keyword argument); it has to be corrected before being uncommented
#interact_manual(move_token_up, rowID=w_rowID, wit=w_wit, {'manual': True, 'manual_name': 'move token up'})
In [25]:
#add/delete notes
#handler of the "Add note" button; what it prints is captured by the `out` widget
@out.capture(clear_output=True)
def add_on_click(b):
    """Add the typed note to the selected token, then print the content of the cell."""
    witness, row_id = w_wit.value, w_rowID.value
    add_note(wit=witness, rowID=row_id, token=w_token.value, note=w_note.value)
    #check result
    print('Result:')
    print_info(row_id, witness)
    print('\n')
#the "Add note" button itself
w_add_note = widgets.Button(description='Add note', button_style='success')
w_add_note.on_click(add_on_click)
#handler of the "Delete note" button; what it prints is captured by the `out` widget
@out.capture(clear_output=True)
def del_on_click(b):
    """Delete the note of the selected token, then print the content of the cell."""
    witness, row_id = w_wit.value, w_rowID.value
    del_note(wit=witness, rowID=row_id, token=w_token.value)
    #check result
    print('Result:')
    print_info(row_id, witness)
#the "Delete note" button itself
w_del_note = widgets.Button(description='Delete note', button_style='danger')
w_del_note.on_click(del_on_click)
#display widgets
#uncomment the next lines to see the widgets
#display(w_wit, w_rowID, w_token, w_note)
#display(w_add_note, w_del_note)
In [26]:
#save new json
#file in which the collation is saved
path_new_json = 'json-collations/calpurnius-collation-joint-BCMNPH-corr.json'
#alternative: original collation file name prefixed with a date/time identifier
#(os is not among the imports at the top of the notebook; add `import os` first)
#file_name = os.path.split(path)[1]
#file_id = datetime.now().strftime('%Y-%m-%d-%H%M%S')
#path_new_json = 'json-collations/'+file_id+'-'+file_name
#button that triggers the save
w_button = widgets.Button(description="Save JSON", button_style='info')
#NOTE: a later cell defines another function with this same name for the
#"Save Table" button; w_button keeps the function registered just below
def on_button_clicked(b):
    """Save the whole collation as JSON in path_new_json."""
    save_json(path=path_new_json, table=collation)
#run the function when the button is clicked
w_button.on_click(on_button_clicked)
#save json
#uncomment the next line to see the widget
#display(w_button)
In [27]:
#HTML widget in which the table of agreements is rendered (filled by collation_compare)
w2_html = widgets.HTML(value="")
#selection of a group of witnesses which share the same readings
w1 = widgets.SelectMultiple(
    description="Agreements:",
    options=witnesses
)
#selection of a second group of witnesses, against which the first group is compared
w2 = widgets.SelectMultiple(
    description="Against:",
    options=witnesses
)
def collation_compare(table, a, b):
    """Show in w2_html the rows where witnesses `a` agree against witnesses `b`.

    table -- the collation table to search
    a     -- witnesses which agree together (widget tuple, or a single value)
    b     -- witnesses they must differ from (widget tuple, or a single value)

    With no witness in `a`, a message is printed and the display is left as
    it is. Otherwise the HTML table is followed by the number of rows found.
    """
    #widget selections arrive as tuples; anything else is wrapped in a list
    if isinstance(a, (tuple)):
        witlist, nonwitlist = list(a), list(b)
    else:
        witlist, nonwitlist = [a], [b]
    if not a:
        print("No witness selected.")
        return
    #rows where the first group agrees against the second
    result = compare_witnesses(table, witlist, nonwitlist)
    #HTML table, plus an indication of the number of rows in the result
    row_count = '<span>Total: '+str(len(result))+' rows in the table.</span>'
    w2_html.value = table_to_html(table,result) + row_count
#-----------
#button that saves the current table of agreements as an HTML file
w_save = widgets.Button(description="Save Table", button_style='info')
#free text describing the table; it is written into the saved file
w_descr = widgets.Text(value="Table description")
def on_button_clicked(x):
    """Save the table of agreements for the current w1/w2 selection as an HTML file.

    The file goes to 'alignment-tables/collation-<date-time>.html'. With no
    witness selected in w1, a message is printed and nothing is saved.
    """
    #NOTE(review): an earlier cell defines a function with this same name for
    #the "Save JSON" button; this definition replaces that name from here on.
    selected = w1.value
    #widget selections arrive as tuples; anything else is wrapped in a list
    if isinstance(selected, (tuple)):
        witlist = list(selected)
        nonwitlist = list(w2.value)
    else:
        witlist = [selected]
        nonwitlist = [w2.value]
    if not selected:
        print("No table to save.")
        return
    #unique file name built from the current date and time
    file_id = datetime.now().strftime('%Y-%m-%d-%H%M%S')
    path_result = 'alignment-tables/collation-'+file_id+'.html'
    #description typed by the user
    descr = str(w_descr.value)
    #fancy HTML table of the rows where the two groups differ
    rows = compare_witnesses(collation, witlist, nonwitlist)
    table = table_to_html_fancy(collation,rows)
    #NOTE(review): save_table returns a download link, which is not used here
    save_table(descr, table, path_result)
#run the function when the save button is clicked
w_save.on_click(on_button_clicked)
#---------------
#find agreements between witnesses or unique readings
#uncomment the next line to see the widgets
#interact(collation_compare, table=fixed(collation), a=w1, b=w2)
#display(w2_html)
#display(w_descr)
#display(w_save)
In [28]:
#HTML widget in which the search result is rendered
w3_html = widgets.HTML(value="")
#do the search
def search_collation(table,text):
    """Search `text` in `table` and show the matching rows in w3_html."""
    found_rows = search(table,text)
    w3_html.value = table_to_html(table,found_rows)
#search collation with interactive text input
#uncomment the next line to see the widgets
#interact(search_collation, table=fixed(collation),text="calpurnius",__manual=True)
#display(w3_html)
In [29]:
#Examples: 459/C1, 932/M1, 9/LH
#uncomment the next line to see the widget
#interact(print_info, rowID=w_rowID, wit=w_wit)
Possible interactions are grouped by category:
In [30]:
#Using the tab widget, gather all interactions in one place.
#Every widget and function used below is defined in the cells above.
tab = widgets.Tab()
#page 1 = view extract: From/To inputs, then the rendered extract
w_extract = interactive(collation_extract, a=w_from, b=w_to)
page1 = widgets.VBox(children = [w_extract, w1_html])
#page 2 = modify the collation.
#the same w_rowID, w_wit and out widgets are placed in several sections,
#so all the sections work on one shared row/witness selection and message area
w_modif1 = widgets.VBox(children=[w_rowID, b1, out])#add row
w_modif2 = widgets.VBox(children=[w_rowID, b2, out])#delete row
w_modif3 = widgets.VBox(children=[w_rowID, w_wit, b3, out])#move token down
w_modif4 = widgets.VBox(children=[w_rowID, w_wit, b4, out])#move token up
w_modif5 = widgets.VBox([w_wit, w_rowID, w_token, w_note,
                         widgets.HBox(children=[w_add_note, w_del_note]), out])#add/del notes
#one accordion section per kind of modification; titles follow the order of the children
accordion = widgets.Accordion(children=[w_modif1, w_modif2, w_modif3, w_modif4, w_modif5])
accordion.set_title(0, 'Add Row')
accordion.set_title(1, 'Delete Row')
accordion.set_title(2, 'Move Token Down')
accordion.set_title(3, 'Move Token Up')
accordion.set_title(4, 'Notes')
#no section selected at start
accordion.selected_index = None
#the "Save JSON" button sits below the accordion
page2 = widgets.VBox(children = [accordion, w_button])
#page 3 = find agreements: the two witness selections, the result table,
#then the description field and the button to save the table
w_agr = interactive(collation_compare, table=fixed(collation), a=w1, b=w2)
page3 = widgets.VBox(children = [w_agr, w2_html, w_descr, w_save])
#page 4 = search; the options dictionary asks for a manual run with a button named 'Search'
w_search = interactive(search_collation, {'manual' : True, 'manual_name' : 'Search'}, table=fixed(collation),text="calpurnius")
page4 = widgets.VBox(children = [w_search, w3_html])
#page 5 = clarify: print the tokens of one cell (row + witness) with all their features
w_clar = interactive(print_info, rowID=w_rowID, wit=w_wit)
page5 = widgets.VBox(children = [w_clar])
#tab titles follow the order of the pages
tab.children = [page1, page2, page3, page4, page5]
tab.set_title(0, 'Extract')
tab.set_title(1, 'Modifications')
tab.set_title(2, 'Find Agreements')
tab.set_title(3, 'Search')
tab.set_title(4, 'Clarify')
display(tab)