In [25]:
# imports
from collatex import *
from integration.xml_tokenization import tokenize_xml_file
from integration.xml_tokenization import tokenize_plain_text_file
# normalize tokens
def prepare_and_normalize_tokens(words_from_file):
    """Wrap each word in a token dict with its original and normalized form.

    Args:
        words_from_file: iterable of word strings (a list or any other
            iterable; it is consumed exactly once).

    Returns:
        A list with one dict per input word, in input order. Each dict has
        the key "t" holding the word unchanged and the key "n" holding the
        lower-cased word.
    """
    return [{"t": word, "n": word.lower()} for word in words_from_file]
# Each witness file is opened in a `with` block so the handle is closed even
# if tokenization raises; the token list is fully built before the file closes.

# 1. open and parse the source file of the first witness
with open("./fixtures/1823_act_1.xml", encoding='utf-8') as tei_file_1823_act1:
    token_stream_witness_1 = prepare_and_normalize_tokens(tokenize_xml_file(tei_file_1823_act1))
print(len(token_stream_witness_1))

# 2. open and parse the source file of the second witness
with open("./fixtures/second_edition_act_1.xml", encoding='utf-8') as tei_file_second_edition_act1:
    token_stream_witness_2 = prepare_and_normalize_tokens(tokenize_xml_file(tei_file_second_edition_act1))
print(len(token_stream_witness_2))

# 3. open and parse the source file of the third witness
with open("./fixtures/Act1_version3.txt", encoding='utf-8') as plain_text_third_witness:
    token_stream_witness_3 = prepare_and_normalize_tokens(tokenize_plain_text_file(plain_text_third_witness))
print(len(token_stream_witness_3))
3200
3424
3376
In [27]:
# create JSON block
# Each witness entry carries an "id" and only the first 500 tokens of its
# token stream; the three entries are then gathered under "witnesses".
witness_data_1 = {"id": "1", "tokens": token_stream_witness_1[:500]}
witness_data_2 = {"id": "2", "tokens": token_stream_witness_2[:500]}
witness_data_3 = {"id": "3", "tokens": token_stream_witness_3[:500]}
pretokenized_json = {
    "witnesses": [witness_data_1, witness_data_2, witness_data_3],
}
print("DONE")
DONE
In [28]:
# Collate the pre-tokenized witnesses held in pretokenized_json, requesting
# the "html2" output option.
# NOTE(review): collate_pretokenized_json is presumably supplied by the
# `from collatex import *` star import — confirm against the installed version.
collate_pretokenized_json(pretokenized_json, output="html2")
1
2
3
-
-
julian
-
-
.
act
-
act
1
i
i
:
.
.
scene
scene
scene
1
1
i
:
:
-
an
an
-
elegant
apartment
-
-
in
-
-
the
-
-
royal
-
-
palace
-
-
.
.
-
an
an
apartment
apartment
apartment
in
in
in
the
the
the
royal
royal
royal
palace
palace
palace
.
.
.
the
-
-
windows
-
-
opening
-
-
on
-
-
a
-
-
balcony
-
-
,
-
-
adorned
-
-
with
-
-
flowers
-
-
.
-
-
julian
julian
julian
sleeping
sleeping
sleeping
on
on
on
a
a
a
couch
couch
couch
--
.
.
annabel
annabel
annabel
-
.
.
annab
annab
ann
.
.
.
-
-
'
no
no
no
,
;
;
still
still
still
he
he
he
sleeps
sleeps
sleeps
--
!
!
'
'
'
twas
twas
twas
but
but
but
the
the
the
myrtle
myrtle
myrtle
bud
bud
bud
tapping
tapping
tapping
against
against
against
the
the
the
casement
casement
casement
,
,
,
as
as
as
the
the
the
wind
wind
wind
stirred
stirred
stirred
in
in
in
the
the
the
leafy
leafy
leafy
branches
branches
branches
.
.
.
well
well
well
he
he
he
loved
loved
loved
that
that
that
pleasant
pleasant
pleasant
bird
bird
birdlike
-
-
-
like
like
-
sound
sound
sound
,
,
,
which
which
which
,
,
,
as
as
as
a
a
a
voice
voice
voice
summoned
-
,
-
summon
summon
-
'
'
-
d
d
us
us
us
forth
forth
forth
into
into
into
the
the
the
fresher
fresher
fresher
air
air
air
of
of
of
eve
eve
eve
,
,
-
or
or
or
early
early
early
morn
morn
morn
.
.
.
ah
ah
ah
!
!
!
when
when
when
again
again
againó
--
--
and
and
and
yet
yet
yet
this
his
his
-
sleep
sleep
sleep
is
is
is
hopeful
hopeful
hopeful
.
.
.
for
for
for
seven
seven
seven
nights
nights
nights
he
he
he
had
had
had
not
not
not
tasted
tasted
tasted
slumber
slumber
slumber
.
.
.
who
who
who
comes
comes
comes
here
here
here
?
?
?
enter
enter
enter
alfonso
alfonso
alfonso
,
-
-
(
-
-
as
as
as
theodore
theodore
theodore
)
-
.
the
the
the
gentle
gentle
gentle
page
page
page
!
!
!
alas
alas
alas
!
!
,
to
to
to
wake
wake
wake
him
him
him
now
now
now
!
!
!
hush
hush
hush
,
,
,
theodore
theodore
theodore
!
!
!
tread
tread
tread
softly
softly
softlyósoftlier
--
--
-
softlier
softlier
-
,
,
,
boy
boy
boy
!
!
!
alfon
alfon
alf
-
-
.
doth
doth
doth
he
he
he
still
still
still
-
-
,
sleep
sleep
sleep
?
?
?
annab
annab
ann
.
.
.
speak
speak
speak
lower
lower
lower
.
.
.
alfon
alfon
alf
-
-
.
doth
doth
doth
he
he
he
sleep
sleep
sleep
?
?
?
annab
annab
ann
-
-
.
-
avoid
avoid
-
the
the
-
couch
couch
-
;
;
come
come
come
this
this
this
way
way
way
,
;
;
theodore
-
-
!
-
-
here
-
-
,
-
-
close
close
close
to
to
to
me
me
me
-
.
.
he
he
he
sleeps
sleeps
sleeps
.
.
.
he
he
he
hath
hath
hath
not
not
not
mov
moved
moved
'
-
-
d
-
-
in
in
in
all
all
all
the
the
the
hours
hours
hours
that
that
that
thou
thou
thou
hast
hast
hast
been
been
been
away
away
away
.
.
.
alfon
alfon
alp
.
.
.
then
then
then
we
we
we
may
may
may
hope
hope
hope
,
,
;
dear
dear
dear
lady
lady
lady
,
,
,
we
we
we
may
may
may
hope
hope
hope
!
!
.
annab
annab
ann
.
.
.
alas
alas
alas
!
!
!
alas
alas
alas
!
!
!
see
see
see
how
how
how
he
he
he
lies
lies
lies
,
,
,
scarce
scarce
scarce
breathing
breathing
breathing
.
.
.
whilst
whilst
whilst
i
i
i
hung
hung
hung
over
over
over
his
his
his
couch
couch
couch
,
-
-
i
i
i
should
should
should
have
have
have
thought
thought
thought
him
him
him
dead
dead
dead
,
,
,
but
but
but
for
for
for
his
his
his
short
short
short
and
and
and
frequent
frequent
frequent
sighs
sighs
sighs
.
.
.
alfon
alfon
alp
-
-
.
-
-
.
ah
ah
ah
me
me
me
!
!
!
not
not
not
even
even
even
in
in
in
slumber
slumber
slumber
can
can
can
he
he
he
lose
lose
lose
the
the
the
sense
sense
sense
of
of
of
that
that
that
deep
deep
deep
misery
misery
misery
.
;
;
and
and
and
i
i
ióhe
--
--
-
he
he
-
wakes
wakes
wakes
!
!
!
dost
dost
dost
thou
thou
thou
not
not
not
see
see
see
the
the
the
quivering
quivering
quivering
mantle
mantle
mantle
heave
heave
heave
with
with
with
sudden
sudden
sudden
motion
motion
motion
?
?
?
annab
annab
ann
.
.
.
thou
thou
thou
hast
hast
hast
wakened
wakened
wakened
him
him
him
.
.
.
thy
thy
thy
clamorous
clamorous
clamorous
grief
grief
grief
hath
hath
hath
roused
roused
roused
him
him
him
.
.
.
hence
hence
hence
!
!
!
begone
begone
begone
!
!
!
-
leave
leave
-
me
me
-
!
!
alfon
alfon
alp
.
.
.
and
and
and
yet
yet
yet
his
his
his
eyes
eyes
eyes
are
are
are
closed
closed
closed
.
.
.
he
he
he
sleeps
sleeps
sleeps
.
.
.
he
he
he
did
did
did
but
but
but
move
move
move
his
his
his
hand
hand
hand
.
.
.
annab
annab
ann
.
.
.
how
how
how
changed
changed
changed
he
he
he
is
is
is
!
!
!
how
how
how
pale
pale
pale
!
!
!
how
how
how
wasted
wasted
wasted
!
!
!
can
can
can
one
one
one
little
little
little
week
week
week
of
of
of
pain
pain
pain
and
and
and
sickness
sickness
sickness
so
so
so
have
have
have
faded
faded
faded
thee
thee
thee
,
,
,
my
my
my
princely
princely
princely
julian
julian
julian
!
!
!
but
but
but
eight
eight
eight
days
days
days
ago
ago
ago
there
there
there
lived
lived
lived
not
not
not
in
in
in
this
this
this
gladsome
gladsome
gladsome
sicily
sicily
sicily
so
so
so
glad
glad
glad
a
a
a
spirit
spirit
spirit
.
.
.
voice
voice
voice
,
,
-
and
and
and
step
step
step
,
,
-
and
and
and
eye
eye
eye
,
,
-
all
all
all
were
were
were
one
one
one
happiness
happiness
happiness
,
;
;
till
till
till
that
that
that
dread
dread
dread
hour
hour
hour
,
,
,
when
when
when
,
-
-
drest
drest
drest
in
in
in
sparkling
sparkling
sparkling
smiles
smiles
smiles
,
,
,
radiant
radiant
radiant
and
and
and
glowing
glowing
glowing
,
,
-
with
with
with
tender
tender
tender
thoughts
thoughts
thoughts
,
,
,
he
he
he
flew
flew
flew
to
to
to
meet
meet
meet
the
the
the
king
king
king
and
and
and
his
his
his
great
great
great
father
father
father
.
.
.
he
he
he
went
went
went
forth
forth
forth
alone
alone
alone
,
;
;
frenzy
frenzy
frenzy
and
and
and
grief
grief
grief
came
came
came
back
back
back
with
with
with
him
him
him
.
.
.
annab
alf
alp
.
-
-
alf
-
-
.
.
.
and
and
and
i
i
i
,
,
,
another
another
another
grief
grief
grief
.
.
.
annab
annab
ann
.
.
.
thou
thou
thou
wast
wast
wast
a
a
a
comforter
comforter
comforter
.
.
.
all
all
all
stranger
stranger
stranger
as
as
as
thou
thou
thou
art
art
art
,
,
,
hast
hast
hast
thou
thou
thou
not
not
not
shared
shared
shared
my
my
my
watch
watch
watch
as
as
as
carefully
carefully
carefully
,
,
,
as
as
as
faithfully
faithfully
faithfully
,
,
-
as
as
as
i
i
i
had
had
had
been
been
been
thy
thy
thy
sister
sister
sister
?
?
!
-
aye
ay
-
,
,
-
and
and
-
he
he
-
-
,
-
if
if
-
ever
ever
-
in
in
-
this
this
-
wild
wild
-
mysterious
mysterious
-
woe
woe
-
one
one
-
sight
sight
-
or
or
-
sound
sound
-
hath
hath
-
cheered
cheered
-
him
him
-
,
,
-
it
it
-
hath
hath
-
been
been
-
a
a
-
glance
glance
-
,
,
-
a
a
-
word
word
-
of
of
-
thine
thine
-
.
.
-
alf
alp
-
.
.
-
he
he
-
knows
knows
-
me
me
-
not
not
-
.
.
-
ann
ann
-
.
.
-
he
he
-
knows
knows
-
not
not
-
me
me
-
.
.
alfon
alfon
alf
.
.
.
i
i
i
never
never
never
heard
heard
heard
before
-
before
that
-
that
'
-
'
twas
-
twas
to
-
to
meet
-
meet
the
-
the
king
-
king
that
-
-
fatal
-
-
night
-
-
,
-
-
knowingly
-
-
,
-
-
purposely
-
-
!
-
-
how
-
-
could
-
-
he
-
-
guess
-
-
that
-
-
they
-
-
should
-
-
meet
-
-
?
-
-
what
-
-
moved
-
-
him
-
-
to
-
-
that
-
-
thought
-
-
?
-
-
annab
-
-
.
-
-
stranger
-
-
,
-
-
In [ ]:
Content source: DiXiT-eu/collatex-tutorial
Similar notebooks: