In [2]:
import datetime as dt
import os
import sys
from cltk.corpus.greek.tlg.parse_tlg_indices import get_id_author
from cltk.corpus.utils.formatter import assemble_tlg_author_filepaths
from nltk.tokenize.punkt import PunktLanguageVars
import pandas
In [3]:
p = PunktLanguageVars()
In [4]:
t0 = dt.datetime.utcnow()
cleaned_dir = os.path.expanduser('~/cltk_data/greek/text/tlg/plaintext_clean')
dir_contents = os.listdir(cleaned_dir)
corpus_stats = {}
all_tokens_list = []
for doc_count, file in enumerate(dir_contents, 1):  # start at 1 so the final value is the file total
    file_path = os.path.join(cleaned_dir, file)
    with open(file_path) as fo:
        text = fo.read().lower()
    text = text.replace('.', '')  # strip periods before tokenizing
    tokens = p.word_tokenize(text)
    all_tokens_list += tokens
print('Total author files:', doc_count)
print('Total words:', len(all_tokens_list))
all_tokens_unique = set(all_tokens_list)
print('Total unique words:', len(all_tokens_unique))
corpus_stats = {'doc_count': doc_count,
                'total_words': len(all_tokens_list),
                'total_unique_words': len(all_tokens_unique)}
print('... finished in {}'.format(dt.datetime.utcnow() - t0))
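For reference, PunktLanguageVars.word_tokenize is language-agnostic: it splits on whitespace and breaks off most punctuation, with no Greek-specific logic. A minimal sketch (the line of Homer is just an illustration):

from nltk.tokenize.punkt import PunktLanguageVars

p = PunktLanguageVars()
p.word_tokenize('μῆνιν ἄειδε θεά')  # -> ['μῆνιν', 'ἄειδε', 'θεά']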
In [5]:
df_corpus = pandas.DataFrame(corpus_stats, index=[0])
print(df_corpus)
In [6]:
from cltk.corpus.greek.tlg.parse_tlg_indices import get_epithet_of_author
In [7]:
map_id_author = get_id_author()
In [8]:
t0 = dt.datetime.utcnow()
# Words and unique words per author
map_id_word_counts = {}
for file in dir_contents:
    map_word_counts = {}
    file_path = os.path.join(cleaned_dir, file)
    author_id = file[3:-4]  # strip the 'TLG' prefix and '.txt' extension
    author = map_id_author[author_id]
    with open(file_path) as fo:
        text = fo.read().lower()
    text = text.replace('.', '')
    tokens = p.word_tokenize(text)
    map_word_counts['name'] = author
    map_word_counts['epithet'] = get_epithet_of_author(author_id)
    map_word_counts['word_count_all'] = len(tokens)
    map_word_counts['word_count_unique'] = len(set(tokens))
    try:
        lexical_diversity = len(set(tokens)) / len(tokens)
    except ZeroDivisionError:  # empty file
        lexical_diversity = 0
    map_word_counts['lexical_diversity'] = lexical_diversity
    map_id_word_counts[author_id] = map_word_counts
    # print(author)
    # print(' ', 'Total words:', len(tokens))
    # print(' ', 'Total unique words:', len(set(tokens)))
    # print(' ', 'Lexical diversity:', lexical_diversity)
print('... finished in {}'.format(dt.datetime.utcnow() - t0))
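Lexical diversity here is the ratio of unique tokens to total tokens: 1.0 means no word is ever repeated, and larger texts trend lower. A tiny worked example with a hypothetical token list:

tokens = ['ὁ', 'ἀνὴρ', 'ὁ', 'σοφός']  # hypothetical tokens; 'ὁ' repeats
len(set(tokens)) / len(tokens)  # 3 unique / 4 total = 0.75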
In [9]:
df_text_counts = pandas.DataFrame(map_id_word_counts).T
In [10]:
df_text_counts
Out[10]:
In [11]:
df_text_counts.to_csv(os.path.expanduser('~/cltk_data/user_data/stats_text_counts.csv'))
In [12]:
from statistics import mean
from statistics import stdev
In [13]:
author_stats = {}
corpus_word_count_all = []
corpus_word_count_unique = []
corpus_word_lexical_diversity = []
for author_id, map_counts in map_id_word_counts.items():
    corpus_word_count_all.append(map_counts['word_count_all'])
    corpus_word_count_unique.append(map_counts['word_count_unique'])
    corpus_word_lexical_diversity.append(map_counts['lexical_diversity'])
author_stats['mean_words_per_author'] = mean(corpus_word_count_all)
author_stats['standard_deviation_of_words_per_author'] = stdev(corpus_word_count_all)
author_stats['mean_unique_words_per_author'] = mean(corpus_word_count_unique)
author_stats['standard_deviation_of_unique_words_per_author'] = stdev(corpus_word_count_unique)
author_stats['mean_lexical_diversity_per_author'] = mean(corpus_word_lexical_diversity)
author_stats['standard_deviation_of_lexical_diversity_per_author'] = stdev(corpus_word_lexical_diversity)
print('Mean words per author:', mean(corpus_word_count_all))
print('Standard deviation of words per author:', stdev(corpus_word_count_all))
print('Mean unique words per author:', mean(corpus_word_count_unique))
print('Standard deviation of unique words per author:', stdev(corpus_word_count_unique))
print('Mean lexical diversity per author:', mean(corpus_word_lexical_diversity))
print('Standard deviation of lexical diversity per author:', stdev(corpus_word_lexical_diversity))
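Since mean and stdev come from the standard-library statistics module, stdev computes the sample standard deviation (n - 1 denominator), not the population one. A quick sketch with made-up counts:

from statistics import mean, stdev

counts = [100, 200, 300]  # hypothetical per-author word counts
mean(counts)   # 200
stdev(counts)  # 100.0 (sample standard deviation)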
In [14]:
df_authors = pandas.DataFrame(author_stats, index=[0])
df_authors
Out[14]:
In [15]:
df_authors.to_csv(os.path.expanduser('~/cltk_data/user_data/stats_authors.csv'))
In [16]:
from collections import defaultdict
import datetime as dt
from cltk.corpus.greek.tlg.parse_tlg_indices import get_epithets
In [17]:
list_epithets = get_epithets()
In [18]:
t0 = dt.datetime.utcnow()
map_epithet_counts_all = defaultdict(list)
map_epithet_counts_unique = defaultdict(list)
map_epithet_lexical_diversity = defaultdict(list)
for file in dir_contents:
    file_path = os.path.join(cleaned_dir, file)
    author_id = file[3:-4]
    author = map_id_author[author_id]
    with open(file_path) as fo:
        text = fo.read().lower()
    text = text.replace('.', '')
    tokens = p.word_tokenize(text)
    try:
        lexical_diversity = len(set(tokens)) / len(tokens)
    except ZeroDivisionError:
        lexical_diversity = 0
    epithet = get_epithet_of_author(author_id)
    map_epithet_counts_all[epithet].append(len(tokens))
    map_epithet_counts_unique[epithet].append(len(set(tokens)))
    map_epithet_lexical_diversity[epithet].append(lexical_diversity)
print('... finished in {}'.format(dt.datetime.utcnow() - t0))
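The defaultdict(list) maps above save an explicit key check: the first lookup of a new epithet creates an empty list automatically, so append always works. A minimal sketch (the epithet label is made up):

from collections import defaultdict

counts_by_epithet = defaultdict(list)  # hypothetical name
counts_by_epithet['Epici'].append(10)  # no KeyError on first access
counts_by_epithet['Epici'].append(20)
counts_by_epithet['Epici']  # [10, 20]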
In [24]:
from statistics import StatisticsError
epithet_lexical_diversity_tuples = []
epithet_scores = {}
for epithet, counts in map_epithet_counts_all.items():
    print(epithet)
    print(' Mean of word counts per author:', mean(counts))
    try:
        wc_standard_deviation = stdev(counts)
    except StatisticsError:  # only one author under this epithet
        wc_standard_deviation = 0
    print(' Standard deviation of word counts per author:', wc_standard_deviation)
    uniques_list = map_epithet_counts_unique[epithet]
    print(' Mean of unique word counts per author:', mean(uniques_list))
    try:
        uniques_standard_deviation = stdev(uniques_list)
    except StatisticsError:
        uniques_standard_deviation = 0
    print(' Standard deviation of unique word counts per author:', uniques_standard_deviation)
    lexical_diversity_list = map_epithet_lexical_diversity[epithet]
    print(' Mean of lexical diversity per author:', mean(lexical_diversity_list))
    try:
        ld_standard_deviation = stdev(lexical_diversity_list)
    except StatisticsError:
        ld_standard_deviation = 0
    print(' Standard deviation of lexical diversity:', ld_standard_deviation)
    epithet_lexical_diversity_tuples.append((epithet, mean(lexical_diversity_list)))
    tmp_scores = {}
    tmp_scores['mean_of_word_counts_per_author'] = mean(counts)
    tmp_scores['standard_deviation_of_word_counts_per_author'] = wc_standard_deviation
    tmp_scores['mean_of_unique_word_counts_per_author'] = mean(uniques_list)
    tmp_scores['standard_deviation_of_unique_word_counts_per_author'] = uniques_standard_deviation
    tmp_scores['mean_of_lexical_diversity_per_author'] = mean(lexical_diversity_list)
    tmp_scores['standard_deviation_of_lexical_diversity'] = ld_standard_deviation
    epithet_scores[epithet] = tmp_scores
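The try/except guards above are needed because stdev raises StatisticsError when given fewer than two data points, which happens whenever an epithet covers a single author. A minimal sketch:

from statistics import stdev, StatisticsError

try:
    stdev([42])  # only one data point
except StatisticsError:
    print('fewer than two values; fall back to 0')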
In [25]:
# Sort epithets by mean lexical diversity, highest first
sorted(epithet_lexical_diversity_tuples, key=lambda x: x[1], reverse=True)
Out[25]:
In [26]:
pandas.DataFrame(epithet_lexical_diversity_tuples)
Out[26]:
In [27]:
df_epithet_scores = pandas.DataFrame(epithet_scores).T
df_epithet_scores
Out[27]:
In [28]:
df_epithet_scores.to_csv(os.path.expanduser('~/cltk_data/user_data/stats_epithet.csv'))