In [1]:
from tf.fabric import Fabric

# Dataset module handed to Fabric; the load report below shows it resolving inside the
# local text-fabric-data checkout.
ETCBC = 'hebrew/etcbc4c'
# silent=False: presumably what keeps the loading report visible in the cell output — confirm.
TF = Fabric( modules=ETCBC, silent=False )

# Request the features used by the statistics cells further down:
# section features (book/chapter/verse), otype, the lexeme text and gloss, and the
# grammatical features that featureStat can count.
api = TF.load('''
    book chapter verse
    otype lex_utf8 
    pdp 
    gn nu ps st vs vt 
    function typ gloss
''')


This is Text-Fabric 2.3.7
Api reference : https://github.com/ETCBC/text-fabric/wiki/Api
Tutorial      : https://github.com/ETCBC/text-fabric/blob/master/docs/tutorial.ipynb
Data sources  : https://github.com/ETCBC/text-fabric-data
Data docs     : https://etcbc.github.io/text-fabric-data
Shebanq docs  : https://shebanq.ancient-data.org/text
Slack team    : https://shebanq.slack.com/signup
Questions? Ask shebanq@ancient-data.org for an invite to Slack
111 features found and 0 ignored
  0.00s loading features ...
   |     0.03s B otype                from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     7.66s T oslots               from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.01s B book                 from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.01s B chapter              from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.00s B verse                from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.14s B lex_utf8             from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |      |     0.85s C __levels__           from otype, oslots
   |      |       12s C __order__            from otype, oslots, __levels__
   |      |     0.66s C __rank__             from otype, __order__
   |      |       16s C __levUp__            from otype, oslots, __rank__
   |      |     8.88s C __levDown__          from otype, __levUp__, __rank__
   |      |     3.28s C __boundary__         from otype, oslots, __rank__
   |     0.00s M otext                from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |      |     0.10s C __sections__         from otype, oslots, otext, __levUp__, __levels__, book, chapter, verse
   |     1.04s T pdp                  from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.09s B gn                   from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.11s B nu                   from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.09s B ps                   from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.09s B st                   from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.10s B vs                   from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.10s B vt                   from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.63s T function             from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.17s B typ                  from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.01s B gloss                from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.00s Feature overview: 104 for nodes; 5 for edges; 2 configs; 7 computed
    54s All features loaded/computed - for details use loadLog()

In [2]:
# Inject the API members into this notebook's global namespace. Later cells use F, L and T
# without defining them, so they presumably come from this call — it must run before them.
api.makeAvailableIn(globals())

In [39]:
# Look up the node for the section ('Genesis',). The tuple names only a book, so despite
# the `chp` prefix this is presumably the node of the whole book, not a chapter — confirm.
# The statistics cells below take this node as the range to count in.
chpNode = T.nodeFromSection(('Genesis',))
# Bare expression so the notebook displays the node number.
chpNode


Out[39]:
1367534

In [2]:
# node: the node that delimits the range, synType: node type to count (clause, phrase, word),
# feat: the grammatical feature to tally, num: how many of the top entries to print.
def featureStat(node, synType, feat, num):
    """Print the `num` most frequent values of feature `feat` below `node`.

    The nodes to count are those returned by ``L.d(node, otype=synType)``.
    Each is tallied under its value for `feat`, and the tally is printed in
    descending order of frequency, one ``rank. value:count`` line per value.

    Args:
        node: node whose descendants are counted.
        synType: node type to collect, e.g. 'clause', 'phrase' or 'word'.
        feat: name of a node feature available on the global ``F``
            (e.g. 'pdp', 'gn', 'function', 'typ', 'lex_utf8', 'gloss').
        num: maximum number of lines to print; zero or less prints nothing.

    Raises:
        ValueError: if `feat` is not an attribute of ``F``.

    Relies on the globals ``F`` and ``L`` being present in the notebook namespace.
    """
    # Fail early with a clear message instead of hitting an unbound local
    # in the middle of the loop when the feature name is not recognised.
    featureObj = getattr(F, feat, None)
    if featureObj is None:
        raise ValueError('unknown or unloaded feature: {!r}'.format(feat))

    # These two features are read from the lexeme node above the counted node,
    # not from the counted node itself.
    isLexFeature = feat in ('lex_utf8', 'gloss')
    # For word-level statistics, drop parts of speech that carry no meaningful
    # information (prepositions, conjunctions, articles, negations).
    skippedPdp = ('prep', 'conj', 'art', 'nega')

    counts = {}
    for n in L.d(node, otype=synType):
        if isLexFeature:
            # Filter first so the lexeme lookup is skipped for discarded words.
            if F.pdp.v(n) in skippedPdp:
                continue
            value = featureObj.v(L.u(n, otype='lex')[0])
        else:
            value = featureObj.v(n)
        # Start a new key at 1, otherwise add 1 to the existing tally.
        counts[value] = counts.get(value, 0) + 1

    ranked = sorted(counts, key=counts.__getitem__, reverse=True)

    # max(num, 0): a negative num must not turn into a "drop from the end" slice.
    # format() rather than string concatenation so a missing value (None) prints
    # instead of raising TypeError.
    for rank, value in enumerate(ranked[:max(num, 0)], start=1):
        print('{}. {}:{}'.format(rank, value, counts[value]))

In [3]:
# Ten most frequent values of the phrase feature `function` below chpNode.
# NOTE(review): the recorded output is a NameError because chpNode was not defined in the
# kernel when this cell ran; run the cell that assigns chpNode first.
featureStat(chpNode, "phrase", "function", 10)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-3-95494711dbf8> in <module>()
----> 1 featureStat(chpNode, "phrase", "function", 10)

NameError: name 'chpNode' is not defined

In [84]:
# Ten most frequent lexemes below chpNode; featureStat leaves out prepositions,
# conjunctions, articles and negations for this feature.
featureStat(chpNode, "word", "lex_utf8", 10)


1. אמר:607
2. בנ:365
3. כל:342
4. היה:316
5. ארצ:311
6. שׁמ:228
7. אלהימ:219
8. בוא:217
9. אב:208
10. ילד:189

In [91]:
# Usage statistics for prepositions.
def prepStat(node, num):
    """Print the `num` most frequent preposition lexemes below `node`.

    Every word returned by ``L.d(node, otype='word')`` whose ``pdp`` value is
    'prep' is tallied under the ``lex_utf8`` value of its lexeme node. The
    tally is printed in descending order of frequency, one
    ``rank. lexeme:count`` line per lexeme.

    Args:
        node: node whose word descendants are inspected.
        num: maximum number of lines to print; zero or less prints nothing.

    Relies on the globals ``F`` and ``L`` being present in the notebook namespace.
    """
    counts = {}
    for n in L.d(node, otype='word'):
        if F.pdp.v(n) != 'prep':
            continue
        # The lexeme text is stored on the lexeme node above the word.
        lexeme = F.lex_utf8.v(L.u(n, otype='lex')[0])
        counts[lexeme] = counts.get(lexeme, 0) + 1

    ranked = sorted(counts, key=counts.__getitem__, reverse=True)

    # max(num, 0): a negative num must not turn into a "drop from the end" slice.
    # format() rather than string concatenation so a missing lexeme value (None)
    # prints instead of raising TypeError.
    for rank, lexeme in enumerate(ranked[:max(num, 0)], start=1):
        print('{}. {}:{}'.format(rank, lexeme, counts[lexeme]))

In [92]:
# Ten most frequent prepositions below chpNode.
prepStat(chpNode, 10)


1. ל:1335
2. את:1147
3. ב:825
4. אל:475
5. מנ:433
6. על:304
7. כ:141
8. עמ:83
9. אחר:77
10. בינ:75

In [ ]: