notebook.community

Edit and run



In [1]:

    
from tf.fabric import Fabric

# Text-Fabric data module to load; the load log below shows it resolving to the
# hebrew/etcbc4c directory of a local text-fabric-data checkout.
ETCBC = 'hebrew/etcbc4c'
# silent=False: presumably what makes Text-Fabric print the banner and the
# per-feature load log shown in this cell's output.
TF = Fabric( modules=ETCBC, silent=False )

# Load only the features that the cells below read through F.<feature>.v(...)
# and T.nodeFromSection(...). The names are whitespace-separated inside the string.
api = TF.load('''
    book chapter verse
    otype lex_utf8 
    pdp 
    gn nu ps st vs vt 
    function typ gloss
''')









    



This is Text-Fabric 2.3.7
Api reference : https://github.com/ETCBC/text-fabric/wiki/Api
Tutorial      : https://github.com/ETCBC/text-fabric/blob/master/docs/tutorial.ipynb
Data sources  : https://github.com/ETCBC/text-fabric-data
Data docs     : https://etcbc.github.io/text-fabric-data
Shebanq docs  : https://shebanq.ancient-data.org/text
Slack team    : https://shebanq.slack.com/signup
Questions? Ask shebanq@ancient-data.org for an invite to Slack
111 features found and 0 ignored
  0.00s loading features ...
   |     0.03s B otype                from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.01s B book                 from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.01s B chapter              from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.00s B verse                from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.14s B lex_utf8             from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.10s B pdp                  from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.09s B gn                   from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.09s B nu                   from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.09s B ps                   from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.08s B st                   from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.10s B vs                   from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.10s B vt                   from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.06s B function             from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.16s B typ                  from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.02s B gloss                from /home/kungsik/github/text-fabric-data/hebrew/etcbc4c
   |     0.01s Feature overview: 104 for nodes; 5 for edges; 2 configs; 7 computed
  5.03s All features loaded/computed - for details use loadLog()



In [2]:

    
# Expose the loaded Text-Fabric API in the notebook's global namespace.
# The functions defined below use the globals F, L and T without defining them,
# so they presumably come from this call -- it must run before any of them is called.
api.makeAvailableIn(globals())



In [33]:

    
def countArgument(node):
    """Build a histogram of how many argument phrases each clause contains.

    For every item of ``node`` the clauses below it are fetched with
    ``L.d(..., otype="clause")``. Clauses whose ``typ`` is ``'InfC'`` are
    left out of the histogram altogether. In each remaining clause a phrase
    counts as an argument unless its ``function`` is ``'Pred'`` or ``'Nega'``
    or its ``pdp`` is ``'conj'``.

    Parameters
    ----------
    node : iterable
        Nodes whose clauses are examined (presumably verse nodes, e.g. the
        list produced by ``codetorange`` -- confirm against the caller).

    Returns
    -------
    dict
        Maps an argument count to the number of clauses having that count.
    """
    nonArgumentFunctions = ('Pred', 'Nega')
    histogram = {}
    for verse in node:
        for clause in L.d(verse, otype="clause"):
            if F.typ.v(clause) == 'InfC':
                continue
            # NOTE(review): pdp is queried on phrase nodes here; confirm that the
            # data set defines pdp for phrases, otherwise this test never excludes
            # anything and conjunction phrases are counted as arguments.
            argumentTotal = sum(
                1
                for phrase in L.d(clause, otype="phrase")
                if F.pdp.v(phrase) != 'conj'
                and F.function.v(phrase) not in nonArgumentFunctions
            )
            histogram[argumentTotal] = histogram.get(argumentTotal, 0) + 1
    return histogram



In [34]:

    
# NOTE(review): `nodeList` is not assigned at top level in any visible cell -- it
# only exists as a local variable inside codetorange, which is defined further
# down. This call therefore appears to rely on leftover kernel state and will
# likely raise NameError after Restart & Run All. Presumably the list came from
# codetorange(...); confirm and define it explicitly before this cell.
countArgument(nodeList)









    Out[34]:





{0: 1, 1: 3, 2: 8, 3: 7, 4: 3, 5: 1}



In [68]:

    
import collections



In [203]:

    
#1001001-1002001; 2001001-2002001 ==> Genesis, 1, 1 ~ Genesis, 2, 1; + Exodus ... 
def codetorange(code):
    """Translate a verse-reference code string into a list of node numbers.

    A reference is 7 or 8 digits: a 1- or 2-digit book number (an index into
    ``bookList`` below), then a 3-digit chapter and a 3-digit verse, so
    ``1001001`` is Genesis 1:1 and ``10002003`` is 1_Kings 2:3. References
    joined by ``-`` form an inclusive range; ``;`` separates independent
    references or ranges.

    Parameters
    ----------
    code : str
        For example ``"1001001-1002001; 2001001"``. All whitespace is
        ignored and empty segments (e.g. after a trailing ``;``) are skipped.

    Returns
    -------
    list
        For a single reference, the node returned by ``T.nodeFromSection``.
        For a range, every integer from the first node up to and including
        the last node (this assumes the nodes between two verses are exactly
        the verses in between -- TODO confirm against the data set).

    Raises
    ------
    ValueError
        If a reference is not 7 or 8 decimal digits, if its book number is
        not a valid index into ``bookList``, or if ``T.nodeFromSection``
        finds no node for it.
    """
    bookList = ["null", "Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy", "Joshua", "Judges",
            "1_Samuel", "2_Samuel", "1_Kings", "2_Kings", "Isaiah", "Jeremiah", "Ezekiel",
            "Hosea", "Joel", "Amos", "Obadiah", "Jonah", "Micah", "Nahum", "Habakkuk", "Zephaniah",
            "Haggai", "Zechariah", "Malachi", "Psalms", "Job", "Proverbs", "Ruth", "Song_of_songs",
            "Ecclesiastes", "Lamentations", "Esther", "Daniel", "Ezra", "Nehemiah", "1_Chronicles",
            "2_Chronicles"]
    nodeList = []
    # Drop every kind of whitespace, not only blanks, so multi-line input works.
    for segment in "".join(code.split()).split(';'):
        if not segment:
            continue
        # The endpoints are collected afresh for every segment, so the end of a
        # range in one segment can never leak into a later single reference.
        endpoints = []
        for ref in segment.split('-'):
            if len(ref) not in (7, 8) or not ref.isdecimal():
                raise ValueError(
                    "verse code must be 7 or 8 digits, got %r" % (ref,)
                )
            # Everything before the last six digits is the book number.
            bookIndex = int(ref[:-6])
            if not 1 <= bookIndex < len(bookList):
                raise ValueError(
                    "unknown book number %d in verse code %r" % (bookIndex, ref)
                )
            section = (bookList[bookIndex], int(ref[-6:-3]), int(ref[-3:]))
            verseNode = T.nodeFromSection(section)
            if verseNode is None:
                raise ValueError(
                    "no node found for %s %d:%d (verse code %r)" % (section + (ref,))
                )
            endpoints.append(verseNode)
        if len(endpoints) == 1:
            nodeList.append(endpoints[0])
        else:
            # With more than two parts only the first and the last one count.
            nodeList.extend(range(endpoints[0], endpoints[-1] + 1))
    return nodeList



In [205]:

    
# Two single references separated by ';': book 1 chapter 1 verse 1 and
# book 2 chapter 1 verse 1 (Genesis 1:1 and Exodus 1:1 in codetorange's book list).
code = "1001001; 2001001"
codetorange(code)









    Out[205]:





[1413682, 1415215]



In [206]:

    
def featureStat1(node, synType, feat, num):
    """Rank the values of one feature over the nodes found below ``node``.

    NOTE(review): this notebook defines featureStat1 in two cells; the cell
    that runs last silently replaces the other, so keep only one of them.

    Parameters
    ----------
    node : iterable
        Nodes that are searched with ``L.d(n, otype=synType)`` (presumably
        verse nodes such as the list returned by ``codetorange``).
    synType : str
        Node type to collect below each item of ``node``, e.g. ``'word'``.
    feat : str
        What to count:

        * ``'pdp'``, ``'st'``, ``'vs'``, ``'vt'``, ``'function'``, ``'typ'``:
          the value of that feature on each collected node.
        * ``'lex_utf8'``, ``'gloss'``: the value of that feature on the
          node's lexeme (``L.u(n, otype='lex')[0]``). Nodes whose ``pdp`` is
          ``'prep'``, ``'conj'``, ``'art'`` or ``'nega'`` are skipped.
        * ``'psgnnu'``: the ``ps``, ``gn`` and ``nu`` values joined by ``-``,
          leaving out components that are ``'NA'``, ``'unknown'`` or missing;
          nodes with no component left are skipped.
    num : int
        Maximum number of entries to return; values below 1 give an empty result.

    Returns
    -------
    collections.OrderedDict
        Maps each value to its count **as a string**, most frequent first;
        equal counts keep the order in which the values were first seen.

    Raises
    ------
    ValueError
        If ``feat`` is not one of the names listed above.
    """
    plainFeatures = ('pdp', 'st', 'vs', 'vt', 'function', 'typ')
    lexemeFeatures = ('lex_utf8', 'gloss')
    if feat != 'psgnnu' and feat not in plainFeatures and feat not in lexemeFeatures:
        raise ValueError('unsupported feat: %r' % (feat,))

    # For word-level statistics, these parts of speech carry no useful signal.
    ignoredPdp = ('prep', 'conj', 'art', 'nega')
    missingValues = (None, 'NA', 'unknown')

    statType = {}
    for verseNode in node:
        for n in L.d(verseNode, otype=synType):
            if feat in lexemeFeatures:
                # Filter first, so the lexeme is only looked up for nodes we keep.
                if F.pdp.v(n) in ignoredPdp:
                    continue
                sType = getattr(F, feat).v(L.u(n, otype='lex')[0])
            elif feat == 'psgnnu':
                # Joining only the informative parts avoids labels with a
                # dangling hyphen such as 'p3-m-'.
                parts = [
                    value
                    for value in (F.ps.v(n), F.gn.v(n), F.nu.v(n))
                    if value not in missingValues
                ]
                if not parts:
                    continue
                sType = '-'.join(parts)
            else:
                sType = getattr(F, feat).v(n)
            # Add 1 to an existing key, or start a new key at 1.
            statType[sType] = statType.get(sType, 0) + 1

    sortedKey = sorted(statType, key=statType.__getitem__, reverse=True)
    result = collections.OrderedDict()
    for rank, key in enumerate(sortedKey, start=1):
        if rank > num:
            break
        result[key] = str(statType[key])
    return result



In [ ]:

    
def featureStat1(node, synType, feat, num):
    """Rank the values of one feature over the nodes found below ``node``.

    NOTE(review): this notebook defines featureStat1 in two cells; the cell
    that runs last silently replaces the other, so keep only one of them.

    Parameters
    ----------
    node : iterable
        Nodes that are searched with ``L.d(n, otype=synType)`` (presumably
        verse nodes such as the list returned by ``codetorange``).
    synType : str
        Node type to collect below each item of ``node``, e.g. ``'word'``.
    feat : str
        What to count:

        * ``'pdp'``, ``'st'``, ``'vs'``, ``'vt'``, ``'function'``, ``'typ'``:
          the value of that feature on each collected node.
        * ``'lex_utf8'``, ``'gloss'``: the value of that feature on the
          node's lexeme (``L.u(n, otype='lex')[0]``). Nodes whose ``pdp`` is
          ``'prep'``, ``'conj'``, ``'art'`` or ``'nega'`` are skipped.
        * ``'psgnnu'``: the ``ps``, ``gn`` and ``nu`` values joined by ``-``,
          leaving out components that are ``'NA'``, ``'unknown'`` or missing;
          nodes with no component left are skipped.
    num : int
        Maximum number of entries to return; values below 1 give an empty result.

    Returns
    -------
    collections.OrderedDict
        Maps each value to its count **as a string**, most frequent first;
        equal counts keep the order in which the values were first seen.

    Raises
    ------
    ValueError
        If ``feat`` is not one of the names listed above.
    """
    plainFeatures = ('pdp', 'st', 'vs', 'vt', 'function', 'typ')
    lexemeFeatures = ('lex_utf8', 'gloss')
    if feat != 'psgnnu' and feat not in plainFeatures and feat not in lexemeFeatures:
        raise ValueError('unsupported feat: %r' % (feat,))

    # For word-level statistics, these parts of speech carry no useful signal.
    ignoredPdp = ('prep', 'conj', 'art', 'nega')
    missingValues = (None, 'NA', 'unknown')

    statType = {}
    for verseNode in node:
        for n in L.d(verseNode, otype=synType):
            if feat in lexemeFeatures:
                # Filter first, so the lexeme is only looked up for nodes we keep.
                if F.pdp.v(n) in ignoredPdp:
                    continue
                sType = getattr(F, feat).v(L.u(n, otype='lex')[0])
            elif feat == 'psgnnu':
                # Joining only the informative parts avoids labels with a
                # dangling hyphen such as 'p3-m-'.
                parts = [
                    value
                    for value in (F.ps.v(n), F.gn.v(n), F.nu.v(n))
                    if value not in missingValues
                ]
                if not parts:
                    continue
                sType = '-'.join(parts)
            else:
                sType = getattr(F, feat).v(n)
            # Add 1 to an existing key, or start a new key at 1.
            statType[sType] = statType.get(sType, 0) + 1

    sortedKey = sorted(statType, key=statType.__getitem__, reverse=True)
    result = collections.OrderedDict()
    for rank, key in enumerate(sortedKey, start=1):
        if rank > num:
            break
        result[key] = str(statType[key])
    return result