notebook.community

Edit and run



In [13]:

    
import codecs
import json

from cjklib.reading import ReadingFactory
# NOTE(review): `rf` is not referenced anywhere else in this cell — confirm whether
# another cell uses it before removing it.
rf = ReadingFactory()



# Load CEDICT and collect every "classifier..." gloss, grouped by headword.
#
# Cl maps the second space-separated token of a dictionary line to the list of
# '/'-delimited fields on that line that start with "classifier".
# NOTE(review): presumably that second token is the simplified headword
# (CC-CEDICT lines look like "Traditional Simplified [pinyin] /gloss/...") — confirm.
Cl = {}

# `with` guarantees the dictionary file is closed after reading, and the distinct
# names `cedict` / `field` keep the file handle from being shadowed inside the loop.
with codecs.open('lib/cedict_1_0_ts_utf-8_mdbg.txt', encoding='utf-8') as cedict:
    for line in cedict:
        for field in line.split(u'/'):
            if field.startswith(u'classifier'):
                c = line.split(' ')[1]
                Cl.setdefault(c, []).append(field)


# Write one tab-separated record per headword; when a headword has several
# classifier descriptions they are joined with "; ".
with codecs.open('dist/classifierDescriptions.txt', 'w', encoding='utf-8') as f:
    # .items() rather than .iteritems() so the cell runs on both Python 2 and 3.
    for k, v in Cl.items():
        f.write(u'{}\t{}\n'.format(k, u'; '.join(v)))



In [ ]: