# In [13]:
import codecs
import json
from cjklib.reading import ReadingFactory
# NOTE(review): `rf` is not referenced anywhere in this cell; presumably other
# cells rely on it -- confirm before removing.
rf = ReadingFactory()

# Load CEDICT and collect classifier descriptions.
#
# Every line of the dictionary file is split on '/' into fields; any field
# that begins with the word "classifier" is recorded under the second
# space-separated token of that line.
# NOTE(review): in the usual CC-CEDICT layout
# ("Traditional Simplified [pinyin] /def/def/") that token would be the
# simplified headword -- confirm against the data file.
Cl = {}
with codecs.open('lib/cedict_1_0_ts_utf-8_mdbg.txt', encoding='utf-8') as cedict:
    # The `with` block guarantees the dictionary file is closed, and the
    # handle has its own name so the inner loop can no longer rebind it.
    for line in cedict:
        for field in line.split(u'/'):
            if field.startswith(u'classifier'):
                c = line.split(' ')[1]
                Cl.setdefault(c, []).append(field)

# Write one line per key: "<key>\t<description>; <description>; ...".
with codecs.open('dist/classifierDescriptions.txt', 'w', encoding='utf-8') as out:
    # items() is available on both Python 2 and 3 (iteritems() is 2-only).
    for k, v in Cl.items():
        out.write(u'{}\t{}\n'.format(k, '; '.join(v)))
# In [ ]: