In [71]:
import os
import json
from pprint import pprint
In [72]:
# Load the dictionary entries (`chunks`) and the mapping from class name to a
# list of indices into `chunks` (`classes`).
# The encoding is pinned to UTF-8: the dictionary holds Cyrillic text, and the
# platform default encoding used by a bare open() is not UTF-8 everywhere.
with open('dictionary.json', encoding='utf-8') as f:
    chunks = json.load(f)
with open('classes_indices.json', encoding='utf-8') as f:
    classes = json.load(f)
In [73]:
# Print every class name (alphabetically) followed by the first dictionary
# entry registered for that class.
cls_order = sorted(classes)
for cls_name in cls_order:
    print(cls_name)
    sample_index = classes[cls_name][0]
    pprint(chunks[sample_index])
In [74]:
# Display the alphabetically sorted list of class names.
pprint(cls_order)
In [75]:
# Create ../lexicons if needed and start nominals.lexc with a banner and the
# opening line of the Nouns lexicon.
folder = 'lexicons'
dict_file = 'nominals.lexc'
# makedirs(exist_ok=True) replaces the exists()-then-mkdir() pair, which could
# fail if the directory appeared between the check and the creation.
os.makedirs(os.path.join('..', folder), exist_ok=True)
# Mode 'w' truncates any earlier file; the entries are appended further down.
# The file is created as UTF-8 so it matches the encoding the Cyrillic
# entries need.
with open(os.path.join('..', folder, dict_file), 'w', encoding='utf-8') as f:
    f.write("""!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!! L E X I C O N !!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
""")
    f.write("""
LEXICON Nouns
""")
In [76]:
# Loan phonology: append the %{☭%} marker to the first stem of every entry
# whose lexeme is in the list below.
# The list is matched against 'lex' as spelled at this point in the notebook
# (before the later ң/қ/л respelling cell), so it must run before that cell.
# NOTE(review): 'ноябрь' is absent from the month names, and 'элетростанция'
# looks like a typo for 'электростанция' — confirm against dictionary.json.
russ = set('январь февраль март апрель май июнь июль август сентябрь октябрь декабрь билет бригада бригадир буква \
понедельник вторник среда четверг пятница суббота воскресенье тетрадь ңэвучитель округ пионер революция порядка \
электричество элетростанция электроаак чернилаёчгын энанвалёматпункт'.split())
for entry in chunks:
    first_stem = entry['stem'][0]
    # The marker check makes the cell safe to re-run: without it every extra
    # execution would append another %{☭%} to the same stem.
    if entry['lex'] in russ and '☭' not in first_stem:
        entry['stem'][0] = first_stem + '%{☭%}'
# Disabled alternative, kept for reference: tag every stem of an entry as soon
# as one of its stems contains both a recessive and a dominant vowel.
# for st in chunks[i]['stem']:
# if set(st)&recessive and set(st)&dominant:
# chunks[i]['stem'] = list(map(lambda x: x+'%{☭%}', chunks[i]['stem']))
# break
In [77]:
# Mark up archiphonemes and loan phonology.
# The archiphonemes are Æ and G:
# G is used in the g_j classes, Æ everywhere.
# Æ
recessive = set('уюи')
dominant = set('аояё')
for entry in chunks:
    stems = entry['stem']
    base = stems[0]
    if '☭' in base:
        # Stems already tagged as loans are left alone.
        continue
    # Applies when the first stem has a recessive vowel, or when the second
    # stem has a dominant vowel that the first stem lacks.
    needs_ae = (not recessive.isdisjoint(base)
                or (not dominant.isdisjoint(stems[1]) and dominant.isdisjoint(base)))
    if not needs_ae:
        continue
    if 'э' in base:
        # Index 0 because stem[0] is the one taken as the stem.
        stems[0] = base.replace('э', '%{Æ%}')
    elif 'ле' in base:
        # Note: this rewrites every 'е' in the stem, not only the one after 'л'.
        stems[0] = base.replace('е', '%{Æ%}')
In [78]:
# G: in both g_j classes, drop the last character of the first stem and put
# the %{G%} archiphoneme in its place.
g_j_indices = classes['first_a_g_j_class'] + classes['singulative_lyng_g_j_class']
for idx in g_j_indices:
    stem_forms = chunks[idx]['stem']
    stem_forms[0] = stem_forms[0][:-1] + '%{G%}'
In [79]:
# N: in the two n/ng alternation classes, replace the second-to-last character
# of the first stem with %{N%} and keep the final character.
n_indices = classes['first_a_y_n_to_ng_class'] + classes['first_c_ng_to_n_class']
for idx in n_indices:
    stem_forms = chunks[idx]['stem']
    current = stem_forms[0]
    stem_forms[0] = ''.join([current[:-2], '%{N%}', current[-1]])
In [80]:
# Epenthesis: when a first stem that is not tagged as a loan ends in two
# characters from `cons`, insert the %{ы%} marker between them.
cons = 'йцкнгшщзхъждлрпвфчсмтьбңқ'
for entry in chunks:
    stem = entry['stem'][0]
    # Stems shorter than two characters cannot end in a cluster; skipping them
    # also prevents the IndexError that stem[-1] / stem[-2] would raise.
    if '☭' in stem or len(stem) < 2:
        continue
    if stem[-1] in cons and stem[-2] in cons:
        entry['stem'][0] = stem[:-1] + '%{ы%}' + stem[-1]
In [81]:
# Spot-check one entry after the stem rewrites above (the notebook gives no
# reason for choosing index 220).
chunks[220]
Out[81]:
In [82]:
# oops: fix the orthography after the fact. In the lexeme and in every stem,
# ң, қ, л and the ASCII apostrophe become ӈ, ӄ, ԓ and ʼ respectively.
respell_pairs = (('ң', 'ӈ'), ('қ', 'ӄ'), ('л', 'ԓ'), ("'", 'ʼ'))
for entry in chunks:
    lexeme = entry['lex']
    stem_forms = list(entry['stem'])
    for old_char, new_char in respell_pairs:
        lexeme = lexeme.replace(old_char, new_char)
        stem_forms = [form.replace(old_char, new_char) for form in stem_forms]
    entry['lex'] = lexeme
    entry['stem'] = stem_forms
In [83]:
# Disabled on purpose: enabling this would overwrite dictionary.json with the
# entries as modified by the cells above.
# with open('dictionary.json', 'w') as f:
# json.dump(chunks, f)
In [84]:
# Start the list of output lines. Each entry has the shape
# 'lex:stem CONTINUATION ; ! Russian gloss'.
# The three Ia classes below all go to N-Ia-AATGYR on their first stem.
to_write = []
ia_indices = (classes['first_a_class']
              + classes['first_a_y_class']
              + classes['first_a_g_j_class'])
for idx in ia_indices:
    entry = chunks[idx]
    line = '{}:{} N-Ia-AATGYR ; ! {}'.format(entry['lex'], entry['stem'][0], entry['trans_ru'])
    to_write.append(line)
In [85]:
# Ia n/ng class: two lines per lexeme — the third stem goes to the -ABS
# continuation, the first stem goes to N-OBL.
for idx in classes['first_a_y_n_to_ng_class']:
    entry = chunks[idx]
    for stem_pos, continuation in ((2, 'N-Ia-JYNYR-ABS'), (0, 'N-OBL')):
        to_write.append('{}:{} {} ; ! {}'.format(
            entry['lex'], entry['stem'][stem_pos], continuation, entry['trans_ru']))
In [86]:
# Class Ib: one line per lexeme, built on the first stem.
ib_entries = [chunks[idx] for idx in classes['first_b_class']]
for entry in ib_entries:
    to_write.append(
        '{}:{} N-Ib-ANGKY ; ! {}'.format(entry['lex'], entry['stem'][0], entry['trans_ru'])
    )
In [87]:
# Class Ic: one line per lexeme on the first stem, then a group separator.
for idx in classes['first_c_class']:
    entry = chunks[idx]
    lexeme, stem, gloss = entry['lex'], entry['stem'][0], entry['trans_ru']
    to_write.append('{}:{} N-Ic-AJKOL ; ! {}'.format(lexeme, stem, gloss))
# The lines are joined with '\n' when written, so a bare '\n' element leaves
# blank lines between groups.
to_write.append('\n')
In [88]:
# Ic ng/n class: the third stem feeds the -ABS continuation and the first
# stem feeds N-OBL; a separator closes the group.
for idx in classes['first_c_ng_to_n_class']:
    entry = chunks[idx]
    abs_line = '{}:{} N-Ic-EJNETKUNEN-ABS ; ! {}'.format(
        entry['lex'], entry['stem'][2], entry['trans_ru'])
    to_write.append(abs_line)
    obl_line = '{}:{} N-OBL ; ! {}'.format(
        entry['lex'], entry['stem'][0], entry['trans_ru'])
    to_write.append(obl_line)
to_write.append('\n')
In [89]:
# Class IIa (full reduplication): third stem to the -ABS continuation, first
# stem to N-OBL. No separator follows this group.
for idx in classes['second_a_full_redup_class']:
    entry = chunks[idx]
    for stem_pos, continuation in ((2, 'N-IIa-VIIL-ABS'), (0, 'N-OBL')):
        to_write.append('{}:{} {} ; ! {}'.format(
            entry['lex'], entry['stem'][stem_pos], continuation, entry['trans_ru']))
In [90]:
# Class IIb (reduplication without the last letter): third stem to the -ABS
# continuation, first stem to N-OBL, then a separator.
iib_entries = [chunks[idx] for idx in classes['second_b_redup_wo_last_letter_class']]
for entry in iib_entries:
    to_write.append('{}:{} N-IIb-VAJP-ABS ; ! {}'.format(
        entry['lex'], entry['stem'][2], entry['trans_ru']))
    to_write.append('{}:{} N-OBL ; ! {}'.format(
        entry['lex'], entry['stem'][0], entry['trans_ru']))
to_write.append('\n')
In [91]:
# Class III: regular members use the first stem as is; the "false lgyn"
# members get 'ԓг' appended to it. Both go to N-III-AVYNRAL.
for class_name, stem_suffix in (('third_class', ''), ('third_false_lgyn_class', 'ԓг')):
    for idx in classes[class_name]:
        entry = chunks[idx]
        surface = entry['stem'][0]
        to_write.append('{}:{}{} N-III-AVYNRAL ; ! {}'.format(
            entry['lex'], surface, stem_suffix, entry['trans_ru']))
to_write.append('\n')
In [92]:
# Class IV: plain members produce one line on the first stem; the "or"
# members produce an -ABS line on the third stem plus an N-OBL line on the
# first stem. A separator closes the group.
for idx in classes['fourth_class']:
    entry = chunks[idx]
    to_write.append(
        '{}:{} N-IV-ARAPA ; ! {}'.format(entry['lex'], entry['stem'][0], entry['trans_ru'])
    )
for idx in classes['fourth_or_class']:
    entry = chunks[idx]
    for stem_pos, continuation in ((2, 'N-IV-QORA-ABS'), (0, 'N-OBL')):
        to_write.append('{}:{} {} ; ! {}'.format(
            entry['lex'], entry['stem'][stem_pos], continuation, entry['trans_ru']))
to_write.append('\n')
In [93]:
# Class V: both subclasses use the first stem with its last character cut
# off; they differ only in the continuation they point to.
for class_name, continuation in (('five_v_class', 'N-Vv-AVEEN'),
                                 ('five_v_reduced_class', 'N-Vvy-AGNOTVAN')):
    for idx in classes[class_name]:
        entry = chunks[idx]
        trimmed = entry['stem'][0][:-1]
        to_write.append('{}:{} {} ; ! {}'.format(
            entry['lex'], trimmed, continuation, entry['trans_ru']))
to_write.append('\n')
In [94]:
# Singulative (lgyn) class: one line per lexeme on the first stem, then a
# separator.
lgyn_entries = (chunks[idx] for idx in classes['singulative_lgyn_class'])
for entry in lgyn_entries:
    to_write.append('{}:{} N-SING-AJOPYCH ; ! {}'.format(
        entry['lex'], entry['stem'][0], entry['trans_ru']))
to_write.append('\n')
In [95]:
# Singulative (lyng) classes, with and without the g/j alternation: one line
# per lexeme on the first stem, then a separator.
lyng_indices = classes['singulative_lyng_class'] + classes['singulative_lyng_g_j_class']
for idx in lyng_indices:
    entry = chunks[idx]
    lexeme, stem, gloss = entry['lex'], entry['stem'][0], entry['trans_ru']
    to_write.append('{}:{} N-SING-EPEEPEG ; ! {}'.format(lexeme, stem, gloss))
to_write.append('\n')
In [96]:
# aa/or class: third stem to the -ABS continuation, first stem to N-OBL,
# then a separator.
for idx in classes['aa_or_class']:
    entry = chunks[idx]
    for stem_pos, continuation in ((2, 'N-VANGQASQOR-ABS'), (0, 'N-OBL')):
        to_write.append('{}:{} {} ; ! {}'.format(
            entry['lex'], entry['stem'][stem_pos], continuation, entry['trans_ru']))
to_write.append('\n')
In [97]:
# Extra separator element: adds further blank lines before the uncertain
# groups that follow.
to_write.append('\n')
In [98]:
# Uncertain group: emitted as Ia, with a lexc comment line recording that it
# may belong to class III instead.
to_write.append('! this one may be III \n')
n_ending_entries = [chunks[idx] for idx in classes['first_n_ending_class']]
for entry in n_ending_entries:
    to_write.append('{}:{} N-Ia-AATGYR ; ! {}'.format(
        entry['lex'], entry['stem'][0], entry['trans_ru']))
to_write.append('\n')
In [99]:
# Uncertain group (Vv or III): emitted as Vv, i.e. on the first stem with its
# last character removed.
to_write.append('! either Vv or III \n')
for idx in classes['third_or_five_v_class']:
    entry = chunks[idx]
    trimmed = entry['stem'][0][:-1]
    to_write.append('{}:{} N-Vv-AVEEN ; ! {}'.format(entry['lex'], trimmed, entry['trans_ru']))
to_write.append('\n')
In [100]:
# Uncertain group: emitted as Ia on the first stem, flagged in the output as
# possibly class III.
to_write.append('! this one may be III \n')
for idx in classes['strange_yn_except_absolutive_class']:
    entry = chunks[idx]
    lexeme, stem, gloss = entry['lex'], entry['stem'][0], entry['trans_ru']
    to_write.append('{}:{} N-Ia-AATGYR ; ! {}'.format(lexeme, stem, gloss))
to_write.append('\n')
In [101]:
# Uncertain group (third or singulative): emitted as class III on the first
# stem, with the doubt recorded as a lexc comment line.
to_write.append('! either third or sing, either ending with ы or not \n')
strange_entries = (chunks[idx] for idx in classes['strange_third_class'])
for entry in strange_entries:
    to_write.append('{}:{} N-III-AVYNRAL ; ! {}'.format(
        entry['lex'], entry['stem'][0], entry['trans_ru']))
to_write.append('\n')
In [102]:
# Number of collected output elements (entries, comment lines and separators).
len(to_write)
Out[102]:
In [103]:
# Append the collected lines to the lexc file; mode 'a' keeps the header that
# was written when the file was created.
# UTF-8 is explicit because the entries contain Cyrillic and letters such as
# ӈ/ӄ/ԓ, which a non-UTF-8 platform default encoding may fail to encode.
with open(os.path.join('..', folder, dict_file), 'a', encoding='utf-8') as f:
    f.write('\n'.join(to_write))
In [104]:
# Rough coverage indicator only: to_write also holds separators, comment
# lines, and two lines per lexeme for the -ABS/N-OBL classes, so this is not
# the share of dictionary entries that were exported.
len(to_write)/len(chunks)
Out[104]:
In [ ]: