In [1]:
import os
import json
from pprint import pprint
In [2]:
# Load the dictionary entries and the mapping from inflection-class name to
# entry indices. The encoding is given explicitly: without it open() falls
# back to the locale codec, which is not UTF-8 on every platform and would
# mis-decode (or fail on) Cyrillic text stored unescaped in the JSON.
with open('dictionary.json', encoding='utf-8') as f:
    chunks = json.load(f)
with open('classes_indices.json', encoding='utf-8') as f:
    classes = json.load(f)
In [3]:
# Class names in alphabetical order; for each one show its name followed by
# the first dictionary entry assigned to it.
cls_order = sorted(classes)
for class_name in cls_order:
    print(class_name)
    first_member = classes[class_name][0]
    pprint(chunks[first_member])
In [4]:
# Show the sorted list of class names on its own.
pprint(cls_order)
In [5]:
# Output location of the generated lexc file: ../<folder>/<dict_file>.
folder = 'lexicons'
dict_file = 'nominals.lexc'

# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it has no
# check-then-create race and succeeds when the directory is already there.
os.makedirs(os.path.join('..', folder), exist_ok=True)

# Mode 'w' truncates the file, so re-running this cell starts the lexicon
# from scratch. The encoding is explicit so that this file is always UTF-8,
# whatever the platform's locale codec is.
with open(os.path.join('..', folder, dict_file), 'w', encoding='utf-8') as f:
    # Banner comment ('!' starts a comment in the written file).
    f.write("""!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!! L E X I C O N !!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
""")
    # Header line that opens the Nouns lexicon.
    f.write("""
LEXICON Nouns
""")
In [6]:
# Loan phonology: entries whose lemma is in this word list get the marker
# '%{☭%}' appended to stem[0]. Later cells skip any stem containing '☭'.
russ = 'январь февраль март апрель май июнь июль август сентябрь октябрь декабрь биԓет бригада бригадир буква \
понедельник вторник среда четверг пятница суббота воскресенье тетрадь ңэвучитель округ пионер революция порядка \
электричество элетростанция электроаак чернилаёчгын энанвалёматпункт'.split()
loan_marker = '%{☭%}'
loan_lemmas = set(russ)  # same words, hashed for the membership test
for entry in chunks:
    if entry['lex'] in loan_lemmas:
        entry['stem'][0] += loan_marker
# A disabled alternative used to live here as commented-out code: tag every
# stem form of an entry as soon as one form mixed vowels from `recessive`
# and `dominant`.
In [7]:
# Mark up archiphonemes and loan phonology.
# The archiphonemes are Æ and G:
# G only in the g_j classes, Æ everywhere.
# Æ
# Stems carrying the loan marker '☭' are skipped. The vowel test below is
# true when stem[0] contains a character from `recessive`, or when stem[1]
# contains a character from `dominant` while stem[0] contains none.
# NOTE(review): this export lost the cell's indentation, so it is not visible
# whether the 'ԓе' check is nested under the vowel test or under the 'э'
# check — confirm against the original notebook.
recessive = set('уюи')
dominant = set('аояё')
for i in range(len(chunks)):
if not '☭' in chunks[i]['stem'][0]:
if set(chunks[i]['stem'][0])&recessive or (set(chunks[i]['stem'][1])&dominant and not set(chunks[i]['stem'][0])&dominant):
if 'э' in chunks[i]['stem'][0]:
chunks[i]['stem'][0] = chunks[i]['stem'][0].replace('э', '%{Æ%}') # index 0 because stem[0] is the form that will be taken as the stem
# When stem[0] contains 'ԓе', every 'е' in it is replaced, not only the one
# that follows 'ԓ'.
if 'ԓе' in chunks[i]['stem'][0]:
chunks[i]['stem'][0] = chunks[i]['stem'][0].replace('е', '%{Æ%}')
In [8]:
# G archiphoneme: in both g_j classes the final character of stem[0] is
# swapped for the tag '%{G%}'.
g_j_members = classes['first_a_g_j_class'] + classes['singulative_lyng_g_j_class']
for idx in g_j_members:
    stems = chunks[idx]['stem']
    stems[0] = stems[0][:-1] + '%{G%}'
In [9]:
# N archiphoneme: in the two n/ng classes the second-to-last character of
# stem[0] is swapped for the tag '%{N%}'; the final character is kept.
n_ng_members = classes['first_a_y_n_to_ng_class'] + classes['first_c_ng_to_n_class']
for idx in n_ng_members:
    stems = chunks[idx]['stem']
    head, final = stems[0][:-2], stems[0][-1]
    stems[0] = head + '%{N%}' + final
In [10]:
# Epenthesis: when stem[0] ends in two characters that are both listed in
# `cons`, insert the tag '%{ы%}' between them. Stems carrying the loan
# marker '☭' are left alone.
cons = 'йцкнгшщзхъждлрпвфчсмтьбңқ'
for entry in chunks:
    stems = entry['stem']
    base = stems[0]
    if '☭' in base:
        continue
    # A stem with fewer than two characters has no final pair to split;
    # indexing base[-1] / base[-2] on it would raise IndexError, so skip it.
    if len(base) < 2:
        continue
    # A stem that already ends in a '%{...%}' tag ends in '}', which is not
    # in `cons`, so it is never touched here.
    if base[-1] in cons and base[-2] in cons:
        stems[0] = base[:-1] + '%{ы%}' + base[-1]
In [11]:
# Display one entry to inspect its current state. The index 220 is
# hard-coded. NOTE(review): presumably an arbitrary sample; confirm.
chunks[220]
Out[11]:
In [12]:
# Late orthography fix-up ("oops"): swap four characters in every lemma and
# in every stem form. One translation table does the job of the four chained
# replacements; none of the output characters is itself an input character,
# so the order of the substitutions does not matter.
orthography_fix = str.maketrans({'ң': 'ӈ', 'қ': 'ӄ', 'л': 'ԓ', "'": 'ʼ'})
for entry in chunks:
    entry['lex'] = entry['lex'].translate(orthography_fix)
    entry['stem'] = [form.translate(orthography_fix) for form in entry['stem']]
In [13]:
# with open('dictionary.json', 'w') as f:
# json.dump(chunks, f)
In [14]:
# Start the list of output lines. Each line has the shape
# "<lemma>:<stem> <continuation class> ; ! <Russian gloss>".
to_write = []
aatgyr_members = (
    classes['first_a_class']
    + classes['first_a_y_class']
    + classes['first_a_g_j_class']
)
# All three classes share the N-Ia-AATGYR continuation and use stem[0].
to_write.extend(
    '{}:{} N-Ia-AATGYR ; ! {}'.format(
        chunks[idx]['lex'], chunks[idx]['stem'][0], chunks[idx]['trans_ru']
    )
    for idx in aatgyr_members
)
In [15]:
# Two lines per entry: stem[2] paired with the -ABS continuation, then
# stem[0] paired with N-OBL.
jynyr_specs = (
    ('{}:{} N-Ia-JYNYR-ABS ; ! {}', 2),
    ('{}:{} N-OBL ; ! {}', 0),
)
for idx in classes['first_a_y_n_to_ng_class']:
    entry = chunks[idx]
    for template, stem_slot in jynyr_specs:
        to_write.append(
            template.format(entry['lex'], entry['stem'][stem_slot], entry['trans_ru'])
        )
In [16]:
# One line per first_b_class entry, built from stem[0].
to_write.extend(
    '{}:{} N-Ib-ANGKY ; ! {}'.format(
        chunks[idx]['lex'], chunks[idx]['stem'][0], chunks[idx]['trans_ru']
    )
    for idx in classes['first_b_class']
)
In [17]:
# One line per first_c_class entry, built from stem[0].
ajkol_template = '{}:{} N-Ic-AJKOL ; ! {}'
for idx in classes['first_c_class']:
    entry = chunks[idx]
    to_write.append(ajkol_template.format(entry['lex'], entry['stem'][0], entry['trans_ru']))
# A bare newline element leaves blank space after this group once the list
# is joined with '\n'.
to_write.append('\n')
In [18]:
# Two lines per entry: stem[2] with the -ABS continuation, stem[0] with N-OBL.
for idx in classes['first_c_ng_to_n_class']:
    entry = chunks[idx]
    abs_line = '{}:{} N-Ic-EJNETKUNEN-ABS ; ! {}'.format(
        entry['lex'], entry['stem'][2], entry['trans_ru']
    )
    obl_line = '{}:{} N-OBL ; ! {}'.format(
        entry['lex'], entry['stem'][0], entry['trans_ru']
    )
    to_write.extend((abs_line, obl_line))
# Group separator.
to_write.append('\n')
In [19]:
# Two lines per entry: stem[2] with the -ABS continuation, stem[0] with N-OBL.
viil_specs = (
    ('{}:{} N-IIa-VIIL-ABS ; ! {}', 2),
    ('{}:{} N-OBL ; ! {}', 0),
)
for idx in classes['second_a_full_redup_class']:
    entry = chunks[idx]
    for template, stem_slot in viil_specs:
        to_write.append(
            template.format(entry['lex'], entry['stem'][stem_slot], entry['trans_ru'])
        )
In [20]:
# Two lines per entry: stem[2] with the -ABS continuation, stem[0] with N-OBL.
for idx in classes['second_b_redup_wo_last_letter_class']:
    entry = chunks[idx]
    abs_line = '{}:{} N-IIb-VAJP-ABS ; ! {}'.format(
        entry['lex'], entry['stem'][2], entry['trans_ru']
    )
    obl_line = '{}:{} N-OBL ; ! {}'.format(
        entry['lex'], entry['stem'][0], entry['trans_ru']
    )
    to_write.extend((abs_line, obl_line))
# Group separator.
to_write.append('\n')
In [21]:
# Both classes continue to N-III-AVYNRAL. For third_false_lgyn_class the
# template writes 'ԓг' directly after stem[0].
third_specs = (
    ('third_class', '{}:{} N-III-AVYNRAL ; ! {}'),
    ('third_false_lgyn_class', '{}:{}ԓг N-III-AVYNRAL ; ! {}'),
)
for class_key, template in third_specs:
    for idx in classes[class_key]:
        entry = chunks[idx]
        to_write.append(template.format(entry['lex'], entry['stem'][0], entry['trans_ru']))
# Group separator.
to_write.append('\n')
In [22]:
# fourth_class: one line per entry from stem[0].
to_write.extend(
    '{}:{} N-IV-ARAPA ; ! {}'.format(
        chunks[idx]['lex'], chunks[idx]['stem'][0], chunks[idx]['trans_ru']
    )
    for idx in classes['fourth_class']
)
# fourth_or_class: two lines per entry, stem[2] with the -ABS continuation
# and stem[0] with N-OBL.
qora_specs = (
    ('{}:{} N-IV-QORA-ABS ; ! {}', 2),
    ('{}:{} N-OBL ; ! {}', 0),
)
for idx in classes['fourth_or_class']:
    entry = chunks[idx]
    for template, stem_slot in qora_specs:
        to_write.append(
            template.format(entry['lex'], entry['stem'][stem_slot], entry['trans_ru'])
        )
# Group separator.
to_write.append('\n')
In [23]:
# Both classes write stem[0] without its final character; only the
# continuation class differs.
five_v_specs = (
    ('five_v_class', '{}:{} N-Vv-AVEEN ; ! {}'),
    ('five_v_reduced_class', '{}:{} N-Vvy-AGNOTVAN ; ! {}'),
)
for class_key, template in five_v_specs:
    for idx in classes[class_key]:
        entry = chunks[idx]
        to_write.append(
            template.format(entry['lex'], entry['stem'][0][:-1], entry['trans_ru'])
        )
# Group separator.
to_write.append('\n')
In [24]:
# One line per singulative_lgyn_class entry, built from stem[0].
to_write.extend(
    '{}:{} N-SING-AJOPYCH ; ! {}'.format(
        chunks[idx]['lex'], chunks[idx]['stem'][0], chunks[idx]['trans_ru']
    )
    for idx in classes['singulative_lgyn_class']
)
# Group separator.
to_write.append('\n')
In [25]:
# The plain and the g_j singulative classes share one continuation class.
epeepeg_members = classes['singulative_lyng_class'] + classes['singulative_lyng_g_j_class']
epeepeg_template = '{}:{} N-SING-EPEEPEG ; ! {}'
for idx in epeepeg_members:
    entry = chunks[idx]
    to_write.append(epeepeg_template.format(entry['lex'], entry['stem'][0], entry['trans_ru']))
# Group separator.
to_write.append('\n')
In [26]:
# Two lines per entry: stem[2] with the -ABS continuation, stem[0] with N-OBL.
for idx in classes['aa_or_class']:
    entry = chunks[idx]
    abs_line = '{}:{} N-VANGQASQOR-ABS ; ! {}'.format(
        entry['lex'], entry['stem'][2], entry['trans_ru']
    )
    obl_line = '{}:{} N-OBL ; ! {}'.format(
        entry['lex'], entry['stem'][0], entry['trans_ru']
    )
    to_write.extend((abs_line, obl_line))
# Group separator.
to_write.append('\n')
In [27]:
# One more newline element, widening the gap before the groups that follow.
to_write += ['\n']
In [28]:
# A comment line for the output file flags the uncertain classification,
# then one N-Ia-AATGYR line per entry follows.
to_write.append('! this one may be III \n')
to_write.extend(
    '{}:{} N-Ia-AATGYR ; ! {}'.format(
        chunks[idx]['lex'], chunks[idx]['stem'][0], chunks[idx]['trans_ru']
    )
    for idx in classes['first_n_ending_class']
)
# Group separator.
to_write.append('\n')
In [29]:
# Comment line for the output file, then one N-Vv-AVEEN line per entry;
# stem[0] is written without its final character.
to_write.append('! either Vv or III \n')
aveen_template = '{}:{} N-Vv-AVEEN ; ! {}'
for idx in classes['third_or_five_v_class']:
    entry = chunks[idx]
    to_write.append(aveen_template.format(entry['lex'], entry['stem'][0][:-1], entry['trans_ru']))
# Group separator.
to_write.append('\n')
In [30]:
# Comment line for the output file, then one N-Ia-AATGYR line per entry.
to_write.append('! this one may be III \n')
aatgyr_template = '{}:{} N-Ia-AATGYR ; ! {}'
for idx in classes['strange_yn_except_absolutive_class']:
    entry = chunks[idx]
    to_write.append(aatgyr_template.format(entry['lex'], entry['stem'][0], entry['trans_ru']))
# Group separator.
to_write.append('\n')
In [31]:
# Comment line for the output file, then one N-III-AVYNRAL line per entry.
to_write.append('! either third or sing, either ending with ы or not \n')
to_write.extend(
    '{}:{} N-III-AVYNRAL ; ! {}'.format(
        chunks[idx]['lex'], chunks[idx]['stem'][0], chunks[idx]['trans_ru']
    )
    for idx in classes['strange_third_class']
)
# Group separator.
to_write.append('\n')
In [32]:
# Number of collected output lines. The count includes the '\n' separator
# elements and the '!' comment lines, not only dictionary entries.
len(to_write)
Out[32]:
In [33]:
# Append the collected lines after the header already in the lexicon file.
# The lines contain Cyrillic text and symbols such as '☭' and 'Æ', so the
# encoding is given explicitly: open() would otherwise use the locale codec,
# which may be unable to encode them or may produce a non-UTF-8 file.
lexicon_path = os.path.join('..', folder, dict_file)
with open(lexicon_path, 'a', encoding='utf-8') as f:
    f.write('\n'.join(to_write))
In [34]:
# Output lines per dictionary entry. This is not an exact coverage figure:
# to_write also holds separator and comment lines, and some classes
# contribute two lines per entry.
len(to_write)/len(chunks)
Out[34]:
In [ ]: