In [1]:
%matplotlib inline
import tensorflow as tf
import numpy as np
from vahun.Text import Text
from vahun.tools import Timer, explog, show_performance, get_reconstruction
from vahun.autoencoder import Autoencoder_ffnn
from vahun.genetic import Settings

timer = Timer()
corpus_path = '/mnt/store/velkey/mnsz2/filt.200k.maxlen20'
encode = 200  # size of the encoded (bottleneck) representation

corpus = Text(corpus_path=corpus_path, size=0)
logger = explog(encoder_type="demo_autoencoder_top_uni" + str(encode),
                encoding_dim=encode,
                feature_len=20,
                lang=corpus_path,
                unique_words=len(set(corpus.wordlist)),
                name="demo_autoencoder_top_uni" + str(encode),
                population_size=0,
                words=len(corpus.wordlist))

# Let TensorFlow grow GPU memory on demand instead of reserving it all up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
In [2]:
encoder = Autoencoder_ffnn(experiment=Settings([encode, len(corpus.abc) * 20]),
                           logger=logger, tf_session=sess,
                           inputdim=len(corpus.abc) * 20,  # 20 characters, one-hot over the alphabet
                           layerlist=[encode, len(corpus.abc) * 20],
                           encode_index=1, corpus=corpus,
                           optimizer=tf.train.AdamOptimizer(learning_rate=0.001),
                           nonlinear=tf.sigmoid,
                           charnum=len(corpus.abc))
In [3]:
encoder.train(corpus.x_train, corpus.x_valid, corpus.x_test, 512, 30)
In [4]:
result = get_reconstruction(encoder, corpus.x_test, corpus, 20, len(corpus.abc))
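Before writing everything to disk it can be worth eyeballing a few reconstructions. The cell below is a minimal sketch, assuming `result` is a list of per-word records in the layout the next cell writes out; it is not part of the original run.
In [ ]:
# Spot-check the first few reconstruction records (illustrative; assumes result is a list).
for it in result[:5]:
    print(it)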
In [5]:
with open('/mnt/store/velkey/auto_200_results_with_levendist', "a") as myfile:
    for it in result:
        # One tab-separated record per test word (the three fields returned by get_reconstruction).
        myfile.write('\t'.join(str(field) for field in it[:3]) + '\n')
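If summary statistics are wanted later, the dump can be read back with the standard library. A minimal sketch, assuming the third tab-separated column holds the Levenshtein distance that the file name suggests:
In [ ]:
import csv

# Illustrative follow-up: average the third column of the dump
# (assumed to be a Levenshtein distance, per the file name).
with open('/mnt/store/velkey/auto_200_results_with_levendist') as f:
    rows = [r for r in csv.reader(f, delimiter='\t') if len(r) == 3]
dists = [float(r[2]) for r in rows]
print("mean distance over", len(dists), "words:", sum(dists) / max(len(dists), 1))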
In [ ]:
stds = show_performance(encoder, corpus.x_test, corpus, 20, printer=True, inputfsize=len(corpus.abc))
In [ ]:
std = show_performance(encoder, ["eh", "kecske", "kutya", "aytuk", "macska", "árvíztűrő", "fúró", "kacsa", "a", "és"], corpus, 20, printer=True, inputfsize=len(corpus.abc))
In [ ]:
def decode_critical(lista, enc=encode):
    # Probe individual latent dimensions: decode a vector that is strongly
    # negative everywhere except at position POS, and print the resulting word.
    for POS in lista:
        encoded = np.ones(enc) * -10
        encoded[POS] = 10
        a = encoder.decode([encoded])[0].reshape([20, len(corpus.abc)])
        b = corpus.defeaturize_data_charlevel_onehot([a])
        print(b)
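A usage sketch for the helper above (illustrative only): decode the first few latent dimensions of the 200-unit code one at a time.
In [ ]:
# Illustrative call: see what each of the first five latent dimensions decodes to on its own.
decode_critical(range(5), enc=encode)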
In [ ]:
import heapq

# Indices of the six largest entries of stds (used below to pick latent dimensions to perturb).
topstd = heapq.nlargest(6, range(len(stds)), stds.__getitem__)

# For the first 50 test words, push each selected dimension to a large value
# and decode, to see how that dimension distorts the reconstruction.
big_stuff = []
for i in range(50):
    big_stuff.append([])
    for topind in topstd:
        a = encoder.encode([corpus.x_test[i]])
        a[0][topind] = 2
        b = encoder.decode([a[0]])[0].reshape([20, len(corpus.abc)])
        c = corpus.defeaturize_data_charlevel_onehot([b])
        big_stuff[i].append(c)

for row in big_stuff:
    print(row)