In [1]:
%matplotlib inline
import tensorflow as tf
from vahun.Text import Text
import numpy as np
from vahun.tools import Timer
from vahun.tools import explog
from vahun.variational_autoencoder import Variational_autoencoder
from vahun.tools import show_performance
encode=180
popsize=1
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
corpus=Text(corpus_path='/mnt/store/velkey/mnsz2/filt.200k.maxlen20',size=100000)
logger=explog(encoder_type="hunfiltered_lowered_uniq_"+str(encode),
encoding_dim=encode,feature_len=10,
lang="Hun",unique_words=len(set(corpus.wordlist)),
name="autoencoder_lowered_"+str(encode),population_size=popsize,
words=len(corpus.wordlist))
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
In [2]:
# Build the variational autoencoder and train it on the corpus splits
# (batch size 512, 80 epochs).
# NOTE(review): the logger in the setup cell was created with
# encoding_dim=180, but the model here uses encoding_size=400 — confirm
# which value the experiment intends.
adam = tf.train.AdamOptimizer(learning_rate=0.001)
encoder = Variational_autoencoder(
    logger=logger,
    tf_session=sess,
    inputdim=960,
    encoding_size=400,
    corpus=corpus,
    optimizer=adam,
    nonlinear=tf.sigmoid,
    charnum=len(corpus.abc),
)
encoder.train(corpus.x_train, corpus.x_valid, corpus.x_test, 512, 80)
In [3]:
# Evaluate reconstruction quality on 10 items from the held-out test set,
# printing the results; `stds` holds whatever show_performance returns
# (presumably per-word statistics — verify against vahun.tools).
stds=show_performance(encoder,corpus.x_test,corpus,10,printer=True,inputfsize=48,inputdepth=20)
In [4]:
# Same evaluation on a hand-picked list of Hungarian words (including an
# anagram pair "kutya"/"aytuk" and the pangram word "árvíztűrő") to spot-check
# reconstructions qualitatively.
stds=show_performance(encoder,["e","eh","kutya","aytuk","macska","árvíztűrő","fúró","kacsa","a","és"],corpus,printer=True,inputfsize=48,inputdepth=20)
In [5]:
def decode_critical(enc=180, feature_len=10, charnum=36):
    """Probe the decoder by zeroing one latent dimension at a time.

    For each latent position, decode an all-ones latent vector with that
    single position set to 0, and print the defeaturized character-level
    output, showing how each dimension influences the reconstruction.

    Args:
        enc: dimensionality of the latent code to probe.
        feature_len: word length used to reshape the decoder output.
        charnum: alphabet size used to reshape the decoder output.

    Uses the module-level `encoder` and `corpus` objects.
    """
    for pos in range(enc):
        code = np.ones(enc)
        code[pos] = 0
        # Decode a single latent vector; take the first (only) result and
        # reshape it into a (word position, character) one-hot matrix.
        decoded = encoder.decode([code])[0].reshape([feature_len, charnum])
        # Bug fix: the original referenced the undefined name `corp`
        # (NameError on first call); the module-level object is `corpus`.
        word = corpus.defeaturize_data_charlevel_onehot([decoded])
        print(word)
In [ ]: