In [1]:
%matplotlib inline
import tensorflow as tf
import numpy as np

from vahun.Text import Text
from vahun.autoencoder import Autoencoder_ffnn
from vahun.genetic import Settings
from vahun.tools import Timer, explog, get_reconstruction, show_performance

timer = Timer()

corpus_path = '/mnt/store/velkey/mnsz2/filt.200k.maxlen20'
encode = 200  # width of the bottleneck (encoding) layer

# Load the word corpus; size=0 presumably means "use the whole file".
corpus = Text(corpus_path=corpus_path, size=0)

# Experiment logger: records the encoder configuration and corpus statistics.
logger = explog(encoder_type="demo_autoencoder_top_uni" + str(encode),
                encoding_dim=encode,
                feature_len=20,  # words are padded to 20 characters
                lang=corpus_path,
                unique_words=len(set(corpus.wordlist)),
                name="demo_autoencoder_top_uni" + str(encode),
                population_size=0,
                words=len(corpus.wordlist))

# Let TensorFlow grow GPU memory on demand instead of claiming it all up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

In [2]:
# Feed-forward autoencoder: the flattened 20 x len(corpus.abc) one-hot input
# is compressed to `encode` dimensions, then reconstructed.
encoder = Autoencoder_ffnn(experiment=Settings([encode, len(corpus.abc) * 20]),
                           logger=logger, tf_session=sess,
                           inputdim=len(corpus.abc) * 20,
                           layerlist=[encode, len(corpus.abc) * 20],
                           encode_index=1,  # position of the encoding layer in layerlist
                           corpus=corpus,
                           optimizer=tf.train.AdamOptimizer(learning_rate=0.001),
                           nonlinear=tf.sigmoid,
                           charnum=len(corpus.abc))
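
For orientation, here is a minimal sketch of the char-level one-hot featurization implied by inputdim=len(corpus.abc)*20 and feature_len=20. The toy alphabet, the shorter pad length, and the helper name featurize are illustrative assumptions; vahun's actual Text featurizer may differ in details such as padding.

In [ ]:
import numpy as np

# Toy illustration (not part of vahun): one one-hot row per character,
# padded with all-zero rows, then flattened to a feature_len*len(abc) vector.
abc = "abc"          # hypothetical 3-letter alphabet
feature_len = 5      # hypothetical pad length (the notebook uses 20)

def featurize(word):
    mat = np.zeros((feature_len, len(abc)))
    for i, ch in enumerate(word[:feature_len]):
        mat[i, abc.index(ch)] = 1.0
    return mat.flatten()

featurize("ab")      # 15-dim vector; rows 0 and 1 are one-hot, the rest zero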

In [3]:
# Train the network; the trailing positional arguments are presumably
# the batch size (512) and the number of epochs (30).
encoder.train(corpus.x_train, corpus.x_valid, corpus.x_test, 512, 30)

In [4]:
# Reconstruct the test set; judging by the output file name below, each result
# row holds (original word, reconstruction, Levenshtein distance).
result = get_reconstruction(encoder, corpus.x_test, corpus, 20, len(corpus.abc))

In [5]:
# Append the result triples to a tab-separated file.
with open('/mnt/store/velkey/auto_200_results_with_levendist', "a") as myfile:
    for it in result:
        myfile.write('\t'.join(str(field) for field in it[:3]) + '\n')
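
The same dump could also be written with the standard library's csv module (a sketch; the output is identical apart from csv's quoting of fields that contain delimiters):

In [ ]:
import csv

# Equivalent TSV dump via csv.writer.
with open('/mnt/store/velkey/auto_200_results_with_levendist', "a") as f:
    csv.writer(f, delimiter='\t').writerows(it[:3] for it in result)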

In [ ]:
# Performance statistics on the test set; the name suggests per-dimension
# standard deviations of the encodings, used below to pick the top dimensions.
stds = show_performance(encoder, corpus.x_test, corpus, 20, printer=True,
                        inputfsize=len(corpus.abc))
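
Those statistics could be cross-checked directly, assuming encoder.encode accepts a batch and returns one code vector per input (it is used that way with single-element batches further below):

In [ ]:
import numpy as np

# Assumed shape: (n_test_words, encode).
codes = np.asarray(encoder.encode(corpus.x_test))
print(codes.std(axis=0))  # per-dimension standard deviation of the codes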

In [ ]:
# Probe hand-picked Hungarian words, plus "aytuk" ("kutya" reversed) and
# "árvíztűrő" (a standard accented-character test word).
std = show_performance(encoder,
                       ["eh", "kecske", "kutya", "aytuk", "macska",
                        "árvíztűrő", "fúró", "kacsa", "a", "és"],
                       corpus, printer=True, inputfsize=len(corpus.abc))

In [ ]:
def decode_critical(lista, enc=encode):
    # Push every latent dimension strongly negative except one, then decode,
    # to see what each dimension produces on its own.
    for pos in lista:
        encoded = np.ones(enc) * -10
        encoded[pos] = 10
        decoded = encoder.decode([encoded])[0].reshape([20, len(corpus.abc)])
        print(corpus.defeaturize_data_charlevel_onehot([decoded]))
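
A hypothetical invocation (the probed indices are arbitrary):

In [ ]:
# Decode the first five latent dimensions in isolation.
decode_critical(range(5))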

In [ ]:
import heapq

# Indices of the six latent dimensions with the largest values in stds.
topstd = heapq.nlargest(6, range(len(stds)), stds.__getitem__)

# For each of the first 50 test words: encode it, overwrite one high-variance
# dimension with a large activation (2), decode, and collect the strings.
big_stuff = []
for i in range(50):
    big_stuff.append([])
    for topind in topstd:
        code = encoder.encode([corpus.x_test[i]])
        code[0][topind] = 2
        decoded = encoder.decode([code[0]])[0].reshape([20, len(corpus.abc)])
        word = corpus.defeaturize_data_charlevel_onehot([decoded])
        big_stuff[i].append(word)
for row in big_stuff:
    print(row)
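
As a standalone illustration of the heapq index-selection idiom above (toy values, unrelated to the experiment):

In [ ]:
import heapq

vals = [0.1, 0.9, 0.3, 0.7]
# Indices of the two largest entries, largest first.
print(heapq.nlargest(2, range(len(vals)), vals.__getitem__))  # [1, 3]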
