In [3]:
from vahun.corpus import Corpus
import numpy as np
import tensorflow as tf
from vahun.tools import Timer, explog
from vahun.variational_autoencoder import Variational_autoencoder

encode = 10
dictsize = 2500000
popsize = 1

# Let the GPU allocator grow on demand instead of reserving all memory up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
#config.gpu_options.per_process_gpu_memory_fraction=1

corp_path = '/mnt/permanent/Language/Hungarian/Corp/Webkorpusz/webkorpusz.wpl'
corp = Corpus(corpus_path=corp_path,
              needed_corpus=["hun_lower_unique"],
              language="Hun", size=dictsize, encoding_len=10)

# Mark the unique lowercase word list, then one-hot encode it at character level.
database = corp.mark_list(corp.hun_lower_unique)
all_features = corp.featurize_data_charlevel_onehot(database)

# 80/20 train/test split; flatten each per-word matrix into a single feature vector.
train = all_features[:int(len(all_features) * 0.8)]
test = all_features[int(len(all_features) * 0.8):]
x_train = train.reshape((len(train), np.prod(train.shape[1:])))
x_test = test.reshape((len(test), np.prod(test.shape[1:])))
print(x_train.shape)

logger = explog(encoder_type="variational_uniq_with_marks" + str(encode),
                encoding_dim=encode, feature_len=0,
                lang="Hun", unique_words=len(set(database)),
                name="variational_uniq" + str(encode), population_size=popsize,
                words=len(corp.hun_lower_unique))
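The next cell builds models with inputdim=380, which, together with encoding_len=10, implies 38 one-hot channels per character position (10 × 38 = 380). As a rough, self-contained illustration of what a character-level one-hot featurizer such as featurize_data_charlevel_onehot presumably produces — the alphabet, padding, and out-of-alphabet handling below are assumptions, not vahun's actual rules:

import numpy as np

# Hypothetical 36-symbol alphabet; vahun's real character set (38 symbols,
# given 380 = 10 * 38) is not shown in this notebook.
ALPHABET = "abcdefghijklmnopqrstuvwxyzáéíóöőúüű "
CHAR_TO_IDX = {c: i for i, c in enumerate(ALPHABET)}

def onehot_word(word, encoding_len=10):
    """One-hot encode a word as an (encoding_len, alphabet) matrix,
    truncating long words and padding short ones with spaces."""
    arr = np.zeros((encoding_len, len(ALPHABET)), dtype=np.float32)
    for pos, ch in enumerate(word[:encoding_len].ljust(encoding_len)):
        arr[pos, CHAR_TO_IDX.get(ch, CHAR_TO_IDX[" "])] = 1.0
    return arr

demo = np.stack([onehot_word(w) for w in ["alma", "körte"]])
print(demo.shape)                         # (2, 10, 36)
print(demo.reshape(len(demo), -1).shape)  # (2, 360); vahun's flattened shape would be (N, 380)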
In [6]:
def experiment(min_enc=1, max_enc=20):
    """Train two VAEs for every latent size in 10 * [min_enc, max_enc)."""
    for encoding in range(min_enc, max_enc):
        for run in range(2):  # two independent runs per encoding size
            # Fresh graph and session per run, so repeated model builds do not
            # pile up in TensorFlow's default graph.
            tf.reset_default_graph()
            sess = tf.Session(config=config)
            variational = Variational_autoencoder(
                logger=logger, tf_session=sess,
                inputdim=380,
                encoding_size=encoding * 10,
                optimizer=tf.train.AdamOptimizer(learning_rate=0.001),
                nonlinear=tf.sigmoid)
            variational.train(x_train, x_test, 512, 50)
            sess.close()
In [7]:
# Latent sizes 60, 70, ..., 150 (encoding_size = encoding * 10 for encoding in 6..15).
experiment(6, 16)
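For reference, a dense variational autoencoder with the same interface shape used above (sigmoid nonlinearities, Adam at learning rate 0.001, 380-dimensional inputs) could be sketched in raw TF1 as below. This is a generic textbook VAE, not vahun's Variational_autoencoder; the hidden width of 128 and the Bernoulli reconstruction loss are assumptions.

import numpy as np
import tensorflow as tf

def build_vae(inputdim=380, encoding_size=60, learning_rate=0.001):
    """Generic dense VAE: encoder -> (mu, log-variance) -> sampled z -> decoder."""
    x = tf.placeholder(tf.float32, [None, inputdim])
    h = tf.layers.dense(x, 128, activation=tf.sigmoid)  # hidden width is an assumption
    mu = tf.layers.dense(h, encoding_size)
    logvar = tf.layers.dense(h, encoding_size)
    eps = tf.random_normal(tf.shape(mu))
    z = mu + tf.exp(0.5 * logvar) * eps                 # reparameterization trick
    h_dec = tf.layers.dense(z, 128, activation=tf.sigmoid)
    x_hat = tf.layers.dense(h_dec, inputdim, activation=tf.sigmoid)
    # Bernoulli reconstruction term plus KL divergence to the unit Gaussian prior.
    recon = -tf.reduce_sum(x * tf.log(x_hat + 1e-8)
                           + (1 - x) * tf.log(1 - x_hat + 1e-8), axis=1)
    kl = -0.5 * tf.reduce_sum(1 + logvar - tf.square(mu) - tf.exp(logvar), axis=1)
    loss = tf.reduce_mean(recon + kl)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    return x, loss, train_op

# Usage sketch: 50 epochs of batch-512 updates, assuming that is what the
# positional arguments in variational.train(x_train, x_test, 512, 50) mean.
tf.reset_default_graph()
x, loss, train_op = build_vae()
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(50):
        order = np.random.permutation(len(x_train))
        for i in range(0, len(order), 512):
            sess.run(train_op, {x: x_train[order[i:i + 512]]})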