In [3]:
from vahun.corpus import Corpus
import numpy as np
import tensorflow as tf
from vahun.tools import Timer, explog
from vahun.variational_autoencoder import Variational_autoencoder

encode = 10
dictsize = 2500000
popsize = 1

# Let the GPU allocator grow on demand instead of reserving all memory up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
#config.gpu_options.per_process_gpu_memory_fraction=1

corp_path = '/mnt/permanent/Language/Hungarian/Corp/Webkorpusz/webkorpusz.wpl'
corp = Corpus(corpus_path=corp_path,
              needed_corpus=["hun_lower_unique"],
              language="Hun", size=dictsize, encoding_len=10)

# Mark the unique lowercase word list, then one-hot encode it at character level.
database = corp.mark_list(corp.hun_lower_unique)
all_features = corp.featurize_data_charlevel_onehot(database)

# 80/20 train/test split; flatten each per-word matrix into a single feature vector.
train = all_features[:int(len(all_features) * 0.8)]
test = all_features[int(len(all_features) * 0.8):]
x_train = train.reshape((len(train), np.prod(train.shape[1:])))
x_test = test.reshape((len(test), np.prod(test.shape[1:])))
print(x_train.shape)

logger = explog(encoder_type="variational_uniq_with_marks" + str(encode),
                encoding_dim=encode, feature_len=0,
                lang="Hun", unique_words=len(set(database)),
                name="variational_uniq" + str(encode), population_size=popsize,
                words=len(corp.hun_lower_unique))
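The next cell builds models with inputdim=380, which, together with encoding_len=10, implies 38 one-hot channels per character position (10 × 38 = 380). As a rough, self-contained illustration of what a character-level one-hot featurizer such as featurize_data_charlevel_onehot presumably produces — the alphabet, padding, and out-of-alphabet handling below are assumptions, not vahun's actual rules:

import numpy as np

# Hypothetical 36-symbol alphabet; vahun's real character set (38 symbols,
# given 380 = 10 * 38) is not shown in this notebook.
ALPHABET = "abcdefghijklmnopqrstuvwxyzáéíóöőúüű "
CHAR_TO_IDX = {c: i for i, c in enumerate(ALPHABET)}

def onehot_word(word, encoding_len=10):
    """One-hot encode a word as an (encoding_len, alphabet) matrix,
    truncating long words and padding short ones with spaces."""
    arr = np.zeros((encoding_len, len(ALPHABET)), dtype=np.float32)
    for pos, ch in enumerate(word[:encoding_len].ljust(encoding_len)):
        arr[pos, CHAR_TO_IDX.get(ch, CHAR_TO_IDX[" "])] = 1.0
    return arr

demo = np.stack([onehot_word(w) for w in ["alma", "körte"]])
print(demo.shape)                         # (2, 10, 36)
print(demo.reshape(len(demo), -1).shape)  # (2, 360); vahun's flattened shape would be (N, 380)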
In [6]:
def experiment(min_enc=1, max_enc=20):
    """Train two VAEs for every latent size in 10 * [min_enc, max_enc)."""
    for encoding in range(min_enc, max_enc):
        for run in range(2):  # two independent runs per encoding size
            # Fresh graph and session per run, so repeated model builds do not
            # pile up in TensorFlow's default graph.
            tf.reset_default_graph()
            sess = tf.Session(config=config)
            variational = Variational_autoencoder(
                logger=logger, tf_session=sess,
                inputdim=380,
                encoding_size=encoding * 10,
                optimizer=tf.train.AdamOptimizer(learning_rate=0.001),
                nonlinear=tf.sigmoid)
            variational.train(x_train, x_test, 512, 50)
            sess.close()
In [7]:
# Latent sizes 60, 70, ..., 150 (encoding_size = encoding * 10 for encoding in 6..15).
experiment(6, 16)
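For reference, a dense variational autoencoder with the same interface shape used above (sigmoid nonlinearities, Adam at learning rate 0.001, 380-dimensional inputs) could be sketched in raw TF1 as below. This is a generic textbook VAE, not vahun's Variational_autoencoder; the hidden width of 128 and the Bernoulli reconstruction loss are assumptions.

import numpy as np
import tensorflow as tf

def build_vae(inputdim=380, encoding_size=60, learning_rate=0.001):
    """Generic dense VAE: encoder -> (mu, log-variance) -> sampled z -> decoder."""
    x = tf.placeholder(tf.float32, [None, inputdim])
    h = tf.layers.dense(x, 128, activation=tf.sigmoid)  # hidden width is an assumption
    mu = tf.layers.dense(h, encoding_size)
    logvar = tf.layers.dense(h, encoding_size)
    eps = tf.random_normal(tf.shape(mu))
    z = mu + tf.exp(0.5 * logvar) * eps                 # reparameterization trick
    h_dec = tf.layers.dense(z, 128, activation=tf.sigmoid)
    x_hat = tf.layers.dense(h_dec, inputdim, activation=tf.sigmoid)
    # Bernoulli reconstruction term plus KL divergence to the unit Gaussian prior.
    recon = -tf.reduce_sum(x * tf.log(x_hat + 1e-8)
                           + (1 - x) * tf.log(1 - x_hat + 1e-8), axis=1)
    kl = -0.5 * tf.reduce_sum(1 + logvar - tf.square(mu) - tf.exp(logvar), axis=1)
    loss = tf.reduce_mean(recon + kl)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    return x, loss, train_op

# Usage sketch: 50 epochs of batch-512 updates, assuming that is what the
# positional arguments in variational.train(x_train, x_test, 512, 50) mean.
tf.reset_default_graph()
x, loss, train_op = build_vae()
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(50):
        order = np.random.permutation(len(x_train))
        for i in range(0, len(order), 512):
            sess.run(train_op, {x: x_train[order[i:i + 512]]})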