In [1]:
import tensorflow as tf
from tensorflow.python.ops import lookup_ops
In [2]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
# Map each character to an integer id; index 0 is the padding symbol.
table = lookup_ops.index_table_from_tensor(
    tf.constant(['PAD'] + list('aábcde'))
)
dataset = tf.contrib.data.Dataset.from_tensor_slices(
    tf.constant(["abá", "acű", "bcd"])
)
# Split each string into characters, then map the characters to their ids.
dataset = dataset.map(lambda string: tf.string_split([string], delimiter='').values)
dataset = dataset.map(lambda chars: table.lookup(chars))
# Pad every example to length 6 and emit batches of up to 5 examples.
batched = dataset.padded_batch(5, padded_shapes=tf.TensorShape([6]))
batched_iter = batched.make_initializable_iterator()
bstart = batched_iter.get_next()
# Both the lookup table and the iterator need explicit initialization.
table_initializer = tf.tables_initializer()
sess.run(table_initializer)
sess.run(batched_iter.initializer)
s = sess.run(bstart)
s
Out[2]:
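One detail worth flagging here: in this version of TensorFlow, tf.string_split with an empty delimiter splits strings into individual bytes, not Unicode characters, so a multi-byte UTF-8 character such as 'á' (and the out-of-vocabulary 'ű' in "acű") turns into bytes that miss the table and come back as its default_value of -1. A minimal sketch of the effect, reusing the session and table from the cell above (the printed values are what I would expect, not captured output):

# Hypothetical check: 'á' is two bytes in UTF-8, so splitting on the empty
# delimiter yields bytes that are not in the vocabulary table.
chars = tf.string_split([tf.constant("abá")], delimiter='').values
print(sess.run(chars))                # expect [b'a', b'b', b'\xc3', b'\xa1']
print(sess.run(table.lookup(chars)))  # expect [1, 3, -1, -1]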
In [3]:
# Write a tiny tab-separated source/target corpus.
with open('/tmp/toy_data.txt', 'w') as data_file:
    data_file.write("abc\tdef\n")
    data_file.write("def\tábc\n")
In [4]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
# Same character-to-id table as in the first example.
table = lookup_ops.index_table_from_tensor(
    tf.constant(['PAD'] + list('aábcde'))
)
dataset = tf.contrib.data.TextLineDataset('/tmp/toy_data.txt')
# Each line holds a tab-separated (source, target) pair.
dataset = dataset.map(lambda string: tf.string_split([string], delimiter='\t').values)
source = dataset.map(lambda pair: pair[0])
target = dataset.map(lambda pair: pair[1])
# Only the target side is batched in this cell (see the zip sketch below).
dataset = target
dataset = dataset.map(lambda string: tf.string_split([string], delimiter='').values)
dataset = dataset.map(lambda chars: table.lookup(chars))
batched = dataset.padded_batch(5, padded_shapes=tf.TensorShape([6]))
batched_iter = batched.make_initializable_iterator()
bstart = batched_iter.get_next()
table_initializer = tf.tables_initializer()
sess.run(table_initializer)
sess.run(batched_iter.initializer)
s = sess.run(bstart)
s
Out[4]:
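The cell above only batches the target side. For a sequence-to-sequence setup the two sides would normally be recombined into (source, target) pairs; a minimal sketch using Dataset.zip, under the assumption that the same character pipeline should apply to both sides (this continuation is not from the original notebook):

# Hypothetical continuation: run both sides through the character/lookup
# pipeline, then zip them so each element is a (source_ids, target_ids) pair.
def to_ids(d):
    return d.map(lambda s: table.lookup(tf.string_split([s], delimiter='').values))

pairs = tf.contrib.data.Dataset.zip((to_ids(source), to_ids(target)))
# padded_batch takes one padded shape per component of the zipped pair.
pairs = pairs.padded_batch(
    5, padded_shapes=(tf.TensorShape([6]), tf.TensorShape([6])))
pair_iter = pairs.make_initializable_iterator()
src_batch, tgt_batch = pair_iter.get_next()
sess.run(pair_iter.initializer)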