Sample Hangul RNN


In [1]:
# -*- coding: utf-8 -*-
# Import Packages
import numpy as np
import tensorflow as tf
import collections
import string
import argparse
import time
import os
from six.moves import cPickle
from TextLoader import *
from Hangulpy import *
print ("Packages Imported")


Packages Imported

Load dataset using TextLoader


In [2]:
data_dir    = "data/nine_dreams"
batch_size  = 50
seq_length  = 50
data_loader = TextLoader(data_dir, batch_size, seq_length)
# This makes "vocab.pkl" and "data.npy" in "data/nine_dreams"   
#  from "data/nine_dreams/input.txt" 
vocab_size = data_loader.vocab_size
vocab = data_loader.vocab
chars = data_loader.chars
print ( "type of 'data_loader' is %s, length is %d" 
       % (type(data_loader.vocab), len(data_loader.vocab)) )
print ( "\n" )
print ("data_loader.vocab looks like \n%s " %
       (data_loader.vocab))
print ( "\n" )
print ( "type of 'data_loader.chars' is %s, length is %d" 
       % (type(data_loader.chars), len(data_loader.chars)) )
print ( "\n" )
print ("data_loader.chars looks like \n%s " % (data_loader.chars,))


loading preprocessed files
type of 'data_loader' is <type 'dict'>, length is 76


data_loader.vocab looks like 
{u'_': 69, u'6': 59, u':': 57, u'\n': 19, u'4': 67, u'5': 63, u'>': 75, u'!': 52, u' ': 1, u'"': 28, u'\u1d25': 0, u"'": 49, u')': 46, u'(': 45, u'-': 65, u',': 27, u'.': 24, u'\u3131': 7, u'0': 73, u'\u3133': 60, u'\u3132': 29, u'\u3135': 50, u'\u3134': 4, u'\u3137': 13, u'\u3136': 44, u'\u3139': 5, u'\u3138': 32, u'\u313b': 55, u'\u313a': 48, u'\u313c': 54, u'?': 41, u'3': 66, u'\u3141': 12, u'\u3140': 51, u'\u3143': 47, u'\u3142': 17, u'\u3145': 10, u'\u3144': 43, u'\u3147': 2, u'\u3146': 22, u'\u3149': 40, u'\u3148': 15, u'\u314b': 42, u'\u314a': 23, u'\u314d': 31, u'\u314c': 30, u'\u314f': 3, u'\u314e': 14, u'\u3151': 34, u'\u3150': 21, u'\u3153': 11, u'\u3152': 74, u'\u3155': 18, u'\u3154': 20, u'\u3157': 9, u'\u3156': 39, u'\u3159': 53, u'\u3158': 26, u'\u315b': 38, u'\u315a': 33, u'\u315d': 36, u'\u315c': 16, u'\u315f': 35, u'\u315e': 61, u'\u3161': 8, u'\u3160': 37, u'\u3163': 6, u'\u3162': 25, u'\x1a': 72, u'9': 64, u'7': 71, u'2': 62, u'1': 58, u'\u313f': 56, u'\u313e': 70, u'8': 68} 


type of 'data_loader.chars' is <type 'tuple'>, length is 76


data_loader.chars looks like 
(u'\u1d25', u' ', u'\u3147', u'\u314f', u'\u3134', u'\u3139', u'\u3163', u'\u3131', u'\u3161', u'\u3157', u'\u3145', u'\u3153', u'\u3141', u'\u3137', u'\u314e', u'\u3148', u'\u315c', u'\u3142', u'\u3155', u'\n', u'\u3154', u'\u3150', u'\u3146', u'\u314a', u'.', u'\u3162', u'\u3158', u',', u'"', u'\u3132', u'\u314c', u'\u314d', u'\u3138', u'\u315a', u'\u3151', u'\u315f', u'\u315d', u'\u3160', u'\u315b', u'\u3156', u'\u3149', u'?', u'\u314b', u'\u3144', u'\u3136', u'(', u')', u'\u3143', u'\u313a', u"'", u'\u3135', u'\u3140', u'!', u'\u3159', u'\u313c', u'\u313b', u'\u313f', u':', u'1', u'6', u'\u3133', u'\u315e', u'2', u'5', u'9', u'-', u'3', u'4', u'8', u'_', u'\u313e', u'7', u'\x1a', u'0', u'\u3152', u'>') 

Define Network


In [3]:
rnn_size   = 512
num_layers = 3
grad_clip  = 5.

_batch_size = 1
_seq_length = 1

vocab_size = data_loader.vocab_size

with tf.device("/cpu:0"):
    # Select RNN Cell
    unitcell = tf.nn.rnn_cell.BasicLSTMCell(rnn_size)
    cell = tf.nn.rnn_cell.MultiRNNCell([unitcell] * num_layers)
    # Set paths to the graph 
    input_data = tf.placeholder(tf.int32, [_batch_size, _seq_length])
    targets    = tf.placeholder(tf.int32, [_batch_size, _seq_length])
    initial_state = cell.zero_state(_batch_size, tf.float32)

    # Set Network
    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w", [rnn_size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, rnn_size])
            inputs = tf.split(1, _seq_length, tf.nn.embedding_lookup(embedding, input_data))
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
            
    # Loop function for seq2seq
    def loop(prev, _):
        prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)
    # Output of RNN 
    outputs, last_state = tf.nn.seq2seq.rnn_decoder(inputs, initial_state
                                , cell, loop_function=None, scope='rnnlm')
    output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])
    logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
    # Next word probability 
    probs = tf.nn.softmax(logits)
    # Define LOSS
    loss = tf.nn.seq2seq.sequence_loss_by_example([logits], # Input
        [tf.reshape(targets, [-1])], # Target
        [tf.ones([_batch_size * _seq_length])], # Weight 
        vocab_size)
    # Define Optimizer
    cost = tf.reduce_sum(loss) / _batch_size / _seq_length
    final_state = last_state
    lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), grad_clip)
    _optm = tf.train.AdamOptimizer(lr)
    optm = _optm.apply_gradients(zip(grads, tvars))

print ("Network Ready")


Network Ready

In [6]:
# Sample ! 
def sample( sess, chars, vocab, __probs, num=200, prime=u'ㅇㅗᴥㄴㅡㄹᴥ '):
    state = sess.run(cell.zero_state(1, tf.float32))
    _probs = __probs
    prime = list(prime)
    for char in prime[:-1]:
        x = np.zeros((1, 1))
        x[0, 0] = vocab[char]
        feed = {input_data: x, initial_state:state}
        [state] = sess.run([final_state], feed)

    def weighted_pick(weights):
        weights = weights / np.sum(weights) 
        t = np.cumsum(weights)
        s = np.sum(weights)
        return(int(np.searchsorted(t, np.random.rand(1)*s)))

    ret = prime
    char = prime[-1]
    for n in range(num):
        x = np.zeros((1, 1))
        x[0, 0] = vocab[char]
        feed = {input_data: x, initial_state:state}
        [_probsval, state] = sess.run([_probs, final_state], feed)
        p = _probsval[0]
        sample = int(np.random.choice(len(p), p=p))
        # sample = weighted_pick(p)
        # sample = np.argmax(p)
        pred = chars[sample]
        ret += pred
        char = pred
    return ret
print ("sampling function done.")


sampling function done.

Sample


In [7]:
save_dir = 'data/nine_dreams'
prime = decompose_text(u"누구 ")

print ("Prime Text : %s => %s" % (automata(prime), "".join(prime)))
n = 2000

sess = tf.Session()
sess.run(tf.initialize_all_variables())
saver = tf.train.Saver(tf.all_variables())
ckpt = tf.train.get_checkpoint_state(save_dir)

# load_name = u'data/nine_dreams/model.ckpt-0'
load_name = u'data/nine_dreams/model.ckpt-99000'

print (load_name)

if ckpt and ckpt.model_checkpoint_path:
    saver.restore(sess, load_name)
    sampled_text = sample(sess, chars, vocab, probs, n, prime)
    #print ("")
    print (u"SAMPLED TEXT = %s" % sampled_text)
    print ("")
    print ("-- RESULT --")
    print (automata("".join(sampled_text)))


Prime Text : 누구  => ㄴㅜᴥㄱㅜᴥ 
data/nine_dreams/model.ckpt-99000
SAMPLED TEXT = [u'\u3134', u'\u315c', u'\u1d25', u'\u3131', u'\u315c', u'\u1d25', u' ', u'\u3145', u'\u3157', u'\u1d25', u'\u3139', u'\u3163', u'\u1d25', u'\u3147', u'\u3154', u'\u1d25', u' ', u'\u3145', u'\u3153', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u3146', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3131', u'\u3153', u'\u3145', u'\u1d25', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3157', u'\u1d25', u'\u3137', u'\u314f', u'\u1d25', u'.', u'"', u'\n', u' ', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3147', u'\u3153', u'\u1d25', u'\u3145', u'\u3153', u'\u1d25', u' ', u'\u3145', u'\u3153', u'\u1d25', u'\u3147', u'\u315c', u'\u3139', u'\u1d25', u'\u3147', u'\u3154', u'\u1d25', u' ', u'\u3137', u'\u3161', u'\u3139', u'\u1d25', u'\u3147', u'\u3153', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u'\u3134', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3153', u'\u1d25', u'\u3149', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3141', u'\u314f', u'\u3139', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3137', u'\u3161', u'\u3137', u'\u1d25', u'\u3131', u'\u3157', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u3146', u'\u1d25', u'\u3147', u'\u3161', u'\u1d25', u'\u3134', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3141', u'\u314f', u'\u3139', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3137', u'\u3161', u'\u3137', u'\u1d25', u'\u3131', u'\u3157', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u3146', u'\u1d25', u'\u3147', u'\u3161', u'\u1d25', u'\u3134', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3141', u'\u314f', u'\u3139', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3137', u'\u3161', u'\u3137', u'\u1d25', u'\u3131', u'\u3157', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u3146', u'\u1d25', u'\u3147', u'\u3153', u'\u3146', u'\u1d25', u'\u3137', u'\u314f', u'\u1d25', u'.', u' ', u'\n', u' ', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3147', u'\u3154', u'\u1d25', u' ', u'\u3137', u'\u3150', u'\u1d25', u'\u3137', u'\u314f', u'\u3142', u'\u1d25', u'\u314e', u'\u314f', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3147', u'\u3160', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3137', u'\u314f', u'\u1d25', u'\u3145', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3147', u'\u3163', u'\u1d25', u'\u3148', u'\u3154', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3147', u'\u3154', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3148', u'\u3153', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3153', u'\u1d25', u'\u3149', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3148', u'\u3153', u'\u1d25', u'\u3134', u'\u3161', u'\u3134', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3141', u'\u314f', u'\u3139', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3137', u'\u3161', u'\u3137', u'\u1d25', u'\u3131', u'\u3157', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3148', u'\u3153', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3153', u'\u1d25', u'\u3149', u'\u3163', u'\u1d25', u' ', u'\u3145', u'\u314f', u'\u1d25', u'\u3139', u'\u314f', u'\u3141', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3137', u'\u3161', u'\u3139', u'\u1d25', u'\u3147', u'\u3153', u'\u1d25', u' ', u'\u3131', u'\u314f', u'\u1d25', u'\u3145', u'\u3153', u'\u1d25', u' ', u'\u3131', u'\u3161', u'\u1d25', u' ', u'\u3147', u'\u314f', u'\u1d25', u'\u3139', u'\u3161', u'\u3141', u'\u1d25', u'\u3137', u'\u314f', u'\u1d25', u'\u3147', u'\u315c', u'\u3134', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u3139', u'\u1d25', u'\u3147', u'\u3163', u'\u1d25', u'\u3147', u'\u3153', u'\u3146', u'\u1d25', u'\u3137', u'\u314f', u'\u1d25', u'.', u' ', u'\n', u' ', u' ', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3141', u'\u314f', u'\u3139', u'\u1d25', u'\u3147', u'\u3154', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3148', u'\u3153', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3148', u'\u3153', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3153', u'\u1d25', u'\u3149', u'\u3163', u'\u1d25', u' ', u'\u3145', u'\u314f', u'\u1d25', u'\u3139', u'\u314f', u'\u3141', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3137', u'\u3161', u'\u3139', u'\u1d25', u'\u3147', u'\u3161', u'\u1d25', u'\u3134', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3141', u'\u314f', u'\u3139', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3137', u'\u3161', u'\u3137', u'\u1d25', u'\u3131', u'\u3157', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3148', u'\u3153', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3148', u'\u3153', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3153', u'\u1d25', u'\u3149', u'\u3163', u'\u1d25', u' ', u'\u3145', u'\u314f', u'\u1d25', u'\u3139', u'\u314f', u'\u3141', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3137', u'\u3161', u'\u3139', u'\u1d25', u'\u3147', u'\u3153', u'\u1d25', u' ', u'\u3131', u'\u314f', u'\u1d25', u'\u3145', u'\u3163', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u314f', u'\u1d25', u'\u3134', u'\u3163', u'\u1d25', u'\u3139', u'\u314f', u'\u1d25', u' ', u'\u314e', u'\u314f', u'\u1d25', u'\u3147', u'\u3157', u'\u1d25', u'\u3134', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u3139', u'\u1d25', u'\u3147', u'\u3161', u'\u3134', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3141', u'\u3157', u'\u3141', u'\u1d25', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3153', u'\u1d25', u'\u3149', u'\u3163', u'\u1d25', u' ', u'\u3131', u'\u314f', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\n', u'\u3142', u'\u3157', u'\u1d25', u'\u3134', u'\u3150', u'\u1d25', u'\u3147', u'\u3153', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3141', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u315c', u'\u1d25', u'\u3131', u'\u3157', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u3146', u'\u1d25', u'\u3147', u'\u3153', u'\u3146', u'\u1d25', u'\u3137', u'\u314f', u'\u1d25', u'.', u' ', u'\n', u' ', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3147', u'\u3154', u'\u1d25', u' ', u'\u3137', u'\u3150', u'\u1d25', u'\u3137', u'\u314f', u'\u3142', u'\u1d25', u'\u314e', u'\u314f', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3147', u'\u3160', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3147', u'\u3163', u'\u1d25', u'\u3148', u'\u3154', u'\u1d25', u' ', u'\u3145', u'\u314f', u'\u1d25', u'\u3139', u'\u314f', u'\u3141', u'\u1d25', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3148', u'\u3153', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3147', u'\u3163', u'\u1d25', u'\u3148', u'\u3154', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3147', u'\u3154', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3147', u'\u3160', u'\u1d25', u'\u3134', u'\u3161', u'\u3134', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3141', u'\u3163', u'\u1d25', u' ', u'\u3145', u'\u314f', u'\u1d25', u'\u3139', u'\u314f', u'\u3141', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3137', u'\u3161', u'\u3139', u'\u1d25', u'\u3147', u'\u3161', u'\u1d25', u'\u3134', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3141', u'\u314f', u'\u3139', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3137', u'\u3161', u'\u3137', u'\u1d25', u'\u3131', u'\u3157', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3148', u'\u3153', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3148', u'\u3153', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3153', u'\u1d25', u'\u3149', u'\u3163', u'\u1d25', u' ', u'\u3145', u'\u314f', u'\u1d25', u'\u3139', u'\u314f', u'\u3141', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3137', u'\u3161', u'\u3139', u'\u1d25', u'\u3147', u'\u3153', u'\u1d25', u' ', u'\u3131', u'\u314f', u'\u1d25', u'\u3145', u'\u3163', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u314f', u'\u1d25', u'\u3134', u'\u3163', u'\u1d25', u'\u3139', u'\u314f', u'\u1d25', u' ', u'\u314e', u'\u314f', u'\u1d25', u'\u3147', u'\u3157', u'\u1d25', u'\u3134', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u314f', u'\u1d25', u'\u3134', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3153', u'\u1d25', u'\u3149', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3145', u'\u314f', u'\u1d25', u'\u3139', u'\u314f', u'\u3141', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3145', u'\u314f', u'\u1d25', u'\u3139', u'\u314f', u'\u3147', u'\u1d25', u'\u314e', u'\u314f', u'\u1d25', u'\u3147', u'\u3155', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u3139', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\n', u'\u3142', u'\u3157', u'\u1d25', u'\u3134', u'\u3150', u'\u1d25', u'\u3147', u'\u3153', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3141', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u315c', u'\u1d25', u'\u3131', u'\u3157', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u3146', u'\u1d25', u'\u3147', u'\u3153', u'\u3146', u'\u1d25', u'\u3137', u'\u314f', u'\u1d25', u'.', u' ', u'\n', u' ', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3147', u'\u3154', u'\u1d25', u' ', u'\u3137', u'\u3150', u'\u1d25', u'\u3137', u'\u314f', u'\u3142', u'\u1d25', u'\u314e', u'\u314f', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3147', u'\u3160', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3147', u'\u3163', u'\u1d25', u'\u3148', u'\u3154', u'\u1d25', u' ', u'\u3145', u'\u314f', u'\u1d25', u'\u3139', u'\u314f', u'\u3141', u'\u1d25', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3148', u'\u3153', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3148', u'\u3153', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3153', u'\u1d25', u'\u3149', u'\u3163', u'\u1d25', u' ', u'\u3145', u'\u314f', u'\u1d25', u'\u3139', u'\u314f', u'\u3141', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3137', u'\u3161', u'\u3139', u'\u1d25', u'\u3147', u'\u3153', u'\u1d25', u' ', u'\u3131', u'\u314f', u'\u1d25', u'\u3145', u'\u3163', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u314f', u'\u1d25', u'\u3134', u'\u3163', u'\u1d25', u'\u3139', u'\u314f', u'\u1d25', u' ', u'\u314e', u'\u314f', u'\u1d25', u'\u3147', u'\u3157', u'\u1d25', u'\u3134', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u3139', u'\u1d25', u'\u3147', u'\u3161', u'\u3134', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3141', u'\u3157', u'\u3141', u'\u1d25', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3153', u'\u1d25', u'\u3149', u'\u3163', u'\u1d25', u' ', u'\u3131', u'\u314f', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\n', u'\u3142', u'\u3157', u'\u1d25', u'\u3134', u'\u3150', u'\u1d25', u'\u3147', u'\u3153', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3141', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u315c', u'\u1d25', u'\u3131', u'\u3157', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u3146', u'\u1d25', u'\u3147', u'\u3153', u'\u3146', u'\u1d25', u'\u3137', u'\u314f', u'\u1d25', u'.', u' ', u'\n', u' ', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3147', u'\u3154', u'\u1d25', u' ', u'\u3137', u'\u3150', u'\u1d25', u'\u3137', u'\u314f', u'\u3142', u'\u1d25', u'\u314e', u'\u314f', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3147', u'\u3160', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3141', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3137', u'\u3161', u'\u3139', u'\u1d25', u'\u3147', u'\u3153', u'\u1d25', u' ', u'\u3131', u'\u314f', u'\u1d25', u'\u3145', u'\u3163', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u3146', u'\u1d25', u'\u3147', u'\u3161', u'\u1d25', u'\u3134', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3141', u'\u3157', u'\u3141', u'\u1d25', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3153', u'\u1d25', u'\u3149', u'\u3163', u'\u1d25', u' ', u'\u3131', u'\u314f', u'\u1d25', u'\u314e', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3155', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3137', u'\u3161', u'\u3139', u'\u1d25', u'\u3147', u'\u3161', u'\u3134', u'\u1d25', u' ', u'\u3131', u'\u3153', u'\u3145', u'\u1d25', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3153', u'\u3144', u'\u1d25', u'\u3145', u'\u314f', u'\u1d25', u'\u3147', u'\u3157', u'\u1d25', u'\u3134', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3153', u'\u1d25', u'\u3149', u'\u3163', u'\u1d25', u' ', u'\u3131', u'\u3161', u'\u1d25', u' ', u'\u3147', u'\u3155', u'\u1d25', u'\u3131', u'\u3158', u'\u3134', u'\u1d25', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3153', u'\u3144', u'\u1d25', u'\u3134', u'\u3161', u'\u3134', u'\u1d25', u' ', u'\u3131', u'\u3153', u'\u3145', u'\u1d25', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u314f', u'\u1d25', u' ', u'\u314e', u'\u314f', u'\u1d25', u'\u3134', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3141', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\n', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u315c', u'\u1d25', u'\u3147', u'\u3153', u'\u3146', u'\u1d25', u'\u3137', u'\u314f', u'\u1d25', u'.', u' ', u'\n', u' ', u' ', u'"', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3141', u'\u314f', u'\u3139', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3137', u'\u3161', u'\u3139', u'\u1d25', u'\u3147', u'\u3153', u'\u1d25', u' ', u'\u3147', u'\u3157', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3134', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3141', u'\u3157', u'\u3141', u'\u1d25', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3153', u'\u1d25', u'\u3149', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3148', u'\u3153', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3148', u'\u3153', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3153', u'\u1d25', u'\u3149', u'\u3163', u'\u1d25', u' ', u'\u3145', u'\u314f', u'\u1d25', u'\u3139', u'\u314f', u'\u3141', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3137', u'\u3161', u'\u3139', u'\u1d25', u'\u3147', u'\u3161', u'\u1d25', u'\u3134', u'\u3163', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u' ', u'\u3141', u'\u314f', u'\u3139', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3137', u'\u3161', u'\u3137', u'\u1d25', u'\u3131', u'\u3157', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3148', u'\u3153', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u1d25', u'\u3131', u'\u3163', u'\u1d25', u'\u3139', u'\u3161', u'\u3139', u'\u1d25', u',', u' ', u'\n', u' ', u' ', u'"', u'\u3145', u'\u3157', u'\u1d25', u'\u3148', u'\u3153', u'\u1d25', u'\u3131', u'\u314f', u'\u1d25', u' ', u'\u3147', u'\u3153', u'\u1d25', u'\u3149', u'\u3163', u'\u1d25', u' ', u'\u3145', u'\u314f', u'\u1d25', u'\u3139', u'\u314f', u'\u3141', u'\u1d25', u'\u3147', u'\u3161', u'\u3139', u'\u1d25', u' ', u'\u3137', u'\u3161', u'\u3139', u'\u1d25', u'\u3147', u'\u3153', u'\u1d25', u' ', u'\u3131', u'\u314f', u'\u1d25', u'\u3145', u'\u3163', u'\u1d25', u'\u3131']

-- RESULT --
누구 소리에 서 있을 것이로다."
  이어서 서울에 들어가니 어찌 이 말을 듣고 있으니 이 말을 듣고 있으니 이 말을 듣고 있었다. 
  이에 대답하기를, 
  "소유가 다시 이르기를, 
  "이제 이에 이르기를, 
  "소저가 어찌 이르기를, 
  "소저는 이 말을 듣고 이르기를, 
  "소저가 어찌 사람을 들어 가서 그 아름다운 일이었다. 
  이 말에 이르기를, 
  "소저가 이르기를, 
  "소저가 어찌 사람을 들으니 이 말을 듣고 이르기를, 
  "소저가 이르기를, 
  "소저가 어찌 사람을 들어 가시가 아니라 하오니 이 일은 이 몸이 어찌 가을을 
보내어 이름을 이루고 있었다. 
  이에 대답하기를, 
  "소유가 이르기를, 
  "이제 사람이 이르기를, 
  "소저가 이르기를, 
  "이제 이에 이르기를, 
  "소유는 이미 사람을 들으니 이 말을 듣고 이르기를, 
  "소저가 이르기를, 
  "소저가 어찌 사람을 들어 가시가 아니라 하오니 이 아니 어찌 이를 사람을 사랑하여 이 일을 
보내어 이름을 이루고 있었다. 
  이에 대답하기를, 
  "소유가 이르기를, 
  "이제 사람이 이르기를, 
  "소저가 이르기를, 
  "소저가 어찌 사람을 들어 가시가 아니라 하오니 이 일은 이 몸이 어찌 가을을 
보내어 이름을 이루고 있었다. 
  이에 대답하기를, 
  "소유가 이름을 들어 가시가 있으니 이 몸이 어찌 가히 여기를 들은 것이 없사오니 어찌 그 여관이 없는 것이라 하나 이 이름을 
이루었다. 
  "이 말을 들어 오르니 이 몸이 어찌 이르기를, 
  "소저가 이르기를, 
  "소저가 어찌 사람을 들으니 이 말을 듣고 이르기를, 
  "소저가 이르기를, 
  "소저가 어찌 사람을 들어 가시

In [ ]: