In [1]:
import tensorflow as tf

References

(1) The common case

  • Use a generator
    • Because it goes through the Python API, it can become a bottleneck

In [2]:
def gen():
    for i in range(10):
        yield i

In [3]:
dataset = tf.data.Dataset.from_generator(gen, tf.float32)\
        .make_one_shot_iterator()\
        .get_next()

In [4]:
with tf.Session() as sess:
    _data = sess.run(dataset)
    print(_data)


0.0

In [5]:
with tf.Session() as sess:
    for _ in range(10):
        _data = sess.run(dataset)
        print(_data)


0.0
1.0
2.0
3.0
4.0
5.0
6.0
7.0
8.0
9.0

In [6]:
# Running past the end of the generator raises an end-of-sequence error (one way to handle it is sketched after the traceback)
with tf.Session() as sess:
    for _ in range(12):
        _data = sess.run(dataset)
        print(_data)


0.0
1.0
2.0
3.0
4.0
5.0
6.0
7.0
8.0
9.0
---------------------------------------------------------------------------
OutOfRangeError                           Traceback (most recent call last)
<ipython-input-6-600497cc9ec9> in <module>()
      1 with tf.Session() as sess:
      2     for _ in range(12):
----> 3         _data = sess.run(dataset)
      4         print(_data)

...

OutOfRangeError: End of sequence
	 [[Node: IteratorGetNext = IteratorGetNext[output_shapes=[<unknown>], output_types=[DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](OneShotIterator)]]
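
To consume the iterator without knowing its length, one option is to catch tf.errors.OutOfRangeError (another is to add .repeat() to the dataset so it restarts). A minimal sketch, re-using the dataset op defined above:

with tf.Session() as sess:
    try:
        while True:
            print(sess.run(dataset))   # keep fetching until the iterator is exhausted
    except tf.errors.OutOfRangeError:
        print("end of sequence")       # raised after the 10th element
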
  • If you want the generator to yield both a label and a feature

In [8]:
def gen():
    for i, j in zip(range(10, 20), range(10)):
        yield (i, j)

In [9]:
dataset = tf.data.Dataset.from_generator(gen, (tf.float32, tf.float32))\
        .make_one_shot_iterator()\
        .get_next()

In [12]:
with tf.Session() as sess:
    for _ in range(10):
        _label, _feature = sess.run(dataset)
        print(_label, _feature)


10.0 0.0
11.0 1.0
12.0 2.0
13.0 3.0
14.0 4.0
15.0 5.0
16.0 6.0
17.0 7.0
18.0 8.0
19.0 9.0
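
If the element shapes are known in advance, output_shapes can also be passed to from_generator so downstream ops get static shape information; a minimal sketch for the scalar (label, feature) pairs above (the explicit TensorShape arguments are an illustration, not required):

dataset = tf.data.Dataset.from_generator(
        gen,
        output_types=(tf.float32, tf.float32),
        output_shapes=(tf.TensorShape([]), tf.TensorShape([])))\
        .make_one_shot_iterator()\
        .get_next()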

If you want minibatches

  • shuffle first, then set the batch size (the argument to shuffle is the buffer size; see the prefetch sketch after the output below)

In [13]:
def gen():
    for i, j in zip(range(10, 1010), range(1000)):
        yield (i, j)

In [18]:
dataset = tf.data.Dataset.from_generator(gen, (tf.float32, tf.float32))\
        .shuffle(7777)\
        .batch(20)\
        .make_one_shot_iterator()\
        .get_next()

In [22]:
with tf.Session() as sess:
    for _ in range(10):
        _label, _feature = sess.run(dataset)
        print(_label, _feature)


[ 65. 833. 479. 731. 342. 152. 957. 600.  66. 391. 978. 239. 658. 287.
 928.  23. 415. 509. 726. 741.] [ 55. 823. 469. 721. 332. 142. 947. 590.  56. 381. 968. 229. 648. 277.
 918.  13. 405. 499. 716. 731.]
[365. 390. 249. 458. 771. 173. 611. 721. 631. 862. 673. 129. 929. 813.
 569. 892. 369. 503. 702. 545.] [355. 380. 239. 448. 761. 163. 601. 711. 621. 852. 663. 119. 919. 803.
 559. 882. 359. 493. 692. 535.]
[802. 614. 719. 266. 416. 122.  71. 678. 242. 883. 123. 115. 106. 424.
 517.  45. 608. 487. 321. 570.] [792. 604. 709. 256. 406. 112.  61. 668. 232. 873. 113. 105.  96. 414.
 507.  35. 598. 477. 311. 560.]
[330. 399. 803. 896. 627. 230. 316. 694. 603.  20. 350. 336. 297. 299.
  38.  30. 531. 810. 786. 615.] [320. 389. 793. 886. 617. 220. 306. 684. 593.  10. 340. 326. 287. 289.
  28.  20. 521. 800. 776. 605.]
[465. 934. 947. 235. 144. 711. 591. 992. 441. 843. 732. 687. 853. 301.
 153. 693. 196.  59. 548. 984.] [455. 924. 937. 225. 134. 701. 581. 982. 431. 833. 722. 677. 843. 291.
 143. 683. 186.  49. 538. 974.]
[ 389.  527.  240.  618.   75.  571.  363.  411.   58.  904.  586.  562.
  499.  515.  283.  915.  130.   95. 1007.  326.] [379. 517. 230. 608.  65. 561. 353. 401.  48. 894. 576. 552. 489. 505.
 273. 905. 120.  85. 997. 316.]
[733. 670.  47. 851. 902. 831. 935. 141. 970. 312. 804. 779. 983. 362.
 135. 112. 794. 606. 838. 438.] [723. 660.  37. 841. 892. 821. 925. 131. 960. 302. 794. 769. 973. 352.
 125. 102. 784. 596. 828. 428.]
[414. 762. 576. 659. 213. 367. 736. 542. 377. 832. 908. 341. 429. 462.
  93. 824. 195. 535. 241. 214.] [404. 752. 566. 649. 203. 357. 726. 532. 367. 822. 898. 331. 419. 452.
  83. 814. 185. 525. 231. 204.]
[623. 858. 567. 357. 307. 275. 686. 188. 997. 692. 601. 626. 302. 345.
 560. 181. 138. 248. 990. 712.] [613. 848. 557. 347. 297. 265. 676. 178. 987. 682. 591. 616. 292. 335.
 550. 171. 128. 238. 980. 702.]
[706. 856. 782. 556. 641. 243. 198. 575. 889. 875. 514. 383. 559. 557.
 537. 511. 890. 529. 448. 579.] [696. 846. 772. 546. 631. 233. 188. 565. 879. 865. 504. 373. 549. 547.
 527. 501. 880. 519. 438. 569.]
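
The 7777 passed to shuffle above is the shuffle buffer size, not a random seed. Because the elements still come out of a Python generator, adding prefetch lets the next batch be prepared while the current one is being consumed; a minimal sketch (the buffer sizes are illustrative assumptions):

dataset = tf.data.Dataset.from_generator(gen, (tf.float32, tf.float32))\
        .shuffle(1000)\
        .batch(20)\
        .prefetch(1)\
        .make_one_shot_iterator()\
        .get_next()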

(2) TextLineDataset

  • Can resolve the bottleneck (lines are read by native ops instead of going through Python)

In [24]:
dataset = tf.data.TextLineDataset("./test_data.csv")\
        .make_one_shot_iterator()\
        .get_next()

In [44]:
with tf.Session() as sess:
    _data = sess.run(dataset)
    print(_data)


b'1,1,2,3,4,5,6,7,8,9'
  • b'1,1,2,3,4,5,6,7,8,9' : the line comes back as a raw byte string, so it still needs to be decoded (e.g. with tf.decode_csv below)

In [45]:
dataset = tf.data.TextLineDataset("./test_data.csv")\
        .make_one_shot_iterator()\
        .get_next()

In [52]:
lines = tf.decode_csv(dataset, record_defaults=[[0]]*10)

In [53]:
feature = tf.stack(lines[1:])  # axis=1 is only needed once the lines are batched (see below)

In [54]:
label = lines[0]

In [56]:
with tf.Session() as sess:
    _fea, _lab = sess.run([feature, label])
    print(_lab, _fea)


1 [1 2 3 4 5 6 7 8 9]
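
The parsing can also live inside the pipeline itself via Dataset.map, so decode_csv runs per line and composes naturally with batch and shuffle; a minimal sketch, assuming the same 10-column CSV file:

def parse_line(line):
    # one label column followed by nine feature columns
    fields = tf.decode_csv(line, record_defaults=[[0]] * 10)
    label = tf.cast(fields[0], tf.float32)
    feature = tf.cast(tf.stack(fields[1:]), tf.float32)
    return feature, label

dataset = tf.data.TextLineDataset("./test_data.csv")\
        .map(parse_line)\
        .batch(2)\
        .make_one_shot_iterator()\
        .get_next()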

In [142]:
dataset = tf.data.TextLineDataset("./test_data.csv")\
        .batch(2)\
        .repeat(999999)\
        .make_one_shot_iterator()\
        .get_next()

In [143]:
lines = tf.decode_csv(dataset, record_defaults=[[0]]*10)
feature = tf.stack(lines[1:], axis=1)
label = tf.expand_dims(lines[0], axis=-1)

feature = tf.cast(feature, tf.float32)
label = tf.cast(label, tf.float32)
# cast to float32 so the loss and metric ops below run without dtype errors

In [144]:
with tf.Session() as sess:
    _fea, _lab = sess.run([feature, label])
    for f, l in zip(_fea, _lab):
        print(f, l)


[1. 2. 3. 4. 5. 6. 7. 8. 9.] [1.]
[0. 0. 1. 2. 3. 4. 5. 6. 7.] [1.]

Modeling


In [145]:
layer1 = tf.layers.dense(feature, units=9, activation=tf.nn.relu)
layer2 = tf.layers.dense(layer1, units=9, activation=tf.nn.relu)
layer3 = tf.layers.dense(layer2, units=9, activation=tf.nn.relu)
layer4 = tf.layers.dense(layer3, units=9, activation=tf.nn.relu)
out = tf.layers.dense(layer4, units=1)

In [146]:
print("label's shape {}".format(label))
# without tf.expand_dims the label would have shape (?,), e.g. [1, 2, 3, 4, 5, 6]
# and if it stayed int, the loss could not be computed


label's shape Tensor("Cast_13:0", shape=(?, 1), dtype=float32)

In [147]:
print("out's shape {}".format(out))
# shaped (?, 1), e.g. [[1], [2], [3], [4], [5], [6]]


out's shape Tensor("dense_54/BiasAdd:0", shape=(?, 1), dtype=float32)

Define the loss and optimizer


In [148]:
loss = tf.losses.sigmoid_cross_entropy(label, out)
  • "Shapes (?, 1) and (?,) are incompatible" error
    • match the shapes: use tf.expand_dims
  • "Value passed to parameter 'x' has DataType int32 not in list of allowed values" error
    • cast the value to float32: use tf.cast
  • "Attempting to use uninitialized value accuracy/total" error
    • run tf.local_variables_initializer() for the accuracy metric variables

In [155]:
train_op = tf.train.GradientDescentOptimizer(1e-2).minimize(loss)

pred = tf.nn.sigmoid(out)
accuracy = tf.metrics.accuracy(label, tf.round(pred))  # returns a (value, update_op) pair

In [156]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    for i in range(30):
        _, _loss, _acc = sess.run([train_op, loss, accuracy])
        print("step: {}, loss: {}, accuracy: {}".format(i, _loss, _acc))


step: 0, loss: 0.46937721967697144, accuracy: (0.0, 1.0)
step: 1, loss: 0.1210394948720932, accuracy: (1.0, 1.0)
step: 2, loss: 0.44066905975341797, accuracy: (1.0, 1.0)
step: 3, loss: 0.1059541404247284, accuracy: (1.0, 1.0)
step: 4, loss: 0.4197200536727905, accuracy: (1.0, 1.0)
step: 5, loss: 0.09589895606040955, accuracy: (1.0, 1.0)
step: 6, loss: 0.40277212858200073, accuracy: (1.0, 1.0)
step: 7, loss: 0.08771941065788269, accuracy: (1.0, 1.0)
step: 8, loss: 0.38517528772354126, accuracy: (1.0, 1.0)
step: 9, loss: 0.08122526109218597, accuracy: (1.0, 1.0)
step: 10, loss: 0.36560487747192383, accuracy: (1.0, 1.0)
step: 11, loss: 0.07562658190727234, accuracy: (1.0, 1.0)
step: 12, loss: 0.34701573848724365, accuracy: (1.0, 1.0)
step: 13, loss: 0.07004612684249878, accuracy: (1.0, 1.0)
step: 14, loss: 0.3287907838821411, accuracy: (1.0, 1.0)
step: 15, loss: 0.30994081497192383, accuracy: (1.0, 1.0)
step: 16, loss: 0.06589339673519135, accuracy: (1.0, 1.0)
step: 17, loss: 0.059850625693798065, accuracy: (1.0, 1.0)
step: 18, loss: 0.05646444857120514, accuracy: (1.0, 1.0)
step: 19, loss: 0.05126112699508667, accuracy: (1.0, 1.0)
step: 20, loss: 0.29497042298316956, accuracy: (1.0, 1.0)
step: 21, loss: 0.049495577812194824, accuracy: (1.0, 1.0)
step: 22, loss: 0.2776646614074707, accuracy: (1.0, 1.0)
step: 23, loss: 0.046578533947467804, accuracy: (1.0, 1.0)
step: 24, loss: 0.2614331841468811, accuracy: (1.0, 1.0)
step: 25, loss: 0.04399692639708519, accuracy: (1.0, 1.0)
step: 26, loss: 0.24561378359794617, accuracy: (1.0, 1.0)
step: 27, loss: 0.04194994270801544, accuracy: (1.0, 1.0)
step: 28, loss: 0.2311621904373169, accuracy: (1.0, 1.0)
step: 29, loss: 0.03943498805165291, accuracy: (1.0, 1.0)

Accuracy
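
tf.metrics.accuracy returns a (value, update_op) pair, which is why each accuracy entry printed above is a tuple. To read one accumulated accuracy figure, run the update op every step and fetch the value op at the end; a minimal sketch re-using the graph above:

acc_value, acc_update = tf.metrics.accuracy(label, tf.round(pred))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())  # the metric's counters are local variables
    for _ in range(30):
        sess.run([train_op, acc_update])        # update the running counts each step
    print(sess.run(acc_value))                  # accuracy accumulated over all 30 steps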

TFRecord

  • fast read and write speeds! (a minimal write/read sketch follows)
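
A minimal write/read sketch (the file name, feature keys, and toy rows below are assumptions for illustration, not from this notebook):

# write: serialize each (label, features) row into an Example proto
with tf.python_io.TFRecordWriter("./test_data.tfrecord") as writer:
    for label, feature in [(1.0, [1.0, 2.0, 3.0]), (0.0, [4.0, 5.0, 6.0])]:
        example = tf.train.Example(features=tf.train.Features(feature={
            "label": tf.train.Feature(float_list=tf.train.FloatList(value=[label])),
            "feature": tf.train.Feature(float_list=tf.train.FloatList(value=feature)),
        }))
        writer.write(example.SerializeToString())

# read: parse the serialized records back inside a tf.data pipeline
def parse_example(serialized):
    parsed = tf.parse_single_example(serialized, features={
        "label": tf.FixedLenFeature([1], tf.float32),
        "feature": tf.FixedLenFeature([3], tf.float32),
    })
    return parsed["feature"], parsed["label"]

records = tf.data.TFRecordDataset("./test_data.tfrecord")\
        .map(parse_example)\
        .make_one_shot_iterator()\
        .get_next()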