In [23]:
import tensorflow as tf
import numpy as np
import gym

In [24]:
import matplotlib.pyplot as plt
%matplotlib inline

In [25]:
sess = tf.InteractiveSession()



In [26]:
e = gym.make('Breakout-v0')
ob = e.reset()


[2016-05-19 17:09:21,721] Making new env: Breakout-v0

In [27]:
print ob.shape, ob.dtype
plt.imshow(ob)


(210, 160, 3) uint8
Out[27]:
<matplotlib.image.AxesImage at 0x7f66fc382150>

Converting to Grayscale


In [34]:
# rgb_to_grayscale keeps a trailing channel dimension of 1;
# the reshape drops it so plt.imshow can display the array
rgb_to_gray = tf.reshape(tf.image.rgb_to_grayscale(ob), [ob.shape[0], ob.shape[1]])

In [35]:
gray_ob = rgb_to_gray.eval()
gray_ob.shape, gray_ob.dtype


Out[35]:
((210, 160), dtype('uint8'))

In [36]:
plt.gray()
plt.imshow(gray_ob)


Out[36]:
<matplotlib.image.AxesImage at 0x7f66e008fc50>
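
Under the hood, rgb_to_grayscale computes a weighted sum of the RGB channels. As a quick sanity check (assuming TF uses the ITU-R 601 luma weights 0.2989, 0.5870, 0.1140, and allowing a difference of 1 for rounding):

In [ ]:
# hedged sanity check against a manual luma conversion
manual_gray = np.dot(ob.astype(np.float32), [0.2989, 0.5870, 0.1140])
print np.abs(manual_gray - gray_ob).max() <= 1.0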

Resizing Images


In [39]:
# let's get the current ratio
from __future__ import division
ratio = ob.shape[0] / ob.shape[1]
print ratio


1.3125

In [87]:
def resize_op(img, h, w):
    # resize_bilinear expects a 4-D (batch, h, w, c) tensor, so add a batch dim first
    resized_ob = tf.image.resize_bilinear(
        tf.reshape(img, [1, img.shape[0], img.shape[1], img.shape[2]]), [h, w])
    return tf.reshape(resized_ob, [h, w, 3])

In [88]:
# 84 x 84 is the input size used in the original DQN paper: 21168 values per RGB frame, or 7056 in grayscale
plt.imshow(resize_op(ob, 84, 84).eval())


Out[88]:
<matplotlib.image.AxesImage at 0x7f66d00740d0>
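
Note that squashing the 210 x 160 frame straight to 84 x 84 doesn't preserve the 1.3125 aspect ratio we computed above, which is why the next cell derives a ratio-preserving size instead. A quick check of the pixel counts quoted in the comment:

In [ ]:
print 84 * 84 * 3, 84 * 84  # 21168 7056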

In [111]:
w = 40
h = int(w * ratio)
rgb_pixels = 3 * h * w
grayscale_pixels = h * w
print 'height =', h, 'width =', w
print 'pixels with rgb =', rgb_pixels, 'grayscale =', grayscale_pixels
plt.imshow(resize_op(ob, h, w).eval())


height = 52 width = 40
pixels with rgb = 6240 grayscale = 2080
Out[111]:
<matplotlib.image.AxesImage at 0x7f66aca028d0>

Scaling Pixel Values


In [92]:
scaled_ob = tf.image.convert_image_dtype(ob, tf.float32, saturate=True).eval()
print scaled_ob.min(), scaled_ob.max()


0.0 0.784314
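
For a uint8 input, convert_image_dtype just divides by the dtype's maximum (255), which is why the largest raw pixel value of 200 maps to about 0.784314. A quick check against a manual conversion:

In [ ]:
# assumes uint8 -> float32 conversion divides by 255
manual_scaled = ob.astype(np.float32) / 255.0
print np.allclose(manual_scaled, scaled_ob)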

In [75]:
np.unique(ob)


Out[75]:
array([  0,  42,  48,  58,  66,  72, 108, 122, 130, 142, 158, 160, 162,
       180, 198, 200], dtype=uint8)

In [77]:
np.unique(gray_ob)


Out[77]:
array([  0,  84, 110, 123, 127, 129, 131, 142, 148], dtype=uint8)

In [94]:
np.unique(scaled_ob)


Out[94]:
array([ 0.        ,  0.16470589,  0.18823531,  0.227451  ,  0.25882354,
        0.28235295,  0.42352945,  0.4784314 ,  0.50980395,  0.55686277,
        0.61960787,  0.627451  ,  0.63529414,  0.70588237,  0.77647066,
        0.7843138 ], dtype=float32)

Putting it Together - Making an Image Preprocessing Pipeline


In [97]:
# assumes img.shape is (batch_size, h, w, c)
def img_preprocess(img, h, w):
    img = tf.convert_to_tensor(img)
    # collapse the RGB channels to a single luminance channel
    rgb2y = tf.image.rgb_to_grayscale(img)
    # resize_bilinear keeps the batch and channel dims and returns float32
    resized = tf.image.resize_bilinear(rgb2y, [h, w])
    return resized

In [105]:
obs = np.reshape(ob, [1] + list(ob.shape))
obs.shape


Out[105]:
(1, 210, 160, 3)
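
The same batch dimension can be added with np.expand_dims; this is just a style choice, not used elsewhere in the notebook:

In [ ]:
print np.expand_dims(ob, 0).shape  # (1, 210, 160, 3)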

In [112]:
preprocessed_img = img_preprocess(obs, 84, 84).eval()

In [113]:
plt.gray()
plt.imshow(preprocessed_img.reshape(84, 84))


Out[113]:
<matplotlib.image.AxesImage at 0x7f66ac95ae10>

We didn't do any scaling in the pipeline here; if we had, we wouldn't be able to show the image with plt.imshow as easily. But if we were feeding the input into a neural network, we would probably scale it.
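
As a minimal sketch (not run above), the scaling step could be folded into the pipeline like this; img_preprocess_scaled is a hypothetical name:

In [ ]:
# sketch only: scale to [0, 1] before converting to grayscale and resizing
# assumes img.shape is (batch_size, h, w, c)
def img_preprocess_scaled(img, h, w):
    img = tf.convert_to_tensor(img)
    scaled = tf.image.convert_image_dtype(img, tf.float32, saturate=True)
    gray = tf.image.rgb_to_grayscale(scaled)
    return tf.image.resize_bilinear(gray, [h, w])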

