In [3]:
import pdb
import scipy.misc as scimisc

from tkinter import *
from PIL import Image
from PIL import ImageTk

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.animation as animation

import MalmoPython
import os
import sys
import time
import random
import json
import numpy as np
from IPython.display import clear_output, display
import logging
import math

This notebook is used to craft the Gym environment for Minecraft Malmo.


In [4]:
import gym
import gym_minecraft
import itertools
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers

import baselines.common.tf_util as U

from baselines import logger
from baselines import deepq
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule

In [3]:
def preprocess(rgb_array, scale=1/12):
    frame_shape = rgb_array.shape

    frame = np.array(rgb_array)
    # convert RGB to grayscale using the standard luminance weights
    gray_frame = np.dot(frame[..., :3], [0.299, 0.587, 0.114]).reshape((frame_shape[0], frame_shape[1]))
    # downscale (scipy.misc.imresize is deprecated in newer SciPy)
    smaller = scimisc.imresize(gray_frame, scale, mode='L').astype('float64')
    smaller = np.expand_dims(smaller, 2)  # convert to a 3D array of shape (height, width, 1)
    return smaller
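
A quick sanity check of the preprocessor (illustrative only; it assumes scipy.misc.imresize is still available, which was removed in newer SciPy releases): a 300x420 RGB frame scaled by 1/12 should come out as (25, 35, 1).

dummy_frame = np.zeros((300, 420, 3), dtype=np.uint8)  # hypothetical all-black RGB frame
print(preprocess(dummy_frame).shape)  # expected: (25, 35, 1)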

In [4]:
def render(obs, root, canvas):
    obs = np.squeeze(obs, 2)
    # PIL's 'L' mode expects 8-bit unsigned pixels
    image = Image.fromarray(obs.astype('uint8'), mode='L')
    photo = ImageTk.PhotoImage(image)
    root.one = photo  # keep a reference so Tk does not garbage-collect the image
    canvas.delete("all")
    # create_image takes (x, y) coordinates; frame_width/frame_height are set globally below
    canvas.create_image(frame_width, frame_height, image=photo)
    root.update()

In [5]:
# Test

root = Tk()
root_frame = Frame(root)
canvas = Canvas(root_frame, borderwidth=0, highlightthickness=0, width=200, height=130, bg="black" )
root_frame.pack()
canvas.pack()

frame_height = 25  # 300 * (1/12), the preprocessed frame height
frame_width = 35   # 420 * (1/12), the preprocessed frame width


env = gym.make("MinecraftBasic-v0")
env.load_mission_file("./CliffWalking.xml")
env.init(videoResolution=[420,300],allowContinuousMovement=["move", "turn", "strafe"])


scale = 1/12 # scale image down by 1/12
newshape = (env.video_height*scale,env.video_width*scale,1) # dimension of 1 for grayscale
newshape = tuple(map(int,newshape))

# the preprocessor changes the observation shape, so update the environment's
# observation_space to take the preprocessor into account
env.observation_space = gym.spaces.Box(low=0, high=255, shape=newshape)

done = False

for i in range(1000):
    try:
        env.reset()
        while True:
            action = env.action_space.sample()
            obs, reward, done, info = env.step(action)
            proc_obs = preprocess(obs)
            
            render(proc_obs,root_frame,canvas)

            if done:
                break
    except:
        root.destroy()
        env.close()
        raise
env.close()
root.destroy()


Agent missed 31 observation(s).
Agent missed 1 observation(s).
Agent missed 1 observation(s).
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-5-67e4ab745eee> in <module>()
     34             proc_obs = preprocess(obs)
     35 
---> 36             render(proc_obs,root_frame,canvas)
     37 
     38             if done:

<ipython-input-4-203edae5a7c5> in render(obs, root, canvas)
      5     root.one = photo
      6     canvas.delete("all")
----> 7     canvas.create_image(frame_height,frame_width, image=photo)
      8     root.update()

~/miniconda2/envs/casper/lib/python3.5/tkinter/__init__.py in create_image(self, *args, **kw)
   2335     def create_image(self, *args, **kw):
   2336         """Create image item with coordinates x1,y1."""
-> 2337         return self._create('image', args, kw)
   2338     def create_line(self, *args, **kw):
   2339         """Create line with coordinates x1,y1,...,xn,yn."""

~/miniconda2/envs/casper/lib/python3.5/tkinter/__init__.py in _create(self, itemType, args, kw)
   2326         return self.tk.getint(self.tk.call(
   2327             self._w, 'create', itemType,
-> 2328             *(args + self._options(cnf, kw))))
   2329     def create_arc(self, *args, **kw):
   2330         """Create arc shaped region with coordinates x1,y1,x2,y2."""

KeyboardInterrupt: 

In [8]:
def model(inpt, num_actions, scope, reuse=False):
    """This model takes as input an observation and returns values of all actions."""
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        out = layers.fully_connected(out, num_outputs=64, activation_fn=tf.nn.tanh)
        out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)
        return out

# convolution stack followed by an MLP; each conv tuple is (num_filters, kernel_size, stride)
qmodel = deepq.models.cnn_to_mlp(
    convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
    hiddens=[256],
    dueling=True,
)

In [ ]:
U.reset()
with U.make_session(2):
    # Create the environment
    env = gym.make("MinecraftBasic-v0")
    env.init(videoResolution=[420,300],allowContinuousMovement=["move", "turn", "strafe"])
    env.load_mission_file("./CliffWalking.xml")
    scale = 1/12 # scale image down by 1/12
    newshape = (env.video_height*scale,env.video_width*scale,1) # dimension of 1 for grayscale
    newshape = tuple(map(int,newshape))

    # the preprocessor changes the observation shape, so update the environment's
    # observation_space to take the preprocessor into account
    env.observation_space = gym.spaces.Box(low=0, high=255, shape=newshape)
    # Create all the functions necessary to train the model
    act, train, update_target, debug = deepq.build_train(
        make_obs_ph=lambda name: U.BatchInput(env.observation_space.shape, name=name),
        q_func=qmodel,
        num_actions=env.action_space.n,
        optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
    )
    # Create the replay buffer
    replay_buffer = ReplayBuffer(50000)
    # Create the schedule for exploration starting from 1 (every action is random) down to
    # 0.02 (98% of actions are selected according to values predicted by the model).
    exploration = LinearSchedule(schedule_timesteps=10000, initial_p=1.0, final_p=0.02)

    # Initialize the parameters and copy them to the target network.
    U.initialize()
    update_target()

    episode_rewards = [0.0]
    preobs = env.reset()
    obs = preprocess(preobs)
    for t in itertools.count():
        # Take action and update exploration to the newest value
        action = act(obs[None], update_eps=exploration.value(t))[0]
        new_preobs, rew, done, _ = env.step(action)
        new_obs = preprocess(new_preobs)
        # Store transition in the replay buffer.
        replay_buffer.add(obs, action, rew, new_obs, float(done))
        obs = new_obs

        episode_rewards[-1] += rew
        if done:
            obs = preprocess(env.reset())  # start the new episode from a preprocessed frame
            episode_rewards.append(0)

        is_solved = t > 100 and np.mean(episode_rewards[-101:-1]) >= 200  # threshold carried over from the baselines CartPole example; adjust for this mission
        if is_solved:
            # Show off the result
            env.render()
        else:
            # Minimize the error in Bellman's equation on a batch sampled from replay buffer.
            if t > 1000:
                obses_t, actions, rewards, obses_tp1, dones = replay_buffer.sample(32)
                train(obses_t, actions, rewards, obses_tp1, dones, np.ones_like(rewards))
            # Update target network periodically.
            if t % 1000 == 0:
                update_target()

        if done and len(episode_rewards) % 10 == 0:
            logger.record_tabular("steps", t)
            logger.record_tabular("episodes", len(episode_rewards))
            logger.record_tabular("mean episode reward", round(np.mean(episode_rewards[-101:-1]), 1))
            logger.record_tabular("% time spent exploring", int(100 * exploration.value(t)))
            logger.dump_tabular()


WARNING:tensorflow:From /home/ubuntu/baselines/baselines/deepq/build_graph.py:366: arg_max (from tensorflow.python.ops.gen_math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `argmax` instead

In [3]:
env = gym.make("MinecraftBasic-v0")
env.load_mission_file("./CliffWalking.xml")
env.init(videoResolution=[40,40])


#scale = 1/12 # scale image down by 1/12
#newshape = (env.video_height*scale,env.video_width*scale,1) # dimension of 1 for grayscale
#newshape = tuple(map(int,newshape))

# the preprocessor changes the observation shape, so update the environment's
# observation_space to take the preprocessor into account
#env.observation_space = gym.spaces.Box(low=0, high=255,
#shape=newshape)


Unknown commandhandler MissionQuit

We need to override the environment's step() so that observations are preprocessed and the reward is calculated there; a rough sketch follows.
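
A minimal sketch of such a wrapper (illustrative only; the actual MinecraftWrapper imported from MinecraftGym below may differ, and the mission-specific reward shaping is left as a placeholder). It reuses the preprocess() defined above:

class PreprocessWrapper(gym.Wrapper):
    """Return preprocessed frames from step()/reset() and expose the matching observation_space."""
    def __init__(self, env, scale=1/12):
        super().__init__(env)
        h = int(env.video_height * scale)
        w = int(env.video_width * scale)
        self.scale = scale
        self.observation_space = gym.spaces.Box(low=0, high=255, shape=(h, w, 1))

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        # reward shaping for the cliff-walking mission would go here
        return preprocess(obs, self.scale), reward, done, info

    def reset(self):
        return preprocess(self.env.reset(), self.scale)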


In [ ]:
act = deepq.learn(
        env,
        q_func=qmodel,
        lr=1e-4,
        max_timesteps=1000,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,  # note: exceeds max_timesteps above, so no training updates occur in this short test run
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=True
)

env.close()


WARNING:tensorflow:From /home/ubuntu/baselines/baselines/deepq/build_graph.py:366: arg_max (from tensorflow.python.ops.gen_math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `argmax` instead

In [6]:
from MinecraftGym import MinecraftWrapper

In [14]:
env = gym.make("MinecraftCliffWalking1-v0")
env.init(videoResolution=[40,30],allowContinuousMovement=["move", "turn", "strafe"])

In [15]:
env2 = MinecraftWrapper(env,scale=1)

In [16]:
for i in range(1000):
    try:
        env.reset()
        while True:
            action = env.action_space.sample()
            obs, reward, done, info = env.step(action)
            #proc_obs = preprocess(obs)
            
            
            #render(proc_obs,root_frame,canvas)
            env.render()
            if done:
                break
    except:
        env.close()
        raise


Error starting mission: Failed to find an available client for this mission - tried all the clients in the supplied client pool.
Agent missed 1 observation(s).
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-16-7a0babea91e1> in <module>()
      1 for i in range(1000):
      2     try:
----> 3         env.reset()
      4         while True:
      5             action = env.action_space.sample()

~/miniconda2/envs/casper/lib/python3.5/site-packages/gym/core.py in reset(self)
    102             space.
    103         """
--> 104         return self._reset()
    105 
    106     def render(self, mode='human', close=False):

~/miniconda2/envs/casper/lib/python3.5/site-packages/gym_minecraft-0.0.2-py3.5.egg/gym_minecraft/envs/minecraft_env.py in _reset(self)
    250         world_state = self.agent_host.getWorldState()
    251         while not world_state.has_mission_begun:
--> 252             time.sleep(0.1)
    253             world_state = self.agent_host.getWorldState()
    254             for error in world_state.errors:

KeyboardInterrupt: 
