In [1]:
    
import gym
import keras
import numpy as np
    
    
In [2]:
    
# Hyperparameters
H = 200 # number of hidden layer neurons
batch_size = 10 # every how many episodes to do a param update?
learning_rate = 1e-3
gamma = 0.99 # discount factor for reward
decay_rate = 0.99 # decay factor for RMSProp leaky sum of grad^2
resume = False # resume from previous checkpoint?
render = False
D = 80 * 80 # input dimensionality: 80x80 grid
running_reward = None # running mean of episode rewards, initialized after the first episode
def prepro(I):
  """ prepro 210x160x3 uint8 frame into 6400 (80x80) 1D float vector """
  I = I[35:195] # crop
  I = I[::2,::2,0] # downsample by factor of 2
  I[I == 144] = 0 # erase background (background type 1)
  I[I == 109] = 0 # erase background (background type 2)
  I[I != 0] = 1 # everything else (paddles, ball) just set to 1
  return I.astype(np.float32).ravel()
def discount_rewards(r):
  """ take 1D float array of rewards and compute discounted reward """
  discounted_r = np.zeros_like(r)
  running_add = 0
  for t in reversed(range(0, r.size)):
    if r[t] != 0: running_add = 0 # reset the sum, since this was a game boundary (pong specific!)
    running_add = running_add * gamma + r[t]
    discounted_r[t] = running_add
  return discounted_r
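
As a quick sanity check (toy numbers, not from a real game), the reset at nonzero rewards keeps each Pong point's return separate:

r = np.array([0., 0., 1., 0., 0., -1.]) # two points: we win one, then lose one
print(discount_rewards(r))
# with gamma = 0.99 this prints approximately:
# [ 0.9801  0.99    1.     -0.9801 -0.99   -1.    ]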
    
In [3]:
    
def prepro(I):
    """ prepro 210x160x3 uint8 frame into 6400 (80x80) 1D float vector """
    I = I[35:195] # crop
    I = I[::2,::2,0] # downsample by factor of 2
    I[I == 144] = 0 # erase background (background type 1)
    I[I == 109] = 0 # erase background (background type 2)
    I[I != 0] = 1 # everything else (paddles, ball) just set to 1
    return I.astype(np.float32).ravel()
from keras.models import Model
from keras.layers import Input, Dense
from keras.optimizers import RMSprop
def get_dense_model():
    """Make the Keras policy network: 6400 inputs -> 200 relu -> 1 sigmoid (probability of moving up)."""
    learning_rate = 1e-4
    inp = Input(shape=(80*80,))
    h = Dense(200, activation='relu')(inp)
    out = Dense(1, activation='sigmoid')(h)
    model = Model(inp, out)
    optim = RMSprop(learning_rate)
    model.compile(optim, 'binary_crossentropy')
    try:
        model.load_weights('mod_weights_binary.h5')
        print('weights loaded')
    except (IOError, OSError): # no checkpoint on disk yet
        pass
    return model
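
A minimal smoke test, assuming the classic gym API where reset() returns a 210x160x3 Pong frame, to confirm that prepro and the network agree on the 6400-dimensional input:

env = gym.make("Pong-v0")
obs = env.reset()
x = prepro(obs)         # (6400,) float32 vector
model = get_dense_model()
print(x.shape, model.predict(x.reshape(1, -1)).shape) # expected: (6400,) (1, 1)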
    
In [4]:
    
from keras.models import Sequential
from keras.layers import Dense, Activation
import os
kmodel = Sequential()
kmodel.add(Dense(200, input_dim=6400, activation="relu"))
kmodel.add(Dense(1, activation="sigmoid"))
kmodel.compile(loss='binary_crossentropy',
               optimizer=keras.optimizers.RMSprop(lr=0.01, rho=0.9, epsilon=1e-08, decay=0.0))
os.makedirs("model", exist_ok=True) # make sure the target directory exists before saving
kmodel.save("model/model.h5")
from keras.models import load_model
def get_dense_model():
    return load_model('model/model.h5')
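
load_model restores the architecture, weights, and the compiled optimizer/loss in one call, so every worker that calls get_dense_model gets its own independent copy of the network:

m1, m2 = get_dense_model(), get_dense_model()
print(m1 is m2) # False: separate instances, safe to use from separate workers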
    
    
In [5]:
    
game = "Pong-v0"
def run_parallel_episodes(pp,n_episodes = 3):
    X,ACTION,REWARD = [],[],[]
    
    outs=pp.map(run_episodes,n_episodes*[0])
    for o in outs:
        X.extend(o[0])
        ACTION.extend(o[1])
        REWARD.extend(o[2])
        
    X = np.vstack(X)
    ACTION = np.vstack(ACTION)
    REWARD = np.vstack(REWARD)
    return X,ACTION,REWARD
def run_episodes(thr=0): # dummy argument so the function can be passed to Pool.map
    n_episodes=3
    D=80*80
    model=get_dense_model()
    env=gym.make(game)
    observation = env.reset()
    prev_x = None # used in computing the difference frame
    X,ACTION,REWARD = [],[],[]
    running_reward = None
    reward_sum = 0
    episode_number = 0
    loc_len=0
    while True:
        # preprocess the observation, set input to network to be difference image
        cur_x = prepro(observation)
        x = cur_x - prev_x if prev_x is not None else np.zeros(D)
        prev_x = cur_x
        # forward the policy network and sample an action from the returned probability
        aprob = model.predict(x.reshape((1, -1)))[0, 0] # scalar probability of moving up
        action = 2 if np.random.uniform() < aprob else 3 # roll the dice!
        # record various intermediates (needed later for backprop)
        X.append(x.reshape((1, -1))) # observation
        # y = 1 if action == 2 else 0 # a "fake label" giving the action chosen
        ACTION.append(1 if action == 2 else 0) # a "fake label" giving the action chosen
        # step the environment and get new measurements
        observation, reward, done, info = env.step(action)
        reward_sum += reward
        REWARD.append(reward) # record reward (has to be done after we call step() to get reward for previous action)
        loc_len+=1
        if done: # an episode finished (one player has reached a score of 21)
            episode_number += 1
            #print(episode_number,reward_sum,loc_len)
            reward_sum=0
            loc_len=0
            if episode_number>(n_episodes-1):
                X = np.vstack(X)
                ACTION = np.array(ACTION)
                REWARD =  np.array(REWARD)
                return X,ACTION,REWARD
            observation = env.reset()
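
run_parallel_episodes expects a multiprocessing.Pool-like object for pp; a usage sketch (the pool size of 3 here is arbitrary):

from multiprocessing import Pool
pp = Pool(3) # one process per concurrent game
X, ACTION, REWARD = run_parallel_episodes(pp, n_episodes=3)
print(X.shape, ACTION.shape, REWARD.shape)
pp.close()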
    
In [6]:
    
import threading
class threadsafe_iter:
    """Takes an iterator/generator and makes it thread-safe by
    serializing calls to the `next` method of the given iterator/generator.
    """
    def __init__(self, it):
        self.it = it
        self.lock = threading.Lock()
    def __iter__(self):
        return self
    def __next__(self): # Python 3 iterator protocol; `it.next()` was Python 2 only
        with self.lock:
            return next(self.it)
def threadsafe_generator(f):
    """A decorator that takes a generator function and makes it thread-safe.
    """
    def g(*a, **kw):
        return threadsafe_iter(f(*a, **kw))
    return g
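
Without the lock, two fit_generator worker threads could call next on the same generator at once, which raises "ValueError: generator already executing"; the wrapper serializes those calls. A tiny check:

gen = threadsafe_iter(iter([1, 2, 3]))
print(next(gen), next(gen)) # 1 2 -- each call acquires the lock first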
    
In [7]:
    
import threading
@threadsafe_generator
def game_generator():
    while True:
        obs = run_episodes()
        yield obs
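
fit_generator treats the third array in each yielded tuple as per-sample weights, which is what turns the binary cross-entropy into a policy gradient: each action's log-loss is scaled by its reward. The raw REWARD array is mostly zeros, though, and discount_rewards from cell 2 is never applied; a sketch of a variant that discounts and normalizes the returns first (the generator name here is hypothetical):

@threadsafe_generator
def game_generator_discounted():
    while True:
        X, ACTION, REWARD = run_episodes()
        R = discount_rewards(REWARD.astype(np.float64)) # spread each point's reward backwards in time
        R -= R.mean() # standardize so the sample weights are
        R /= (R.std() + 1e-8) # roughly unit scale (1e-8 guards against /0)
        yield X, ACTION, R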
    
In [8]:
    
import datetime
    
In [9]:
    
start = datetime.datetime.now()
kmodel.fit_generator(generator=game_generator(),
                     steps_per_epoch=10,
                     epochs=1,
                     workers=1,
                     verbose=2,
                     use_multiprocessing=False)
print(datetime.datetime.now() - start)
#kmodel.save("model/model.h5")
    
    
In [10]:
    
start = datetime.datetime.now()
kmodel.fit_generator(generator=game_generator(),
                     steps_per_epoch=10,
                     epochs=1,
                     workers=5,
                     verbose=2,
                     use_multiprocessing=True) # each worker process gets its own copy of the generator
print(datetime.datetime.now() - start)
#kmodel.save("model/model.h5")