In [1]:
%matplotlib inline
import os, random
# Pick a random GPU (0-3) for Theano before Keras imports it; Theano falls
# back to device 0 if the chosen device does not exist (see the log below).
os.environ["THEANO_FLAGS"] = "mode=FAST_RUN,device=gpu%d,floatX=float32" % random.randint(0, 3)
import seaborn as sns
from gym import envs
import kerlym


/usr/lib/python2.7/dist-packages/matplotlib/__init__.py:874: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.
  warnings.warn(self.msg_depr % (key, alt_key))
Using Theano backend.
ERROR (theano.sandbox.cuda): ERROR: Not using GPU. Initialisation of device 1 failed:
Bad device number 1. Only 1 devices available.
Using gpu device 0: GeForce GT 750M (CNMeM is disabled, cuDNN 5004)
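
The random draw picked device 1 on a host with only one GPU, so Theano fell back to device 0. On a single-GPU machine the flag can simply be pinned before Theano is imported:

```python
import os
# Single-GPU host: name device 0 explicitly to avoid the failed-init warning
os.environ["THEANO_FLAGS"] = "mode=FAST_RUN,device=gpu0,floatX=float32"
```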

In [2]:
env = "Pong-v0"
env = envs.make(env)


INFO:gym.envs.registration:Making new env: Pong-v0
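
For reference, kerlym essentially drives this object through the standard Gym episode loop; stripped of the agent, that interaction looks like:

```python
obs = env.reset()                       # initial 210x160x3 RGB frame
for t in range(200):
    action = env.action_space.sample()  # one of Pong's 6 discrete actions
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()
```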

In [3]:
agent = kerlym.agents.D2QN(env,
                  nframes=1,
                  epsilon=0.1,          # exploration probability
                  discount=0.99,        # reward discount factor
                  modelfactory=kerlym.networks.simple_cnn,
                  epsilon_schedule=lambda episode, epsilon: epsilon,  # identity: epsilon stays fixed
                  update_nsamp=1000,
                  batch_size=32,
                  dropout=0.5,
                  timesteps_per_batch=200,
                  stats_rate=10,
                  enable_plots=True,
                  max_memory=1000000,   # replay-memory capacity
                  difference_obs=True,  # train on frame differences
                  #preprocessor=None,
                  preprocessor=kerlym.preproc.karpathy_preproc,
                  learning_rate=1e-4)


Input Dim:  6400 [1, 1, 80, 80]
Output Actions:  [0, 1, 2, 3, 4, 5]
____________________________________________________________________________________________________
Layer (type)                         Output Shape            Param #     Connected to
====================================================================================================
input_1 (InputLayer)                 (None, 6400)            0
____________________________________________________________________________________________________
reshape_1 (Reshape)                  (None, 1, 1, 80, 80)    0           input_1[0][0]
____________________________________________________________________________________________________
timedistributed_1 (TimeDistributed)  (None, 1, 16, 20, 20)   1040        reshape_1[0][0]
____________________________________________________________________________________________________
timedistributed_2 (TimeDistributed)  (None, 1, 32, 10, 10)   8224        timedistributed_1[0][0]
____________________________________________________________________________________________________
flatten_1 (Flatten)                  (None, 3200)            0           timedistributed_2[0][0]
____________________________________________________________________________________________________
dense_1 (Dense)                      (None, 256)             819456      flatten_1[0][0]
____________________________________________________________________________________________________
dense_2 (Dense)                      (None, 6)               1542        dense_1[0][0]
====================================================================================================
Total params: 830262
____________________________________________________________________________________________________
None
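
The 6400-dimensional input reflects the preprocessor chosen above: kerlym.preproc.karpathy_preproc applies the Pong frame reduction popularized by Karpathy's "Pong from Pixels" post. A sketch of that transform, reimplemented here for illustration (not kerlym's exact code):

```python
import numpy as np

def karpathy_preproc_sketch(frame):
    """Turn a 210x160x3 Atari frame into the 6400-long binary vector seen above."""
    I = frame[35:195]            # crop to the playing field
    I = I[::2, ::2, 0].copy()    # downsample by 2, keep one channel -> 80x80
    I[I == 144] = 0              # erase background color 1
    I[I == 109] = 0              # erase background color 2
    I[I != 0] = 1                # paddles and ball -> 1
    return I.astype(np.float32).ravel()
```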

In [4]:
# Train: up to 100k episodes of at most 200 steps each (hence "Frame 00000199" below)
agent.learn(ipy_clear=True, max_episodes=100000, max_pathlength=200)


 * Episode 00000000	Frame 00000199	Samples: 00000199	Terminal: 00000001	Reward: -3	Epsilon: 0.100000
 * Episode 00000001	Frame 00000199	Samples: 00000398	Terminal: 00000002	Reward: -3	Epsilon: 0.100000
 * Episode 00000002	Frame 00000199	Samples: 00000597	Terminal: 00000003	Reward: -3	Epsilon: 0.100000
 * Episode 00000003	Frame 00000199	Samples: 00000796	Terminal: 00000004	Reward: -2	Epsilon: 0.100000
 * Episode 00000004	Frame 00000199	Samples: 00000995	Terminal: 00000005	Reward: -3	Epsilon: 0.100000
 * Episode 00000005	Frame 00000199	Samples: 00001194	Terminal: 00000006	Reward: -3	Epsilon: 0.100000
 * Episode 00000006	Frame 00000199	Samples: 00001393	Terminal: 00000007	Reward: -1	Epsilon: 0.100000
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-4-a1f21dea752f> in <module>()
----> 1 agent.learn(ipy_clear=True, max_episodes=100000, max_pathlength=200)
      2 

/home/oshea/src/kerlym/kerlym/dqn.pyc in learn(self, ipy_clear, max_episodes, max_pathlength)
    228 
    229                 do_update = (i%self.timesteps_per_batch==self.timesteps_per_batch-1)
--> 230                 self.update_train( obs, action, reward, new_obs, done, do_update )
    231 
    232                 obs[:,:] = new_obs[:,:]

/home/oshea/src/kerlym/kerlym/dqn.pyc in update_train(self, p_state, action, p_reward, new_state, terminal, update_model)
     94             self.model_updates += 1
     95 
---> 96             X_train, y_train = self.process_minibatch(terminal)
     97             hist = self.get_model(greedy=False).fit(X_train,
     98                            y_train,

/home/oshea/src/kerlym/kerlym/dqn.pyc in process_minibatch(self, terminal_rewards)
    138                 old_qval =self.get_model(greedy=True).predict(old_state_m,
    139                                               batch_size=1,
--> 140                                               verbose=0)
    141 
    142                 input2 = new_state_m

/home/oshea/src/keras/keras/engine/training.pyc in predict(self, x, batch_size, verbose)
   1101         f = self.predict_function
   1102         return self._predict_loop(f, ins,
-> 1103                                   batch_size=batch_size, verbose=verbose)
   1104 
   1105     def train_on_batch(self, x, y,

/home/oshea/src/keras/keras/engine/training.pyc in _predict_loop(self, f, ins, batch_size, verbose)
    821                 ins_batch = slice_X(ins, batch_ids)
    822 
--> 823             batch_outs = f(ins_batch)
    824             if type(batch_outs) != list:
    825                 batch_outs = [batch_outs]

/home/oshea/src/keras/keras/backend/theano_backend.pyc in __call__(self, inputs)
    491     def __call__(self, inputs):
    492         assert type(inputs) in {list, tuple}
--> 493         return self.function(*inputs)
    494 
    495 

/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    893         try:
    894             outputs =\
--> 895                 self.fn() if output_subset is None else\
    896                 self.fn(output_subset=output_subset)
    897         except Exception:

KeyboardInterrupt: 
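
The interrupt landed inside process_minibatch, the double-DQN update step. Conceptually, D2QN builds its training targets by letting the online network pick the next action while the second network scores it; a sketch under that assumption, with hypothetical helper names rather than kerlym's internals:

```python
import numpy as np

def double_dqn_targets(q_online, q_target, obs, actions, rewards, new_obs, terminals,
                       discount=0.99):
    # The online net both estimates current Q-values and picks the next action;
    # the second net scores that action (the "double" decoupling).
    q_now  = q_online.predict(obs)                         # Q(s, .)
    a_star = np.argmax(q_online.predict(new_obs), axis=1)  # argmax_a Q_online(s', a)
    q_next = q_target.predict(new_obs)                     # Q_target(s', .)
    targets = q_now.copy()
    for i, a in enumerate(actions):
        targets[i, a] = rewards[i] if terminals[i] else \
                        rewards[i] + discount * q_next[i, a_star[i]]
    return targets
```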

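One last note on the log above: epsilon never moves off 0.100000 because the epsilon_schedule passed to the agent is the identity. The hook receives (episode, epsilon) and returns the new epsilon, so an annealed policy drops in directly, e.g.:

```python
# Hypothetical replacement schedule: geometric decay per episode, floored at 0.01
decaying_epsilon = lambda episode, epsilon: max(0.01, 0.1 * 0.99**episode)
```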
In [ ]: