import os,random
# Choose one of GPU indices 0-3 at random and publish the Theano settings
# through the environment. This is done ahead of the remaining imports,
# presumably so that Theano (loaded indirectly) reads the flags on import.
gpu_index = random.randint(0, 3)
os.environ["THEANO_FLAGS"] = "mode=FAST_RUN,device=gpu%d,floatX=float32" % gpu_index
import seaborn as sns
from gym import envs
import kerlym

# Build the Gym environment (looked up by its registered id) that the agent
# below is constructed with.
env = envs.make("Pong-v0")

# Construct the D2QN agent for the environment created above.
agent = kerlym.agents.D2QN(
    env,
    # The schedule hands back the epsilon it receives, unchanged.
    epsilon_schedule=lambda episode, epsilon: epsilon,
    enable_plots=True,
    max_memory=1000000,
    difference_obs=True,
    # Alternative kept from the original for reference: preprocessor=None
    preprocessor=kerlym.preproc.karpathy_preproc,
    learning_rate=1e-4,
)

Input Dim:  6400 [1, 1, 80, 80]
Output Actions:  [0, 1, 2, 3, 4, 5]
Layer (type)                        Output Shape           Param #     Connected to
input_1 (InputLayer)                (None, 6400)           0
reshape_1 (Reshape)                 (None, 1, 1, 80, 80)   0           input_1[0][0]
timedistributed_1 (TimeDistributed) (None, 1, 16, 20, 20)  1040        reshape_1[0][0]
timedistributed_2 (TimeDistributed) (None, 1, 32, 10, 10)  8224        timedistributed_1[0][0]
flatten_1 (Flatten)                 (None, 3200)           0           timedistributed_2[0][0]
dense_1 (Dense)                     (None, 256)            819456      flatten_1[0][0]
dense_2 (Dense)                     (None, 6)              1542        dense_1[0][0]
Total params: 830262

# Start training with the episode and path-length limits given here;
# ipy_clear is switched on for the notebook session.
agent.learn(
    ipy_clear=True,
    max_episodes=100000,
    max_pathlength=200,
)

