In [8]:
from pg_function_approx_actor_critic_multenv import *

In [9]:
# Load IPython's autoreload extension (the recorded output shows it was
# already loaded in this kernel when the cell ran).
%load_ext autoreload
# Mode 2: reload imported modules before each cell executes, so edits to the
# agent module are picked up without restarting the kernel.
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

import numpy as np


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [10]:
# Build the agent for MountainCar-v0 with the 'QAC' algorithm selected.
# The constructor echoes these settings (see the recorded output below).
car1 = mountain_car(
    init_alpha=1e-5,
    constant_alpha=False,
    lambda_=0.5,
    N_0=50,
    random_init_theta=False,
    environment='MountainCar-v0',
    algorithm='QAC'
)


[2016-05-30 12:11:20,923] Making new env: MountainCar-v0
N_0 50
init alpha 1e-05
Constant Alpha False
lambda 0.5
using environment MountainCar-v0
tile resolution 10

In [14]:
# Train with iter=1000 and saving enabled; 'qac_mountain_car' is the data name
# that the plotting cell later passes to loaddata().
# NOTE: long-running -- the recorded run below was stopped by a KeyboardInterrupt.
car1.train(
    iter=1000,
    dataname='qac_mountain_car',
    save=True
)


0 EPISODE #1.0
with a exploration of 100.0%
and learning rate of 1e-05
lasted 13955 steps
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-14-92c82336a415> in <module>()
----> 1 car1.train(iter=1000, dataname = 'qac_mountain_car', save = True)

/home/frederik/Dokumente/DeepRL/PolicyGradient/pg_function_approx_actor_critic_multenv.pyc in train(self, iter, dataname, save)
    291                 self.epsilon = 0.
    292                 limit = 10000
--> 293                 det_episode = self.run_episode(limit=limit)
    294                 if det_episode == []:
    295                     len_episode = limit

/home/frederik/Dokumente/DeepRL/PolicyGradient/pg_function_approx_actor_critic_multenv.pyc in run_episode(self, enable_render, limit)
    218             count += 1
    219 
--> 220             action = self.policy(state, mode=self.policy_mode)
    221             state, reward, done, info = self.env.step(action)
    222             episode.append((state, action, reward))

/home/frederik/Dokumente/DeepRL/PolicyGradient/pg_function_approx_actor_critic_multenv.pyc in policy(self, state, mode)
    191         explore = bool(np.random.choice([1,0],p=[self.epsilon, 1-self.epsilon]))
    192 
--> 193         features = self.get_full_feature(state)
    194         # print(explore, features, end="")
    195         if mode=='deterministic' and not explore:

/home/frederik/Dokumente/DeepRL/PolicyGradient/pg_function_approx_actor_critic_multenv.pyc in get_full_feature(self, state)
    153     def get_full_feature(self,state):
    154 
--> 155         flatgrid = self.get_tile_feature(state)
    156         length_flatgrid = flatgrid.shape[0]
    157 

/home/frederik/Dokumente/DeepRL/PolicyGradient/pg_function_approx_actor_critic_multenv.pyc in get_tile_feature(self, state)
    123         ind = tuple(ind)
    124 
--> 125         grid = np.zeros(np.ones(obs_dim)*self.tile_resolution)
    126         try:
    127             grid[ind] = 1

KeyboardInterrupt: 

In [13]:
import matplotlib.pyplot as plt
%matplotlib
car1 = car1 = mountain_car(init_alpha= 1e-5, constant_alpha= False, lambda_=0.5, N_0= 50, random_init_theta=False, environment = 'MountainCar-v0',algorithm = 'QAC')
car1.loaddata('qac_mountain_car')
car1.plot_q_function()


[2016-05-30 13:50:18,044] Making new env: MountainCar-v0
Using matplotlib backend: Qt4Agg
N_0 50
init alpha 1e-05
Constant Alpha False
lambda 0.5
using environment MountainCar-v0
tile resolution 10
[  0.00000000e+00   5.00063535e-03   2.90628348e-02  -3.92683351e-03
   8.76296037e-02  -9.25755194e-01  -4.63168370e-01  -3.77845830e-02
   0.00000000e+00   0.00000000e+00   7.27311862e-05   2.98700722e-02
   7.63248190e-02   2.03173811e+00  -1.85410842e+00  -6.39083465e-02
   4.03378345e+00  -4.97416423e-01  -1.84833676e-02   0.00000000e+00
   1.67671300e-03   9.90697329e-02   7.48171649e-01   2.81695051e+00
   1.36631574e+01   1.91880837e+01   1.92151495e+01  -2.37152516e-01
  -3.21189199e-01   0.00000000e+00   8.16940189e-03   1.91256208e-01
   1.39486385e+00   7.05881000e+00   1.12425145e+02   2.79741218e+01
   1.41943630e+01  -3.50569279e-01  -4.31897245e-01  -1.39910379e-03
   4.41510690e-03   2.16659262e-01   1.79409736e-01   3.03490620e+00
  -2.19609820e+00   1.67071900e+02   3.93444488e+01  -1.62382906e-01
  -3.31849674e-01  -1.53676383e-03   1.37637217e-05   8.69332876e-02
   1.95465815e-01   1.11455175e-01   3.78401378e-02  -2.68855021e-01
  -1.24660845e-01  -2.57843303e-01  -1.59098055e-01  -6.96755250e-05
   0.00000000e+00   5.32145446e-03   9.46337876e-02   9.77730182e-02
  -2.53761135e-02  -9.66607249e-02  -2.48373394e-01  -2.42920522e-01
  -2.02484640e-02   0.00000000e+00   0.00000000e+00   0.00000000e+00
   9.16265490e-03   4.87618783e-02  -1.03182838e-02  -1.66714070e-01
  -1.86022606e-01  -4.17925565e-02  -2.51818102e-05   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   1.33502276e-03
  -7.75606709e-03  -3.29028152e-02  -1.76662084e-02  -3.29990784e-04
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00  -5.08949964e-04  -3.31302576e-04
   3.10559953e-06   1.14423014e-07   0.00000000e+00   0.00000000e+00
   0.00000000e+00  -1.99324530e-04   1.33405776e-02   6.21539339e-05
   2.78724211e-03  -5.32143187e-01  -5.78965715e-01  -9.13603745e-03
   0.00000000e+00   0.00000000e+00   0.00000000e+00   1.51251271e-03
   2.80931672e-02   4.87174802e-02  -8.36395527e+00   3.01823434e+00
   2.21907163e+00  -4.33257971e-01  -3.75869347e-02   0.00000000e+00
   0.00000000e+00   1.37078937e-02   1.21186679e-01   1.00730170e+00
   3.57063861e+00   5.49105183e+00   6.61752018e+00   4.74282165e+00
  -4.64913284e-01  -2.65748847e-03  -3.89762647e-05   5.87116135e-02
   1.59771575e-01   3.82831415e-01   2.91366929e-01   4.03639068e+00
   2.74818515e+01   9.98308601e+00  -4.79417036e-01  -9.35143092e-02
  -1.30887092e-05   1.01345091e-01   2.17430612e-01   2.64192421e+00
   6.56717793e+01   1.60679324e+02   1.09452808e+02   8.91742279e+00
  -3.36015151e-01  -1.77040253e-01   0.00000000e+00   4.13068558e-02
   2.46580327e-01   1.28968749e-01   5.09120262e+00   4.61494181e+00
   1.18988330e+01  -1.07312212e-01  -3.09013266e-01  -6.72131883e-02
   0.00000000e+00   1.35802011e-03   1.03744474e-01   1.26361853e-01
   4.04471149e-02  -5.11646704e-02  -1.11430888e-01  -2.43601681e-01
  -2.18095518e-01  -4.83592113e-03   0.00000000e+00   0.00000000e+00
   8.09315724e-03   8.52095555e-02   4.85679485e-02  -7.59379664e-02
  -2.40569396e-01  -2.42959801e-01  -3.73896990e-02  -5.71696470e-06
   0.00000000e+00   0.00000000e+00   0.00000000e+00   5.38992519e-03
   1.24891511e-02  -5.59963423e-02  -9.83476518e-02  -3.35011655e-02
  -1.61840774e-04   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00  -1.40087877e-03  -6.78247446e-03
  -4.78936405e-03  -4.14309751e-04   0.00000000e+00   0.00000000e+00
   0.00000000e+00   1.43477443e-03   2.65903576e-02   1.12847902e-01
   4.16080170e-02  -4.14696424e-01  -3.51003485e-01  -4.77318692e-02
   0.00000000e+00   0.00000000e+00  -1.13335814e-05   7.33632343e-03
   8.41229618e-02  -9.03686514e-01   6.38557272e+00   3.96864115e+00
  -1.78277506e+00  -2.07057712e-01  -2.55741945e-02   0.00000000e+00
  -1.14020531e-03   2.23529841e-02   1.02741868e-01   1.33780602e+00
   1.47881270e+00  -8.34857125e+00   2.25477035e+00  -6.26768853e-02
  -1.77638967e-01   0.00000000e+00  -3.23121317e-03   5.49846090e-03
   5.48304482e-02  -5.29625753e+00  -1.16837644e+02   2.20321300e+01
   4.60794097e+00  -1.01476120e-01  -1.62628709e-01  -3.03185307e-03
  -1.82781337e-03  -9.95511968e-03   2.74741216e-02  -1.66570385e+00
   4.17167860e-01  -8.57204391e+01  -1.93470606e+01  -7.25531290e-02
  -1.38723009e-01  -4.21987604e-03  -9.08451311e-06   2.71526213e-03
   3.39780427e-02  -1.14572761e-02   6.05388087e-02   7.14124853e-02
  -5.92953088e-02  -8.93890247e-02  -9.07119937e-02  -8.98436864e-05
   0.00000000e+00  -1.35280073e-03  -3.85167994e-03   3.48215861e-03
   6.25748567e-02   3.45745022e-02  -4.99679058e-02  -1.34507457e-01
  -2.53909989e-02   0.00000000e+00   0.00000000e+00   0.00000000e+00
  -2.79216553e-03   9.14374671e-03   4.98708578e-02   4.37236416e-02
  -1.12165974e-02  -2.18806439e-02  -6.42156679e-05   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00   8.76623339e-04
   7.00518637e-03   7.11555566e-03  -7.15440275e-04  -2.79075697e-04
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00  -9.23353681e-05  -9.29727102e-06
   8.02959219e-07  -9.56311168e-08   0.00000000e+00   0.00000000e+00
   0.00000000e+00   3.42743529e-04  -8.83617359e-03   2.47432585e-02
   1.82938937e-02  -1.47049519e-01  -3.93336668e-01  -9.58379493e-03
   0.00000000e+00   0.00000000e+00   0.00000000e+00   1.17104432e-04
   2.47561036e-02   1.14743333e-01   8.62968238e+00  -5.29686798e+00
  -4.75034492e+00  -3.04899712e-01  -4.05942156e-02   0.00000000e+00
   0.00000000e+00  -1.43509059e-03   3.92870382e-02  -2.00003643e+00
  -4.18121215e+00  -7.55200733e+00  -2.51523847e+00  -1.58090151e+00
  -1.90172877e-01  -4.04843491e-03   1.43000366e-05  -1.48983002e-03
   4.94472383e-02  -2.37088351e+00  -3.57120269e+00  -9.51596657e+00
  -7.06579500e+00  -8.86340111e+00  -8.86020916e-02  -9.20630416e-02
   6.54744564e-06  -1.57146593e-02   1.48197817e-02  -1.50275469e+00
  -6.46109836e+01  -6.62829179e+01   2.54436288e-01  -5.08260348e+00
  -7.17461353e-02  -1.06871697e-01   0.00000000e+00  -9.50386149e-03
   3.30112741e-02   1.48849465e-02  -2.09807149e+00  -1.12769299e+00
  -6.24404470e+00  -7.86294432e-02  -1.03411978e-01  -5.74548846e-02
   0.00000000e+00  -6.83637689e-04   4.81308149e-03   3.88233074e-03
   2.23652634e-02   4.40668504e-02  -1.23807897e-02  -7.45933831e-02
  -1.36509846e-01  -6.99366465e-03   0.00000000e+00   0.00000000e+00
  -1.55754911e-03  -1.09737629e-02   4.03567626e-02   5.91892299e-02
   2.69808766e-02  -8.82583396e-02  -3.01754594e-02  -7.30740918e-06
   0.00000000e+00   0.00000000e+00   0.00000000e+00   1.49751381e-04
   1.55168776e-02   2.69190167e-02   6.89585949e-03  -5.82542298e-03
  -3.08269670e-04   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   7.96023335e-04   2.01455853e-03
   4.10890945e-04  -1.96425059e-06   0.00000000e+00   0.00000000e+00
   0.00000000e+00  -6.43540978e-03  -5.56531925e-02  -1.08921069e-01
  -1.29237621e-01   1.34045162e+00   8.14171855e-01   8.55164521e-02
   0.00000000e+00   0.00000000e+00  -6.13976048e-05  -3.72063956e-02
  -1.60447781e-01  -1.12805159e+00  -4.53146430e+00  -3.90473281e+00
  -2.25100839e+00   7.04474136e-01   4.40575620e-02   0.00000000e+00
  -5.36507696e-04  -1.21422717e-01  -8.50913517e-01  -4.15475653e+00
  -1.51419701e+01  -1.08395125e+01  -2.14699199e+01   2.99829401e-01
   4.98828166e-01   0.00000000e+00  -4.93818872e-03  -1.96754669e-01
  -1.44969429e+00  -1.76255248e+00   4.41249877e+00  -5.00062518e+01
  -1.88023040e+01   4.52045399e-01   5.94525954e-01   4.43095686e-03
  -2.58729353e-03  -2.06704142e-01  -2.06883858e-01  -1.36920235e+00
   1.77893034e+00  -8.13514604e+01  -1.99973882e+01   2.34936035e-01
   4.70572682e-01   5.75663988e-03  -4.67920862e-06  -8.96485498e-02
  -2.29443857e-01  -9.99978990e-02  -9.83789465e-02   1.97442535e-01
   1.83956154e-01   3.47232328e-01   2.49810048e-01   1.59519211e-04
   0.00000000e+00  -3.96865373e-03  -9.07821077e-02  -1.01255177e-01
  -3.71987432e-02   6.20862227e-02   2.98341300e-01   3.77427980e-01
   4.56394628e-02   0.00000000e+00   0.00000000e+00   0.00000000e+00
  -6.37048937e-03  -5.79056250e-02  -3.95525740e-02   1.22990429e-01
   1.97239203e-01   6.36732004e-02   8.93974781e-05   0.00000000e+00
   0.00000000e+00   0.00000000e+00   0.00000000e+00  -2.21164610e-03
   7.50880713e-04   2.57872595e-02   1.83816486e-02   6.09066481e-04
   0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   6.01285332e-04   3.40599847e-04
  -3.90855875e-06  -1.87918971e-08   0.00000000e+00   0.00000000e+00
   0.00000000e+00  -1.43418998e-04  -4.50440405e-03  -2.48054125e-02
  -2.10811358e-02   6.79192706e-01   9.72302383e-01   1.87198324e-02
   0.00000000e+00   0.00000000e+00   0.00000000e+00  -1.62961714e-03
  -5.28492708e-02  -1.63460814e-01  -2.65727107e-01   2.27863365e+00
   2.53127329e+00   7.38157683e-01   7.81811503e-02   0.00000000e+00
   0.00000000e+00  -1.22728031e-02  -1.60473718e-01   9.92734731e-01
   6.10573545e-01   2.06095551e+00  -4.10228171e+00  -3.16192014e+00
   6.55086161e-01   6.70592338e-03   2.46762282e-05  -5.72217835e-02
  -2.09218813e-01   1.98805209e+00   3.27983576e+00   5.47957589e+00
  -2.04160565e+01  -1.11968490e+00   5.68019127e-01   1.85577351e-01
   6.54126355e-06  -8.56304316e-02  -2.32250393e-01  -1.13916952e+00
  -1.06079570e+00  -9.43964063e+01  -1.09707244e+02  -3.83481932e+00
   4.07761286e-01   2.83911949e-01   0.00000000e+00  -3.18029943e-02
  -2.79591601e-01  -1.43853695e-01  -2.99313113e+00  -3.48724883e+00
  -5.65478828e+00   1.85941655e-01   4.12425244e-01   1.24668073e-01
   0.00000000e+00  -6.74382425e-04  -1.08557556e-01  -1.30244184e-01
  -6.28123783e-02   7.09782001e-03   1.23811678e-01   3.18195065e-01
   3.54605364e-01   1.18295858e-02   0.00000000e+00   0.00000000e+00
  -6.53560813e-03  -7.42357926e-02  -8.89247111e-02   1.67487365e-02
   2.13588520e-01   3.31218141e-01   6.75651584e-02   1.30243739e-05
   0.00000000e+00   0.00000000e+00   0.00000000e+00  -5.53967658e-03
  -2.80060288e-02   2.90773256e-02   9.14517924e-02   3.93265885e-02
   4.70110444e-04   0.00000000e+00   0.00000000e+00   0.00000000e+00
   0.00000000e+00   0.00000000e+00   6.04855433e-04   4.76791593e-03
   4.37847310e-03   4.16274001e-04   0.00000000e+00   0.00000000e+00]
[10054, 8519, 47699, 3348, 34381, 36440, 0, 1642, 10529, 57399, 3539, 52215, 47816, 74719, 41400, 16065, 15090, 30865, 45094, 41374, 21033, 57673, 10749, 52905, 13267, 5531, 6653, 10877, 14717, 42243, 83946, 2213, 3146, 8498, 31611, 1582, 4950, 10711, 11711, 1527, 3041, 11075, 15594, 1225, 5454, 4043, 1653, 3636, 3116, 1414, 898, 1591, 4078, 1399, 8280, 13662, 2386, 2350, 1881, 1092, 744, 2668, 4868, 1242, 3774, 3626, 718, 5717, 980, 1352, 1090, 654, 1421, 671, 1930, 1595, 2192, 1136, 1532, 2009, 2329, 1214, 2833, 931, 1447, 1508, 752, 2203, 1036, 863, 1523, 1545, 1781, 751, 1570, 1340, 1117, 2205, 1242, 719, 1340, 2121, 1776, 810, 594, 799, 1675, 884, 1497, 852, 1544, 1404, 672, 1480, 1473, 950, 738, 1234, 991, 811, 886, 1623, 994, 1085, 581, 1952, 836, 1083, 1042, 492, 784, 604, 2021, 674, 2336, 1361, 742, 611, 793, 953, 884, 693, 1293, 762, 1862, 704, 653, 1505, 717, 1361, 1149, 888, 586, 830, 578, 789, 603, 703, 874, 1759, 747, 718, 1209, 892, 337, 1274, 961, 1070, 1647, 439, 1261, 1058, 387, 1075, 724, 576, 334, 1056, 909, 902, 806, 1735, 1698, 787, 639, 352, 1348, 1064, 641, 724, 424, 1344, 1050, 410, 584, 1114, 754, 994, 491, 1359, 610, 1404, 437, 776, 970, 806, 2217, 422, 705, 826, 339, 1112, 411, 509, 741, 574, 1159, 346, 579, 1564, 1512, 651, 522, 598, 735, 731, 1180, 1136, 601, 760, 333, 666, 760, 723, 818, 346, 1633, 731, 907, 1497, 500, 556, 635, 958, 1117, 2275, 627, 655, 1654, 491, 1635, 798, 687, 3241, 413, 1734, 582, 645, 940, 683, 1247, 338, 754, 946, 1149, 1703, 864, 1160, 339, 588, 1650, 554, 1350, 843, 2371, 1217, 1130, 1245, 246, 433, 343, 950, 1270, 1157, 1461, 552, 1876, 567, 1180, 935, 1504, 440, 911, 1441, 1013, 4063, 857, 1037, 863, 2377, 1731, 1757, 524, 965, 2188, 1352, 2916, 1205, 1671, 10387, 7345, 2419, 5881, 9743, 1534, 1738, 7337, 1457, 1796, 2468, 13568, 8626, 1129, 11126, 4072, 573, 3926, 2655, 19010, 688, 2305, 9905, 2692, 28787, 1977, 20085, 21321, 6535, 3761, 15534, 0, 3790, 11076, 66222, 0, 27426, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[10054, 8519, 47699, 3348, 34381, 36440, 0, 1642, 10529, 57399, 3539, 52215, 47816, 74719, 41400, 16065, 15090, 30865, 45094, 41374, 21033, 57673, 10749, 52905, 13267, 5531, 6653, 10877, 14717, 42243, 83946, 2213, 3146, 8498, 31611, 1582, 4950, 10711, 11711, 1527, 3041, 11075, 15594, 1225, 5454, 4043, 1653, 3636, 3116, 1414, 898, 1591, 4078, 1399, 8280, 13662, 2386, 2350, 1881, 1092, 744, 2668, 4868, 1242, 3774, 3626, 718, 5717, 980, 1352, 1090, 654, 1421, 671, 1930, 1595, 2192, 1136, 1532, 2009, 2329, 1214, 2833, 931, 1447, 1508, 752, 2203, 1036, 863, 1523, 1545, 1781, 751, 1570, 1340, 1117, 2205, 1242, 719, 1340, 2121, 1776, 810, 594, 799, 1675, 884, 1497, 852, 1544, 1404, 672, 1480, 1473, 950, 738, 1234, 991, 811, 886, 1623, 994, 1085, 581, 1952, 836, 1083, 1042, 492, 784, 604, 2021, 674, 2336, 1361, 742, 611, 793, 953, 884, 693, 1293, 762, 1862, 704, 653, 1505, 717, 1361, 1149, 888, 586, 830, 578, 789, 603, 703, 874, 1759, 747, 718, 1209, 892, 337, 1274, 961, 1070, 1647, 439, 1261, 1058, 387, 1075, 724, 576, 334, 1056, 909, 902, 806, 1735, 1698, 787, 639, 352, 1348, 1064, 641, 724, 424, 1344, 1050, 410, 584, 1114, 754, 994, 491, 1359, 610, 1404, 437, 776, 970, 806, 2217, 422, 705, 826, 339, 1112, 411, 509, 741, 574, 1159, 346, 579, 1564, 1512, 651, 522, 598, 735, 731, 1180, 1136, 601, 760, 333, 666, 760, 723, 818, 346, 1633, 731, 907, 1497, 500, 556, 635, 958, 1117, 2275, 627, 655, 1654, 491, 1635, 798, 687, 3241, 413, 1734, 582, 645, 940, 683, 1247, 338, 754, 946, 1149, 1703, 864, 1160, 339, 588, 1650, 554, 1350, 843, 2371, 1217, 1130, 1245, 246, 433, 343, 950, 1270, 1157, 1461, 552, 1876, 567, 1180, 935, 1504, 440, 911, 1441, 1013, 4063, 857, 1037, 863, 2377, 1731, 1757, 524, 965, 2188, 1352, 2916, 1205, 1671, 10387, 7345, 2419, 5881, 9743, 1534, 1738, 7337, 1457, 1796, 2468, 13568, 8626, 1129, 11126, 4072, 573, 3926, 2655, 19010, 688, 2305, 9905, 2692, 28787, 1977, 20085, 21321, 6535, 3761, 15534, 0, 3790, 11076, 66222, 0, 27426, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
plotting the q-function for action 0

plotting the q-function for action 1

plotting the q-function for action 2


In [ ]: