In [8]:
from pg_function_approx_actor_critic_multenv import *
In [9]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import numpy as np
The autoreload extension is already loaded. To reload it, use:
%reload_ext autoreload
In [10]:
# Build the agent on the MountainCar-v0 environment with the 'QAC' algorithm.
# The constructor echoes the chosen settings (N_0, init alpha, lambda, ...)
# to stdout, which is the text shown below this cell.
car1 = mountain_car(
    init_alpha=1e-5,
    constant_alpha=False,
    lambda_=0.5,
    N_0=50,
    random_init_theta=False,
    environment='MountainCar-v0',
    algorithm='QAC'
)
[2016-05-30 12:11:20,923] Making new env: MountainCar-v0
N_0 50
init alpha 1e-05
Constant Alpha False
lambda 0.5
using environment MountainCar-v0
tile resolution 10
In [14]:
# Long-running cell: train the agent with iter=1000 and save=True under the
# data name 'qac_mountain_car' (the same name is passed to loaddata() in a
# later cell).
# NOTE(review): the recorded run below was stopped by hand (KeyboardInterrupt),
# so the stored output does not show a completed training run.
car1.train(
    iter=1000,
    dataname='qac_mountain_car',
    save=True
)
0 EPISODE #1.0
with a exploration of 100.0%
and learning rate of 1e-05
lasted 13955 steps
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-14-92c82336a415> in <module>()
----> 1 car1.train(iter=1000, dataname = 'qac_mountain_car', save = True)
/home/frederik/Dokumente/DeepRL/PolicyGradient/pg_function_approx_actor_critic_multenv.pyc in train(self, iter, dataname, save)
291 self.epsilon = 0.
292 limit = 10000
--> 293 det_episode = self.run_episode(limit=limit)
294 if det_episode == []:
295 len_episode = limit
/home/frederik/Dokumente/DeepRL/PolicyGradient/pg_function_approx_actor_critic_multenv.pyc in run_episode(self, enable_render, limit)
218 count += 1
219
--> 220 action = self.policy(state, mode=self.policy_mode)
221 state, reward, done, info = self.env.step(action)
222 episode.append((state, action, reward))
/home/frederik/Dokumente/DeepRL/PolicyGradient/pg_function_approx_actor_critic_multenv.pyc in policy(self, state, mode)
191 explore = bool(np.random.choice([1,0],p=[self.epsilon, 1-self.epsilon]))
192
--> 193 features = self.get_full_feature(state)
194 # print(explore, features, end="")
195 if mode=='deterministic' and not explore:
/home/frederik/Dokumente/DeepRL/PolicyGradient/pg_function_approx_actor_critic_multenv.pyc in get_full_feature(self, state)
153 def get_full_feature(self,state):
154
--> 155 flatgrid = self.get_tile_feature(state)
156 length_flatgrid = flatgrid.shape[0]
157
/home/frederik/Dokumente/DeepRL/PolicyGradient/pg_function_approx_actor_critic_multenv.pyc in get_tile_feature(self, state)
123 ind = tuple(ind)
124
--> 125 grid = np.zeros(np.ones(obs_dim)*self.tile_resolution)
126 try:
127 grid[ind] = 1
KeyboardInterrupt:
In [13]:
import matplotlib.pyplot as plt
%matplotlib
# Re-create the agent with the same settings used in the training cell.
# (The original line read `car1 = car1 = ...`; the duplicated assignment
# target was redundant and has been removed.)
car1 = mountain_car(
    init_alpha=1e-5,
    constant_alpha=False,
    lambda_=0.5,
    N_0=50,
    random_init_theta=False,
    environment='MountainCar-v0',
    algorithm='QAC'
)

# Restore the data stored under the name used by the training cell ...
car1.loaddata('qac_mountain_car')

# ... and plot the q-function; the output below reports one plot per
# action (0, 1 and 2).
car1.plot_q_function()
[2016-05-30 13:50:18,044] Making new env: MountainCar-v0
Using matplotlib backend: Qt4Agg
N_0 50
init alpha 1e-05
Constant Alpha False
lambda 0.5
using environment MountainCar-v0
tile resolution 10
[ 0.00000000e+00 5.00063535e-03 2.90628348e-02 -3.92683351e-03
8.76296037e-02 -9.25755194e-01 -4.63168370e-01 -3.77845830e-02
0.00000000e+00 0.00000000e+00 7.27311862e-05 2.98700722e-02
7.63248190e-02 2.03173811e+00 -1.85410842e+00 -6.39083465e-02
4.03378345e+00 -4.97416423e-01 -1.84833676e-02 0.00000000e+00
1.67671300e-03 9.90697329e-02 7.48171649e-01 2.81695051e+00
1.36631574e+01 1.91880837e+01 1.92151495e+01 -2.37152516e-01
-3.21189199e-01 0.00000000e+00 8.16940189e-03 1.91256208e-01
1.39486385e+00 7.05881000e+00 1.12425145e+02 2.79741218e+01
1.41943630e+01 -3.50569279e-01 -4.31897245e-01 -1.39910379e-03
4.41510690e-03 2.16659262e-01 1.79409736e-01 3.03490620e+00
-2.19609820e+00 1.67071900e+02 3.93444488e+01 -1.62382906e-01
-3.31849674e-01 -1.53676383e-03 1.37637217e-05 8.69332876e-02
1.95465815e-01 1.11455175e-01 3.78401378e-02 -2.68855021e-01
-1.24660845e-01 -2.57843303e-01 -1.59098055e-01 -6.96755250e-05
0.00000000e+00 5.32145446e-03 9.46337876e-02 9.77730182e-02
-2.53761135e-02 -9.66607249e-02 -2.48373394e-01 -2.42920522e-01
-2.02484640e-02 0.00000000e+00 0.00000000e+00 0.00000000e+00
9.16265490e-03 4.87618783e-02 -1.03182838e-02 -1.66714070e-01
-1.86022606e-01 -4.17925565e-02 -2.51818102e-05 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 1.33502276e-03
-7.75606709e-03 -3.29028152e-02 -1.76662084e-02 -3.29990784e-04
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 -5.08949964e-04 -3.31302576e-04
3.10559953e-06 1.14423014e-07 0.00000000e+00 0.00000000e+00
0.00000000e+00 -1.99324530e-04 1.33405776e-02 6.21539339e-05
2.78724211e-03 -5.32143187e-01 -5.78965715e-01 -9.13603745e-03
0.00000000e+00 0.00000000e+00 0.00000000e+00 1.51251271e-03
2.80931672e-02 4.87174802e-02 -8.36395527e+00 3.01823434e+00
2.21907163e+00 -4.33257971e-01 -3.75869347e-02 0.00000000e+00
0.00000000e+00 1.37078937e-02 1.21186679e-01 1.00730170e+00
3.57063861e+00 5.49105183e+00 6.61752018e+00 4.74282165e+00
-4.64913284e-01 -2.65748847e-03 -3.89762647e-05 5.87116135e-02
1.59771575e-01 3.82831415e-01 2.91366929e-01 4.03639068e+00
2.74818515e+01 9.98308601e+00 -4.79417036e-01 -9.35143092e-02
-1.30887092e-05 1.01345091e-01 2.17430612e-01 2.64192421e+00
6.56717793e+01 1.60679324e+02 1.09452808e+02 8.91742279e+00
-3.36015151e-01 -1.77040253e-01 0.00000000e+00 4.13068558e-02
2.46580327e-01 1.28968749e-01 5.09120262e+00 4.61494181e+00
1.18988330e+01 -1.07312212e-01 -3.09013266e-01 -6.72131883e-02
0.00000000e+00 1.35802011e-03 1.03744474e-01 1.26361853e-01
4.04471149e-02 -5.11646704e-02 -1.11430888e-01 -2.43601681e-01
-2.18095518e-01 -4.83592113e-03 0.00000000e+00 0.00000000e+00
8.09315724e-03 8.52095555e-02 4.85679485e-02 -7.59379664e-02
-2.40569396e-01 -2.42959801e-01 -3.73896990e-02 -5.71696470e-06
0.00000000e+00 0.00000000e+00 0.00000000e+00 5.38992519e-03
1.24891511e-02 -5.59963423e-02 -9.83476518e-02 -3.35011655e-02
-1.61840774e-04 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 -1.40087877e-03 -6.78247446e-03
-4.78936405e-03 -4.14309751e-04 0.00000000e+00 0.00000000e+00
0.00000000e+00 1.43477443e-03 2.65903576e-02 1.12847902e-01
4.16080170e-02 -4.14696424e-01 -3.51003485e-01 -4.77318692e-02
0.00000000e+00 0.00000000e+00 -1.13335814e-05 7.33632343e-03
8.41229618e-02 -9.03686514e-01 6.38557272e+00 3.96864115e+00
-1.78277506e+00 -2.07057712e-01 -2.55741945e-02 0.00000000e+00
-1.14020531e-03 2.23529841e-02 1.02741868e-01 1.33780602e+00
1.47881270e+00 -8.34857125e+00 2.25477035e+00 -6.26768853e-02
-1.77638967e-01 0.00000000e+00 -3.23121317e-03 5.49846090e-03
5.48304482e-02 -5.29625753e+00 -1.16837644e+02 2.20321300e+01
4.60794097e+00 -1.01476120e-01 -1.62628709e-01 -3.03185307e-03
-1.82781337e-03 -9.95511968e-03 2.74741216e-02 -1.66570385e+00
4.17167860e-01 -8.57204391e+01 -1.93470606e+01 -7.25531290e-02
-1.38723009e-01 -4.21987604e-03 -9.08451311e-06 2.71526213e-03
3.39780427e-02 -1.14572761e-02 6.05388087e-02 7.14124853e-02
-5.92953088e-02 -8.93890247e-02 -9.07119937e-02 -8.98436864e-05
0.00000000e+00 -1.35280073e-03 -3.85167994e-03 3.48215861e-03
6.25748567e-02 3.45745022e-02 -4.99679058e-02 -1.34507457e-01
-2.53909989e-02 0.00000000e+00 0.00000000e+00 0.00000000e+00
-2.79216553e-03 9.14374671e-03 4.98708578e-02 4.37236416e-02
-1.12165974e-02 -2.18806439e-02 -6.42156679e-05 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 8.76623339e-04
7.00518637e-03 7.11555566e-03 -7.15440275e-04 -2.79075697e-04
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 -9.23353681e-05 -9.29727102e-06
8.02959219e-07 -9.56311168e-08 0.00000000e+00 0.00000000e+00
0.00000000e+00 3.42743529e-04 -8.83617359e-03 2.47432585e-02
1.82938937e-02 -1.47049519e-01 -3.93336668e-01 -9.58379493e-03
0.00000000e+00 0.00000000e+00 0.00000000e+00 1.17104432e-04
2.47561036e-02 1.14743333e-01 8.62968238e+00 -5.29686798e+00
-4.75034492e+00 -3.04899712e-01 -4.05942156e-02 0.00000000e+00
0.00000000e+00 -1.43509059e-03 3.92870382e-02 -2.00003643e+00
-4.18121215e+00 -7.55200733e+00 -2.51523847e+00 -1.58090151e+00
-1.90172877e-01 -4.04843491e-03 1.43000366e-05 -1.48983002e-03
4.94472383e-02 -2.37088351e+00 -3.57120269e+00 -9.51596657e+00
-7.06579500e+00 -8.86340111e+00 -8.86020916e-02 -9.20630416e-02
6.54744564e-06 -1.57146593e-02 1.48197817e-02 -1.50275469e+00
-6.46109836e+01 -6.62829179e+01 2.54436288e-01 -5.08260348e+00
-7.17461353e-02 -1.06871697e-01 0.00000000e+00 -9.50386149e-03
3.30112741e-02 1.48849465e-02 -2.09807149e+00 -1.12769299e+00
-6.24404470e+00 -7.86294432e-02 -1.03411978e-01 -5.74548846e-02
0.00000000e+00 -6.83637689e-04 4.81308149e-03 3.88233074e-03
2.23652634e-02 4.40668504e-02 -1.23807897e-02 -7.45933831e-02
-1.36509846e-01 -6.99366465e-03 0.00000000e+00 0.00000000e+00
-1.55754911e-03 -1.09737629e-02 4.03567626e-02 5.91892299e-02
2.69808766e-02 -8.82583396e-02 -3.01754594e-02 -7.30740918e-06
0.00000000e+00 0.00000000e+00 0.00000000e+00 1.49751381e-04
1.55168776e-02 2.69190167e-02 6.89585949e-03 -5.82542298e-03
-3.08269670e-04 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 7.96023335e-04 2.01455853e-03
4.10890945e-04 -1.96425059e-06 0.00000000e+00 0.00000000e+00
0.00000000e+00 -6.43540978e-03 -5.56531925e-02 -1.08921069e-01
-1.29237621e-01 1.34045162e+00 8.14171855e-01 8.55164521e-02
0.00000000e+00 0.00000000e+00 -6.13976048e-05 -3.72063956e-02
-1.60447781e-01 -1.12805159e+00 -4.53146430e+00 -3.90473281e+00
-2.25100839e+00 7.04474136e-01 4.40575620e-02 0.00000000e+00
-5.36507696e-04 -1.21422717e-01 -8.50913517e-01 -4.15475653e+00
-1.51419701e+01 -1.08395125e+01 -2.14699199e+01 2.99829401e-01
4.98828166e-01 0.00000000e+00 -4.93818872e-03 -1.96754669e-01
-1.44969429e+00 -1.76255248e+00 4.41249877e+00 -5.00062518e+01
-1.88023040e+01 4.52045399e-01 5.94525954e-01 4.43095686e-03
-2.58729353e-03 -2.06704142e-01 -2.06883858e-01 -1.36920235e+00
1.77893034e+00 -8.13514604e+01 -1.99973882e+01 2.34936035e-01
4.70572682e-01 5.75663988e-03 -4.67920862e-06 -8.96485498e-02
-2.29443857e-01 -9.99978990e-02 -9.83789465e-02 1.97442535e-01
1.83956154e-01 3.47232328e-01 2.49810048e-01 1.59519211e-04
0.00000000e+00 -3.96865373e-03 -9.07821077e-02 -1.01255177e-01
-3.71987432e-02 6.20862227e-02 2.98341300e-01 3.77427980e-01
4.56394628e-02 0.00000000e+00 0.00000000e+00 0.00000000e+00
-6.37048937e-03 -5.79056250e-02 -3.95525740e-02 1.22990429e-01
1.97239203e-01 6.36732004e-02 8.93974781e-05 0.00000000e+00
0.00000000e+00 0.00000000e+00 0.00000000e+00 -2.21164610e-03
7.50880713e-04 2.57872595e-02 1.83816486e-02 6.09066481e-04
0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 6.01285332e-04 3.40599847e-04
-3.90855875e-06 -1.87918971e-08 0.00000000e+00 0.00000000e+00
0.00000000e+00 -1.43418998e-04 -4.50440405e-03 -2.48054125e-02
-2.10811358e-02 6.79192706e-01 9.72302383e-01 1.87198324e-02
0.00000000e+00 0.00000000e+00 0.00000000e+00 -1.62961714e-03
-5.28492708e-02 -1.63460814e-01 -2.65727107e-01 2.27863365e+00
2.53127329e+00 7.38157683e-01 7.81811503e-02 0.00000000e+00
0.00000000e+00 -1.22728031e-02 -1.60473718e-01 9.92734731e-01
6.10573545e-01 2.06095551e+00 -4.10228171e+00 -3.16192014e+00
6.55086161e-01 6.70592338e-03 2.46762282e-05 -5.72217835e-02
-2.09218813e-01 1.98805209e+00 3.27983576e+00 5.47957589e+00
-2.04160565e+01 -1.11968490e+00 5.68019127e-01 1.85577351e-01
6.54126355e-06 -8.56304316e-02 -2.32250393e-01 -1.13916952e+00
-1.06079570e+00 -9.43964063e+01 -1.09707244e+02 -3.83481932e+00
4.07761286e-01 2.83911949e-01 0.00000000e+00 -3.18029943e-02
-2.79591601e-01 -1.43853695e-01 -2.99313113e+00 -3.48724883e+00
-5.65478828e+00 1.85941655e-01 4.12425244e-01 1.24668073e-01
0.00000000e+00 -6.74382425e-04 -1.08557556e-01 -1.30244184e-01
-6.28123783e-02 7.09782001e-03 1.23811678e-01 3.18195065e-01
3.54605364e-01 1.18295858e-02 0.00000000e+00 0.00000000e+00
-6.53560813e-03 -7.42357926e-02 -8.89247111e-02 1.67487365e-02
2.13588520e-01 3.31218141e-01 6.75651584e-02 1.30243739e-05
0.00000000e+00 0.00000000e+00 0.00000000e+00 -5.53967658e-03
-2.80060288e-02 2.90773256e-02 9.14517924e-02 3.93265885e-02
4.70110444e-04 0.00000000e+00 0.00000000e+00 0.00000000e+00
0.00000000e+00 0.00000000e+00 6.04855433e-04 4.76791593e-03
4.37847310e-03 4.16274001e-04 0.00000000e+00 0.00000000e+00]
[10054, 8519, 47699, 3348, 34381, 36440, 0, 1642, 10529, 57399, 3539, 52215, 47816, 74719, 41400, 16065, 15090, 30865, 45094, 41374, 21033, 57673, 10749, 52905, 13267, 5531, 6653, 10877, 14717, 42243, 83946, 2213, 3146, 8498, 31611, 1582, 4950, 10711, 11711, 1527, 3041, 11075, 15594, 1225, 5454, 4043, 1653, 3636, 3116, 1414, 898, 1591, 4078, 1399, 8280, 13662, 2386, 2350, 1881, 1092, 744, 2668, 4868, 1242, 3774, 3626, 718, 5717, 980, 1352, 1090, 654, 1421, 671, 1930, 1595, 2192, 1136, 1532, 2009, 2329, 1214, 2833, 931, 1447, 1508, 752, 2203, 1036, 863, 1523, 1545, 1781, 751, 1570, 1340, 1117, 2205, 1242, 719, 1340, 2121, 1776, 810, 594, 799, 1675, 884, 1497, 852, 1544, 1404, 672, 1480, 1473, 950, 738, 1234, 991, 811, 886, 1623, 994, 1085, 581, 1952, 836, 1083, 1042, 492, 784, 604, 2021, 674, 2336, 1361, 742, 611, 793, 953, 884, 693, 1293, 762, 1862, 704, 653, 1505, 717, 1361, 1149, 888, 586, 830, 578, 789, 603, 703, 874, 1759, 747, 718, 1209, 892, 337, 1274, 961, 1070, 1647, 439, 1261, 1058, 387, 1075, 724, 576, 334, 1056, 909, 902, 806, 1735, 1698, 787, 639, 352, 1348, 1064, 641, 724, 424, 1344, 1050, 410, 584, 1114, 754, 994, 491, 1359, 610, 1404, 437, 776, 970, 806, 2217, 422, 705, 826, 339, 1112, 411, 509, 741, 574, 1159, 346, 579, 1564, 1512, 651, 522, 598, 735, 731, 1180, 1136, 601, 760, 333, 666, 760, 723, 818, 346, 1633, 731, 907, 1497, 500, 556, 635, 958, 1117, 2275, 627, 655, 1654, 491, 1635, 798, 687, 3241, 413, 1734, 582, 645, 940, 683, 1247, 338, 754, 946, 1149, 1703, 864, 1160, 339, 588, 1650, 554, 1350, 843, 2371, 1217, 1130, 1245, 246, 433, 343, 950, 1270, 1157, 1461, 552, 1876, 567, 1180, 935, 1504, 440, 911, 1441, 1013, 4063, 857, 1037, 863, 2377, 1731, 1757, 524, 965, 2188, 1352, 2916, 1205, 1671, 10387, 7345, 2419, 5881, 9743, 1534, 1738, 7337, 1457, 1796, 2468, 13568, 8626, 1129, 11126, 4072, 573, 3926, 2655, 19010, 688, 2305, 9905, 2692, 28787, 1977, 20085, 21321, 6535, 3761, 15534, 0, 3790, 11076, 66222, 0, 27426, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[10054, 8519, 47699, 3348, 34381, 36440, 0, 1642, 10529, 57399, 3539, 52215, 47816, 74719, 41400, 16065, 15090, 30865, 45094, 41374, 21033, 57673, 10749, 52905, 13267, 5531, 6653, 10877, 14717, 42243, 83946, 2213, 3146, 8498, 31611, 1582, 4950, 10711, 11711, 1527, 3041, 11075, 15594, 1225, 5454, 4043, 1653, 3636, 3116, 1414, 898, 1591, 4078, 1399, 8280, 13662, 2386, 2350, 1881, 1092, 744, 2668, 4868, 1242, 3774, 3626, 718, 5717, 980, 1352, 1090, 654, 1421, 671, 1930, 1595, 2192, 1136, 1532, 2009, 2329, 1214, 2833, 931, 1447, 1508, 752, 2203, 1036, 863, 1523, 1545, 1781, 751, 1570, 1340, 1117, 2205, 1242, 719, 1340, 2121, 1776, 810, 594, 799, 1675, 884, 1497, 852, 1544, 1404, 672, 1480, 1473, 950, 738, 1234, 991, 811, 886, 1623, 994, 1085, 581, 1952, 836, 1083, 1042, 492, 784, 604, 2021, 674, 2336, 1361, 742, 611, 793, 953, 884, 693, 1293, 762, 1862, 704, 653, 1505, 717, 1361, 1149, 888, 586, 830, 578, 789, 603, 703, 874, 1759, 747, 718, 1209, 892, 337, 1274, 961, 1070, 1647, 439, 1261, 1058, 387, 1075, 724, 576, 334, 1056, 909, 902, 806, 1735, 1698, 787, 639, 352, 1348, 1064, 641, 724, 424, 1344, 1050, 410, 584, 1114, 754, 994, 491, 1359, 610, 1404, 437, 776, 970, 806, 2217, 422, 705, 826, 339, 1112, 411, 509, 741, 574, 1159, 346, 579, 1564, 1512, 651, 522, 598, 735, 731, 1180, 1136, 601, 760, 333, 666, 760, 723, 818, 346, 1633, 731, 907, 1497, 500, 556, 635, 958, 1117, 2275, 627, 655, 1654, 491, 1635, 798, 687, 3241, 413, 1734, 582, 645, 940, 683, 1247, 338, 754, 946, 1149, 1703, 864, 1160, 339, 588, 1650, 554, 1350, 843, 2371, 1217, 1130, 1245, 246, 433, 343, 950, 1270, 1157, 1461, 552, 1876, 567, 1180, 935, 1504, 440, 911, 1441, 1013, 4063, 857, 1037, 863, 2377, 1731, 1757, 524, 965, 2188, 1352, 2916, 1205, 1671, 10387, 7345, 2419, 5881, 9743, 1534, 1738, 7337, 1457, 1796, 2468, 13568, 8626, 1129, 11126, 4072, 573, 3926, 2655, 19010, 688, 2305, 9905, 2692, 28787, 1977, 20085, 21321, 6535, 3761, 15534, 0, 3790, 11076, 66222, 0, 27426, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
plotting the q-function for action 0
plotting the q-function for action 1
plotting the q-function for action 2
In [ ]:
Content source: febert/DeepRL
Similar notebooks: