In [5]:
# %matplotlib inline
# from matplotlib import pyplot as plt
import gym
import numpy as np
# from gym.envs.registration import register
# from gym import wrappers
# import shutil
In [6]:
# Build the Gym environment registered under the id 'Taxi-v2'.
# NOTE(review): newer Gym releases may no longer register this id - confirm
# against the installed gym version.
env = gym.make('Taxi-v2')
In [16]:
# Reset the environment and keep whatever reset() returns in `s`.
s=env.reset()
In [15]:
# Draw the environment's current state.
env.render()
In [17]:
# Echo the value stored in `s` as the cell output.
s
Out[17]:
In [19]:
# Reset again; the return value is only displayed, not stored.
env.reset()
Out[19]:
In [21]:
# Apply action 0 and display whatever step() returns.
env.step(0)
Out[21]:
In [22]:
# Draw the environment again to inspect the result of the previous call.
env.render()
Out[22]:
In [77]:
def cap(x, down, up, ninter):
    """Map ``x`` onto one of ``ninter`` equal-width bins covering ``[down, up)``.

    Values below ``down`` are placed in the first bin and values at or above
    ``up`` in the last bin, so the result is always a valid bin index.

    Args:
        x: Value to discretise.
        down: Inclusive lower edge of the binned range.
        up: Exclusive upper edge of the binned range.
        ninter: Number of bins; must be at least 1.

    Returns:
        int: Bin index in ``range(ninter)``.

    Raises:
        ValueError: If ``ninter < 1`` or ``up <= down``.
    """
    if ninter < 1 or up <= down:
        raise ValueError("cap() needs ninter >= 1 and down < up")
    # Out-of-range values are pinned to the edge bins directly, rather than
    # by rewriting x to ``up - 1`` (which only works for unit-spaced values
    # and a bin width of at least 1).
    if x < down:
        return 0
    if x >= up:
        return ninter - 1
    # Scale before dividing so a bin width that is not a whole number cannot
    # be truncated; truncation could otherwise push the index up to ``ninter``.
    index = int((x - down) * ninter // (up - down))
    # Guard against float rounding landing exactly on the upper edge.
    return min(index, ninter - 1)
In [80]:
# Print the bin index cap() assigns to every integer from two below `down`
# to two above `up`, to eyeball the clamping at both edges.
down=10
up=30
for i in range(down-2, up+3):
    # Single-argument call form prints the same text on Python 2 and 3.
    print("%s %s"%(i,cap(i,down,up,5)))
In [222]:
class Env(object):
    """Small discrete environment with a Gym-like ``reset``/``step`` interface.

    The environment tracks five readings (``temp``, ``humi``, ``co2``,
    ``light``, ``watp``) and a set of target readings. An action is an integer
    whose four lowest bits switch four actuators on or off, in the order
    ``(comp, mist, light, watp)``.

    Attributes:
        n_action: Number of distinct actions (16 = 2**4 actuator flags).
        n_state: Number of distinct state indices (500 = 5*5*5*2*2).
    """

    def __init__(self):
        self.n_action=16
        self.n_state=500
        # Start from the default readings and targets so that step(),
        # get_reward() and render() work on a freshly constructed instance.
        self.reset()
        self.set_target()

    def get_state(self):
        """Return the current readings encoded as a single integer index.

        ``temp``, ``humi`` and ``co2`` are each discretised into 5 bins with
        ``cap``; ``light`` and ``watp`` are used as they are (the encoding
        assumes they are 0 or 1). The five digits are combined as a
        mixed-radix number with place values 100, 20, 4, 2 and 1, so every
        combination gets its own index in ``range(500)``.
        """
        x1=cap(self.temp, 10, 25, 5)
        x2=cap(self.humi, 10, 100, 5)
        x3=cap(self.co2, 300, 3000, 5)
        x4=self.light
        x5=self.watp
        # Each digit is weighted by the product of the sizes of the digits
        # to its right: 5*5*2*2, 5*2*2, 2*2, 2, 1.
        return int(x1*5*5*2*2 + x2*5*2*2 + x3*2*2 + x4*2 + x5)

    def reset(self, temp=25, humi=80, co2=200, light=0, watp=0):
        """Set the current readings and return the resulting state index."""
        self.temp=temp
        self.humi=humi
        self.co2=co2
        self.light=light
        self.watp=watp
        return self.get_state()

    def set_target(self, temp=25, humi=80, co2=200, light=0, watp=0):
        """Store the target readings and return the *current* state index.

        Only ``t_temp`` and ``t_humi`` are read by ``get_reward``; the other
        targets are stored but not used elsewhere in this class.
        """
        self.t_temp=temp
        self.t_humi=humi
        self.t_co2=co2
        self.t_light=light
        self.t_watp=watp
        return self.get_state()

    def get_reward(self):
        """Return 1 when temp and humi are both near their targets, else -0.1.

        "Near" means ``t_temp-1 < temp <= t_temp+1`` and
        ``t_humi-5 < humi <= t_humi+5``.
        """
        temp_ok = self.t_temp-1 < self.temp <= self.t_temp+1
        humi_ok = self.t_humi-5 < self.humi <= self.t_humi+5
        if temp_ok and humi_ok:
            return 1
        return -0.1

    def code2int(self,code):
        """Pack a ``(comp, mist, light, watp)`` flag tuple into an action integer."""
        #(comp,mist,light,watp)
        return code[0]*8 + code[1]*4 + code[2]*2 + code[3]

    def int2code(self,a=0):
        """Unpack an action integer into ``(comp, mist, light, watp)`` flags.

        Only the four lowest bits of ``a`` are used; ``comp`` is the highest
        of them and ``watp`` the lowest, mirroring ``code2int``.
        """
        return int((a >> 3) & 1), int((a >> 2) & 1), int((a >> 1) & 1), int(a & 1)

    def step(self,a):
        """Apply action ``a`` to the readings.

        Returns:
            tuple: ``(state, reward, 0, 0)``. The last two items are constant
            zeros (presumably placeholders for Gym's ``done`` and ``info``
            slots - TODO confirm with callers).
        """
        comp,mist,light,watp = self.int2code(a)
        if comp==1:
            self.humi-=5
            self.temp-=1
        if mist==1:
            self.humi+=5
        # NOTE(review): the light/watp flags change humi/temp only; self.light
        # and self.watp are not updated here, so those two state digits change
        # only through reset(). self.co2 is never modified by step() either.
        if light==1:
            self.humi+=1
            self.temp+=1
        if watp==1:
            self.humi+=1
        return self.get_state(), self.get_reward(), 0, 0

    def render(self):
        """Print the current readings on one line."""
        print( "temp:{}, humi:{}, co2:{}, light:{}, watp:{}".format(
            self.temp, self.humi, self.co2, self.light, self.watp))
In [223]:
# Instantiate the custom Env. This rebinds `env`, which earlier in the
# notebook held the Gym Taxi environment.
env=Env()
In [224]:
# Decode action 15 into its four (comp, mist, light, watp) flags.
env.int2code(15)
Out[224]:
In [225]:
# Encode the all-on flag tuple back into an action number.
env.code2int( (1,1,1,1) )
Out[225]:
In [226]:
# Reset to the default readings; the cell output is the resulting state index.
env.reset()
Out[226]:
In [227]:
# Store the target readings; the value shown is the current state index,
# which set_target() returns after storing the targets.
env.set_target(temp=10, humi=90, co2=200, light=0, watp=0)
Out[227]:
In [228]:
# Define the flag tuple here so the cell does not depend on a later cell
# having been run first. Layout: (comp, mist, light, watp).
code=(0,0,0,1)
env.code2int( code )
Out[228]:
In [235]:
#(comp,mist,light,watp)
code=(0,0,0,1)
# Single-argument print call works on both Python 2 and 3.
print(env.step(env.code2int( code )))
# render() prints the readings itself and returns nothing, so it is called
# directly instead of being wrapped in print (which would also emit "None").
env.render()
In [90]:
# step() takes a single action integer, so encode the all-off
# (comp, mist, light, watp) flags first.
env.step(env.code2int((0,0,0,0)))
Out[90]:
In [180]:
# Scratch check: adding 16 makes bin() emit a leading 1 followed by exactly
# four binary digits for any value in 0-15.
bin(16+0)
Out[180]:
In [ ]:
# Display the repr of the object currently bound to `env`.
env