notebook.community

Edit and run



In [5]:

    
# %matplotlib inline
# from matplotlib import pyplot as plt
import gym
import numpy as np
# from gym.envs.registration import register
# from gym import wrappers
# import shutil



In [6]:

    
env = gym.make('Taxi-v2')









    



[2017-08-01 12:24:33,630] Making new env: Taxi-v2



In [16]:

    
s=env.reset()



In [15]:

    
env.render()









    



+---------+
|R: | : :G|
| : : : : |
| : : : : |
| | : | : |
|Y| : |B: |
+---------+



In [17]:

    
s









    Out[17]:





329



In [19]:

    
env.reset()









    Out[19]:





152



In [21]:

    
env.step(0)









    Out[21]:





(252, -1, False, {'prob': 1.0})



In [22]:

    
env.render()









    Out[22]:





<bound method TimeLimit.render of <TimeLimit<TaxiEnv<Taxi-v2>>>>



In [77]:

    
def cap(x, down, up, ninter):
    if x<down:
        x=down
    if up<=x:
        x=up-1
    step=(up-down)/ninter
    #print x
    return (x-down)//step



In [80]:

    
down=10
up=30
for i in range(down-2, up+3):
    print "%s %s"%(i,cap(i,down,up,5))



In [222]:

    
class Env(object):
    def __init__(self):
        self.n_action=16
        self.n_state=500
    def get_state(self):        
        x1=cap(self.temp, 10, 25, 5)
        x2=cap(self.humi, 10, 100, 5)
        x3=cap(self.co2, 300, 3000, 5)
        x4=self.light
        x5=self.watp
        return x1*5*5*2*2 + x2*5*5*2*2 + x3*2*2 + x4*2 +x5
    
    def reset(self, temp=25, humi=80, co2=200, light=0, watp=0):        
        self.temp=temp
        self.humi=humi
        self.co2=co2
        self.light=light
        self.watp=watp
        return self.get_state()
    
    def set_target(self, temp=25, humi=80, co2=200, light=0, watp=0):
        self.t_temp=temp
        self.t_humi=humi
        self.t_co2=co2
        self.t_light=light
        self.t_watp=watp
        return self.get_state()
    
    def get_reward(self):
        if self.t_temp-1<self.temp and self.temp<=self.t_temp+1:
            if self.t_humi-5<self.humi and self.humi<=self.t_humi+5:
                return 1
        return -0.1
    
    def code2int(self,code):
        #(comp,mist,light,watp)
        return code[0]*8 + code[1]*4 + code[2]*2 + code[3]
    
    def int2code(self,a=0):
        b=bin(16+a)
        return int(b[-4]),int(b[-3]),int(b[-2]),int(b[-1])
    
    def step(self,a):
        comp,mist,light,watp = self.int2code(a)
        if comp==1:
            self.humi-=5
            self.temp-=1
        if mist==1:
            self.humi+=5
        if light==1:
            self.humi+=1
            self.temp+=1
        if watp==1:
            self.humi+=1
        return self.get_state(), self.get_reward(), 0, 0
    
    def render(self):
        print( "temp:{}, humi:{}, co2:{}, light:{}, watp{}".format(
             self.temp, self.humi, self.co2, self.light, self.watp))



In [223]:

    
env=Env()



In [224]:

    
env.int2code(15)









    Out[224]:





(1, 1, 1, 1)



In [225]:

    
env.code2int( (1,1,1,1) )









    Out[225]:





15



In [226]:

    
env.reset()









    Out[226]:





700



In [227]:

    
env.set_target(temp=10, humi=90, co2=200, light=0, watp=0)









    Out[227]:





700



In [228]:

    
env.code2int( code )









    Out[228]:





15



In [235]:

    
#(comp,mist,light,watp)
code=(0,0,0,1)
print env.step(env.code2int( code ))
print env.render()









    



(700, -0.1, 0, 0)
temp:23, humi:74, co2:200, light:0, watp0
None



In [90]:

    
env.step(0,0,0,0)









    Out[90]:





700



In [180]:

    
bin(16+0)









    Out[180]:





'0b10000'



In [ ]:

    
env