In [5]:
# %matplotlib inline
# from matplotlib import pyplot as plt
import gym
import numpy as np
# from gym.envs.registration import register
# from gym import wrappers
# import shutil

In [6]:
env = gym.make('Taxi-v2')


[2017-08-01 12:24:33,630] Making new env: Taxi-v2

In [16]:
s=env.reset()

In [15]:
env.render()


+---------+
|R: | : :G|
| : : : : |
| : : : : |
| | : | : |
|Y| : |B: |
+---------+


In [17]:
s


Out[17]:
329

In [19]:
env.reset()


Out[19]:
152

In [21]:
env.step(0)


Out[21]:
(252, -1, False, {'prob': 1.0})

In [22]:
env.render()


Out[22]:
<bound method TimeLimit.render of <TimeLimit<TaxiEnv<Taxi-v2>>>>

In [77]:
def cap(x, down, up, ninter):
    if x<down:
        x=down
    if up<=x:
        x=up-1
    step=(up-down)/ninter
    #print x
    return (x-down)//step

In [80]:
down=10
up=30
for i in range(down-2, up+3):
    print "%s %s"%(i,cap(i,down,up,5))


8 0
9 0
10 0
11 0
12 0
13 0
14 1
15 1
16 1
17 1
18 2
19 2
20 2
21 2
22 3
23 3
24 3
25 3
26 4
27 4
28 4
29 4
30 4
31 4
32 4

In [222]:
class Env(object):
    def __init__(self):
        self.n_action=16
        self.n_state=500
    def get_state(self):        
        x1=cap(self.temp, 10, 25, 5)
        x2=cap(self.humi, 10, 100, 5)
        x3=cap(self.co2, 300, 3000, 5)
        x4=self.light
        x5=self.watp
        return x1*5*5*2*2 + x2*5*5*2*2 + x3*2*2 + x4*2 +x5
    
    def reset(self, temp=25, humi=80, co2=200, light=0, watp=0):        
        self.temp=temp
        self.humi=humi
        self.co2=co2
        self.light=light
        self.watp=watp
        return self.get_state()
    
    def set_target(self, temp=25, humi=80, co2=200, light=0, watp=0):
        self.t_temp=temp
        self.t_humi=humi
        self.t_co2=co2
        self.t_light=light
        self.t_watp=watp
        return self.get_state()
    
    def get_reward(self):
        if self.t_temp-1<self.temp and self.temp<=self.t_temp+1:
            if self.t_humi-5<self.humi and self.humi<=self.t_humi+5:
                return 1
        return -0.1
    
    def code2int(self,code):
        #(comp,mist,light,watp)
        return code[0]*8 + code[1]*4 + code[2]*2 + code[3]
    
    def int2code(self,a=0):
        b=bin(16+a)
        return int(b[-4]),int(b[-3]),int(b[-2]),int(b[-1])
    
    def step(self,a):
        comp,mist,light,watp = self.int2code(a)
        if comp==1:
            self.humi-=5
            self.temp-=1
        if mist==1:
            self.humi+=5
        if light==1:
            self.humi+=1
            self.temp+=1
        if watp==1:
            self.humi+=1
        return self.get_state(), self.get_reward(), 0, 0
    
    def render(self):
        print( "temp:{}, humi:{}, co2:{}, light:{}, watp{}".format(
             self.temp, self.humi, self.co2, self.light, self.watp))

In [223]:
env=Env()

In [224]:
env.int2code(15)


Out[224]:
(1, 1, 1, 1)

In [225]:
env.code2int( (1,1,1,1) )


Out[225]:
15

In [226]:
env.reset()


Out[226]:
700

In [227]:
env.set_target(temp=10, humi=90, co2=200, light=0, watp=0)


Out[227]:
700

In [228]:
env.code2int( code )


Out[228]:
15

In [235]:
#(comp,mist,light,watp)
code=(0,0,0,1)
print env.step(env.code2int( code ))
print env.render()


(700, -0.1, 0, 0)
temp:23, humi:74, co2:200, light:0, watp0
None

In [90]:
env.step(0,0,0,0)


Out[90]:
700

In [180]:
bin(16+0)


Out[180]:
'0b10000'

In [ ]:
env