Wayne Nixalo - 2018/3/14-15
My wanting to explore the time-complexity of the Bellman Equation (here in 6.S094 (2018) Lecture 3) turned into Python OOP practice.
https://www.python-course.eu/graphs_python.php
http://ls.pwd.io/2014/08/singly-and-doubly-linked-lists-in-python/
In [556]:
class Graph:
    """N-ary tree of states for exploring the Bellman / Q-Learning equation.

    Nodes are stored in a flat dict: ``graph[name] -> [[name, weight], ...]``.
    For every node the FIRST entry is the edge to its superior (parent); the
    remaining entries are edges to its subordinates (children). A 'ROOT'
    sentinel sits above the initial state 'STATE_00'.

    Args:
        depth:   number of state levels to build below the root state.
        actions: list of action weights; one child state is created per
                 action at every level (labelled A, B, C, ...).
    """

    def __init__(self, depth, actions):
        self.depth = depth
        self.actions = actions
        self.graph = {}
        # root sentinel connected to the initial state with zero weight
        self.__addNode('ROOT', 'STATE_00', 0.0)
        self.__populate()

    def __addNode(self, sup, sub, w):  # superior, subordinate, weight
        """Link `sup` -> `sub` with weight `w`, recording both directions."""
        if sup not in self.graph:
            self.graph[sup] = []
        self.graph[sup].append([sub, w])
        # a subordinate is always new in this tree, so its list starts fresh
        self.graph[sub] = []
        self.graph[sub].append([sup, w])

    def __populate(self):
        """Build the full `depth`-level action tree, level by level."""
        for d in range(self.depth):
            for state in self.getLevel(d):
                # action-path suffix: keep the letters after the level digit
                # (e.g. 'A' from 'STATE_1A'); drop it when it is merely the
                # second digit of 'STATE_00'
                suffix = state.split('_')[-1][1:]
                if not suffix[-1].isalpha():
                    suffix = ''
                # state name + next depth level
                base_name = state.split('_')[0] + '_' + str(d + 1)
                # one child per action
                for a, w in enumerate(self.actions):
                    self.__addNode(state, base_name + suffix + chr(ord('A') + a), w)

    def __getitem__(self, key):
        """Case-insensitive node lookup; an empty key returns the 'ENDE' marker."""
        if key == '' or key == []:
            return 'ENDE'
        return self.graph[key.upper()]

    def getLevel(self, level):
        """Return all state names at depth `level`.

        NOTE: reads only the first character of the name's suffix, so this
        works for depth < 10.
        """
        # BUGFIX: previously iterated the *global* `graph` instead of
        # self.graph, raising NameError on first construction.
        return [key for key in self.graph.keys()
                if key.split('_')[-1][0].isdigit()
                and int(key.split('_')[-1][0]) == level]

    def superior(self, key):
        """Return the incoming (superior) edge ``[name, weight]``."""
        return self.graph[key.upper()][0]

    def subordinates(self, key):
        """Return all outgoing (subordinate) edges ``[[name, weight], ...]``."""
        return self.graph[key.upper()][1:]

    def keys(self):
        """Expose the underlying dict's keys."""
        return self.graph.keys()

    def __depthPrint(self, key, depth=0):
        """Recursively print the depth-first structure of the graph."""
        depth += 1
        subs = self.subordinates(key)  # subs = [['state_xx', num], ...]
        for sub in subs:
            indent = "> " * depth
            print(indent + sub[0] + f' - {str(sub[1])}')
            self.__depthPrint(sub[0], depth)
        return

    def display(self):
        """Breadth-first display of the graph; arrows indicate state depth.

        NOTE: works for depth < 10 (single digit read from the name).
        """
        for key in self.keys():
            # indent proportional to the state's depth level (ROOT is level 0)
            level = 0 if key == 'ROOT' else max(0, int(key.split('_')[-1][0]))
            indent = " " * level
            print(f'{"> " * (len(indent) // 2)}{key}')
            edges = self.__getitem__(key) if key == 'ROOT' else self.subordinates(key)
            print(f'{indent + " "}{edges}')

    def displayNested(self):
        """Depth-first display of the graph, starting below ROOT."""
        root_child = self.graph['ROOT'][0][0]  # graph['ROOT'] = [['STATE_00', num]]
        print(root_child)
        self.__depthPrint(root_child)

    def getCost(self, state):
        """Weight of an edge pair ``[name, weight]``."""
        return state[1]

    def currentCost(self, key):
        """Sum the edge weights on the path from `key` back up to ROOT."""
        cost = 0.0
        key = key.upper()
        while key != 'ROOT':
            sup = self.superior(key)
            cost += sup[1]
            key = sup[0]
        return cost

    def nextQMove(self, state):
        """Greedy one-step lookahead.

        Returns the subordinate edge ``[name, weight]`` with the highest
        action weight, or None when `state` is a leaf. Accepts either a
        ``[name, weight]`` pair or the single-element list that
        ``__getitem__`` returns for ROOT.
        """
        # placeholders for the full Bellman/Q-Learning update (not yet used)
        learning_rate = 0.1
        discount_fact = 0.5
        if isinstance(state[0], list):  # unwrap: __getitem__ returns [state]
            state = state[0]
        # find highest-reward move
        current_cost = self.currentCost(state[0])
        states = self.subordinates(state[0])
        if len(states) == 0:
            return None
        # BUGFIX: was max(enumerate(states), key=lambda x: x[1]), which
        # compared the [name, weight] pairs lexicographically by NAME
        # rather than by weight.
        next_state = max(states, key=lambda s: s[1])
        # Bellman QLearning Equation - I *think* it's like this
        # next_cost = currentCost(state[0]) +
        #     learning_rate * (next_state[1] + discount_fact*next_state[1] - current_cost)
        # NOTE: why is the above necessary if I can get the cost of an action,
        # and my current state cost?
        return next_state
In [557]:
graph['root']
Out[557]:
In [567]:
# Demo: follow the greedy policy from the root, accumulating edge weights.
graph = Graph(3, [2, 3, 1])
state = graph['root'][0]
policy_cost = 0.0
# IDIOM FIX: compare against None with `is not`, not `!=`
while state is not None:
    policy_cost += state[1]
    print(state)
    state = graph.nextQMove(state)  # None once a leaf state is reached
print(policy_cost)
Right. So it kind of does its job. But that's not exactly what it's supposed to do, I think. A better implementation would put this in a full graph — which means I'd have to expand my Graph class to be more than just an n-ary tree. I could use the dictionary structure to build an in_vertex / out_vertex abstraction.
I'd also need to understand that Q-Learning equation a bit more. What does the term γ·max_a Q_t(s_{t+1}, a) − Q_t(s_t, a_t)
mean? Is it the option that adds the greatest reward? If so, why is there an R_{t+1} term added to it? Isn't that the reward of the next action?
What is really meant by 'Old State'? I understand 'New State' ie: the new state.. but how do you add a reward value onto the Old State?
Anyway, I kinda get the gist of this toy problem. This was a good exercise in writing OOP code in Python.
Oh, I get it now. R_{t+1} is the reward gotten for taking action a at state s (the current state). It's then a sum of that reward with the maximum achievable summed reward of all future actions & states, discounted, and minus the current state's cost — or something like that. So yeah, it does do a big bit of computation, because for every move it calculates the potential of every possible subsequent move, and of those chooses the 'best' one. Got it.
scratch work below:
In [494]:
# https://stackoverflow.com/a/28906262
# Scratch: find the (index, value) pair with the largest value.
a = [3, 2, 1, 4, 5]
print(a)
indexed = list(enumerate(a))
print(indexed)
largest = max(indexed, key=lambda pair: pair[1])
print(largest)
In [495]:
graph = Graph(3, [2,3,1])
# graph = Graph(1, [1,1])
In [496]:
state = graph['root']
graph.nextQMove(state)
Out[496]:
In [497]:
graph.displayNested()
In [444]:
graph.currentCost(graph.subordinates('STATE_00')[0][0])
Out[444]:
In [445]:
graph.currentCost('STATE_00')
Out[445]:
In [432]:
graph.subordinates('STATE_00')[0]
Out[432]:
In [433]:
graph.superior('STATE_1A')
Out[433]:
In [434]:
graph.superior(['State_1a',2][0])
Out[434]:
In [435]:
graph.subordinates('STATE_00')[0][0]
Out[435]:
In [398]:
# essentially depth-first display
graph.displayNested()
In [396]:
# basically breadth-first display
graph.display()
In [333]:
for state in graph.subordinates(graph.getLevel(0)[0]):
print(state)
In [301]:
graph.keys()
Out[301]:
In [321]:
graph.display()
In [273]:
graph.getLevel(0)
Out[273]:
In [ ]:
In [ ]:
In [264]:
graph['STATE_1A']
Out[264]:
In [261]:
graph.display()
In [209]:
graph.keys()
Out[209]:
In [ ]:
[k]
In [235]:
depth = 0
# Prototype of Graph.getLevel: keep keys whose first suffix character is a
# digit equal to `depth` (reads one char, so only works for depth < 10).
# NOTE(review): relies on the notebook-global `graph` from an earlier cell.
states = [key for key in graph.keys() if key.split('_')[-1][0].isdigit() and int(key.split('_')[-1][0]) == depth]
states
Out[235]:
In [227]:
states = [key for key in graph.keys()]; states
Out[227]:
In [233]:
[key for key in states if key.split('_')[-1][0].isdigit()]
Out[233]:
In [232]:
states[1].split('_')[-1][0].isdigit()
Out[232]:
In [ ]:
In [208]:
graph['root'][0][0]
Out[208]:
In [65]:
graph['state_00']
Out[65]:
In [66]:
graph.graph
Out[66]:
In [67]:
graph.keys()
Out[67]:
In [68]:
graph['root']
Out[68]:
In [69]:
graph['state_00']
Out[69]:
In [70]:
graph['state_1A']
Out[70]:
In [71]:
graph['state_1B']
Out[71]:
In [72]:
graph.superior('state_00')
Out[72]:
Alrighty. Let's implement a 4-state deep graph with 3 actions at each level.
In [175]:
# action weights
# Build a 4-level graph with three actions (weights 3, 4, 5) per state;
# `actions`, `depth`, and `graph` are reused by later cells.
actions = [3, 4, 5]
depth = 4
graph = Graph(depth, actions)
In [176]:
graph.graph
Out[176]:
In [177]:
graph.display()
In [ ]:
# def __populate(self):
# base_name = 'STATE_'
# for a in range(len(actions)):
# self.__addNode('STATE_00',
# 'base_name' + str(d+1) + chr(ord('A') + a),
# self.actions[a])
# for d in range(1, depth):
# for a in range(len(actions)):
# base_name = 'STATE_' + str(d) + chr(ord('A') + a)
# # TODO
# STATE_00
# \----STATE_1A
# \--------STATE_2AA
# \-------STATE_2AB
# \------STATE_2AC
# \---STATE_1B
# \--------STATE_2BA
# \-------STATE_2BB
# \------STATE_2BC
# \--STATE_1C
# \--------STATE_2CA
# \-------STATE_2CB
# \------STATE_2CC
# for d in range(self.depth):
# self.__addNode(
# node_name = self.__getitem__('root')[0][0]
# for d in range(self.depth):
# for a in range(len(self.actions)):
# self.__addNode(node_name, )
# self.__addNode('STATE_0' + str(d),
# 'STATE_' + str(d+1) + chr(ord('A') + a),
# self.actions[a])