In [8]:
import os
os.environ['PYTHONASYNCIODEBUG'] = '1'
from agent import *
import asyncio
from qlearn import QLearn
from sarsa import Sarsa
import itertools
import functools
import json
import random
import sklearn
import collections
def merge(dicts):
    super_dict = collections.defaultdict(list)
    for d in dicts:
        for k, v in d.items():
            super_dict[k]+=v
    return super_dict
def tuple_namer(name,tupl):
    """convert an unnamed state tuple
    to a namedtuple object"""
    tupl_templ = collections.namedtuple(name, 'battery status neighbour')
    named = tupl_templ(battery = tupl[0], status = tupl[1], neighbour = tupl[2])
    return named
def dictionary_saver(d, filename):
    """d is a dictionary whose keys are of the form (namedtuple, 'string')"""
    json_friendly_d = {json.dumps(k):v for k,v in d.items()}
    sklearn.externals.joblib.dump(d, filename)

In [9]:
"""define our agents"""
#======states=========#
battery = range(6)
status = ['sleeping', 'running','recruiting', 'pending']
neighbour = ['sleeping', 'running']
all_vars = [battery,status, neighbour]
state_combinations = list(itertools.product(*all_vars))
#======actions========#
def go_to_sleep(old):
    new = old._replace(status = 'sleeping')
    if new.status!=old.status:
        broadcast_change(old, new)
    return new
def wakeup(old):
    new = old._replace(status = 'running')
    if new.status!=old.status:
        print('broadcasting change')
        broadcast_change(old, new)
    return new
def recruit(old, dest):
    new = old._replace(status = 'recruiting')
    return new
def noop(old):
    return old
def create_action_states(states):
    accept = wakeup
    deny = go_to_sleep
    actions_states_sleeping = {i:[noop, wakeup] for i in states if i.status=='sleeping'}
    actions_states_running = {i:[go_to_sleep, noop, recruit] for i in states if i.status == 'running'}
    actions_states_pending = {i:[deny, accept] for i in states if i.status=='pending'}
    actions_states_recruiting = {i:[noop] for i in states if i.status=='recruiting'}
    return merge([actions_states_sleeping, actions_states_running, actions_states_pending,actions_states_recruiting]) 
#####rewards###########
def state_rewards(state1, state2):
    if state2.battery == 0:
        return -10
    elif (state1.status == 'running' and state1.neighbour == 'running' and
         state2.status == 'running' and state2.neighbour == 'running'):
        return 2
    else:
        return 0
###agent 1###
states1 = [tuple_namer('Sensor1', i) for i in state_combinations]
initial_state1 = tuple_namer('Sensor1', (3,'running', 'sleeping'))
actions_states1 = create_action_states(states1)
agent1 = Agent(actions_states1, state_rewards, initial_state1, wakeup, Sarsa)
###agent 2###
states2 = [tuple_namer('Sensor2', i) for i in state_combinations]
initial_state2 = tuple_namer('Sensor2', (1,'sleeping', 'running'))
actions_states2 = create_action_states(states2)
agent2 = Agent(actions_states2, state_rewards, initial_state2, wakeup, Sarsa)


  File "<ipython-input-9-6d0535de1c65>", line 7
    state_combinations = list(itertools.product(*all_vars))e
                                                           ^
SyntaxError: invalid syntax

In [3]:
"""message passing between agents"""
qs = {'Sensor1':agent1.sensing_q, 'Sensor2':agent2.sensing_q}
def find_lead(qs,recruiter):
    """for recruiter, find potential helper"""
    all_candidates = [k for k in qs if k!=recruiter]
    print('all candidates: ', all_candidates)
    return all_candidates[0]
def broadcast_change(old_state, new_state):
    """gets called when a sensor changes
    from sleeping to awake, notifies the other
    sensors of this change"""
    def neighbor_changed(old_other, new_other,old_self):
        if old_other.status == 'pending':
            new_self = old_self._replace(status= 'running', neighbour=new_other.status)
        else:
            new_self = old_self._replace(neighbour=new_other.status)
        return new_self
    update_from = type(new_state).__name__
    print('update from: ', update_from)
    update_to = find_lead(qs, update_from)
    print('updating ', update_to, 'with info from ', update_from)
    neighbor_change_func = functools.partial(neighbor_changed,old_state, new_state)
    qs[update_to].put_nowait((0,neighbor_change_func))        
@asyncio.coroutine
def recruit_for(qs):
    """message broker that facilitates recruitment
    between sensors.qs is a dictionary of str, q pairs
    where str is the key describing which sensor the q belongs to"""
    def recruit_action(state):
        new_state = state._replace(status='pending')
        return new_state
    while True:
        recruiter = yield from recruiter_q.get()
        target = find_lead(qs, recruiter)
        print(recruiter, ' trying to recruit ', target)
        qs[target].put_nowait((1,recruit_action))

In [4]:
"""now define our environments"""
#=====autonomous actions=======#
@asyncio.coroutine
def battery_action(q):
    sunny = True
    def adjust_battery(is_sunny, sensor):
        if sensor.status =='sleeping':
            new_battery = sensor.battery + (1 + is_sunny*1)#increase by 1 if not sunny, by 2 if sunny
            sensor = sensor._replace(battery=new_battery)
        else:
            new_battery = sensor.battery - (2 - is_sunny*1)
            sensor = sensor._replace(battery=new_battery)
        if sensor.battery<=0:
            sensor = sensor._replace(battery=0, status ='sleeping')
        if sensor.battery>=5:
            sensor = sensor._replace(battery=5, status ='running')
        return sensor
    while True:
        if random.random()<0.1:
            sunny = not sunny
        adjust_battery_sunny = functools.partial(adjust_battery, sunny)
        yield from asyncio.sleep(2)
        yield from q.put((1,adjust_battery_sunny))
        print('sensing q: ', q.qsize())
#======reactions to agent actions==========#
def reaction_default(state1,state2, action):
    return state2
def recruiting_reaction(state1, state2, action):
    """try to recruit"""
    requester = type(state1).__name__
    recruiter_q.put_nowait(requester)
    final_state = state2#._replace(status='running')
    return final_state

env_reactions = {'go_to_sleep':reaction_default, 'wakeup':reaction_default,
                 'noop':reaction_default, 'accept': reaction_default,
                 'deny':reaction_default, 'recruit':recruiting_reaction}
env1 = Environment(env_reactions,[battery_action], agent1.sensing_q, agent1.action_q)
env2 = Environment(env_reactions,[battery_action], agent2.sensing_q, agent2.action_q)

In [5]:
"""now run the simulation"""
loop = asyncio.get_event_loop()
tasks = [agent1.experience_environment(), env1.react_to_action(),
         agent2.experience_environment(), env2.react_to_action(),
         recruit_for(qs)]
for i in env1.env_actions:
    tasks.append(i(env1.sensing_q))
for j in env2.env_actions:
    tasks.append(j(env2.sensing_q))
def loop_stopper():
    loop.stop()
loop.call_later(1000, loop_stopper) 
loop.run_until_complete(asyncio.wait(tasks))
loop.close()


got a state modifier  (1, <function wakeup at 0x7f21543066a8>)
previous:  Sensor2(battery=1, status='sleeping', neighbour='running') <function wakeup at 0x7f21543066a8>
broadcasting change
update from:  Sensor2
all candidates:  ['Sensor1']
updating  Sensor1 with info from  Sensor2
changed:  Sensor2(battery=1, status='running', neighbour='running')
change in state
available:  [<function go_to_sleep at 0x7f2179053598>, <function noop at 0x7f2154306840>, <function recruit at 0x7f2154306730>]
q:  [0.0, 0.0, 0.0]
learning:  Sensor2(battery=1, status='sleeping', neighbour='running') <function wakeup at 0x7f21543066a8> Sensor2(battery=1, status='running', neighbour='running') 0
got a state modifier  (1, <function wakeup at 0x7f21543066a8>)
previous:  Sensor1(battery=3, status='running', neighbour='sleeping') <function wakeup at 0x7f21543066a8>
changed:  Sensor1(battery=3, status='running', neighbour='sleeping')
no change in state Sensor1(battery=3, status='running', neighbour='sleeping') Sensor1(battery=3, status='running', neighbour='sleeping')
got a state modifier  (0, functools.partial(<function broadcast_change.<locals>.neighbor_changed at 0x7f21541dcbf8>, Sensor2(battery=1, status='sleeping', neighbour='running'), Sensor2(battery=1, status='running', neighbour='running')))
previous:  Sensor1(battery=3, status='running', neighbour='sleeping') <function wakeup at 0x7f21543066a8>
changed:  Sensor1(battery=3, status='running', neighbour='running')
change in state
available:  [<function go_to_sleep at 0x7f2179053598>, <function noop at 0x7f2154306840>, <function recruit at 0x7f2154306730>]
q:  [0.0, 0.0, 0.0]
learning:  Sensor1(battery=3, status='running', neighbour='sleeping') <function wakeup at 0x7f21543066a8> Sensor1(battery=3, status='running', neighbour='running') 0
update from:  Sensor1
all candidates:  ['Sensor2']
updating  Sensor2 with info from  Sensor1
put it on the q
got a state modifier  (0, functools.partial(<function broadcast_change.<locals>.neighbor_changed at 0x7f21541dcd08>, Sensor1(battery=3, status='running', neighbour='running'), Sensor1(battery=3, status='sleeping', neighbour='running')))
previous:  Sensor2(battery=1, status='running', neighbour='running') <function recruit at 0x7f2154306730>
changed:  Sensor2(battery=1, status='running', neighbour='sleeping')
change in state
available:  [<function go_to_sleep at 0x7f2179053598>, <function noop at 0x7f2154306840>, <function recruit at 0x7f2154306730>]
q:  [0.0, 0.0, 0.0]
learning:  Sensor2(battery=1, status='running', neighbour='running') <function recruit at 0x7f2154306730> Sensor2(battery=1, status='running', neighbour='sleeping') 0
got a state modifier  (0, functools.partial(<function Environment.modify_state.<locals>.env_and_action_modifier at 0x7f21541dcea0>, <function reaction_default at 0x7f21541ce0d0>, <function go_to_sleep at 0x7f2179053598>))
previous:  Sensor1(battery=3, status='running', neighbour='running') <function go_to_sleep at 0x7f2179053598>
update from:  Sensor1
all candidates:  ['Sensor2']
updating  Sensor2 with info from  Sensor1
changed:  Sensor1(battery=3, status='sleeping', neighbour='running')
change in state
available:  [<function noop at 0x7f2154306840>, <function wakeup at 0x7f21543066a8>]
q:  [0.0, 0.0]
learning:  Sensor1(battery=3, status='running', neighbour='running') <function go_to_sleep at 0x7f2179053598> Sensor1(battery=3, status='sleeping', neighbour='running') 0
got a state modifier  (0, functools.partial(<function broadcast_change.<locals>.neighbor_changed at 0x7f21541dcbf8>, Sensor1(battery=3, status='running', neighbour='running'), Sensor1(battery=3, status='sleeping', neighbour='running')))
previous:  Sensor2(battery=1, status='running', neighbour='sleeping') <function recruit at 0x7f2154306730>
changed:  Sensor2(battery=1, status='running', neighbour='sleeping')
no change in state Sensor2(battery=1, status='running', neighbour='sleeping') Sensor2(battery=1, status='running', neighbour='sleeping')
put it on the q
got a state modifier  (0, functools.partial(<function Environment.modify_state.<locals>.env_and_action_modifier at 0x7f21541dcd08>, <function reaction_default at 0x7f21541ce0d0>, <function noop at 0x7f2154306840>))
previous:  Sensor1(battery=3, status='sleeping', neighbour='running') <function noop at 0x7f2154306840>
changed:  Sensor1(battery=3, status='sleeping', neighbour='running')
no change in state Sensor1(battery=3, status='sleeping', neighbour='running') Sensor1(battery=3, status='sleeping', neighbour='running')
sensing q:  0
got a state modifier  (1, functools.partial(<function battery_action.<locals>.adjust_battery at 0x7f21541dca60>, True))
previous:  Sensor1(battery=3, status='sleeping', neighbour='running') <function noop at 0x7f2154306840>
changed:  Sensor1(battery=5, status='running', neighbour='running')
change in state
available:  [<function go_to_sleep at 0x7f2179053598>, <function noop at 0x7f2154306840>, <function recruit at 0x7f2154306730>]
q:  [0.0, 0.0, 0.0]
learning:  Sensor1(battery=3, status='sleeping', neighbour='running') <function noop at 0x7f2154306840> Sensor1(battery=5, status='running', neighbour='running') 0
update from:  Sensor1
all candidates:  ['Sensor2']
updating  Sensor2 with info from  Sensor1
put it on the q
got a state modifier  (0, functools.partial(<function broadcast_change.<locals>.neighbor_changed at 0x7f21541dc950>, Sensor1(battery=5, status='running', neighbour='running'), Sensor1(battery=5, status='sleeping', neighbour='running')))
previous:  Sensor2(battery=1, status='running', neighbour='sleeping') <function recruit at 0x7f2154306730>
changed:  Sensor2(battery=1, status='running', neighbour='sleeping')
no change in state Sensor2(battery=1, status='running', neighbour='sleeping') Sensor2(battery=1, status='running', neighbour='sleeping')
got a state modifier  (0, functools.partial(<function Environment.modify_state.<locals>.env_and_action_modifier at 0x7f21541dcea0>, <function reaction_default at 0x7f21541ce0d0>, <function go_to_sleep at 0x7f2179053598>))
previous:  Sensor1(battery=5, status='running', neighbour='running') <function go_to_sleep at 0x7f2179053598>
update from:  Sensor1
all candidates:  ['Sensor2']
updating  Sensor2 with info from  Sensor1
changed:  Sensor1(battery=5, status='sleeping', neighbour='running')
change in state
available:  [<function noop at 0x7f2154306840>, <function wakeup at 0x7f21543066a8>]
q:  [0.0, 0.0]
learning:  Sensor1(battery=5, status='running', neighbour='running') <function go_to_sleep at 0x7f2179053598> Sensor1(battery=5, status='sleeping', neighbour='running') 0
got a state modifier  (0, functools.partial(<function broadcast_change.<locals>.neighbor_changed at 0x7f21541dcbf8>, Sensor1(battery=5, status='running', neighbour='running'), Sensor1(battery=5, status='sleeping', neighbour='running')))
previous:  Sensor2(battery=1, status='running', neighbour='sleeping') <function recruit at 0x7f2154306730>
changed:  Sensor2(battery=1, status='running', neighbour='sleeping')
no change in state Sensor2(battery=1, status='running', neighbour='sleeping') Sensor2(battery=1, status='running', neighbour='sleeping')
put it on the q
sensing q:  0
got a state modifier  (0, functools.partial(<function Environment.modify_state.<locals>.env_and_action_modifier at 0x7f21541dc950>, <function reaction_default at 0x7f21541ce0d0>, <function noop at 0x7f2154306840>))
previous:  Sensor1(battery=5, status='sleeping', neighbour='running') <function noop at 0x7f2154306840>
changed:  Sensor1(battery=5, status='sleeping', neighbour='running')
no change in state Sensor1(battery=5, status='sleeping', neighbour='running') Sensor1(battery=5, status='sleeping', neighbour='running')
got a state modifier  (1, functools.partial(<function battery_action.<locals>.adjust_battery at 0x7f21541dcc80>, True))
previous:  Sensor2(battery=1, status='running', neighbour='sleeping') <function recruit at 0x7f2154306730>
changed:  Sensor2(battery=0, status='sleeping', neighbour='sleeping')
change in state
available:  [<function noop at 0x7f2154306840>, <function wakeup at 0x7f21543066a8>]
q:  [0.0, 0.0]
learning:  Sensor2(battery=1, status='running', neighbour='sleeping') <function recruit at 0x7f2154306730> Sensor2(battery=0, status='sleeping', neighbour='sleeping') -10
sensing q:  0
got a state modifier  (1, functools.partial(<function battery_action.<locals>.adjust_battery at 0x7f21541dca60>, True))
previous:  Sensor1(battery=5, status='sleeping', neighbour='running') <function noop at 0x7f2154306840>
changed:  Sensor1(battery=5, status='running', neighbour='running')
change in state
available:  [<function go_to_sleep at 0x7f2179053598>, <function noop at 0x7f2154306840>, <function recruit at 0x7f2154306730>]
q:  [0, 0.0, 0.0]
learning:  Sensor1(battery=5, status='sleeping', neighbour='running') <function noop at 0x7f2154306840> Sensor1(battery=5, status='running', neighbour='running') 0
sensing q:  0
got a state modifier  (1, functools.partial(<function battery_action.<locals>.adjust_battery at 0x7f21541dcc80>, True))
previous:  Sensor2(battery=0, status='sleeping', neighbour='sleeping') <function wakeup at 0x7f21543066a8>
changed:  Sensor2(battery=2, status='sleeping', neighbour='sleeping')
change in state
available:  [<function noop at 0x7f2154306840>, <function wakeup at 0x7f21543066a8>]
q:  [0.0, 0.0]
learning:  Sensor2(battery=0, status='sleeping', neighbour='sleeping') <function wakeup at 0x7f21543066a8> Sensor2(battery=2, status='sleeping', neighbour='sleeping') 0
sensing q:  0
got a state modifier  (1, functools.partial(<function battery_action.<locals>.adjust_battery at 0x7f21541dca60>, True))
previous:  Sensor1(battery=5, status='running', neighbour='running') <function recruit at 0x7f2154306730>
changed:  Sensor1(battery=4, status='running', neighbour='running')
change in state
available:  [<function go_to_sleep at 0x7f2179053598>, <function noop at 0x7f2154306840>, <function recruit at 0x7f2154306730>]
q:  [0.0, 0.0, 0.0]
learning:  Sensor1(battery=5, status='running', neighbour='running') <function recruit at 0x7f2154306730> Sensor1(battery=4, status='running', neighbour='running') 2
sensing q:  0
got a state modifier  (1, functools.partial(<function battery_action.<locals>.adjust_battery at 0x7f21541dcc80>, True))
previous:  Sensor2(battery=2, status='sleeping', neighbour='sleeping') <function noop at 0x7f2154306840>
changed:  Sensor2(battery=4, status='sleeping', neighbour='sleeping')
change in state
available:  [<function noop at 0x7f2154306840>, <function wakeup at 0x7f21543066a8>]
q:  [0.0, 0.0]
learning:  Sensor2(battery=2, status='sleeping', neighbour='sleeping') <function noop at 0x7f2154306840> Sensor2(battery=4, status='sleeping', neighbour='sleeping') 0
sensing q:  0
got a state modifier  (1, functools.partial(<function battery_action.<locals>.adjust_battery at 0x7f21541dca60>, True))
previous:  Sensor1(battery=4, status='running', neighbour='running') <function noop at 0x7f2154306840>
changed:  Sensor1(battery=3, status='running', neighbour='running')
change in state
available:  [<function go_to_sleep at 0x7f2179053598>, <function noop at 0x7f2154306840>, <function recruit at 0x7f2154306730>]
q:  [0, 0.0, 0.0]
learning:  Sensor1(battery=4, status='running', neighbour='running') <function noop at 0x7f2154306840> Sensor1(battery=3, status='running', neighbour='running') 2
sensing q:  0
got a state modifier  (1, functools.partial(<function battery_action.<locals>.adjust_battery at 0x7f21541dcc80>, False))
previous:  Sensor2(battery=4, status='sleeping', neighbour='sleeping') <function noop at 0x7f2154306840>
changed:  Sensor2(battery=5, status='running', neighbour='sleeping')
change in state
available:  [<function go_to_sleep at 0x7f2179053598>, <function noop at 0x7f2154306840>, <function recruit at 0x7f2154306730>]
q:  [0.0, 0.0, 0.0]
learning:  Sensor2(battery=4, status='sleeping', neighbour='sleeping') <function noop at 0x7f2154306840> Sensor2(battery=5, status='running', neighbour='sleeping') 0
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-5-3cf364da8a3f> in <module>()
     11     loop.stop()
     12 loop.call_later(1000, loop_stopper)
---> 13 loop.run_until_complete(asyncio.wait(tasks))
     14 loop.close()

/usr/lib/python3.4/asyncio/base_events.py in run_until_complete(self, future)
    302         future.add_done_callback(_run_until_complete_cb)
    303         try:
--> 304             self.run_forever()
    305         except:
    306             if new_task and future.done() and not future.cancelled():

/usr/lib/python3.4/asyncio/base_events.py in run_forever(self)
    274             while True:
    275                 try:
--> 276                     self._run_once()
    277                 except _StopError:
    278                     break

/usr/lib/python3.4/asyncio/base_events.py in _run_once(self)
   1115         if self._debug and timeout != 0:
   1116             t0 = self.time()
-> 1117             event_list = self._selector.select(timeout)
   1118             dt = self.time() - t0
   1119             if dt >= 1.0:

/usr/lib/python3.4/selectors.py in select(self, timeout)
    430             ready = []
    431             try:
--> 432                 fd_event_list = self._epoll.poll(timeout, max_ev)
    433             except InterruptedError:
    434                 return ready

KeyboardInterrupt: 

In [ ]:
agent1.learner.q

In [ ]:
b = collections.namedtuple('Named','status')
c = b(status = True)

In [ ]:
c = c._replace(status = False)

In [ ]:
c

In [ ]:


In [ ]: