In [1]:
import sys
sys.path.append('../')
from cogsci2017.environment.arm_diva_env import CogSci2017Environment
from cogsci2017.learning.supervisor import Supervisor
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
iterations = 2000
environment = CogSci2017Environment(gui=False, audio=False)
config = dict(m_mins=environment.conf.m_mins,
              m_maxs=environment.conf.m_maxs,
              s_mins=environment.conf.s_mins,
              s_maxs=environment.conf.s_maxs)
agent = Supervisor(config, model_babbling="random", n_motor_babbling=1000, explo_noise=0.05)
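The agent and the environment above share the motor and sensory bounds passed through config. As a quick sanity check (a minimal sketch relying only on the conf attributes already used above), the dimensions of both spaces can be printed:
In [ ]:
# Sanity check: dimensions of the motor and sensory spaces the Supervisor was configured with.
print "motor dims:", len(environment.conf.m_mins)
print "sensory dims:", len(environment.conf.s_mins)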
In [2]:
for i in range(iterations):
    if i % (iterations / 10) == 0:
        print "Iteration", i
    # One sensorimotor iteration: read the current context, let the agent choose
    # a motor command, execute it in the environment, and feed back the outcome.
    context = environment.get_current_context()
    m = agent.produce(context)
    s = environment.update(m)
    agent.perceive(s)
    # s_traj = environment.diva_traj
    # if s_traj is not None:
    #     if i < 100:
    #         plt.plot([-f[1] for f in s_traj], [-f[0] for f in s_traj], color="r", alpha=0.2)
    #     else:
    #         plt.plot([-f[1] for f in s_traj], [-f[0] for f in s_traj], color="b", alpha=0.2)
#
#plt.xlim([-11.25, -9.5])
#plt.ylim([-9.25, -7.5])
In [4]:
#for hs in human_sounds:
hs = "oiy"
# Exploration noise on the 28 DIVA (vocal tract) motor dimensions.
agent.modules["mod13"].sm.sigma_expl = np.array([0.1] * 28)
#print agent.modules["mod13"].sm.sigma_expl
# Infer a vocal motor command reproducing the target sound trajectory, with exploration noise.
m = agent.modules["mod13"].inverse(np.array(environment.human_sounds_traj_std[hs]), explore=True)
#print m
# Execute it: the 21 arm dimensions are set to 0, only the DIVA part is driven.
s = environment.update([0.] * 21 + list(m))
s_traj = environment.diva_traj
#print s_traj
# Error between the target trajectory and the produced one, sampled at 5 time
# steps (first trajectory component for the 5 steps, then the second).
error = np.linalg.norm(np.array(environment.human_sounds_traj[hs]) -
                       np.array([f[0] for f in s_traj[[0, 12, 24, 37, 49]]] +
                                [f[1] for f in s_traj[[0, 12, 24, 37, 49]]]))
print "error", hs, error
plt.plot([-f[1] for f in s_traj], [-f[0] for f in s_traj], color="b", alpha=0.2)
plt.plot(- np.array(environment.human_sounds_traj[hs][5:]), -np.array(environment.human_sounds_traj[hs][:5]), lw=2)
plt.xlim([-11.25,-9.5])
plt.ylim([-9.25, -7.5])
Out[4]:
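The cell above tests imitation of a single target sound with exploration noise. Below is a minimal sketch of the same measurement without noise, looping over a list of sound labels; the list is a placeholder, since only "oiy" is confirmed by the code above.
In [ ]:
for hs in ["oiy"]:  # replace with the actual human sound labels used by the environment
    m = agent.modules["mod13"].inverse(np.array(environment.human_sounds_traj_std[hs]), explore=False)
    s = environment.update([0.] * 21 + list(m))
    s_traj = environment.diva_traj
    produced = [f[0] for f in s_traj[[0, 12, 24, 37, 49]]] + [f[1] for f in s_traj[[0, 12, 24, 37, 49]]]
    print "error", hs, np.linalg.norm(np.array(environment.human_sounds_traj[hs]) - np.array(produced))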
In [3]:
for i in range(100):
    # Sample a random goal in [-1, 1]^10 and infer a DIVA motor command for it
    # with the vocal module "mod10".
    context = environment.get_current_context()[:4]
    s_goal = [2. * np.random.random() - 1. for _ in range(10)]
    m = agent.modules["mod10"].inverse(np.array(context + s_goal), explore=True)
    m = [0.] * 21 + list(m)
    s = environment.update(m)
    if environment.produced_sound:
        print environment.produced_sound
    agent.perceive(s)
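A variant of the loop above that also counts how many of the 100 random vocal goals led to a recognized sound (same calls as above, with an extra counter):
In [ ]:
n_produced = 0
for i in range(100):
    context = environment.get_current_context()[:4]
    s_goal = [2. * np.random.random() - 1. for _ in range(10)]
    m = [0.] * 21 + list(agent.modules["mod10"].inverse(np.array(context + s_goal), explore=True))
    s = environment.update(m)
    if environment.produced_sound:
        n_produced += 1
    agent.perceive(s)
print "recognized sounds:", n_produced, "/ 100"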
In [3]:
# Inspect the raw data stored so far in the "mod10" sensorimotor model's dataset.
print agent.modules["mod10"].sm.model.imodel.fmodel.dataset.data[1]
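The same dataset object supports len(), as used in the evaluation cell below; a one-line check of how many points each vocal module has collected so far (module ids taken from the cells above and below):
In [ ]:
for mid in ["mod10", "mod11", "mod12", "mod13"]:
    print mid, len(agent.modules[mid].sm.model.imodel.fmodel.dataset)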
In [3]:
# TEST EVAL
n_goals = 1
eval_results = {}
for region in [1]:
    print
    print "region", region
    eval_results[region] = {}
    for i in range(n_goals):
        eval_results[region][i] = {}
        #environment.reset_toys()
        print environment.get_current_context()
        print environment.current_toy1
        for toy in ["toy1"]:
            print
            print toy
            eval_results[region][i][toy] = {}
            # 10-D goal trajectory: 5 x values then 5 y values, shrinking from half
            # the toy's current position towards 0.
            if toy == "toy1":
                goal = [environment.current_toy1[0] * (1. - t) / 2. for t in [0., 0.3, 0.5, 0.8, 1.]] + \
                       [environment.current_toy1[1] * (1. - t) / 2. for t in [0., 0.3, 0.5, 0.8, 1.]]
                arm_mid = "mod3"
                diva_mid = "mod10"
            elif toy == "toy2":
                goal = [environment.current_toy2[0] * (1. - t) / 2. for t in [0., 0.3, 0.5, 0.8, 1.]] + \
                       [environment.current_toy2[1] * (1. - t) / 2. for t in [0., 0.3, 0.5, 0.8, 1.]]
                arm_mid = "mod4"
                diva_mid = "mod11"
            elif toy == "toy3":
                goal = [environment.current_toy3[0] * (1. - t) / 2. for t in [0., 0.3, 0.5, 0.8, 1.]] + \
                       [environment.current_toy3[1] * (1. - t) / 2. for t in [0., 0.3, 0.5, 0.8, 1.]]
                arm_mid = "mod5"
                diva_mid = "mod12"
            print "goal", i, goal
            print "context", environment.get_current_context()
            # Distance from (context + goal) to its nearest neighbor in each module's
            # dataset: the module with the closer neighbor is used for inverse inference.
            context = list(agent.modules[arm_mid].get_c(environment.get_current_context()))
            dists, _ = agent.modules[arm_mid].sm.model.imodel.fmodel.dataset.nn_y(context + goal)
            arm_dist = dists[0]
            if len(agent.modules[diva_mid].sm.model.imodel.fmodel.dataset) > 0:
                context = list(agent.modules[diva_mid].get_c(environment.get_current_context()))
                dists, _ = agent.modules[diva_mid].sm.model.imodel.fmodel.dataset.nn_y(context + goal)
                diva_dist = dists[0]
            else:
                diva_dist = np.inf
            print "arm dist", arm_dist
            print "diva_dist", diva_dist
            if arm_dist < diva_dist:
                # Use the arm module: the 28 DIVA dimensions are set to 0.
                m = agent.modules[arm_mid].inverse(np.array(context + goal), explore=False)
                m = list(m) + [0.] * 28
            else:
                # Use the vocal module: the 21 arm dimensions are set to 0.
                m = agent.modules[diva_mid].inverse(np.array(context + goal), explore=False)
                m = [0.] * 21 + list(m)
            s = environment.update(m)
            print "s", s
            # Observed 10-D trajectory of the toy in the sensory feedback.
            if toy == "toy1":
                reached = s[30:40]
            elif toy == "toy2":
                reached = s[40:50]
            elif toy == "toy3":
                reached = s[50:60]
            comp_error = np.linalg.norm(np.array(reached) - np.array(goal))
            print "reached", reached, "error", comp_error
            eval_results[region][i][toy]["comp_error"] = comp_error
            eval_results[region][i][toy]["arm_dist"] = arm_dist
            eval_results[region][i][toy]["diva_dist"] = diva_dist
In [16]:
print environment.get_current_context()
print environment.current_toy1
In [ ]: