In [133]:
#%matplotlib inline
import glob
import json
import pandas as pd
import numpy as np
import quaternion
import sklearn.preprocessing as pre
import pprint
import matplotlib.pyplot as plt
from collections import defaultdict
## Project-local library, based on statsmodels, for information theory
import infotheo
class DataAnalyzer:
    def __init__(self, folderPath, skill):
        self.skill_being_learned = skill
        self.initial = pd.read_json(folderPath + "/INITIAL.json", typ='frame')
        self.final = pd.read_json(folderPath + "/FINAL.json", typ='frame')
        # Deleting the state column as it is not required for the calculations
        del self.initial['state']
        del self.final['state']
        print "Are the initial and final features equal : ", (self.initial.columns.values == self.final.columns.values).all()
        self.readExpertKnowledgeBase()
        number_of_demonstrations = self.initial['arm_joint_1'].count()
        print "Data has been successfully read. No of demonstrations : ", number_of_demonstrations
        # Duplicating the rows if fewer than 3 demonstrations are available
        if number_of_demonstrations < 3:
            self.initial_copy = self.initial.copy()
            self.final_copy = self.final.copy()
            while number_of_demonstrations < 3:
                self.initial = self.initial.append(self.initial)
                self.final = self.final.append(self.final)
                number_of_demonstrations = self.initial['arm_joint_1'].count()
            print "Data duplication, no of demonstrations : ", number_of_demonstrations
        print self.initial['arm_joint_1'][0], self.final['arm_joint_1'][0]
        print self.initial['base_link_y'][0], self.final['base_link_y'][0]
    def mad_based_outlier(self, points, thresh=3.5):
        """
        http://stackoverflow.com/questions/22354094/pythonic-way-of-detecting-outliers-in-one-dimensional-observation-data
        Detects outliers based on the median absolute deviation (MAD).
        Returns a boolean array with True at the positions of the outliers.
        """
        if len(points.shape) == 1:
            points = points[:, None]
        median = np.median(points, axis=0)
        diff = np.sum((points - median)**2, axis=-1)
        diff = np.sqrt(diff)
        med_abs_deviation = np.median(diff)
        # 0.6745 scales the MAD to be consistent with the standard deviation
        # of a normal distribution (modified z-score)
        modified_z_score = 0.6745 * diff / med_abs_deviation
        return modified_z_score > thresh
    def readExpertKnowledgeBase(self):
        '''
        Reads the knowledge base to extract the mappings between the
        skills and the features.
        ---------------------------------------------------------------
        |skill \ Features || Feature1 | Feature2 | Feature3 | Feature4 |
        ---------------------------------------------------------------
        |skill1           ||    *     |          |          |          |
        |skill2           ||          |    *     |    *     |          |
        |skill3           ||          |          |          |    *     |
        |skill4           ||    *     |          |    *     |    *     |
        ---------------------------------------------------------------
        TODO :
        how to represent the knowledge base
        how to intuitively add to the knowledge base
        '''
        # Database paths
        self.skills_db = './knowledge_base/skills.csv'
        self.robot_features_db = './knowledge_base/robot_features.csv'
        self.environment_features_db = './knowledge_base/environment_features.csv'
        self.templates_db = './knowledge_base/templates.csv'
        # Reading the knowledge base
        self.skills = []
        self.robot_features = []
        self.environment_features = []
        self.templates = defaultdict(list)
        with open(self.skills_db, 'r') as f:
            for line in f:
                self.skills.append(line.rstrip('\n'))
        with open(self.robot_features_db, 'r') as f:
            for line in f:
                self.robot_features.append(line.rstrip('\n'))
        with open(self.environment_features_db, 'r') as f:
            for line in f:
                self.environment_features.append(line.rstrip('\n'))
        with open(self.templates_db, 'r') as f:
            for i, line in enumerate(f):
                self.templates[i] = line.rstrip('\n').split()
        self.features_without_positions = {'arm_joint_1', 'arm_joint_2', 'arm_joint_3', 'arm_joint_4', 'arm_joint_5'}
        self.manipulate_templates()
        print "Database reading complete:"
        print "Skills : ", len(self.skills), " Robot features : ", len(self.robot_features)
        print "Environment features : ", len(self.environment_features), " Templates : ", len(self.templates)
    def manipulate_templates(self):
        '''
        The expert only specifies the feature name in the template.
        The internal representation splits the data into x, y, z and orientation.
        This function splits the template feature names into the internal
        representation, and also adds the distance features between each
        feature and the environment features.
        '''
        for i, temp in self.templates.iteritems():
            # temp[1:] is a copy of the slice, so removing from and extending
            # self.templates[i] during the iteration is safe
            for feature in temp[1:]:
                if feature not in self.features_without_positions:
                    self.templates[i].remove(feature)
                    self.templates[i].extend([feature + "_x", feature + "_y", feature + "_z",
                                              feature + "_ox", feature + "_oy", feature + "_oz", feature + "_ow"])
                    for env_feature in self.environment_features:
                        self.templates[i].append("d_linear_" + feature + "_" + env_feature)
    def recommend_using_knowledge_base(self):
        """
        After the calculation of the entropies, the templates of each action
        are used to determine the template with the lowest summed entropy.
        """
        print "Skill being learnt : ", self.skill_being_learned
        for key, temp in self.templates.iteritems():
            entropy_sum_template = 0
            if self.skill_being_learned == temp[0]:
                for feature in temp[1:]:
                    try:
                        entropy_sum_template += self.entropy_final_given_initial[feature]
                    except KeyError:
                        print "Feature not available : ", feature
                        continue
                print "For template : ", key, " summed entropy : ", entropy_sum_template
    def dataManipulation(self):
        '''
        TODO : Remove this once the camera based object detector is ready.
        '''
        # Assigning the gripper palm coordinates as the object point
        self.final = self.final.assign(object_1_x=self.final.gripper_palm_link_x,
                                       object_1_y=self.final.gripper_palm_link_y,
                                       object_1_z=self.final.gripper_palm_link_z,
                                       object_1_ox=self.final.gripper_palm_link_ox,
                                       object_1_oy=self.final.gripper_palm_link_oy,
                                       object_1_oz=self.final.gripper_palm_link_oz,
                                       object_1_ow=self.final.gripper_palm_link_ow)
        # Mixing the orientation components so that the initial and final
        # object poses differ
        self.initial = self.initial.assign(object_1_x=self.final.gripper_palm_link_x,
                                           object_1_y=self.final.gripper_palm_link_y,
                                           object_1_z=self.final.gripper_palm_link_z,
                                           object_1_ox=self.final.gripper_palm_link_oy,
                                           object_1_oy=self.final.gripper_palm_link_ox,
                                           object_1_oz=self.final.gripper_palm_link_ow,
                                           object_1_ow=self.final.gripper_palm_link_oz)
    def dataCalculatingRelativeDistances(self, from_frame, to_frame):
        '''
        Calculates the relative distance between the two given frames,
        both the linear distance and the angular distance.
        '''
        linear_distance = "d_linear_" + from_frame + "_" + to_frame
        angular_distance = "d_angular_" + from_frame + "_" + to_frame
        from_x = from_frame + "_x"
        from_y = from_frame + "_y"
        from_z = from_frame + "_z"
        to_x = to_frame + "_x"
        to_y = to_frame + "_y"
        to_z = to_frame + "_z"
        from_ox = from_frame + "_ox"
        from_oy = from_frame + "_oy"
        from_oz = from_frame + "_oz"
        from_ow = from_frame + "_ow"
        to_ox = to_frame + "_ox"
        to_oy = to_frame + "_oy"
        to_oz = to_frame + "_oz"
        to_ow = to_frame + "_ow"
        try:
            # Calculating the Euclidean distance between the two frames
            final_distance_col = np.sqrt((self.final[from_x] - self.final[to_x])**2 +
                                         (self.final[from_y] - self.final[to_y])**2 +
                                         (self.final[from_z] - self.final[to_z])**2)
            init_distance_col = np.sqrt((self.initial[from_x] - self.initial[to_x])**2 +
                                        (self.initial[from_y] - self.initial[to_y])**2 +
                                        (self.initial[from_z] - self.initial[to_z])**2)
            self.final[linear_distance] = final_distance_col
            self.initial[linear_distance] = init_distance_col
            # Calculating the angular distance between the frames.
            # Note that np.quaternion expects (w, x, y, z) ordering.
            diffAngle = []
            for index, row in self.final.iterrows():
                q0 = np.quaternion(row[to_ow], row[to_ox], row[to_oy], row[to_oz])
                q1 = np.quaternion(row[from_ow], row[from_ox], row[from_oy], row[from_oz])
                q_rel = q0.inverse() * q1
                diffAngle.append(q_rel.angle())
            self.final[angular_distance] = diffAngle
            diffAngle = []
            for index, row in self.initial.iterrows():
                q0 = np.quaternion(row[to_ow], row[to_ox], row[to_oy], row[to_oz])
                q1 = np.quaternion(row[from_ow], row[from_ox], row[from_oy], row[from_oz])
                q_rel = q0.inverse() * q1
                diffAngle.append(q_rel.angle())
            self.initial[angular_distance] = diffAngle
        except KeyError:
            print "Feature not present in readings : ", from_frame, " ", to_frame
    def createRelativeDistanceData(self):
        '''
        Calculates the relative distances between the relevant features.
        This is a general function for adding the features between which the
        distances have to be calculated, i.e. the distances between the
        robot features and the environment features.
        Features :
            Robot features :
                "arm_link_0", "arm_link_1", "arm_link_2", "arm_link_3", "arm_link_4",
                "arm_link_5", "gripper_palm_link", "gripper_finger_link_l", "gripper_finger_link_r",
                "base_footprint", "base_link", "wheel_link_bl", "wheel_link_br", "wheel_link_fl", "wheel_link_fr"
            Environment features :
                "table_1", "table_2", "table_3", "table_4", "table_5", "table_6", "table_7", "object_1"
        '''
        # TODO : the base frame for the TF transformation was taken as arm_link_1,
        # so it is missing in the data collection and needs to be updated.
        print "env : ", self.environment_features
        for robot_feature in self.robot_features:
            if robot_feature not in self.features_without_positions:
                for env_feature in self.environment_features:
                    self.dataCalculatingRelativeDistances(robot_feature, env_feature)
        print "Relative distances are calculated. Data is ready for analysis."
        print "Total number of features : ", len(self.initial.columns.values)
    def describeData(self):
        #print "INITIAL DATA :", self.initial.describe()
        #print "FINAL DATA :", self.final.describe()
        self.initial.boxplot()
        self.final.boxplot()
    def describeParameter(self, name):
        print "Parameter :", name
        print "INITIAL :", self.initial[name]
        print "FINAL :", self.final[name]
        #plt.savefig("/data/dataDeebul/rnd/RecommenderSystemInRobotics/experiments/" + name + "Box")
        pdf, H, xedges, yedges = self.jointProbabilityDensityFunction(name)
        px = pdf.sum(0)
        py = pdf.sum(1)
        print "pdf initial values, pdf final values :", px, py
        print "check sum px, sum py, sum px + sum py : ", sum(px), sum(py), sum(px) + sum(py)
        print "H_FinalGivenInitial : ", np.float16(infotheo.condentropy(py, px, pdf)), "H_InitialGivenFinal :", np.float16(infotheo.condentropy(px, py, pdf))
        initialValue = np.asarray(self.initial[name])
        finalValue = np.asarray(self.final[name])
        labels = list('IF')
        plt.boxplot(np.vstack((initialValue, finalValue)).T, labels=labels)
        plt.show()
        initialValue = np.around(initialValue, decimals=4)
        finalValue = np.around(finalValue, decimals=4)
        # mad_based_outlier returns a boolean map with True at the outliers,
        # so all the outliers are replaced with the median
        initialValue[self.mad_based_outlier(initialValue)] = np.median(initialValue, axis=0)
        finalValue[self.mad_based_outlier(finalValue)] = np.median(finalValue, axis=0)
        myextent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
        plt.imshow(H.T, origin='lower', extent=myextent, interpolation='nearest', aspect='auto')
        plt.plot(finalValue, initialValue, 'ro')
        plt.colorbar()
        plt.ylabel("Initial Values")
        plt.xlabel("Final Values")
        plt.title("Parameter : " + name)
        #plt.savefig("/data/dataDeebul/rnd/RecommenderSystemInRobotics/experiments/" + name + "JoinPDF")
        plt.show()
    def jointProbabilityDensityFunction(self, feature, bins=5):
        """
        Creates the joint probability distribution based on the initial
        and final values of the feature.
        """
        initialValue = np.asarray(self.initial[feature])
        finalValue = np.asarray(self.final[feature])
        if np.allclose(initialValue, finalValue):
            print "Feature has the same final and initial values : ", feature
        #min_max_scaler = preprocessing.MinMaxScaler()
        #initialValue = min_max_scaler.fit_transform(initialValue)
        #finalValue = min_max_scaler.fit_transform(finalValue)
        initialValue = np.around(initialValue, decimals=3)
        finalValue = np.around(finalValue, decimals=3)
        # mad_based_outlier returns a boolean map with True at the outliers,
        # so all the outliers are replaced with the median
        initialValue[self.mad_based_outlier(initialValue)] = np.median(initialValue, axis=0)
        finalValue[self.mad_based_outlier(finalValue)] = np.median(finalValue, axis=0)
        # Binning both axes on the same scale: since the initial and final
        # values are measurements of a single feature, the histogram ranges
        # from the smallest to the largest value of either series.
        value_max = max(initialValue.max(), finalValue.max())
        value_min = min(initialValue.min(), finalValue.min())
        range_value = [[value_min, value_max], [value_min, value_max]]
        H, xedges, yedges = np.histogram2d(finalValue, initialValue, bins=bins, range=range_value)
        return H / float(len(initialValue)), H, xedges, yedges
    def condEntropyFinalGivenInitial(self):
        """
        Calculates the conditional entropy of each feature: the entropy of the
        final value given the initial value.
        """
        # Stored under a different attribute name so the method is not
        # shadowed by its result (which would make a second call fail)
        self.entropy_final_given_initial = {}
        for featureName in self.initial.columns.values:
            pdf, _, _, _ = self.jointProbabilityDensityFunction(featureName, 5)
            # Marginal pdf of the initial values
            pInitial = pdf.sum(0)
            # Marginal pdf of the final values
            pFinal = pdf.sum(1)
            # Entropy of final given initial
            self.entropy_final_given_initial[featureName] = np.float16(infotheo.condentropy(pFinal, pInitial, pdf))
        print "Features with zero conditional entropy : ", sum(x == 0.0 for x in self.entropy_final_given_initial.values())
        return self.entropy_final_given_initial
    def condEntropyInitialGivenFinal(self):
        """
        Calculates the conditional entropy of each feature: the entropy of the
        initial value given the final value.
        """
        if not (self.initial.columns.values == self.final.columns.values).all():
            print "Columns are not the same. Error in data collection."
        self.entropy_initial_given_final = {}
        for featureName in self.initial.columns.values:
            pdf, _, _, _ = self.jointProbabilityDensityFunction(featureName, 5)
            # Marginal pdf of the initial values
            pInitial = pdf.sum(0)
            # Marginal pdf of the final values
            pFinal = pdf.sum(1)
            # Entropy of initial given final
            self.entropy_initial_given_final[featureName] = np.float16(infotheo.condentropy(pInitial, pFinal, pdf))
        return self.entropy_initial_given_final
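As a quick sanity check of the MAD-based outlier rule above, the following standalone sketch (synthetic data, not from the demonstrations) applies the same modified z-score computation to a series with one injected outlier; the 0.6745 scale factor and 3.5 threshold match mad_based_outlier.
In [ ]:
# Standalone sketch of the MAD-based outlier rule on synthetic data
points = np.array([0.07, 0.071, 0.069, 0.072, 0.07, 0.5])  # 0.5 is the injected outlier
median = np.median(points)
diff = np.abs(points - median)
modified_z_score = 0.6745 * diff / np.median(diff)
print modified_z_score > 3.5  # True only at the injected outlier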
In [134]:
move_arm = DataAnalyzer("./movetoArmSingleDemo", "move_to")
move_arm.createRelativeDistanceData()
h_F_I = move_arm.condEntropyFinalGivenInitial()
move_arm.recommend_using_knowledge_base()
'''
h_I_F = move_arm.condEntropyInitialGivenFinal()
zeroEntropy = []
for key in h_F_I:
    if h_F_I[key] == 0.0 and h_I_F[key] != 0.0:
        zeroEntropy.append(key)
pprint.pprint(sorted(zeroEntropy))
'''
Out[134]:
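infotheo is a project-local library, so its exact interface is not shown in this notebook; the sketch below assumes condentropy(py, px, pdf) computes the conditional entropy H(Y|X) = H(X, Y) - H(X) from the joint pdf, and reproduces that quantity with plain numpy on two hand-made joint distributions.
In [ ]:
# Assumed behaviour of infotheo.condentropy, reproduced with numpy:
# H(Y|X) = H(X, Y) - H(X), with the convention 0 * log(0) = 0
def cond_entropy(joint_pdf):
    joint = joint_pdf[joint_pdf > 0]           # drop empty bins
    h_joint = -np.sum(joint * np.log2(joint))  # joint entropy H(X, Y)
    px = joint_pdf.sum(axis=0)                 # marginal of the conditioning variable
    px = px[px > 0]
    h_x = -np.sum(px * np.log2(px))            # marginal entropy H(X)
    return h_joint - h_x

# A deterministic mapping gives H(Y|X) = 0 ...
print cond_entropy(np.array([[0.5, 0.0], [0.0, 0.5]]))  # 0.0
# ... while independent variables give H(Y|X) = H(Y) = 1 bit here
print cond_entropy(np.full((2, 2), 0.25))               # 1.0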
In [135]:
move_base_relative = DataAnalyzer("./movetoBaseRelativePosition", "move_to")
move_base_relative.createRelativeDistanceData()
h_F_I = move_base_relative.condEntropyFinalGivenInitial()
move_base_relative.recommend_using_knowledge_base()
'''
move_base_relative.describeParameter('d_linear_base_footprint_object_1')
h_F_I = move_base_relative.condEntropyFinalGivenInitial()
h_I_F = move_base_relative.condEntropyInitialGivenFinal()
zeroEntropy = []
for key in h_F_I:
    if h_F_I[key] == 0.0 and h_I_F[key] != 0.0:
        zeroEntropy.append(key)
pprint.pprint(sorted(zeroEntropy))
'''
Out[135]:
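To make the template handling concrete, the following sketch replays the expansion from manipulate_templates on a single hypothetical template row with one environment feature; the feature names are illustrative only.
In [ ]:
# Replaying the template expansion on a hypothetical row
template = ['move_to', 'arm_joint_1', 'gripper_palm_link']
features_without_positions = {'arm_joint_1', 'arm_joint_2', 'arm_joint_3', 'arm_joint_4', 'arm_joint_5'}
environment_features = ['table_1']
for feature in template[1:]:  # the slice is a copy, so the mutation below is safe
    if feature not in features_without_positions:
        template.remove(feature)
        template.extend([feature + s for s in ['_x', '_y', '_z', '_ox', '_oy', '_oz', '_ow']])
        for env_feature in environment_features:
            template.append('d_linear_' + feature + '_' + env_feature)
print template
# ['move_to', 'arm_joint_1', 'gripper_palm_link_x', ..., 'gripper_palm_link_ow',
#  'd_linear_gripper_palm_link_table_1']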
In [68]:
movebase = DataAnalyzer("./movetoBaseAbsolutePositionWithEnv", "move_to")
#movebase.dataManipulation()
movebase.createRelativeDistanceData()
#movebase.condEntropyFinalGivenInitial()
#movebase.discretizeData()
#movebase.describeParameter('base_footprint_y')
h_F_I = movebase.condEntropyFinalGivenInitial()
h_I_F = movebase.condEntropyInitialGivenFinal()
zeroEntropy = []
# Keep features whose final value is predictable from the initial value
# (H(F|I) == 0) while the reverse does not hold (H(I|F) != 0)
for key in h_F_I:
    if h_F_I[key] == 0.0 and h_I_F[key] != 0.0:
        zeroEntropy.append(key)
pprint.pprint(sorted(zeroEntropy))
In [139]:
reachData = DataAnalyzer("./reach", "reach")  # skill name assumed to match an entry in skills.csv
reachData.dataManipulation()
reachData.createRelativeDistanceData()
reachData.condEntropyFinalGivenInitial()
#reachData.discretizeData()
reachData.describeParameter('arm_joint_2')
In [179]:
reachData.describeParameter('arm_joint_1')
In [168]:
reachData.describeParameter('d_linear_m0_gripper')
In [129]:
reachData.describeParameter('marker_0_x')
In [176]:
ceDict = reachData.condEntropyFinalGivenInitial()
zeroEntropy = []
for key in ceDict.keys():
    if ceDict[key] == 0.0:
        zeroEntropy.append(key)
pprint.pprint(sorted(zeroEntropy))
print pd.DataFrame(sorted(zeroEntropy)).to_latex()
In [22]:
toolTipAllign = DataAnalyzer("./reach")
toolTipAllign.dataManipulation()
toolTipAllign.dataCalculatingRelativeDistances()
toolTipAllign.condEntropyFinalGivenInitial()
In [140]:
toolTipAllign = DataAnalyzer("./toolTipAllign")
toolTipAllign.dataManipulation()
toolTipAllign.dataCalculatingRelativeDistances()
ceDict = toolTipAllign.condEntropyFinalGivenInitial()
zeroEntropy = []
for key in ceDict.keys():
if ceDict[key] == 0.0:
zeroEntropy.append(key)
pprint.pprint(sorted(zeroEntropy))
In [32]:
a = [ 0.07354736, 0.07354736, 0.07348633, 0.07354736, 0.07348633, 0.0736084,
0.07348633, 0.0736084, 0.07354736, 0.07348633, 0.07366943, 0.07354736]
b = [ 0.07354736, 0.07354736, 0.0736084, 0.07513428, 0.07354736, 0.07348633,
0.07446289, 0.07366943, 0.07501221, 0.07537842, 0.07354736, 0.07348633]
np.histogram2d(b,a,bins=5)
Out[32]:
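The histogram in the cell above lets numpy pick an independent range per axis. jointProbabilityDensityFunction instead bins both series over one shared range, since a and b are readings of the same feature; a minimal sketch of that shared-range binning, reusing a and b from the previous cell:
In [ ]:
# Shared-range binning as done in jointProbabilityDensityFunction
a_arr = np.asarray(a)
b_arr = np.asarray(b)
value_min = min(a_arr.min(), b_arr.min())
value_max = max(a_arr.max(), b_arr.max())
shared_range = [[value_min, value_max], [value_min, value_max]]
H, xedges, yedges = np.histogram2d(b_arr, a_arr, bins=5, range=shared_range)
pdf = H / float(len(a_arr))
print pdf.sum()  # the joint pdf sums to 1.0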
In [174]:
from sklearn import preprocessing
data_start = np.arange(10, 20.0)
data_start = np.array([5.0] * 10)        # constant series, same length as data_end
data_end = np.linspace(0.1, 1.0, 10)     # 0.1, 0.2, ..., 1.0
'''
data = np.append(data_start, data_end)
print data
min_max_scaler = preprocessing.MinMaxScaler()
data = min_max_scaler.fit_transform(data)
print data
data_start, data_end = np.split(data, 2)
data_start = min_max_scaler.fit_transform(data_start)
data_end = min_max_scaler.fit_transform(data_end)
print data_start
print data_end
'''
data_max = max(data_start.max(), data_end.max())
data_min = min(data_start.min(), data_end.min())
print data_max, data_min
# Binning both series over the shared range, as in jointProbabilityDensityFunction
H, xedges, yedges = np.histogram2d(data_end, data_start, bins=5,
                                   range=[[data_min, data_max], [data_min, data_max]])
pdf = H / float(len(data_start))
px = pdf.sum(0)
py = pdf.sum(1)
print "pdf initial values, pdf final values :", px, py
print "check sum px, sum py, sum px + sum py : ", sum(px), sum(py), sum(px) + sum(py)
print "H_FinalGivenInitial : ", np.float16(infotheo.condentropy(py, px, pdf)), "H_InitialGivenFinal :", np.float16(infotheo.condentropy(px, py, pdf))
myextent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
plt.imshow(H.T, origin='lower', extent=myextent, interpolation='nearest', aspect='auto')
plt.plot(data_end, data_start, 'ro')
plt.colorbar()
plt.ylabel("Initial Values")
plt.xlabel("Final Values")
plt.show()
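For reference, the angular distance assigned in dataCalculatingRelativeDistances is the rotation angle of the relative quaternion q0^-1 * q1. Below is a standalone check with the quaternion package used above (note that np.quaternion takes its components in (w, x, y, z) order): a 90-degree rotation about z, compared against the identity, should give an angle of pi/2.
In [ ]:
# Standalone check of the angular distance between two orientations
q0 = np.quaternion(1.0, 0.0, 0.0, 0.0)                              # identity rotation
q1 = np.quaternion(np.cos(np.pi / 4), 0.0, 0.0, np.sin(np.pi / 4))  # 90 deg about z
q_rel = q0.inverse() * q1
print q_rel.angle()  # expect pi / 2, i.e. about 1.5708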