Abstract for ICDL (March 23rd, 2015) Internship report
In [99]:
    
import cPickle
import matplotlib.pyplot as plt
import os
import sys
from numpy import array, mean, std, sqrt
sys.path.append('../')
from explaupoppydiva.drawer import Drawer
# Configuration for the first analysis: where the experiment logs live, which
# run directories to analyse, and the iteration grid used as the x axis.
logs = '../logs/'
log_dirs = [
    #'2015-04-18_02-11-25-explaupoppydiva-riac-cube2',
    '2015-04-18_02-19-01-explaupoppydiva-riac-cube3',
    #'2015-04-18_02-17-58-explaupoppydiva-riac-cube5',
    #'2015-04-14_11-09-45-explaupoppydiva-discretized_progress-cube2',
    #'2015-04-14_11-09-52-explaupoppydiva-discretized_progress-cube3',
    #'2015-04-14_11-09-45-explaupoppydiva-random-cube2',
    '2015-04-11_12-08-52_cube3',
    #'2015-04-09_16-27-45_cube5',
]
iterations = 10000
# 50 evenly spaced iteration indices starting at 1. Floor division is spelled
# out because range() only accepts integers; a bare `/` yields a float as soon
# as true division is in effect.
eval_at = range(1, iterations + 1, iterations // 50)
    
In [100]:
    
import sys
import explaupoppydiva.config as config
sys.modules['config'] = config
pickled_logs = {}
for log_dir in log_dirs:
    for xp_dir in os.listdir(os.path.join(logs,log_dir)) + ['']:
        if os.path.isdir(os.path.join(logs, log_dir, xp_dir)):
        
            print os.path.join(logs, log_dir, xp_dir)
            for log_file in os.listdir(os.path.join(logs, log_dir, xp_dir)):
                file_path = os.path.join(logs, log_dir, xp_dir, log_file)
                if file_path.endswith('.pickle'):
                    #print file_path
                    try:
                        with open(file_path, 'r') as f:
                            pickled_logs[file_path] = cPickle.load(f)
                            f.close()                                         
                    except ValueError:
                        print "ValueError", file_path
    
    
In [101]:
    
%pylab inline
# For every log directory, draw one figure with the mean exploration curve of
# each experiment sub-directory and a shaded band of +/- one standard error of
# the mean, save it next to the logs, and keep the per-run curves in
# explorations[log_dir][xp_dir][log_file].
explorations = {}
for log_dir in log_dirs:
    explorations[log_dir] = {}
    fig_explo, ax = plt.subplots()
    fig_explo.canvas.set_window_title('Exploration comparison for ' + log_dir)

    # The trailing '' makes the loop also scan log_dir itself.
    for xp_dir in sorted(os.listdir(os.path.join(logs, log_dir))) + ['']:
        xp_path = os.path.join(logs, log_dir, xp_dir)
        if not os.path.isdir(xp_path):
            continue

        explo = {}
        for log_file in os.listdir(xp_path):
            file_path = os.path.join(logs, log_dir, xp_dir, log_file)
            if file_path.endswith('.pickle') and file_path in pickled_logs:
                log = pickled_logs[file_path]
                # Keep only runs that recorded one value per evaluation point.
                if len(log.explo) == len(log.config.eval_at):
                    explo[log_file] = array(log.explo)

        if len(explo) == 0:
            continue

        runs = array(explo.values())
        l = len(explo.values()[0])
        x = eval_at[:l]
        y = mean(runs, axis=0)
        error = std(runs, axis=0) / sqrt(len(explo))  # Standard error of the mean

        # `log` is the last log looked up in this directory.
        # NOTE(review): assumes all runs of one condition share config.name.
        # Let plot() pick the next color of the axes' cycle and reuse it for
        # the band, instead of reading the private `_get_lines.color_cycle`.
        line, = ax.plot(x, y, label=log.config.name)
        ax.fill_between(x, y - error, y + error, alpha=0.2,
                        label=log.config.name, color=line.get_color())

        explorations[log_dir][xp_dir] = explo

    fig_explo.show()

    # Address this figure/axes explicitly rather than through pyplot's
    # "current figure" state.
    ax.set_xlabel('Iterations', fontsize=18)
    ax.set_ylabel('Explored cells', fontsize=18)
    ax.legend(loc='lower right')
    fig_explo.set_size_inches(16, 10)
    # Only a trailing '/' is stripped; the previous `log_dir[0:-1]` cut the
    # last character of the directory name out of the file name.
    fig_explo.savefig(logs + log_dir + '/explo-' + log_dir.rstrip('/') + '.png')
    plt.show()
    
    
    
    
    
In [102]:
    
# For each condition of each log directory, collect the last value of every
# run's exploration curve: end_explo[log_dir][xp] is a list with one entry
# per run.
end_explo = {}
for log_dir, conditions in explorations.items():
    end_explo[log_dir] = {}
    for xp, runs in conditions.items():
        end_explo[log_dir][xp] = [runs[key][-1] for key in runs]
    
In [103]:
    
for log_dir in end_explo:
    print log_dir
    for xp in end_explo[log_dir]:
        print "    ", xp
        print "        ", end_explo[log_dir][xp]
    
    
Mann-Whitney U test: http://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test
One tail / two tail: http://www.ats.ucla.edu/stat/mult_pkg/faq/general/tail_tests.htm
In [107]:
    
from scipy.stats import mannwhitneyu
x = end_explo['2015-04-18_02-19-01-explaupoppydiva-riac-cube3']['MS1-RIAC-seq_10000']
y = end_explo['2015-04-18_02-19-01-explaupoppydiva-riac-cube3']['SEQ-RIAC-seq_10000']
print mannwhitneyu(x, y)
    
    
In [33]:
    
from scipy.stats import mannwhitneyu
x = end_explo['2015-04-11_12-08-52_cube3']['MOTOR_BABBLING-seq_10000']
y = end_explo['2015-04-11_12-08-52_cube3']['MS1-GOAL-BABBLING-seq_10000']
print mannwhitneyu(x, y)
    
    
In [45]:
    
import cPickle
import matplotlib.pyplot as plt
import os
import sys
from numpy import array, mean, std, sqrt
sys.path.append('../')
from explaupoppydiva.drawer import Drawer
# Configuration for the tree/cube3 analysis: log location, run directory and
# the iteration grid used as the x axis.
logs = '../logs/'
log_dirs = [
    '2015-04-30_12-54-30-explaupoppydiva-tree-cube3',
]
iterations = 10000
# Ten evaluation points: iterations/10, 2*iterations/10, ..., iterations.
# Floor division keeps the range() arguments integral even when `/` means
# true division.
eval_step = iterations // 10
eval_at = range(eval_step, iterations + 1, eval_step)
    
In [48]:
    
plot_explo = True
plot_explo_comp = True
explorations = {}
exploration_comp = {}
conditions_to_plot = [
                      'MOTOR_BABBLING-seq_10000',
                      'MS2-tree-seq_10000',
                      'SEQ-tree-seq_10000',
                      'TOP-DOWN-CMA-tree-seq_500',
                      'MS2-GOAL-BABBLING-seq_10000',
                      'SEQ-GOAL-BABBLING-seq_10000',
                      'TOP-DOWN-CMA-GOAL-BABBLING-seq_500',                      
                      ]
for log_dir in log_dirs:
    explorations[log_dir] = {}
    exploration_comp[log_dir] = {}
    if plot_explo:
        fig_explo, ax_explo = plt.subplots()
        fig_explo.canvas.set_window_title('Exploration comparison for ' + log_dir)
    
    if plot_explo_comp:
        fig_explo_comp, ax_explo_comp = plt.subplots()
        fig_explo_comp.canvas.set_window_title('ExploComp comparison for ' + log_dir)   
         
    for xp_dir in sorted(os.listdir(os.path.join(logs,log_dir))) + ['']:
        print xp_dir
        if xp_dir in conditions_to_plot and os.path.isdir(os.path.join(logs, log_dir, xp_dir)):
        
            comp = {}
            explo = {}
            explo_comp = {}
            print os.path.join(logs, log_dir, xp_dir)
            for log_file in os.listdir(os.path.join(logs, log_dir, xp_dir)):
                file_path = os.path.join(logs, log_dir, xp_dir, log_file)
                if file_path.endswith('.pickle'):
                    print file_path
                    try:
                        with open(file_path, 'r') as f:
                            log = cPickle.load(f)
                            f.close()
                            #print "explo", log.explo, "explocomp", log.explo_comp, "comp", log.eval_errors
                            
                            if plot_explo:
                                if len(log.explo) == len(eval_at):
                                    explo[log_file] = array(log.explo)
                                    
                            if plot_explo_comp:
                                if len(log.explo_comp) == len(eval_at):
                                    explo_comp[log_file] = array(log.explo_comp)
                                    
                    except ValueError:
                        print "ValueError"
            if plot_explo:
                if len(explo.values()) > 0:
                    print "Explo", mean(array(explo.values()), axis=0)
                    l = len(explo.values()[0])
                    print log.config.name
                    #print explo.values()
                    #print l
                    x = eval_at[:l]
                    y = mean(array(explo.values()), axis=0)
                    error = std(array(explo.values()), axis=0)
                    error = error / sqrt(len(explo)) # Standard error of the mean
                    color_cycle = ax_explo._get_lines.color_cycle
                    next_color = next(color_cycle)
                    ax_explo.plot(x, y, label = log.config.name, color=next_color)
                    if len(explo.values()) > 1:
                        ax_explo.fill_between(x, y-error, y+error, alpha=0.2, label = log.config.name, color = next_color)
                    #ax.errorbar(eval_at[:l],, yerr=, label = log.config.name)
                    
                    explorations[log_dir][xp_dir] = explo
                    
            if plot_explo_comp:
                if len(explo_comp.values()) > 0:
                    print "ExploComp", explo_comp.values()
                    l = len(explo_comp.values()[0])
                    print log.config.name
                    #print explo_comp.values()
                    #print l
                    x = eval_at[:l]
                    y = mean(array(explo_comp.values()), axis=0)
                    error = std(array(explo_comp.values()), axis=0)
                    error = error / sqrt(len(explo_comp)) # Standard error of the mean
                    color_cycle = ax_explo_comp._get_lines.color_cycle
                    next_color = next(color_cycle)
                    ax_explo_comp.plot(x, y, label = log.config.name, color=next_color)
                    if len(explo_comp.values()) > 1:
                        ax_explo_comp.fill_between(x, y-error, y+error, alpha=0.2, label = log.config.name, color = next_color)
                    #ax_comp.errorbar(eval_at[:l],, yerr=, label = log.config.name)
                
                    exploration_comp[log_dir][xp_dir] = explo_comp
                
    
    if plot_explo:
        fig_explo.show()
        ax_explo.legend(loc='upper left')
        #fig_explo.set_size_inches(19.2,12)
        plt.xlabel('Iterations', fontsize=18)
        plt.ylabel('Explored cells', fontsize=18)
        #plt.show(block=False)
        plt.savefig(logs + log_dir + '/explo-' + log_dir[0:-1] + '.png')
    
    
    if plot_explo_comp:
        fig_explo_comp.show()
        ax_explo_comp.legend(loc='upper left')
        #fig_explo_comp.set_size_inches(19.2,12)
        
        plt.xlabel('Iterations', fontsize=18)
        plt.ylabel('Reached cells', fontsize=18)
        #plt.show(block=False)
        plt.savefig(logs + log_dir + '/explo_comp-' + log_dir[0:-1] + '.png')
    plt.show()
    
    
    
    
In [54]:
    
end_explo = {}
for log_dir in explorations:
    end_explo[log_dir] = {}
    for xp in explorations[log_dir]:
        end_explo[log_dir][xp] = {}
        end_explo[log_dir][xp] = [explorations[log_dir][xp][key][-1] for key in explorations[log_dir][xp]]
print end_explo
end_explo_comp = {}
for log_dir in exploration_comp:
    end_explo_comp[log_dir] = {}
    for xp in exploration_comp[log_dir]:
        end_explo_comp[log_dir][xp] = {}
        end_explo_comp[log_dir][xp] = [exploration_comp[log_dir][xp][key][-1] for key in exploration_comp[log_dir][xp]]
print end_explo_comp
    
    
In [63]:
    
for log_dir in end_explo:
    print log_dir
    for xp in end_explo_comp[log_dir]:
        print "    ", xp
        print "        ", end_explo_comp[log_dir][xp]
    
    
In [61]:
    
from scipy.stats import mannwhitneyu
x = end_explo_comp['2015-04-30_12-54-30-explaupoppydiva-tree-cube3']['MOTOR_BABBLING-seq_10000']
y = end_explo_comp['2015-04-30_12-54-30-explaupoppydiva-tree-cube3']['TOP-DOWN-CMA-tree-seq_500']
print mannwhitneyu(x, y)
    
    
In [72]:
    
import cPickle
import matplotlib.pyplot as plt
from numpy import array, mean, std, sqrt
import os
import sys
from explaupoppydiva.drawer import Drawer
# Configuration for the Arm-Seq analysis.
plot_explo = True
iterations = 5000
logs = '../logs/'

# Run directories to analyse; older runs are kept commented out for reference.
log_dirs = [
    '2015-05-30_16-11-48-Test-Arm-Seq',
    #'2015-05-30_15-30-09-Test-Arm-Seq',
    #'2015-05-30_14-44-11-Test-Arm-Seq',
    #'2015-05-27_17-21-18-Test-Arm-Seq',
    #'2015-05-27_15-07-24-Test-Arm-Seq',
    #'2015-05-26_19-58-11-Test-Arm-Seq',
]

# Only sub-directories named here are plotted.
conditions_to_plot = [
    'Arm-Seq-MB-F-seq_5000',
    'Arm-Seq-MB-H-seq_5000',
    'Arm-Seq-GB-F-seq_5000',
    'Arm-Seq-GB-H-seq_5000',
    'Arm-Seq-GB-H-TD-seq_1240',
    'Arm-Seq-Tr-H-TD-seq_1240',
    'Arm-Seq-Tr-H-seq_5000',
    'Arm-Seq-Tr-F-seq_5000',
]
explorations = {}
for log_dir in log_dirs:
    explorations[log_dir] = {}
    
    if plot_explo:
        fig_explo, ax_explo = plt.subplots()
        fig_explo.canvas.set_window_title('Exploration comparison for ' + log_dir)
    
    
    for xp_dir in sorted(os.listdir(os.path.join(logs,log_dir)) + ['']):
        print xp_dir
        if xp_dir in conditions_to_plot and os.path.isdir(os.path.join(logs, log_dir, xp_dir)):
#             if xp_dir[-3:] == '840':                
#                 eval_at = range(1, iterations, 840/50)
#             else:
#                 eval_at = range(1, iterations, iterations/50)
            explo = {}
            explo_comp = {}
            comp = {}
            #print os.path.join(logs, log_dir, xp_dir)
            for log_file in os.listdir(os.path.join(logs, log_dir, xp_dir)):
                file_path = os.path.join(logs, log_dir, xp_dir, log_file)
                if file_path.endswith('.pickle'):
                    #print file_path
                    try:
                        with open(file_path, 'r') as f:
                            log = cPickle.load(f)
                            f.close()
                            
                            #print "explo", log.explo#, "explocomp", log.explo_comp, "comp", mean(array(log.eval_errors[0]))
                            
                            if plot_explo:
                                eval_at = range(1, iterations, iterations/len(log.explo))
                                explo[log_file] = array(log.explo)
#                                 if len(log.explo) == len(eval_at):
#                                     explo[log_file] = array(log.explo)
#                                 elif len(log.explo) == 2*len(log.config.eval_at):
#                                     explo[log_file] = array(log.explo)[range(0,len(log.explo),2)]
#                                 else:
#                                     print "Warning:", len(log.explo), len(eval_at)
                                    
                    except ValueError:
                        print "ValueError"
            if plot_explo:
                if len(explo.values()) > 0:
                    l = len(explo.values()[0])
                    #print log.config.name
                    #print explo.values()
                    #print l
                    if log.config.iter == 5000:
                        x = eval_at[:l]
                    else:
                        e2 = range(1,1240,1240/50)
                        v = array(e2)
                        v[v > 300] = (v[v>300]-300)*5 + 300
                        x = list(v)
                        #print len(x), x, mean(array(explo.values()), axis=0)
                    y = mean(array(explo.values()), axis=0)
                    error = std(array(explo.values()), axis=0)
                    error = error / sqrt(len(explo)) # Standard error of the mean
                    color_cycle = ax_explo._get_lines.color_cycle
                    next_color = next(color_cycle)
                    ax_explo.plot(x, y, label = log.config.name, color=next_color)
                    ax_explo.fill_between(x, y-error, y+error, alpha=0.2, label = log.config.name, color = next_color)
                    #ax.errorbar(eval_at[:l],, yerr=, label = log.config.name)
                    
                    explorations[log_dir][xp_dir] = explo
                
                
    
    if plot_explo:
        fig_explo.show()
        ax_explo.legend(loc='upper left')
        fig_explo.set_size_inches(19.2,12)
        plt.xlabel('Iterations', fontsize=18)
        plt.ylabel('Explored cells', fontsize=18)
        plt.ylim([0,900])
        plt.savefig(logs + log_dir + '/explo-' + log_dir[0:] + '.png')
    
    
plt.show()
    
    
    
In [73]:
    
end_explo = {}
for log_dir in explorations:
    end_explo[log_dir] = {}
    for xp in explorations[log_dir]:
        end_explo[log_dir][xp] = {}
        end_explo[log_dir][xp] = [explorations[log_dir][xp][key][-1] for key in explorations[log_dir][xp]]
        
for log_dir in end_explo:
    print log_dir
    for xp in end_explo[log_dir]:
        print "    ", xp
        print "        ", end_explo[log_dir][xp]
    
    
In [85]:
    
from scipy.stats import mannwhitneyu
x = end_explo['2015-05-30_16-11-48-Test-Arm-Seq']['Arm-Seq-GB-H-seq_5000']
y = end_explo['2015-05-30_16-11-48-Test-Arm-Seq']['Arm-Seq-Tr-H-seq_5000']
u, p = mannwhitneyu(x, y)
print "U:", u, "p:", 2*p
    
    
In [87]:
    
import cPickle
import matplotlib.pyplot as plt
from numpy import array, mean, std, sqrt
import os
import sys
from explaupoppydiva.drawer import Drawer
# Configuration for the Arm-Diva analysis.
# The logs live under the user's home directory. expanduser('~') is used
# rather than os.getenv('HOME') because the latter returns None when HOME is
# unset, which makes the string concatenation raise a TypeError.
logs = os.path.expanduser('~') + '/scm/Flowers/explaupoppydiva/logs/'
log_dirs = [
    '2015-05-27_11-51-43-ARM-DIVA',
    #'2015-05-26_20-59-31-ARM-DIVA',
]
# Only sub-directories named here are plotted.
conditions_to_plot = [
    'Arm-Diva-Tr-H-seq_3000',
    'Arm-Diva-GB-H-seq_3000',
    'Arm-Diva-GB-F-seq_3000',
    'Arm-Diva-MB-H-seq_3000',
    'Arm-Diva-Tr-F-seq_3000',
    'Arm-Diva-MB-F-seq_3000',
]
plot_explo = True
iterations = 3000
# Ten evenly spaced iteration indices starting at 1; `//` keeps the range()
# step an integer even when `/` means true division.
eval_at = range(1, iterations + 1, iterations // 10)
explorations = {}
for log_dir in log_dirs:
    explorations[log_dir] = {}
    
    if plot_explo:
        fig_explo, ax_explo = plt.subplots()
        fig_explo.canvas.set_window_title('Exploration comparison for ' + log_dir)
    
    if plot_explo_comp:
        fig_explo_comp, ax_explo_comp = plt.subplots()
        fig_explo_comp.canvas.set_window_title('ExploComp comparison for ' + log_dir)   
         
    if plot_comp:
        fig_comp, ax_comp = plt.subplots()
        fig_comp.canvas.set_window_title('Comp comparison for ' + log_dir)   
    
    for xp_dir in os.listdir(os.path.join(logs,log_dir)) + ['']:
        print xp_dir
        if xp_dir in conditions_to_plot and os.path.isdir(os.path.join(logs, log_dir, xp_dir)):
        
            explo = {}
            explo_comp = {}
            comp = {}
            #print os.path.join(logs, log_dir, xp_dir)
            for log_file in os.listdir(os.path.join(logs, log_dir, xp_dir)):
                file_path = os.path.join(logs, log_dir, xp_dir, log_file)
                if file_path.endswith('.pickle'):
                    #print file_path
                    try:
                        with open(file_path, 'r') as f:
                            log = cPickle.load(f)
                            f.close()
                            
                            #print "explo", log.explo, "explocomp"#, log.explo_comp, "comp", mean(array(log.eval_errors[0]))
                            
                            if plot_explo:
                                eval_at = range(1, iterations, iterations/len(log.explo))
                                explo[log_file] = array(log.explo)
#                                 if len(log.explo) == len(eval_at):
#                                     explo[log_file] = array(log.explo)
#                                 if len(log.explo) == 2*len(log.config.eval_at):
#                                     explo[log_file] = array(log.explo)[range(0,len(log.explo),2)]
#                                     
                    except ValueError:
                        print "ValueError"
            if plot_explo:
                if len(explo.values()) > 0:
                    l = len(explo.values()[0])
                    #print log.config.name
                    #print explo.values()
                    #print l
                    x = eval_at[:l]
                    y = mean(array(explo.values()), axis=0)
                    error = std(array(explo.values()), axis=0)
                    error = error / sqrt(len(explo)) # Standard error of the mean
                    color_cycle = ax_explo._get_lines.color_cycle
                    next_color = next(color_cycle)
                    ax_explo.plot(x, y, label = log.config.name, color=next_color)
                    ax_explo.fill_between(x, y-error, y+error, alpha=0.2, label = log.config.name, color = next_color)
                    #ax.errorbar(eval_at[:l],, yerr=, label = log.config.name)
                    
                    explorations[log_dir][xp_dir] = explo
                
    
    if plot_explo:
        fig_explo.show()
        ax_explo.legend(loc='upper left')
        fig_explo.set_size_inches(19.2,12)
        plt.xlabel('Iterations', fontsize=18)
        plt.ylabel('Explored cells', fontsize=18)
        plt.xlim([0,2700])
        plt.savefig(logs + log_dir + '/explo-' + log_dir[0:] + '.png')
    
    
plt.show()
    
    
    
In [90]:
    
end_explo = {}
for log_dir in explorations:
    end_explo[log_dir] = {}
    for xp in explorations[log_dir]:
        end_explo[log_dir][xp] = {}
        end_explo[log_dir][xp] = [explorations[log_dir][xp][key][-1] for key in explorations[log_dir][xp]]
        
for log_dir in end_explo:
    print log_dir
    for xp in end_explo[log_dir]:
        print "    ", xp
        print "        ", end_explo[log_dir][xp]
    
    
In [97]:
    
from scipy.stats import mannwhitneyu
x = end_explo['2015-05-27_11-51-43-ARM-DIVA']['Arm-Diva-GB-H-seq_3000']
y = end_explo['2015-05-27_11-51-43-ARM-DIVA']['Arm-Diva-Tr-H-seq_3000']
u, p = mannwhitneyu(x, y)
print "U:", u, "p:", 2*p
    
    
In [ ]: