In [37]:

    
%pylab inline









    



Populating the interactive namespace from numpy and matplotlib






    



WARNING: pylab import has clobbered these variables: ['ylabel', 'xlabel']
`%pylab --no-import-all` prevents importing * from pylab and numpy



In [38]:

    
from pandas import read_csv
import pandas
import glob
from collections import defaultdict
import seaborn as sns
import pandas as pd
import numpy as np

Evolvability Analysis



In [39]:

    
#treatment_pre={"fragile-nomut/":"Maze (No SA)","fragile-mut/":"Maze (SA)"}
#treatment_pre={"nomut-3t/":"Maze (No SA 3t)","mut-3t/":"Maze (SA 3t)"} #"mut-1t/":"Maze (SA 1t)","nomut-1t/":"Maze (No SA 1t)",,
#treatment_post={"control":" Ctrl","e2":" Extnct"}

treatment_pre={"r/gsa":""} #,"r/na":"NA"} #"mut-1t/":"Maze (SA 1t)","nomut-1t/":"Maze (No SA 1t)",,
treatment_post={"cnti1":"Control","exti1":"ExtinctionMost","exti2":"ExtinctionMore","exti3":"ExtinctionSome"}
#treatment_post_d={"cnti1":" Ctrl","exti1":" Ext1","exti2":" Ext2","exti3":" Ext3"}
treatment_post_d=treatment_post.keys()
treatment_post_d.sort()
treatments=[]

for a in treatment_pre:
    for b in treatment_post_d:
        treatments.append((a,b))



In [40]:

    
def compile_treatment(treat):
    #search="%sout-%s*.evo"% treat
    search="%s%s*.evo"% treat
    print search
    files=glob.glob(search)
    
    max_dict=defaultdict(list)
    maxx_dict=defaultdict(list)
    min_dict=defaultdict(list)
    mean_dict=defaultdict(list)
    samps=defaultdict(list)
    for fn in files:
        fsamps=defaultdict(list)
        data=read_csv(fn,header=None,sep=' ',names=["gen","eval","evo"])
        samples = data["gen"].unique()
        
        #if(samples[-1]!=3000000):
        #     continue
        print samples
        for x in samples:
          #if x>5000:
          #      continue
          evo_samp=data[data["gen"]==x]["evo"]
          #print x,evo_samp
          #print evo_samp.shape
          samps[x]+=list(evo_samp)
          fsamps[fn]+=list(evo_samp)
          maxx_dict[x].append(max(fsamps[fn]))
          max_dict[x].append(evo_samp.max())
          min_dict[x].append(evo_samp.min())
          mean_dict[x].append(evo_samp.median())

    dict_store = (max_dict.copy(),min_dict.copy(),mean_dict.copy())
    
    keys= max_dict.keys()
    keys.sort()
    for x in keys:
     max_dict[x]=sum(max_dict[x])/float(len(max_dict[x]))
     maxx_dict[x]=sum(maxx_dict[x])/float(len(maxx_dict[x]))
     min_dict[x]=sum(min_dict[x])/float(len(min_dict[x]))
     mean_dict[x]=sum(mean_dict[x])/float(len(mean_dict[x]))
    x=max_dict.keys()
    x.sort()
    
    samps_arr=np.zeros((len(x),len(samps[x[1]])))
    
    while len(samps[0])<len(samps[x[1]]):
        print len(samps[0]),len(samps[x[1]])
        samps[0]+=samps[0]
    samps[0]=samps[0][:len(samps[x[1]])]
    
    #print [len(samps[x[k]]) for k in range(len(x))]
    for k in range(len(x)):
     samps_arr[k,:] = np.array(samps[x[k]])
    means=np.array([mean_dict[k] for k in x])
    mins=np.array([min_dict[k] for k in x])
    maxs=np.array([max_dict[k] for k in x])
    return np.array(x),means,mins,maxs,samps_arr,dict_store



In [41]:

    
k=0
samps_tot=[]
final_samps=[]

final_means=[]
final_mins=[]
final_maxs=[]

store={}

title("Evolvability over time")

compiled_results=[]

for treat in treatments:
    print treat
    x,means,low,high,samps,store[treat]=compile_treatment(treat)
    compiled_results.append((x,means,high))

    #errorbar(x,means,yerr=numpy.vstack([low,high]),label=treat[0]+treat[1])
    samps=samps.transpose()
    final_samps.append(samps[:,-1])
    samps_tot.append(samps)
    k+=1
    t=treat
    label=treatment_pre[t[0]]+treatment_post[t[1]]
    #new_ax.set_title(treatment_pre[t[0]]+treatment_post[t[1]])
    #sns.boxplot(samps,ax=new_ax)   
    #plot(means,label=label)
    #plot(low,label=label)
    plot(high,label=label)
    #legend()

    final_means.append(means[-1])
    final_mins.append(low[-1])
    final_maxs.append(high[-1])
legend()
plt.figure()



#sns.boxplot(final_samps,names=[treatment_pre[t[0]]+treatment_post[t[1]] for t in treatments])

x=range(len(treatments))
plot(x,final_means,"go",label="median")
plot(x,final_mins,"ro",label="min")
plot(x,final_maxs,"bo",label="max")
xticks(range(len(treatments)),[treatment_pre[t[0]]+treatment_post[t[1]] for t in treatments])









    



('r/gsa', 'cnti1')
r/gsacnti1*.evo
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
20 5000
40 5000
80 5000
160 5000
320 5000
640 5000
1280 5000
2560 5000
('r/gsa', 'exti1')
r/gsaexti1*.evo
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
21 5000
42 5000
84 5000
168 5000
336 5000
672 5000
1344 5000
2688 5000
('r/gsa', 'exti2')
r/gsaexti2*.evo
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
21 5000
42 5000
84 5000
168 5000
336 5000
672 5000
1344 5000
2688 5000
('r/gsa', 'exti3')
r/gsaexti3*.evo
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
[   0  500 1000 1500 2000 2500 3000 3500 4000 4500 5000]
20 5000
40 5000
80 5000
160 5000
320 5000
640 5000
1280 5000
2560 5000






    Out[41]:





([<matplotlib.axis.XTick at 0x7f56c7a1b990>,
  <matplotlib.axis.XTick at 0x7f56c7a3f1d0>,
  <matplotlib.axis.XTick at 0x7f56c7999690>,
  <matplotlib.axis.XTick at 0x7f56c79a5790>],
 <a list of 4 Text xticklabel objects>)



In [42]:

    
max_data=[numpy.array(compiled_results[0][0])]+[numpy.array(z[2]) for z in compiled_results]
print len(max_data)
max_data=np.vstack(max_data).transpose()

mean_data=[numpy.array(compiled_results[0][0])]+[numpy.array(z[1]) for z in compiled_results]
print len(mean_data)
mean_data=np.vstack(mean_data).transpose()

cols=["generation"]+[treatment_post[treat[1]] for treat in treatments]
df1 = pandas.DataFrame(max_data,columns=cols)
df1.to_csv("gmaze-max.dat",sep=" ",index_label="indx")

df2 = pandas.DataFrame(mean_data,columns=cols)
df2.to_csv("gmaze-mean.dat",sep=" ",index_label="indx")


#speed = pd.Series([str(t) for t in treatments], name="treatment")
#sns.tsplot(numpy.dstack(samps_tot),condition=speed,err_style="boot_traces",ci=99.9999)

#legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
#       ncol=2, mode="expand", borderaxespad=0.)

5
5

Stat analyis



In [44]:

    
#print store
key=3000000
indx=2
import scipy.stats

for x in treatments:
    for y in treatments:
        
        xlabel=treatment_pre[x[0]]+treatment_post[x[1]]
        ylabel=treatment_pre[y[0]]+treatment_post[y[1]]
        xdat=store[x][indx][key]
        ydat=store[y][indx][key]
        xmean=float(sum(xdat))/len(xdat)
        ymean=float(sum(ydat))/len(ydat)
        print xlabel,xmean,ylabel,ymean,scipy.stats.ttest_ind(xdat,ydat)[1]









    



---------------------------------------------------------------------------
ZeroDivisionError                         Traceback (most recent call last)
<ipython-input-44-04f5ae7c1e4f> in <module>()
     11         xdat=store[x][indx][key]
     12         ydat=store[y][indx][key]
---> 13         xmean=float(sum(xdat))/len(xdat)
     14         ymean=float(sum(ydat))/len(ydat)
     15         print xlabel,xmean,ylabel,ymean,scipy.stats.ttest_ind(xdat,ydat)[1]

ZeroDivisionError: float division by zero

Niche Analysis



In [45]:

    
for treat in treatments:
 #files=glob.glob("%sout-%s*.log"% treat)
 files=glob.glob("%s%s*.log"% treat)
 for fn in files:
        fn_new=fn + ".new"
        a=open(fn).read()
        a=a.replace("(","").replace(")","")
        b=open(fn_new,"w").write(a)



In [46]:

    
#data=data[data["eval"]%50000==0]
#plot(data["eval"],data["niches"])

def compile_treatment_niche(treat):
    #files=glob.glob("%sout-%s*.log.new"% treat)
    files=glob.glob("%s%s*.log.new"% treat)
    datas=[]
    for fn in files:
        dat=read_csv(fn,header=None,sep=',',names=["eval","niches","entropy","complexity","fit","gen"])
        #if(dat.shape[0]<3000):
        #    continue
        datas.append(dat[:2000])
    return datas,files

"""
    for x in datas[0]["eval"].unique():   
     max_dict[x]=sum(max_dict[x])/float(len(max_dict[x]))

    x=max_dict.keys()
    x.sort()
    y=[max_dict[k] for k in x]
    return x,y
"""









    Out[46]:





'\n    for x in datas[0]["eval"].unique():   \n     max_dict[x]=sum(max_dict[x])/float(len(max_dict[x]))\n\n    x=max_dict.keys()\n    x.sort()\n    y=[max_dict[k] for k in x]\n    return x,y\n'



In [47]:

    
data=[]
for treat in treatments:
#for treat in [treatments[4]]:
    #print treat
    t=treat
    label=treatment_pre[t[0]]+treatment_post[t[1]]
    d,files=compile_treatment_niche(treat)
    x=d[0]["gen"]
    accum=d[0]
    key="niches"
    for k in range(1,len(d)):
      #print len(d[k][key]),files[k]
      accum[key]+=d[k][key]
    accum[key]/=len(d)
    plot(x,accum[key],label=label)
    data.append((x,accum[key],label))
title("Niches occupied")
#legend()
legend(loc=3,
       ncol=2, mode="expand", borderaxespad=0.)









    Out[47]:





<matplotlib.legend.Legend at 0x7f56c7bd8ad0>



In [48]:

    
data=[numpy.array(data[0][0])]+[numpy.array(z[1]) for z in data]
print len(data)
data=np.vstack(data).transpose()


cols=["generation"]+[treatment_post[treat[1]] for treat in treatments]
df1 = pandas.DataFrame(data,columns=cols)
df1.to_csv("gmaze-niches.dat",sep=" ",index_label="indx")



In [49]:

    
for treat in treatments:
    print treat
    d,files=compile_treatment_niche(treat)
    x=d[0]["gen"]
    key="complexity"
    y=np.vstack([d[k][key] for k in range(len(d))]).mean(0)
    t=treat
    label=treatment_pre[t[0]]+treatment_post[t[1]]
    plot(x[1:],y[1:],label=label)

title("avg " + key)
legend(loc=3,
       ncol=2, mode="expand", borderaxespad=0.)









    



('r/gsa', 'cnti1')
('r/gsa', 'exti1')
('r/gsa', 'exti2')
('r/gsa', 'exti3')






    Out[49]:





<matplotlib.legend.Legend at 0x7f56c7f6b890>



In [50]:

    
for treat in treatments:
    d,files=compile_treatment_niche(treat)
    x=d[0]["gen"]
    key="fit"
    y=np.vstack([d[k][key] for k in range(len(d))]).mean(0)
    t=treat
    label=treatment_pre[t[0]]+treatment_post[t[1]]
    plot(x[100:],y[100:],label=label)

title("avg " + key)
legend(loc=3,
       ncol=2, mode="expand", borderaxespad=0.)









    Out[50]:





<matplotlib.legend.Legend at 0x7f56c7da8650>



In [52]:

    
labels=[]
values=[]

for treat in treatments:
    #print treat
    t=treat
    globstr=treat[0]+"*"+treat[1]+"*solution.dat"
    label=treatment_pre[t[0]]+treatment_post[t[1]]
    value=len(glob.glob(globstr))/20.0
    print label,value
    labels.append(label)
    values.append(value)

bar(range(len(treatments)),values)
xticks([k+0.4 for k in range(len(treatments))],labels)
ylim(0,1)
title("Solution Probability")
#xticklabels(values)









    



Control 0.45
ExtinctionMost 0.55
ExtinctionMore 0.4
ExtinctionSome 0.45






    Out[52]:





<matplotlib.text.Text at 0x7f56c76bdd10>



In [53]:

    
def get_traits(fn):
    lines=open(fn).read().split("\n")[:-1]
    traits=[k for k in lines if k.find("trait")!=-1]
    return [[float(z) for z in t.split()[1:5]] for t in traits]

def get_links(fn):
    lines=open(fn).read().split("\n")[:-1]
    traits=[k for k in lines if k.find("gene")!=-1]
    return [[float(z) for z in t.split()[1:5]] for t in traits]

def avg_over(files,func,indx):
    return sum([func(k)[indx] for k in files])/len(files)

from collections import Counter
def avg_traits(fn):
    t=get_traits(fn)
    links=get_links(fn)
    
    total=float(len(links))
    tcount=Counter([x[0] for x in links])
    
    #print tcount
    avg_mod = sum([t[x][1]*tcount[x+1] for x in range(3)])/total #len(t)
    avg_skip = sum([t[x][2]*tcount[x+1] for x in range(3)])/total #len(t)
    avg_dummy = min(tcount[1],tcount[2],tcount[3])/total
    #avg_dummy = sum([t[x][3]*tcount[x+1] for x in range(3)])/total #len(t)
    
    """
    avg_mod = sum([z[1] for z in t])/len(t)
    avg_skip = sum([z[2] for z in t])/len(t)
    avg_dummy = sum([z[3] for z in t])/len(t)
    """
    
    return (avg_mod,avg_skip,avg_dummy)

for z in range(3):
 labels=[]
 values=[]
 for treat in treatments:
    t=treat
    label=treatment_pre[t[0]]+treatment_post[t[1]]
    
    gcmd=treat[0]+"*"+treat[1]+"*bestevo.dat"
    files=glob.glob(gcmd)
    
    value=avg_over(files,avg_traits,z)
    print label,value
    labels.append(label)
    values.append(value)
    
    #get_traits(files[0])
    #get_links(files[1])
    #print avg_traits(files[4])
 bar(range(len(treatments)),values)
 xticks([k+0.4 for k in range(len(treatments))],labels)
 ylim(0,1)
 title("%d Average" % z)
 plt.figure()









    



Control 0.206585298139
ExtinctionMost 0.159105712469
ExtinctionMore 0.185365957583
ExtinctionSome 0.225021031176
Control 0.144503819477
ExtinctionMost 0.445095330087
ExtinctionMore 0.294547737171
ExtinctionSome 0.225082563777
Control 0.0345029620657
ExtinctionMost 0.0861044976479
ExtinctionMore 0.0820423793332
ExtinctionSome 0.0622368186397






    












    












    












    





<matplotlib.figure.Figure at 0x7f56c74e6890>



In [13]:



In [13]: