In [1]:
%pylab inline
In [2]:
from pandas import read_csv
import pandas
import glob
from collections import defaultdict
import seaborn as sns
import pandas as pd
import numpy as np
In [3]:
# Treatment naming: each treatment is a (prefix, suffix) pair; the prefix is a
# directory/path fragment and the suffix a run-family infix.  They are combined
# into glob patterns and plot labels throughout the notebook.
#treatment_pre={"fragile-nomut/":"Maze (No SA)","fragile-mut/":"Maze (SA)"}
#treatment_pre={"nomut-3t/":"Maze (No SA 3t)","mut-3t/":"Maze (SA 3t)"} #"mut-1t/":"Maze (SA 1t)","nomut-1t/":"Maze (No SA 1t)",,
#treatment_post={"control":" Ctrl","e2":" Extnct"}
treatment_pre={"r/sa":""} #,"r/na":"NA"} #"mut-1t/":"Maze (SA 1t)","nomut-1t/":"Maze (No SA 1t)",,
treatment_post={"cnti1":"Control","exti1":"ExtinctionMost","exti2":"ExtinctionMore","exti3":"ExtinctionSome"}
#treatment_post_d={"cnti1":" Ctrl","exti1":" Ext1","exti2":" Ext2","exti3":" Ext3"}
# sorted() works on both a Python 2 key list and a Python 3 dict view;
# the original list.sort() call on .keys() breaks under Python 3.
treatment_post_d=sorted(treatment_post)
# Cartesian product of prefixes and sorted suffixes, in a stable order.
treatments=[(a,b) for a in treatment_pre for b in treatment_post_d]
In [4]:
def compile_treatment(treat):
    """Aggregate the evolvability (.evo) logs for one (prefix, suffix) treatment.

    treat -- (prefix, suffix) pair; files matching "<prefix><suffix>*.evo" are
             read.  Each file holds space-separated "eval evo" rows.  Runs whose
             last sampled eval is not 3,000,000 (incomplete runs) are skipped.

    Returns (evals, means, mins, maxs, samps_arr, dict_store) where
      evals      -- sorted array of evaluation counts,
      means      -- per-eval average of the per-run medians,
      mins/maxs  -- per-eval average of the per-run minima/maxima,
      samps_arr  -- raw pooled samples, one row per eval count,
      dict_store -- (max, min, median) per-run lists before averaging.
    """
    #search="%sout-%s*.evo"% treat
    search = "%s%s*.evo" % treat
    print(search)  # print() works on both Python 2 and 3
    files = glob.glob(search)
    max_dict = defaultdict(list)
    maxx_dict = defaultdict(list)  # running within-run max; computed but never returned
    min_dict = defaultdict(list)
    mean_dict = defaultdict(list)
    samps = defaultdict(list)      # eval count -> all samples pooled across runs
    for fn in files:
        fsamps = defaultdict(list)
        data = read_csv(fn, header=None, sep=' ', names=["eval", "evo"])
        samples = data["eval"].unique()
        if samples[-1] != 3000000:
            continue  # incomplete run: skip it entirely
        for x in samples:
            evo_samp = data[data["eval"] == x]["evo"]
            samps[x] += list(evo_samp)
            fsamps[fn] += list(evo_samp)
            maxx_dict[x].append(max(fsamps[fn]))
            max_dict[x].append(evo_samp.max())
            min_dict[x].append(evo_samp.min())
            mean_dict[x].append(evo_samp.median())
    # Shallow copies keep the raw per-run lists; the averaging below only
    # rebinds keys in the originals, so the copies keep the list values.
    dict_store = (max_dict.copy(), min_dict.copy(), mean_dict.copy())
    keys = sorted(max_dict)  # py3-safe (was: keys = .keys(); keys.sort())
    for x in keys:
        max_dict[x] = sum(max_dict[x]) / float(len(max_dict[x]))
        maxx_dict[x] = sum(maxx_dict[x]) / float(len(maxx_dict[x]))
        min_dict[x] = sum(min_dict[x]) / float(len(min_dict[x]))
        mean_dict[x] = sum(mean_dict[x]) / float(len(mean_dict[x]))
    x = sorted(max_dict)
    # Bug fix: size the sample matrix from the first actual eval key instead of
    # the hard-coded samps[0]; the defaultdict silently returned [] when eval 0
    # was never sampled, producing a zero-width array and a failed assignment.
    samps_arr = np.zeros((len(x), len(samps[x[0]]) if x else 0))
    for k in range(len(x)):
        samps_arr[k, :] = np.array(samps[x[k]])
    means = np.array([mean_dict[k] for k in x])
    mins = np.array([min_dict[k] for k in x])
    maxs = np.array([max_dict[k] for k in x])
    return np.array(x), means, mins, maxs, samps_arr, dict_store
In [5]:
# --- Evolvability over time: one curve per treatment, then a final-eval summary.
# NOTE(review): this export lost the cell's indentation -- everything from
# `print treat` through `final_maxs.append(high[-1])` is the body of the
# `for treat in treatments:` loop.  Plotting names (title, plot, legend,
# xticks, plt, figure) come from the %pylab inline namespace.
k=0
samps_tot=[]    # per-treatment raw-sample matrices
final_samps=[]  # raw samples at the final evaluation count
final_means=[]  # final-eval average of per-run medians
final_mins=[]   # final-eval average of per-run minima
final_maxs=[]   # final-eval average of per-run maxima
store={}        # per-treatment dict_store returned by compile_treatment
title("Evolvability over time")
compiled_results=[]
for treat in treatments:
print treat  # Python 2 print statement
x,means,low,high,samps,store[treat]=compile_treatment(treat)
compiled_results.append((x,means,high))
#errorbar(x,means,yerr=numpy.vstack([low,high]),label=treat[0]+treat[1])
samps=samps.transpose()
final_samps.append(samps[:,-1])
samps_tot.append(samps)
k+=1
t=treat
# Label = prefix label + suffix label, e.g. "" + "Control".
label=treatment_pre[t[0]]+treatment_post[t[1]]
#new_ax.set_title(treatment_pre[t[0]]+treatment_post[t[1]])
#sns.boxplot(samps,ax=new_ax)
#plot(means,label=label)
#plot(low,label=label)
plot(high,label=label)  # only the averaged-max curve is drawn
#legend()
final_means.append(means[-1])
final_mins.append(low[-1])
final_maxs.append(high[-1])
legend()
plt.figure()
# Second figure: final-eval statistics, one x position per treatment.
#sns.boxplot(final_samps,names=[treatment_pre[t[0]]+treatment_post[t[1]] for t in treatments])
x=range(len(treatments))
plot(x,final_means,"go",label="median")
plot(x,final_mins,"ro",label="min")
plot(x,final_maxs,"bo",label="max")
xticks(range(len(treatments)),[treatment_pre[t[0]]+treatment_post[t[1]] for t in treatments])
Out[5]:
In [6]:
# --- Dump the averaged max/mean evolvability curves to space-separated .dat
# files for external plotting.  compiled_results[i] = (evals, means, highs)
# for treatments[i]; the first column of each table is the shared eval axis.
max_data=[numpy.array(compiled_results[0][0])]+[numpy.array(z[2]) for z in compiled_results]
print len(max_data)  # Python 2 print statement
max_data=np.vstack(max_data).transpose()  # rows = evals, cols = generation + one per treatment
mean_data=[numpy.array(compiled_results[0][0])]+[numpy.array(z[1]) for z in compiled_results]
print len(mean_data)  # Python 2 print statement
mean_data=np.vstack(mean_data).transpose()
cols=["generation"]+[treatment_post[treat[1]] for treat in treatments]
df1 = pandas.DataFrame(max_data,columns=cols)
df1.to_csv("maze-max.dat",sep=" ",index_label="indx")
df2 = pandas.DataFrame(mean_data,columns=cols)
df2.to_csv("maze-mean.dat",sep=" ",index_label="indx")
#speed = pd.Series([str(t) for t in treatments], name="treatment")
#sns.tsplot(numpy.dstack(samps_tot),condition=speed,err_style="boot_traces",ci=99.9999)
#legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
# ncol=2, mode="expand", borderaxespad=0.)
In [7]:
#print store
key=3000000
indx=2
import scipy.stats
for x in treatments:
for y in treatments:
xlabel=treatment_pre[x[0]]+treatment_post[x[1]]
ylabel=treatment_pre[y[0]]+treatment_post[y[1]]
xdat=store[x][indx][key]
ydat=store[y][indx][key]
xmean=float(sum(xdat))/len(xdat)
ymean=float(sum(ydat))/len(ydat)
print xlabel,xmean,ylabel,ymean,scipy.stats.ttest_ind(xdat,ydat)[1]
In [8]:
# Strip parentheses from every raw .log file and write the cleaned text to
# "<name>.log.new" so the comma-separated fields parse cleanly with read_csv.
for treat in treatments:
    #files=glob.glob("%sout-%s*.log"% treat)
    files = glob.glob("%s%s*.log" % treat)
    for fn in files:
        # Context managers close both handles promptly; the original leaked
        # both file objects and bound the unused write() result to `b`.
        with open(fn) as src:
            cleaned = src.read().replace("(", "").replace(")", "")
        with open(fn + ".new", "w") as dst:
            dst.write(cleaned)
In [9]:
#data=data[data["eval"]%50000==0]
#plot(data["eval"],data["niches"])
def compile_treatment_niche(treat, min_rows=3000):
    """Load the cleaned niche logs ("<prefix><suffix>*.log.new") for a treatment.

    treat    -- (prefix, suffix) pair used to build the glob pattern.
    min_rows -- runs with fewer than this many rows are treated as incomplete
                and skipped (default 3000, the previously hard-coded cutoff).

    Returns (datas, files): the per-run DataFrames that passed the length
    filter, and the full list of matched filenames (unfiltered).
    """
    #files=glob.glob("%sout-%s*.log.new"% treat)
    files = glob.glob("%s%s*.log.new" % treat)
    datas = []
    for fn in files:
        dat = read_csv(fn, header=None, sep=',',
                       names=["eval", "niches", "entropy", "complexity", "fit"])
        if dat.shape[0] < min_rows:
            continue  # incomplete run
        datas.append(dat)
    return datas, files
# Dead exploratory code, kept verbatim (it is this cell's displayed Out[9] value):
"""
for x in datas[0]["eval"].unique():
max_dict[x]=sum(max_dict[x])/float(len(max_dict[x]))
x=max_dict.keys()
x.sort()
y=[max_dict[k] for k in x]
return x,y
"""
Out[9]:
In [10]:
# --- Niches occupied over time: average the "niches" column across runs and
# plot one curve per treatment.
# NOTE(review): indentation lost in export -- everything from `t=treat`
# through `data.append(...)` is the body of the outer loop, and the
# `accum[key]+=...` line is the body of the inner `for k` loop.
data=[]
for treat in treatments:
#for treat in [treatments[4]]:
#print treat
t=treat
label=treatment_pre[t[0]]+treatment_post[t[1]]
d,files=compile_treatment_niche(treat)
x=d[0]["eval"]
# CAUTION: accum aliases d[0], so the += below mutates the first run's
# DataFrame in place before dividing by the run count to get the average.
accum=d[0]
key="niches"
for k in range(1,len(d)):
#print len(d[k][key]),files[k]
accum[key]+=d[k][key]
accum[key]/=len(d)
plot(x,accum[key],label=label)
data.append((x,accum[key],label))
title("Niches occupied")
#legend()
legend(loc=3,
ncol=2, mode="expand", borderaxespad=0.)
Out[10]:
In [11]:
# --- Dump the averaged niche curves to maze-niches.dat.
# NOTE(review): `data` is reused -- it enters as the previous cell's list of
# (x, series, label) tuples and leaves as a 2-D array; the RHS list below is
# fully built from the old value before `data` is rebound.
data=[numpy.array(data[0][0])]+[numpy.array(z[1]) for z in data]
print len(data)  # Python 2 print statement
data=np.vstack(data).transpose()  # rows = evals, cols = generation + one per treatment
cols=["generation"]+[treatment_post[treat[1]] for treat in treatments]
df1 = pandas.DataFrame(data,columns=cols)
df1.to_csv("maze-niches.dat",sep=" ",index_label="indx")
In [10]:
# --- Average complexity over time, one curve per treatment.
# NOTE(review): indentation lost in export -- lines through plot(...) are the
# loop body.  This cell and the next ("fit") are copy-paste twins and could
# share a plotting helper.
for treat in treatments:
print treat  # Python 2 print statement
d,files=compile_treatment_niche(treat)
x=d[0]["eval"]
key="complexity"
y=np.vstack([d[k][key] for k in range(len(d))]).mean(0)  # mean across runs
t=treat
label=treatment_pre[t[0]]+treatment_post[t[1]]
plot(x[1:],y[1:],label=label)  # [1:] skips the first logged sample
title("avg " + key)
legend(loc=3,
ncol=2, mode="expand", borderaxespad=0.)
Out[10]:
In [11]:
# --- Average fitness over time, one curve per treatment.
# NOTE(review): indentation lost in export -- lines through plot(...) are the
# loop body.  Copy-paste of the complexity cell with key="fit".
for treat in treatments:
d,files=compile_treatment_niche(treat)
x=d[0]["eval"]
key="fit"
y=np.vstack([d[k][key] for k in range(len(d))]).mean(0)  # mean across runs
t=treat
label=treatment_pre[t[0]]+treatment_post[t[1]]
plot(x[1:],y[1:],label=label)  # [1:] skips the first logged sample
title("avg " + key)
legend(loc=3,
ncol=2, mode="expand", borderaxespad=0.)
Out[11]:
In [12]:
# --- Fraction of runs per treatment that produced a *solution.dat file.
# NOTE(review): indentation lost in export -- lines through values.append are
# the loop body.  The 40.0 divisor is presumably the number of runs per
# treatment -- TODO confirm.
labels=[]
values=[]
for treat in treatments:
#print treat
t=treat
globstr=treat[0]+"*"+treat[1]+"*solution.dat"
label=treatment_pre[t[0]]+treatment_post[t[1]]
value=len(glob.glob(globstr))/40.0
print label,value  # Python 2 print statement
labels.append(label)
values.append(value)
bar(range(len(treatments)),values)
xticks([k+0.4 for k in range(len(treatments))],labels)
ylim(0,1)
title("Solution Probability")
#xticklabels(values)
Out[12]:
In [13]:
def get_traits(fn):
    """Return fields 1-4 (as floats) of every line containing "trait" in file fn."""
    # The with-block closes the handle (the original leaked the file object).
    # [:-1] drops the final split fragment -- assumes the file ends with a
    # newline, so that fragment is the empty string -- TODO confirm.
    with open(fn) as f:
        lines = f.read().split("\n")[:-1]
    traits = [k for k in lines if k.find("trait") != -1]
    return [[float(z) for z in t.split()[1:5]] for t in traits]
def get_links(fn):
    """Return fields 1-4 (as floats) of every line containing "gene" in file fn."""
    # The with-block closes the handle (the original leaked the file object).
    # [:-1] drops the final split fragment -- assumes the file ends with a
    # newline, so that fragment is the empty string -- TODO confirm.
    with open(fn) as f:
        lines = f.read().split("\n")[:-1]
    genes = [k for k in lines if k.find("gene") != -1]
    return [[float(z) for z in t.split()[1:5]] for t in genes]
def avg_over(files, func, indx):
    """Average the `indx`-th component of func(item) over every item in `files`."""
    picked = [func(item)[indx] for item in files]
    return sum(picked) / len(picked)
from collections import Counter
def avg_traits(fn):
# Link-count-weighted summary statistics for one genome file.
# NOTE(review): indentation lost in export -- all lines through the return
# are this function's body.  Assumes exactly three traits with ids 1..3
# (the range(3) / tcount[x+1] pattern) -- TODO confirm.
t=get_traits(fn)
links=get_links(fn)
total=float(len(links))
# tcount[i] = number of links whose first field (trait id) equals i.
tcount=Counter([x[0] for x in links])
#print tcount
# Weight each trait's field by how many links reference that trait.
avg_mod = sum([t[x][1]*tcount[x+1] for x in range(3)])/total #len(t)
avg_skip = sum([t[x][2]*tcount[x+1] for x in range(3)])/total #len(t)
# Fraction of links belonging to the least-used of the three traits.
avg_dummy = min(tcount[1],tcount[2],tcount[3])/total
#avg_dummy = sum([t[x][3]*tcount[x+1] for x in range(3)])/total #len(t)
"""
avg_mod = sum([z[1] for z in t])/len(t)
avg_skip = sum([z[2] for z in t])/len(t)
avg_dummy = sum([z[3] for z in t])/len(t)
"""
return (avg_mod,avg_skip,avg_dummy)
# --- Bar-plot each averaged trait statistic across treatments.
# z indexes the avg_traits return tuple (order: avg_mod, avg_skip, avg_dummy).
# NOTE(review): indentation lost in export -- the labels..values.append lines
# are the inner loop body; the bar/xticks/ylim/title/figure calls run once
# per z inside the outer loop.
for z in range(3):
labels=[]
values=[]
for treat in treatments:
t=treat
label=treatment_pre[t[0]]+treatment_post[t[1]]
gcmd=treat[0]+"*"+treat[1]+"*bestevo.dat"
files=glob.glob(gcmd)
value=avg_over(files,avg_traits,z)
print label,value  # Python 2 print statement
labels.append(label)
values.append(value)
#get_traits(files[0])
#get_links(files[1])
#print avg_traits(files[4])
bar(range(len(treatments)),values)
xticks([k+0.4 for k in range(len(treatments))],labels)
ylim(0,1)
title("%d Average" % z)
plt.figure()
In [13]:
In [13]: