In [1]:
import glob, re, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
In [ ]:
In [2]:
# Collect one row per line of every per-iteration summary file.
# Each row is [name, iteration_dir, testset, iteration, rmse, R, S, aucpose, aucaff, top].
results = []
for fname in glob.glob('iteration*/all_affinity*.summary'):
    # 'iterationN/all_affinity_X.summary' -> 'iterationN-X'
    name = re.sub(r'\.summary', '', fname)
    name = re.sub('/all_affinity_', '-', name)
    iteration_dir = name.split('-')[0]  # loop-invariant, so computed once per file
    # Context manager so the file handle is closed promptly.
    with open(fname) as fh:
        for line in fh:
            vals = line.split()
            if not vals:
                # A blank line carries no fields and would otherwise raise IndexError below.
                continue
            #testset, iteration, rmse, R, S
            if len(vals) == 5:
                # Short records lack the last three metrics; pad them with zeros.
                vals += [0, 0, 0]
            # Fields 2..7 are the numeric metrics; the first two stay as strings.
            # range (not xrange) works on both Python 2 and Python 3.
            for i in range(2, 8):
                vals[i] = float(vals[i])
            results.append([name, iteration_dir] + vals)
In [3]:
# Turn the accumulated rows into a labelled table.
results = pd.DataFrame(
    results,
    columns=[
        'name', 'I', 'testset', 'iteration',
        'rmse', 'R', 'S', 'aucpose', 'aucaff', 'top',
    ],
)
In [4]:
# Keep only the two combined test sets, drop rows whose aucpose is 0
# and exclude everything from iteration6.
results = results[
    results['testset'].isin(['all_affinity', 'all_pose'])
    & results['aucpose'].ne(0)
    & results['I'].ne('iteration6')
]
# Subset restricted to the '100k' iteration label.
results100 = results[results['iteration'].eq('100k')]
In [5]:
# Five rows with the smallest rmse.
results100.sort_values('rmse').head(5)
Out[5]:
In [6]:
# Five rows with the largest aucpose.
results100.sort_values('aucpose', ascending=False).head(5)
Out[6]:
In [7]:
# Five rows with the largest value in the 'top' column.
results100.sort_values('top', ascending=False).head(5)
Out[7]:
In [8]:
# Joint distribution of rmse against R, zoomed to fixed axis windows.
sns.jointplot(
    data=results100,
    x='rmse',
    y='R',
    alpha=.5,
    xlim=(1.5, 2.5),
    ylim=(.25, .6),
)
Out[8]:
In [9]:
# Joint distribution of S against R.
sns.jointplot(
    data=results100,
    x='S',
    y='R',
    alpha=.5,
)
Out[9]:
In [10]:
# Joint distribution of rmse against aucpose, x axis limited to 1.5-2.5.
sns.jointplot(
    data=results100,
    x='rmse',
    y='aucpose',
    alpha=.5,
    xlim=(1.5, 2.5),
)
Out[10]:
In [11]:
# Joint distribution of rmse against top, x axis limited to 1.5-2.5.
sns.jointplot(
    data=results100,
    x='rmse',
    y='top',
    alpha=.5,
    xlim=(1.5, 2.5),
)
Out[11]:
In [12]:
# Joint distribution of aucpose against top.
sns.jointplot(
    data=results100,
    x='aucpose',
    y='top',
    alpha=.5,
)
Out[12]:
In [13]:
# thank you stack overflow
def is_pareto_efficient_dumb(costs):
    """
    Flag the Pareto-efficient rows of a cost matrix (lower cost is better).

    A point is Pareto efficient when no other point dominates it, i.e. when no
    other point is at least as good in every cost and strictly better in at
    least one.  Exact duplicates do not dominate each other, so every copy of
    an efficient point is flagged.

    The previous ``costs >= c`` test only rejected points that were beaten
    strictly in *every* cost, so a point that tied on one cost and lost on
    another was wrongly reported as efficient.

    :param costs: An (n_points, n_costs) array
    :return: A (n_points, ) boolean array, indicating whether each point is Pareto efficient
    """
    costs = np.asarray(costs)
    is_efficient = np.ones(costs.shape[0], dtype=bool)
    for i, c in enumerate(costs):
        # Rows that are no worse than c in every cost ...
        no_worse = np.all(costs <= c, axis=1)
        # ... and strictly better than c in at least one cost dominate c.
        strictly_better = np.any(costs < c, axis=1)
        is_efficient[i] = not np.any(no_worse & strictly_better)
    return is_efficient
In [14]:
# Cost matrix over four metrics. rmse is used as-is; the remaining columns are
# negated in place (presumably because larger values are better for them).
costs = np.array(results100[['rmse', 'aucpose', 'top', 'R']])
np.negative(costs[:, 1:], out=costs[:, 1:])
pareto = is_pareto_efficient_dumb(costs)
In [15]:
# Rows selected by the Pareto mask, shown as rmse against aucpose.
p = results100.loc[pareto]
plt.plot(p['rmse'], p['aucpose'], 'o')
Out[15]:
In [16]:
# Pareto front over (top, S) for the 'all_pose' rows; both columns are negated
# before being passed as costs.
r = results100.loc[results100['testset'] == 'all_pose']
costs = -np.array(r[['top', 'S']])
pareto = is_pareto_efficient_dumb(costs)
p = r.loc[pareto]
In [17]:
# Pareto front over (top, S) for the 'all_affinity' rows; both columns are
# negated before being passed as costs.
ra = results100.loc[results100['testset'] == 'all_affinity']
costs = -np.array(ra[['top', 'S']])
paretoa = is_pareto_efficient_dumb(costs)
pa = ra.loc[paretoa]
In [18]:
# All 100k rows (faint) with the pose and affinity Pareto sets overlaid.
plt.plot(results100['top'], results100['S'], 'o', alpha=.2)
plt.plot(p['top'], p['S'], 'o')
plt.plot(pa['top'], pa['S'], 'o', alpha=.5)
plt.xlabel('Top')
plt.ylabel('S')
# Only the lower bound of each axis is pinned.
plt.xlim(.5)
plt.ylim(.5)
Out[18]:
In [19]:
# 'all_pose' rows (faint) with their Pareto set overlaid; the affinity set is
# intentionally not drawn here.
plt.plot(r['top'], r['S'], 'o', alpha=.2)
plt.plot(p['top'], p['S'], 'o')
plt.xlabel('Top')
plt.ylabel('S')
# Only the lower bound of each axis is pinned.
plt.xlim(.5)
plt.ylim(.5)
Out[19]:
In [20]:
# Pose Pareto set ordered by ascending 'top'.
p.sort_values(by='top')
Out[20]:
In [21]:
# Affinity Pareto set ordered by ascending 'top'.
pa.sort_values(by='top')
Out[21]:
In [22]:
# Scatter of top against R for the 'all_pose' rows, coloured by iteration
# directory, saved to iterations.pdf.
#
# sns.lmplot is a figure-level function: it always builds its own figure, so a
# preceding plt.figure(figsize=(12,12)) only left a stray empty figure behind
# and never sized the plot.  Resize the figure owned by the returned grid.
grid = sns.lmplot(x='top',y='R',data=results100[results100.testset == 'all_pose'],fit_reg=False,hue='I',scatter_kws={'alpha':0.6})
grid.fig.set_size_inches(12, 12)
# The grid's figure is the current pyplot figure, so these act on it.
plt.xlim(.5)
plt.ylim(.5)
plt.savefig('iterations.pdf',bbox_inches='tight')
In [23]:
# search_data.csv is created with extractres.py
sdata = pd.read_csv(
    'search_data.csv',
)
In [24]:
# Distribution of the R column of the search data.
sns.distplot(sdata['R'])
Out[24]:
In [25]:
# Distribution of the top column of the search data.
sns.distplot(sdata['top'])
Out[25]:
In [26]:
# Distribution of the rmse column of the search data.
sns.distplot(sdata['rmse'])
Out[26]:
In [27]:
# Largest value in the top column of the search data.
sdata['top'].max()
Out[27]:
In [28]:
# Show only the four metric columns of the search data.
sdata[['top', 'auc', 'R', 'rmse']]
Out[28]:
In [29]:
# R against rmse for the search data, zoomed to a fixed window.
plt.plot(sdata['R'], sdata['rmse'], 'o')
plt.xlim(.4, .6)
plt.ylim(1, 2)
Out[29]:
In [36]:
# Rows whose name starts with 'iteration5' and ends with '0_2', best R first.
results100[
    results100['name'].str.startswith('iteration5')
    & results100['name'].str.endswith('0_2')
].sort_values('R', ascending=False)
Out[36]:
In [ ]:
In [ ]: