In [1]:
import glob, re, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [ ]:


In [2]:
# Gather one row per line of every per-model summary file.
# Row layout: [name, iteration directory, testset, iteration, then six float metrics].
results = []
for fname in glob.glob('iteration*/all_affinity*.summary'):
    # 'iterationN/all_affinity_<model>.summary' -> 'iterationN-<model>'
    # (raw strings so that '\.' is a regex escape, not an invalid string escape)
    name = re.sub(r'\.summary', '', fname)
    name = re.sub(r'/all_affinity_', '-', name)
    # Context manager guarantees the file handle is closed after reading.
    with open(fname) as summary_file:
        for line in summary_file:
            vals = line.split()
            if not vals:
                # Blank line (e.g. trailing newline at end of file): nothing to parse.
                continue
            #testset, iteration, rmse, R, S
            if len(vals) == 5:
                # Short rows lack the last three metrics; pad them with zeros.
                vals += [0, 0, 0]
            # range() instead of xrange() so the cell runs on both Python 2 and 3.
            for i in range(2, 8):
                vals[i] = float(vals[i])

            results.append([name, name.split('-')[0]] + vals)

In [3]:
# Turn the parsed rows into a DataFrame, labelling each field of a row.
column_names = ['name', 'I', 'testset', 'iteration',
                'rmse', 'R', 'S', 'aucpose', 'aucaff', 'top']
results = pd.DataFrame(results, columns=column_names)

In [4]:
# Row filters, named individually for readability.
in_all_testsets = (results['testset'] == 'all_affinity') | (results['testset'] == 'all_pose')
nonzero_aucpose = results['aucpose'] != 0
not_iteration6 = results['I'] != 'iteration6'

results = results[in_all_testsets & nonzero_aucpose & not_iteration6]
# Subset of the filtered rows whose iteration label is '100k'.
results100 = results[results['iteration'] == '100k']

In [5]:
# Five rows with the lowest rmse.
results100.sort_values('rmse').head(5)


Out[5]:
name I testset iteration rmse R S aucpose aucaff top
1605 iteration2-g1_p0_h0 iteration2 all_affinity 100k 1.650597 0.575963 0.579083 0.919870 0.771714 0.561282
2417 iteration5-32-32-32_7_1_1 iteration5 all_affinity 100k 1.662631 0.565374 0.560998 0.887932 0.746926 0.491492
2877 iteration4-3_32_0_7 iteration4 all_affinity 100k 1.662631 0.565374 0.560998 0.887932 0.746926 0.491492
1477 iteration2-g2_p0_h0 iteration2 all_affinity 100k 1.668115 0.567108 0.558524 0.915495 0.749532 0.551665
1637 iteration2-g1_p1_h0 iteration2 all_affinity 100k 1.670774 0.568650 0.571575 0.920238 0.805854 0.595314

In [6]:
# Five rows with the highest aucpose.
results100.sort_values('aucpose', ascending=False).head(5)


Out[6]:
name I testset iteration rmse R S aucpose aucaff top
2577 iteration1-p1_rec0_astrat0_b1 iteration1 all_affinity 100k 1.732900 0.532962 0.550466 0.925966 0.785106 0.528977
2576 iteration1-p1_rec0_astrat0_b1 iteration1 all_pose 100k 1.749490 0.524212 0.540142 0.925966 0.785106 0.634279
1777 iteration2-g2_p2_h0 iteration2 all_affinity 100k 1.704713 0.558476 0.557435 0.922099 0.823510 0.610851
1776 iteration2-g2_p2_h0 iteration2 all_pose 100k 1.738621 0.546849 0.546129 0.922099 0.823510 0.618002
193 iteration3-none_relu_lr0.010 iteration3 all_affinity 100k 1.681941 0.550760 0.550246 0.921432 0.776856 0.567941

In [7]:
# Five rows with the highest 'top' value.
results100.sort_values('top', ascending=False).head(5)


Out[7]:
name I testset iteration rmse R S aucpose aucaff top
68 iteration3-batch_relu_lr0.010 iteration3 all_pose 100k 1.693488 0.553803 0.551250 0.916430 0.762815 0.656720
20 iteration3-batch_leaky_lr0.010 iteration3 all_pose 100k 1.702117 0.546954 0.535193 0.914155 0.761463 0.651788
636 iteration4.6-0.010_1_Adam_0.010 iteration4.6 all_pose 100k 1.749769 0.531438 0.534019 0.917711 0.770594 0.645869
452 iteration3-batch_tanh_lr0.010 iteration3 all_pose 100k 1.741043 0.512958 0.504249 0.887398 0.761664 0.641184
2400 iteration5-64-32-16_3_1_1 iteration5 all_pose 100k 1.721907 0.528304 0.516657 0.910429 0.742457 0.640937

In [8]:
# Joint distribution of rmse against R, with both axes clipped to fixed ranges.
sns.jointplot(
    data=results100,
    x='rmse',
    y='R',
    alpha=.5,
    xlim=(1.5, 2.5),
    ylim=(.25, .6),
)


Out[8]:
<seaborn.axisgrid.JointGrid at 0x7f6830bb8fd0>

In [9]:
# Joint distribution of S against R.
sns.jointplot(
    data=results100,
    x='S',
    y='R',
    alpha=.5,
)


Out[9]:
<seaborn.axisgrid.JointGrid at 0x7f682e9d28d0>

In [10]:
# Joint distribution of rmse against aucpose; x axis clipped to a fixed range.
sns.jointplot(
    data=results100,
    x='rmse',
    y='aucpose',
    alpha=.5,
    xlim=(1.5, 2.5),
)


Out[10]:
<seaborn.axisgrid.JointGrid at 0x7f682ea44550>

In [11]:
# Joint distribution of rmse against top; x axis clipped to a fixed range.
sns.jointplot(
    data=results100,
    x='rmse',
    y='top',
    alpha=.5,
    xlim=(1.5, 2.5),
)


Out[11]:
<seaborn.axisgrid.JointGrid at 0x7f682e544710>

In [12]:
# Joint distribution of aucpose against top.
sns.jointplot(
    data=results100,
    x='aucpose',
    y='top',
    alpha=.5,
)


Out[12]:
<seaborn.axisgrid.JointGrid at 0x7f682e265490>

In [13]:
# thank you stack overflow

def is_pareto_efficient_dumb(costs):
    """
    Flag the Pareto-efficient rows of a cost matrix (lower cost is better).

    A row is Pareto efficient when no other row dominates it, i.e. no other
    row is less than or equal on every cost and strictly less on at least one.
    Exact duplicates do not dominate each other, so all copies of an efficient
    point are flagged. Every row is compared against every row, so the work
    grows quadratically with the number of points.

    :param costs: An (n_points, n_costs) array
    :return: A (n_points, ) boolean array, indicating whether each point is Pareto efficient
    """
    costs = np.asarray(costs)
    is_efficient = np.ones(costs.shape[0], dtype = bool)
    for i, c in enumerate(costs):
        # Rows that are at least as good as c on every cost ...
        no_worse = np.all(costs <= c, axis=1)
        # ... and strictly better on at least one (this excludes c itself and its duplicates).
        strictly_better = np.any(costs < c, axis=1)
        is_efficient[i] = not np.any(no_worse & strictly_better)
    return is_efficient

In [14]:
# Cost matrix over four metrics: rmse stays as-is, while aucpose, top and R
# (columns 1 onward) are negated so that larger values become smaller costs.
objective_columns = ['rmse', 'aucpose', 'top', 'R']
costs = np.array(results100[objective_columns])
costs[:, 1:] = -costs[:, 1:]
pareto = is_pareto_efficient_dumb(costs)

In [15]:
# Rows flagged by the boolean mask, plotted as rmse against aucpose.
p = results100.loc[pareto]
plt.plot(p['rmse'], p['aucpose'], 'o')


Out[15]:
[<matplotlib.lines.Line2D at 0x7f682e08a350>]

In [16]:
# Front over (top, S) for the all_pose rows; both metrics are negated to act as costs.
r = results100.loc[results100['testset'] == 'all_pose']
costs = -1 * np.array(r[['top', 'S']])
pareto = is_pareto_efficient_dumb(costs)
p = r.loc[pareto]

In [17]:
# Front over (top, S) for the all_affinity rows; both metrics are negated to act as costs.
ra = results100.loc[results100['testset'] == 'all_affinity']
costs = -1 * np.array(ra[['top', 'S']])
paretoa = is_pareto_efficient_dumb(costs)
pa = ra.loc[paretoa]

In [18]:
# Three layers on one axes: every 100k row (faint), then the all_pose front,
# then the all_affinity front (semi-transparent).
layers = (
    (results100, {'alpha': .2}),
    (p, {}),
    (pa, {'alpha': .5}),
)
for frame, extra in layers:
    plt.plot(frame['top'], frame['S'], 'o', **extra)

plt.xlabel('Top')
plt.ylabel('S')
# Lower axis bounds only; upper bounds are left as they are.
plt.xlim(.5)
plt.ylim(.5)


Out[18]:
(0.5, 0.6138508802164)

In [19]:
# all_pose rows (faint) with their front drawn on top.
layers = (
    (r, {'alpha': .2}),
    (p, {}),
)
for frame, extra in layers:
    plt.plot(frame['top'], frame['S'], 'o', **extra)
#plt.plot(pa.top,pa.S,'o',alpha=.5)

plt.xlabel('Top')
plt.ylabel('S')
# Lower axis bounds only; upper bounds are left as they are.
plt.xlim(.5)
plt.ylim(.5)


Out[19]:
(0.5, 0.5930056183938)

In [20]:
# all_pose front, ordered by ascending 'top'.
p.sort_values(by='top')


Out[20]:
name I testset iteration rmse R S aucpose aucaff top
1604 iteration2-g1_p0_h0 iteration2 all_pose 100k 1.678961 0.559362 0.559834 0.919870 0.771714 0.619729
1636 iteration2-g1_p1_h0 iteration2 all_pose 100k 1.704754 0.554418 0.556157 0.920238 0.805854 0.626387
1348 iteration2-g2_p1_h0 iteration2 all_pose 100k 1.701453 0.552951 0.552811 0.919297 0.801777 0.637731
68 iteration3-batch_relu_lr0.010 iteration3 all_pose 100k 1.693488 0.553803 0.551250 0.916430 0.762815 0.656720

In [21]:
# all_affinity front, ordered by ascending 'top'.
pa.sort_values(by='top')


Out[21]:
name I testset iteration rmse R S aucpose aucaff top
1605 iteration2-g1_p0_h0 iteration2 all_affinity 100k 1.650597 0.575963 0.579083 0.919870 0.771714 0.561282
1637 iteration2-g1_p1_h0 iteration2 all_affinity 100k 1.670774 0.568650 0.571575 0.920238 0.805854 0.595314
1349 iteration2-g2_p1_h0 iteration2 all_affinity 100k 1.671064 0.566444 0.567625 0.919297 0.801777 0.606905
1777 iteration2-g2_p2_h0 iteration2 all_affinity 100k 1.704713 0.558476 0.557435 0.922099 0.823510 0.610851
1681 iteration2-g1_p4_h6 iteration2 all_affinity 100k 1.728620 0.551862 0.545324 0.917875 0.822993 0.612330
1873 iteration2-g2_p4_h0 iteration2 all_affinity 100k 1.773668 0.540626 0.540151 0.917587 0.832073 0.620715
1825 iteration2-g1_p4_h0 iteration2 all_affinity 100k 1.781914 0.528489 0.521414 0.916916 0.830906 0.630086

In [22]:
# sns.lmplot is a figure-level function: it always creates its own figure.
# Calling plt.figure(figsize=...) beforehand therefore only leaves an empty
# figure behind and the requested size never reaches the plot that is saved.
# Draw first, then resize the figure lmplot made (it is the current figure).
pose_results = results100[results100.testset == 'all_pose']
sns.lmplot(x='top',y='R',data=pose_results,fit_reg=False,hue='I',scatter_kws={'alpha':0.6})
plt.gcf().set_size_inches(12, 12)
# Lower axis bounds only; these act on the axes lmplot just drew.
plt.xlim(.5)
plt.ylim(.5)
plt.savefig('iterations.pdf',bbox_inches='tight')


<matplotlib.figure.Figure at 0x7f682df5bb90>

In [23]:
# search_data.csv is created with extractres.py
sdata = pd.read_csv(
    'search_data.csv',
)

In [24]:
# Distribution of R in the search data.
sns.distplot(sdata['R'])


Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f682df18f50>

In [25]:
# Distribution of 'top' in the search data.
sns.distplot(sdata['top'])


Out[25]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f682e055a90>

In [26]:
# Distribution of rmse in the search data.
sns.distplot(sdata['rmse'])


Out[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f682df7da90>

In [27]:
# Largest 'top' value in the search data.
sdata['top'].max()


Out[27]:
0.717632552404

In [28]:
# The four metric columns of the search data, all rows.
metric_columns = ['top', 'auc', 'R', 'rmse']
sdata[metric_columns]


Out[28]:
top auc R rmse
0 0.613317 0.914527 0.533481 1.834271
1 0.624168 0.921237 0.533601 1.845174
2 0.634279 0.925966 0.524212 1.749490
3 0.620222 0.915519 0.541174 1.724415
4 0.620222 0.915429 0.533995 1.773011
5 0.615536 0.915351 0.524260 1.769684
6 0.625401 0.914728 0.547576 1.714522
7 0.637731 0.919297 0.552951 1.701453
8 0.617263 0.915837 0.538066 1.742812
9 0.614303 0.912644 0.519095 1.789310
10 0.615290 0.913756 0.527857 1.773695
11 0.621702 0.914838 0.534740 1.738086
12 0.622935 0.912770 0.553804 1.693084
13 0.616769 0.915495 0.560087 1.675865
14 0.638224 0.919930 0.519556 1.801691
15 0.618496 0.912853 0.535448 1.762682
16 0.619482 0.916119 0.537531 1.757615
17 0.619975 0.914318 0.523335 1.810894
18 0.615043 0.916189 0.523030 1.733958
19 0.618496 0.915021 0.519296 1.785828
20 0.619975 0.915519 0.541137 1.724470
21 0.629840 0.917853 0.537235 1.746628
22 0.619729 0.915976 0.533345 1.745007
23 0.631566 0.918258 0.538165 1.735999
24 0.616276 0.915368 0.539227 1.739500
25 0.625401 0.914734 0.529718 1.785219
26 0.606165 0.914652 0.541611 1.744224
27 0.639457 0.916916 0.515484 1.826143
28 0.623428 0.916626 0.535801 1.761180
29 0.619236 0.916240 0.536564 1.756000
... ... ... ... ...
180 0.688533 0.921800 0.545970 1.702716
181 0.694698 0.932548 0.563699 1.679349
182 0.658446 0.913884 0.546492 1.691058
183 0.696178 0.916064 0.545898 1.699379
184 0.708261 0.920734 0.576480 1.663244
185 0.696671 0.933664 0.586224 1.629425
186 0.717633 0.933611 0.579139 1.646743
187 0.702589 0.928618 0.536900 1.717798
188 0.694945 0.928164 0.575410 1.648389
189 0.679162 0.927451 0.542321 1.705006
190 0.697904 0.926328 0.549922 1.696687
191 0.696178 0.930033 0.583427 1.637986
192 0.679162 0.928100 0.554996 1.681855
193 0.684340 0.931879 0.585622 1.639307
194 0.684834 0.925412 0.557428 1.682890
195 0.684340 0.923530 0.541589 1.718924
196 0.700370 0.916046 0.531468 1.732090
197 0.672503 0.926666 0.562508 1.676187
198 0.688286 0.917091 0.537483 1.722038
199 0.702589 0.928618 0.536900 1.717798
200 0.679162 0.927451 0.542321 1.705006
201 0.686313 0.924801 0.553252 1.685514
202 0.677682 0.924081 0.567986 1.664400
203 0.674723 0.919748 0.559074 1.679121
204 0.680641 0.928221 0.588830 1.630622
205 0.694698 0.932548 0.563699 1.679349
206 0.708261 0.920734 0.576480 1.663244
207 0.717633 0.933611 0.579139 1.646743
208 0.702589 0.928618 0.536900 1.717798
209 0.697904 0.926328 0.549922 1.696687

210 rows × 4 columns


In [29]:
# R against rmse for the search data, viewed through a fixed window.
plt.plot(sdata['R'], sdata['rmse'], 'o')
plt.xlim(.4, .6)
plt.ylim(1, 2)


Out[29]:
(1, 2)

In [36]:
# iteration5 rows whose name ends in '0_2', best R first.
model_names = results100['name']
from_iteration5 = model_names.str.startswith('iteration5')
ends_with_0_2 = model_names.str.endswith('0_2')
results100[from_iteration5 & ends_with_0_2].sort_values(by='R', ascending=False)


Out[36]:
name I testset iteration rmse R S aucpose aucaff top
2497 iteration5-64-32-32_3_0_2 iteration5 all_affinity 100k 1.685173 0.556186 0.551134 0.871853 0.721267 0.500863
2496 iteration5-64-32-32_3_0_2 iteration5 all_pose 100k 1.681208 0.554549 0.548422 0.871853 0.721267 0.559309
2225 iteration5-32-32-32_7_0_2 iteration5 all_affinity 100k 1.689232 0.546925 0.546270 0.904420 0.750968 0.556104
2224 iteration5-32-32-32_7_0_2 iteration5 all_pose 100k 1.697964 0.542823 0.540535 0.904420 0.750968 0.619236
2256 iteration5-32-16-16_3_0_2 iteration5 all_pose 100k 1.699218 0.539285 0.533350 0.877813 0.717541 0.543773
2257 iteration5-32-16-16_3_0_2 iteration5 all_affinity 100k 1.704539 0.538173 0.532238 0.877813 0.717541 0.485080
2513 iteration5-64-32-32_7_0_2 iteration5 all_affinity 100k 1.713270 0.532251 0.528519 0.902869 0.738239 0.517633
2241 iteration5-64-32-16_3_0_2 iteration5 all_affinity 100k 1.706995 0.531288 0.532508 0.874228 0.710385 0.492478
2240 iteration5-64-32-16_3_0_2 iteration5 all_pose 100k 1.716807 0.527506 0.528589 0.874228 0.710385 0.561529
2289 iteration5-32-32-32_3_0_2 iteration5 all_affinity 100k 1.718968 0.526965 0.523700 0.884796 0.729079 0.490752
2288 iteration5-32-32-32_3_0_2 iteration5 all_pose 100k 1.712407 0.526708 0.522618 0.884796 0.729079 0.553637
2481 iteration5-32-16-16_7_0_2 iteration5 all_affinity 100k 1.728476 0.522732 0.516890 0.901181 0.747794 0.518619
2480 iteration5-32-16-16_7_0_2 iteration5 all_pose 100k 1.733871 0.521145 0.514545 0.901181 0.747794 0.617016
2512 iteration5-64-32-32_7_0_2 iteration5 all_pose 100k 1.742788 0.519979 0.512631 0.902869 0.738239 0.613317
2369 iteration5-64-32-16_7_0_2 iteration5 all_affinity 100k 1.743695 0.510761 0.511302 0.907768 0.754949 0.526757
2368 iteration5-64-32-16_7_0_2 iteration5 all_pose 100k 1.763878 0.499588 0.497190 0.907768 0.754949 0.613810

In [ ]:


In [ ]: