In [1]:
%load_ext autoreload
%autoreload 2

from matplotlib.pyplot import imshow
import matplotlib.pyplot as plt

%matplotlib inline
import numpy as np
import sys
sys.path.append('../')
import gp


Using gpu device 0: GeForce GTX TITAN (CNMeM is disabled, CuDNN 4007)
/home/d/nolearn/local/lib/python2.7/site-packages/theano/tensor/signal/downsample.py:6: UserWarning: downsample module has been moved to the theano.tensor.signal.pool module.
  "downsample module has been moved to the theano.tensor.signal.pool module.")

In [2]:
import cPickle as pickle

In [3]:
image, prob, gold, rhoana, bb = gp.Legacy.read_dojo_data()

In [4]:
def VI(gt, seg):
  # total_vi = 0
  slice_vi = []    
  for i in range(len(gt)):
      current_vi = gp.Util.vi(gt[i].astype(np.int64), seg[i].astype(np.int64))
      # total_vi += current_vi
      slice_vi.append(current_vi)
  # total_vi /= 10
  return np.mean(slice_vi), np.median(slice_vi), slice_vi

In [5]:
init_mean_vi = VI(gold, rhoana)[0]
init_median_vi = VI(gold, rhoana)[1]
init_vi_per_slice = VI(gold, rhoana)[2]

In [7]:
init_median_vi, init_mean_vi


Out[7]:
(0.4763612343909136, 0.51456962217066438)

In [25]:
for i in init_vi_per_slice:
    print i


0.456495226471
0.429392255747
0.418126257718
0.417671556086
0.490719699543
0.462002769239
0.585184084074
0.634014826312
0.65525434104
0.596835205477

In [9]:
FP_USERS = []
GP_USERS = []
FP_EXPERTS = []
GP_EXPERTS = []
with open('/home/d/GPSTUDY/study.csv', 'r') as f:
    lines = f.readlines()
for l in lines[1:]:
    line = l.strip('\n').split(',')
    userid = line[3]
    method = line[4]
    if method == 'FP':
        if userid.startswith('E'):
            FP_EXPERTS.append(userid)
        else:
            FP_USERS.append(userid)
    elif method == 'GP':
        if userid.startswith('E'):
            GP_EXPERTS.append(userid)
        else:
            GP_USERS.append(userid)

In [ ]:


In [46]:
gp_user_vis = []
for u in GP_USERS:
    with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
        corr = pickle.load(f)
    with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
        vi = pickle.load(f)
    
    gp_user_vis.append(vi)
       
gp_avg_vis = [0]*10
for i,u in enumerate(gp_user_vis):
    
    last_vi_of_user = gp_user_vis[i][-1]
    
    for z in range(10):
        gp_avg_vis[z] += last_vi_of_user[z]
        
for z in range(10):
    gp_avg_vis[z] /= 10

print 'Novices vi per slice GP'
for a in gp_avg_vis:
    print a


Novices vi per slice GP
0.379679360496
0.391228036599
0.397623271004
0.389320130602
0.446474380415
0.401875692025
0.488608036732
0.464579839411
0.454309260737
0.461899876767

In [45]:
fp_user_vis = []
for u in FP_USERS:
    with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
        corr = pickle.load(f)
    with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
        vi = pickle.load(f)
    
    fp_user_vis.append(vi)
       
fp_avg_vis = [0]*10
for i,u in enumerate(fp_user_vis):
    
    last_vi_of_user = fp_user_vis[i][-1]
    
    for z in range(10):
        fp_avg_vis[z] += last_vi_of_user[z]
        
for z in range(10):
    fp_avg_vis[z] /= 10

print 'Novices vi per slice FP'
for a in fp_avg_vis:
    print a


Novices vi per slice FP
0.437689437824
0.422487907361
0.418145124855
0.439276876747
0.471258802286
0.467612791902
0.593654308247
0.618820113411
0.661255030982
0.572448845896

In [51]:
dojo_best_user = [0.3764043166,
                  0.3516472472,
                  0.4079547444,
                  0.4530306854,
                  0.489459557,
                  0.4783714198,
                  0.4691797846,
                  0.4852945057,
                  0.4989719721,
                  0.4631116968]
print np.median(dojo_best_user)
dojo_avg_user = [0.4731860794,
                 0.4412143846,
                 0.4645102603,
                 0.4790327986,
                 0.5483534853,
                 0.5209529753,
                 0.5614397773,
                 0.5669964498,
                 0.6037881064,
                 0.5986637472]    
print np.median(dojo_avg_user)


0.4661457407
0.5346532303

In [124]:
# compare ANOVA for novices between GP FP and Dojo
from pyvttbl import DataFrame

df = DataFrame()
df['data'] = gp_avg_vis+fp_avg_vis+dojo_avg_user
  
df['conditions'] = ['GP']*len(gp_avg_vis) + ['FP']*len(fp_avg_vis) + ['Dojo']*len(dojo_avg_user)

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl

#
# now t-test
#
# Welch's t-test between GP and FP
from scipy.stats import ttest_ind, ttest_ind_from_stats
t, p = ttest_ind(gp_avg_vis, fp_avg_vis, equal_var=False)
print 'FP vs. GP', t,p
t, p = ttest_ind(gp_avg_vis, dojo_avg_user, equal_var=False)
print 'Dojo vs. GP', t,p


Anova: Single Factor on data

SUMMARY
Groups   Count    Sum    Average   Variance 
===========================================
Dojo        10   5.258     0.526      0.003 
FP          10   5.161     0.516      0.009 
GP          10   4.276     0.428      0.002 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation      SS       df      MS         F     P-value   eta^2   Obs. power 
=======================================================================================
Treatments            2.729e-04    2   1.365e-04   6.866     0.004   0.337        0.772 
Error                 5.367e-04   27   1.988e-05                                        
=======================================================================================
Total                 8.096e-04   29                                                    

ANOVA
Source of Variation    SS     df    MS       F     P-value   eta^2   Obs. power 
===============================================================================
Treatments            0.059    2   0.029   6.446     0.005   0.323        0.754 
Error                 0.123   27   0.005                                        
===============================================================================
Total                 0.181   29                                                

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
       Dojo      FP         GP    
=================================
Dojo   0      0.453 ns   4.607 ** 
FP            0          4.153 *  
GP                       0        
=================================
  + p < .10 (q-critical[3, 27] = 3.0301664694)
  * p < .05 (q-critical[3, 27] = 3.50576984879)
 ** p < .01 (q-critical[3, 27] = 4.49413305084)
FP vs. GP -2.7696124247 0.0168355778148
Dojo vs. GP -4.40690595721 0.000454694977827

In [125]:
#
# ANOVA automatic
#
gp_auto_vis = []

u='auto95GP_NEW'
with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
    corr = pickle.load(f)
with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
    vi = pickle.load(f)
    
gp_auto_vis.append(vi)
gp_auto_vis = gp_auto_vis[0][-1]

fp_auto_vis = []

u='auto95FP_NEW'
with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
    corr = pickle.load(f)
with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
    vi = pickle.load(f)
    
fp_auto_vis.append(vi)
fp_auto_vis = fp_auto_vis[0][-1]


df = DataFrame()
df['data'] = gp_auto_vis+fp_auto_vis
  
df['conditions'] = ['GP']*len(gp_auto_vis) + ['FP']*len(fp_auto_vis)

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl

t, p = ttest_ind(gp_auto_vis, fp_auto_vis, equal_var=False)
print t,p


Anova: Single Factor on data

SUMMARY
Groups   Count    Sum     Average   Variance 
============================================
FP          10   19.806     1.981      0.273 
GP          10   35.588     3.559      0.004 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation    SS     df    MS       F     P-value   eta^2   Obs. power 
===============================================================================
Treatments            0.361    1   0.361   4.545     0.047   0.202        0.476 
Error                 1.429   18   0.079                                        
===============================================================================
Total                 1.790   19                                                

ANOVA
Source of Variation     SS     df     MS       F       P-value    eta^2   Obs. power 
====================================================================================
Treatments            12.453    1   12.453   89.902   2.013e-08   0.833        0.971 
Error                  2.493   18    0.139                                           
====================================================================================
Total                 14.946   19                                                    

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
     FP      GP     
===================
FP   0    13.409 ** 
GP        0         
===================
  + p < .10 (q-critical[2, 18] = 2.4523362705)
  * p < .05 (q-critical[2, 18] = 2.9711524428)
 ** p < .01 (q-critical[2, 18] = 4.070729555)
9.48164755408 4.4790562398e-06

In [ ]:


In [126]:
#
# ANOVA oracle
#
gp_auto_vis = []

u='simuserGP_NEW'
with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
    corr = pickle.load(f)
with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
    vi = pickle.load(f)
    
gp_auto_vis.append(vi)
gp_auto_vis = gp_auto_vis[0][-1]

fp_auto_vis = []

u='simuserFP_NEW'
with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
    corr = pickle.load(f)
with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
    vi = pickle.load(f)
    
fp_auto_vis.append(vi)
fp_auto_vis = fp_auto_vis[0][-1]


df = DataFrame()
df['data'] = gp_auto_vis+fp_auto_vis
  
df['conditions'] = ['GP']*len(gp_auto_vis) + ['FP']*len(fp_auto_vis)

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl

t, p = ttest_ind(gp_auto_vis, fp_auto_vis, equal_var=False)
print t,p


Anova: Single Factor on data

SUMMARY
Groups   Count    Sum    Average   Variance 
===========================================
FP          10   3.706     0.371      0.002 
GP          10   3.550     0.355      0.001 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation      SS       df      MS         F     P-value   eta^2   Obs. power 
=======================================================================================
Treatments            4.858e-06    1   4.858e-06   2.177     0.157   0.108        0.285 
Error                 4.016e-05   18   2.231e-06                                        
=======================================================================================
Total                 4.502e-05   19                                                    

ANOVA
Source of Variation    SS     df    MS       F     P-value   eta^2   Obs. power 
===============================================================================
Treatments            0.001    1   0.001   0.795     0.384   0.042        0.140 
Error                 0.028   18   0.002                                        
===============================================================================
Total                 0.029   19                                                

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
     FP      GP    
==================
FP   0    1.261 ns 
GP        0        
==================
  + p < .10 (q-critical[2, 18] = 2.4523362705)
  * p < .05 (q-critical[2, 18] = 2.9711524428)
 ** p < .01 (q-critical[2, 18] = 4.070729555)
-0.891376325469 0.385687424343

In [131]:
#
# ANOVA experts
#
fp_user_vis = []
for u in FP_EXPERTS:
    with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
        corr = pickle.load(f)
    with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
        vi = pickle.load(f)
    
    fp_user_vis.append(vi)
       
fp_avg_vis = [0]*10
for i,u in enumerate(fp_user_vis):
    
    last_vi_of_user = fp_user_vis[i][-1]
    
    for z in range(10):
        fp_avg_vis[z] += last_vi_of_user[z]
        
for z in range(10):
    fp_avg_vis[z] /= 2

print 'Experts vi per slice FP'
for a in fp_avg_vis:
    print a
    
    
    
gp_user_vis = []
for u in GP_EXPERTS:
    with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
        corr = pickle.load(f)
    with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
        vi = pickle.load(f)
    
    gp_user_vis.append(vi)
       
gp_avg_vis = [0]*10
for i,u in enumerate(gp_user_vis):
    
    last_vi_of_user = gp_user_vis[i][-1]
    
    for z in range(10):
        gp_avg_vis[z] += last_vi_of_user[z]
        
for z in range(10):
    gp_avg_vis[z] /= 2

print 'Experts vi per slice GP'
for a in gp_avg_vis:
    print a
    
# compare ANOVA for novices between GP FP and Dojo
from pyvttbl import DataFrame

df = DataFrame()
df['data'] = gp_avg_vis+fp_avg_vis
  
df['conditions'] = ['GP']*len(gp_avg_vis) + ['FP']*len(fp_avg_vis)

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl

#
# now t-test
#
# Welch's t-test between GP and FP
from scipy.stats import ttest_ind, ttest_ind_from_stats
t, p = ttest_ind(gp_avg_vis, fp_avg_vis, equal_var=False)
print 'FP vs. GP', t,p


Experts vi per slice FP
0.406871541792
0.415759407634
0.393680750726
0.428640672751
0.411566463283
0.449489429525
0.548130054245
0.589139936178
0.638557268901
0.54587318108
Experts vi per slice GP
0.365237254649
0.391693247469
0.377953355518
0.361670020779
0.400695572365
0.379369643989
0.451711188713
0.443464853934
0.44277401823
0.414804241517
Anova: Single Factor on data

SUMMARY
Groups   Count    Sum    Average   Variance 
===========================================
FP          10   4.828     0.483      0.008 
GP          10   4.029     0.403      0.001 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation      SS       df      MS         F     P-value   eta^2   Obs. power 
=======================================================================================
Treatments            2.293e-04    1   2.293e-04   7.346     0.014   0.290        0.625 
Error                 5.619e-04   18   3.121e-05                                        
=======================================================================================
Total                 7.912e-04   19                                                    

ANOVA
Source of Variation    SS     df    MS       F     P-value   eta^2   Obs. power 
===============================================================================
Treatments            0.032    1   0.032   7.054     0.016   0.282        0.612 
Error                 0.081   18   0.005                                        
===============================================================================
Total                 0.113   19                                                

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
     FP     GP    
=================
FP   0    3.756 * 
GP        0       
=================
  + p < .10 (q-critical[2, 18] = 2.4523362705)
  * p < .05 (q-critical[2, 18] = 2.9711524428)
 ** p < .01 (q-critical[2, 18] = 4.070729555)
FP vs. GP -2.6559072376 0.0215798177333

In [ ]:


In [ ]:


In [132]:
#
# ANOVA oracle L CYLINDER
#
gp_auto_vis = []

u='simuserGP_NEW'
with open('/home/d/CYLINDERSTUDY/'+u+'/corrections.p', 'rb') as f:
    corr = pickle.load(f)
with open('/home/d/CYLINDERSTUDY/'+u+'/correction_vis.p', 'rb') as f:
    vi = pickle.load(f)
    
gp_auto_vis.append(vi)
gp_auto_vis = gp_auto_vis[0][-1]

fp_auto_vis = []

u='simuserFP_NEW'
with open('/home/d/CYLINDERSTUDY/'+u+'/corrections.p', 'rb') as f:
    corr = pickle.load(f)
with open('/home/d/CYLINDERSTUDY/'+u+'/correction_vis.p', 'rb') as f:
    vi = pickle.load(f)
    
fp_auto_vis.append(vi)
fp_auto_vis = fp_auto_vis[0][-1]


df = DataFrame()
df['data'] = gp_auto_vis+fp_auto_vis
  
df['conditions'] = ['GP']*len(gp_auto_vis) + ['FP']*len(fp_auto_vis)

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl

t, p = ttest_ind(gp_auto_vis, fp_auto_vis, equal_var=False)
print t,p


Anova: Single Factor on data

SUMMARY
Groups   Count    Sum     Average   Variance 
============================================
FP          50   15.066     0.301      0.006 
GP          50   15.266     0.305      0.005 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation      SS       df      MS         F     P-value     eta^2     Obs. power 
===========================================================================================
Treatments            2.015e-06    1   2.015e-06   0.010     0.919   1.056e-04        0.051 
Error                     0.019   98   1.946e-04                                            
===========================================================================================
Total                     0.019   99                                                        

ANOVA
Source of Variation      SS       df      MS         F     P-value     eta^2     Obs. power 
===========================================================================================
Treatments            3.997e-04    1   3.997e-04   0.071     0.790   7.274e-04        0.058 
Error                     0.549   98       0.006                                            
===========================================================================================
Total                     0.549   99                                                        

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
     FP      GP    
==================
FP   0    0.378 ns 
GP        0        
==================
  + p < .10 (q-critical[2, 98] = 2.34837405216)
  * p < .05 (q-critical[2, 98] = 2.80646078814)
 ** p < .01 (q-critical[2, 98] = 3.71504158171)
0.267084773462 0.789965487971

In [134]:
#
# ANOVA automatic L CYLINDER
#
gp_auto_vis = []

u='auto00GP_NEW'
with open('/home/d/CYLINDERSTUDY/'+u+'/corrections.p', 'rb') as f:
    corr = pickle.load(f)
with open('/home/d/CYLINDERSTUDY/'+u+'/correction_vis.p', 'rb') as f:
    vi = pickle.load(f)
    
gp_auto_vis.append(vi)
gp_auto_vis = gp_auto_vis[0][-1]

fp_auto_vis = []

u='auto95FP_NEW'
with open('/home/d/CYLINDERSTUDY/'+u+'/corrections.p', 'rb') as f:
    corr = pickle.load(f)
with open('/home/d/CYLINDERSTUDY/'+u+'/correction_vis.p', 'rb') as f:
    vi = pickle.load(f)
    
fp_auto_vis.append(vi)
fp_auto_vis = fp_auto_vis[0][-1]


df = DataFrame()
df['data'] = gp_auto_vis+fp_auto_vis
  
df['conditions'] = ['GP']*len(gp_auto_vis) + ['FP']*len(fp_auto_vis)

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl

t, p = ttest_ind(gp_auto_vis, fp_auto_vis, equal_var=False)
print t,p


Anova: Single Factor on data

SUMMARY
Groups   Count     Sum     Average   Variance 
=============================================
FP          50   133.467     2.669      0.635 
GP          50   174.927     3.499      0.654 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation     SS      df    MS       F     P-value     eta^2     Obs. power 
=====================================================================================
Treatments              0.009    1   0.009   0.003     0.954   3.353e-05        0.050 
Error                 267.188   98   2.726                                            
=====================================================================================
Total                 267.197   99                                                    

ANOVA
Source of Variation     SS     df     MS       F       P-value    eta^2   Obs. power 
====================================================================================
Treatments            17.189    1   17.189   26.676   1.265e-06   0.214        0.996 
Error                 63.148   98    0.644                                           
====================================================================================
Total                 80.337   99                                                    

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
     FP      GP    
==================
FP   0    7.304 ** 
GP        0        
==================
  + p < .10 (q-critical[2, 98] = 2.34837405216)
  * p < .05 (q-critical[2, 98] = 2.80646078814)
 ** p < .01 (q-critical[2, 98] = 3.71504158171)
5.16483674199 1.26548528992e-06

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [53]:
from pyvttbl import DataFrame

df = DataFrame()
df['data'] = init_vi_per_slice+gp_avg_vis+fp_avg_vis+dojo_avg_user
  
df['conditions'] = ['Init']*len(init_vi_per_slice) + ['GP']*len(gp_avg_vis) + ['FP']*len(fp_avg_vis) + ['Dojo']*len(dojo_avg_user)

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl


Anova: Single Factor on data

SUMMARY
Groups   Count    Sum    Average   Variance 
===========================================
Dojo        10   5.258     0.526      0.003 
FP          10   5.103     0.510      0.008 
GP          10   4.276     0.428      0.002 
Init        10   5.146     0.515      0.009 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation      SS       df      MS         F     P-value   eta^2   Obs. power 
=======================================================================================
Treatments            3.845e-04    3   1.282e-04   4.698     0.007   0.281        0.765 
Error                 9.820e-04   36   2.728e-05                                        
=======================================================================================
Total                     0.001   39                                                    

ANOVA
Source of Variation    SS     df    MS       F     P-value   eta^2   Obs. power 
===============================================================================
Treatments            0.061    3   0.020   3.696     0.020   0.235        0.681 
Error                 0.198   36   0.006                                        
===============================================================================
Total                 0.260   39                                                

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
       Dojo      FP        GP        Init   
===========================================
Dojo   0      0.662 ns   4.185 *   0.479 ns 
FP            0          3.523 +   0.183 ns 
GP                       0         3.706 +  
Init                               0        
===========================================
  + p < .10 (q-critical[4, 36] = 3.36095129998)
  * p < .05 (q-critical[4, 36] = 3.8088367871)
 ** p < .01 (q-critical[4, 36] = 4.72966194222)

In [ ]:
# with dojo
# There was a significant effect of which proofreading tool was used
# at the p<.05 level for the three conditions GP, FP, Dojo
# F(3,36) = 3.696, p = 0.02

In [54]:
from pyvttbl import DataFrame

df = DataFrame()
df['data'] = init_vi_per_slice+gp_avg_vis+fp_avg_vis
  
df['conditions'] = ['Init']*len(init_vi_per_slice) + ['GP']*len(gp_avg_vis) + ['FP']*len(fp_avg_vis)

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl


Anova: Single Factor on data

SUMMARY
Groups   Count    Sum    Average   Variance 
===========================================
FP          10   5.103     0.510      0.008 
GP          10   4.276     0.428      0.002 
Init        10   5.146     0.515      0.009 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation      SS       df      MS         F     P-value   eta^2   Obs. power 
=======================================================================================
Treatments            3.260e-04    2   1.630e-04   4.847     0.016   0.264        0.661 
Error                 9.080e-04   27   3.363e-05                                        
=======================================================================================
Total                     0.001   29                                                    

ANOVA
Source of Variation    SS     df    MS       F     P-value   eta^2   Obs. power 
===============================================================================
Treatments            0.048    2   0.024   3.872     0.033   0.223        0.582 
Error                 0.168   27   0.006                                        
===============================================================================
Total                 0.216   29                                                

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
       FP     GP        Init   
==============================
FP     0    3.319 +   0.173 ns 
GP          0         3.491 +  
Init                  0        
==============================
  + p < .10 (q-critical[3, 27] = 3.0301664694)
  * p < .05 (q-critical[3, 27] = 3.50576984879)
 ** p < .01 (q-critical[3, 27] = 4.49413305084)

In [ ]:
# without dojo
# There was a significant effect of which proofreading tool was used
# at the p<.05 level for the two conditions GP, FP
# F(2,27) = 3.872, p = 0.033

In [ ]:


In [58]:
# Welch's t-test between GP and FP
from scipy.stats import ttest_ind, ttest_ind_from_stats
t, p = ttest_ind(gp_avg_vis, fp_avg_vis, equal_var=False)
print t,p


-2.6267513896 0.0218107673055

In [ ]:
# Post hoc comparisons (after Bonferroni correction) indicate that the mean VI for results with GP was
# significantly lower than for FP
# (t_27 = -2.627, p = .0218)

In [ ]:


In [59]:
# t-test between GP and Dojo
# tada no surprise..
t, p = ttest_ind(gp_avg_vis, dojo_avg_user, equal_var=False)
print t,p


-4.40690595721 0.000454694977827

In [ ]:


In [ ]:


In [118]:
gp_auto_vis = []

u='auto95GP_NEW'
with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
    corr = pickle.load(f)
with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
    vi = pickle.load(f)
    
gp_auto_vis.append(vi)
gp_auto_vis = gp_auto_vis[0][-1]

fp_auto_vis = []

u='auto95FP_NEW'
with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
    corr = pickle.load(f)
with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
    vi = pickle.load(f)
    
fp_auto_vis.append(vi)
fp_auto_vis = fp_auto_vis[0][-1]


df = DataFrame()
df['data'] = init_vi_per_slice+gp_auto_vis+fp_auto_vis
  
df['conditions'] = ['Init']*len(init_vi_per_slice) + ['GP']*len(gp_auto_vis) + ['FP']*len(fp_auto_vis)

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl

t, p = ttest_ind(gp_auto_vis, fp_auto_vis, equal_var=False)
print t,p


Anova: Single Factor on data

SUMMARY
Groups   Count    Sum     Average   Variance 
============================================
FP          10   19.806     1.981      0.273 
GP          10   35.588     3.559      0.004 
Init        10    5.146     0.515      0.009 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation    SS     df    MS       F     P-value   eta^2   Obs. power 
===============================================================================
Treatments            0.473    2   0.237   4.468     0.021   0.249        0.633 
Error                 1.430   27   0.053                                        
===============================================================================
Total                 1.903   29                                                

ANOVA
Source of Variation     SS     df     MS        F       P-value    eta^2   Obs. power 
=====================================================================================
Treatments            46.357    2   23.179   243.346   5.357e-18   0.947        0.997 
Error                  2.572   27    0.095                                            
=====================================================================================
Total                 48.929   29                                                     

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
       FP      GP         Init    
=================================
FP     0    16.170 **   15.022 ** 
GP          0           31.192 ** 
Init                    0         
=================================
  + p < .10 (q-critical[3, 27] = 3.0301664694)
  * p < .05 (q-critical[3, 27] = 3.50576984879)
 ** p < .01 (q-critical[3, 27] = 4.49413305084)
-84.7394803804 1.09988653921e-22

In [121]:
gp_auto_vis = []

u='simuserGP_NEW'
with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
    corr = pickle.load(f)
with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
    vi = pickle.load(f)
    
gp_auto_vis.append(vi)
gp_auto_vis = gp_auto_vis[0][-1]

fp_auto_vis = []

u='simuserFP_NEW'
with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
    corr = pickle.load(f)
with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
    vi = pickle.load(f)
    
fp_auto_vis.append(vi)
fp_auto_vis = fp_auto_vis[0][-1]


df = DataFrame()
df['data'] = init_vi_per_slice+gp_auto_vis+fp_auto_vis
  
df['conditions'] = ['Init']*len(init_vi_per_slice) + ['GP']*len(gp_auto_vis) + ['FP']*len(fp_auto_vis)

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl

t, p = ttest_ind(gp_auto_vis, fp_auto_vis, equal_var=False)
print t,p


Anova: Single Factor on data

SUMMARY
Groups   Count    Sum    Average   Variance 
===========================================
FP          10   3.706     0.371      0.002 
GP          10   3.550     0.355      0.001 
Init        10   5.146     0.515      0.009 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation      SS       df      MS         F       P-value    eta^2   Obs. power 
==========================================================================================
Treatments            3.491e-04    2   1.746e-04   10.382   4.524e-04   0.435        0.873 
Error                 4.540e-04   27   1.681e-05                                           
==========================================================================================
Total                 8.031e-04   29                                                       

ANOVA
Source of Variation    SS     df    MS       F       P-value    eta^2   Obs. power 
==================================================================================
Treatments            0.155    2   0.077   19.696   5.306e-06   0.593        0.955 
Error                 0.106   27   0.004                                           
==================================================================================
Total                 0.261   29                                                   

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
       FP      GP        Init   
===============================
FP     0    0.787 ns   7.263 ** 
GP          0          8.050 ** 
Init                   0        
===============================
  + p < .10 (q-critical[3, 27] = 3.0301664694)
  * p < .05 (q-critical[3, 27] = 3.50576984879)
 ** p < .01 (q-critical[3, 27] = 4.49413305084)
-0.891376325469 0.385687424343

In [ ]:
gp_auto_vis = []

u='simuserGP_NEW'
with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
    corr = pickle.load(f)
with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
    vi = pickle.load(f)
    
gp_auto_vis.append(vi)
gp_auto_vis = gp_auto_vis[0][-1]

fp_auto_vis = []

u='simuserFP_NEW'
with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
    corr = pickle.load(f)
with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
    vi = pickle.load(f)
    
fp_auto_vis.append(vi)
fp_auto_vis = fp_auto_vis[0][-1]


df = DataFrame()
df['data'] = gp_auto_vis+fp_auto_vis
  
df['conditions'] = ['GP']*len(gp_auto_vis) + ['FP']*len(fp_auto_vis)

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl

t, p = ttest_ind(gp_auto_vis, fp_auto_vis, equal_var=False)
print t,p

In [ ]:


In [89]:
gp_user_vis_merge_only = []
for u in GP_USERS:
    with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
        corr = pickle.load(f)
    with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
        vi = pickle.load(f)
    
    gp_user_vis_merge_only.append(vi[0:9])
       
gp_avg_vis_merge_only = [0]*10
for i,u in enumerate(gp_user_vis_merge_only):
    
    last_vi_of_user_merge_only = gp_user_vis_merge_only[i][-1]
    
    for z in range(10):
        gp_avg_vis_merge_only[z] += last_vi_of_user_merge_only[z]
        
for z in range(10):
    gp_avg_vis_merge_only[z] /= 10

print 'Novices vi per slice GP Merge Only'
for a in gp_avg_vis_merge_only:
    print a


Novices vi per slice GP Merge Only
0.426966658968
0.424304130422
0.412701967968
0.415219549432
0.484201422633
0.462002769239
0.584686803746
0.620721362889
0.637159863436
0.57979152433

In [91]:
np.median(gp_avg_vis_merge_only)


Out[91]:
0.47310209593579611

In [75]:
init_vi_per_slice


Out[75]:
[0.4564952264711417,
 0.4293922557471461,
 0.41812625771753,
 0.4176715560855522,
 0.49071969954324857,
 0.46200276923857864,
 0.5851840840738847,
 0.6340148263120815,
 0.6552543410401137,
 0.5968352054773662]

In [92]:
np.median(init_vi_per_slice)


Out[92]:
0.4763612343909136

In [90]:
from pyvttbl import DataFrame

df = DataFrame()
df['data'] = init_vi_per_slice+gp_avg_vis_merge_only
  
df['conditions'] = ['Init']*len(init_vi_per_slice) + ['GP']*len(gp_avg_vis_merge_only)

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl


Anova: Single Factor on data

SUMMARY
Groups   Count    Sum    Average   Variance 
===========================================
GP          10   5.048     0.505      0.008 
Init        10   5.146     0.515      0.009 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation      SS       df      MS         F     P-value   eta^2   Obs. power 
=======================================================================================
Treatments            1.089e-06    1   1.089e-06   0.027     0.872   0.001        0.053 
Error                 7.304e-04   18   4.058e-05                                        
=======================================================================================
Total                 7.315e-04   19                                                    

ANOVA
Source of Variation      SS       df      MS         F     P-value   eta^2   Obs. power 
=======================================================================================
Treatments            4.796e-04    1   4.796e-04   0.057     0.815   0.003        0.056 
Error                     0.153   18       0.008                                        
=======================================================================================
Total                     0.153   19                                                    

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
       GP     Init   
====================
GP     0    0.336 ns 
Init        0        
====================
  + p < .10 (q-critical[2, 18] = 2.4523362705)
  * p < .05 (q-critical[2, 18] = 2.9711524428)
 ** p < .01 (q-critical[2, 18] = 4.070729555)

In [78]:
t, p = ttest_ind(init_vi_per_slice, gp_avg_vis_merge_only, equal_var=False)
print t,p


0.237752327571 0.814760459455

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [104]:
gp_auto_vis_merge_only = []

u='auto95GP_NEW'
with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
    corr = pickle.load(f)
with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
    vi = pickle.load(f)
    
gp_auto_vis_merge_only.append(vi[0:9])
gp_auto_vis_merge_only = gp_auto_vis_merge_only[0][-1]


df = DataFrame()
df['data'] = init_vi_per_slice+gp_auto_vis_merge_only
  
df['conditions'] = ['Init']*len(init_vi_per_slice) + ['GP']*len(gp_avg_vis_merge_only)

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl

t, p = ttest_ind(init_vi_per_slice, gp_auto_vis_merge_only, equal_var=False)
print t,p


Anova: Single Factor on data

SUMMARY
Groups   Count    Sum    Average   Variance 
===========================================
GP          10   5.085     0.508      0.008 
Init        10   5.146     0.515      0.009 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation      SS       df      MS         F     P-value     eta^2     Obs. power 
===========================================================================================
Treatments            5.606e-07    1   5.606e-07   0.015     0.904   8.221e-04        0.052 
Error                 6.813e-04   18   3.785e-05                                            
===========================================================================================
Total                 6.819e-04   19                                                        

ANOVA
Source of Variation      SS       df      MS         F     P-value   eta^2   Obs. power 
=======================================================================================
Treatments            1.852e-04    1   1.852e-04   0.022     0.885   0.001        0.052 
Error                     0.154   18       0.009                                        
=======================================================================================
Total                     0.154   19                                                    

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
       GP     Init   
====================
GP     0    0.208 ns 
Init        0        
====================
  + p < .10 (q-critical[2, 18] = 2.4523362705)
  * p < .05 (q-critical[2, 18] = 2.9711524428)
 ** p < .01 (q-critical[2, 18] = 4.070729555)
0.147170558585 0.884633914604

In [105]:
gp_auto_vis_merge_only = []

u='simuserGP_NEW'
with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
    corr = pickle.load(f)
with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
    vi = pickle.load(f)
    
gp_auto_vis_merge_only.append(vi[0:9])
gp_auto_vis_merge_only = gp_auto_vis_merge_only[0][-1]


df = DataFrame()
df['data'] = init_vi_per_slice+gp_auto_vis_merge_only
  
df['conditions'] = ['Init']*len(init_vi_per_slice) + ['GP']*len(gp_avg_vis_merge_only)

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl


t, p = ttest_ind(init_vi_per_slice, gp_auto_vis_merge_only, equal_var=False)
print t,p


Anova: Single Factor on data

SUMMARY
Groups   Count    Sum    Average   Variance 
===========================================
GP          10   5.067     0.507      0.008 
Init        10   5.146     0.515      0.009 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation      SS       df      MS         F     P-value     eta^2     Obs. power 
===========================================================================================
Treatments            7.027e-07    1   7.027e-07   0.018     0.896   9.830e-04        0.052 
Error                 7.141e-04   18   3.967e-05                                            
===========================================================================================
Total                 7.148e-04   19                                                        

ANOVA
Source of Variation      SS       df      MS         F     P-value   eta^2   Obs. power 
=======================================================================================
Treatments            3.099e-04    1   3.099e-04   0.036     0.851   0.002        0.054 
Error                     0.154   18       0.009                                        
=======================================================================================
Total                     0.154   19                                                    

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
       GP     Init   
====================
GP     0    0.270 ns 
Init        0        
====================
  + p < .10 (q-critical[2, 18] = 2.4523362705)
  * p < .05 (q-critical[2, 18] = 2.9711524428)
 ** p < .01 (q-critical[2, 18] = 4.070729555)
0.190597745592 0.850974953143

In [ ]:


In [106]:
gp_auto_vis_merge_only = []

u='E1'
with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
    corr = pickle.load(f)
with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
    vi = pickle.load(f)
    
gp_auto_vis_merge_only.append(vi[0:9])
gp_auto_vis_merge_only = gp_auto_vis_merge_only[0][-1]


df = DataFrame()
df['data'] = init_vi_per_slice+gp_auto_vis_merge_only
  
df['conditions'] = ['Init']*len(init_vi_per_slice) + ['GP']*len(gp_avg_vis_merge_only)

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl


t, p = ttest_ind(init_vi_per_slice, gp_auto_vis_merge_only, equal_var=False)
print t,p


Anova: Single Factor on data

SUMMARY
Groups   Count    Sum    Average   Variance 
===========================================
GP          10   5.050     0.505      0.008 
Init        10   5.146     0.515      0.009 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation      SS       df      MS         F     P-value   eta^2   Obs. power 
=======================================================================================
Treatments            2.384e-06    1   2.384e-06   0.058     0.812   0.003        0.057 
Error                 7.388e-04   18   4.104e-05                                        
=======================================================================================
Total                 7.411e-04   19                                                    

ANOVA
Source of Variation      SS       df      MS         F     P-value   eta^2   Obs. power 
=======================================================================================
Treatments            4.606e-04    1   4.606e-04   0.055     0.817   0.003        0.056 
Error                     0.151   18       0.008                                        
=======================================================================================
Total                     0.151   19                                                    

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
       GP     Init   
====================
GP     0    0.332 ns 
Init        0        
====================
  + p < .10 (q-critical[2, 18] = 2.4523362705)
  * p < .05 (q-critical[2, 18] = 2.9711524428)
 ** p < .01 (q-critical[2, 18] = 4.070729555)
0.234539315313 0.817217963753

In [ ]:


In [ ]:


In [109]:
gp_auto_vis_merge_only = []

u='E1'
with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
    corr = pickle.load(f)
with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
    vi = pickle.load(f)
    
gp_auto_vis_merge_only.append(vi[0:9])
gp_auto_vis_merge_only = gp_auto_vis_merge_only[0][-1]


df = DataFrame()
df['data'] = gp_auto_vis_merge_only+dojo_avg_user
  
df['conditions'] =['GP']*len(gp_avg_vis_merge_only) + ['Dojo']*len(dojo_avg_user)

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl


# t, p = ttest_ind(init_vi_per_slice, gp_auto_vis_merge_only, equal_var=False)
t, p = ttest_ind(dojo_avg_user, gp_auto_vis_merge_only, equal_var=False)
print t,p


Anova: Single Factor on data

SUMMARY
Groups   Count    Sum    Average   Variance 
===========================================
Dojo        10   5.258     0.526      0.003 
GP          10   5.050     0.505      0.008 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation      SS       df      MS         F     P-value   eta^2   Obs. power 
=======================================================================================
Treatments            1.063e-04    1   1.063e-04   4.795     0.042   0.210        0.493 
Error                 3.989e-04   18   2.216e-05                                        
=======================================================================================
Total                 5.052e-04   19                                                    

ANOVA
Source of Variation    SS     df    MS       F     P-value   eta^2   Obs. power 
===============================================================================
Treatments            0.002    1   0.002   0.380     0.546   0.021        0.093 
Error                 0.103   18   0.006                                        
===============================================================================
Total                 0.105   19                                                

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
       Dojo      GP    
======================
Dojo   0      0.871 ns 
GP            0        
======================
  + p < .10 (q-critical[2, 18] = 2.4523362705)
  * p < .05 (q-critical[2, 18] = 2.9711524428)
 ** p < .01 (q-critical[2, 18] = 4.070729555)
0.616065427431 0.546800142571

In [ ]:


In [116]:
fp_user_vis = []
for u in FP_USERS:
    with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
        corr = pickle.load(f)
    with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
        vi = pickle.load(f)
    
    fp_user_vis.append(vi)
       
fp_avg_vis = [0]*10
for i,u in enumerate(fp_user_vis):
    if len(fp_user_vis[i]) >= 10:
        what = 10
    else:
        what = 9
    last_vi_of_user = fp_user_vis[i][9]
    
    for z in range(10):
        fp_avg_vis[z] += last_vi_of_user[z]
        
for z in range(10):
    fp_avg_vis[z] /= 10

print 'Novices vi per slice FP'
for a in fp_avg_vis:
    print a


Novices vi per slice FP
0.453289703367
0.426780993196
0.419518782717
0.424488666788
0.494419522199
0.468580932306
0.587280988443
0.634014826312
0.660861862194
0.592219410322

In [117]:
gp_auto_vis_merge_only = []

u='E1'
with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
    corr = pickle.load(f)
with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
    vi = pickle.load(f)
    
gp_auto_vis_merge_only.append(vi[0:9])
gp_auto_vis_merge_only = gp_auto_vis_merge_only[0][-1]


df = DataFrame()
df['data'] = gp_auto_vis_merge_only+fp_avg_vis
  
df['conditions'] =['GP']*len(gp_avg_vis_merge_only) + ['FP']*len(fp_avg_vis)

aov_pyvttbl = df.anova1way('data', 'conditions')
print aov_pyvttbl


# t, p = ttest_ind(init_vi_per_slice, gp_auto_vis_merge_only, equal_var=False)
t, p = ttest_ind(fp_avg_vis, gp_auto_vis_merge_only, equal_var=False)
print t,p


Anova: Single Factor on data

SUMMARY
Groups   Count    Sum    Average   Variance 
===========================================
Dojo        10   5.161     0.516      0.009 
GP          10   5.050     0.505      0.008 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation      SS       df      MS         F     P-value   eta^2   Obs. power 
=======================================================================================
Treatments            2.108e-06    1   2.108e-06   0.049     0.827   0.003        0.056 
Error                 7.756e-04   18   4.309e-05                                        
=======================================================================================
Total                 7.777e-04   19                                                    

ANOVA
Source of Variation      SS       df      MS         F     P-value   eta^2   Obs. power 
=======================================================================================
Treatments            6.243e-04    1   6.243e-04   0.075     0.788   0.004        0.059 
Error                     0.150   18       0.008                                        
=======================================================================================
Total                     0.151   19                                                    

POSTHOC MULTIPLE COMPARISONS

Tukey HSD: Table of q-statistics
       Dojo      GP    
======================
Dojo   0      0.387 ns 
GP            0        
======================
  + p < .10 (q-critical[2, 18] = 2.4523362705)
  * p < .05 (q-critical[2, 18] = 2.9711524428)
 ** p < .01 (q-critical[2, 18] = 4.070729555)
0.273387553167 0.787672144717

In [ ]:


In [ ]:


In [20]:
len(gp_user_vis[0][-1])


Out[20]:
10

In [10]:
gt = ['1', '0', '0', '0', '1', '0', '0', '1', '1', '0']
goods = []
after_merge_vis = []
for u in GP_USERS:
    with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
        corr = pickle.load(f)
    with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
        vi = pickle.load(f)
    after_merge_vis.append(vi[10])
    good = 0
    for i,c in enumerate(corr[0:10]):
        if c[1] == gt[i]:
            good += 1 
    goods.append(good)        
    
avg_vis = [0]*10
for i,u in enumerate(after_merge_vis):
    
    for z in range(10):
        avg_vis[z] += u[z]
        
for z in range(10):
    avg_vis[z] /= 10
print np.mean(avg_vis), np.median(avg_vis), np.std(avg_vis)


0.502334452541 0.471410521752 0.086081917905

In [11]:
avg_vis


Out[11]:
[0.4253223576205434,
 0.42430413042212045,
 0.41270196796840014,
 0.4066375261443847,
 0.4808182742660415,
 0.46200276923857864,
 0.5825109038074127,
 0.6142256683149673,
 0.6371598634356328,
 0.5776610641870044]

In [861]:
gt = ['1', '0', '0', '0', '1', '0', '0', '1', '1', '0']
goods = []
after_merge_vis = []
for u in GP_EXPERTS:
    with open('/home/d/GPSTUDY/'+u+'/corrections.p', 'rb') as f:
        corr = pickle.load(f)
    with open('/home/d/GPSTUDY/'+u+'/correction_vis.p', 'rb') as f:
        vi = pickle.load(f)
    after_merge_vis.append(vi[10])
    good = 0
    for i,c in enumerate(corr[0:10]):
        if c[1] == gt[i]:
            good += 1 
    goods.append(good)        
    
avg_vis = [0]*10
for i,u in enumerate(after_merge_vis):
    
    for z in range(10):
        avg_vis[z] += u[z]
        
for z in range(10):
    avg_vis[z] /= 2
print np.mean(avg_vis), np.median(avg_vis), np.std(avg_vis)


0.503026236759 0.470682083976 0.0864562744934

In [851]:
np.std(goods)


Out[851]:
0.69999999999999996

In [847]:
gt = ['1', '0', '0', '0', '1', '0', '0', '1', '1', '0']
for f in GP_EXPERTS:
    with open('/home/d/GPSTUDY/'+f+'/corrections.p', 'rb') as f:
        corr = pickle.load(f)
    good = 0
    for i,c in enumerate(corr[0:10]):
        if c[1] == gt[i]:
            good += 1 
    print good


1
3

In [852]:
np.std([1,3])


Out[852]:
1.0

In [ ]:


In [621]:
fp_vi_per_slice, fp_merge_vis, fp_split_vis = gp.Stats.analyze_users(FP_USERS, gold, rhoana, clampX=700,
                                                                     filename='/home/d/PAPERGFX/fpusers.pdf')


No. users 10
Avg. correction time 4909.45155238
Avg. corrections 423.4
Avg. accepted 45.8
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-621-6d55655ac8ad> in <module>()
      1 fp_vi_per_slice, fp_merge_vis, fp_split_vis = gp.Stats.analyze_users(FP_USERS, gold, rhoana, clampX=700,
----> 2                                                                     filename='/home/d/PAPERGFX/fpusers.pdf')

TypeError: 'NoneType' object is not iterable

In [527]:



Out[527]:
['S3', 'S28', 'S54', 'S53', 'S25', 'S35', 'S36', 'S51', 'S52', 'S200']

In [589]:
gp_vi_per_slice, gp_merge_vis, gp_split_vis = gp.Stats.analyze_users(GP_USERS, gold, rhoana, clampX=700,
                                                                    filename='/home/d/PAPERGFX/gpusers.pdf')


No. users 10
Avg. correction time 6243.34924225
Avg. corrections 353.4
Avg. accepted 106.9

In [862]:
import matplotlib.gridspec as gridspec
left, width = .25, .5
bottom, height = .25, .5
right = left + width
top = bottom + height
fig = plt.figure(1, figsize=(60,20))
clamper=1500

# fig.suptitle('Focused Proofreading', y=1.05, fontsize=64, fontweight='bold')
gs = gridspec.GridSpec(2,5,width_ratios=[.2,1,1,1,1])
gs.update(wspace=0.12, hspace=0.05)
a = plt.subplot(gs[0])
a.axis('off')
plt.text(-.5, .85, 
         'Focused\nProofreading', 
         ha='left', va='top', rotation='90',
         fontsize=56, fontweight='normal')

plt.subplot(gs[1])
gp.Stats.analyze_users(['auto95FP_NEW'], gold, rhoana, returnplt=True, vilabel=True, hideYlabels=False, showlegend=True,clampX=clamper)
plt.title('Automatic', y=1.02)
plt.subplot(gs[2])
plt.title('Forced Choice Novice', y=1.02)
gp.Stats.analyze_users(FP_USERS, gold, rhoana, returnplt=True,clampX=clamper)
plt.subplot(gs[3])
gp.Stats.analyze_users(FP_EXPERTS, gold, rhoana, returnplt=True,clampX=clamper)
plt.title('Forced Choice Expert', y=1.02)
plt.subplot(gs[4])
gp.Stats.analyze_users(['simuserFP_NEW'], gold, rhoana, returnplt=True,clampX=clamper)
plt.title('Oracle', y=1.02)
a = plt.subplot(gs[5])
a.axis('off')
plt.text(-.5, .85, 
         'Guided\nProofreading', 
         ha='left', va='top', rotation='90',
         fontsize=56, fontweight='normal')
plt.subplot(gs[6])

gp.Stats.analyze_users(['auto95GP_NEW'], gold, rhoana, hline=153,returnplt=True,vilabel=True,clampX=clamper,hideYlabels=False,clabel=True,hideXlabels=False)
plt.subplot(gs[7])
gp.Stats.analyze_users(GP_USERS, gold, rhoana, returnplt=True,clampX=clamper,hideXlabels=False,clabel=True)
plt.subplot(gs[8])
gp.Stats.analyze_users(GP_EXPERTS, gold, rhoana, returnplt=True,clampX=clamper,hideXlabels=False,clabel=True)
plt.subplot(gs[9])
gp.Stats.analyze_users(['simuserGP_NEW'], gold, rhoana, returnplt=True,clampX=clamper,hideXlabels=False,clabel=True)

# plt.tight_layout(pad=0.01, w_pad=0.01, h_pad=.5)

# gp.Stats.analyze_users(GP_USERS, gold, rhoana, clampX=700,
#                                                                     filename='/home/d/PAPERGFX/gpusers.pdf')
# plt.subplot(212)
# gp.Stats.analyze_users(GP_USERS, gold, rhoana, clampX=700,
#                                                                     filename='/home/d/PAPERGFX/gpusers.pdf')

plt.savefig('/home/d/PAPERGFX/ac4trails.pdf')



In [ ]:


In [ ]:


In [428]:
expert_fp_vi_per_slice, expert_fp_merge_vis, expert_fp_split_vis = gp.Stats.analyze_users(FP_EXPERTS, gold, rhoana, 
#                                                                                           oracle=['simuserFP'],
                                                                                          clampX=False,
                                                                                         filename='/home/d/PAPERGFX/fpexperts.pdf')


No. users 2
Avg. correction time 2827.76391799
Avg. corrections 593.5
Avg. accepted 65.5

In [429]:
expert_gp_vi_per_slice, expert_gp_merge_vis, expert_gp_split_vis = gp.Stats.analyze_users(GP_EXPERTS, gold, rhoana,
                                                                                         filename='/home/d/PAPERGFX/gpexperts.pdf')


No. users 2
Avg. correction time 4960.00050347
Avg. corrections 371.5
Avg. accepted 90.0

In [476]:
simuser_gp_vi_per_slice, simuser_gp_merge_vis, simuser_gp_split_vis, cyl_simuser = gp.Stats.analyze_users(['simuserGP_NEW'], gold, rhoana,
                                                                                            clampX=False,
                                                                                            filename='/home/d/PAPERGFX/gporacle.pdf')


No. users 1
Avg. correction time 0.0
Avg. corrections 1769.0
Avg. accepted 248.0

In [431]:
simuser_fp_vi_per_slice, simuser_fp_merge_vis, simuser_fp_split_vis = gp.Stats.analyze_users(['simuserFP_NEW'], gold, rhoana,
                                                                                            clampX=False,
                                                                                            filename='/home/d/PAPERGFX/fporacle.pdf')


No. users 1
Avg. correction time 0.0
Avg. corrections 1605.0
Avg. accepted 200.0

In [432]:
auto95_fp_vi_per_slice, auto95_fp_merge_vis, auto95_fp_split_vis = gp.Stats.analyze_users(['auto95FP_NEW'], gold, rhoana,
                                                                                         filename='/home/d/PAPERGFX/fpauto.pdf')


No. users 1
Avg. correction time 0.0
Avg. corrections 551.0
Avg. accepted 551.0

In [691]:
auto95_gp_vi_per_slice, auto95_gp_merge_vis, auto95_gp_split_vis, auto95_all_vis = gp.Stats.analyze_users(['auto95GP_NEW'], gold, rhoana,
                                                                                         filename='/home/d/PAPERGFX/gpauto.pdf',
                                                                                         hline=153,
                                                                                         )


No. users 1
Avg. correction time 0.0
Avg. corrections 808.0
Avg. accepted 804.0

In [603]:
auto95_fp_t_vi_per_slice, auto95_fp_t_merge_vis, auto95_fp_t_split_vis = gp.Stats.analyze_users(['auto95FP_threshold_NEW'], gold, rhoana)


No. users 1
Avg. correction time 0.0
Avg. corrections 101.0
Avg. accepted 101.0

In [787]:
np.std(simuser_fp_vi_per_slice)


Out[787]:
0.037173195046294773

In [788]:
np.std(simuser_gp_vi_per_slice)


Out[788]:
0.030580564368912731

In [ ]:


In [583]:
def boxplot(objects, data, clampY=True, filename=None):
#     plt.subplots()

    y_pos = range(1,len(objects)+1)

    fig = plt.figure(figsize=(10,7))
    plt.axhline(np.median(data[0]), color='gray', linewidth=2, linestyle=':')
    plt.axhline(y=0.33414926373414477, color='gray', linestyle='--', linewidth=2, label='Best Possible')

    bp = plt.boxplot(data)
    plt.setp(bp['whiskers'],linewidth=3,linestyle='-',color='black')
    plt.setp(bp['fliers'],linewidth=3)
    plt.setp(bp['means'],linewidth=3)
    plt.setp(bp['medians'],linewidth=3
            ,color='black')
    plt.setp(bp['boxes'],linewidth=3,color='black')
    plt.setp(bp['caps'],linewidth=3)

    plt.ylabel('Variation of Information')
    if clampY:
        plt.ylim([0.3,0.7])
#     plt.yticks(np.arange(min(x), max(x)+1, 1.0))
    font = {'family' : 'sans-serif',
            'weight' : 'normal',
            'size'   : 22}

    plt.rc('font', **font)
    plt.xticks(y_pos, objects)
    if filename:
        plt.savefig(filename)
    plt.show()

In [681]:


In [592]:
dojo_best_user = [0.3764043166,
                      0.3516472472,
                      0.4079547444,
                      0.4530306854,
                      0.489459557,
                      0.4783714198,
                      0.4691797846,
                      0.4852945057,
                      0.4989719721,
                      0.4631116968]
    dojo_avg_user = [0.4731860794,
                     0.4412143846,
                     0.4645102603,
                     0.4790327986,
                     0.5483534853,
                     0.5209529753,
                     0.5614397773,
                     0.5669964498,
                     0.6037881064,
                     0.5986637472]

In [595]:
objects = ['Initial\nSegmentation', 'Dojo', 'Focused\nProofreading', 'Guided\nProofreading']
data = [init_vi_per_slice, dojo_avg_user, fp_vi_per_slice, gp_vi_per_slice]
boxplot(objects, data)



In [869]:
def bigboxplot(objects, data, clampY=True, filename=None):
#     plt.subplots()

    y_pos = range(1,len(objects)+1)

    fig = plt.figure(figsize=(20,14))
    ax = plt.gca()
    ax.axvline(x=1.5,ymax=1, color='lightblue', linewidth=2)
    ax.axvline(x=3.5,ymax=1, color='lightblue', linewidth=2)    
    ax.axvline(x=6.5,ymax=1, color='lightblue', linewidth=2)    
    ax.axvline(x=8.5,ymax=1, color='lightblue', linewidth=2)    
    plt.axhline(np.median(data[0]), color='gray', linewidth=2, linestyle=':', label='Initial Segmentation')
    plt.axhline(y=0.33414926373414477, color='gray', linestyle='--', linewidth=2, label='Best Possible')

    bp = plt.boxplot(data)
    plt.setp(bp['whiskers'],linewidth=3,linestyle='-',color='black')
    plt.setp(bp['fliers'],linewidth=3)
    plt.setp(bp['means'],linewidth=3)
    plt.setp(bp['medians'],linewidth=3
            ,color='black')
    plt.setp(bp['boxes'],linewidth=3,color='black')
    plt.setp(bp['caps'],linewidth=3)

    plt.text(2.5, .75, 
             'Automatic', 
             ha='center', va='top',
             fontsize=28, fontweight='normal')    
    plt.text(5, .75, 
             'F. Choice Novice', 
             ha='center', va='top',
             fontsize=28, fontweight='normal')   
    
    plt.text(7.5, .75, 
             'F. Choice Expert', 
             ha='center', va='top',
             fontsize=28, fontweight='normal')   
    

    plt.text(9.5, .75, 
             'Oracle', 
             ha='center', va='top',
             fontsize=28, fontweight='normal')    
    
    ax.set_aspect(7)
    ax.arrow(2, .62, 0, .03, head_width=0.2, head_length=0.03, fc='k', ec='k', linewidth=3)
    legend = ax.legend(loc='upper right')
    plt.ylabel('Variation of Information')
    if clampY:
        plt.ylim([0.3,0.7])
#     plt.yticks(np.arange(min(x), max(x)+1, 1.0))
    font = {'family' : 'sans-serif',
            'weight' : 'normal',
            'size'   : 28}

    plt.rc('font', **font)
    plt.rc('legend',**{'fontsize':24})
    plt.xticks(y_pos, objects)
    plt.yticks(np.arange(0.3, 0.71, 0.1))
    if filename:
        plt.savefig(filename)
        
    plt.savefig('/home/d/PAPERGFX/ac4boxplot.pdf')
    plt.show()

objects = ['Initial\nSegmentation',
           'Focused\nProofreading',
           'Guided\nProofreading',
           'Dojo',
           'Focused\nProofreading',
           'Guided\nProofreading',
           'Focused\nProofreading',
           'Guided\nProofreading',           
           'Focused\nProofreading',
           'Guided\nProofreading']
objects = ['Initial\nSegmentation',
           'FP',
           'GP',
           'Dojo',
           'FP',
           'GP',
           'FP',
           'GP',           
           'FP',
           'GP']

data = [init_vi_per_slice, 
        [], 
        auto95_all_vis[0][153], 
        dojo_avg_user, 
        fp_vi_per_slice, 
        gp_vi_per_slice,
        expert_fp_vi_per_slice,
        expert_gp_vi_per_slice,
       simuser_fp_vi_per_slice,
       simuser_gp_vi_per_slice]
bigboxplot(objects, data)



In [40]:
def bigboxplot(objects, data, clampY=True, filename=None):
#     plt.subplots()

    y_pos = range(1,len(objects)+1)

    fig = plt.figure(figsize=(20,14))
    ax = plt.gca()
    ax.axvline(x=1.5,ymax=1, color='lightblue', linewidth=2)
    ax.axvline(x=3.5,ymax=1, color='lightblue', linewidth=2)    
    ax.axvline(x=6.5,ymax=1, color='lightblue', linewidth=2)    
    ax.axvline(x=8.5,ymax=1, color='lightblue', linewidth=2)    
    plt.axhline(np.median(data[0]), color='gray', linewidth=2, linestyle=':', label='Initial Segmentation')
    plt.axhline(y=0.30, color='gray', linestyle='--', linewidth=2, label='Best Possible')

    bp = plt.boxplot(data)
    plt.setp(bp['whiskers'],linewidth=3,linestyle='-',color='black')
    plt.setp(bp['fliers'],linewidth=3)
    plt.setp(bp['means'],linewidth=3)
    plt.setp(bp['medians'],linewidth=3
            ,color='black')
    plt.setp(bp['boxes'],linewidth=3,color='black')
    plt.setp(bp['caps'],linewidth=3)

    plt.text(2.5, .75, 
             'Automatic', 
             ha='center', va='top',
             fontsize=28, fontweight='normal')    
#     plt.text(5, .75, 
#              'F. Choice Novice', 
#              ha='center', va='top',
#              fontsize=28, fontweight='normal')   
    
#     plt.text(7.5, .75, 
#              'F. Choice Expert', 
#              ha='center', va='top',
#              fontsize=28, fontweight='normal')   
    

    plt.text(5, .75, 
             'Oracle', 
             ha='center', va='top',
             fontsize=28, fontweight='normal')    
    
    ax.set_aspect(7)
    ax.arrow(2, .62, 0, .03, head_width=0.2, head_length=0.03, fc='k', ec='k', linewidth=3)
    legend = ax.legend(loc='upper right')
    plt.ylabel('Variation of Information')
    if clampY:
        plt.ylim([0.0,0.7])
#     plt.yticks(np.arange(min(x), max(x)+1, 1.0))
    font = {'family' : 'sans-serif',
            'weight' : 'normal',
            'size'   : 28}

    plt.rc('font', **font)
    plt.rc('legend',**{'fontsize':24})
    plt.xticks(y_pos, objects)
    plt.yticks(np.arange(0.3, 0.71, 0.1))
    if filename:
        plt.savefig(filename)
        
    plt.savefig('/home/d/PAPERGFX/cylboxplot.pdf')
    plt.show()

objects = ['Initial\nSegmentation',
           'Focused\nProofreading',
           'Guided\nProofreading',        
           'Focused\nProofreading',
           'Guided\nProofreading']
objects = ['Initial\nSegmentation',
           'FP',
           'GP',           
           'FP',
           'GP']

data = [cyl_init_vi, 
        [], 
        cyl_gp_allvis[0][2199], 
       cyl_simuser_fp_split_vis[-1],
        cyl_gp_simuser_allvis[0][-1]]
bigboxplot(objects, data)



In [26]:
len(cyl_gp_allvis[0])


Out[26]:
11095

In [32]:
len(cyl_gp_simuser_allvis[0])


Out[32]:
2697

In [34]:
np.median(cyl_gp_simuser_allvis[0][-1])


Out[34]:
0.29964038575794882

In [782]:
np.std(expert_gp_vi_per_slice)


Out[782]:
0.031913211956737381

In [776]:
np.median(gp_vi_per_slice)


Out[776]:
0.42417503622005071

In [597]:
objects = ['Initial\nSegmentation', 'Focused\nProofreading']
data = [init_vi_per_slice, fp_vi_per_slice]
boxplot(objects, data, filename='/home/d/PAPERGFX/fc_fp_ac4.pdf')



In [437]:
objects = ['Initial\nSegmentation', 'Guided\nProofreading']
data = [init_vi_per_slice, gp_vi_per_slice]
boxplot(objects, data)



In [ ]:


In [596]:
objects = ['Initial\nSegmentation', 'Dojo best', 'GP Novice', 'GP Expert']
data = [init_vi_per_slice, dojo_best_user, gp_vi_per_slice, expert_gp_vi_per_slice]
boxplot(objects, data, filename='/home/d/PAPERGFX/gp_novice_expert_box.pdf')



In [585]:
averages_gp_merge_vis = [0]*10#len(gp_merge_vis)
for u in range(len(gp_merge_vis)):
    for z in range(10):
        averages_gp_merge_vis[z] += gp_merge_vis[u][z]
for z in range(10):
    averages_gp_merge_vis[z] /= len(gp_merge_vis)
averages_gp_split_vis = [0]*10#len(gp_merge_vis)
for u in range(len(gp_split_vis)):
    for z in range(10):
        averages_gp_split_vis[z] += gp_split_vis[u][z]
for z in range(10):
    averages_gp_split_vis[z] /= len(gp_split_vis)
objects = ['Initial\nSegmentation', 'Merge\nCorrections', 'Split\nCorrections']
data = [init_vi_per_slice, averages_gp_merge_vis, averages_gp_split_vis]
boxplot(objects, data, filename='/home/d/PAPERGFX/gp_merge_split_novice_box.pdf')



In [586]:
averages_gp_merge_vis = [0]*10#len(gp_merge_vis)
for u in range(len(expert_gp_merge_vis)):
    for z in range(10):
        averages_gp_merge_vis[z] += expert_gp_merge_vis[u][z]
for z in range(10):
    averages_gp_merge_vis[z] /= len(expert_gp_merge_vis)
averages_gp_split_vis = [0]*10#len(gp_merge_vis)
for u in range(len(expert_gp_split_vis)):
    for z in range(10):
        averages_gp_split_vis[z] += expert_gp_split_vis[u][z]
for z in range(10):
    averages_gp_split_vis[z] /= len(expert_gp_split_vis)
objects = ['Initial\nSegmentation', 'Merge\nCorrections', 'Split\nCorrections']
data = [init_vi_per_slice, averages_gp_merge_vis, averages_gp_split_vis]
boxplot(objects, data, filename='/home/d/PAPERGFX/gp_merge_split_expert_box.pdf')



In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [439]:
objects = ['Initial\nSegmentation', 'Novice', 'Expert']
data = [init_vi_per_slice, fp_vi_per_slice, expert_fp_vi_per_slice]
boxplot(objects, data)



In [ ]:


In [ ]:


In [ ]:


In [445]:
objects = ['Initial\nSegmentation', 'Oracle']
data = [init_vi_per_slice, simuser_gp_vi_per_slice]
boxplot(objects, data)



In [444]:
objects = ['Initial\nSegmentation', 'Oracle']
data = [init_vi_per_slice, simuser_fp_vi_per_slice]
boxplot(objects, data)



In [798]:
np.std(auto95_fp_t_vi_per_slice)


Out[798]:
0.49553943958908797

In [800]:
np.median(cyl_simuser_fp_vi_per_slice)


Out[800]:
0.0

In [461]:
objects = ['Initial\nSegmentation', 'Automatic']
data = [init_vi_per_slice, auto95_fp_t_vi_per_slice]
boxplot(objects, data, False)



In [471]:
with open('/home/d/GPSTUDY/auto95GP_threshold_NEW/correction_vis.p','rb') as f:
    vi_ = pickle.load(f)
objects = ['Initial\nSegmentation', 'Automatic']
data = [init_vi_per_slice, vi_[-1]]
boxplot(objects, data, False)



In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [178]:
after_merge_after_split = plt.boxplot([init_vi_per_slice, averages_gp_merge_vis, averages_gp_split_vis])



In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [6]:
-

In [7]:
cyl_auto95_fp_vi_per_slice, cyl_auto95_fp_merge_vis, cyl_auto95_fp_split_vis = gp.Stats.analyze_users(['auto95FP_NEW'], c_gold, c_rhoana,
                                                                                         filename='/home/d/PAPERGFX/cylinder_fpauto2.pdf',
                                                                                         DATADIR='/home/d/CYLINDERSTUDY/',
                                                                                         data='cyl',
                                                                                         clampX=None, clampY=None,
                                                                                         skipoutput=True)


0.379410649496
No. users 1
Avg. correction time 0.0
Avg. corrections 9697.0
Avg. accepted 9697.0

In [8]:
cyl_simuser_fp_vi_per_slice, cyl_simuser_fp_merge_vis, cyl_simuser_fp_split_vis = gp.Stats.analyze_users(['simuserFP_NEW'], c_gold, c_rhoana,
                                                                                         filename='/home/d/PAPERGFX/cylinder_fpsimuser.pdf',
                                                                                         DATADIR='/home/d/CYLINDERSTUDY/',
                                                                                         data='cyl',
                                                                                         clampX=None, clampY=None,
                                                                                         skipoutput=True)


0.379410649496
No. users 1
Avg. correction time 0.0
Avg. corrections 26170.0
Avg. accepted 2419.0

In [17]:
cyl_auto95_gp_vi_per_slice, cyl_auto95_gp_merge_vis, cyl_auto95_gp_split_vis,cyl_gp_allvis = gp.Stats.analyze_users(['auto00GP_NEW'], c_gold, c_rhoana,
                                                                                         filename='/home/d/PAPERGFX/cylinder_gpauto.pdf',
                                                                                         DATADIR='/home/d/CYLINDERSTUDY/',
                                                                                         data='cyl',
                                                                                         clampX=None, clampY=None,
                                                                                         skipoutput=True)


No. users 1
Avg. correction time 0.0
Avg. corrections 11209.0
Avg. accepted 11094.0

In [18]:
cyl_simuser_gp_vi_per_slice, cyl_simuser_gp_merge_vis, cyl_simuser_gp_split_vis,cyl_gp_simuser_allvis = gp.Stats.analyze_users(['simuserGP_NEW'], c_gold, c_rhoana,
                                                                                         filename='/home/d/PAPERGFX/cylinder_gpsimuser.pdf',
                                                                                         DATADIR='/home/d/CYLINDERSTUDY/',
                                                                                         data='cyl',
                                                                                         clampX=None, clampY=None,
                                                                                         skipoutput=True)


No. users 1
Avg. correction time 0.0
Avg. corrections 27491.0
Avg. accepted 2696.0

In [62]:
def bigboxplot(objects, data, clampY=True, filename=None):
#     plt.subplots()

    y_pos = range(1,len(objects)+1)

    fig = plt.figure(figsize=(20,14))
    ax = plt.gca()
    ax.axvline(x=1.5,ymax=1, color='lightblue', linewidth=2)
    ax.axvline(x=3.5,ymax=1, color='lightblue', linewidth=2)    
    ax.axvline(x=6.5,ymax=1, color='lightblue', linewidth=2)    
    ax.axvline(x=8.5,ymax=1, color='lightblue', linewidth=2)    
    plt.axhline(np.median(data[0]), color='gray', linewidth=2, linestyle=':', label='Initial Segmentation')
    plt.axhline(y=0.27683609273291143, color='gray', linestyle='--', linewidth=2, label='Best Possible')

    bp = plt.boxplot(data)
    plt.setp(bp['whiskers'],linewidth=3,linestyle='-',color='black')
    plt.setp(bp['fliers'],linewidth=3)
    plt.setp(bp['means'],linewidth=3)
    plt.setp(bp['medians'],linewidth=3
            ,color='black')
    plt.setp(bp['boxes'],linewidth=3,color='black')
    plt.setp(bp['caps'],linewidth=3)

    plt.text(2.5, .72, 
             'Automatic', 
             ha='center', va='top',
             fontsize=28, fontweight='normal')    
#     plt.text(5, .75, 
#              'F. Choice Novice', 
#              ha='center', va='top',
#              fontsize=28, fontweight='normal')   
    
#     plt.text(7.5, .75, 
#              'F. Choice Expert', 
#              ha='center', va='top',
#              fontsize=28, fontweight='normal')   
    

    plt.text(4.5, .72, 
             'Oracle', 
             ha='center', va='top',
             fontsize=28, fontweight='normal')    
    
    ax.set_aspect(7)
    ax.arrow(2, .62, 0, .03, head_width=0.2, head_length=0.03, fc='k', ec='k', linewidth=3)
    legend = ax.legend(loc='upper right')
    plt.ylabel('Variation of Information')
    if clampY:
        plt.ylim([0.1,0.7])
#     plt.yticks(np.arange(min(x), max(x)+1, 1.0))
    font = {'family' : 'sans-serif',
            'weight' : 'normal',
            'size'   : 28}

    plt.rc('font', **font)
    plt.rc('legend',**{'fontsize':24})
    plt.xticks(y_pos, objects)
    plt.yticks(np.arange(0.1, 0.71, 0.1))
    if filename:
        plt.savefig(filename)
        
    plt.savefig('/home/d/PAPERGFX/cylboxplot.pdf')
    plt.show()

objects = ['Initial\nSegmentation',
           'Focused\nProofreading',
           'Guided\nProofreading',        
           'Focused\nProofreading',
           'Guided\nProofreading']
objects = ['Initial\nSegmentation',
           'FP',
           'GP',           
           'FP',
           'GP']

data = [cyl_init_vi, 
        [], 
        cyl_gp_allvis[0][2199], 
       cyl_simuser_fp_split_vis[-1],
        cyl_gp_simuser_allvis[0][-1]]
bigboxplot(objects, data)



In [ ]:


In [ ]:


In [827]:
print np.median(cyl_auto95_gp_vi_per_slice), np.std(cyl_auto95_gp_vi_per_slice)
print np.median(cyl_auto95_gp_split_vis), np.std(cyl_auto95_gp_split_vis)
print np.median(cyl_auto95_gp_merge_vis), np.std(cyl_auto95_gp_merge_vis)


3.47498759108 0.800471831943
3.47498759108 0.800471831943
0.379410649496 0.118353313221

In [825]:
print np.median(cyl_simuser_gp_vi_per_slice), np.std(cyl_simuser_gp_vi_per_slice)
print np.median(cyl_simuser_gp_merge_vis), np.std(cyl_simuser_gp_merge_vis)


0.299640385758 0.0731536569479
0.379410649496 0.118394362099

In [817]:
np.median(cyl_auto95_fp_vi_per_slice), np.std(cyl_auto95_fp_vi_per_slice)


Out[817]:
(2.7505919905312721, 0.78879876907289681)

In [813]:
np.median(cyl_simuser_fp_vi_per_slice), np.std(cyl_simuser_fp_vi_per_slice)


Out[813]:
(0.2979992058218679, 0.075003890641540089)

In [61]:
import matplotlib.gridspec as gridspec
left, width = .25, .5
bottom, height = .25, .5
right = left + width
top = bottom + height
fig = plt.figure(1, figsize=(40,20))
clamper=20000

# fig.suptitle('Focused Proofreading', y=1.05, fontsize=64, fontweight='bold')
gs = gridspec.GridSpec(2,3,width_ratios=[.15,1,1])
gs.update(wspace=0.2, hspace=0.1)
a = plt.subplot(gs[0])
a.axis('off')
plt.text(-.5, .85, 
         'Focused\nProofreading', 
         ha='left', va='top', rotation='90',
         fontsize=56, fontweight='normal')

plt.subplot(gs[1])
gp.Stats.analyze_users(['auto95FP_NEW'], c_gold, c_rhoana, returnplt=True, vilabel=True, hideYlabels=False, showlegend=True,clampX=clamper,
                       DATADIR='/home/d/CYLINDERSTUDY/',data='cyl')
plt.title('Automatic', y=1.02)
plt.subplot(gs[2])
gp.Stats.analyze_users(['simuserFP_NEW'], c_gold, c_rhoana, returnplt=True,clampX=clamper,DATADIR='/home/d/CYLINDERSTUDY/',data='cyl')
plt.title('Oracle', y=1.02)
a = plt.subplot(gs[3])
a.axis('off')
plt.text(-.5, .85, 
         'Guided\nProofreading', 
         ha='left', va='top', rotation='90',
         fontsize=56, fontweight='normal')
plt.subplot(gs[4])

gp.Stats.analyze_users(['auto00GP_NEW'], c_gold, c_rhoana, hline=2199,returnplt=True, vilabel=True,
                       clampX=clamper,hideYlabels=False,clabel=True,hideXlabels=False,DATADIR='/home/d/CYLINDERSTUDY/',data='cyl')
plt.subplot(gs[5])
gp.Stats.analyze_users(['simuserGP_NEW'], c_gold, c_rhoana, returnplt=True,clampX=clamper,hideXlabels=False,clabel=True,data='cyl',
                      DATADIR='/home/d/CYLINDERSTUDY/')

# plt.tight_layout(pad=0.01, w_pad=0.01, h_pad=.5)

# gp.Stats.analyze_users(GP_USERS, gold, rhoana, clampX=700,
#                                                                     filename='/home/d/PAPERGFX/gpusers.pdf')
# plt.subplot(212)
# gp.Stats.analyze_users(GP_USERS, gold, rhoana, clampX=700,
#                                                                     filename='/home/d/PAPERGFX/gpusers.pdf')

plt.savefig('/home/d/PAPERGFX/cyltrails.pdf')



In [ ]:


In [489]:
auto95_gp_vi_per_slice, auto95_gp_merge_vis, auto95_gp_split_vis = gp.Stats.analyze_users(['auto00GP_NEW'], c_gold, c_rhoana,
                                                                                         filename='/home/d/PAPERGFX/cylinder_gpauto.pdf',
                                                                                         DATADIR='/home/d/CYLINDERSTUDY/')


No. users 1
Avg. correction time 0.0
Avg. corrections 10817.0
Avg. accepted 10702.0
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-489-3f304c0195e3> in <module>()
      1 auto95_gp_vi_per_slice, auto95_gp_merge_vis, auto95_gp_split_vis = gp.Stats.analyze_users(['auto00GP_NEW'], c_gold, c_rhoana,
      2                                                                                          filename='/home/d/PAPERGFX/cylinder_gpauto.pdf',
----> 3                                                                                          DATADIR='/home/d/CYLINDERSTUDY/')

/home/d/Projects/gp/gp/stats.py in analyze_users(FP_USERS, gold, rhoana, oracle, clampX, filename, DATADIR)
    177     fp_vi_per_slice = [0,0,0,0,0,0,0,0,0,0]
    178     for o in fp_outputs:
--> 179         fp_vi_per_slice_per_user.append(VI(gold, o)[2])
    180         last_split_vis.append(VI(gold, o)[2])
    181     for u in fp_vi_per_slice_per_user:

/home/d/Projects/gp/gp/stats.py in VI(gt, seg)
     28       slice_vi = []
     29       for i in range(len(gt)):
---> 30           current_vi = Util.vi(gt[i].astype(np.int64), seg[i].astype(np.int64))
     31           # total_vi += current_vi
     32           slice_vi.append(current_vi)

/home/d/Projects/gp/gp/util.pyc in vi(array1, array2)
    280     '''
    281     '''
--> 282     return partition_comparison.variation_of_information(array1.ravel(), array2.ravel())
    283 

partition_comparison.pyx in partition_comparison.variation_of_information (src/partition_comparison.cpp:27215)()

AssertionError: 

In [490]:
simuser_gp_vi_per_slice, simuser_gp_merge_vis, simuser_gp_split_vis = gp.Stats.analyze_users(['simuserGP_NEW'], c_gold, c_rhoana,
                                                                                         filename='/home/d/PAPERGFX/cylinder_fpauto.pdf',
                                                                                         DATADIR='/home/d/CYLINDERSTUDY/')


No. users 1
Avg. correction time 0.0
Avg. corrections 27135.0
Avg. accepted 2648.0
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-490-7b9d37d2b5ce> in <module>()
      1 simuser_gp_vi_per_slice, simuser_gp_merge_vis, simuser_gp_split_vis = gp.Stats.analyze_users(['simuserGP_NEW'], c_gold, c_rhoana,
      2                                                                                          filename='/home/d/PAPERGFX/cylinder_fpauto.pdf',
----> 3                                                                                          DATADIR='/home/d/CYLINDERSTUDY/')

/home/d/Projects/gp/gp/stats.py in analyze_users(FP_USERS, gold, rhoana, oracle, clampX, filename, DATADIR)
    177     fp_vi_per_slice = [0,0,0,0,0,0,0,0,0,0]
    178     for o in fp_outputs:
--> 179         fp_vi_per_slice_per_user.append(VI(gold, o)[2])
    180         last_split_vis.append(VI(gold, o)[2])
    181     for u in fp_vi_per_slice_per_user:

/home/d/Projects/gp/gp/stats.py in VI(gt, seg)
     28       slice_vi = []
     29       for i in range(len(gt)):
---> 30           current_vi = Util.vi(gt[i].astype(np.int64), seg[i].astype(np.int64))
     31           # total_vi += current_vi
     32           slice_vi.append(current_vi)

/home/d/Projects/gp/gp/util.pyc in vi(array1, array2)
    280     '''
    281     '''
--> 282     return partition_comparison.variation_of_information(array1.ravel(), array2.ravel())
    283 

partition_comparison.pyx in partition_comparison.variation_of_information (src/partition_comparison.cpp:27215)()

AssertionError: 

In [ ]:


In [547]:
c_gold[0].shape


Out[547]:
(2048, 2048)

In [792]:
np.std(dojo_avg_user)


Out[792]:
0.055459279955864825

In [530]:
DATADIR='/home/d/CYLINDERSTUDY/'
FP_USERS=['auto95FP_NEW']
newrhoana = 'ui_results.p'
times = 'times.p'
corrections = 'corrections.p'
correction_vis = 'correction_vis.p'

fp_outputs = []
fp_times = []
fp_corrections = []
fp_vis =[]

for i,f in enumerate(FP_USERS):
    with open(DATADIR+FP_USERS[i]+'/'+newrhoana, 'rb') as f:
        fp_newrhoana = pickle.load(f)
        fp_outputs.append(fp_newrhoana)
    with open(DATADIR+FP_USERS[i]+'/'+times, 'rb') as f:
        fp_time = pickle.load(f)
        fp_time = [int(v) for v in fp_time]
        fp_times.append(fp_time)
    with open(DATADIR+FP_USERS[i]+'/'+corrections, 'rb') as f:
        fp_correction = pickle.load(f)       
        fp_corrections.append(fp_correction)
    with open(DATADIR+FP_USERS[i]+'/'+correction_vis, 'rb') as f:
        fp_correction_vis = pickle.load(f) 
        fp_vis.append(fp_correction_vis)

In [531]:
len(fp_vis[0][0])


Out[531]:
10

In [532]:
with open('/home/d/CYLINDERSTUDY/cylinderFP/cylinder_split_simuser_vis.p', 'rb') as f:
    cylinder_split_simuser_vis = pickle.load(f)

In [536]:
len(cylinder_split_simuser_vis[0][2])


Out[536]:
50

In [537]:
with open('/home/d/CYLINDERSTUDY/cylinderFP/cylinder_split_auto95_vis.p', 'rb') as f:
    cylinder_split_auto95_vis = pickle.load(f)

In [552]:
cylinder_split_auto95_vis[-1][2]


Out[552]:
[1.4856662242676064,
 1.3844544563651686,
 1.5317620595698997,
 1.3470759090120028,
 1.8952595804833798,
 2.0377511152949355,
 1.7599795717848146,
 2.107572675712862,
 1.987570633987242,
 2.0602892812180302,
 2.1962027482706885,
 2.2288660222404664,
 2.475013226940462,
 2.249195492230122,
 2.37011031101249,
 2.686167611647076,
 2.193645290574823,
 2.6024426062882866,
 2.626406371592399,
 2.724138687269732,
 2.773352866098695,
 2.727831114963849,
 2.7863513156573365,
 2.9784987515440653,
 3.0755508140723533,
 3.0216233580393865,
 3.0022596875267373,
 2.9913849299683393,
 3.047030785365775,
 3.438552518198928,
 3.441803338753955,
 3.571807981406515,
 3.578642659735305,
 3.5133058273454933,
 3.5045567306186762,
 3.5441550671647395,
 3.5190626216888456,
 3.577730736918236,
 3.619888632464546,
 3.662314511858265,
 3.645580011416984,
 3.524825929840972,
 3.3986150738275778,
 3.3456168265953616,
 3.1312327009122267,
 2.9211813841674137,
 2.612678960573779,
 2.1074823939871217,
 1.45476223041079,
 0.0]

In [ ]:


In [835]:
def VI(gt, seg):
  # total_vi = 0
  slice_vi = []    
  for i in range(len(gt)):
      current_vi = gp.Util.vi(gt[i].astype(np.int64), seg[i].astype(np.int64))
      # total_vi += current_vi
      slice_vi.append(current_vi)
  # total_vi /= 10
  return np.mean(slice_vi), np.median(slice_vi), slice_vi

In [837]:
cyl_vis = VI(c_gold, c_rhoana)[2]

In [839]:
np.median(cyl_vis), np.std(cyl_vis)


Out[839]:
(0.37941064949584113, 0.11839436209855872)

In [306]:
## FP SIMUSER
dojo_merge_simuser_vis = []
# with open('/home/d/netstatsPAPERFP/IPMLB/dojo_merge_auto95_vis.p', 'rb') as f:
#     dojo_merge_simuser_vis = pickle.load(f)
dojo_split_simuser_vis = []
with open('/home/d/netstatsPAPERFP/IPMLB/dojo_split_simuser_vis.p', 'rb') as f:
    dojo_split_simuser_vis = pickle.load(f)
# correction_vis
simuser_vis = [init_vi_per_slice]
for vi in dojo_merge_simuser_vis:
    simuser_vis.append(vi[2])
for vi in dojo_split_simuser_vis:
    simuser_vis.append(vi[2])
dojo_merge_fixes_simuser = []
# with open('/home/d/netstatsPAPERFP/IPMLB/dojo_merge_auto95_fixes.p', 'rb') as f:
#     dojo_merge_fixes_simuser = pickle.load(f)
dojo_split_fixes_simuser = []
with open('/home/d/netstatsPAPERFP/IPMLB/dojo_split_simuser_fixes.p', 'rb') as f:
    dojo_split_fixes_simuser = pickle.load(f)    
# corrections
simuser_corrections = []
# for f in dojo_merge_fixes_simuser:
#     if f == 'Good':
#         f = '1'
#     else:
#         f = 'current'
#     simuser_corrections.append(('merge', f))
for f in dojo_split_fixes_simuser:
    if f[0] == 1:
        f = '1'
    else:
        f = 'current'
    simuser_corrections.append(('split', f))
# output
simuser_output = []
with open('/home/d/netstatsPAPERFP/IPMLB/dojo_simuser_output.p', 'rb') as f:
    simuser_output = pickle.load(f)
SIMUSERDIR = '/home/d/GPSTUDY/simuserFP/'
with open(SIMUSERDIR+'/ui_results.p', 'wb') as f:
    pickle.dump(simuser_output, f)
with open(SIMUSERDIR+'/times.p', 'wb') as f:
    pickle.dump([0], f)
with open(SIMUSERDIR+'/corrections.p', 'wb') as f:
    pickle.dump(simuser_corrections, f)
with open(SIMUSERDIR+'/correction_vis.p', 'wb') as f:
    pickle.dump(simuser_vis, f)

In [267]:
simuser_vis[-1]


Out[267]:
[0.3551900803828607,
 0.3508653848118293,
 0.33542514752697095,
 0.3269841515239227,
 0.3355696611201271,
 0.3268870516306235,
 0.4340057588851831,
 0.4372768577069621,
 0.4229467649673557,
 0.3811965243113766]

In [270]:
simuser_vis[-1]


Out[270]:
[0.36215160284759573,
 0.37858068899578967,
 0.36319781430231046,
 0.3437432428288787,
 0.3887227169421985,
 0.3702204569012766,
 0.42193466683993375,
 0.4346364560413223,
 0.40905872629118445,
 0.389864002596668]

In [272]:
gpsim = simuser_output.copy()

In [274]:
fpsim = simuser_output.copy()

In [472]:
gp.Util.view(rhoana[2],large=True)



In [475]:
imshow(gp.Util.threshold(rhoana[2],5))


Out[475]:
<matplotlib.image.AxesImage at 0x7fdf86920c10>

In [474]:
imshow(gp.Util.threshold(rhoana[2],6))


Out[474]:
<matplotlib.image.AxesImage at 0x7fdf84b2c610>

In [276]:
gp.Util.view(gpsim[2],large=True)



In [277]:
gp.Util.view(fpsim[2],large=True)



In [278]:
gp.Util.view(gold[2], large=True)



In [282]:
best_vis = []
for z in range(10):
    fixed = gp.Util.propagate_max_overlap(rhoana[z], gold[z])
    fixed = gp.Util.relabel(fixed)
    best_vis.append(gp.Util.vi(fixed.astype(np.uint64),gold[z].astype(np.uint64)))

In [284]:
np.median(best_vis)


Out[284]:
0.33414926373414477

In [562]:
best_vis = []
for z in range(50):
    fixed = gp.Util.propagate_max_overlap(c_rhoana[z], c_gold[z])
    fixed = gp.Util.relabel(fixed)
    best_vis.append(gp.Util.vi(fixed.astype(np.uint64),c_gold[z].astype(np.uint64)))

In [563]:
np.median(best_vis)


Out[563]:
0.27683609273291143

In [ ]:


In [21]:
cyl_init_vi = gp.Legacy.VI(c_rhoana, c_gold)[2]

In [ ]:


In [ ]:


In [830]:
c_gold.shape


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-830-d9ab1ec5a460> in <module>()
----> 1 c_gold.shape

AttributeError: 'list' object has no attribute 'shape'

In [842]:
np.std(VI(gold, rhoana)[2])


Out[842]:
0.088579820006442336

In [226]:
len(simuser_corrections)


Out[226]:
1464

In [236]:
len(simuser_vis)


Out[236]:
202

In [231]:
np.median(init_vi_per_slice), np.median(simuser_vis)


Out[231]:
(0.4763612343909136, 0.4763612343909136)

In [96]:
simuser_output.shape


Out[96]:
(10, 474, 474)

In [118]:
simuser_corrections


Out[118]:
[('merge', 'current'),
 ('merge', 'current'),
 ('merge', 'current'),
 ('merge', 'current'),
 ('merge', 'current'),
 ('merge', 'current'),
 ('merge', 'current'),
 ('merge', 'current'),
 ('merge', 'current'),
 ('merge', 'current'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', 'current'),
 ('split', 'current'),
 ('split', 'current'),
 ('split', '1'),
 ('split', 'current'),
 ('split', '1'),
 ('split', '1'),
 ('split', 'current'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', 'current'),
 ('split', 'current'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', 'current'),
 ('split', '1'),
 ('split', '1'),
 ('split', 'current'),
 ('split', 'current'),
 ('split', 'current'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', 'current'),
 ('split', '1'),
 ('split', '1'),
 ('split', 'current'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', '1'),
 ('split', 'current'),
 ('split', '1')]

In [ ]:


In [290]:
fp_vi_per_slice


Out[290]:
[0.4376894378238423,
 0.42248790736115477,
 0.418145124855108,
 0.439276876746551,
 0.4712588022861315,
 0.4676127919023316,
 0.5936543082466625,
 0.6188201134113684,
 0.661255030981917,
 0.572448845896355]

In [297]:
np.median(gp_vi_per_slice)


Out[297]:
0.45486023885611315

In [298]:
np.median(fp_vi_per_slice)


Out[298]:
0.46943579709423155

In [293]:
init_median_vi


Out[293]:
0.4763612343909136

In [299]:
init_median_vi - 0.0927095314219


Out[299]:
0.38365170296901363

In [206]:
#
#

In [ ]:
#
with open('/home/d/dojo_xp/data/bigM_fp.p', 'rb') as f:
    bigM = pickle.load(f)

corrections = []
rhoana_copy = np.array(rhoana)
for c in fp_corrections[3]:
    bigM_max = -1
    bigM_max_index = None
    bigM_max_z = -1
    for z,m in enumerate(bigM):
        if m.max() > bigM_max:
            bigM_max = m.max()
            bigM_max_indices = np.where(m == bigM_max)
            bigM_max_index = [bigM_max_indices[0][0], bigM_max_indices[1][0]]
            bigM_max_z = z

            
    m = bigM[bigM_max_z]
    new_m = np.array(m)

    label1 = bigM_max_index[0]
    label2 = bigM_max_index[1]
    
    c_rhoana = rhoana_copy[bigM_max_z].copy()

    if c[1] == '1':
        
#         print 'merging', label1, label2
        
        corrections.append([bigM_max_z, label1, label2, gp.Util.view_labels(c_rhoana, [label1, label2], return_it=True)])
        
        c_rhoana[c_rhoana == label2] = label1

        # grab old neighbors of label 2 which are now neighbors of label1
        label2_neighbors = gp.Util.grab_neighbors(c_rhoana, label2)
        for l_neighbor in label2_neighbors:

            if l_neighbor == 0:
                continue

            if label1 == l_neighbor:
                continue
  
            # get old score
            old_score = new_m[label2, l_neighbor]

            label1_neighbor_score = new_m[label1, l_neighbor]

            # and now choose the max of these two
            new_m[label1, l_neighbor] = max(label1_neighbor_score, old_score)
            new_m[l_neighbor, label1] = max(label1_neighbor_score, old_score)


        # label2 does not exist anymore
        new_m[:,label2] = -2
        new_m[label2, :] = -2      

        bigM[bigM_max_z] = new_m
        
        rhoana_copy[bigM_max_z] = c_rhoana.copy()
        
    else:
        pass
#         print 'current'
for c in corrections:

    gp.Util.view(c[3], color=False)

In [ ]:
with open('/home/d/netstatsPAPER/')

In [ ]:


In [ ]:


In [208]:
aaa = '''#!/bin/bash
#
# add all other SBATCH directives here...
#
#SBATCH -p cox
#SBATCH -n 1 # Number of cores
#SBATCH -N 1 # Ensure that all cores are on one machine
#SBATCH --gres=gpu
#SBATCH --mem=8000
#SBATCH -t 10-12:00
#SBATCH --mail-type=ALL
#SBATCH --mail-user=haehn@seas.harvard.edu
#SBATCH -o /n/home05/haehn/SLURM/gp/out-me_{Z}.txt
#SBATCH -e /n/home05/haehn/SLURM/gp/err-me_{Z}.txt

source new-modules.sh
module load Anaconda/2.5.0-fasrc01
module load gcc/4.9.0-fasrc01

module load cuda/7.5-fasrc01
module load cudnn/7.0-fasrc01

module load opencv/3.0.0-fasrc04

# custom HDF5 lib
export LIBRARY_PATH=/n/home05/haehn/nolearncox/src/hdf5-1.8.17/hdf5/lib:$LIBRARY_PATH
export LD_LIBRARY_PATH=/n/home05/haehn/nolearncox/src/hdf5-1.8.17/hdf5/lib:$LD_LIBRARY_PATH
export CPATH=/n/home05/haehn/nolearncox/src/hdf5-1.8.17/hdf5/include:$CPATH
export FPATH=/n/home05/haehn/nolearncox/src/hdf5-1.8.17/hdf5/include:$FPATH

source /n/home05/haehn/nolearncox/bin/activate

# we are working in TEMP
cd /n/home05/haehn/Projects/gp/
python mergeerrors.py {Z}

# end of program
exit 0;
'''

In [213]:
for yyy in range(250,300):
    bbb = aaa.replace('{Z}', str(yyy))
    with open('../slurm/mergeerrors/'+str(yyy)+'.slurm', 'w') as f:
        f.write(bbb)

In [216]:
with open('/tmp/'+str(yyy)+'.slurm') as f:
        pickle.dump(bbb,f)


---------------------------------------------------------------------------
IOError                                   Traceback (most recent call last)
<ipython-input-216-0372e57446bd> in <module>()
      1 with open('/tmp/'+str(yyy)+'.slurm') as f:
----> 2         pickle.dump(bbb,f)

IOError: [Errno 9] Bad file descriptor

In [258]:
alist = '''
          75192294       cox 271.slur    haehn PD       0:00      1 (Resources)
          75192315       cox 272.slur    haehn PD       0:00      1 (Priority)
          75192348       cox 273.slur    haehn PD       0:00      1 (Priority)
          75192349       cox 274.slur    haehn PD       0:00      1 (Priority)
          75192350       cox 275.slur    haehn PD       0:00      1 (Priority)
          75192351       cox 276.slur    haehn PD       0:00      1 (Priority)
          75192352       cox 277.slur    haehn PD       0:00      1 (Priority)
          75192353       cox 278.slur    haehn PD       0:00      1 (Priority)
          75192364       cox 279.slur    haehn PD       0:00      1 (Priority)
          75192390       cox 280.slur    haehn PD       0:00      1 (Priority)
          75192398       cox 281.slur    haehn PD       0:00      1 (Priority)
          75192405       cox 282.slur    haehn PD       0:00      1 (Priority)
          75192460       cox 283.slur    haehn PD       0:00      1 (Priority)
          75192481       cox 284.slur    haehn PD       0:00      1 (Priority)
          75192527       cox 285.slur    haehn PD       0:00      1 (Priority)
          75192572       cox 286.slur    haehn PD       0:00      1 (Priority)
          75192583       cox 287.slur    haehn PD       0:00      1 (Priority)
          75192607       cox 288.slur    haehn PD       0:00      1 (Priority)
          75192608       cox 289.slur    haehn PD       0:00      1 (Priority)
          75192609       cox 290.slur    haehn PD       0:00      1 (Priority)
          75192610       cox 291.slur    haehn PD       0:00      1 (Priority)
          75192611       cox 292.slur    haehn PD       0:00      1 (Priority)
          75192622       cox 293.slur    haehn PD       0:00      1 (Priority)
          75192623       cox 294.slur    haehn PD       0:00      1 (Priority)
          75192685       cox 295.slur    haehn PD       0:00      1 (Priority)
          75192686       cox 296.slur    haehn PD       0:00      1 (Priority)
          75192687       cox 297.slur    haehn PD       0:00      1 (Priority)
          75192688       cox 298.slur    haehn PD       0:00      1 (Priority)
          75192699       cox 299.slur    haehn PD       0:00      1 (Priority)
          75192283       cox 270.slur    haehn  R    1:02:43      1 coxgpu01
          75192271       cox 269.slur    haehn  R    1:19:46      1 coxgpu04
          75192260       cox 268.slur    haehn  R    1:19:48      1 coxgpu03
          75192257       cox 267.slur    haehn  R    1:19:49      1 coxgpu01
          75192246       cox 266.slur    haehn  R    1:19:51      1 coxgpu04
          75192235       cox 265.slur    haehn  R    1:19:54      1 coxgpu03
          75192216       cox 264.slur    haehn  R    1:19:58      1 coxgpu01
          75192210       cox 263.slur    haehn  R    1:19:59      1 coxgpu04
          75192191       cox 262.slur    haehn  R    1:20:04      1 coxgpu03
          75192179       cox 260.slur    haehn  R    1:20:07      1 coxgpu04
          75192180       cox 261.slur    haehn  R    1:20:07      1 coxgpu01
          75192122       cox 258.slur    haehn  R    1:20:17      1 coxgpu01
          75192124       cox 259.slur    haehn  R    1:20:17      1 coxgpu03
          75192105       cox 252.slur    haehn  R    1:20:21      1 coxgpu05
          75192107       cox 253.slur    haehn  R    1:20:21      1 coxgpu01
          75192108       cox 254.slur    haehn  R    1:20:21      1 coxgpu02
          75192109       cox 255.slur    haehn  R    1:20:21      1 coxgpu03
          75192110       cox 256.slur    haehn  R    1:20:21      1 coxgpu04
          75192111       cox 257.slur    haehn  R    1:20:21      1 coxgpu05
          75192104       cox 251.slur    haehn  R    1:20:22      1 coxgpu03
          75192093       cox 250.slur    haehn  R    1:20:24      1 coxgpu01

'''

In [259]:
ids = []
nsplit = alist.split('\n')
for n in nsplit:
    lsplit =  n.split(' ')
    for l in lsplit:
        if l =='':
            continue
        else:
            ids.append(l)
            break

In [260]:
for i in ids:
    print 'scancel '+ i


scancel 75192294
scancel 75192315
scancel 75192348
scancel 75192349
scancel 75192350
scancel 75192351
scancel 75192352
scancel 75192353
scancel 75192364
scancel 75192390
scancel 75192398
scancel 75192405
scancel 75192460
scancel 75192481
scancel 75192527
scancel 75192572
scancel 75192583
scancel 75192607
scancel 75192608
scancel 75192609
scancel 75192610
scancel 75192611
scancel 75192622
scancel 75192623
scancel 75192685
scancel 75192686
scancel 75192687
scancel 75192688
scancel 75192699
scancel 75192283
scancel 75192271
scancel 75192260
scancel 75192257
scancel 75192246
scancel 75192235
scancel 75192216
scancel 75192210
scancel 75192191
scancel 75192179
scancel 75192180
scancel 75192122
scancel 75192124
scancel 75192105
scancel 75192107
scancel 75192108
scancel 75192109
scancel 75192110
scancel 75192111
scancel 75192104
scancel 75192093

In [179]:
init_median_vi


Out[179]:
0.4763612343909136

In [258]:
output_folder = '/home/d/netstatsPAPERFP/IPMLB/'

In [259]:
bigM_dojo_file = output_folder + '/bigM_fp_2D.p'

In [260]:
with open(bigM_dojo_file, 'rb') as f:
        bigM_dojo = pickle.load(f)

In [ ]:
bigM_dojo_file = output_folder + '/bigM_fp_2D.p'

In [873]:
from gp import Util

In [880]:
cremi_input_image = []
cremi_input_prob = []
cremi_input_gold = []
cremi_input_rhoana = []

# for z in range(0,50):
test_slices = range(15,25) + range(40,50) + range(65,75)
for z in test_slices:
    image, prob, gold, rhoana = Util.read_cremi_section(os.path.expanduser('/home/d/data/CREMIGP/TEST/'), z)
    cremi_input_image.append(image[0:500,0:500])
    cremi_input_prob.append(255.-prob[0:500,0:500])
    cremi_input_gold.append(gold[0:500,0:500])
    cremi_input_rhoana.append(rhoana[0:500,0:500])

In [879]:
sys.path.append('../gp/')
from util import Util

In [881]:
cremi_input_gold.shape


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-881-ab217022c31c> in <module>()
----> 1 cremi_input_gold.shape

AttributeError: 'list' object has no attribute 'shape'

In [901]:
cyl_simuser_fp_vi_per_slice, cyl_simuser_fp_merge_vis, cyl_simuser_fp_split_vis = gp.Stats.analyze_users(['simuserFP'], cremi_input_gold, cremi_input_rhoana,
                                                                                         filename='/home/d/PAPERGFX/cremi_fpsimuser.pdf',
                                                                                         DATADIR='/home/d/CREMISTUDY/',
                                                                                         data='cremi',
                                                                                         clampX=None, clampY=None,
                                                                                         skipoutput=True)


1.48387531843
No. users 1
Avg. correction time 0.0
Avg. corrections 2652.0
Avg. accepted 441.0

In [890]:
len(cyl_simuser_fp_vi_per_slice)


Out[890]:
50

In [ ]:


In [895]:
cyl_simuser_fp_vi_per_slice, cyl_simuser_fp_merge_vis, cyl_simuser_fp_split_vis = gp.Stats.analyze_users(['auto95FP_NEW'], cremi_input_gold, cremi_input_rhoana,
                                                                                         filename='/home/d/PAPERGFX/cremi_fpauto.pdf',
                                                                                         DATADIR='/home/d/CREMISTUDY/',
                                                                                         data='cremi',
                                                                                         clampX=None, clampY=None,
                                                                                         skipoutput=True)


1.48387531843
No. users 1
Avg. correction time 0.0
Avg. corrections 201.0
Avg. accepted 201.0

In [887]:
np.median(cyl_simuser_fp_split_vis)


Out[887]:
1.5143901333918834

In [891]:
len(cremi_input_gold)


Out[891]:
30

In [902]:
# FP SIM
np.median(cyl_simuser_fp_split_vis[:30]), np.std(cyl_simuser_fp_split_vis[:30])


Out[902]:
(1.2966302039051194, 0.24447993173780638)

In [900]:
np.median(cyl_simuser_fp_vi_per_slice[:30]), np.std(cyl_simuser_fp_split_vis[:30])


Out[900]:
(1.5143901333918834, 0.23646198072502539)

In [899]:
cremivi=[1.0549800794594688,
 1.0787705373852496,
 1.0861358078168148,
 0.9675713369670014,
 0.9997668056591769,
 0.9459034373828512,
 0.9877676442733927,
 0.9141360710451147,
 1.031830053143838,
 0.8788253949176683,
 1.6836230208130387,
 1.5795090743954079,
 1.525698278449243,
 1.5294184161061146,
 1.5089478700765797,
 1.5729122516497531,
 1.497966590050276,
 1.509997243873567,
 1.4813236033061328,
 1.5360103698782628,
 1.549647392613978,
 1.5256067574557957,
 1.6481299808338532,
 1.6513143780877426,
 1.4779760288996808,
 1.4864270335469092,
 1.4094745372930033,
 1.3310744400437553,
 1.3739251218870159,
 1.5206372358397617]
print np.median(cremivi), np.std(cremivi)


1.48387531843 0.25989367595

In [905]:
cyl_simuser_fp_vi_per_slice, cyl_simuser_fp_merge_vis, cyl_simuser_fp_split_vis = gp.Stats.analyze_users(['auto95GP_NEW'], cremi_input_gold, cremi_input_rhoana,
                                                                                         filename='/home/d/PAPERGFX/cremi_fpauto.pdf',
                                                                                         DATADIR='/home/d/CREMISTUDY/',
                                                                                         data='cremi',
                                                                                         clampX=None, clampY=None,
                                                                                         skipoutput=True)


1.48387531843
No. users 1
Avg. correction time 0.0
Avg. corrections 613.0
Avg. accepted 341.0

In [906]:
cyl_simuser_fp_vi_per_slice2, cyl_simuser_fp_merge_vis2, cyl_simuser_fp_split_vis2 = gp.Stats.analyze_users(['simuserGP'], cremi_input_gold, cremi_input_rhoana,
                                                                                         filename='/home/d/PAPERGFX/cremi_fpauto.pdf',
                                                                                         DATADIR='/home/d/CREMISTUDY/',
                                                                                         data='cremi',
                                                                                         clampX=None, clampY=None,
                                                                                         skipoutput=True)


1.48387531843
No. users 1
Avg. correction time 0.0
Avg. corrections 3843.0
Avg. accepted 1199.0

In [907]:
np.median(cyl_simuser_fp_vi_per_slice2[:30]), np.std(cyl_simuser_fp_vi_per_slice2[:30])


Out[907]:
(1.1716469642898275, 0.23430551760120516)

In [908]:
np.median(cyl_simuser_fp_vi_per_slice[:30]), np.std(cyl_simuser_fp_vi_per_slice[:30])


Out[908]:
(1.4246801398849518, 0.26646357097975892)

In [ ]: