In [1]:
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import matplotlib.ticker as ticker
from sklearn import metrics
from StrongCNN.utils import features_selection as fs
%matplotlib inline

In [2]:
def myLogFormat(y,pos):
    # Find the number of decimal places required
    decimalplaces = int(np.maximum(-np.log10(y),0))     # =0 for numbers >=1
    # Insert that number into a format string
    formatstring = '{{:.{:1d}f}}'.format(decimalplaces)
    # Return the formatted tick label
    return formatstring.format(y)

In [3]:
Creg_files = {
'HST':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/HSTlikeModels/BCG/score_auc_C.txt',
'LSST-best':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/single/BCG/score_auc_C.txt',
'LSST10':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/stack/BCG/score_auc_C.txt',
'nHST':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/HSTlikeModels/noBCG/score_auc_C.txt',
'nLSST-best':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/single/noBCG/score_auc_C.txt',
'nLSST10':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/stack/noBCG/score_auc_C.txt',

}
Creg_data = {}
for k, v in Creg_files.iteritems() :
    Creg_data[k] = pd.read_csv(v,delimiter=' ')

In [9]:
def plot_Creg(data_key,legend=False, save=False) :
    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()

    ax1.semilogx(Creg_data[data_key]['C_logreg'], Creg_data[data_key]['train_AUC'],'b:',lw=5.0,label='train')
    ax1.semilogx(Creg_data[data_key]['C_logreg'], Creg_data[data_key]['test_AUC'],'b',lw=5.0,label='test')
    ax2.loglog(Creg_data[data_key]['C_logreg'],Creg_data[data_key]['train_time'],'r',lw=1.0,label='train time')
    ax2.set_ylim((.5*Creg_data[data_key]['train_time'].min(),2*Creg_data[data_key]['train_time'].max()))
    plt.text(0.1,0.9,data_key, ha='left', va='center', transform=plt.gca().transAxes,fontsize='xx-large')
    ax1.set_xlabel('Logistic Regression Coefficient',fontsize='xx-large')
    ax1.set_ylabel('Area Under ROC Curve',fontsize='xx-large',color='blue')
    ax2.set_ylabel('Train Time (sec)',fontsize='xx-large',color='r')

    plt.gca().xaxis.set_major_formatter(ticker.FuncFormatter(myLogFormat))
    plt.gca().tick_params(axis='x', labelsize='x-large')
    plt.gca().tick_params(axis='y', labelsize='x-large')
    if legend :
        leg = ax1.legend(loc='lower right',fontsize='x-large',frameon=False)
        # change the font colors to match the line colors:
        for line,text in zip(leg.get_lines(), leg.get_texts()):
            text.set_color(line.get_color())
    if save :  
        plt.savefig('paper_figures/Creg/'+data_key+'.pdf',bbox_inches="tight")
    #plt.close()

In [10]:
plot_Creg('HST',save=True)
plot_Creg('LSST-best',save=True)
plot_Creg('LSST10',save=True,legend=True)
#plot_Creg('nHST',save=True)
plot_Creg('nLSST-best',save=True)
plot_Creg('nLSST10',save=True,legend=True)
plot_Creg('nLSST10',save=False,legend=True)



In [30]:
trainsize_files = {
    #'HST':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/HSTlikeModels/BCG/trainsize_AUC_time.txt',
    'LSST-best':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/single/BCG/trainsize_AUC_time.txt',
    'LSST10':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/stack/BCG/trainsize_AUC_time.txt',
    #'nHST':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/HSTlikeModels/noBCG/trainsize_AUC_time.txt',
    'nLSST-best':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/single/noBCG/trainsize_AUC_time.txt',
    'nLSST10':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/stack/noBCG/trainsize_AUC_time.txt',

}
trainsize_data = {}
for k, v in trainsize_files.iteritems() :
    trainsize_data[k] = pd.read_csv(v,delimiter=' ')

In [7]:
def plot_nolens_lens_trainsize_auc( data_keys, legend=True, save=True ) :
    fig, ax1 = plt.subplots()
    #ax1.text(0.1,0.9, data_key, ha='left', va='center', transform=ax1.transAxes,fontsize='xx-large')
    for data_key,color in zip(data_keys,['r','b']) :
        ax1.plot(trainsize_data[data_key]['train_size'], trainsize_data[data_key]['AUC'], color,marker='o',ms=8,lw=5.0,label=data_key)
        ax1.plot(trainsize_data['n'+data_key]['train_size'], trainsize_data['n'+data_key]['AUC'], color+'--',marker='o',ms=8,lw=5.0,label='n'+data_key)
    
    ax1.set_xlabel('Train Size',fontsize='xx-large')
    ax1.set_ylabel('Area Under ROC Curve',fontsize='xx-large')
    ax1.grid()
    ax1.set_xscale('log')
    ax1.set_xlim((500,12000))
    ax1.tick_params(axis='x', labelsize='x-large')
    ax1.tick_params(axis='y', labelsize='x-large')
        
    if legend :
        leg = plt.legend(loc='center right',fontsize='x-large',frameon=False)
        # change the font colors to match the line colors:
        for line,text in zip(leg.get_lines(), leg.get_texts()):
            text.set_color(line.get_color())
    if save :  
        print 'saving: '+'paper_figures/trainsize/combined_'+data_key+'_with_n'+data_key+'.pdf'
        
        plt.savefig('paper_figures/trainsize/combined_'+data_key+'_with_n'+data_key+'.pdf',bbox_inches="tight")
        plt.close()

In [31]:
plot_nolens_lens_trainsize_auc(['LSST-best','LSST10'], legend=True, save=True)
#plot_nolens_lens_trainsize_auc('LSST1', legend=True, save=True)


saving: paper_figures/trainsize/combined_LSST10_with_nLSST10.pdf

In [9]:
def plot_trainsize_time(data_key,legend=False,save=False) :
    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()
    ax1.text(0.1,0.9, data_key, ha='left', va='center', transform=ax1.transAxes,fontsize='xx-large')

    ax1.plot(trainsize_data[data_key]['train_size'], trainsize_data[data_key]['train_time'], 'bo-',lw=5.0,label=data_key)
    ax1.plot(trainsize_data['n'+data_key]['train_size'], trainsize_data['n'+data_key]['train_time'], 'bo--',lw=5.0,label="n"+data_key)
    ax2.plot(trainsize_data[data_key]['train_size'], trainsize_data[data_key]['Creg'], 'ro-',lw=2.0,label=data_key)
    ax2.plot(trainsize_data['n'+data_key]['train_size'], trainsize_data['n'+data_key]['Creg'], 'ro--',lw=2.0,label="n"+data_key)
    
    
    ax1.set_xlabel('Train Size',fontsize='xx-large')
    ax1.set_ylabel('Train Time',fontsize='xx-large',color='b')
    ax2.set_ylabel('Optimal C$_{\\rm{LogReg}}$',fontsize='xx-large',color='r')

    ax1.set_xscale('log')
    ax2.set_xscale('log')
    ax1.set_yscale('log')
    ax2.set_yscale('log')
    
    ax1.set_xlim((500,12000))
    ax2.set_xlim((500,12000))
    ax1.tick_params(axis='x', labelsize='x-large')
    ax2.tick_params(axis='y', labelsize='x-large')
    ax1.tick_params(axis='y', labelsize='x-large')
    if legend :
        leg = plt.legend(loc='lower right',fontsize='x-large',frameon=False)
        # change the font colors to match the line colors:
        #for line,text in zip(leg.get_lines(), leg.get_texts()):
            #text.set_color(line.get_color())
    if save :  
        print 'saving: '+'paper_figures/traintime/'+data_key+'.pdf'
        plt.grid()
        plt.savefig('paper_figures/traintime/'+data_key+'.pdf',bbox_inches="tight")
        plt.close()

In [32]:
#plot_trainsize_time('HST',save=True)
plot_trainsize_time('LSST-best',save=True, legend=True)
plot_trainsize_time('LSST10',save='LSST10',legend=True)


saving: paper_figures/traintime/LSST-best.pdf
saving: paper_figures/traintime/LSST10.pdf

In [11]:
def set_axis_style(ax, labels):
    ax.get_xaxis().set_tick_params(direction='out')
    ax.xaxis.set_ticks_position('bottom')
    ax.set_xticks(np.arange(1, len(labels) + 1))
    ax.set_xticklabels(labels,fontsize='x-large',rotation='vertical')
    ax.set_xlim(0.25, len(labels) + 0.75)
    #ax.set_xlabel('Data Set',fontsize='xx-large')

def plot_rotated_aucs(data_keys, save=False) :
    fig, ax1 = plt.subplots()
    inds = np.arange(1, len(data_keys) + 1)
    for i, k in enumerate(data_keys) :
        if i == 0 : (l1,lavg) = ("Scored for 1 rot", "Scored avg over 4 rot")
        else : (l1, lavg) = (None, None)
        ax1.plot([inds[i]], [rotated_aucs_data[k][0][0]], marker='*', color='r', ms=12, mec='r', lw=0.001, label=lavg)    
        ax1.plot([inds[i]]*(len(rotated_aucs_data[k][0])-1), rotated_aucs_data[k][0][1:], marker='o', color='b', mec='b', lw=0.001,ms=7, label=l1)
    ax1.set_ylabel('Area Under ROC Curve',fontsize='xx-large')
    set_axis_style(ax1, data_keys)
    ax1.legend()
    leg = ax1.legend(loc='best',fontsize='x-large',frameon=True, numpoints=1, scatterpoints=1)
    # change the font colors to match the line colors:
    for line,text in zip(leg.get_lines(), leg.get_texts()):
        text.set_color(line.get_color())
    plt.grid()
    plt.savefig('paper_figures/rotated_aucs/rotated_aucs.pdf',bbox_inches="tight")

In [33]:
rotated_aucs_files = {
'HST':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/HSTlikeModels/BCG/rotated_aucs_test.txt',
'LSST-best':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/single/BCG/rotated_aucs_test.txt',
'LSST10':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/stack/BCG/rotated_aucs_test.txt',
'nHST':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/HSTlikeModels/noBCG/rotated_aucs_test.txt',
'nLSST-best':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/single/noBCG/rotated_aucs_test.txt',
'nLSST10':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/stack/noBCG/rotated_aucs_test.txt'

}
rotated_aucs_data = {}
for k, v in rotated_aucs_files.iteritems() :
    rotated_aucs_data[k] = pd.read_csv(v,delimiter=' ',header=None)

In [34]:
plot_rotated_aucs(['LSST-best','nLSST-best','LSST10','nLSST10','HST','nHST'])



In [35]:
metric_files= {
    'HST':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/HSTlikeModels/BCG/print_all_scores_for_debugging_test.txt',
    'nHST':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/HSTlikeModels/noBCG/print_all_scores_for_debugging_test.txt',
    'LSST-best':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/single/BCG/print_all_scores_for_debugging_test.txt',
    'nLSST-best':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/single/noBCG/print_all_scores_for_debugging_test.txt',
    'LSST10':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/stack/BCG/print_all_scores_for_debugging_test.txt',
    'nLSST10':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/stack/noBCG/print_all_scores_for_debugging_test.txt',
    
}
metric_data = {}
for k, v in metric_files.iteritems() :
    metric_data[k] = pd.read_csv(v,delimiter=' ') 
    metric_data[k].sort('avgscores',inplace=True)
    fpr,tpr = fs._calc_tpr_fpr(metric_data[k]['avgscores'][:], metric_data[k]['label'][:])
    metric_data[k]['tpr'] = tpr
    metric_data[k]['fpr'] = fpr

In [15]:
def plot_ROC(data_keys, colors_ls, savefig=None, legend=True) : 
    for dk, c in zip(data_keys,colors_ls) :
        fpr, tpr, thresh = metrics.roc_curve(metric_data[dk]['label'], metric_data[dk]['avgscores'],pos_label=1)
        auc = metrics.roc_auc_score(metric_data[dk]['label'], metric_data[dk]['avgscores'])
        
        precision, recall, thresh = metrics.precision_recall_curve(metric_data[dk]['label'], metric_data[dk]['avgscores'],pos_label=1)
        #plt.plot(fpr, tpr, c, lw=5.,label=dk+' auc=%.2f'%auc)
        plt.plot(metric_data[dk]['fpr'], metric_data[dk]['tpr'], c, lw=5.,label=dk+' auc=%.3f'%auc)
        plt.plot((0,1),(0,1),'k--')
        plt.xlabel('False Positive Rate',fontsize='xx-large')
        plt.ylabel('True Positive Rate',fontsize='xx-large')
        
        plt.gca().tick_params(axis='x', labelsize='x-large')
        plt.gca().tick_params(axis='y', labelsize='x-large')

    if legend :
        leg = plt.legend(loc='lower right',fontsize='large',frameon=False)
        # change the font colors to match the line colors:
        for line,text in zip(leg.get_lines(), leg.get_texts()):
            text.set_color(line.get_color())
    if savefig != None :
        plt.grid()
        plt.savefig('paper_figures/metrics/ROC/'+savefig+'.pdf',bbox_inches="tight")

In [36]:
plot_ROC(['HST','nHST','LSST-best','nLSST-best','LSST10','nLSST10'],['r','r--','b','b--','g','g--'],savefig='ROC_compilation')



In [17]:
def plot_precision_recall(data_keys, colors_ls, savefig=None, legend=True) :
    for dk, c in zip(data_keys,colors_ls) :
        precision, recall, thresh = metrics.precision_recall_curve(metric_data[dk]['label'], metric_data[dk]['avgscores'],pos_label=1)
        plt.plot(recall, precision, c, lw=4.,label=dk)
    plt.xlabel('Recall (Completeness)',fontsize='xx-large')
    plt.ylabel('Precision (Purity)',fontsize='xx-large')
        
    plt.gca().tick_params(axis='x', labelsize='x-large')
    plt.gca().tick_params(axis='y', labelsize='x-large')

    plt.grid()
    plt.xlim(0,1)
    if legend :
        leg = plt.legend(loc='lower left',fontsize='large',frameon=False)
        # change the font colors to match the line colors:
        for line,text in zip(leg.get_lines(), leg.get_texts()):
            text.set_color(line.get_color())
    if savefig != None :
        plt.savefig('paper_figures/metrics/PR/'+savefig+'.pdf',bbox_inches="tight")

In [37]:
plot_precision_recall(['HST','nHST','LSST-best','nLSST-best','LSST10','nLSST10'],['r','r--','b','b--','g','g--'],savefig='PR_compilation')



In [19]:
def get_ROC_samples(data_key,borderline_tpr_range=None) :
    sorted_data = metric_data[data_key].sort('avgscores')
    sorted_lens = sorted_data.loc[sorted_data['label'] == 1]
    sorted_nonlens = sorted_data.loc[sorted_data['label'] == 0]
    
    borderline_nonlens_min = sorted_nonlens.loc[sorted_nonlens['tpr']<borderline_tpr_range[1]] 
    borderline_nonlens = borderline_nonlens_min.loc[borderline_nonlens_min['tpr']>borderline_tpr_range[0]]
    borderline_lens_min = sorted_lens.loc[sorted_lens['tpr']<borderline_tpr_range[1]] 
    borderline_lens = borderline_lens_min.loc[borderline_lens_min['tpr']>borderline_tpr_range[0]] 
    
    return {'tp': sorted_lens[-20:],
           'fp': sorted_nonlens[-20:],
            'bp': borderline_lens,
            'bn': borderline_nonlens,
            'fn':sorted_lens[:20],
            'tn':sorted_nonlens[:20],
           }

In [20]:
ROC_samples = {}
ROC_samples['LSST10'] = get_ROC_samples('LSST10',borderline_tpr_range=(0.78,0.8) )
ROC_samples["LSST10"]


Out[20]:
{'bn':        IDs  label  avgscores    score0    score1    score2    score3    tpr  \
 211   1188      0   0.466607  0.414649  0.447716  0.469970  0.534094  0.781   
 949   1852      0   0.466922  0.329979  0.622395  0.365537  0.549779  0.782   
 1023  1919      0   0.467467  0.471113  0.441819  0.404160  0.552775  0.783   
 738   1662      0   0.467721  0.577941  0.424227  0.427533  0.441182  0.784   
 228   1202      0   0.467867  0.594365  0.356105  0.495520  0.425478  0.785   
 112   1099      0   0.468043  0.679444  0.366476  0.437028  0.389224  0.786   
 1016  1912      0   0.468407  0.440817  0.385165  0.585401  0.462245  0.787   
 899   1807      0   0.469071  0.346982  0.549304  0.606930  0.373069  0.788   
 630   1565      0   0.469438  0.493639  0.446307  0.311340  0.626466  0.789   
 164   1145      0   0.470000  0.421679  0.566591  0.391761  0.499969  0.790   
 338   1301      0   0.471564  0.538281  0.542397  0.410749  0.394829  0.791   
 633   1568      0   0.472065  0.498596  0.418268  0.609830  0.361568  0.792   
 56    1048      0   0.473008  0.457492  0.273238  0.582005  0.579296  0.793   
 286   1255      0   0.473863  0.559406  0.554078  0.285398  0.496572  0.794   
 198   1176      0   0.474012  0.489718  0.553069  0.474727  0.378533  0.795   
 955   1858      0   0.474845  0.641667  0.427970  0.416383  0.413360  0.796   
 96    1084      0   0.474859  0.583209  0.401948  0.463326  0.450955  0.797   
 606   1543      0   0.475600  0.551532  0.400682  0.545302  0.404883  0.798   
 861   1773      0   0.476395  0.583219  0.825801  0.335220  0.161340  0.799   
 
         fpr  
 211   0.289  
 949   0.289  
 1023  0.289  
 738   0.289  
 228   0.289  
 112   0.289  
 1016  0.289  
 899   0.290  
 630   0.290  
 164   0.291  
 338   0.293  
 633   0.293  
 56    0.294  
 286   0.296  
 198   0.296  
 955   0.297  
 96    0.297  
 606   0.299  
 861   0.299  ,
 'bp':       IDs  label  avgscores    score0    score1    score2    score3    tpr  \
 1231  306      1   0.468705  0.355815  0.481027  0.429249  0.608731  0.788   
 1753  777      1   0.469794  0.459082  0.439995  0.588287  0.391811  0.790   
 1727  753      1   0.470777  0.459676  0.573444  0.364408  0.485582  0.791   
 14    101      1   0.470789  0.608766  0.539933  0.231118  0.503338  0.791   
 1765  788      1   0.472478  0.463549  0.489429  0.556280  0.380654  0.793   
 1792  811      1   0.473175  0.281027  0.526656  0.419226  0.665792  0.794   
 1581  621      1   0.473655  0.676538  0.478593  0.343542  0.395948  0.794   
 1460  512      1   0.474711  0.195103  0.614749  0.477372  0.611621  0.796   
 1574  615      1   0.474899  0.379947  0.504694  0.501778  0.513175  0.798   
 1883  894      1   0.475146  0.656599  0.384355  0.453013  0.406617  0.798   
 
         fpr  
 1231  0.289  
 1753  0.290  
 1727  0.291  
 14    0.292  
 1765  0.293  
 1792  0.294  
 1581  0.295  
 1460  0.296  
 1574  0.297  
 1883  0.298  ,
 'fn':       IDs  label  avgscores    score0    score1    score2    score3    tpr  \
 1446   50      1   0.066902  0.076449  0.080036  0.065881  0.045241  0.001   
 1524  570      1   0.100377  0.069747  0.136773  0.117230  0.077757  0.005   
 1864  877      1   0.105025  0.108137  0.114466  0.093289  0.104207  0.008   
 1438  493      1   0.114386  0.047299  0.097577  0.152126  0.160543  0.015   
 1475  526      1   0.116373  0.155829  0.095275  0.108555  0.105836  0.015   
 1400  459      1   0.128986  0.081105  0.061357  0.163123  0.210357  0.022   
 1820  837      1   0.144722  0.204823  0.121315  0.197759  0.054990  0.038   
 1241  315      1   0.156820  0.192809  0.157618  0.113814  0.163037  0.046   
 1735  760      1   0.157662  0.151498  0.172838  0.114741  0.191569  0.047   
 1508  556      1   0.164523  0.202673  0.113348  0.194322  0.147747  0.051   
 1597  636      1   0.174837  0.235587  0.191667  0.098246  0.173846  0.061   
 1281  351      1   0.175211  0.129211  0.270647  0.149911  0.151076  0.062   
 1173  254      1   0.175214  0.138643  0.132256  0.157627  0.272331  0.062   
 1965  968      1   0.179885  0.126693  0.187444  0.221192  0.184209  0.077   
 1681  711      1   0.192620  0.200483  0.144401  0.092696  0.332898  0.099   
 69    106      1   0.192654  0.220386  0.126575  0.216897  0.206756  0.099   
 1648  682      1   0.194538  0.205867  0.204615  0.168592  0.199078  0.103   
 1842  857      1   0.196975  0.237353  0.182145  0.285796  0.082605  0.109   
 1823   84      1   0.197148  0.138255  0.228331  0.169644  0.252361  0.109   
 1872  884      1   0.197891  0.214550  0.202597  0.164597  0.209818  0.109   
 
         fpr  
 1446  0.000  
 1524  0.001  
 1864  0.002  
 1438  0.003  
 1475  0.004  
 1400  0.005  
 1820  0.006  
 1241  0.007  
 1735  0.008  
 1508  0.009  
 1597  0.010  
 1281  0.011  
 1173  0.012  
 1965  0.013  
 1681  0.014  
 69    0.015  
 1648  0.016  
 1842  0.017  
 1823  0.018  
 1872  0.019  ,
 'fp':        IDs  label  avgscores    score0    score1    score2    score3    tpr  \
 592   1530      0   0.845499  0.907808  0.727562  0.907777  0.838850  0.980   
 660   1592      0   0.853053  0.765311  0.832464  0.882205  0.932233  0.981   
 985   1885      0   0.853506  0.880341  0.768833  0.831026  0.933825  0.982   
 847   1760      0   0.853671  0.785495  0.861581  0.859722  0.907887  0.983   
 272   1242      0   0.864994  0.838634  0.855901  0.882373  0.883068  0.984   
 231   1205      0   0.884163  0.901454  0.780300  0.936769  0.918129  0.985   
 720   1646      0   0.885994  0.911526  0.848766  0.939106  0.844576  0.986   
 306   1273      0   0.889511  0.881923  0.878567  0.895041  0.902512  0.987   
 1003  1900      0   0.896690  0.956171  0.861788  0.945111  0.823690  0.988   
 822   1738      0   0.899659  0.922366  0.882806  0.893434  0.900029  0.989   
 178   1158      0   0.900920  0.894640  0.953442  0.801425  0.954173  0.990   
 778   1699      0   0.915145  0.910572  0.837182  0.943650  0.969177  0.991   
 1111  1999      0   0.916716  0.928790  0.885517  0.970741  0.881817  0.992   
 21    1016      0   0.922783  0.941944  0.916270  0.888648  0.944270  0.993   
 201   1179      0   0.933250  0.941803  0.900793  0.928070  0.962333  0.994   
 581   1520      0   0.934980  0.917904  0.954126  0.935911  0.931979  0.995   
 675   1605      0   0.952706  0.893626  0.984009  0.948982  0.984205  0.996   
 707   1634      0   0.958829  0.895727  0.986094  0.969030  0.984463  0.997   
 628   1563      0   0.959297  0.994329  0.930671  0.989045  0.923145  0.998   
 520   1466      0   0.965584  0.969539  0.962744  0.946948  0.983104  0.999   
 
         fpr  
 592   0.778  
 660   0.787  
 985   0.787  
 847   0.787  
 272   0.795  
 231   0.807  
 720   0.810  
 306   0.817  
 1003  0.822  
 822   0.828  
 178   0.831  
 778   0.847  
 1111  0.849  
 21    0.856  
 201   0.869  
 581   0.871  
 675   0.897  
 707   0.909  
 628   0.910  
 520   0.920  ,
 'tn':        IDs  label  avgscores    score0    score1    score2    score3    tpr  \
 404   1361      0   0.065369  0.062744  0.070126  0.036652  0.091956  0.000   
 1048  1941      0   0.067416  0.014689  0.136062  0.042765  0.076146  0.001   
 810   1727      0   0.085603  0.050891  0.097967  0.107241  0.086314  0.002   
 568   1509      0   0.088321  0.094456  0.133905  0.074575  0.050347  0.003   
 13    1009      0   0.100341  0.126040  0.048799  0.100115  0.126411  0.004   
 107   1094      0   0.101960  0.217450  0.021997  0.155117  0.013275  0.005   
 541   1485      0   0.102206  0.153644  0.077561  0.076659  0.100962  0.006   
 176   1156      0   0.104573  0.146017  0.089022  0.077395  0.105856  0.007   
 489   1438      0   0.105433  0.087573  0.136206  0.133229  0.064724  0.008   
 279   1249      0   0.105579  0.095259  0.107981  0.123972  0.095105  0.009   
 319   1285      0   0.107327  0.148776  0.208865  0.020944  0.050724  0.010   
 255   1227      0   0.110958  0.052560  0.135451  0.119646  0.136176  0.011   
 799   1717      0   0.111317  0.101397  0.140932  0.111557  0.091381  0.012   
 134   1118      0   0.111948  0.150742  0.080794  0.110253  0.106001  0.013   
 356   1318      0   0.112227  0.176995  0.093603  0.064420  0.113892  0.014   
 1058  1950      0   0.116608  0.118517  0.094466  0.062505  0.190945  0.015   
 1075  1966      0   0.116686  0.128311  0.129780  0.128239  0.080414  0.016   
 325   1290      0   0.118731  0.140435  0.066469  0.174464  0.093554  0.017   
 139   1122      0   0.118975  0.145730  0.116524  0.132640  0.081007  0.018   
 936   1840      0   0.120762  0.229199  0.079002  0.147983  0.026863  0.019   
 
         fpr  
 404   0.000  
 1048  0.001  
 810   0.001  
 568   0.001  
 13    0.001  
 107   0.002  
 541   0.002  
 176   0.002  
 489   0.003  
 279   0.003  
 319   0.003  
 255   0.003  
 799   0.003  
 134   0.003  
 356   0.003  
 1058  0.005  
 1075  0.005  
 325   0.005  
 139   0.005  
 936   0.005  ,
 'tp':       IDs  label  avgscores    score0    score1    score2    score3  tpr  \
 1824  840      1   0.998766  0.999670  0.998907  0.999093  0.997394    1   
 213   119      1   0.998847  0.998775  0.999482  0.998554  0.998575    1   
 1365  427      1   0.999068  0.999096  0.998260  0.999429  0.999489    1   
 1933  939      1   0.999111  0.998570  0.998694  0.999367  0.999812    1   
 1450  503      1   0.999241  0.999733  0.999233  0.999514  0.998483    1   
 1282  352      1   0.999606  0.999754  0.999592  0.999875  0.999203    1   
 1326  392      1   0.999607  0.999608  0.999874  0.999505  0.999442    1   
 1401   46      1   0.999770  0.999910  0.999422  0.999870  0.999880    1   
 1445    5      1   0.999812  0.999795  0.999771  0.999871  0.999812    1   
 1596  635      1   0.999922  0.999848  0.999906  0.999956  0.999979    1   
 1745   77      1   0.999923  0.999983  0.999821  0.999984  0.999904    1   
 1216  293      1   0.999928  0.999959  0.999936  0.999972  0.999844    1   
 1131  216      1   0.999937  0.999949  0.999893  0.999968  0.999937    1   
 1279   35      1   0.999946  0.999956  0.999905  0.999986  0.999935    1   
 1730  756      1   0.999982  0.999991  0.999972  0.999995  0.999970    1   
 1830  846      1   0.999990  0.999992  0.999995  0.999976  0.999997    1   
 1886  897      1   0.999994  0.999998  0.999990  0.999998  0.999989    1   
 1866  879      1   0.999994  0.999986  0.999995  0.999998  0.999996    1   
 1880  891      1   0.999997  0.999999  0.999996  0.999998  0.999994    1   
 1127  212      1   0.999999  0.999998  0.999999  1.000000  1.000000    1   
 
         fpr  
 1824  0.980  
 213   0.981  
 1365  0.982  
 1933  0.983  
 1450  0.984  
 1282  0.985  
 1326  0.986  
 1401  0.987  
 1445  0.988  
 1596  0.989  
 1745  0.990  
 1216  0.991  
 1131  0.992  
 1279  0.993  
 1730  0.994  
 1830  0.995  
 1886  0.996  
 1866  0.997  
 1880  0.998  
 1127  0.999  }

In [21]:
def print_relevant_ROC_identifiers( data_key, ROC_region ) :
    print data_key, ROC_region
    print_str = ''
    for i in ROC_samples[data_key][ROC_region]['IDs'][:4] : print_str+=str(i)+','
    print print_str[:-1]
    print_str = ''
    for i in ROC_samples[data_key][ROC_region]['avgscores'][:4] : print_str+=str(i)+' '
    print print_str

In [22]:
print_relevant_ROC_identifiers('LSST10','tp')
print_relevant_ROC_identifiers('LSST10','fp')
print_relevant_ROC_identifiers('LSST10','bp')
print_relevant_ROC_identifiers('LSST10','bn')
print_relevant_ROC_identifiers('LSST10','fn')
print_relevant_ROC_identifiers('LSST10','tn')


LSST10 tp
840,119,427,939
0.998765847743 0.998846728 0.999068322456 0.999110818251 
LSST10 fp
1530,1592,1885,1760
0.84549915204 0.85305320065 0.85350637025 0.853671372684 
LSST10 bp
306,777,753,101
0.46870530545 0.469794138215 0.470777481305 0.470788648267 
LSST10 bn
1188,1852,1919,1662
0.466607435224 0.466922407936 0.46746689584 0.467720773946 
LSST10 fn
50,570,877,493
0.0669018108657 0.100376596908 0.105024656761 0.114386215568 
LSST10 tn
1361,1941,1727,1509
0.0653693055153 0.0674157632027 0.0856030885577 0.0883207040728 

In [51]:
lens_param_files= {
    'HST':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/HSTlikeModels/BCG/id_lensparam_test.txt',
              'nHST':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/HSTlikeModels/noBCG/id_lensparam_test.txt',       #'LSST1':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/single/BCG/id_lensparam_test.txt',
    'LSST-best':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/single/BCG/id_lensparam_test.txt',
    'nLSST-best':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/single/noBCG/id_lensparam_test.txt',
    'LSST10':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/stack/BCG/id_lensparam_test.txt',
    'nLSST10':'/home/babyostrich/Documents/Repos/StrongCNN/data/control_data/LSSTlikeModels/stack/noBCG/id_lensparam_test.txt',
    
}
lens_param_data = {}
for k, v in lens_param_files.iteritems() :
    lens_param_data[k] = pd.read_csv(v,delimiter=' ')

In [39]:
from matplotlib import cm

def create_colorbar(cmap, minimum, maximum) :
    # Using contourf to provide my colorbar info, then clearing the figure                                                                            
    Z = [[0,0],[0,0]]
    step=(maximum-minimum)/1000.
    levels = np.arange(minimum, maximum,step)
    CS3 = plt.contourf(Z, levels, cmap=cmap)
    plt.clf()
    return CS3

cmap = plt.get_cmap('hot_r')

def get_color(color_array) :
    normalized_color_array = (color_array - color_array.min() ) / (color_array.max()-color_array.min())
    return normalized_color_array

def plot_lens_param(data_key, xaxis_label, color_label,yaxis_label='avgscores',class_label=1,make_colorbar=True) :
    lens_param_subdata = lens_param_data[data_key][lens_param_data[data_key]['label'] == class_label ]
    xdata = lens_param_subdata[xaxis_label]
    ydata = lens_param_subdata[yaxis_label]
    cdata = lens_param_subdata[color_label]
    CS3 = create_colorbar(cmap, cdata.min(), cdata.max())
    if make_colorbar :
        plt.scatter( xdata, ydata,
                    color = cmap(get_color(cdata)),
                            marker='*',cmap=cmap)
    else :
        plt.scatter( xdata, ydata,
                    color = 'b',
                            marker='*')
    plt.xlabel(xaxis_label.replace('_',' '),fontsize='xx-large')
    plt.ylabel('Score',fontsize='xx-large')
    plt.ylim(0,1.1)
    print xdata.max()
    plt.xlim(xdata.min(),xdata.max()+(xdata.max()-xdata.min())*.1)
    #plt.gca().yaxis.set_major_formatter(ticker.FuncFormatter(myLogFormat))
    if make_colorbar : plt.colorbar(CS3)
    plt.text(0.7,0.1,data_key, ha='left', va='center', transform=plt.gca().transAxes,fontsize='xx-large')
    plt.grid()
    plt.savefig('paper_figures/lens_parameter_dependence/'+data_key+'_'+xaxis_label+'_'+color_label+'.pdf')

In [43]:
plot_lens_param('LSST10','Magnification','z',class_label=1,make_colorbar=False)
plot_lens_param('LSST-best','Magnification','z',class_label=1,make_colorbar=False)
plot_lens_param('HST','Magnification','z',class_label=1,make_colorbar=False)


30.8120063191
30.8120063191
30.8120063191

In [60]:
from calculate_ThetaE import calculate_ThetaE
def lens_param_add_ThetaE(data_key) :
    lens_param_data[data_key]['Einstein_Radius'] = calculate_ThetaE(
        lens_param_data[data_key]['Velocity_Dispersion'],
        lens_param_data[data_key]['z'],
        2.0
        )

In [62]:
for k, v in lens_param_data.iteritems() : lens_param_add_ThetaE(k)

In [63]:
plot_lens_param('HST','Einstein_Radius','z',class_label=1,make_colorbar=False)


2.6122412035

In [64]:
plot_lens_param('LSST-best','Einstein_Radius','z',class_label=1,make_colorbar=False)


2.6122412035

In [65]:
plot_lens_param('LSST10','Einstein_Radius','z',class_label=1,make_colorbar=False)


2.6122412035

In [ ]: