In [ ]:
import sys
import glob
import re
import fnmatch
import math
import os
from os import listdir
from os.path import join, isfile, basename

import itertools

import numpy as np
from numpy import float32, int32, uint8, dtype, genfromtxt

import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt

import scipy
from scipy.stats import ttest_ind

import pandas as pd

import colorsys

import template_common as tc

In [ ]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [ ]:
# base_dir='/nrs/saalfeld/john/projects/flyChemStainAtlas/all_evals/distanceStatsNorm'
base_dir='/nrs/saalfeld/john/projects/flyChemStainAtlas/all_evals/distanceStatsWarpNorm'

dest_dir = '/groups/saalfeld/home/bogovicj/pubDrafts/grpDrosTemplate/grpDrosTemplate/tables'
fig_dir = '/groups/saalfeld/home/bogovicj/pubDrafts/grpDrosTemplate/grpDrosTemplate/figs'


alg_list = ['antsRegDog', 'antsRegOwl', 'antsRegYang', 'cmtkCOG', 'cmtkCow', 'cmtkHideo']
# template_list = [ 'JFRC2013_lo', 'JFRCtemplate2010', 'TeforBrain_f', 'F-antsFlip_lo', 'F-cmtkFlip_lof', 'FCWB']
template_list = [ 'JFRC2013_lo', 'JFRCtemplate2010', 'TeforBrain_f', 'F-antsFlip_lo', 'FCWB']


# alg_name_map = { 'antsRegDog': 'ANTs A', 'antsRegOwl' : 'ANTs B', 'antsRegYang' : 'ANTs C',
#                 'cmtkCOG' : 'CMTK A',  'cmtkHideo' : 'CMTK B', 'cmtkCow' : 'CMTK C'}

# template_name_map = { 'JFRC2013_lo':'JFRC2013', 'JFRCtemplate2010' : 'JFRC2010', 
#                      'TeforBrain_f' : 'Tefor', 'F-antsFlip_lo' : 'JRC2018', 
#                      'F-cmtkFlip_lof' : 'CMTK groupwise', 'FCWB':'FCWB' }

In [ ]:
labels = [16,64,8,32,2,4,65,66,33,67,34,17,69,70,35,71,9,18,72,36,73,74,37,75,19,76,38,77,39,78,79,20,5,40,80,10,81,82,83,84,85,86,11,22,23,24,12,3,6,49,50,25,51,13,52,26,53,27,54,55,56,28,7,14,57,58,29,59,30,60,15,61,31,62,63]
label_names_file = '/groups/saalfeld/home/bogovicj/vfb/DrosAdultBRAINdomains/refData/Original_Index.tsv'

label_names = pd.read_csv( label_names_file, delimiter='\t', header=0 )
# print label_names[ label_names['Stack id'] == 11 ]['JFRCtempate2010.mask130819' ].iloc[0]
# print label_names[ label_names['Stack id'] == 70 ]['JFRCtempate2010.mask130819' ].iloc[0]

def get_label_name( label_id ):
    return label_names[ label_names['Stack id'] == label_id ]['JFRCtempate2010.mask130819' ].iloc[0]

def clean_cols( df ):
    ## clean up the weird columns
    df.columns = [ c[0] if c[1].startswith('Unnamed') else c[1] for c in df.columns.values ]
    
def flatten_heir_cols( df ):
    ## flatten heirarchical columns
    df.columns = [ '_'.join(c) for c in df.columns.values ]

In [ ]:
# Load everything
# os.listdir( base_dir )

df = None
# for tmp in template_list:
for f in glob.glob( ''.join([base_dir,'/*.csv']) ):
#     f = glob.glob( ''.join([base_dir,'/',tmp,'.csv']) )
    print( f )
    this_df = pd.read_csv( f, header=[0,1], index_col=0 )
    if df is None:
        df = this_df
    else:
        df = df.append( this_df )

clean_cols( df )
df['std'] = df.apply( lambda x: math.sqrt(x['var']), axis=1)
df['gam_std'] = df.apply( lambda x: math.sqrt(x['gam_var']), axis=1)
df['ray_std'] = df.apply( lambda x: math.sqrt(x['ray_var']), axis=1)
df.reset_index( drop=True )

In [ ]:
# with open( '/'.join([dest_dir, 'total_stat_table_raw.tex' ]), 'w') as f:
#     f.write( df.to_latex())

In [ ]:
# df_mn_grp = df.loc[ df.LABEL == -1, mean_cols ].groupby(['TEMPLATE','ALG'],as_index=True)

# keep_cols = ['TEMPLATE','ALG','mean','ray_mean','gam_mean', 'std', 'var', 'ray_var', 'gam_var']
keep_cols = ['TEMPLATE','ALG','mean','ray_mean','gam_mean', 'std', 'ray_std', 'gam_std', 'gam_mode','median']


# print( df_mn_grp )
df_label = df.loc[ df.LABEL == -1, keep_cols ]
# print( df_mn_grp.to_latex())

df_label['ray_mn_diff'] = df_label.apply( lambda x: x['mean'] - x['ray_mean'], axis=1)
df_label['gam_mn_diff'] = df_label.apply( lambda x: x['mean'] - x['gam_mean'], axis=1)
# df_label['ray_var_diff'] = df_label.apply( lambda x: x['var'] - x['ray_var'], axis=1)
# df_label['gam_var_diff'] = df_label.apply( lambda x: x['var'] - x['gam_var'], axis=1)
df_label['ray_std_diff'] = df_label.apply( lambda x: x['std'] - x['ray_std'], axis=1)
df_label['gam_std_diff'] = df_label.apply( lambda x: x['std'] - x['gam_std'], axis=1)
df_label

In [ ]:
# with open( '/'.join([dest_dir, 'stat_table_raw.tex' ]), 'w') as f:
#     f.write( df.to_latex())

# df_label.loc[df_label.ALG != 'ALL',['TEMPLATE','ALG','mean','std','median','gam_mode']].sort_values( by=['mean'] )
df_label_sorted = df_label.loc[df_label.ALG != 'ALL',['TEMPLATE','ALG','mean','std','median']].sort_values( by=['mean'] )

# with open( '/'.join([dest_dir, 'stat_table_affineNorm_raw_20180524.tex' ]), 'w') as f:
#     f.write( df_label_sorted.to_latex())

df_label_sorted

In [ ]:
df_label[df_label.ALG == 'ALL']

In [ ]:
# Table with the best algorithm ( by empirical mean )

## Hard to show which algorithm is the best this way
# best_alg = df_label.groupby(['TEMPLATE']).aggregate( {'mean' : 'min' })
# best_alg

df_mean_bestAlg = None
for template in df_label.TEMPLATE.unique():
    minval = np.min( df_label.loc[ df_label.TEMPLATE == template, 'mean'])
    df_t = df_label.loc[ (df_label.TEMPLATE == template) & (df_label.loc[:,'mean'] == minval ), :]
    
    if df_mean_bestAlg is None:
        df_mean_bestAlg = df_t
    else:
        df_mean_bestAlg = df_mean_bestAlg.append( df_t )
        
df_mean_bestAlg

In [ ]:
# merge relevant tables and write to tex

dest_file = 'gamMnStd_affineNorm_byTemplate_raw_20180524.tex'
mean_col_name = 'gam_mean'
std_col_name = 'gam_std'
cols = ['TEMPLATE', mean_col_name, std_col_name]

# stick with two decimals places
pd.options.display.float_format = '{:,.2f}'.format

allk = df_label.loc[ df_label.ALG == 'ALL', cols].set_index('TEMPLATE')
bestk = df_mean_bestAlg.loc[ :, cols].set_index('TEMPLATE')
dog = df_label.loc[ df_label.ALG == 'antsRegDog', cols ].set_index('TEMPLATE')
cog = df_label.loc[ df_label.ALG == 'cmtkCOG', cols ].set_index('TEMPLATE')

# print( allk )
    
tbl_mean = allk.join( bestk, lsuffix='_All', rsuffix='_Best' )
tbl_mean = tbl_mean.join( dog ) # suffixes do nothing because no overlapping columns
tbl_mean = tbl_mean.join( cog, lsuffix='_antsRegDog', rsuffix='_cmtkCOG')

# Merge mean and std columns into a string column with std in parens
def mnStd2String( x ):
    return '{:0.2f} ({:0.2f})'.format( x['_'.join([mean_col_name ,s])], x['_'.join([std_col_name,s])])

# Make a new table
tbl_mean_format = pd.DataFrame() # Formatted version of the table
suffixlist = ['All', 'Best', 'antsRegDog', 'cmtkCOG']
for s in suffixlist:
    tbl_mean_format[s] = tbl_mean.apply( mnStd2String, axis=1)

# with open( '/'.join([dest_dir, dest_file ]), 'w') as f:
#     f.write( tbl_mean_format.to_latex())

In [ ]:
tbl_mean_format

In [ ]:
## Plot the gamma fits

def parse_gam_param_string( gam_param_string ):
    return [ float(p) for p in gam_param_string.replace('(','').replace(')','').split(',') ]

gam_param_col = 'gam_params_fl'
x = np.linspace( 0, 25, 100 )

for template in template_list:
    print( template )
    
    gam_params_string = df.loc[ (df.TEMPLATE == template) & (df.ALG == 'antsRegDog') & (df.LABEL == -1), gam_param_col ][0]
    gam_params = parse_gam_param_string( gam_params_string )
    gam_fitted = scipy.stats.gamma.pdf( x, gam_params[0], loc=gam_params[1], scale=gam_params[2])
    
    plt.plot( gam_fitted, label=template )


fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.legend()
plt.rc('font', size=25 )
# plt.savefig( '/'.join([fig_dir,'gamDist_antsRegDog_byTemplate.svg']))
# plt.savefig( '/'.join([fig_dir,'gamDist_antsRegDog_byTemplate.pdf']))

In [ ]:
# jacstats_f = '/nrs/saalfeld/john/projects/flyChemStainAtlas/all_evals/jacobianStats/jacobian_stats_wSubs.csv'
jacstats_f = '/nrs/saalfeld/john/projects/flyChemStainAtlas/all_evals/jacobianStats/jacobian_stats_wSubs.csv'
jac_df = pd.read_csv( jacstats_f )

In [ ]:
# print( jac_df.head())
# print( ' ')
# print( jac_df.tail())

In [ ]:
# jacstd_all_df = jac_df[ jac_df.LABEL == -1 ].groupby( ['ALG','TEMPLATE'])#.pivot( columns='STAT', values='VALUE')
# jacstd_all_df = jac_df.loc[ jac_df.LABEL == -1, ['TEMPLATE','ALG','STAT','VALUE']]
jacstd_all_df = jac_df.loc[ (jac_df.LABEL == -1) & (jac_df.STAT=='STDDEV'), ['TEMPLATE','ALG','VALUE']]

jacstd_all_df.columns = ['TEMPLATE','ALG','JACSTD']
# jacstd_all_df['VAR'] = jacstd_all_df.apply( lambda x: x['JACSTD']*x['JACSTD'])
jacstd_all_df.head()

In [ ]:
# Assumes the counts (weights for each mean are identical )
def pooledMeanAndVariance( means, variances, counts=None ):
    grand_mean = np.mean( means )
    if counts is not None:
        top = counts * (np.array( variances) + means)
        return  ( np.sum(top)/ np.sum(counts)) - grand_mean
    else:
        top = ( np.array(variances) + means )
        return  (np.sum(top) / len(means)) - grand_mean

means =  [ 1.0, 6.0, 2.0 ]
variances =  [ 2.0, 12.0, 4.0 ]
counts = [ 10, 10, 10 ]

print( pooledMeanAndVariance( means,variances ))
print( pooledMeanAndVariance( means, variances, counts ))

def addTAcol( df ):
    df['TA'] = df.apply( lambda x: '{}:{}'.format( x['TEMPLATE'], x['ALG']), axis=1 )

In [ ]:
# tmp =  jac_df.loc[ (jac_df.TEMPLATE == 'FCWB') & (jac_df.ALG == 'antsRegDog')]
# tmp_l = tmp.loc[ tmp.LABEL == 49 ]
# tmp_l

tmp =  jacstd_all_df.loc[ (jacstd_all_df.TEMPLATE == 'FCWB') & (jacstd_all_df.ALG == 'antsRegDog')]
# tmp_l = tmp.loc[ tmp.LABEL == 49 ]
tmp

In [ ]:
labels_w_all = labels
labels_w_all += [-1]

# jacstd_all_df.groupby(['TEMPLATE']).mean()
jac_df_all = jac_df.loc[ (jac_df.LABEL == -1)]

# print( len(jac_df_all) )
# print( ' ' )

# jac_df_all.sample(10)

t_list = [] # templates
a_list = [] # alg
l_list = [] # labels
m_list = [] # means
s_list = [] # stddev

# template_list= ['FCWB']
for t in template_list:
    for a in alg_list:
#         print('alg ', a )
        this_template =  jac_df.loc[ (jac_df.TEMPLATE == t) & (jac_df.ALG == a)]
        
        for label in labels_w_all:
#             print( 'label', label )
            
            if( label == -1 ):
                dat = this_template
            else:
                dat = this_template.loc[ this_template.LABEL == label ]

#             c = this_template.loc[ this_template.STAT =='COUNT']
            m = dat.loc[ dat.STAT =='MEAN']
            s = dat.loc[ dat.STAT =='STDDEV']

            if( len(m) == 0 ):
                t_list += [t]
                a_list += [a]
                l_list += [label]
                m_list += [ float('nan') ]
                s_list += [ float('nan') ]
                continue
                
            pooledVar = pooledMeanAndVariance( m.VALUE, s.VALUE * s.VALUE )
#             print( 'pv ', pooledVar )
            pooledStd = math.sqrt(pooledVar)
            
            t_list += [t]
            a_list += [a]
            l_list += [label]
            m_list += [ np.mean( m.VALUE )]
            s_list += [ pooledStd ]
            

summary_df = pd.DataFrame( {
    'TEMPLATE':t_list,
    'ALG':a_list,
    'LABEL':l_list,
    'MEAN':m_list,
    'STDDEV':s_list })

summary_df.head()

In [ ]:
summary_df_labels = summary_df.loc[ summary_df.LABEL == -1 ]

addTAcol( df_label )
addTAcol( summary_df_labels )

summary_df_labels.columns = ['ALG', 'LABEL', 'JACMEAN', 'JACSTDDEV', 'TEMPLATE', 'TA']

summary_df_labels.head()

In [ ]:
# print( len(df_label[df_label.ALG != 'ALL']) )
# print( len(summary_df_labels) )

# print( df_label)
# print( ' ' )
# print( summary_df_labels)


df_dist_jac = summary_df_labels.set_index('TA').join( df_label.set_index('TA'), lsuffix='_jac' )

In [ ]:
# summary_df_labels[ summary_df_labels.ALG == 'antsRegDog']
pd.options.display.float_format = '{:,.4f}'.format
jac_ants_tbl = summary_df_labels[ summary_df_labels.ALG == 'antsRegDog'].pivot(columns='TEMPLATE', values='JACSTDDEV')
jac_ants_tbl

In [ ]:
jac_summary_writeme = summary_df_labels[['TEMPLATE','ALG','JACMEAN','JACSTDDEV']]
# summary_df_labels[['ALG','TEMPLATE','JACMEAN','JACSTDDEV']].groupby(('TEMPLATE','ALG'),as_index=False)

# with open( '/'.join([dest_dir, 'supp_jacMnStd_raw.tex' ]), 'w') as f:
#     f.write( jac_summary_writeme.to_latex() )

In [ ]:
pd.options.display.float_format = '{:,.2f}'.format
# print( template_list )

# template_indices = [ template_list.index(t) for t in df_dist_jac.loc[:,'TEMPLATE'] ]
# template_indices

template_color_map = { 'JFRC2010':'firebrick',
                       'JFRC2013':'navy',
                       'FCWB':'darkgreen',
                       'Tefor':'darkorchid',
                       'JRC2018':'black',
#                        'CMTK groupwise':'gray'
                       'CMTK groupwise':'darkorange'
                     }

In [ ]:
marker_map = { 'antsRegDog':'o', 'antsRegOwl':'v', 'antsRegYang':'s',
              'cmtkCOG':'+', 'cmtkCow':'d', 'cmtkHideo':'x' }
marker_map
marker_map['cmtkCOG']
# alg_list

for a in alg_list:
    df_alg_subset =  df_dist_jac.loc[df_dist_jac.ALG == a,:]
    
#     colors=[ template_list.index(t) for t in df_alg_subset.loc[:,'TEMPLATE'] ]
    colors=[ template_color_map[tc.template_name(t)] for t in df_alg_subset.loc[:,'TEMPLATE'] ]

#     plt.scatter( df_alg_subset.loc[:,'JACSTDDEV'], 
#                  df_alg_subset.loc[:,'mean'], c=colors, marker=marker_map[a] )

    plt.scatter( df_alg_subset.loc[:,'JACSTDDEV'], 
                 df_alg_subset.loc[:,'mean'], c=colors, marker='o' )
    
plt.xlabel('Jacobian determinant standard deviation', size=18)
plt.ylabel('Mean distance (um)', size=18)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

ax = plt.gca()
for i,row in df_dist_jac.iterrows():
    t=(row['TEMPLATE']).lstrip(' ')
    s = "   " + tc.template_name(t) + " : " + tc.alg_name(row['ALG'].lstrip(' '))
    ax.annotate( s, (row['JACSTDDEV'], row['mean'] ), size=13, color=template_color_map[tc.template_name(t)] )

plt.grid( which='major', linestyle=':', dashes=(3,3))

fig = plt.gcf()
a = fig.set_size_inches( 16, 10 )

# plt.savefig( '/groups/saalfeld/home/bogovicj/pubDrafts/grpDrosTemplate/grpDrosTemplate/figs/distAffineNorm_vs_jacstd_colors_20180531.svg')
# plt.savefig( '/groups/saalfeld/home/bogovicj/pubDrafts/grpDrosTemplate/grpDrosTemplate/figs/distWarpNorm_vs_jacstd_colors_20180531.svg')