In [ ]:
import sys
import glob
import re
import fnmatch
import math
import os
from os import listdir
from os.path import join, isfile, basename
import itertools
import numpy as np
from numpy import float32, int32, uint8, dtype, genfromtxt
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
import scipy
from scipy.stats import ttest_ind
import pandas as pd
import colorsys
import template_common as tc
In [ ]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))
In [ ]:
# base_dir='/nrs/saalfeld/john/projects/flyChemStainAtlas/all_evals/distanceStatsNorm'
base_dir='/nrs/saalfeld/john/projects/flyChemStainAtlas/all_evals/distanceStatsWarpNorm'
dest_dir = '/groups/saalfeld/home/bogovicj/pubDrafts/grpDrosTemplate/grpDrosTemplate/tables'
fig_dir = '/groups/saalfeld/home/bogovicj/pubDrafts/grpDrosTemplate/grpDrosTemplate/figs'
alg_list = ['antsRegDog', 'antsRegOwl', 'antsRegYang', 'cmtkCOG', 'cmtkCow', 'cmtkHideo']
# template_list = [ 'JFRC2013_lo', 'JFRCtemplate2010', 'TeforBrain_f', 'F-antsFlip_lo', 'F-cmtkFlip_lof', 'FCWB']
template_list = [ 'JFRC2013_lo', 'JFRCtemplate2010', 'TeforBrain_f', 'F-antsFlip_lo', 'FCWB']
# alg_name_map = { 'antsRegDog': 'ANTs A', 'antsRegOwl' : 'ANTs B', 'antsRegYang' : 'ANTs C',
# 'cmtkCOG' : 'CMTK A', 'cmtkHideo' : 'CMTK B', 'cmtkCow' : 'CMTK C'}
# template_name_map = { 'JFRC2013_lo':'JFRC2013', 'JFRCtemplate2010' : 'JFRC2010',
# 'TeforBrain_f' : 'Tefor', 'F-antsFlip_lo' : 'JRC2018',
# 'F-cmtkFlip_lof' : 'CMTK groupwise', 'FCWB':'FCWB' }
In [ ]:
labels = [16,64,8,32,2,4,65,66,33,67,34,17,69,70,35,71,9,18,72,36,73,74,37,75,19,76,38,77,39,78,79,20,5,40,80,10,81,82,83,84,85,86,11,22,23,24,12,3,6,49,50,25,51,13,52,26,53,27,54,55,56,28,7,14,57,58,29,59,30,60,15,61,31,62,63]
label_names_file = '/groups/saalfeld/home/bogovicj/vfb/DrosAdultBRAINdomains/refData/Original_Index.tsv'
label_names = pd.read_csv( label_names_file, delimiter='\t', header=0 )
# print label_names[ label_names['Stack id'] == 11 ]['JFRCtempate2010.mask130819' ].iloc[0]
# print label_names[ label_names['Stack id'] == 70 ]['JFRCtempate2010.mask130819' ].iloc[0]
def get_label_name( label_id ):
return label_names[ label_names['Stack id'] == label_id ]['JFRCtempate2010.mask130819' ].iloc[0]
def clean_cols( df ):
## clean up the weird columns
df.columns = [ c[0] if c[1].startswith('Unnamed') else c[1] for c in df.columns.values ]
def flatten_heir_cols( df ):
## flatten heirarchical columns
df.columns = [ '_'.join(c) for c in df.columns.values ]
In [ ]:
# Load everything
# os.listdir( base_dir )
df = None
# for tmp in template_list:
for f in glob.glob( ''.join([base_dir,'/*.csv']) ):
# f = glob.glob( ''.join([base_dir,'/',tmp,'.csv']) )
print( f )
this_df = pd.read_csv( f, header=[0,1], index_col=0 )
if df is None:
df = this_df
else:
df = df.append( this_df )
clean_cols( df )
df['std'] = df.apply( lambda x: math.sqrt(x['var']), axis=1)
df['gam_std'] = df.apply( lambda x: math.sqrt(x['gam_var']), axis=1)
df['ray_std'] = df.apply( lambda x: math.sqrt(x['ray_var']), axis=1)
df.reset_index( drop=True )
In [ ]:
# with open( '/'.join([dest_dir, 'total_stat_table_raw.tex' ]), 'w') as f:
# f.write( df.to_latex())
In [ ]:
# df_mn_grp = df.loc[ df.LABEL == -1, mean_cols ].groupby(['TEMPLATE','ALG'],as_index=True)
# keep_cols = ['TEMPLATE','ALG','mean','ray_mean','gam_mean', 'std', 'var', 'ray_var', 'gam_var']
keep_cols = ['TEMPLATE','ALG','mean','ray_mean','gam_mean', 'std', 'ray_std', 'gam_std', 'gam_mode','median']
# print( df_mn_grp )
df_label = df.loc[ df.LABEL == -1, keep_cols ]
# print( df_mn_grp.to_latex())
df_label['ray_mn_diff'] = df_label.apply( lambda x: x['mean'] - x['ray_mean'], axis=1)
df_label['gam_mn_diff'] = df_label.apply( lambda x: x['mean'] - x['gam_mean'], axis=1)
# df_label['ray_var_diff'] = df_label.apply( lambda x: x['var'] - x['ray_var'], axis=1)
# df_label['gam_var_diff'] = df_label.apply( lambda x: x['var'] - x['gam_var'], axis=1)
df_label['ray_std_diff'] = df_label.apply( lambda x: x['std'] - x['ray_std'], axis=1)
df_label['gam_std_diff'] = df_label.apply( lambda x: x['std'] - x['gam_std'], axis=1)
df_label
In [ ]:
# with open( '/'.join([dest_dir, 'stat_table_raw.tex' ]), 'w') as f:
# f.write( df.to_latex())
# df_label.loc[df_label.ALG != 'ALL',['TEMPLATE','ALG','mean','std','median','gam_mode']].sort_values( by=['mean'] )
df_label_sorted = df_label.loc[df_label.ALG != 'ALL',['TEMPLATE','ALG','mean','std','median']].sort_values( by=['mean'] )
# with open( '/'.join([dest_dir, 'stat_table_affineNorm_raw_20180524.tex' ]), 'w') as f:
# f.write( df_label_sorted.to_latex())
df_label_sorted
In [ ]:
df_label[df_label.ALG == 'ALL']
In [ ]:
# Table with the best algorithm ( by empirical mean )
## Hard to show which algorithm is the best this way
# best_alg = df_label.groupby(['TEMPLATE']).aggregate( {'mean' : 'min' })
# best_alg
df_mean_bestAlg = None
for template in df_label.TEMPLATE.unique():
minval = np.min( df_label.loc[ df_label.TEMPLATE == template, 'mean'])
df_t = df_label.loc[ (df_label.TEMPLATE == template) & (df_label.loc[:,'mean'] == minval ), :]
if df_mean_bestAlg is None:
df_mean_bestAlg = df_t
else:
df_mean_bestAlg = df_mean_bestAlg.append( df_t )
df_mean_bestAlg
In [ ]:
# merge relevant tables and write to tex
dest_file = 'gamMnStd_affineNorm_byTemplate_raw_20180524.tex'
mean_col_name = 'gam_mean'
std_col_name = 'gam_std'
cols = ['TEMPLATE', mean_col_name, std_col_name]
# stick with two decimals places
pd.options.display.float_format = '{:,.2f}'.format
allk = df_label.loc[ df_label.ALG == 'ALL', cols].set_index('TEMPLATE')
bestk = df_mean_bestAlg.loc[ :, cols].set_index('TEMPLATE')
dog = df_label.loc[ df_label.ALG == 'antsRegDog', cols ].set_index('TEMPLATE')
cog = df_label.loc[ df_label.ALG == 'cmtkCOG', cols ].set_index('TEMPLATE')
# print( allk )
tbl_mean = allk.join( bestk, lsuffix='_All', rsuffix='_Best' )
tbl_mean = tbl_mean.join( dog ) # suffixes do nothing because no overlapping columns
tbl_mean = tbl_mean.join( cog, lsuffix='_antsRegDog', rsuffix='_cmtkCOG')
# Merge mean and std columns into a string column with std in parens
def mnStd2String( x ):
return '{:0.2f} ({:0.2f})'.format( x['_'.join([mean_col_name ,s])], x['_'.join([std_col_name,s])])
# Make a new table
tbl_mean_format = pd.DataFrame() # Formatted version of the table
suffixlist = ['All', 'Best', 'antsRegDog', 'cmtkCOG']
for s in suffixlist:
tbl_mean_format[s] = tbl_mean.apply( mnStd2String, axis=1)
# with open( '/'.join([dest_dir, dest_file ]), 'w') as f:
# f.write( tbl_mean_format.to_latex())
In [ ]:
tbl_mean_format
In [ ]:
## Plot the gamma fits
def parse_gam_param_string( gam_param_string ):
return [ float(p) for p in gam_param_string.replace('(','').replace(')','').split(',') ]
gam_param_col = 'gam_params_fl'
x = np.linspace( 0, 25, 100 )
for template in template_list:
print( template )
gam_params_string = df.loc[ (df.TEMPLATE == template) & (df.ALG == 'antsRegDog') & (df.LABEL == -1), gam_param_col ][0]
gam_params = parse_gam_param_string( gam_params_string )
gam_fitted = scipy.stats.gamma.pdf( x, gam_params[0], loc=gam_params[1], scale=gam_params[2])
plt.plot( gam_fitted, label=template )
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.legend()
plt.rc('font', size=25 )
# plt.savefig( '/'.join([fig_dir,'gamDist_antsRegDog_byTemplate.svg']))
# plt.savefig( '/'.join([fig_dir,'gamDist_antsRegDog_byTemplate.pdf']))
In [ ]:
# jacstats_f = '/nrs/saalfeld/john/projects/flyChemStainAtlas/all_evals/jacobianStats/jacobian_stats_wSubs.csv'
jacstats_f = '/nrs/saalfeld/john/projects/flyChemStainAtlas/all_evals/jacobianStats/jacobian_stats_wSubs.csv'
jac_df = pd.read_csv( jacstats_f )
In [ ]:
# print( jac_df.head())
# print( ' ')
# print( jac_df.tail())
In [ ]:
# jacstd_all_df = jac_df[ jac_df.LABEL == -1 ].groupby( ['ALG','TEMPLATE'])#.pivot( columns='STAT', values='VALUE')
# jacstd_all_df = jac_df.loc[ jac_df.LABEL == -1, ['TEMPLATE','ALG','STAT','VALUE']]
jacstd_all_df = jac_df.loc[ (jac_df.LABEL == -1) & (jac_df.STAT=='STDDEV'), ['TEMPLATE','ALG','VALUE']]
jacstd_all_df.columns = ['TEMPLATE','ALG','JACSTD']
# jacstd_all_df['VAR'] = jacstd_all_df.apply( lambda x: x['JACSTD']*x['JACSTD'])
jacstd_all_df.head()
In [ ]:
# Assumes the counts (weights for each mean are identical )
def pooledMeanAndVariance( means, variances, counts=None ):
grand_mean = np.mean( means )
if counts is not None:
top = counts * (np.array( variances) + means)
return ( np.sum(top)/ np.sum(counts)) - grand_mean
else:
top = ( np.array(variances) + means )
return (np.sum(top) / len(means)) - grand_mean
means = [ 1.0, 6.0, 2.0 ]
variances = [ 2.0, 12.0, 4.0 ]
counts = [ 10, 10, 10 ]
print( pooledMeanAndVariance( means,variances ))
print( pooledMeanAndVariance( means, variances, counts ))
def addTAcol( df ):
df['TA'] = df.apply( lambda x: '{}:{}'.format( x['TEMPLATE'], x['ALG']), axis=1 )
In [ ]:
# tmp = jac_df.loc[ (jac_df.TEMPLATE == 'FCWB') & (jac_df.ALG == 'antsRegDog')]
# tmp_l = tmp.loc[ tmp.LABEL == 49 ]
# tmp_l
tmp = jacstd_all_df.loc[ (jacstd_all_df.TEMPLATE == 'FCWB') & (jacstd_all_df.ALG == 'antsRegDog')]
# tmp_l = tmp.loc[ tmp.LABEL == 49 ]
tmp
In [ ]:
labels_w_all = labels
labels_w_all += [-1]
# jacstd_all_df.groupby(['TEMPLATE']).mean()
jac_df_all = jac_df.loc[ (jac_df.LABEL == -1)]
# print( len(jac_df_all) )
# print( ' ' )
# jac_df_all.sample(10)
t_list = [] # templates
a_list = [] # alg
l_list = [] # labels
m_list = [] # means
s_list = [] # stddev
# template_list= ['FCWB']
for t in template_list:
for a in alg_list:
# print('alg ', a )
this_template = jac_df.loc[ (jac_df.TEMPLATE == t) & (jac_df.ALG == a)]
for label in labels_w_all:
# print( 'label', label )
if( label == -1 ):
dat = this_template
else:
dat = this_template.loc[ this_template.LABEL == label ]
# c = this_template.loc[ this_template.STAT =='COUNT']
m = dat.loc[ dat.STAT =='MEAN']
s = dat.loc[ dat.STAT =='STDDEV']
if( len(m) == 0 ):
t_list += [t]
a_list += [a]
l_list += [label]
m_list += [ float('nan') ]
s_list += [ float('nan') ]
continue
pooledVar = pooledMeanAndVariance( m.VALUE, s.VALUE * s.VALUE )
# print( 'pv ', pooledVar )
pooledStd = math.sqrt(pooledVar)
t_list += [t]
a_list += [a]
l_list += [label]
m_list += [ np.mean( m.VALUE )]
s_list += [ pooledStd ]
summary_df = pd.DataFrame( {
'TEMPLATE':t_list,
'ALG':a_list,
'LABEL':l_list,
'MEAN':m_list,
'STDDEV':s_list })
summary_df.head()
In [ ]:
summary_df_labels = summary_df.loc[ summary_df.LABEL == -1 ]
addTAcol( df_label )
addTAcol( summary_df_labels )
summary_df_labels.columns = ['ALG', 'LABEL', 'JACMEAN', 'JACSTDDEV', 'TEMPLATE', 'TA']
summary_df_labels.head()
In [ ]:
# print( len(df_label[df_label.ALG != 'ALL']) )
# print( len(summary_df_labels) )
# print( df_label)
# print( ' ' )
# print( summary_df_labels)
df_dist_jac = summary_df_labels.set_index('TA').join( df_label.set_index('TA'), lsuffix='_jac' )
In [ ]:
# summary_df_labels[ summary_df_labels.ALG == 'antsRegDog']
pd.options.display.float_format = '{:,.4f}'.format
jac_ants_tbl = summary_df_labels[ summary_df_labels.ALG == 'antsRegDog'].pivot(columns='TEMPLATE', values='JACSTDDEV')
jac_ants_tbl
In [ ]:
jac_summary_writeme = summary_df_labels[['TEMPLATE','ALG','JACMEAN','JACSTDDEV']]
# summary_df_labels[['ALG','TEMPLATE','JACMEAN','JACSTDDEV']].groupby(('TEMPLATE','ALG'),as_index=False)
# with open( '/'.join([dest_dir, 'supp_jacMnStd_raw.tex' ]), 'w') as f:
# f.write( jac_summary_writeme.to_latex() )
In [ ]:
pd.options.display.float_format = '{:,.2f}'.format
# print( template_list )
# template_indices = [ template_list.index(t) for t in df_dist_jac.loc[:,'TEMPLATE'] ]
# template_indices
template_color_map = { 'JFRC2010':'firebrick',
'JFRC2013':'navy',
'FCWB':'darkgreen',
'Tefor':'darkorchid',
'JRC2018':'black',
# 'CMTK groupwise':'gray'
'CMTK groupwise':'darkorange'
}
In [ ]:
marker_map = { 'antsRegDog':'o', 'antsRegOwl':'v', 'antsRegYang':'s',
'cmtkCOG':'+', 'cmtkCow':'d', 'cmtkHideo':'x' }
marker_map
marker_map['cmtkCOG']
# alg_list
for a in alg_list:
df_alg_subset = df_dist_jac.loc[df_dist_jac.ALG == a,:]
# colors=[ template_list.index(t) for t in df_alg_subset.loc[:,'TEMPLATE'] ]
colors=[ template_color_map[tc.template_name(t)] for t in df_alg_subset.loc[:,'TEMPLATE'] ]
# plt.scatter( df_alg_subset.loc[:,'JACSTDDEV'],
# df_alg_subset.loc[:,'mean'], c=colors, marker=marker_map[a] )
plt.scatter( df_alg_subset.loc[:,'JACSTDDEV'],
df_alg_subset.loc[:,'mean'], c=colors, marker='o' )
plt.xlabel('Jacobian determinant standard deviation', size=18)
plt.ylabel('Mean distance (um)', size=18)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
ax = plt.gca()
for i,row in df_dist_jac.iterrows():
t=(row['TEMPLATE']).lstrip(' ')
s = " " + tc.template_name(t) + " : " + tc.alg_name(row['ALG'].lstrip(' '))
ax.annotate( s, (row['JACSTDDEV'], row['mean'] ), size=13, color=template_color_map[tc.template_name(t)] )
plt.grid( which='major', linestyle=':', dashes=(3,3))
fig = plt.gcf()
a = fig.set_size_inches( 16, 10 )
# plt.savefig( '/groups/saalfeld/home/bogovicj/pubDrafts/grpDrosTemplate/grpDrosTemplate/figs/distAffineNorm_vs_jacstd_colors_20180531.svg')
# plt.savefig( '/groups/saalfeld/home/bogovicj/pubDrafts/grpDrosTemplate/grpDrosTemplate/figs/distWarpNorm_vs_jacstd_colors_20180531.svg')