In [ ]:
import sys
import glob
import re
import fnmatch
import math
import re
import os
from os import listdir
from os.path import join, isfile, basename
import itertools
import numpy as np
from numpy import float32, int32, uint8, dtype, genfromtxt
from scipy.stats import ttest_ind
import pandas as pd
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator, LogLocator, FormatStrFormatter
import seaborn as sns
import colorsys
In [ ]:
# Set the notebook's `.container` CSS width to 90% of the browser window.
# `display` and `HTML` come from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))
In [ ]:
# Analysis settings
# Token inserted into the stats file name:
# '' selects the raw stats, 'log' selects the log stats.
islog = ''

# Label to analyse (-1 combines all labels).
label = -1

# Text form of the label: 'all' for a negative label, otherwise its digits.
if label >= 0:
    labelstr = str(label)
else:
    labelstr = 'all'
print(labelstr)
In [ ]:
# Read the jacobian statistics CSV into `df_raw`.
# The `{}` placeholder in the file name is filled with `islog`.
stats_path_template = '/nrs/saalfeld/john/projects/flyChemStainAtlas/all_evals/jacobianStats/jacobian_{}stats_wSubs.csv'
table_f = stats_path_template.format(islog)
df_raw = pd.read_csv(table_f)
In [ ]:
# Add a combined "<TEMPLATE> <ALG>" key column that the later cells group on.
# Column-wise string concatenation replaces the row-by-row `apply`.
df_raw['TEMPLATEALG'] = df_raw['TEMPLATE'] + ' ' + df_raw['ALG']
In [ ]:
# Display the last few rows of the raw table.
df_raw.tail()
In [ ]:
# Per-TEMPLATEALG summaries of the MEAN and STDDEV rows for the selected label.
# Only the column being reduced is passed to each aggregation, so the result
# does not depend on how the installed pandas version treats the remaining
# non-numeric columns in groupby reductions.

# Mean of means: average the VALUE of the MEAN rows within each group.
dfm = df_raw[ (df_raw.STAT == 'MEAN') & (df_raw.LABEL == label) ].drop(columns=['LINE','LABEL'])
dfmg = dfm.groupby('TEMPLATEALG')['VALUE'].mean().rename('MEAN').reset_index()
print(dfmg.head())
print( ' ' )

# Combined standard deviation: square each STDDEV row to get a variance,
# sum the variances within each group, then take the square root of the sum:
#   STDDEV = sqrt( sd_1^2 + sd_2^2 + ... + sd_k^2 )
# NOTE(review): the earlier comment called this a "mean of variances", but the
# variances are summed, not averaged, so the value grows with the number of
# rows per group. Confirm whether sqrt(mean of variances) was intended.
dfv = df_raw[ (df_raw.STAT == 'STDDEV') & (df_raw.LABEL == label) ].drop(columns=['LINE','LABEL','STAT'])
dfv['VARIANCE'] = dfv['VALUE'] ** 2
dfvg = dfv.groupby('TEMPLATEALG')['VARIANCE'].sum().reset_index()
dfvg['STDDEV'] = np.sqrt( dfvg['VARIANCE'] )
print( dfvg.head())
In [ ]:
# Generate additional stats
## RANGE = MAX - MIN

# Keep only the MIN and MAX rows of the selected label.
dfr = df_raw[ (df_raw.LABEL == label) & df_raw.STAT.isin(['MIN', 'MAX']) ]

# One row per (TEMPLATEALG, SUBJECT) with MIN and MAX as columns.
# pivot_table's default aggregation averages duplicate entries.
df_rangep = pd.pivot_table( dfr, values='VALUE', columns=['STAT'],
                            index=['TEMPLATEALG','SUBJECT'])

# Average MIN and MAX over the subjects of each group, then take their
# difference column-wise instead of with a row-by-row `apply`.
df_rangeMeans = df_rangep.groupby(['TEMPLATEALG'] ).mean().reset_index()
df_rangeMeans['RANGE'] = df_rangeMeans['MAX'] - df_rangeMeans['MIN']
In [ ]:
# Combine the per-group tables.
# Rows are matched on the TEMPLATEALG key, not on row position: a plain
# `join` aligns on the positional index and silently pairs the wrong groups
# if one table is missing a group. A left merge keeps every row of the
# left-hand table, and the result carries a single TEMPLATEALG column.
df_meanStd = dfmg.merge( dfvg, on='TEMPLATEALG', how='left' )

# also join the min/max/range table
df_meanStdRange = df_meanStd.merge( df_rangeMeans, on='TEMPLATEALG', how='left' )

# add template and alg back in: TEMPLATEALG is "<TEMPLATE> <ALG>", so the
# first two space-separated tokens recover the original columns.
name_parts = df_rangeMeans['TEMPLATEALG'].str.split(' ')
df_rangeMeans['TEMPLATE'] = name_parts.str[0]
df_rangeMeans['ALG'] = name_parts.str[1]
In [ ]:
# Plot mean against standard deviation, one annotated point per row of df_meanStd.
plt.scatter( df_meanStd.MEAN, df_meanStd.STDDEV )
plt.xlabel('Mean-mean jacobian determinant')
plt.ylabel('Mean-stddev jacobian determinant ')
ax = plt.gca()
for i,row in df_meanStd.iterrows():
    # The group name lives in 'TEMPLATEALG'; df_meanStd has no
    # 'TEMPLATEALG_M' column, so that lookup raised a KeyError.
    # The leading space keeps the text off the marker.
    s = " " + row['TEMPLATEALG']
    ax.annotate( s, (row['MEAN'],row['STDDEV']))
fig = plt.gcf()
a = fig.set_size_inches( 16, 10 )
In [ ]:
# Bar chart of RANGE with algorithms on the x axis and one hue per template.
plotme = df_rangeMeans.loc[:, ['TEMPLATE','ALG','RANGE']]
sns.barplot( x='ALG', y='RANGE', hue='TEMPLATE', data=plotme )
# Enlarge the current figure to 16 x 10 inches.
fig = plt.gcf()
a = fig.set_size_inches( 16, 10 )
In [ ]:
# Bar chart of RANGE with templates on the x axis and one hue per algorithm.
sns.barplot( x='TEMPLATE', y='RANGE', hue='ALG', data=plotme )
# Enlarge the current figure to 16 x 10 inches.
fig = plt.gcf()
a = fig.set_size_inches( 16, 10 )
In [ ]: