notebook.community

Edit and run



In [ ]:

    
import sys
import glob
import re
import fnmatch
import math
import re
import os
from os import listdir
from os.path import join, isfile, basename

import itertools

import numpy as np
from numpy import float32, int32, uint8, dtype, genfromtxt

from scipy.stats import ttest_ind

import pandas as pd

import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator, LogLocator, FormatStrFormatter

import seaborn as sns

import colorsys



In [ ]:

    
# Inject a CSS rule that sets the notebook's `.container` width to 90%.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated and fails on newer IPython.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))



In [ ]:

    
# --- Meta parameters ---

# Which statistics table to read:
#   ''    -> raw stats
#   'log' -> log stats
islog = ''

# Label to analyse; -1 (any negative value) combines all labels.
label = -1

# String form of the label: 'all' for a negative label, else the number itself.
if label < 0:
    labelstr = 'all'
else:
    labelstr = str(label)
print(labelstr)



In [ ]:

    
# Read the jacobian statistics CSV into `df_raw`.
# The `{}` placeholder in the file name is filled with `islog`
# ('' for raw stats, 'log' for log stats).
table_template = '/nrs/saalfeld/john/projects/flyChemStainAtlas/all_evals/jacobianStats/jacobian_{}stats_wSubs.csv'
table_f = table_template.format(islog)
df_raw = pd.read_csv(table_f)



In [ ]:

    
# Build the combined "<TEMPLATE> <ALG>" key (space-separated) used for grouping.
# Vectorised string concatenation replaces the per-row apply(axis=1): it is
# faster and still yields a proper (empty) column when the table has no rows.
# assumes TEMPLATE and ALG are string columns — TODO confirm
df_raw['TEMPLATEALG'] = df_raw['TEMPLATE'] + ' ' + df_raw['ALG']



In [ ]:

    
# Show the last five rows of the loaded table.
df_raw.tail(n=5)



In [ ]:

    
# Per-TEMPLATEALG summaries of the MEAN and STDDEV statistics for the chosen label.
#
# The aggregations select the VALUE / VARIANCE column explicitly. Aggregating
# the whole frame relies on pandas silently dropping the non-numeric columns,
# which raises a TypeError in pandas >= 2.0, and the result columns are named
# here instead of being overwritten positionally.

# Mean of means: average the per-row MEAN values within each TEMPLATEALG
dfm = df_raw[ (df_raw.STAT == 'MEAN') & (df_raw.LABEL == label) ].drop(columns=['LINE','LABEL'])
dfmg = (
    dfm.groupby('TEMPLATEALG', as_index=False)['VALUE']
       .mean()
       .rename(columns={'VALUE': 'MEAN'})
)
print(dfmg.head())
print( ' ' )

# Combined standard deviation: square each per-row STDDEV into a variance,
# sum the variances within each TEMPLATEALG, then take the square root:
#   STDDEV = sqrt( sd_1^2 + sd_2^2 + ... + sd_k^2 )
# NOTE(review): this is the root of the SUM of variances, not of their mean;
# divide VARIANCE by the group size first if an average stddev is intended — confirm.
dfv = df_raw[ (df_raw.STAT == 'STDDEV') & (df_raw.LABEL == label) ].drop(columns=['LINE','LABEL','STAT'])
dfv['VARIANCE'] = dfv['VALUE'] ** 2
dfvg = dfv.groupby('TEMPLATEALG', as_index=False)['VARIANCE'].sum()

# The per-row stddev values (VALUE) are not carried over; the new STDDEV is the
# sqrt of the summed variances.
dfvg['STDDEV'] = np.sqrt( dfvg['VARIANCE'] )
print( dfvg.head())



In [ ]:

    
# Generate additional stats

## RANGE = MAX - MIN
# Keep only the MIN and MAX rows for the chosen label.
dfr = df_raw[ (df_raw.LABEL == label) & df_raw.STAT.isin(['MIN', 'MAX']) ]

# One row per (TEMPLATEALG, SUBJECT) with MIN and MAX as columns; duplicate
# entries are averaged (pivot_table's default aggregation).
df_rangep = dfr.pivot_table( values='VALUE', columns=['STAT'],
                             index=['TEMPLATEALG','SUBJECT'] )

# Average MIN and MAX over subjects, then take their difference.
# Plain column subtraction replaces the per-row apply(axis=1).
df_rangeMeans = df_rangep.groupby( level='TEMPLATEALG' ).mean().reset_index()
df_rangeMeans['RANGE'] = df_rangeMeans['MAX'] - df_rangeMeans['MIN']



In [ ]:

    
# Combine the mean and stddev tables.
# Merge on the TEMPLATEALG key instead of joining on the positional index: an
# index join pairs rows purely by position and silently mismatches them when
# the tables do not list the same groups in the same order. The result has a
# single TEMPLATEALG column (no duplicated TEMPLATEALG_S / TEMPLATEALG_R).
# `validate` raises if a key is unexpectedly duplicated on either side.
df_meanStd = dfmg.merge( dfvg, on='TEMPLATEALG', how='left',
                         suffixes=('', '_S'), validate='one_to_one' )

# also merge in the min/max/range table
df_meanStdRange = df_meanStd.merge( df_rangeMeans, on='TEMPLATEALG', how='left',
                                    suffixes=('', '_R'), validate='one_to_one' )

# add template and alg back in: TEMPLATEALG is "<TEMPLATE> <ALG>", so the first
# two space-separated tokens are the template and the algorithm
templatealg_parts = df_rangeMeans['TEMPLATEALG'].str.split(' ')
df_rangeMeans['TEMPLATE'] = templatealg_parts.str[0]
df_rangeMeans['ALG'] = templatealg_parts.str[1]



In [ ]:

    
# Plot mean against standard deviation, one annotated point per TEMPLATEALG

fig, ax = plt.subplots( figsize=(16, 10) )
ax.scatter( df_meanStd.MEAN, df_meanStd.STDDEV )
ax.set_xlabel('Mean-mean jacobian determinant')
ax.set_ylabel('Mean-stddev jacobian determinant ')

# Label every point with its template/algorithm key. The key column in
# df_meanStd is 'TEMPLATEALG'; the 'TEMPLATEALG_M' column looked up before is
# never created and raised a KeyError.
for _, row in df_meanStd.iterrows():
    ax.annotate( "   " + row['TEMPLATEALG'], (row['MEAN'], row['STDDEV']) )



In [ ]:

    
# Bar chart of RANGE: algorithms along the x axis, one bar colour per template

plotme = df_rangeMeans.loc[:, ['TEMPLATE', 'ALG', 'RANGE']]

range_axes = sns.barplot( x='ALG', y='RANGE', hue='TEMPLATE', data=plotme )
fig = range_axes.get_figure()
fig.set_size_inches( 16, 10 )



In [ ]:

    
# Bar chart of RANGE: templates along the x axis, one bar colour per algorithm
# (reuses the `plotme` table built for the by-template plot)

range_axes = sns.barplot( x='TEMPLATE', y='RANGE', hue='ALG', data=plotme )
fig = range_axes.get_figure()
fig.set_size_inches( 16, 10 )



In [ ]: