Tables and results describing h5 transform files
In [1]:
    
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))
    
    
In [2]:
    
bridge_list = ['JRC2018F_FAFB', 'JRC2018F_FCWB', 'JRC2018F_JFRC2010', 'JRC2018F_JFRC2013', 'JRC2018F_TEFOR']
    
In [3]:
    
sizes_data_f="/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/h5Sizes.csv"
sizes_h5 = pd.read_csv( sizes_data_f )
sizes_h5
    
    Out[3]:
Here I tabulate the sizes of the various displacement-field transformations exported from ANTs. These came from the transforms here:
/groups/saalfeld/public/jrc2018/transformations
Specifically, these subfolders / files:
JRC2018F_FAFB/*Warp.nii*
jrc2018F-FCWB/*Warp.nii*
jrc2018F-jfrc2010/*Warp.nii*
jrc2018F-jfrc2013/*Warp.nii*
JRC2018F_TEFOR/*Warp.nii*
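The byte counts hard-coded in the next cell were recorded from those files. A minimal sketch of how such sizes could be collected is below; it assumes the paths above are readable, and xfm_root / warp_sizes are illustrative names, not part of the original analysis.
In [ ]:
    
# Sketch: gather (file name, size in bytes) for every warp field under the transformations directory
import glob
import os

xfm_root = '/groups/saalfeld/public/jrc2018/transformations'
warp_files = sorted( glob.glob( os.path.join( xfm_root, '*', '*Warp.nii*' )))
warp_sizes = [ ( os.path.basename( f ), os.path.getsize( f )) for f in warp_files ]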
In [4]:
    
ants_transform_sizes = [
    ('JRC2018F_FAFB_Warp.nii',2833742131),
    ('JRC2018F_FAFB_InverseWarp.nii',2832463797),
    ('JRC2018F_FCWB_Warp.nii',7893413462),
    ('JRC2018F_FCWB_InverseWarp.nii',7897010532),
    ('JRC2018F_JFRC2010_Warp.nii',8133116764),
    ('JRC2018F_JFRC2010_InverseWarp.nii',8143715252),
    ('JRC2018F_JFRC2013_Warp.nii',8002888343),
    ('JRC2018F_JFRC2013_InverseWarp.nii',8003178281),
    ('JRC2018F_TEFOR_Warp.nii',8127594570),
    ('JRC2018F_TEFOR_InverseWarp.nii',8136532225),
]
ants_df_raw = pd.DataFrame( ants_transform_sizes )
ants_df_raw.columns = [ 'file', 'size(bytes)']
    
In [5]:
    
def bridge_index( x, bridge_list ):
    # Return the index of the bridge whose name prefixes the file name x (None if no match)
    for i, b in enumerate( bridge_list ):
        if x.startswith( b ):
            return i

ants_df_raw['bridge_idx'] = ants_df_raw.apply( lambda x: bridge_index( x['file'], bridge_list ), axis=1)
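As a quick added illustration of bridge_index:
In [ ]:
    
# 'JRC2018F_FCWB_Warp.nii' starts with bridge_list[1] ('JRC2018F_FCWB'), so this returns 1
bridge_index( 'JRC2018F_FCWB_Warp.nii', bridge_list )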
    
In [6]:
    
# Build a dataframe containing the combined size of the compressed inverse and forward fields
ants_df_data = []
for i in range( len(bridge_list)):
    ants_df_data += [ (bridge_list[i], ants_df_raw[ants_df_raw.bridge_idx == i ]['size(bytes)'].sum()) ] 
ants_df = pd.DataFrame( ants_df_data )
ants_df.columns = [ 'file', 'size(bytes)']
    
In [7]:
    
# Combine h5 size and ants dfield size tables
billion=1e9
sizes_h5['file'] = sizes_h5.apply( lambda x: x['file'].replace('.h5',''), axis=1 )
df = sizes_h5.set_index('file').join(ants_df.set_index('file'), rsuffix='_ants')
# Compute relative size
df['ratio'] = df.apply( lambda x: x['size(bytes)']/x['size(bytes)_ants'], axis=1)
df['h5 Size (GB)'] = df.apply( lambda x: x['size(bytes)']/billion, axis=1)
df['Size (GB)'] = df.apply( lambda x: x['size(bytes)_ants']/billion, axis=1)
df
    
    Out[7]:
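As an added summary (not in the original notebook), the mean h5/ANTs size ratio across the five bridges can be printed directly from df at this point:
In [ ]:
    
# Illustrative summary: average compressed/uncompressed size ratio, and its reciprocal (compression factor)
print( 'mean h5/ANTs size ratio: ', df['ratio'].mean() )
print( 'mean compression factor: ', (1.0/df['ratio']).mean() )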
In [8]:
    
df.index.names = ['Transform']
df.columns = [ 'size(bytes)', 'size(bytes)_ants', 'Size Ratio', 'h5 Size (GB)', 'Size (GB)']
df_toWrite = df[[ 'Size (GB)', 'h5 Size (GB)', 'Size Ratio']]
h5size_table_f = 'h5SizeTable_%s.tex'%(datetime.date.today().strftime('%Y%m%d'))
# Set to True to write the LaTeX table
if False:
    print( 'writing : ', h5size_table_f )
    with open( h5size_table_f, 'w') as f:
        f.write( df_toWrite.to_latex())
    
    
In [9]:
    
factors_by_level = { 0:1, 1:2, 2:4 }
err_data_f="/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/examples/errTableLevels.csv"
err_df = pd.read_csv( err_data_f )
# make column for downsampling factor
err_df['downsample factor'] = err_df.apply( lambda x: factors_by_level[ x['level']], axis=1 )
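An added sanity check, not in the original: the quantization error can be summarized per downsample factor.
In [ ]:
    
# Mean (over transforms) of the average and maximum errors at each downsample factor
err_df.groupby( 'downsample factor' )[['avg', 'max']].mean()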
    
In [10]:
    
h5err_table_f = 'h5QuantErrorTable_%s.tex'%(datetime.date.today().strftime('%Y%m%d'))
err_toWrite = err_df[['xfm', 'downsample factor', 'avg','max']]
err_toWrite.set_index(['xfm','downsample factor'], inplace=True)
pd.options.display.float_format = '{:,.3f}'.format
# Set to True to write the LaTeX table
if False:
    print( 'writing : ', h5err_table_f )
    with open( h5err_table_f, 'w') as f:
        f.write( err_toWrite.to_latex())
    
    
In [104]:
    
h5IndivTransformF = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/h5timesIndv.csv"
antsIndivTransformF = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/antstimesIndv.csv"
h5SkelTimes = pd.read_csv( h5IndivTransformF )
antsSkelTimes = pd.read_csv( antsIndivTransformF )
print( 'ants mean skel time (ms): ', antsSkelTimes['time'].mean() )
print( 'ants std skel time (ms): ', antsSkelTimes['time'].std() )
print( 'h5 mean skel time (ms): ', h5SkelTimes['time'].mean() )
print( 'h5 std skel time (ms): ', h5SkelTimes['time'].std() )
print( 'num skels (ants, h5): ', len( antsSkelTimes ), len( h5SkelTimes ) )
print( ' ' )
print( 'ants mean skel time (s): ', (antsSkelTimes['time']/1000).mean() )
print( 'ants std skel time (s): ', (antsSkelTimes['time']/1000).std() )
print( 'h5 mean skel time (s): ', (h5SkelTimes['time']/1000).mean() )
print( 'h5 std skel time (s): ', (h5SkelTimes['time']/1000).std() )
print( ' ' )
print( 'relative speedup: ', (antsSkelTimes['time'].mean()/h5SkelTimes['time'].mean()))
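This next cell is an addition, not part of the original analysis: a rough bootstrap interval for the mean-time speedup, assuming the per-skeleton times in each CSV are independent samples.
In [ ]:
    
# Bootstrap the ratio of mean skeleton-transform times (ANTs / h5)
rng = np.random.RandomState( 0 )
boot = [ rng.choice( antsSkelTimes['time'], len( antsSkelTimes ), replace=True ).mean()
         / rng.choice( h5SkelTimes['time'], len( h5SkelTimes ), replace=True ).mean()
         for _ in range( 1000 ) ]
print( 'speedup 95% interval: ', np.percentile( boot, [2.5, 97.5] ))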
    
    
In [112]:
    
h5ImgTransformF = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/h5ImageTransformTimes.csv"
antsImgTransformF = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/antsImageTransformTimes.csv"
h5ImgTimes = pd.read_csv( h5ImgTransformF )
antsImgTimes = pd.read_csv( antsImgTransformF )
h5ImgTimes['time(s)'] = h5ImgTimes.apply( lambda x: x['time']/1000., axis=1)
antsImgTimes['time(s)'] = antsImgTimes.apply( lambda x: x['time']/1000., axis=1)
print( 'ants mean img time (s): ', antsImgTimes['time(s)'].mean() )
print( 'ants std img time (s): ', antsImgTimes['time(s)'].std() )
print( ' ' )
print( 'h5 mean img time (s): ', h5ImgTimes['time(s)'].mean() )
print( 'h5 std img time (s): ', h5ImgTimes['time(s)'].std() )
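An added line for symmetry with the skeleton comparison above; the original cell reports only means and standard deviations.
In [ ]:
    
# Same relative-speedup summary as for skeletons, using the image transform times
print( 'relative speedup: ', antsImgTimes['time(s)'].mean() / h5ImgTimes['time(s)'].mean() )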
    
    
In [85]:
    
h5DatasetSizesF = "/groups/saalfeld/public/jrc2018/transformations/quantized_multiscale/sizesByDataset.csv"
h5datasetSizes_table_f = 'h5DatasetSizesTable_raw_%s.tex'%(datetime.date.today().strftime('%Y%m%d'))
pd.options.display.float_format = '{:,.4f}'.format
h5DatasetSizes = pd.read_csv( h5DatasetSizesF )
h5DatasetSizes['sizeGB'] = h5DatasetSizes['size'] / 1e9
h5DatasetSizes['sizeMB'] = h5DatasetSizes['size'] / 1e6
if False:  # set to True to write the LaTeX table
    print( 'writing : ', h5datasetSizes_table_f )
    with open( h5datasetSizes_table_f, 'w') as f:
        f.write( h5DatasetSizes.to_latex())
        
# h5DatasetSizes
    
In [97]:
    
df_h5SizesErrs = h5DatasetSizes

# Map (transform, direction) to the byte size of the corresponding ANTs warp field,
# where 'F' denotes the forward (Warp) and 'I' the inverse (InverseWarp) field
ants_size_lookup = {}
for name, size in ants_transform_sizes:
    direction = 'I' if 'InverseWarp' in name else 'F'
    transform = name.replace( '_InverseWarp.nii', '' ).replace( '_Warp.nii', '' )
    ants_size_lookup[ (transform, direction) ] = size

def sizeRatios( x ):
    # Ratio of the h5 dataset size to the matching ANTs warp size (-1 if there is no match)
    key = ( x['transform'], x['direction'] )
    if key in ants_size_lookup:
        return x['size'] / ants_size_lookup[ key ]
    return -1

df_h5SizesErrs['sizeRatio'] = df_h5SizesErrs.apply( lambda x: sizeRatios(x), axis=1)
df_h5SizesErrs['sizeRatio_oneover'] = df_h5SizesErrs.apply( lambda x: 1.0/x['sizeRatio'], axis=1)
df_h5SizesErrs['xfm'] = df_h5SizesErrs.apply(
    lambda x: '{}({})'.format( x['transform'], x['direction'] ), axis=1)
    
In [100]:
    
h5datasetSizes_table_f = 'h5DatasetErrorsSizesTable_raw_%s.tex'%(datetime.date.today().strftime('%Y%m%d'))
size_err_df = err_df.set_index(['xfm','downsample factor']).join( df_h5SizesErrs.set_index(['xfm','downsample factor']))
size_err_df_writeme = size_err_df[['avg', 'max', 'sizeMB', 'sizeRatio_oneover']]
if False:  # set to True to write the LaTeX table
    print( 'writing : ', h5datasetSizes_table_f )
    with open( h5datasetSizes_table_f, 'w') as f:
        f.write( size_err_df_writeme.to_latex())
        
size_err_df_writeme
    
    
    Out[100]:
In [ ]: