Tables and results describing h5 transform files
In [1]:
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))
In [2]:
bridge_list = ['JRC2018F_FAFB', 'JRC2018F_FCWB', 'JRC2018F_JFRC2010', 'JRC2018F_JFRC2013', 'JRC2018F_TEFOR']
In [3]:
# Sizes of the quantized multiscale h5 transform files, precomputed by the
# h5Analysis scripts.
# NOTE(review): absolute cluster path — resolves only on the Janelia filesystem.
sizes_data_f = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/h5Sizes.csv"
sizes_h5 = pd.read_csv(sizes_data_f)
sizes_h5
Out[3]:
Here I'll tabulate the sizes of various transformations produced by ANTs. These came from the transforms here:
/groups/saalfeld/public/jrc2018/transformations
Specifically, these subfolders / files:
JRC2018F_FAFB/*Warp.nii*
jrc2018F-FCWB/*Warp.nii*
jrc2018F-jfrc2010/*Warp.nii*
jrc2018F-jfrc2013/*Warp.nii*
JRC2018F_TEFOR/*Warp.nii*
In [4]:
# Raw on-disk sizes (bytes) of the ANTs displacement fields, forward and
# inverse, for each bridging transform. Values were measured on the files in
# /groups/saalfeld/public/jrc2018/transformations.
ants_transform_sizes = [
    ('JRC2018F_FAFB_Warp.nii', 2833742131),
    ('JRC2018F_FAFB_InverseWarp.nii', 2832463797),
    ('JRC2018F_FCWB_Warp.nii', 7893413462),
    ('JRC2018F_FCWB_InverseWarp.nii', 7897010532),
    ('JRC2018F_JFRC2010_Warp.nii', 8133116764),
    ('JRC2018F_JFRC2010_InverseWarp.nii', 8143715252),
    ('JRC2018F_JFRC2013_Warp.nii', 8002888343),
    ('JRC2018F_JFRC2013_InverseWarp.nii', 8003178281),
    ('JRC2018F_TEFOR_Warp.nii', 8127594570),
    ('JRC2018F_TEFOR_InverseWarp.nii', 8136532225),
]
# Name the columns directly in the constructor rather than assigning afterwards.
ants_df_raw = pd.DataFrame(ants_transform_sizes, columns=['file', 'size(bytes)'])
In [5]:
def bridge_index(x, bridge_list):
    """Return the index of the first entry in bridge_list that is a prefix of x.

    Returns None when no entry matches (same as the implicit fall-through).
    """
    matches = (i for i, name in enumerate(bridge_list) if x.startswith(name))
    return next(matches, None)
ants_df_raw['bridge_idx'] = ants_df_raw.apply( lambda x: (bridge_index(x['file'], bridge_list )), axis=1)
In [6]:
# Build a dataframe containing the combined size (compressed forward +
# inverse fields) for each bridging transform.
ants_df_data = [
    (name, ants_df_raw.loc[ants_df_raw.bridge_idx == i, 'size(bytes)'].sum())
    for i, name in enumerate(bridge_list)
]
ants_df = pd.DataFrame(ants_df_data, columns=['file', 'size(bytes)'])
In [7]:
# Combine h5 size and ants dfield size tables.
billion = 1e9
# Strip the '.h5' extension so file names line up with the ANTs table keys.
sizes_h5['file'] = sizes_h5['file'].str.replace('.h5', '', regex=False)
df = sizes_h5.set_index('file').join(ants_df.set_index('file'), rsuffix='_ants')
# Relative size of the quantized h5 field vs. the raw ANTs field, plus GB columns
# (vectorized column arithmetic; equivalent to the former row-wise applies).
df['ratio'] = df['size(bytes)'] / df['size(bytes)_ants']
df['h5 Size (GB)'] = df['size(bytes)'] / billion
df['Size (GB)'] = df['size(bytes)_ants'] / billion
df
Out[7]:
In [8]:
# Relabel for publication: the index becomes 'Transform', and only the
# human-readable columns are kept for the LaTeX table.
df.index.names = ['Transform']
df.columns = ['size(bytes)', 'size(bytes)_ants', 'Size Ratio', 'h5 Size (Gb)', 'Size (Gb)']
df_toWrite = df[['Size (Gb)', 'h5 Size (Gb)', 'Size Ratio']]
h5size_table_f = 'h5SizeTable_{}.tex'.format(datetime.date.today().strftime('%Y%m%d'))
# Flip to True to regenerate the LaTeX table on disk.
write_table = False
if write_table:
    print('writing : ', h5size_table_f)
    with open(h5size_table_f, 'w') as f:
        f.write(df_toWrite.to_latex())
In [9]:
# Downsampling factor at each level of the h5 multiscale pyramid.
factors_by_level = {0: 1, 1: 2, 2: 4}
err_data_f = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/examples/errTableLevels.csv"
err_df = pd.read_csv(err_data_f)
# Translate the integer level into its downsampling factor
# (lambda keeps the original KeyError behavior on unknown levels).
err_df['downsample factor'] = err_df['level'].map(lambda lv: factors_by_level[lv])
In [10]:
# Quantization-error table: one row per (transform, downsample factor).
h5err_table_f = 'h5QuantErrorTable_{}.tex'.format(datetime.date.today().strftime('%Y%m%d'))
err_toWrite = err_df[['xfm', 'downsample factor', 'avg', 'max']]
# NOTE(review): in-place set_index on a column slice — works, but relies on
# pandas copy semantics; confirm no SettingWithCopyWarning on re-run.
err_toWrite.set_index(['xfm', 'downsample factor'], inplace=True)
pd.options.display.float_format = '{:,.3f}'.format
# Flip to True to regenerate the LaTeX table on disk.
write_table = False
if write_table:
    print('writing : ', h5err_table_f)
    with open(h5err_table_f, 'w') as f:
        f.write(err_toWrite.to_latex())
In [104]:
# Per-skeleton transform timing: raw ANTs fields vs. quantized h5 fields.
h5IndivTransformF = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/h5timesIndv.csv"
antsIndivTransformF = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/antstimesIndv.csv"
h5SkelTimes = pd.read_csv( h5IndivTransformF )
antsSkelTimes = pd.read_csv( antsIndivTransformF )
print( 'ants mean skel time (ms): ', antsSkelTimes['time'].mean() )
print( 'ants std skel time (ms): ', antsSkelTimes['time'].std() )
print( 'h5 mean skel time (ms): ', h5SkelTimes['time'].mean() )
print( 'h5 std skel time (ms): ', h5SkelTimes['time'].std() )
# FIX: this label was printed with no value attached; report the sample counts.
print( 'num skels: ', len(antsSkelTimes), '(ants),', len(h5SkelTimes), '(h5)' )
print( ' ')
# Same statistics in seconds for the paper text.
print( 'ants mean skel time (s): ', (antsSkelTimes['time']/1000).mean() )
print( 'ants std skel time (s): ', (antsSkelTimes['time']/1000).std() )
print( 'h5 mean skel time (s): ', (h5SkelTimes['time']/1000).mean() )
print( 'h5 std skel time (s): ', (h5SkelTimes['time']/1000).std() )
print( ' ' )
# How many times faster the h5 lookup is than the raw ANTs field, on average.
print( 'relative speedup: ', (antsSkelTimes['time'].mean()/h5SkelTimes['time'].mean()))
In [112]:
# Whole-image transform timing: raw ANTs fields vs. quantized h5 fields.
h5ImgTransformF = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/h5ImageTransformTimes.csv"
antsImgTransformF = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/antsImageTransformTimes.csv"
h5ImgTimes = pd.read_csv(h5ImgTransformF)
antsImgTimes = pd.read_csv(antsImgTransformF)
# Vectorized ms -> s conversion (equivalent to the former row-wise applies).
h5ImgTimes['time(s)'] = h5ImgTimes['time'] / 1000.
antsImgTimes['time(s)'] = antsImgTimes['time'] / 1000.
print( 'ants mean img time (s): ', antsImgTimes['time(s)'].mean() )
print( 'ants std img time (s): ', antsImgTimes['time(s)'].std() )
print( ' ' )
print( 'h5 mean img time (s): ', h5ImgTimes['time(s)'].mean() )
print( 'h5 std img time (s): ', h5ImgTimes['time(s)'].std() )
In [85]:
# Per-dataset sizes inside the quantized multiscale h5 containers.
h5DatasetSizesF = "/groups/saalfeld/public/jrc2018/transformations/quantized_multiscale/sizesByDataset.csv"
h5datasetSizes_table_f = 'h5DatasetSizesTable_raw_{}.tex'.format(datetime.date.today().strftime('%Y%m%d'))
pd.options.display.float_format = '{:,.4f}'.format
h5DatasetSizes = pd.read_csv(h5DatasetSizesF)
# Derived size columns in decimal GB and MB.
h5DatasetSizes['sizeGb'] = h5DatasetSizes['size'] / 1e9
h5DatasetSizes['sizeMb'] = h5DatasetSizes['size'] / 1e6
# Flip to True to regenerate the LaTeX table on disk.
write_table = False
if write_table:
    print('writing : ', h5datasetSizes_table_f)
    with open(h5datasetSizes_table_f, 'w') as f:
        f.write(h5DatasetSizes.to_latex())
# h5DatasetSizes
In [97]:
# Attach ANTs-relative size ratios and a combined transform label to the
# per-dataset table.
df_h5SizesErrs = h5DatasetSizes
df_h5SizesErrs

# Map (transform, direction) -> raw ANTs field size in bytes, built from
# ants_transform_sizes. The entries there alternate forward ('*_Warp.nii')
# and inverse ('*_InverseWarp.nii') per transform, which is exactly the
# indexing the former ten-branch if/elif chain hard-coded.
ants_size_lookup = {
    (name.replace('_InverseWarp.nii', '').replace('_Warp.nii', ''),
     'I' if 'Inverse' in name else 'F'): size
    for name, size in ants_transform_sizes
}

def sizeRatios( x ):
    """Return (quantized h5 dataset size) / (raw ANTs field size) for row x.

    x must provide 'transform' (e.g. 'JRC2018F_FAFB'), 'direction'
    ('F' forward or 'I' inverse), and 'size' (bytes). Returns -1 for
    unrecognized (transform, direction) pairs, matching the old fall-through.
    """
    ants_size = ants_size_lookup.get((x['transform'], x['direction']))
    if ants_size is None:
        return -1
    return x['size'] / ants_size

df_h5SizesErrs['sizeRatio'] = df_h5SizesErrs.apply( lambda x: sizeRatios(x), axis=1)
# Inverse ratio: how many times smaller the h5 dataset is than the ANTs field.
df_h5SizesErrs['sizeRatio_oneover'] = df_h5SizesErrs.apply( lambda x: 1.0/x['sizeRatio'], axis=1)
# Label like 'JRC2018F_FAFB(F)' used as a join key downstream.
df_h5SizesErrs['xfm'] = df_h5SizesErrs.apply(
    lambda x: '{}({})'.format(x['transform'], x['direction']) , axis=1)
In [100]:
# Final table: quantization errors joined with dataset sizes, keyed on
# (xfm, downsample factor).
h5datasetSizes_table_f = 'h5DatasetErrorsSizesTable_raw_{}.tex'.format(datetime.date.today().strftime('%Y%m%d'))
size_err_df = err_df.set_index(['xfm', 'downsample factor']).join(
    df_h5SizesErrs.set_index(['xfm', 'downsample factor']))
size_err_df_writeme = size_err_df[['avg', 'max', 'sizeMb', 'sizeRatio_oneover']]
# Flip to True to regenerate the LaTeX table on disk.
write_table = False
if write_table:
    print('writing : ', h5datasetSizes_table_f)
    with open(h5datasetSizes_table_f, 'w') as f:
        f.write(size_err_df_writeme.to_latex())
size_err_df_writeme
Out[100]:
In [ ]: