Tables and results describing h5 transform files


In [1]:
import datetime

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

# IPython.core.display is a deprecated import path for these names;
# IPython.display is the supported location (same objects).
from IPython.display import display, HTML
# Widen the notebook container so the wide tables below render without scrolling.
display(HTML("<style>.container { width:90% !important; }</style>"))



In [2]:
bridge_list = ['JRC2018F_FAFB', 'JRC2018F_FCWB', 'JRC2018F_JFRC2010', 'JRC2018F_JFRC2013', 'JRC2018F_TEFOR']

In [3]:
# CSV of compressed h5 transform file sizes in bytes, one row per bridging transform.
# NOTE(review): hardcoded absolute cluster path — this cell only runs on that filesystem.
sizes_data_f="/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/h5Sizes.csv"
sizes_h5 = pd.read_csv( sizes_data_f )
sizes_h5


Out[3]:
file size(bytes)
0 JRC2018F_FAFB.h5 580398481
1 JRC2018F_FCWB.h5 1291412644
2 JRC2018F_JFRC2010.h5 1648355301
3 JRC2018F_JFRC2013.h5 1394903421
4 JRC2018F_TEFOR.h5 1623178929

Here I'll tabulate the sizes of various transformations out of ANTs. These came from the transforms here:

/groups/saalfeld/public/jrc2018/transformations

Specifically, these subfolders / files:

  • JRC2018F_FAFB/*Warp.nii*
  • jrc2018F-FCWB/*Warp.nii*
  • jrc2018F-jfrc2010/*Warp.nii*
  • jrc2018F-jfrc2013/*Warp.nii*
  • JRC2018F_TEFOR/*Warp.nii*

In [4]:
# Raw byte sizes of the forward and inverse ANTs displacement fields listed
# above (file name, size in bytes).
ants_transform_sizes = [
    ('JRC2018F_FAFB_Warp.nii', 2833742131),
    ('JRC2018F_FAFB_InverseWarp.nii', 2832463797),
    ('JRC2018F_FCWB_Warp.nii', 7893413462),
    ('JRC2018F_FCWB_InverseWarp.nii', 7897010532),
    ('JRC2018F_JFRC2010_Warp.nii', 8133116764),
    ('JRC2018F_JFRC2010_InverseWarp.nii', 8143715252),
    ('JRC2018F_JFRC2013_Warp.nii', 8002888343),
    ('JRC2018F_JFRC2013_InverseWarp.nii', 8003178281),
    ('JRC2018F_TEFOR_Warp.nii', 8127594570),
    ('JRC2018F_TEFOR_InverseWarp.nii', 8136532225),
]
ants_df_raw = pd.DataFrame( ants_transform_sizes, columns=['file', 'size(bytes)'] )

In [5]:
def bridge_index( x, bridge_list ):
    """Return the index of the first entry in `bridge_list` that is a prefix
    of the file name `x`, or None when no entry matches."""
    matches = ( i for i, prefix in enumerate( bridge_list ) if x.startswith( prefix ) )
    return next( matches, None )

ants_df_raw['bridge_idx'] = ants_df_raw.apply( lambda x: (bridge_index(x['file'], bridge_list )), axis=1)

In [6]:
# Build a dataframe containing the combined size of the compressed inverse and forward fields
ants_df_data = [
    ( name, ants_df_raw.loc[ ants_df_raw.bridge_idx == i, 'size(bytes)' ].sum() )
    for i, name in enumerate( bridge_list )
]

ants_df = pd.DataFrame( ants_df_data, columns=[ 'file', 'size(bytes)' ] )

In [7]:
# Combine h5 size and ants dfield size tables
billion = 1e9

# Strip the '.h5' extension so the two tables share a join key.
sizes_h5['file'] = sizes_h5['file'].str.replace( '.h5', '', regex=False )
df = sizes_h5.set_index('file').join( ants_df.set_index('file'), rsuffix='_ants' )

# Relative size of the h5 vs the raw ANTs fields, plus sizes in (decimal) GB.
df['ratio'] = df['size(bytes)'] / df['size(bytes)_ants']
df['h5 Size (GB)'] = df['size(bytes)'] / billion
df['Size (GB)'] = df['size(bytes)_ants'] / billion
df


Out[7]:
size(bytes) size(bytes)_ants ratio h5 Size (GB) Size (GB)
file
JRC2018F_FAFB 580398481 5666205928 0.102432 0.580398 5.666206
JRC2018F_FCWB 1291412644 15790423994 0.081785 1.291413 15.790424
JRC2018F_JFRC2010 1648355301 16276832016 0.101270 1.648355 16.276832
JRC2018F_JFRC2013 1394903421 16006066624 0.087148 1.394903 16.006067
JRC2018F_TEFOR 1623178929 16264126795 0.099801 1.623179 16.264127

In [8]:
# Pretty names for the final latex table.
df.index.names = ['Transform']
df.columns = ['size(bytes)', 'size(bytes)_ants', 'Size Ratio', 'h5 Size (Gb)', 'Size (Gb)']

df_toWrite = df[['Size (Gb)', 'h5 Size (Gb)', 'Size Ratio']]

h5size_table_f = 'h5SizeTable_{}.tex'.format( datetime.date.today().strftime('%Y%m%d') )

# Flip to True to (re)write the latex table to disk.
if False:
    print( 'writing : ', h5size_table_f )
    with open( h5size_table_f, 'w') as f:
        f.write( df_toWrite.to_latex())


writing :  h5SizeTable_20191008.tex

Quantization / downsampling errors


In [9]:
# Pyramid level -> downsampling factor.
factors_by_level = { 0: 1, 1: 2, 2: 4 }
err_data_f = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/examples/errTableLevels.csv"
err_df = pd.read_csv( err_data_f )

# make column for downsampling factor
err_df['downsample factor'] = err_df['level'].map( factors_by_level )

In [10]:
h5err_table_f = 'h5QuantErrorTable_{}.tex'.format( datetime.date.today().strftime('%Y%m%d') )

# Index by transform and downsampling factor; keep only the error columns.
err_toWrite = err_df[['xfm', 'downsample factor', 'avg', 'max']].set_index( ['xfm', 'downsample factor'] )
pd.options.display.float_format = '{:,.3f}'.format

# Flip to True to (re)write the latex table to disk.
if False:
    print( 'writing : ', h5err_table_f )
    with open( h5err_table_f, 'w') as f:
        f.write( err_toWrite.to_latex())


writing :  h5QuantErrorTable_20191008.tex

Skeleton transform times


In [104]:
# Per-skeleton transform times (one row per skeleton, times in ms) for the
# h5 and raw-ANTs implementations.
h5IndivTransformF = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/h5timesIndv.csv"
antsIndivTransformF = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/antstimesIndv.csv"

h5SkelTimes = pd.read_csv( h5IndivTransformF )
antsSkelTimes = pd.read_csv( antsIndivTransformF )

print( 'ants mean skel time (ms): ', antsSkelTimes['time'].mean() )
print( 'ants std skel time (ms): ', antsSkelTimes['time'].std() )
print( 'h5 mean skel time (ms): ', h5SkelTimes['time'].mean() )
print( 'h5 std skel time (ms): ', h5SkelTimes['time'].std() )
# Fix: the skeleton counts were never printed (original output showed a blank line).
print( 'num skels: ', len(antsSkelTimes), len(h5SkelTimes) )
print( '  ')
print( 'ants mean skel time (s): ', (antsSkelTimes['time']/1000).mean() )
print( 'ants std skel time (s): ', (antsSkelTimes['time']/1000).std() )
print( 'h5 mean skel time (s): ', (h5SkelTimes['time']/1000).mean() )
print( 'h5 std skel time (s): ', (h5SkelTimes['time']/1000).std() )
print( ' ' )
# How many times faster the h5 transform is than raw ANTs, on average.
print( 'relative speedup: ', (antsSkelTimes['time'].mean()/h5SkelTimes['time'].mean()))


ants mean skel time (ms):  28794.86144578313
ants std skel time (ms):  1762.7934164760288
h5 mean skel time (ms):  2505.9397590361446
h5 std skel time (ms):  395.29124972178204
num skels: 
  
ants mean skel time (s):  28.794861445783134
ants std skel time (s):  1.7627934164760293
h5 mean skel time (s):  2.5059397590361443
h5 std skel time (s):  0.395291249721782
 
relative speedup:  11.490643915901016

Image transform times


In [112]:
# Whole-image transform times for the h5 and raw-ANTs implementations.
h5ImgTransformF = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/h5ImageTransformTimes.csv"
antsImgTransformF = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/antsImageTransformTimes.csv"

h5ImgTimes = pd.read_csv( h5ImgTransformF )
antsImgTimes = pd.read_csv( antsImgTransformF )

# Convert milliseconds to seconds.
h5ImgTimes['time(s)'] = h5ImgTimes['time'] / 1000.
antsImgTimes['time(s)'] = antsImgTimes['time'] / 1000.

print( 'ants mean img time (s): ', antsImgTimes['time(s)'].mean() )
print( 'ants std img time (s): ', antsImgTimes['time(s)'].std() )
print( ' ' )
print( 'h5 mean img time (s): ', h5ImgTimes['time(s)'].mean() )
print( 'h5 std img time (s): ', h5ImgTimes['time(s)'].std() )


ants mean img time (s):  343.34270000000004
ants std img time (s):  54.70575850462789
 
h5 mean img time (s):  292.2076
h5 std img time (s):  11.481494832991046

h5 dataset sizes


In [85]:
# Per-dataset (per scale level / direction) sizes inside the h5 containers.
h5DatasetSizesF = "/groups/saalfeld/public/jrc2018/transformations/quantized_multiscale/sizesByDataset.csv"
h5datasetSizes_table_f = 'h5DatasetSizesTable_raw_{}.tex'.format( datetime.date.today().strftime('%Y%m%d') )

pd.options.display.float_format = '{:,.4f}'.format
h5DatasetSizes = pd.read_csv( h5DatasetSizesF )
# Sizes in decimal giga- and mega-bytes.
h5DatasetSizes['sizeGb'] = h5DatasetSizes['size'] / 1e9
h5DatasetSizes['sizeMb'] = h5DatasetSizes['size'] / 1e6


# Flip to True to (re)write the latex table to disk.
if False:
    print( 'writing : ', h5datasetSizes_table_f )
    with open( h5datasetSizes_table_f, 'w') as f:
        f.write( h5DatasetSizes.to_latex())

# h5DatasetSizes

In [97]:
# Work on a copy: plain assignment would alias h5DatasetSizes, so the columns
# added in the next cells would silently mutate the original table too.
df_h5SizesErrs = h5DatasetSizes.copy()
df_h5SizesErrs
# Replace the ten-branch if/elif chain (which hardcoded positions in
# ants_transform_sizes) with a lookup keyed by (transform, direction).
def sizeRatios( x ):
    """Ratio of an h5 dataset's size to the matching raw ANTs field size.

    x : a row of the dataset-size table with 'transform', 'direction'
        ('F' forward / 'I' inverse) and 'size' (bytes) entries.
    Returns the ratio, or -1 when no matching ANTs field is known.
    """
    # ANTs files are named '<transform>_Warp.nii' (forward) or
    # '<transform>_InverseWarp.nii' (inverse); build the lookup from the
    # global ants_transform_sizes table.
    ants_size_lookup = {}
    for fname, nbytes in ants_transform_sizes:
        if fname.endswith( '_InverseWarp.nii' ):
            key = ( fname[ :-len('_InverseWarp.nii') ], 'I' )
        else:
            key = ( fname[ :-len('_Warp.nii') ], 'F' )
        ants_size_lookup[ key ] = nbytes

    denom = ants_size_lookup.get( ( x['transform'], x['direction'] ) )
    if denom is None:
        return -1
    return x['size'] / denom
    
# Ratio of each h5 dataset size to its raw ANTs counterpart, and its inverse.
df_h5SizesErrs['sizeRatio'] = df_h5SizesErrs.apply( sizeRatios, axis=1 )
df_h5SizesErrs['sizeRatio_oneover'] = 1.0 / df_h5SizesErrs['sizeRatio']

# Label combining transform name and direction, e.g. 'JRC2018F_FAFB(F)'.
df_h5SizesErrs['xfm'] = df_h5SizesErrs['transform'] + '(' + df_h5SizesErrs['direction'] + ')'

In [100]:
h5datasetSizes_table_f = 'h5DatasetErrorsSizesTable_raw_{}.tex'.format( datetime.date.today().strftime('%Y%m%d') )

# Join quantization errors with dataset sizes on (transform, downsample factor).
size_err_df = err_df.set_index( ['xfm', 'downsample factor'] ).join(
    df_h5SizesErrs.set_index( ['xfm', 'downsample factor'] ) )
size_err_df_writeme = size_err_df[['avg', 'max', 'sizeMb', 'sizeRatio_oneover']]

# Flip to True to (re)write the latex table to disk.
if False:
    print( 'writing : ', h5datasetSizes_table_f )
    with open( h5datasetSizes_table_f, 'w') as f:
        f.write( size_err_df_writeme.to_latex())


size_err_df_writeme


writing :  h5DatasetErrorsSizesTable_raw_20191009.tex
Out[100]:
avg max sizeMb sizeRatio_oneover
xfm downsample factor
JRC2018F_FAFB(F) 1 0.0110 0.0385 247.5539 11.4470
2 0.0182 0.4464 38.9670 72.7215
4 0.0524 1.2518 5.7071 496.5255
JRC2018F_FAFB(I) 1 0.0110 0.0383 243.3362 11.6401
2 0.0184 0.6138 38.4193 73.7250
4 0.0527 1.8581 5.6419 502.0375
JRC2018F_FCWB(F) 1 0.0114 0.0392 540.1667 14.6129
2 0.0163 0.6218 89.3700 88.3228
4 0.0434 1.6542 13.8266 570.8879
JRC2018F_FCWB(I) 1 0.0114 0.0388 542.2779 14.5627
2 0.0169 0.4460 90.0228 87.7224
4 0.0462 1.2926 13.8451 570.3818
JRC2018F_JFRC2010(F) 1 0.0116 0.0388 683.2130 11.9042
2 0.0181 0.4150 112.0876 72.5604
4 0.0552 1.2737 17.1350 474.6485
JRC2018F_JFRC2010(I) 1 0.0115 0.0395 702.4834 11.5928
2 0.0184 0.3407 114.3231 71.2342
4 0.0570 1.0231 17.2097 473.2061
JRC2018F_JFRC2013(F) 1 0.0113 0.0394 585.4150 13.6705
2 0.0154 0.2915 98.6468 81.1267
4 0.0408 0.9458 15.0793 530.7191
JRC2018F_JFRC2013(I) 1 0.0114 0.0395 580.5981 13.7844
2 0.0158 0.4738 98.1901 81.5070
4 0.0422 0.9680 15.0707 531.0416
JRC2018F_TEFOR(F) 1 0.0115 0.0394 681.1018 11.9330
2 0.0188 0.3757 110.7215 73.4057
4 0.0574 1.0585 17.1177 474.8076
JRC2018F_TEFOR(I) 1 0.0115 0.0397 684.5082 11.8867
2 0.0194 0.5686 110.7172 73.4893
4 0.0601 1.5733 17.1091 475.5684

In [ ]: