Tables and results describing h5 transform files
In [1]:
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))
In [2]:
bridge_list = ['JRC2018F_FAFB', 'JRC2018F_FCWB', 'JRC2018F_JFRC2010', 'JRC2018F_JFRC2013', 'JRC2018F_TEFOR']
In [3]:
# Sizes of the quantized multiscale h5 transform files, precomputed by the
# h5Analysis scripts.
# NOTE(review): absolute cluster path — resolves only on the Janelia filesystem.
sizes_data_f = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/h5Sizes.csv"
sizes_h5 = pd.read_csv(sizes_data_f)
sizes_h5
Out[3]:
Here I'll tabulate the sizes of various transformations produced by ANTs. These came from the transforms here:
/groups/saalfeld/public/jrc2018/transformations
Specifically, these subfolders / files:
JRC2018F_FAFB/*Warp.nii*
jrc2018F-FCWB/*Warp.nii*
jrc2018F-jfrc2010/*Warp.nii*
jrc2018F-jfrc2013/*Warp.nii*
JRC2018F_TEFOR/*Warp.nii*
In [4]:
# Raw on-disk sizes (bytes) of the ANTs displacement fields, forward and
# inverse, for each bridging transform. Values were measured on the files in
# /groups/saalfeld/public/jrc2018/transformations.
ants_transform_sizes = [
    ('JRC2018F_FAFB_Warp.nii', 2833742131),
    ('JRC2018F_FAFB_InverseWarp.nii', 2832463797),
    ('JRC2018F_FCWB_Warp.nii', 7893413462),
    ('JRC2018F_FCWB_InverseWarp.nii', 7897010532),
    ('JRC2018F_JFRC2010_Warp.nii', 8133116764),
    ('JRC2018F_JFRC2010_InverseWarp.nii', 8143715252),
    ('JRC2018F_JFRC2013_Warp.nii', 8002888343),
    ('JRC2018F_JFRC2013_InverseWarp.nii', 8003178281),
    ('JRC2018F_TEFOR_Warp.nii', 8127594570),
    ('JRC2018F_TEFOR_InverseWarp.nii', 8136532225),
]
# Name the columns directly in the constructor rather than assigning afterwards.
ants_df_raw = pd.DataFrame(ants_transform_sizes, columns=['file', 'size(bytes)'])
In [5]:
def bridge_index(x, bridge_list):
    """Return the index of the first entry in bridge_list that is a prefix of x.

    Returns None when no entry matches (same as the implicit fall-through).
    """
    matches = (i for i, name in enumerate(bridge_list) if x.startswith(name))
    return next(matches, None)
ants_df_raw['bridge_idx'] = ants_df_raw.apply( lambda x: (bridge_index(x['file'], bridge_list )), axis=1)
In [6]:
# Build a dataframe containing the combined size (compressed forward +
# inverse fields) for each bridging transform.
ants_df_data = [
    (name, ants_df_raw.loc[ants_df_raw.bridge_idx == i, 'size(bytes)'].sum())
    for i, name in enumerate(bridge_list)
]
ants_df = pd.DataFrame(ants_df_data, columns=['file', 'size(bytes)'])
In [7]:
# Combine h5 size and ants dfield size tables.
billion = 1e9
# Strip the '.h5' extension so file names line up with the ANTs table keys.
sizes_h5['file'] = sizes_h5['file'].str.replace('.h5', '', regex=False)
df = sizes_h5.set_index('file').join(ants_df.set_index('file'), rsuffix='_ants')
# Relative size of the quantized h5 field vs. the raw ANTs field, plus GB columns
# (vectorized column arithmetic; equivalent to the former row-wise applies).
df['ratio'] = df['size(bytes)'] / df['size(bytes)_ants']
df['h5 Size (GB)'] = df['size(bytes)'] / billion
df['Size (GB)'] = df['size(bytes)_ants'] / billion
df
Out[7]:
In [8]:
# Relabel for publication: the index becomes 'Transform', and only the
# human-readable columns are kept for the LaTeX table.
df.index.names = ['Transform']
df.columns = ['size(bytes)', 'size(bytes)_ants', 'Size Ratio', 'h5 Size (Gb)', 'Size (Gb)']
df_toWrite = df[['Size (Gb)', 'h5 Size (Gb)', 'Size Ratio']]
h5size_table_f = 'h5SizeTable_{}.tex'.format(datetime.date.today().strftime('%Y%m%d'))
# Flip to True to regenerate the LaTeX table on disk.
write_table = False
if write_table:
    print('writing : ', h5size_table_f)
    with open(h5size_table_f, 'w') as f:
        f.write(df_toWrite.to_latex())
In [9]:
# Downsampling factor at each level of the h5 multiscale pyramid.
factors_by_level = {0: 1, 1: 2, 2: 4}
err_data_f = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/examples/errTableLevels.csv"
err_df = pd.read_csv(err_data_f)
# Translate the integer level into its downsampling factor
# (lambda keeps the original KeyError behavior on unknown levels).
err_df['downsample factor'] = err_df['level'].map(lambda lv: factors_by_level[lv])
In [10]:
# Quantization-error table: one row per (transform, downsample factor).
h5err_table_f = 'h5QuantErrorTable_{}.tex'.format(datetime.date.today().strftime('%Y%m%d'))
err_toWrite = err_df[['xfm', 'downsample factor', 'avg', 'max']]
# NOTE(review): in-place set_index on a column slice — works, but relies on
# pandas copy semantics; confirm no SettingWithCopyWarning on re-run.
err_toWrite.set_index(['xfm', 'downsample factor'], inplace=True)
pd.options.display.float_format = '{:,.3f}'.format
# Flip to True to regenerate the LaTeX table on disk.
write_table = False
if write_table:
    print('writing : ', h5err_table_f)
    with open(h5err_table_f, 'w') as f:
        f.write(err_toWrite.to_latex())
In [104]:
# Per-skeleton transform timing: raw ANTs fields vs. quantized h5 fields.
h5IndivTransformF = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/h5timesIndv.csv"
antsIndivTransformF = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/antstimesIndv.csv"
h5SkelTimes = pd.read_csv( h5IndivTransformF )
antsSkelTimes = pd.read_csv( antsIndivTransformF )
print( 'ants mean skel time (ms): ', antsSkelTimes['time'].mean() )
print( 'ants std skel time (ms): ', antsSkelTimes['time'].std() )
print( 'h5 mean skel time (ms): ', h5SkelTimes['time'].mean() )
print( 'h5 std skel time (ms): ', h5SkelTimes['time'].std() )
# FIX: this label was printed with no value attached; report the sample counts.
print( 'num skels: ', len(antsSkelTimes), '(ants),', len(h5SkelTimes), '(h5)' )
print( ' ')
# Same statistics in seconds for the paper text.
print( 'ants mean skel time (s): ', (antsSkelTimes['time']/1000).mean() )
print( 'ants std skel time (s): ', (antsSkelTimes['time']/1000).std() )
print( 'h5 mean skel time (s): ', (h5SkelTimes['time']/1000).mean() )
print( 'h5 std skel time (s): ', (h5SkelTimes['time']/1000).std() )
print( ' ' )
# How many times faster the h5 lookup is than the raw ANTs field, on average.
print( 'relative speedup: ', (antsSkelTimes['time'].mean()/h5SkelTimes['time'].mean()))
In [112]:
# Whole-image transform timing: raw ANTs fields vs. quantized h5 fields.
h5ImgTransformF = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/h5ImageTransformTimes.csv"
antsImgTransformF = "/groups/saalfeld/home/bogovicj/dev/template/template-building-pub/scripts/h5Analysis/antsImageTransformTimes.csv"
h5ImgTimes = pd.read_csv(h5ImgTransformF)
antsImgTimes = pd.read_csv(antsImgTransformF)
# Vectorized ms -> s conversion (equivalent to the former row-wise applies).
h5ImgTimes['time(s)'] = h5ImgTimes['time'] / 1000.
antsImgTimes['time(s)'] = antsImgTimes['time'] / 1000.
print( 'ants mean img time (s): ', antsImgTimes['time(s)'].mean() )
print( 'ants std img time (s): ', antsImgTimes['time(s)'].std() )
print( ' ' )
print( 'h5 mean img time (s): ', h5ImgTimes['time(s)'].mean() )
print( 'h5 std img time (s): ', h5ImgTimes['time(s)'].std() )
In [85]:
# Per-dataset sizes inside the quantized multiscale h5 containers.
h5DatasetSizesF = "/groups/saalfeld/public/jrc2018/transformations/quantized_multiscale/sizesByDataset.csv"
h5datasetSizes_table_f = 'h5DatasetSizesTable_raw_{}.tex'.format(datetime.date.today().strftime('%Y%m%d'))
pd.options.display.float_format = '{:,.4f}'.format
h5DatasetSizes = pd.read_csv(h5DatasetSizesF)
# Derived size columns in decimal GB and MB.
h5DatasetSizes['sizeGb'] = h5DatasetSizes['size'] / 1e9
h5DatasetSizes['sizeMb'] = h5DatasetSizes['size'] / 1e6
# Flip to True to regenerate the LaTeX table on disk.
write_table = False
if write_table:
    print('writing : ', h5datasetSizes_table_f)
    with open(h5datasetSizes_table_f, 'w') as f:
        f.write(h5DatasetSizes.to_latex())
# h5DatasetSizes
In [97]:
# Attach ANTs-relative size ratios and a combined transform label to the
# per-dataset table.
df_h5SizesErrs = h5DatasetSizes
df_h5SizesErrs

# Map (transform, direction) -> raw ANTs field size in bytes, built from
# ants_transform_sizes. The entries there alternate forward ('*_Warp.nii')
# and inverse ('*_InverseWarp.nii') per transform, which is exactly the
# indexing the former ten-branch if/elif chain hard-coded.
ants_size_lookup = {
    (name.replace('_InverseWarp.nii', '').replace('_Warp.nii', ''),
     'I' if 'Inverse' in name else 'F'): size
    for name, size in ants_transform_sizes
}

def sizeRatios( x ):
    """Return (quantized h5 dataset size) / (raw ANTs field size) for row x.

    x must provide 'transform' (e.g. 'JRC2018F_FAFB'), 'direction'
    ('F' forward or 'I' inverse), and 'size' (bytes). Returns -1 for
    unrecognized (transform, direction) pairs, matching the old fall-through.
    """
    ants_size = ants_size_lookup.get((x['transform'], x['direction']))
    if ants_size is None:
        return -1
    return x['size'] / ants_size

df_h5SizesErrs['sizeRatio'] = df_h5SizesErrs.apply( lambda x: sizeRatios(x), axis=1)
# Inverse ratio: how many times smaller the h5 dataset is than the ANTs field.
df_h5SizesErrs['sizeRatio_oneover'] = df_h5SizesErrs.apply( lambda x: 1.0/x['sizeRatio'], axis=1)
# Label like 'JRC2018F_FAFB(F)' used as a join key downstream.
df_h5SizesErrs['xfm'] = df_h5SizesErrs.apply(
    lambda x: '{}({})'.format(x['transform'], x['direction']) , axis=1)
In [100]:
# Final table: quantization errors joined with dataset sizes, keyed on
# (xfm, downsample factor).
h5datasetSizes_table_f = 'h5DatasetErrorsSizesTable_raw_{}.tex'.format(datetime.date.today().strftime('%Y%m%d'))
size_err_df = err_df.set_index(['xfm', 'downsample factor']).join(
    df_h5SizesErrs.set_index(['xfm', 'downsample factor']))
size_err_df_writeme = size_err_df[['avg', 'max', 'sizeMb', 'sizeRatio_oneover']]
# Flip to True to regenerate the LaTeX table on disk.
write_table = False
if write_table:
    print('writing : ', h5datasetSizes_table_f)
    with open(h5datasetSizes_table_f, 'w') as f:
        f.write(size_err_df_writeme.to_latex())
size_err_df_writeme
Out[100]:
In [ ]: