In [ ]:
%matplotlib inline

In [ ]:
from __future__ import division, print_function
import os
import glob
import errno
try:
    import ConfigParser  # Python 2
except ImportError:  # Python 3 renamed the module
    import configparser as ConfigParser
from pprint import pprint
from tqdm import tqdm
import matplotlib.pyplot as plt
import h5py
import numpy as np

In [ ]:
def mkdir_p(path):
    """Create a directory like `mkdir -p`, ignoring already-existing directories."""
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise
            
def find_tomo_config(data_root):
    """Return the path of the first *.log file in data_root, or None if there is none."""
    log_files = sorted(glob.glob(os.path.join(data_root, '*.log')))
    if log_files:
        return log_files[0]
    return None
    
def read_config(config_path):
    """Read an INI-style config (e.g., a SkyScan acquisition .log) into a nested dict."""
    def as_dict(config):
        # Flatten the parser's internal state into {section: {option: value}}.
        d = dict(config._sections)
        for k in d:
            d[k] = dict(config._defaults, **d[k])
            d[k].pop('__name__', None)  # Python 2 keeps the section name here
        return d

    config = ConfigParser.RawConfigParser()
    config.optionxform = str  # keep option names case-sensitive
    config.read(config_path)
    return as_dict(config)
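
# A minimal usage sketch for read_config (the section and key names are
# assumptions based on the SkyScan-style .log consumed in main() below):
#
#     config = read_config('/path/to/Raw/sample_.log')
#     # -> plain nested dict, e.g.
#     # {'Acquisition': {'Filename Prefix': 'sample_', ...}, ...}
#     prefix = config['Acquisition']['Filename Prefix']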
    
def find_data_folders(data_root):
    """Find directories under data_root that have a 'Raw' subdirectory with a *.log file."""
    data_dirs = []
    for root, dirs, files in os.walk(data_root):
        if 'Raw' in dirs:  # looking for a 'Raw' subdirectory
            data_dir = os.path.join(root, 'Raw')
            log_file = find_tomo_config(data_dir)  # try to find a *.log file
            if log_file is not None:
                data_dirs.append(root)
    return data_dirs
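
# Expected on-disk layout (hypothetical sample name, for illustration only):
# each sample directory holds a 'Raw' subdirectory containing the projections
# and the acquisition log, e.g.
#
#     <data_root>/sample_A/Raw/<prefix>0000.tif ... <prefix>NNNN.tif
#     <data_root>/sample_A/Raw/<prefix>.log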

def check_datafiles(data_files):
    """Verify that the sorted file list forms a contiguous 0000..NNNN sequence."""
    for idf, df in enumerate(data_files):
        if '{:04d}.tif'.format(idf) not in df:
            raise ValueError('!!! File number {} missing. Found {}'.format(idf, df))
            
def write_projections_h5(out_file, data_files, overwrite=False):
    """Write all projection images into the 'data' dataset of an HDF5 file."""
    if os.path.exists(out_file) and not overwrite:
        print('!!! File {} exists. Skipping writing projections.'.format(out_file))
    else:
        print('Creating file: {}'.format(out_file))
        test_data = plt.imread(data_files[0])
        print('Size of projection image:', test_data.shape)
        print('Image data type:', test_data.dtype)
        print('Number of images:', len(data_files))
        with h5py.File(out_file, 'a') as h5f:
            if 'data' in h5f:  # overwriting: drop the stale dataset first
                del h5f['data']
            h5f.create_dataset('data', (len(data_files), test_data.shape[0], test_data.shape[1]),
                               chunks=True, dtype=test_data.dtype)
            for idf, df in enumerate(tqdm(data_files)):
                h5f['data'][idf] = plt.imread(df)
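
# Sketch of reading the projections back once write_projections_h5 has run
# (the layout follows the create_dataset call above):
#
#     with h5py.File(out_file, 'r') as h5f:
#         n_images, height, width = h5f['data'].shape
#         first_projection = h5f['data'][0]  # a single 2D frame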
                
def store_dict_hdf5(hdf5_file_name, input_dict):
    """
    Store a (possibly nested) dictionary in an HDF5 file.

    :param hdf5_file_name: path of the HDF5 file (created or appended to)
    :param input_dict: dictionary to store; nested dicts become HDF5 groups
    """

    def store_group(group, parent_group):
        """
        Store a group (not a value) in the HDF5 file.

        :param group: dictionary whose items should be stored
        :param parent_group: HDF5 group to store them under
        """

        for (k, v) in list(group.items()):
            if isinstance(v, dict):
                if k in parent_group:
                    del parent_group[k]
                tmp_group = parent_group.create_group(k)
                store_group(v, tmp_group)
            else:
                store_value(k, v, parent_group)

    def store_value(name, value, group):
        """
        Store a value (scalar, string, array, etc.) in the HDF5 file.

        :param name: dataset name ('/' characters are stripped, since '/' is
            the HDF5 path separator)
        :param value: data to store; None values are silently skipped
        :param group: HDF5 group to create the dataset in
        """
        if value is not None:
            if name in group:
                del group[name]
            if '/' in name:
                name = name.replace('/', '')
            try:
                group.create_dataset(name, data=value, chunks=True,
                                     compression='gzip', compression_opts=3
                                     )
            except ValueError:  # this h5py build does not support the compression
                group.create_dataset(name, data=value, chunks=True)
            except TypeError:  # scalars and strings do not support chunking/filters
                group.create_dataset(name, data=value)
            except Exception:
                print("!!! Error at name='{}' value='{}' group='{}'".format(name, value, group))
                raise

    with h5py.File(hdf5_file_name, 'a') as res_file:
        store_group(input_dict, res_file)
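
A quick smoke test for store_dict_hdf5 (the file name and contents below are hypothetical, not part of the pipeline): nested dicts should come back as HDF5 groups, leaf values as datasets.

In [ ]:
demo_file = '/tmp/store_dict_demo.h5'  # illustrative path
demo_dict = {'config': {'Acquisition': {'Filename Prefix': 'demo_'}},
             'drift': np.arange(6.0).reshape(3, 2)}
store_dict_hdf5(demo_file, demo_dict)
with h5py.File(demo_file, 'r') as h5f:
    print(h5f['config']['Acquisition']['Filename Prefix'][()])
    print(h5f['drift'][:])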

In [ ]:
def main():
    data_root = '/diskmnt/a/makov/yaivan' # path to folders with data
    tomo_dirs = find_data_folders(data_root=data_root)
    for tomo_dir in tomo_dirs:
        raw_dir = os.path.join(tomo_dir, 'Raw')
        print('Tomo data dir:\t', tomo_dir)
        
        config_file = find_tomo_config(raw_dir)
        config = read_config(config_file)
        
        object_name = config['Acquisition']['Filename Prefix']
        print('Object name:\t', object_name)
        
        # find projection files: prefix followed by a four-digit index
        data_files = sorted(glob.glob(os.path.join(raw_dir, object_name + '[0-9]' * 4 + '.tif')))
        print('Data files found:', len(data_files))
        print('First file:', data_files[0])
        print('Last file:', data_files[-1])
        check_datafiles(data_files)
        
        
        output_dir = os.path.join(tomo_dir, '_tmp')
        mkdir_p(output_dir)
        hdf5_file_name = os.path.join(output_dir, 'raw.h5')
        # store projections in the 'data' dataset
        write_projections_h5(hdf5_file_name, data_files)
        
        # store config
        store_dict_hdf5(hdf5_file_name, {'config':config})
        
        # store drift map, if present
        drift_file = os.path.join(raw_dir, object_name + '_TS.crv')
        if os.path.isfile(drift_file):
            print('Store drift file:', drift_file)
            drift_map = np.loadtxt(drift_file, skiprows=2)
            store_dict_hdf5(hdf5_file_name, {'drift': drift_map})
        else:
            print('!!! Drift file not found:', drift_file)
        
        print()
main()

In [ ]:
data_root = '/diskmnt/a/makov/yaivan'
!find {data_root} -type f -name raw.h5
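
One of the produced files can then be opened directly to check its contents (a minimal sketch; the path below is hypothetical, substitute one printed by the `find` above):

In [ ]:
# Illustrative inspection of one output file; the path is an assumption.
with h5py.File(os.path.join(data_root, 'some_sample/_tmp/raw.h5'), 'r') as h5f:
    h5f.visit(print)  # list every group/dataset in the file
    print('projections:', h5f['data'].shape, h5f['data'].dtype)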
