Comparison of HDF5 and CSV files produced for the same simulation


In [71]:
import os
import glob

import pandas as pd
import numpy as np

from atntools.features import get_simulation_data

In [72]:
# Compare each CSV biomass file against its HDF5 counterpart for one
# simulation set and report every pair whose data differ.
set_dir = '/Users/ben/SFSU/thesis/data/box/5-species/2-8-9-55-80/set183'
#features_orig = pd.read_csv(os.path.join(set_dir, 'features.set183.orig.csv'))
#features_test = pd.read_csv(os.path.join(set_dir, 'features.set183.test.csv'))

biomass_dir = os.path.join(set_dir, 'biomass-data')
csv_files = sorted(glob.glob(os.path.join(biomass_dir, '*.csv')))
h5_files = sorted(glob.glob(os.path.join(biomass_dir, '*.h5')))

# zip() stops at the shorter list, so a missing file would otherwise go
# unnoticed (and could shift the pairing of the remaining files).
if len(csv_files) != len(h5_files):
    print("Warning: found {} CSV files but {} HDF5 files; "
          "pairs are matched by sorted order".format(
              len(csv_files), len(h5_files)))

for csv_file, h5_file in zip(csv_files, h5_files):
    # Element [2] of the returned value is the table being compared;
    # presumably a pandas DataFrame of biomass data -- verify against
    # atntools.features.get_simulation_data.
    csv_df = get_simulation_data(csv_file)[2]
    h5_df = get_simulation_data(h5_file)[2]

    # Check shape and labels first: `==` on DataFrames raises ValueError
    # when they are not identically labelled. Then require every cell to
    # match, without assuming a fixed number of rows. NaN cells compare
    # unequal and are therefore reported as a difference.
    identical = (
        csv_df.shape == h5_df.shape
        and csv_df.index.equals(h5_df.index)
        and csv_df.columns.equals(h5_df.columns)
        and bool((csv_df == h5_df).all().all())
    )
    if not identical:
        print("{} and {} differ".format(csv_file, h5_file))