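Merges two serialized (dill) datasets into a single one, offsetting the second dataset's program indexes so that they point into the concatenated program list.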


In [2]:
%run ../../utils.ipynb
import dill
from dataset import *
# Both input datasets are read from the same scratch directory.
_input_dir = '/data/scratch/mmerouani/data/'
filename1 = _input_dir + 'speedup_dataset_research_batch1001-2500.pkl'
filename2 = _input_dir + 'speedup_dataset_research_batch2501-3000.pkl'
# Relative path: the merged dataset is written to the current working directory.
merged_filename = 'speedup_dataset_research_batch1001-3000.pkl'

In [3]:
# Load the two serialized dataset dictionaries that will be merged.
# NOTE(review): dill.load (like pickle.load) can execute arbitrary code stored
# in the file; only load dataset files that come from a trusted source.
# The `with` blocks guarantee each file handle is closed even if loading fails.
print("loading dataset 1")
with open(filename1, 'rb') as f:
    dataset_dict1 = dill.load(f)
print("loading dataset 2")
with open(filename2, 'rb') as f:
    dataset_dict2 = dill.load(f)


loading dataset 1
loading dataset 2

In [4]:
# Sizes of the first dataset: the program count is the offset applied to the
# second dataset's program indexes, and the schedule count marks where the
# second dataset's entries start in the merged arrays.
nb_prog_ds1 = len(dataset_dict1['programs'])
nb_schedules_ds1 = len(dataset_dict1['schedules'])

# These four entries are merged by appending dataset 2 after dataset 1.
programs, schedules_array, exec_times_array, speedup_array = (
    dataset_dict1[key] + dataset_dict2[key]
    for key in ('programs', 'schedules', 'exec_times', 'speedup')
)

# Program indexes refer to positions in the 'programs' list, so the ones coming
# from dataset 2 must be shifted past dataset 1's programs before appending.
ds2_program_indexes = [nb_prog_ds1 + idx for idx in dataset_dict2['program_indexes']]
program_indexes = dataset_dict1['program_indexes'] + ds2_program_indexes

In [5]:
# Bundle the merged arrays under the same keys that are read from the input datasets.
save = {
    'programs': programs,
    'program_indexes': program_indexes,
    'schedules': schedules_array,
    'exec_times': exec_times_array,
    'speedup': speedup_array,
}
# The context manager flushes and closes the file even if dill.dump raises,
# so a failed dump cannot leave a dangling open handle behind.
with open(merged_filename, 'wb') as f:
    dill.dump(save, f)

In [6]:
# Checking: reload the merged dataset from disk so that the verification below
# runs against what was actually written, not the in-memory objects.
print("Checking the result")
# `with` closes the file handle even if deserialization fails.
with open(merged_filename, 'rb') as f:
    mds = dill.load(f)


Checking the result

In [7]:
# Checking: compare the reloaded merged dataset (mds) with the two in-memory inputs.

# Without this check a merged file holding fewer schedules than expected would
# pass the loop below silently, since the loop only visits entries present in mds.
assert len(mds['schedules']) == nb_schedules_ds1 + len(dataset_dict2['schedules']), \
    "error0: merged dataset has an unexpected number of schedules"

for i, schedule in enumerate(mds['schedules']):
    # The first 12 characters of a schedule name must equal the name of the
    # program its index points to (presumably program names are 12 characters
    # long -- TODO confirm).
    program = mds['programs'][mds['program_indexes'][i]]
    assert schedule.name[:12] == program.name, "error1 at i="+str(i)

    # Entries below nb_schedules_ds1 come from dataset 1; the rest come from
    # dataset 2, shifted by the number of schedules in dataset 1.
    if i < nb_schedules_ds1:
        source, j = dataset_dict1, i
    else:
        source, j = dataset_dict2, i - nb_schedules_ds1

    # Logical `and` (rather than bitwise `&`) expresses the conjunction and
    # short-circuits once the speedup comparison already fails.
    assert mds['speedup'][i] == source['speedup'][j] and schedule == source['schedules'][j], "error2 at i="+str(i)
print("Done")


Done

In [ ]: