In [ ]:
cd '/local_data/dgrossman/VeRi/'

In [ ]:
import json
import sys

import h5py
import numpy as np

def makeJsonList(fileName):
    """Read a JSON-lines file and return its records as a list.

    Every non-blank line of ``fileName`` is stripped of surrounding
    whitespace and parsed with ``json.loads``. Blank or whitespace-only
    lines (for example a trailing empty line at the end of the file) are
    skipped instead of being handed to the parser, where they would raise
    ``json.JSONDecodeError``.

    Parameters
    ----------
    fileName : str
        Path of the text file to read, one JSON document per line.

    Returns
    -------
    list
        The parsed objects, in file order. Empty if the file has no
        non-blank lines.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    """
    retval = []
    with open(fileName, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Nothing to parse on this line; json.loads('') would raise.
                continue
            retval.append(json.loads(line))
    return retval

def extractColumn(colName,jsonList,t):
    """Collect the values stored under ``colName`` across all records.

    The conversion applied depends on ``t``:

    * ``str``   -- each value becomes ``str(value)`` encoded to ASCII bytes,
      with characters that cannot be encoded dropped.
    * ``int``   -- each value is passed through ``int()``.
    * ``float`` -- each value is iterated and every element is passed through
      ``float()``; the elements of all records end up in ONE flat list
      (record boundaries are not kept).
    * anything else -- nothing is collected and an empty list is returned.

    Parameters
    ----------
    colName : key looked up in every record.
    jsonList : iterable of mappings (the parsed records).
    t : ``str``, ``int`` or ``float`` selecting the conversion.

    Returns
    -------
    list
        The converted values in record order.
    """
    if t == str:
        return [str(record[colName]).encode('ascii','ignore') for record in jsonList]
    if t == int:
        return [int(record[colName]) for record in jsonList]
    if t == float:
        # Nested comprehension flattens the per-record sequences.
        return [float(component) for record in jsonList for component in record[colName]]
    # Unrecognised type selector: nothing is extracted.
    return []

def make5file(file5Name, names, jsonList):
    """Write one HDF5 dataset per entry of ``names`` into ``file5Name``.

    The file is opened with mode ``'w'`` (an existing file of that name is
    replaced). Each entry of ``names`` is a 4-tuple
    ``(source key, dataset name, Python type, HDF5 dtype)``: the source key
    and Python type are handed to ``extractColumn`` to build the data, which
    is then stored under the dataset name with the given dtype. The source
    key and dataset name are printed as each dataset is processed.

    Parameters
    ----------
    file5Name : path of the HDF5 file to create.
    names : iterable of 4-tuples as described above.
    jsonList : the parsed records passed through to ``extractColumn``.
    """
    with h5py.File(file5Name,'w') as h5out:
        for spec in names:
            sourceKey, datasetName, pyType, h5Type = spec
            print(sourceKey,datasetName)
            columnValues = extractColumn(sourceKey,jsonList,pyType)
            h5out.create_dataset(datasetName,data=columnValues,dtype=h5Type)

def main(inFileName,outFileName):
    """Convert a JSON-lines record file into an HDF5 file.

    Reads the records from ``inFileName`` with ``makeJsonList`` and writes
    six datasets to ``outFileName`` through ``make5file``.

    Parameters
    ----------
    inFileName : path of the JSON-lines input file.
    outFileName : path of the HDF5 file to create.
    """
    jsonList = makeJsonList(inFileName)
    f = np.dtype('float')
    # Variable-length byte-string dtype for the text columns.
    c = h5py.special_dtype(vlen=bytes)
    # Each entry: (JSON key, HDF5 dataset name, Python type used by
    # extractColumn, dtype given to create_dataset).
    names = [
        ('colorID','colorID',int,int),
        ('vehicleID','vehicleID',str,c),
        ('resnet50','feats',float,f),
        ('imageName','ids',str,c),
        ('typeID','typeID',int,int),
        ('cameraID','cameraID',str,c),
    ]
    # Fixed: this previously passed the misspelled name `outFileNAme`,
    # which raised NameError on every call.
    make5file(outFileName,names,jsonList)
    
if __name__ == '__main__':
    # Cells executed in a Jupyter kernel also see __name__ == '__main__', but
    # there sys.argv carries the kernel launcher's own arguments rather than
    # an input/output file pair. Only use argv when not running in a kernel.
    if 'ipykernel' not in sys.modules:
        main(sys.argv[1],sys.argv[2])

In [ ]:
# Run the JSON -> HDF5 conversion: input ./test_uniqfile.json, output ./test_uniqfile.p5
# (both relative to the current working directory).
main('./test_uniqfile.json','./test_uniqfile.p5')

In [ ]:
# main() keeps its record list local, so load the records at notebook scope
# before inspecting them; otherwise `jsonList` is undefined on a fresh kernel.
# NOTE(review): the input path is assumed to be the same file passed to
# main() in the earlier cell -- confirm.
jsonList = makeJsonList('./test_uniqfile.json')

# Field names available in the first record.
jsonList[0].keys()

In [ ]:
# dtypes passed to h5py when the datasets are created.
f = np.dtype('float')
c = h5py.special_dtype(vlen=bytes)  # variable-length byte strings

# One entry per dataset:
# (JSON key, HDF5 dataset name, Python type for extractColumn, HDF5 dtype).
names = [
    ('colorID', 'colorID', int, int),
    ('vehicleID', 'vehicleID', str, c),
    ('resnet50', 'feats', float, f),
    ('imageName', 'ids', str, c),
    ('typeID', 'typeID', int, int),
    ('cameraID', 'cameraID', str, c),
]

In [ ]:
# Write the datasets described by `names` to the HDF5 file '1test_features'.
# Both `names` and `jsonList` must already exist at notebook scope; main()
# does not export its own local copies.
make5file('1test_features',names,jsonList)

In [ ]: