Initialize software environment


In [17]:
import csv
from csv import DictWriter
import json
from dateutil import parser

Format Map/Reduce Output


In [21]:
# Load the map/reduce output. Each TSV row is "<date>\t<JSON>"; the JSON value
# is used below as a mapping of service category -> value for that date
# (presumably counts -- confirm against the map/reduce job).
category_count = {}

with open("./categoryCount.tsv", "r") as fp:
    for fields in csv.reader(fp, delimiter="\t"):
        if not fields:
            # csv.reader yields an empty list for blank lines (e.g. a
            # trailing newline); there is nothing to parse in that case.
            continue
        category_count[parser.parse(fields[0])] = json.loads(fields[1])

# sorted() instead of keys() + .sort(): on Python 3, dict.keys() returns a
# view that has no .sort() method, while sorted() works on Python 2 and 3.
keys = sorted(category_count)

# Build the header from the union of categories over *all* dates. Using only
# the first date's categories makes DictWriter raise ValueError as soon as a
# later date contains a category the first one lacked, and fails outright on
# empty input.
all_categories = set()
for counts in category_count.values():
    all_categories.update(counts)
service_categories = sorted(all_categories)

fieldnames = [u'date']
fieldnames.extend(service_categories)

# NOTE: when running on Python 3, the csv docs recommend opening the output
# with newline='' to avoid blank lines between rows on Windows.
with open("./categoryCount.csv", "w") as fp:
    writerobj = DictWriter(fp, fieldnames)
    writerobj.writeheader()

    for cur_date in keys:
        # Categories missing for a date are written as DictWriter's default
        # restval (an empty cell).
        row_dict = {u'date': cur_date}
        row_dict.update(category_count[cur_date])

        writerobj.writerow(row_dict)

In [ ]: