In [15]:
import json
import os, re
from glob import glob
from pprint import pprint

In [16]:
# Location of the BIDS folder to be analyzed.
# Set the BIDS_ROOT environment variable to analyze a different dataset;
# the literal path below is only used as a fallback when it is not set.
os.chdir(os.environ.get('BIDS_ROOT', '/Users/wtriplett/Desktop/Completed/ds030_final'))

In [48]:
def decompose_json_location (f):
    """Split a BIDS JSON path into its components.

    The path must contain one of the imaging folders (anat, fmap, func or dwi)
    directly followed by a file name of the form
    ``sub-<label>[_ses-<label>][_task-<label>][_run-<label>]_<rest>.json``.

    Returns a 5-tuple ``(folder, subject, session, group, path)``:
      * ``folder``  -- 'anat', 'fmap', 'func' or 'dwi'
      * ``subject`` -- the ``sub-<label>`` part of the file name
      * ``session`` -- the ``ses-<label>`` part, or None when absent
      * ``group``   -- the ``task-<label>`` part when present, otherwise the
                       remainder of the file name (``<rest>``)
      * ``path``    -- ``f`` itself, unchanged

    Raises ValueError when ``f`` does not follow this layout (instead of
    failing with an AttributeError on a missing regex match).
    """
    # Raw strings, and an escaped dot so that only a literal '.json' suffix matches.
    the_re = (r'^(?:.*)/(anat|fmap|func|dwi)/(sub-[^_]+)(?:_(ses-[^_]+))?'
              r'(?:_(task-[^_]+))?(?:_(run-[^_]+))?(?:_(.+))\.json$')
    r = re.match(the_re, f)
    if r is None:
        raise ValueError('Not a recognized BIDS imaging JSON path: %r' % (f,))
    return (r.group(1),
            r.group(2),
            r.group(3),
            # Group by task when the file has one, else by the trailing suffix.
            r.group(4) or r.group(6),
            f)
    
def get_all_jsons ():
    """Collect the imaging-related .json files below every 'sub-*' entry of
    the current working directory.

    Files whose name contains 'beh' or 'physio' are left out. Returns a list
    of paths relative to the current working directory."""
    excluded_tags = ('beh', 'physio')
    found = []
    for subject_dir in glob('sub-*'):
        for dirpath, _dirnames, filenames in os.walk(subject_dir):
            found.extend(
                os.path.join(dirpath, name)
                for name in filenames
                if name.endswith('.json')
                and not any(tag in name for tag in excluded_tags)
            )

    return found

In [49]:
def _compare_equal (a, b, float_eps=1e-6):
    """Compares two things for equality.
    I think it doesn't work with dictionaries that are nested inside the argument dictionaries, though.
    It was desined to work with simple data structure of primitive types and maybe a list.
    There may be a better way to do this in python, but eventually the issue
    of floating point precision would come up, so maybe this method is OK."""
    if type(a) != type(b):
        return False
    if type(a) in [ bool, str, long, int, unicode ]: #type(a) == int or type(b) == str:
        if a == b:
            return True
    elif type(a) == float:
        if abs(a - b) < float_eps:
            return True
    elif type(a) == list:
        if len(a) != len(b):
            return False
        if all(map(_compare_equal, a, b)):
            return True

    return False
    
def dict_intersect (ref, mov):
    """Return the entries that two dictionaries have in common.

    An entry of ``ref`` is kept when its key is also in ``mov``, both values
    have the same type, and _compare_equal considers the values equal. The
    values in the result are taken from ``ref``."""
    return {
        key: ref_value
        for key, ref_value in ref.items()
        if key in mov
        and type(ref_value) == type(mov[key])
        and _compare_equal(ref_value, mov[key])
    }

In [51]:
# Gather up the imaging-related .json files found under the sub-* folders
# of the current working directory (see get_all_jsons above).
jsons = get_all_jsons()

In [52]:
# Extract the information from the path names.
# Stored as a real list: on Python 3 a bare map() is a one-shot iterator, so
# re-running a later cell that loops over it would silently see no entries.
jsons_tupled = list(map(decompose_json_location, jsons))

In [53]:
# Bucket the file paths by their task/modality label (element 3 of each tuple);
# element 4 is the path itself. The intersection is computed per bucket.
files_per_set = {}

for j in jsons_tupled:
    files_per_set.setdefault(j[3], []).append(j[4])

In [54]:
# For every task/modality group, reduce the metadata of all member files to the
# key/value pairs they share, then write the result to the current working
# directory as '<group>_intersection.json'.
for key, group_files in files_per_set.items():
    # Seed the running intersection with the first file of the group.
    with open(group_files[0], 'r') as f:
        reference_data = json.load(f)

    # Narrow it down against every file in the group (the first one included).
    for test_file in group_files:
        with open(test_file, 'r') as test_fp:
            test_data = json.load(test_fp)
        reference_data = dict_intersect(reference_data, test_data)

    outfile = '%s_intersection.json' % key
    with open(outfile, 'w') as out_fp:
        json.dump(reference_data, out_fp, indent=3, sort_keys=True)

In [ ]: