In [1]:
import os

# Directory that holds the two listing files this notebook reads
# (e.listing and files_web_accessible.listing).
# Defaults to the original author's local path; set the FWA_LISTING_ROOT
# environment variable to run the notebook against a copy elsewhere.
ROOT = os.environ.get(
    'FWA_LISTING_ROOT',
    '/Users/zephaniahgrunschlag/Desktop/PK2016/files_web_accessible_issue',
)

In [6]:
ls -ltrh $ROOT


total 23904
-rw-r--r--@ 1 zephaniahgrunschlag  staff    11M Dec 23 10:50 e.listing
-rw-r--r--@ 1 zephaniahgrunschlag  staff   1.2M Dec 23 10:53 files_web_accessible.listing

In [ ]:
data_root_stuff = !cat "$ROOT/e.listing"
print len(data_root_stuff)
data_root_stuff

In [ ]:
fwa_stuff = !cat "$ROOT/files_web_accessible.listing"
print len(fwa_stuff)
fwa_stuff = [ '/'.join(x.split('/')[1:]) for x in fwa_stuff]
fwa_stuff = [x for x in fwa_stuff if x not in  ['', '.DS_Store']]
print len(fwa_stuff)
fwa_stuff

In [ ]:
# Map each e.listing line, keyed by that line with its first two
# '/'-separated components removed, to the unmodified line.
# NOTE(review): lines that differ only in those leading components share a
# key, and the one appearing later in data_root_stuff wins — confirm that
# silent overwrite is acceptable.
lookup = {}
for listing_line in data_root_stuff:
    lookup['/'.join(listing_line.split('/')[2:])] = listing_line
lookup

In [ ]:
# For every fwa_stuff entry that is a key of lookup, record the matching
# full e.listing line.
found = dict(
    (entry, lookup[entry])
    for entry in fwa_stuff
    if entry in lookup
)
print(len(found))
found

In [ ]:
# Entries of fwa_stuff with no match in found, in their original order.
not_found = []
for entry in fwa_stuff:
    if entry not in found:
        not_found.append(entry)
not_found

In [ ]:
# Keep only the matches whose key contains at least one '/', i.e. whose key
# splits into more than one component.
found_files_only = {
    rel_path: full_line
    for rel_path, full_line in found.items()
    if '/' in rel_path
}
print(len(found_files_only))
found_files_only

In [60]:
# Tally the matched lines by their second '/'-separated component
# (index 1 after splitting), e.g. 'dataroot_2'.
data_root_cnt = {}
for full_line in found_files_only.values():
    data_root = full_line.split('/')[1]
    data_root_cnt[data_root] = data_root_cnt.get(data_root, 0) + 1
data_root_cnt


Out[60]:
{'dataroot_2': 7040, 'dataroot_4': 16304}

In [ ]: