Using the very helpful Mapzen Weekly OSM Metro Extracts (https://mapzen.com/metro-extracts), I was able to download the OSM file for Boston, Massachusetts, USA.
In [7]:
!ls -la /Users/excalibur/Dropbox/nanodegree/data/
The boston_massachusetts.osm file is 366.4 MB.
In [8]:
!head /Users/excalibur/Dropbox/nanodegree/data/boston_massachusetts.osm
In [12]:
import xml.etree.cElementTree as ET
import pprint as pp
import os
In [13]:
# system beep
def finished():
    """Signal that a long-running cell is done by ringing the bell twice.

    Each ring is produced by running a shell ``printf`` through ``os.system``.
    """
    beep_command = "printf '\a'"
    for _ in range(2):
        os.system(beep_command)
In [14]:
# Survey the structure of the OSM extract: for every distinct tag name, count
# how often it occurs, which attributes it carries, and which tags appear one,
# two and three levels beneath it.
filename = '/Users/excalibur/Dropbox/nanodegree/data/boston_massachusetts.osm'
tags = {}

# iterparse() reports only "end" events unless told otherwise, so each element
# is seen once, after its whole subtree has been parsed. Elements are not
# cleared along the way because every ancestor still has to walk its
# descendants when its own "end" event arrives.
for event, element in ET.iterparse(filename):
    if element.tag not in tags:
        tags[element.tag] = {
            'count': 0,
            'attributes': {},
            'children': {},
            'grandchildren': {},
            'greatgrandchildren': {},
        }
    summary = tags[element.tag]
    summary['count'] += 1

    attributes = summary['attributes']
    for attribute_key in element.attrib:
        attributes[attribute_key] = attributes.get(attribute_key, 0) + 1

    children = summary['children']
    grandchildren = summary['grandchildren']
    # The original indexed a non-existent 'greatgrandchild' key here, which
    # raised KeyError as soon as any element had a great-grandchild.
    greatgrandchildren = summary['greatgrandchildren']
    for child in element:
        children[child.tag] = children.get(child.tag, 0) + 1
        for grandchild in child:
            grandchildren[grandchild.tag] = (
                grandchildren.get(grandchild.tag, 0) + 1)
            for greatgrandchild in grandchild:
                greatgrandchildren[greatgrandchild.tag] = (
                    greatgrandchildren.get(greatgrandchild.tag, 0) + 1)

# clean up unused dictionaries so the printed summary only shows tallies that
# actually recorded something
for summary in tags.values():
    for key in ('attributes', 'children', 'grandchildren',
                'greatgrandchildren'):
        if not summary[key]:
            del summary[key]

pp.pprint(tags)
finished()
To help focus this project, I realized that this might be the perfect time to try to implement an old idea I had with a buddy of mine.