In [9]:
#!/usr/bin/python
#coding=utf-8
In [2]:
import os
import lxml
from lxml import etree
import xmltodict, sys, gc
from pymongo import MongoClient
gc.enable() # Enable garbage collection
# MongoClient() is called with no arguments, so pymongo's default connection
# settings are used.
client = MongoClient()
# Database "re" and its "streets" collection.
db = client.re
streetsDB = db.streets
# NOTE(review): presumably OSM `highway=*` values of interest; this list is not
# referenced by any of the visible cells — confirm it is still needed.
hwTypes = ['motorway', 'trunk', 'primary', 'secondary', 'tertiary', 'pedestrian', 'unclassified', 'service']
Reference: "High-performance XML parsing in Python with lxml" (IBM developerWorks) — http://www.ibm.com/developerworks/xml/library/x-hiperfparse/
In [39]:
def process_element(elem):
    """Append one element, serialized as XML text, to the file for its tag.

    ``node``, ``way`` and ``relation`` elements are written to the
    module-level file objects ``fnode``, ``fway`` and ``frelation``
    respectively, one element per record, each terminated by CR LF.
    Elements with any other tag are only echoed to stdout.

    NOTE(review): ``fnode`` / ``fway`` / ``frelation`` are not created in any
    of the visible cells. They must be opened (text mode) before this function
    is called, otherwise the matching branch raises ``NameError``.

    Args:
        elem: an lxml element, e.g. as yielded by ``etree.iterparse``.
    """
    print("element:",str(elem.attrib))

    tag = elem.tag
    if tag not in ("node", "way", "relation"):
        return

    # Serialize exactly once, and only when the record is actually written.
    record = etree.tostring(elem).decode('utf-8') + "\r\n"
    if tag == "node":
        fnode.write(record)
    elif tag == "way":
        fway.write(record)
    else:
        frelation.write(record)
In [165]:
from pprint import *
def fast_iter(context, func, file, maxline):
    """Walk an ``etree.iterparse`` stream and dump each element as a dict.

    Every element is serialized with ``etree.tostring``, printed, converted
    with ``xmltodict.parse`` and pretty-printed. The element is then cleared
    and its already-processed preceding siblings are detached from the parent
    so the partially built tree does not keep growing while a large file is
    streamed.

    The most recently parsed dict is published in the module-level name
    ``data2`` so that later cells can inspect it.

    Args:
        context: iterable of ``(event, elem)`` pairs, as returned by
            ``etree.iterparse``.
        func: per-element callback. Currently unused; kept so existing
            callers do not break.
        file: output file object. Currently unused; kept so existing
            callers do not break.
        maxline: when greater than 0, iteration stops as soon as the running
            element counter reaches this value. Values <= 0 mean "no limit".

    Errors raised while iterating are reported on stdout and not re-raised.
    """
    global data2

    print('Process XML...')
    placement = 0
    try:
        for event, elem in context:
            placement += 1
            # NOTE(review): the counter is incremented before this check, so
            # only `maxline - 1` elements are processed (maxline=2 -> 1
            # element). Left unchanged because later cells depend on which
            # element ends up in `data2`; confirm whether `>` was intended.
            if maxline > 0 and placement >= maxline:
                break

            print(placement,"elem: ")
            data = etree.tostring(elem)
            print(data)
            data2 = xmltodict.parse(data)
            pprint(data2)

            # Release the element's own content ...
            elem.clear()
            # ... and drop the siblings that precede it, which would
            # otherwise stay attached to the root for the whole parse.
            while elem.getprevious() is not None:
                del elem.getparent()[0]
    except Exception as ex:
        # Best-effort exploration helper: report and carry on.
        print("Error:",ex)
    # Drops only this function's reference; the caller still holds its own.
    del context
In [166]:
def process_tag(osmfile, tagname, maxline):
    """Stream all ``tagname`` elements of ``osmfile`` through ``fast_iter``.

    An output file named ``<osmfile>_<tagname>.json`` is created (or
    truncated) and handed to ``fast_iter`` together with ``process_element``
    as the per-element callback.

    Args:
        osmfile: path of the OSM XML file to parse.
        tagname: element tag to select; passed to ``etree.iterparse`` as
            ``tag``.
        maxline: element limit forwarded unchanged to ``fast_iter``.
    """
    filename_tag = osmfile + "_" + tagname + ".json"
    print("Filename output: ",filename_tag)
    # The context manager closes the output file even if parsing raises.
    with open(filename_tag,"w+") as ftag:
        context = etree.iterparse(osmfile, tag = tagname)
        fast_iter(context,process_element,ftag,maxline)
In [167]:
# Input OSM extract, given relative to the notebook's working directory.
osmfile = '../data/muenchen.osm'
# Only the 'way' run is active; the 'node' and 'relation' runs are kept
# commented out as alternatives.
#process_tag(osmfile,'node',5)
process_tag(osmfile,'way',2)
#process_tag(osmfile,'relation',0)
In [152]:
# `data2` is the module-level dict set by fast_iter(); it only exists after a
# process_tag() run has parsed at least one element.
pprint(data2)
In [164]:
# xmltodict yields a single dict (not a list) when the way has exactly one
# <nd> child and omits the key when it has none, so normalize to a list
# before iterating.
way_nds = data2["way"].get("nd") or []
if not isinstance(way_nds, list):
    way_nds = [way_nds]
for i in way_nds:
    print("nd=",i["@ref"])
In [163]:
# xmltodict yields a single dict (not a list) when the way has exactly one
# <tag> child and omits the key when it has none, so normalize to a list
# before iterating.
way_tags = data2["way"].get("tag") or []
if not isinstance(way_tags, list):
    way_tags = [way_tags]
for i in way_tags:
    print(i["@k"],"=",i["@v"])
In [171]:
import json
# Serialize the parsed element dict to a JSON string.
jsonStr = json.dumps(data2)
# pprint() on a str shows its quoted repr, not indented JSON.
pprint(jsonStr)
In [182]:
# Round-trip check: parse the JSON string back into Python objects.
jsonobj = json.loads(jsonStr)
pprint(jsonobj)
In [186]:
# Bare last expression: the notebook displays the way's "tag" entry from the
# round-tripped object.
jsonobj["way"]["tag"]
Out[186]:
In [ ]: