In [1]:
    
#!/usr/bin/python
#coding=utf-8
    
OSM数据为XML格式。本文使用强大的数据下载包requests获取数据,并用网页和XML分析神器BeautifulSoup解析文件结构。 **注意**:由于BeautifulSoup会将全部数据读入内存后再处理,因此不适合大数据量的处理。
In [8]:
    
from bs4 import BeautifulSoup as bs
import requests as req
from pprint import *
    
In [ ]:
    
# Request the OSM map extract for a small bounding box over HTTP.
url = "http://api.openstreetmap.org/api/0.6/map?bbox=11.54,48.14,11.543,48.145"
try:
    # A timeout keeps the cell from hanging indefinitely if the server stalls.
    r = req.get(url, timeout=30)
    # Turn HTTP error statuses (4xx/5xx) into exceptions so they are reported
    # below instead of being silently treated as a successful download.
    r.raise_for_status()
    print(r)
except req.RequestException as ex:
    # Best effort: report network/HTTP failures without aborting the notebook.
    # Only requests' own errors are caught so unrelated bugs are not masked.
    print("Error:",ex)
    
In [3]:
    
# Download the same bounding-box extract with wget and save it as osm_test.osm
# (-O sets the output file name, -c asks wget to resume a partial download).
!wget -c -O osm_test.osm "http://api.openstreetmap.org/api/0.6/map?bbox=11.54,48.14,11.543,48.145"
    
    
In [5]:
    
# List the working directory (presumably to confirm osm_test.osm was written).
!ls -l -h
    
    
In [10]:
    
# Parse the downloaded OSM file with BeautifulSoup using the lxml parser.
# The file is opened in a `with` block so the handle is closed once parsing
# is done, and it is decoded explicitly as UTF-8 rather than with the
# platform's default encoding.
with open("osm_test.osm", encoding="utf-8") as osm_file:
    bsr = bs(osm_file, "lxml")

# Collect the <bounds> element(s) of the extract and show them.
mbr = bsr.find_all('bounds')
print(mbr)
    
    
In [11]:
    
# Gather every <node> element, report how many were found,
# then pretty-print the first five as a preview.
nodelist = bsr.find_all('node')
node_count = len(nodelist)
print("All Nodes:", node_count, ", list 0-5:")
pprint(nodelist[:5])
    
    
In [14]:
    
# Take the first parsed node as a sample and print its markup.
node = nodelist[0]
print(node)
    
    
In [15]:
    
# Display the sample node's attributes (the mapping exposed as `.attrs`).
node.attrs
    
    Out[15]:
In [16]:
    
# Print every attribute of the sample node, one "name : value" pair per line.
for attr_name, attr_value in node.attrs.items():
    print(attr_name, ":", attr_value)
    
    
In [17]:
    
import pandas as pd

# Build a DataFrame from the attribute mappings of the first ten nodes.
# The comprehension uses its own loop variable so the notebook-level `node`
# selected in an earlier cell is not overwritten as a side effect of this cell.
nodelist2 = [osm_node.attrs for osm_node in nodelist[0:10]]
df = pd.DataFrame(nodelist2)
df
    
    Out[17]:
注意:需要安装shapely和geopandas包。在Anaconda中先运行`source activate GISpark`激活环境,然后依次执行以下命令进行安装:
conda install -y -c https://conda.anaconda.org/conda-forge fiona  
conda install -y -c https://conda.anaconda.org/conda-forge gdal  
conda install -y -c https://conda.anaconda.org/conda-forge geopandas  
conda install -y -c https://conda.anaconda.org/conda-forge geojson
In [18]:
    
from shapely.geometry import (Point, LinearRing, LineString, Polygon, MultiPoint)
from geopandas import GeoSeries, GeoDataFrame
from geopandas.base import GeoPandasBase
def node2pandas(nodelist, limit=10):
    """Convert node elements into a pandas DataFrame of their attributes.

    Args:
        nodelist: Sliceable sequence of objects exposing an ``attrs`` mapping
            (for example the result of ``bsr.find_all('node')``).
        limit: Maximum number of leading nodes to convert. Defaults to 10,
            which matches the previous hard-coded behaviour; pass ``None``
            to convert every node in ``nodelist``.

    Returns:
        pandas.DataFrame with one row per converted node and one column per
        attribute key.
    """
    selected = nodelist if limit is None else nodelist[:limit]
    return pd.DataFrame([node.attrs for node in selected])
def pandas2geopandas(nodelist):
    """Placeholder that is not implemented yet; it ignores its argument and returns ``None``."""
    return None
def node2geopandas(nodelist):
    """Convert node elements into a GeoDataFrame of point features.

    The nodes are first turned into a DataFrame by ``node2pandas(nodelist)``
    (called with its defaults, so only the first 10 nodes are converted).
    A point geometry is then built from each row's ``lon``/``lat`` values.

    Args:
        nodelist: Sequence of node elements accepted by ``node2pandas``; the
            resulting frame must contain the columns ``id``, ``user``,
            ``lon``, ``lat``, ``timestamp``, ``uid`` and ``version``.

    Returns:
        GeoDataFrame with those attribute columns plus a ``geometry`` column
        of points tagged with EPSG:4326.

    Raises:
        KeyError: If one of the required columns is missing from the frame.
    """
    df = node2pandas(nodelist)
    # Coordinates are passed through float() before building the points
    # (presumably the parsed attribute values are strings). Point takes
    # (x, y), hence the (lon, lat) order.
    points = [
        Point(float(lon), float(lat))
        for lon, lat in zip(df['lon'], df['lat'])
    ]
    # NOTE(review): the {'init': ...} CRS form is reported as deprecated by
    # recent pyproj/geopandas; consider "EPSG:4326" once the minimum
    # supported geopandas version is confirmed.
    gs = GeoSeries(points, crs={'init': 'epsg:4326', 'no_defs': True})
    geodf = GeoDataFrame({
        'id': df["id"],
        # Previously this column was filled from df["id"], which duplicated
        # the node id instead of storing the user name.
        'user': df["user"],
        'lon': df["lon"],
        'lat': df["lat"],
        'timestamp': df["timestamp"],
        'uid': df["uid"],
        'version': df["version"],
        'geometry': gs,
    })
    return geodf
    
In [19]:
    
# Convert the parsed nodes into a GeoDataFrame and display it as the cell output.
gdf = node2geopandas(nodelist)
gdf
    
    Out[19]:
In [20]:
    
# Write the GeoDataFrame to disk under the given file name.
# NOTE(review): the .shp extension suggests ESRI Shapefile output is intended;
# confirm that this is the default driver of the installed geopandas.
filename = "osm_test.shp"
gdf.to_file(filename)
    
In [21]:
    
# List the working directory again (presumably to confirm the exported files exist).
!ls -l -h
    
    
In [ ]: