In [1]:
#!/usr/bin/python
#coding=utf-8
OSM 数据为 XML 格式。这里使用功能强大的数据下载包 requests 获取数据,并使用网页和 XML 解析工具 BeautifulSoup 解析文件结构。 **注意**:由于 BeautifulSoup 会将全部数据读入内存后再处理,因此不适合处理大数据量。
In [8]:
from bs4 import BeautifulSoup as bs
import requests as req
from pprint import *
In [ ]:
# Request the OSM map data for a small bounding box through the OSM API.
url = "http://api.openstreetmap.org/api/0.6/map?bbox=11.54,48.14,11.543,48.145"
try:
    # Without an explicit timeout, requests can wait indefinitely on an
    # unresponsive server and stall the whole notebook run.
    r = req.get(url, timeout=30)
    print(r)
except req.RequestException as ex:
    # Network-level failures (connection, timeout, bad URL) are reported
    # instead of aborting the notebook.
    print("Error:",ex)
In [3]:
# Download the same bounding box to the local file osm_test.osm, which a later cell opens and parses.
!wget -c -O osm_test.osm "http://api.openstreetmap.org/api/0.6/map?bbox=11.54,48.14,11.543,48.145"
In [5]:
# List the working directory in long format with human-readable sizes.
!ls -l -h
In [10]:
# Parse the downloaded OSM file. The context manager closes the file handle as
# soon as BeautifulSoup has read it, and the encoding is pinned to UTF-8 so the
# result does not depend on the platform's default locale.
# NOTE(review): "lxml" selects Beautiful Soup's lxml HTML parser; the bs4 docs
# list "xml" / "lxml-xml" for XML input -- consider switching after confirming
# that the downstream cells still behave the same.
with open("osm_test.osm", encoding="utf-8") as osm_file:
    bsr = bs(osm_file, "lxml")

# Look up the <bounds> element(s) of the downloaded extract.
mbr = bsr.find_all('bounds')
print(mbr)
In [11]:
# Gather every <node> element, report how many were found, and preview the first five.
nodelist = bsr.find_all('node')
print("All Nodes:", len(nodelist), ", list 0-5:")
pprint(nodelist[:5])
In [14]:
# Take the first parsed node as a sample; the following cells inspect it.
node = nodelist[0]
print(node)
In [15]:
# Show the sample node's attribute mapping as the cell output.
node.attrs
Out[15]:
In [16]:
# Print each attribute of the sample node as "name : value".
for attr_name, attr_value in node.attrs.items():
    print(attr_name, ":", attr_value)
In [17]:
import pandas as pd
# Build a DataFrame from the attribute mappings of the first ten nodes.
# A comprehension keeps its loop variable local, so the notebook-level `node`
# sample inspected in the earlier cells is not rebound when this cell runs.
nodelist2 = [osm_node.attrs for osm_node in nodelist[0:10]]
df = pd.DataFrame(nodelist2)
df
Out[17]:
注意:需要安装 shapely 和 geopandas 包。在 Anaconda 中先运行 `source activate GISpark` 激活环境,然后执行以下命令进行安装:
conda install -y -c https://conda.anaconda.org/conda-forge fiona
conda install -y -c https://conda.anaconda.org/conda-forge gdal
conda install -y -c https://conda.anaconda.org/conda-forge geopandas
conda install -y -c https://conda.anaconda.org/conda-forge geojson
In [18]:
from shapely.geometry import (Point, LinearRing, LineString, Polygon, MultiPoint)
from geopandas import GeoSeries, GeoDataFrame
from geopandas.base import GeoPandasBase
def node2pandas(nodelist, limit=10):
    """Convert parsed OSM node elements into a pandas DataFrame.

    Args:
        nodelist: Sliceable sequence of objects exposing an ``attrs`` mapping
            (for example the result of ``bsr.find_all('node')``).
        limit: Maximum number of leading nodes to convert. Defaults to 10,
            which keeps the original behaviour; pass ``None`` to convert
            every node in ``nodelist``.

    Returns:
        pandas.DataFrame with one row per converted node, built from each
        node's ``attrs`` mapping.
    """
    selected = nodelist if limit is None else nodelist[:limit]
    return pd.DataFrame([node.attrs for node in selected])
def pandas2geopandas(nodelist):
    """Placeholder that is not implemented yet: ignores its argument and returns None."""
    return None
def node2geopandas(nodelist):
    """Convert parsed OSM node elements into a GeoDataFrame of point geometries.

    The nodes are first tabulated with ``node2pandas``; each row's ``lon`` and
    ``lat`` values are converted with ``float()`` and turned into a shapely
    ``Point``.

    Args:
        nodelist: Sequence of node elements accepted by ``node2pandas``.

    Returns:
        GeoDataFrame with the columns ``id``, ``user``, ``lon``, ``lat``,
        ``timestamp``, ``uid``, ``version`` and ``geometry``.

    Raises:
        KeyError: If one of the expected attribute columns is missing from
            the tabulated nodes.
    """
    df = node2pandas(nodelist)

    # Point takes (x, y), so longitude comes first.
    points = [
        Point(float(lon), float(lat))
        for lon, lat in zip(df["lon"], df["lat"])
    ]

    # NOTE(review): newer pyproj/geopandas releases report the {'init': ...}
    # form as deprecated; confirm the installed version before changing it.
    gs = GeoSeries(points, crs={'init': 'epsg:4326', 'no_defs': True})

    geodf = GeoDataFrame({
        'id': df["id"],
        # Previously filled from df["id"], which duplicated the node id
        # instead of carrying the user name.
        'user': df["user"],
        'lon': df["lon"],
        'lat': df["lat"],
        'timestamp': df["timestamp"],
        'uid': df["uid"],
        'version': df["version"],
        'geometry': gs,
    })
    return geodf
In [19]:
# Convert the parsed nodes to a GeoDataFrame and display it as the cell output.
gdf = node2geopandas(nodelist)
gdf
Out[19]:
In [20]:
# Write the GeoDataFrame to disk.
# NOTE(review): presumably written as an ESRI Shapefile given the .shp name -- confirm for the installed geopandas.
filename = "osm_test.shp"
gdf.to_file(filename)
In [21]:
# List the working directory again (long format, human-readable sizes) after the export.
!ls -l -h
In [ ]: