In [1]:
%matplotlib inline
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
Read US states geojson
In [2]:
gdf = gpd.read_file('zip://UScounties.zip')
# .shp or zip://filepath.zip or geojson or
# URL or ...
In [3]:
type(gdf)
Out[3]:
In [4]:
gdf.head(4)
Out[4]:
In [5]:
gdf.plot()
Out[5]:
In [6]:
gdf_us = gdf[~gdf.STATE_NAME.isin(['Alaska', 'Hawaii'])]
gdf_us.plot()
Out[6]:
In [7]:
gdf_va = gdf[gdf.STATE_NAME.eq('Virginia')]
gdf_va.head()
Out[7]:
Basic plot
In [8]:
gdf_va.plot()
Out[8]:
Styling the plot
In [9]:
ax = gdf_va.plot(figsize=(18, 6), alpha=0.4, edgecolor='k')
Taking a closer look at properties
In [10]:
gdf_va.iloc[0:3]
Out[10]:
In [11]:
# __repr__ plot the first row
gdf_va.iloc[0].geometry
Out[11]:
In [12]:
# get the bounds of a shape
gdf_va.geometry.iloc[0].bounds
Out[12]:
GeoDataFrame Ops
In [13]:
# area of each county
gdf_va.geometry.area
Out[13]:
In [14]:
# boundary of each county
gdf_va.geometry.boundary
Out[14]:
In [15]:
# get bounds for each shape
gdf_va.geometry.bounds
Out[15]:
In [16]:
# get bounds of the collection of counties
gdf_va.geometry.total_bounds
Out[16]:
Convex hull
In [17]:
# plot map with state's convex hull
hulls = gdf_va['geometry'].convex_hull
hulls.plot(ax=gdf_va.plot(figsize=(15, 5)), alpha=0.4, color='r')
Out[17]:
In [18]:
# plot map with state's bounding box
envelope = gpd.GeoSeries(hulls.envelope)
envelope.plot(ax=gdf_va.plot(figsize=(15, 5)), alpha=0.3, color='r')
Out[18]:
In [19]:
# plot counties's boundary
gdf_va.geometry.boundary.plot(figsize=(15, 5))
Out[19]:
In [20]:
gdf_va.head().plot()
Out[20]:
In [21]:
# merge few shapes into one
from shapely.ops import cascaded_union
boundary = gpd.GeoSeries(cascaded_union(gdf_va['geometry'].head()))
boundary.plot()
Out[21]:
In [22]:
boundary
Out[22]:
In [23]:
gdf_us.shape
Out[23]:
In [24]:
# Read data
df = pd.read_csv('county-data.csv', converters={'FIPS': str})
In [25]:
# See few ros
df.head()
Out[25]:
In [27]:
# merge on FIPS key/column
gdf_usm = gdf_us.merge(df, on=['FIPS'])
In [28]:
# merged data
gdf_usm.head(3)
Out[28]:
Which counties are insured well?
In [29]:
gdf_usm.plot(column='insured', figsize=(12, 9))
Out[29]:
In [30]:
gdf_usm['NAME'].value_counts().head(10)
Out[30]:
The Lincoln Counties
In [31]:
ax = gdf_usm.geometry.boundary.plot(linewidth=0.1, edgecolor='grey', figsize=(15, 9))
gdf_usm[gdf_usm['NAME'].str.contains('Lincoln')].plot(ax=ax)
Out[31]:
In [32]:
# Aggregation with dissolve
gdf_usm_states = gdf_usm.dissolve(by='STATE_NAME')
ax = gdf_usm_states.geometry.boundary.plot(linewidth=0.1, edgecolor='grey', figsize=(15, 9))
gdf_usm[gdf_usm['NAME'].str.contains('Lincoln')].plot(ax=ax)
Out[32]:
In [33]:
print(gdf_usm.geometry[0])
In [34]:
gdf_cities = gpd.read_file("zip://ne_110m_populated_places.zip")
# http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/110m/cultural/ne_110m_populated_places.zip
gdf_cities.head()
Out[34]:
In [35]:
gdf_rivers = gpd.read_file("zip://ne_50m_rivers_lake_centerlines.zip")
# http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/50m/physical/ne_50m_rivers_lake_centerlines.zip
gdf_rivers.head()
Out[35]:
In [36]:
ax = gdf_rivers.plot(figsize=(15, 5))
gdf_cities.plot(ax=ax, color='red')
Out[36]:
In [37]:
x0, y0, x1, y1 = gdf_usm.geometry.total_bounds
ax = gdf.plot(edgecolor='k', linewidth=0.2, facecolor='none', figsize=(15, 10))
gdf_rivers.plot(ax=ax)
gdf_cities.plot(ax=ax, color='red')
ax.set(xlim=(x0, x1), ylim=(y0, y1))
Out[37]:
In [38]:
from shapely.geometry import Point, Polygon, LineString
In [39]:
line = LineString([(0, 0), (1, 2), (2, 2)])
poly = line.buffer(1)
poly
Out[39]:
In [40]:
poly.contains(line)
Out[40]:
Create data from Lat Long
In [41]:
df1 = pd.DataFrame([
{'city': 'Reston', 'Latitude': 38.9586, 'Longitude': -77.3570},
{'city': 'Princeton', 'Latitude': 40.3573, 'Longitude': -74.6672},
])
df1['Point'] = [Point(*x) for x in zip(df1.Longitude, df1.Latitude)]
gdf1 = gpd.GeoDataFrame(df1, geometry='Point')
gdf1
Out[41]:
In [42]:
ax = gdf_usm_states.plot(edgecolor='k', linewidth=0.3, facecolor='none', figsize=(15, 10))
gdf1.plot(ax=ax)
Out[42]:
In [43]:
# filtering shapes/frames
gdf_usm[gdf_usm.contains(gdf1.iloc[0, -1])]
Out[43]:
In [44]:
mississippi = gdf_rivers[gdf_rivers['name'].eq('Mississippi') & gdf_rivers['featurecla'].eq('River')]
mississippi
Out[44]:
In [45]:
mississippi.plot()
Out[45]:
In [46]:
ax = gdf_usm.plot(edgecolor='k', linewidth=0.1, facecolor='none', figsize=(15, 10))
mississippi.plot(ax=ax)
Out[46]:
In [47]:
# Missouri crosses through these counties
gdf_usm[gdf_usm.crosses(mississippi.geometry.squeeze())]
Out[47]:
In [48]:
ax = gdf_usm.plot(edgecolor='k', linewidth=0.1, facecolor='none', figsize=(15, 10))
gdf_usm[gdf_usm.crosses(mississippi.geometry.squeeze())].plot(ax=ax)
Out[48]:
In [49]:
# Spatial joins on frames
gdf2 = gdf_cities[gdf_cities.within(gdf_usm.geometry.unary_union)]
gdf2
Out[49]:
In [50]:
cities_thick = gdf2.assign(geometry=gdf2.buffer(1))
gpd.overlay(gdf_usm, cities_thick, how='difference').plot()
Out[50]:
In [51]:
mississippi_thick = mississippi.assign(geometry=mississippi.buffer(1))
gpd.overlay(gdf_usm, mississippi_thick, how='difference').plot()
Out[51]:
In [52]:
from shapely.geometry import shape, mapping
shp = mapping(gdf.geometry[0])
shp
Out[52]:
In [53]:
shp['coordinates'] = pd.np.round(pd.np.array(shp['coordinates']), 2)
shp
Out[53]:
In [54]:
shape(shp)
Out[54]:
In [55]:
fairfax = gdf[gdf['NAME'].eq('Fairfax')]
fairfax
Out[55]:
In [56]:
# fairfax's neighbours
gdf[gdf.geometry.touches(fairfax.geometry.iloc[0])].plot()
Out[56]:
In [57]:
# States with larger boundaries
gdf_us.loc[gdf_us.geometry.length.sort_values(ascending=False).index].head()
Out[57]:
In [58]:
# VA counties geometric capitals
ax = gdf_va.plot(edgecolor='k', facecolor='none', figsize=(15, 10))
gdf_va.geometry.centroid.plot(ax=ax)
Out[58]: