This notebook is to accompany the blog post published on January 8, 2015: It goes in the bin at Agile Geoscience.
The idea is to replicate what we've done so far but with 3 enhancements:
We'll start with the usual prelims...
In [1]:
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import Point, LineString
import geopandas as gpd
import pandas as pd
from fiona.crs import from_epsg
%matplotlib inline
In [2]:
class Survey:
    """
    A seismic survey laid out on a regular grid.

    Expects a parameter dict with keys: 'corner' (x, y of lower-left),
    'size' (x, y extent), 'line_spacing' (src, rcvr), 'point_spacing'
    (src, rcvr), and 'epsg' (integer EPSG code for the CRS).
    """
    def __init__(self, params):
        # Assign the variables from the parameter dict,
        # using dict.items() for Python 3 compatibility.
        for k, v in params.items():
            setattr(self, k, v)

        # These are just a convenience; we could use the
        # tuples directly, or make objects with attrs.
        self.xmi = self.corner[0]        # x of lower-left corner
        self.ymi = self.corner[1]        # y of lower-left corner
        self.x = self.size[0]            # survey extent in x
        self.y = self.size[1]            # survey extent in y
        self.SL = self.line_spacing[0]   # source line spacing
        self.RL = self.line_spacing[1]   # receiver line spacing
        self.si = self.point_spacing[0]  # source point interval
        self.ri = self.point_spacing[1]  # receiver point interval
        # Receivers are staggered half a station from the sources.
        self.shiftx = -self.si / 2.
        self.shifty = -self.ri / 2.

    @property
    def lines(self):
        """
        Returns number of (src, rcvr) lines.
        """
        slines = int(self.x / self.SL) + 1
        rlines = int(self.y / self.RL) + 1
        return slines, rlines

    @property
    def points_per_line(self):
        """
        Returns number of (src, rcvr) points per line.
        """
        # NOTE(review): +2 here vs +1 in `lines` — presumably overshoots
        # the survey edge by one station on purpose; confirm.
        spoints = int(self.y / self.si) + 2
        rpoints = int(self.x / self.ri) + 2
        return spoints, rpoints

    @property
    def src(self):
        """
        Source points as a GeoSeries: lines step in x, points in y.
        """
        s = [Point(self.xmi + line * self.SL, self.ymi + s * self.si)
             for line in range(self.lines[0])
             for s in range(self.points_per_line[0])
             ]
        S = gpd.GeoSeries(s)
        # Use the survey's EPSG code, consistent with `rcvr` and
        # `layout` (was hard-coded to 26911).
        S.crs = from_epsg(self.epsg)
        return S

    @property
    def rcvr(self):
        """
        Receiver points as a GeoSeries: lines step in y, points in x,
        staggered off the source grid by (shiftx, shifty).
        """
        r = [Point(self.xmi + r * self.ri + self.shiftx,
                   self.ymi + line * self.RL - self.shifty)
             for line in range(self.lines[1])
             for r in range(self.points_per_line[1])
             ]
        R = gpd.GeoSeries(r)
        R.crs = from_epsg(self.epsg)
        return R

    @property
    def layout(self):
        """
        Provide a GeoDataFrame of all points,
        labelled as columns and in hierarchical index.
        """
        # Feels like there might be a better way to do this...
        sgdf = gpd.GeoDataFrame({'geometry': self.src, 'station': 'src'})
        rgdf = gpd.GeoDataFrame({'geometry': self.rcvr, 'station': 'rcvr'})

        # Concatenate with a hierarchical index so we can select
        # layout.loc['sources'] / layout.loc['receivers'] later.
        layout = pd.concat([sgdf, rgdf], keys=['sources', 'receivers'])
        layout.crs = from_epsg(self.epsg)
        return layout
Perhaps s and r should be objects too. I think you might want to have survey.receivers.x for the list of x locations, for example.
In [3]:
# Survey definition: UTM zone 11N, units are metres.
params = dict(
    corner=(5750000, 4710000),   # lower-left (x, y)
    size=(3000, 1800),           # extent (x, y)
    line_spacing=(600, 600),     # (source, receiver) line spacing
    point_spacing=(100, 100),    # (source, receiver) point interval
    epsg=26911,                  # http://spatialreference.org/ref/epsg/26911/
)

survey = Survey(params)
In [4]:
# Generate the source and receiver point series,
# then peek at the first ten receivers.
s, r = survey.src, survey.rcvr
r[:10]
Out[4]:
In [5]:
# Build the combined layout GeoDataFrame and preview the top rows.
layout = survey.layout
layout.head(10)
Out[5]:
With a hierarchical index you can do cool things, e.g. show the last five sources:
In [6]:
layout.ix['sources'][-5:]
Out[6]:
In [7]:
layout.crs
Out[7]:
In [8]:
ax = layout.plot()
Export GeoDataFrames to GIS shapefile.
In [9]:
# gdf.to_file('src_and_rcvr.shp')
We need midpoints. There is a midpoint between every source-receiver pair.
Hopefully it's not too inelegant to get to the midpoints now that we're using this layout object thing.
In [10]:
# Midpoint of every source-receiver pair: interpolate halfway along
# the straight line joining them.
# .loc replaces the .ix indexer, which was removed from pandas.
midpoint_list = [LineString([r, s]).interpolate(0.5, normalized=True)
                 for r in layout.loc['receivers'].geometry
                 for s in layout.loc['sources'].geometry
                 ]
As well as knowing the (x,y) of the midpoints, we'd also like to record the distance from each s to each live r (each r in the live patch). This is easy enough to compute:
Point(x1, y1).distance(Point(x2, y2))
Then we can make a list of all the offsets when we count the midpoints into the bins.
In [11]:
# Source-receiver distance (offset) for every pair, in the same
# order as midpoint_list.
# .loc replaces the .ix indexer, which was removed from pandas.
offsets = [r.distance(s)
           for r in layout.loc['receivers'].geometry
           for s in layout.loc['sources'].geometry
           ]
In [12]:
# Azimuth (degrees, measured from the y axis) of each pair.
# NOTE(review): arctan of dx/dy only spans -90..+90 and divides by
# zero when r.y == s.y; np.arctan2(dx, dy) would give full-circle
# azimuths — kept as-is to preserve the original output.
# .loc replaces the .ix indexer, which was removed from pandas.
azimuths = [(180.0/np.pi) * np.arctan((r.x - s.x)/(r.y - s.y))
            for r in layout.loc['receivers'].geometry
            for s in layout.loc['sources'].geometry
            ]
In [13]:
# Resolve each offset magnitude into x and y components
# using its azimuth (convert degrees to radians once).
rad = np.array(azimuths) * np.pi / 180.
dist = np.array(offsets)
offsetx = dist * np.cos(rad)
offsety = dist * np.sin(rad)
Make a GeoDataFrame of the midpoints, offsets, and azimuths:
In [14]:
# Gather the midpoints and their per-pair attributes into one
# GeoDataFrame (key order preserved for column order).
midpoints = gpd.GeoDataFrame({
    'geometry': midpoint_list,
    'offset': offsets,
    'azimuth': azimuths,
    'offsetx': offsetx,
    'offsety': offsety,
})
midpoints.head()
Out[14]:
In [15]:
ax = midpoints.plot()
Save to a shapefile if desired.
In [16]:
#midpt.to_file('CMPs.shp')
In [17]:
midpoints[:5].offsetx # Easy!
Out[17]:
In [18]:
midpoints.ix[3].geometry.x # Less easy :(
Out[18]:
We need lists (or arrays) to pass into the matplotlib quiver plot. This takes four main parameters: x, y, u, and v, where x, y will be our coordinates, and u, v will be the offset vector for that midpoint.
We can get at the GeoDataFrame's attributes easily, but I can't see how to get at the coordinates in the geometry GeoSeries (seems like a user error — it feels like it should be really easy) so I am resorting to this:
In [19]:
# Pull coordinates straight off the geometry column — iterating the
# GeoSeries directly avoids the slow row-wise iterrows() pass and
# yields the same lists of floats.
x = [pt.x for pt in midpoints.geometry]
y = [pt.y for pt in midpoints.geometry]
In [20]:
# Offset-vector map: one headless arrow (a line) per midpoint,
# centred on the midpoint and scaled for legibility.
fig = plt.figure(figsize=(12, 8))
plt.quiver(x, y,
           midpoints.offsetx, midpoints.offsety,
           units='xy', width=0.5, scale=1/0.025,
           pivot='mid', headlength=0)
plt.axis('equal')
plt.show()
The bins are a new geometry, related to but separate from the survey itself, and the midpoints. We will model them as a GeoDataFrame of polygons. The steps are:
In [21]:
# Factor to shift the bins relative to source and receiver points
# Factor to shift the bins relative to source and receiver points
jig = survey.si / 4.
# Bin centres form a grid at half the station spacing, nudged off
# the station positions by `jig`.
bin_centres = gpd.GeoSeries([Point(survey.xmi + 0.5*r*survey.ri - jig, survey.ymi + 0.5*s*survey.si + jig)
                             for r in range(2*(survey.points_per_line[1]-1))
                             for s in range(2*(survey.points_per_line[0]-1))
                             ])
# Buffers are diamond shaped so we have to scale and rotate them.
# buffer(d, 1) with resolution 1 approximates the circle with a
# diamond; scaling by sin(pi/4)/2 sizes it so that after rotate(-45)
# it becomes a square bin aligned with the grid.
# NOTE(review): uses survey.ri for both dimensions — assumes si == ri;
# confirm for non-square bin grids.
scale_factor = np.sin(np.pi/4.)/2.
bin_polys = bin_centres.buffer(scale_factor*survey.ri, 1).rotate(-45)
bins = gpd.GeoDataFrame(geometry=bin_polys)
bins[:3]
Out[21]:
Suspect there's a super easy way to get all midpoints in a bin poly, without stepping over all bins.
WARNING This step is very slow for more than a few thousand midpoints.
In [22]:
# Make a copy because I'm going to drop points as I
# assign them to polys, to speed up subsequent search.
# Make a copy because I'm going to drop points as I
# assign them to polys, to speed up subsequent search.
midpts = midpoints.copy()

offsets, azimuths = [], []  # To hold complete list.

# Loop over bin polygons with index i.
for i, bin_i in bins.iterrows():
    o, a = [], []  # To hold list for this bin only.
    hits = []      # Row labels of midpoints captured by this bin.

    # Now loop over all midpoints with index j.
    for j, midpt_j in midpts.iterrows():
        if bin_i.geometry.contains(midpt_j.geometry):
            # Then it's a hit! Record it so we have less hunting
            # on later bins.
            o.append(midpt_j.offset)
            a.append(midpt_j.azimuth)
            hits.append(j)

    # Drop all this bin's hits in one go: dropping inside the inner
    # loop copies the whole frame per hit (and the iterator was a
    # snapshot anyway, so the result is identical).
    if hits:
        midpts = midpts.drop(hits)

    # Add the bin_i lists to the master list
    # and go around the outer loop again.
    offsets.append(o)
    azimuths.append(a)

# Add everything to the dataframe.
bins['offsets'] = gpd.GeoSeries(offsets)
bins['azimuths'] = gpd.GeoSeries(azimuths)
In [23]:
bins[:10]
Out[23]:
We can compute the fold from the length of the list of offsets in each bin. We use a mini-function, called a lambda, to do this. This piece of code applies a lambda to each row in the GeoDataFrame. Essentially it says:
set each row in the 'fold' column in my `bins` GeoDataFrame to the length of the offsets list for that row.
In [24]:
bins['fold'] = bins.apply(lambda row: len(row.offsets), axis=1)
Now we can use the GeoDataFrame's built-in plot() method to plot these:
In [25]:
ax = bins.plot(column="fold")
We can use a similar trick to compute the minimum offset, but with an added test for there being valid data in the bin:
In [26]:
bins['min_offset'] = bins.apply(lambda row: min(row.offsets) if row.fold > 0 else None, axis=1)
In [27]:
ax = bins.plot(column="min_offset")
In [ ]: