The idea is to replicate what we've done so far but with 3 enhancements:
We'll start with the usual prelims...
In [5]:
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import Point, LineString
import geopandas as gpd
import pandas as pd
from fiona.crs import from_epsg
%matplotlib inline
In [6]:
class Survey:
"""
A seismic survey.
"""
def __init__(self, params):
# Assign the variables from the parameter dict,
# using dict.items() for Python 3 compatibility.
for k, v in params.items():
setattr(self, k, v)
# These are just a convenience; we could use the
# tuples directly, or make objects with attrs.
self.xmi = self.corner[0]
self.ymi = self.corner[1]
self.x = self.size[0]
self.y = self.size[1]
self.SL = self.line_spacing[0]
self.RL = self.line_spacing[1]
self.si = self.point_spacing[0]
self.ri = self.point_spacing[1]
self.shiftx = -self.si/2.
self.shifty = -self.ri/2.
@property
def lines(self):
"""
Returns number of (src, rcvr) lines.
"""
slines = int(self.x/self.SL) + 1
rlines = int(self.y/self.RL) + 1
return slines, rlines
@property
def points_per_line(self):
"""
Returns number of (src, rcvr) points per line.
"""
spoints = int(self.y/self.si) + 2
rpoints = int(self.x/self.ri) + 2
return spoints, rpoints
@property
def src(self):
s = [Point(self.xmi+line*self.SL, self.ymi+s*self.si)
for line in range(self.lines[0])
for s in range(self.points_per_line[0])
]
S = gpd.GeoSeries(s)
S.crs = from_epsg(26911)
return S
@property
def rcvr(self):
r = [Point(self.xmi + r*self.ri + self.shiftx, self.ymi + line*self.RL - self.shifty)
for line in range(self.lines[1])
for r in range(self.points_per_line[1])
]
R = gpd.GeoSeries(r)
R.crs = from_epsg(self.epsg)
return R
@property
def layout(self):
"""
Provide a GeoDataFrame of all points,
labelled as columns and in hierarchical index.
"""
# Feels like there might be a better way to do this...
sgdf = gpd.GeoDataFrame({'geometry': self.src, 'station': 'src'})
rgdf = gpd.GeoDataFrame({'geometry': self.rcvr, 'station': 'rcvr'})
# Concatenate with a hierarchical index
layout = pd.concat([sgdf,rgdf], keys=['sources','receivers'])
layout.crs = from_epsg(self.epsg)
return layout
Perhaps s and r should be objects too. I think you might want to have survey.receivers.x for the list of x locations, for example.
In [7]:
params = {'corner': (5750000,4710000),
'size': (3000,1800),
'line_spacing': (600,600),
'point_spacing': (100,100),
'epsg': 26911 # http://spatialreference.org/ref/epsg/26911/
}
survey = Survey(params)
In [8]:
s = survey.src
r = survey.rcvr
r[:10]
Out[8]:
In [9]:
layout = survey.layout
layout[:10]
Out[9]:
With a hierarchical index you can do cool things, e.g. show the last five sources:
In [10]:
layout.ix['sources'][-5:]
Out[10]:
In [11]:
layout.crs
Out[11]:
In [12]:
ax = layout.plot()
Export GeoDataFrames to GIS shapefile.
In [13]:
# gdf.to_file('src_and_rcvr.shp')
We need midpoints. There is a midpoint between every source-receiver pair.
Hopefully it's not too inelegant to get to the midpoints now that we're using this layout object thing.
In [14]:
midpoint_list = [LineString([r, s]).interpolate(0.5, normalized=True)
for r in layout.ix['receivers'].geometry
for s in layout.ix['sources'].geometry
]
As well as knowing the (x,y) of the midpoints, we'd also like to record the distance from each s to each live r (each r in the live patch). This is easy enough to compute:
Point(x1, y1).distance(Point(x2, y2))
Then we can make a list of all the offsets when we count the midpoints into the bins.
In [15]:
offsets = [r.distance(s)
for r in layout.ix['receivers'].geometry
for s in layout.ix['sources'].geometry
]
In [16]:
azimuths = [(180.0/np.pi) * np.arctan((r.x - s.x)/(r.y - s.y))
for r in layout.ix['receivers'].geometry
for s in layout.ix['sources'].geometry
]
In [17]:
offsetx = np.array(offsets)*np.cos(np.array(azimuths)*np.pi/180.)
offsety = np.array(offsets)*np.sin(np.array(azimuths)*np.pi/180.)
Make a Geoseries of the midpoints, offsets and azimths:
In [18]:
midpoints = gpd.GeoDataFrame({
'geometry' : midpoint_list,
'offset' : offsets,
'azimuth': azimuths,
'offsetx' : offsetx,
'offsety' : offsety
})
midpoints[:5]
Out[18]:
In [19]:
ax = midpoints.plot()
Save to a shapefile if desired.
In [20]:
#midpt.to_file('CMPs.shp')
In [21]:
midpoints[:5].offsetx # Easy!
Out[21]:
In [22]:
midpoints.ix[3].geometry.x # Less easy :(
Out[22]:
We need lists (or arrays) to pass into the matplotlib quiver plot. This takes four main parameters: x, y, u, and v, where x, y will be our coordinates, and u, v will be the offset vector for that midpoint.
We can get at the GeoDataFrame's attributes easily, but I can't see how to get at the coordinates in the geometry GeoSeries (seems like a user error — it feels like it should be really easy) so I am resorting to this:
In [23]:
x = [m.geometry.x for i, m in midpoints.iterrows()]
y = [m.geometry.y for i, m in midpoints.iterrows()]
In [24]:
fig = plt.figure(figsize=(12,8))
plt.quiver(x, y, midpoints.offsetx, midpoints.offsety, units='xy', width=0.5, scale=1/0.025, pivot='mid', headlength=0)
plt.axis('equal')
plt.show()
The bins are a new geometry, related to but separate from the survey itself, and the midpoints. We will model them as a GeoDataFrame of polygons. The steps are:
In [30]:
# Factor to shift the bins relative to source and receiver points
jig = survey.si / 4.
bin_centres = gpd.GeoSeries([Point(survey.xmi + 0.5*r*survey.ri + jig, survey.ymi + 0.5*s*survey.si + jig)
for r in range(2*survey.points_per_line[1] - 3)
for s in range(2*survey.points_per_line[0] - 2)
])
# Buffers are diamond shaped so we have to scale and rotate them.
scale_factor = np.sin(np.pi/4.)/2.
bin_polys = bin_centres.buffer(scale_factor*survey.ri, 1).rotate(-45)
bins = gpd.GeoDataFrame(geometry=bin_polys)
bins[:3]
Out[30]:
In [31]:
ax = bins.plot()
Thank you to Jake Wasserman and Kelsey Jordahl for this code snippet, and many pointers.
This takes about 20 seconds to run on my iMac, compared to something close to 30 minutes for the old nested loops.
In [32]:
reindexed = bins.reset_index().rename(columns={'index':'bins_index'})
joined = gpd.tools.sjoin(reindexed, midpoints)
bin_stats = joined.groupby('bins_index')['offset']\
.agg({'fold': len, 'min_offset': np.min})
bins = gpd.GeoDataFrame(bins.join(bin_stats))
In [33]:
joined[:10]
Out[33]:
In [34]:
bins[:10]
Out[34]:
In [35]:
ax = bins.plot(column="fold")
In [36]:
ax = bins.plot(column="min_offset")
In [ ]: