In [1]:
%pylab inline
import numpy as np
import esenummet
import matplotlib.pyplot as plt
matplotlib.rcParams['font.size'] = 14
matplotlib.rcParams['font.family'] = 'serif'
# curve fitting
import scipy.optimize as scop
# interpolation
import scipy.interpolate as scip
Given a sample of discrete datapoints, curve fitting and interpolation allow for two different, yet related tasks:
To illustrate, consider the following scenarios:
In [2]:
esenummet.interpolation_vs_curve_fitting()
In the last session, we computed the average water table height on an island. In the meantime, a well has been drilled in the center of the island and a value of h = 1.5 m was determined. A simple plot of the water table would look like this:
In [3]:
# water level lake
h_L = 10.
# water level in the center of the island
h_c = 1.5
# cross-sectional length of the island
L = 100.
# data points
h = np.array([h_L, h_c, h_L])
# spatial coordinates
x = np.array([-L/2, 0., L/2])
# plotting
plt.plot(x, h, 'x', mew=2, ms=10)
plt.plot(x, h, '--')
plt.xlim(-L/2-L/4, L/2+L/4)
plt.ylim(0, h_L+h_L/4)
plt.xlabel('x')
plt.ylabel('water table height')
plt.show()
This corresponds to linear interpolation between datapoints. In the presented case, we know that the water table will not vary linearly, therefore the chosen interpolation type is a poor approximation. We can improve by applying our knowledge of the underlying process, and assume a quadratic interpolation to approximate the variable between known values, using scip.interp1d:
interp = scip.interp1d(x, y, kind='quadratic')
x_int = np.linspace(np.min(x), np.max(x), 20)
interp
to get the interpolated value:
y = interp(x_int)
In [4]:
# generate interpolation object
quad_interp1d = scip.interp1d(x, h, kind='quadratic')
# generate sample points within interpolation range
x_int = np.linspace(np.min(x), np.max(x), 20)
# evaluate interpolation at these sample points
h_int = quad_interp1d(x_int)
# plotting
plt.plot(x, h, 'x', mew=2, ms=10)
plt.plot(x_int, h_int, '--', marker = 'x')
plt.xlim(-L/2-L/4, L/2+L/4)
plt.ylim(0, h_L+h_L/4)
plt.xlabel('x')
plt.ylabel('water table height')
plt.show()
The kind
parameter can be
linear
, requires at least 2 datapointsquadratic
, requires at least 3 datapointscubic
, requires at least 4 datapointsWe can also explicitly evaluate the interpolation at a point:
In [5]:
print 'The water table at -20 m is', quad_interp1d(-20), 'm'
The following code snippet pretty much sums up what there is to know to apply one-dimensional interpolation in python:
In [6]:
# data
x = np.linspace(0, 10, 10)
y = np.cos(-x**2/8.0)
# linear and cubic interpolation objects
linear_interp1d = scip.interp1d(x, y, kind='linear')
cubic_interp1d = scip.interp1d(x, y, kind='cubic')
# points to evaluate interpolation at
x_int = np.linspace(np.min(x), np.max(x), 100)
# evaluating interpolation
y_int_linear = linear_interp1d(x_int)
y_int_cubic = cubic_interp1d(x_int)
# plot
plt.scatter(x, y, marker='x', s=45)
plt.plot(x_int, y_int_linear,'-', color='green')
plt.plot(x_int, y_int_cubic,'--', color='red')
plt.show()
Given surface height measurements
x = np.array([0, 5, 7, 9, 15, 20, 25, 29, 32, 34])
y = np.array([100, 75, 60, 90, 110, 120, 80, 90, 75, 80])
create a linear, quadratic and cubic interpolation of 100 points. Assuming the surface height points represent a hilly landscape
and the task is to assess total land mass, answer the following questions:
x=26
?
In [7]:
# data
x = np.array([0, 5, 7, 9, 15, 20, 25, 29, 32, 34])
y = np.array([100, 75, 60, 90, 110, 120, 80, 90, 75, 80])
# linear, quadratic and cubic fit
linear_interp1d = scip.interp1d(x, y, kind='linear')
quad_interp1d = scip.interp1d(x, y, kind='quadratic')
cubic_interp1d = scip.interp1d(x, y, kind='cubic')
# points to evaluate interpolation over
x_int = np.linspace(np.min(x), np.max(x), 100)
# obtaining interpolated values
y_int_linear = linear_interp1d(x_int)
y_int_quad = quad_interp1d(x_int)
y_int_cubic = cubic_interp1d(x_int)
#plot
plt.scatter(x, y, marker='x', s=45)
plt.plot(x_int, y_int_linear,'-', color='green')
plt.plot(x_int, y_int_cubic,'--', color='red')
plt.plot(x_int, y_int_quad,'-.', color='magenta')
plt.show()
We will try to create a countour map that represents the depth variation of an oil reservoir:
Discrete data points are given by their x, y and z location:
In [8]:
# data points
x = [2.5,6.8,2.9,7,4.9,9,1,4.8,6,4,2.7,7,8,7.8,7.5,9.2,5,2.3,3]
y = [6,3.7,3,3.2,1.8,4.5,4,3.7,2.7,5.5,2.4,5.6,4.5,2.5,5.5,3,6,4.5,3.5]
z = [10250,9790,10000,9820,10200,10230,10220,9900,10050,10100,10210,10050,10100,10110,10255,10260,10201,9941,9860]
# 1. points to evaluate interpolation at
interpolated_points = 200
xspace = np.linspace(np.min(x), np.max(x), interpolated_points)
yspace = np.linspace(np.min(y), np.max(y), interpolated_points)
X, Y = np.meshgrid(xspace, yspace)
# 2. cubic interpolation in 2D
Z = scip.griddata((x,y), z, (X,Y), method='cubic')
# plot
matplotlib.rcParams['figure.figsize'] = 14,7
# isodepth colors
img = plt.contourf(X, Y, Z, 7, zorder=0, cmap=plt.get_cmap('jet_r'))
# data point positions
plt.plot(x, y, 'x', mew=2, ms=10, color='black')
# contour lines
ctour = plt.contour(X, Y, Z, 7, colors='black')
# depth labels
plt.clabel(ctour, inline=1, fontsize=10)
plt.show()
1.Remove the last two data points. How does it change our perception of the structure?
In [9]:
# 2. cubic interpolation in 2D
# we now use a subset of the sample array, indicated by [:-2], which means all points up to the second to last
Z = scip.griddata((x[:-2],y[:-2]), z[:-2], (X,Y), method='cubic')
# plot
matplotlib.rcParams['figure.figsize'] = 14,7
# isodepth colors
img = plt.contourf(X, Y, Z, 7, zorder=0, cmap=plt.get_cmap('jet_r'))
# data point positions
plt.plot(x[:-2], y[:-2], 'x', mew=2, ms=10, color='black')
# contour lines
ctour = plt.contour(X, Y, Z, 7, colors='black')
# depth labels
plt.clabel(ctour, inline=1, fontsize=10)
plt.show()
2.With all data points, change the interpolation method to linear
. What happens to the highest point on the left side of our model?
In [10]:
# 2. cubic interpolation in 2D
Z = scip.griddata((x,y), z, (X,Y), method='linear')
# plot
matplotlib.rcParams['figure.figsize'] = 14,7
# isodepth colors
img = plt.contourf(X, Y, Z, 7, zorder=0, cmap=plt.get_cmap('jet_r'))
# data point positions
plt.plot(x, y, 'x', mew=2, ms=10, color='black')
# contour lines
ctour = plt.contour(X, Y, Z, 7, colors='black')
# depth labels
plt.clabel(ctour, inline=1, fontsize=10)
plt.show()
Given a functional f(x), a least squares fit is obtained when the sum of squared errors between samples and function values is minimized. The function for which the error is to be minimized below is, as an example
\begin{equation} f(x) = x^{1.5} \end{equation}
In [11]:
esenummet.least_squares()
For production data of a gas reservoir, curve fits are often used to predict future production, also referred to as decline curve analysis.
We're assuming an exponential decline here:
\begin{equation} f(x) = \frac{a}{e^{b x}} \end{equation}To solve a least squares fit in python based on data, the following generic steps are performed using scipy's curve_fit:
def exponential(x, a, b):
return a/np.exp(b*x)
Obtain the fit using `scipy.optimize.curve_fit':
po, pc = scop.curve_fit(exponential, x, y)
The returned list po
now contains the parameters for exponential
, such that po = [a,b]
. We can now obtain the fitted curve by defining x between min
, max
and values
points, and evaluate exponential
thereover:
x = np.linspace(min, max, values)
y = exponential(x, *po)
In [12]:
# data
time = np.array([0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78])
prod = np.array([3233,3530,3152,2088,3038,2108,2132,1654,1762,1967,1760,1649,1633,1680,1398,1622,1393,1436,1352,1402,1459,1373,1262,1346,1325,1319,1309,1206,1249,1446,1255,1227,1268,1233,1175,1129,1242,1247,1198,1058,1172,1242,1214,1148,1689,971,1084,1028,1164,1297,1040,1045,1196,991,1065,898,1020,966,1162,1069,1055,1035,1045,1076,1108,918,1051,1049,1039,1133,887,924,983,1077,1092,973,920,1040])
# function to be fitted
def exponential(x, a, b):
return a/np.exp(b*x)
# get parameters for best fit
po_exp, pc_exp = scop.curve_fit(exponential, time, prod)
# points to evaluate fitted function at (x-values)
time_fit = np.linspace(np.min(time), np.max(time), 100)
# evaluate fit (y-values)
prod_fit = exponential(time_fit, *po_exp)
# plot
matplotlib.rcParams['figure.figsize'] = 10,6
plt.plot(time, prod, 'x', mew=2, ms=5)
plt.plot(time_fit, prod_fit, '--')
plt.xlabel('Time [Months]')
plt.ylabel('Production Rate [Mcf]')
plt.show()
1.Fit above production data from month 0 to month 15, and based on the obtained fit project production in month 60! You can access a subset of the array by using
up_to_month = 15
# subsets:
time[time<=up_to_month]
prod[time<=up_to_month]
In [13]:
# get parameters for best fit up to 15 months
up_to_month = 15
# obtain fitted function parameters
po_exp_lim, pc_exp_lim = scop.curve_fit(exponential, time[time<=up_to_month], prod[time<=up_to_month])
# evaluate fitted function
prod_fit_lim = exponential(time_fit, *po_exp_lim)
# plot
matplotlib.rcParams['figure.figsize'] = 10,6
plt.plot(time, prod, 'x', mew=2, ms=5)
# fit from 60 month data
plt.plot(time, exponential(time, *po_exp), '--')
# fit from 15 month data
plt.plot(time[time<up_to_month], prod[time<up_to_month], 'x', mew=2, ms=5, color='red')
plt.plot(time_fit, prod_fit_lim, '--', color='red')
plt.xlabel('Time [Months]')
plt.ylabel('Production Rate [Mcf]')
plt.show()
print 'Predicted production for month 60 at month {0:d}: {1:.2f} Mcf'.format(up_to_month,exponential(60, *po_exp_lim))
2.For the following dataset
x = np.array([0.057,0.057,0.043,0.094,0.173,0.216,0.231,0.231,0.295,0.374,0.375,0.461,0.482,0.525,0.547,0.647,0.719,0.748,0.805,
0.862,0.919,0.963,1.013])
y = np.array([0.020,0.104,0.152,0.180,0.236,0.277,0.333,0.423,0.409,0.451,0.548,0.597,0.555,0.569,0.618,0.631,0.659,0.729,0.75,
0.777,0.777,0.833,0.861])
obtain (a) an early time linear fit, for x < 0.4
; (b) a late time linear fit for 'x > 0.4'; (c) a linear fit over the entire x range; and (d) an exponential fit over the entire range.
NB: For linear, use
\begin{equation} f(x) = a + bx \end{equation}and for exponential
\begin{equation} f(x) = a + x^b \end{equation}
In [14]:
x = np.array([0.057,0.057,0.043,0.094,0.173,0.216,0.231,0.231,0.295,0.374,0.375,0.461,0.482,0.525,0.547,0.647,0.719,0.748,0.805,
0.862,0.919,0.963,1.013])
y = np.array([0.020,0.104,0.152,0.180,0.236,0.277,0.333,0.423,0.409,0.451,0.548,0.597,0.555,0.569,0.618,0.631,0.659,0.729,0.75,
0.777,0.777,0.833,0.861])
# linear fit - (a), (b), (c)
def linear(x, a, b):
return a + b*x
# exponential - (d)
def exponential(x, a, b):
return a + x**b
# (a)
x_border = 0.4
po_lin_short, pc_lin_short = scop.curve_fit(linear, x[x<x_border], y[x<x_border])
# (b)
po_lin_long, pc_lin_long = scop.curve_fit(linear, x[x>x_border], y[x>x_border])
# (c)
po_lin, pc_lin = scop.curve_fit(linear, x, y)
# (d)
po_exp, pc_exp = scop.curve_fit(exponential, x, y)
# sample fit over
x_fit = np.linspace(np.min(x), np.max(x), 100)
# (a)
y_fit_lin_short = linear(x_fit, *po_lin_short)
# (b)
y_fit_lin_long = linear(x_fit, *po_lin_long)
# (c)
y_fit_lin = linear(x_fit, *po_lin)
# (d)
y_fit_exp = exponential(x_fit, *po_exp)
# (a)
plt.plot(x, y, 'x', mew=2, ms=5)
plt.plot(x_fit, y_fit_lin_short)
plt.show()
# (b)
plt.plot(x, y, 'x', mew=2, ms=5)
plt.plot(x_fit, y_fit_lin_long)
plt.show()
# (c)
plt.plot(x, y, 'x', mew=2, ms=5)
plt.plot(x_fit, y_fit_lin)
plt.show()
# (d)
plt.plot(x, y, 'x', mew=2, ms=5)
plt.plot(x_fit, y_fit_exp)
plt.show()
In [14]: