模块使用
In [1]:
import numpy as np
from scipy import io as spio
a = np.ones((3,3))
spio.savemat('file.mat',{'a':a})
data = spio.loadmat('file.mat',struct_as_record=True)
data['a']
Out[1]:
In [2]:
from scipy import misc
misc.imread('fname.png')
Out[2]:
In [7]:
import matplotlib.pyplot as plt
plt.imread('fname.png')
Out[7]:
In [8]:
from scipy import linalg
arr= np.array([[1,2],
[3,4]])
linalg.det(arr)
Out[8]:
In [9]:
arr = np.array([[3,2],
[6,4]])
linalg.det(arr)
Out[9]:
In [10]:
linalg.det(np.ones(3,4))
In [12]:
arr = np.array([[1,2],[3,4]])
iarr = linalg.inv(arr)
iarr
Out[12]:
In [14]:
# 验证
np.allclose(np.dot(arr,iarr),np.eye(2))
Out[14]:
In [15]:
# 奇异矩阵求逆抛出异常
arr = np.array([[3,2],[6,4]])
linalg.inv(arr)
In [17]:
arr = np.arange(9).reshape((3,3)) + np.diag([1,0,1])
uarr,spec,vharr = linalg.svd(arr)
spec
Out[17]:
In [18]:
sarr = np.diag(spec)
svd_mat = uarr.dot(sarr).dot(vharr)
np.allclose(svd_mat,arr)
Out[18]:
SVD常用于统计和信号处理领域。其他的一些标准分解方法(QR, LU, Cholesky, Schur) 在 scipy.linalg 中也能够找到。
In [19]:
from scipy import optimize
def f(x):
return x**2 + 10*np.sin(x)
x = np.arange(-10,10,0.1)
plt.plot(x,f(x))
plt.show()
此函数有一个全局最小值,约为-1.3,含有一个局部最小值,约为3.8.
在寻找最小值的过程中,确定初始值,用梯度下降的方法,bfgs是一个很好的方法。
In [20]:
optimize.fmin_bfgs(f,0)
Out[20]:
In [21]:
# 但是方法的缺陷是陷入局部最优解
optimize.fmin_bfgs(f,5)
Out[21]:
可以在一个区间中找到一个最小值
In [23]:
xmin_local = optimize.fminbound(f,0,10)
xmin_local
Out[23]:
寻找函数的零点
In [24]:
# guess 1
root = optimize.fsolve(f,1)
root
Out[24]:
In [25]:
# guess -2.5
root = optimize.fsolve(f,-2.5)
root
Out[25]:
In [26]:
xdata = np.linspace(-10,10,num=20)
ydata = f(xdata)+np.random.randn(xdata.size)
我们已经知道函数的形式$x^2+\sin(x)$,但是每一项的系数不清楚,因此进行拟合处理
In [33]:
def f2(x,a,b):
return a*x**2 + b*np.sin(x)
guess=[3,2]
params,params_covariance = optimize.curve_fit(f2, xdata, ydata, guess)
params
Out[33]:
In [41]:
x = np.arange(-10,10,0.1)
def f(x):
return x**2 + 10 * np.sin(x)
grid = (-10,10,0.1)
xmin_global = optimize.brute(f,(grid,))
xmin_local = optimize.fminbound(f,0,10)
root = optimize.fsolve(f,1)
root2 = optimize.fsolve(f,-2.5)
xdata = np.linspace(-10,10,num=20)
np.random.seed(1234)
ydata = f(xdata)+np.random.randn(xdata.size)
def f2(x,a,b):
return a*x**2 + b * np.sin(x)
guess=[2,2]
params,_ =optimize.curve_fit(f2,xdata,ydata,guess)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(x,f(x),'b-',label='f(x)')
ax.plot(x,f2(x,*params),'r--',label='Curve fit result')
xmins = np.array([xmin_global[0],xmin_local])
ax.plot(xmins,f(xmins),'go',label='minize')
roots = np.array([root,root2])
ax.plot(roots,f(roots),'kv',label='Roots')
ax.legend()
ax.set_xlabel('x')
ax.set_ylabel('f(x)')
plt.show()
In [42]:
a = np.random.normal(size=1000)
bins = np.arange(-4,5)
bins
Out[42]:
In [44]:
histogram = np.histogram(a,bins=bins,normed=True)[0]
bins = 0.5*(bins[1:]+bins[:-1])
bins
Out[44]:
In [46]:
from scipy import stats
b =stats.norm.pdf(bins)
plt.plot(bins,histogram)
plt.plot(bins,b)
plt.show()
In [47]:
np.median(a)
Out[47]:
In [48]:
stats.scoreatpercentile(a,50)
Out[48]:
In [49]:
stats.scoreatpercentile(a,90)
Out[49]:
统计检验的结果常用作一个决策指标。例如,如果我们有两组观察点,它们都来自高斯过程,我们可以使用 T-检验 来判断两组观察点是都显著不同:
In [50]:
a = np.random.normal(0,1,size=100)
b = np.random.normal(0,1,size=10)
stats.ttest_ind(a,b)
Out[50]:
返回结果分成连个部分
In [62]:
measured_time = np.linspace(0,1,10)
noise = (np.random.random(10)*2-1) *1e-1
measure = np.sin(2*np.pi*measured_time) + noise
from scipy.interpolate import interp1d
linear_interp = interp1d(measured_time, measure)
computed_time = np.linspace(0, 1, 50)
linear_results = linear_interp(computed_time)
cublic_interp = interp1d(measured_time, measure, kind='cubic')
cublic_results = cublic_interp(computed_time)
plt.plot(measured_time,measure,'o',label='points')
plt.plot(computed_time,linear_results,'r-',label='linear interp')
plt.plot(computed_time,cublic_results,'y-',label='cublic interp')
plt.legend()
3 plt.show()
In [25]:
import numpy as np
import matplotlib.pyplot as plt
months = np.arange(1,13)
mins = [-62,-59,-56,-46,-32,-18,-9,-13,-25,-46,-52,-48]
maxes = [17,19,21,28,33,38,37,37,31,23,19,18]
fig,ax = plt.subplots()
plt.plot(months,mins,'b-',label='min')
plt.plot(months,maxes,'r-',label='max')
plt.ylim(-80,80)
plt.xlim(0.5,12.5)
plt.xlabel('month')
plt.ylabel('temperature')
plt.xticks([1,2,3,4,5,6,7,8,9,10,11,12],
['Jan.','Feb.','Mar.','Apr.','May.','Jun.','Jul.','Aug.','Sep,','Oct.','Nov.','Dec.'])
plt.legend()
plt.title('Alaska temperature')
plt.show()
从图像上来看,温度的最高值和最低值都符合二次函数的特点,$y = at^2+bt+c$,其中$c$为时间$t$的偏置。
In [41]:
from scipy import optimize
def f(t,a,b,c):
return a * t**2+b*t+c
guess = [-1,8,50]
params_min,_ = optimize.curve_fit(f,months,mins,guess)
params_max,_ = optimize.curve_fit(f,months,maxes,guess)
times = np.linspace(1,12,30)
plt.plot(times,f(times,*params_min),'b--',label='min_fit')
plt.plot(times,f(times,*params_max),'r--',label='max_fit')
plt.plot(months,mins,'bo',label='min')
plt.plot(months,maxes,'ro',label='max')
plt.ylim(-80,80)
plt.xlim(0.5,12.5)
plt.xlabel('month')
plt.ylabel('temperature')
plt.xticks([1,2,3,4,5,6,7,8,9,10,11,12],
['Jan.','Feb.','Mar.','Apr.','May.','Jun.','Jul.','Aug.','Sep,','Oct.','Nov.','Dec.'])
plt.title('Alaska temperature')
plt.show()
温度最高值拟合效果较好,但温度最低值拟合效果不太好
驼峰函数 $$f(x,y)=(4-2.1x^2+\frac{x^4}{3})x^2+xy+(4y^2-4)y^2$$
In [44]:
import numpy as np
from scipy import optimize
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
def sixhump(x):
return (4 - 2.1*x[0]**2 + x[0]**4 / 3.) * x[0]**2 + x[0] * x[1] + (-4 + 4*x[1]**2) * x[1] **2
x = np.linspace(-2, 2)
y = np.linspace(-1, 1)
xg, yg = np.meshgrid(x, y)
#plt.figure() # simple visualization for use in tutorial
#plt.imshow(sixhump([xg, yg]))
#plt.colorbar()
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
surf = ax.plot_surface(xg, yg, sixhump([xg, yg]), rstride=1, cstride=1,
cmap=plt.cm.jet, linewidth=0, antialiased=False)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('f(x, y)')
ax.set_title('Six-hump Camelback function')
plt.show()
In [53]:
min1 = optimize.fmin_bfgs(sixhump,[0,-0.5])
min2 = optimize.fmin_bfgs(sixhump,[0,0.5])
min3 = optimize.fmin_bfgs(sixhump,[-1.4,1.0])
In [55]:
local1 = sixhump(min1)
local2 = sixhump(min2)
local3 = sixhump(min3)
print local1,local2,local3