In [ ]:
# -*- coding: utf-8 -*-
import numpy as np
from scipy.optimize import leastsq
import matplotlib.pyplot as plt
def func(x, p):
    """
    Evaluate the sinusoid A*sin(2*pi*k*x + theta).

    Parameters
    ----------
    x : scalar, sequence or array
        Sample positions. Plain lists and tuples are converted to arrays
        first; every other input is used as passed.
    p : sequence of three numbers
        ``(A, k, theta)``: amplitude, frequency and phase offset.

    Returns
    -------
    The sinusoid evaluated at ``x`` (an array for list/tuple/array input).
    """
    A, k, theta = p
    # A Python list/tuple cannot be multiplied by a float, so turn those
    # into arrays; arrays and scalars already support the arithmetic below.
    if isinstance(x, (list, tuple)):
        x = np.asarray(x)
    return A * np.sin(2 * np.pi * k * x + theta)
def residuals(p, y, x):
    """
    Return the pointwise fitting error for the parameter vector ``p``.

    The result is ``y - func(x, p)``: the observed values ``y`` minus the
    model evaluated at the positions ``x``.
    """
    model_values = func(x, p)
    return y - model_values
# Sample positions: 100 evenly spaced points running from 0 to -2*pi.
x = np.linspace(0, -2 * np.pi, num=100)

# Parameters used to generate the reference signal.
A, k, theta = 10, 0.34, np.pi / 6

# Noise-free signal, and the same signal with scaled standard-normal noise.
y0 = func(x, [A, k, theta])
y1 = y0 + 2 * np.random.randn(x.size)

# Initial guess for (A, k, theta) handed to the optimizer.
p0 = [7, 0.2, 0]

# Fit the data with leastsq:
#   residuals - function that computes the error vector
#   p0        - initial values of the parameters being fitted
#   args      - the experimental data forwarded to residuals
plsq = leastsq(residuals, p0, args=(y1, x))

print("Real Parameters:", [A, k, theta])
print("Regression Parameters", plsq[0])  # parameters estimated by the fit

# Draw the three curves in the same order: truth, noisy data, fitted model.
for curve, legend_text in (
    (y0, "Real Data"),
    (y1, "Real Data with Noise"),
    (func(x, plsq[0]), "Regression Data"),
):
    plt.plot(x, curve, label=legend_text)
plt.legend()
plt.show()
In [ ]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
# Location and scale of the normal distribution being plotted.
mu, sigma = 0, 1

# Evaluation grid: from -5 up to (but excluding) 5 in steps of 0.1.
x = np.arange(-5, 5, 0.1)
y = stats.norm.pdf(x, loc=mu, scale=sigma)

plt.figure()
plt.title('Probability Density')
plt.plot(x, y, label='mu={}, sigma={}'.format(mu, sigma))
plt.legend()
plt.show()
In [ ]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
# Location and scale of the normal distribution being plotted.
mu, sigma = 0, 1

# Evaluation grid: from -5 up to (but excluding) 5 in steps of 0.1.
x = np.arange(-5, 5, 0.1)
y = stats.norm.cdf(x, loc=mu, scale=sigma)

plt.figure()
plt.title('Cumulative Distribution')
plt.plot(x, y, label='mu={}, sigma={}'.format(mu, sigma))
plt.legend()
plt.show()
In [ ]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
# Evaluation grid: from -5 up to (but excluding) 5 in steps of 0.1.
x = np.arange(-5, 5, 0.1)

plt.figure()
# The title does not depend on sigma, so it is set once outside the loop.
plt.title('Probability Density')
# One density curve per standard deviation, each with mean 0.
for sigma in [0.5, 1, 1.5]:
    plt.plot(x, stats.norm.pdf(x, 0, sigma), label='mu={}, sigma={}'.format(0, sigma))
plt.legend()
plt.show()
In [ ]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
# Evaluation grid: from -5 up to (but excluding) 5 in steps of 0.1.
x = np.arange(-5, 5, 0.1)

plt.figure()
# The title does not depend on sigma, so it is set once outside the loop.
plt.title('Cumulative Distribution')
# One cumulative curve per standard deviation, each with mean 0.
for sigma in [0.5, 1, 1.5]:
    plt.plot(x, stats.norm.cdf(x, 0, sigma), label='mu={}, sigma={}'.format(0, sigma))
plt.legend()
plt.show()
In [ ]:
from scipy import stats
import matplotlib.pyplot as plt
fig = plt.figure()

# Upper panel: probability plot of the raw sample against a normal.
ax1 = fig.add_subplot(2, 1, 1)
# 500 log-gamma draws shifted by 5
# (presumably so the values stay positive for Box-Cox - TODO confirm).
x = stats.loggamma.rvs(5, size=500) + 5
prob = stats.probplot(x, dist=stats.norm, plot=ax1)
ax1.set_xlabel('')
ax1.set_title('Probplot against normal distribution')

print('Use boxcox to transform the data to make it closest to normal')

# Lower panel: the same plot after the Box-Cox transformation.
ax2 = fig.add_subplot(2, 1, 2)
xt, _ = stats.boxcox(x)
prob = stats.probplot(xt, dist=stats.norm, plot=ax2)
ax2.set_title('Probplot after Box-Cox transformation')

plt.show()
In [ ]:
from scipy import stats
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
def _plot_panel(position, labelled_samples):
    """Draw one subplot with a density histogram + KDE for each sample."""
    plt.subplot(position)
    for values, name in labelled_samples:
        # NOTE: seaborn documents distplot as deprecated (since 0.11); it is
        # kept here so the figure stays the same on the installed version.
        sns.distplot(values, kde=True, rug=False, norm_hist=True, label=name)
    plt.legend()


plt.figure(figsize=(16, 8))

# Panel 1: two normal samples with mean 0 and different spreads.
A1 = np.random.normal(loc=0.0, scale=1.0, size=5000)
A2 = np.random.normal(loc=0.0, scale=5.0, size=5000)
_plot_panel(221, [(A1, 'A1'), (A2, 'A2')])

# Panel 2: the same two spreads with mean 10.
B1 = np.random.normal(loc=10.0, scale=1.0, size=5000)
B2 = np.random.normal(loc=10.0, scale=5.0, size=5000)
_plot_panel(222, [(B1, 'B1'), (B2, 'B2')])

# Panel 3: an exponential sample.
C = np.random.exponential(2, 5000)
_plot_panel(223, [(C, 'C')])

# Panel 4: the exponential sample after a Box-Cox transformation.
D, _ = stats.boxcox(C)
_plot_panel(224, [(D, 'D')])

plt.show()
In [ ]:
from scipy import stats
import numpy as np
def normality_test(arr):
    """
    Print a short normality report for the sample ``arr``.

    After a separator line, the report lists the sample skewness, the
    skewness-test p-value, the sample kurtosis, the kurtosis-test p-value
    and the p-value of ``stats.normaltest``. Nothing is returned.
    """
    # Each entry pairs the output template with the statistic to compute;
    # values are computed one at a time, right before they are printed.
    report = (
        ('Skewness of dataset {}', stats.skew),
        ('Skewness test p-value {}', lambda data: stats.skewtest(data)[1]),
        ('Kurtosis of dataset {}', stats.kurtosis),
        ('Kurtosis test p-value {}', lambda data: stats.kurtosistest(data)[1]),
        ('Normal test p-value {}', lambda data: stats.normaltest(data)[1]),
    )
    print('--------')
    for template, statistic in report:
        print(template.format(statistic(arr)))
# Normal samples: mean 0 with two spreads, then mean 10 with the same spreads.
A1 = np.random.normal(loc=0.0, scale=1.0, size=50000)
A2 = np.random.normal(loc=0.0, scale=5.0, size=50000)
B1 = np.random.normal(loc=10.0, scale=1.0, size=50000)
B2 = np.random.normal(loc=10.0, scale=5.0, size=50000)

# Exponential sample and its Box-Cox transformed counterpart.
C = np.random.exponential(2, 50000)
D, _ = stats.boxcox(C)

# Report on every sample, in the order they were created.
for sample in (A1, A2, B1, B2, C, D):
    normality_test(sample)
In [ ]:
from sklearn import preprocessing
from scipy import stats
import numpy as np
import pandas as pd
def normality_test(arr):
    """
    Print a short normality report for the sample ``arr``.

    After a separator line, the report lists the sample skewness, the
    skewness-test p-value, the sample kurtosis, the kurtosis-test p-value
    and the p-value of ``stats.normaltest``. Nothing is returned.
    """
    # Each entry pairs the output template with the statistic to compute;
    # values are computed one at a time, right before they are printed.
    report = (
        ('Skewness of dataset {}', stats.skew),
        ('Skewness test p-value {}', lambda data: stats.skewtest(data)[1]),
        ('Kurtosis of dataset {}', stats.kurtosis),
        ('Kurtosis test p-value {}', lambda data: stats.kurtosistest(data)[1]),
        ('Normal test p-value {}', lambda data: stats.normaltest(data)[1]),
    )
    print('--------')
    for template, statistic in report:
        print(template.format(statistic(arr)))
# Normal sample and the result of passing it through preprocessing.scale.
A1 = np.random.normal(loc=10.0, scale=5.0, size=5000)
A2 = preprocessing.scale(A1)

# Exponential sample and its scaled version.
B1 = np.random.exponential(2, 5000)
B2 = preprocessing.scale(B1)

# Box-Cox transform of the exponential sample, then scaled as well.
C1, _ = stats.boxcox(B1)
C2 = preprocessing.scale(C1)

# Report on every sample, in the order they were created.
for sample in (A1, A2, B1, B2, C1, C2):
    normality_test(sample)

# Summary statistics of all six samples side by side (the cell's output).
pd.DataFrame(
    {'A1': A1, 'A2': A2, 'B1': B1, 'B2': B2, 'C1': C1, 'C2': C2}
).describe()