University of Zagreb
Faculty of Electrical Engineering and Computing
http://www.fer.unizg.hr/predmet/su
Academic year 2015/2016
(c) 2015 Jan Šnajder
Version: 0.8 (2015-11-01)
In [3]:
import numpy as np
import scipy as sp
import scipy.stats as stats
import matplotlib.pyplot as plt
from numpy.random import normal
%matplotlib inline
The chain rule: $$P(x,y,z) = P(x) P(y|x) P(z|x,y)$$
In general: $$ \begin{align*} P(x_1,\dots,x_n) &= P(x_1)P(x_2|x_1)P(x_3|x_1,x_2)\cdots P(x_n|x_1,\dots,x_{n-1})\\ &= \prod_{k=1}^n P(x_k|x_1,\dots,x_{k-1}) \end{align*} $$
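As a quick sanity check, the chain rule can be verified numerically on a small random joint distribution over three binary variables (a minimal sketch):
In [ ]:
P = sp.random.random((2, 2, 2))   # random joint distribution over three binary variables
P /= P.sum()                      # normalize so the probabilities sum to 1
Px = P.sum(axis=(1, 2))                     # P(x)
Py_x = P.sum(axis=2) / Px[:, None]          # P(y|x)
Pz_xy = P / P.sum(axis=2, keepdims=True)    # P(z|x,y)
P_chain = Px[:, None, None] * Py_x[:, :, None] * Pz_xy
print(sp.allclose(P, P_chain))              # True: P(x,y,z) = P(x) P(y|x) P(z|x,y)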
In [2]:
from scipy import stats
X = sp.random.random(100)
Y0 = sp.random.random(100)              # independent of X
noise = stats.norm.rvs(size=100)
Y1 = X + 0.2 * noise                    # linearly dependent on X, with noise
Y2 = 3 * Y1                             # scaled version of Y1
Y3 = -Y1                                # negated version of Y1
Y4 = 1 - (X - 0.5)**2 + 0.05 * noise    # nonlinearly dependent on X
In [3]:
for Y in [Y0, Y1, Y2, Y3, Y4]:
    plt.scatter(X, Y, label="r = %.3f" % stats.pearsonr(X, Y)[0])
    plt.legend()
    plt.show()
For independent variables $X$ and $Y$: $$ \begin{align*} \mathbb{E}[XY] &= \mathbb{E}[X]\, \mathbb{E}[Y]\\ \mathrm{Var}(X+Y) &= \mathrm{Var}(X) + \mathrm{Var}(Y)\\ \mathrm{Cov}(X, Y) &= \rho_{X,Y} = 0 \end{align*} $$
Independent variables are uncorrelated, but the converse does not hold in general: nonlinearly dependent variables can have low correlation
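These identities are easy to check empirically with independent samples (a minimal sketch):
In [ ]:
X = stats.norm.rvs(size=100000, loc=1, scale=2)
Y = stats.norm.rvs(size=100000, loc=3, scale=1)
print(sp.mean(X * Y), sp.mean(X) * sp.mean(Y))    # E[XY] vs. E[X] E[Y]
print(sp.var(X + Y), sp.var(X) + sp.var(Y))       # Var(X+Y) vs. Var(X) + Var(Y)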
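For instance, $Y = X^2$ with $X$ symmetric around zero is completely determined by $X$, yet its Pearson correlation with $X$ is close to zero (a minimal sketch):
In [ ]:
X = stats.uniform.rvs(loc=-1, scale=2, size=100000)   # uniform on [-1, 1]
Y = X**2                                              # fully determined by X
print(stats.pearsonr(X, Y)[0])                        # close to 0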
Variables $X$ and $Y$ are conditionally independent given the variable $Z$, denoted $X\bot Y|Z$, iff $$ P(X|Y,Z) = P(X|Z) $$ or, equivalently, $$ P(X,Y|Z) = P(X|Z) P(Y|Z) $$
Once the outcome of variable $Z$ is known, knowing the outcome of variable $Y$ tells us nothing further about the outcome of variable $X$ (and vice versa)
For example:
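As a small numeric illustration, a joint distribution built as $P(x,y,z)=P(z)P(x|z)P(y|z)$ satisfies $X\bot Y|Z$ by construction, and the factorization can be checked directly (a minimal sketch):
In [ ]:
Pz = sp.array([0.4, 0.6])                     # P(z)
Px_z = sp.array([[0.2, 0.8], [0.7, 0.3]])     # P(x|z), rows indexed by z
Py_z = sp.array([[0.5, 0.5], [0.1, 0.9]])     # P(y|z), rows indexed by z
P = sp.einsum('z,zx,zy->xyz', Pz, Px_z, Py_z)    # joint P(x,y,z), indexed [x,y,z]
Pxy_z = P / P.sum(axis=(0, 1))                   # P(x,y|z)
print(sp.allclose(Pxy_z, sp.einsum('zx,zy->xyz', Px_z, Py_z)))   # True: P(x,y|z) = P(x|z) P(y|z)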
In [4]:
mu = 0.3
p = stats.bernoulli(mu)
xs = sp.array([0, 1])
for x in xs:
    plt.plot(x, p.pmf(x), 'bo', ms=8, label='bernoulli pmf')
    plt.vlines(x, 0, p.pmf(x), colors='b', lw=5, alpha=0.5)
plt.xlim(xmin=-1, xmax=2)
plt.ylim(ymax=1)
plt.show()
In [5]:
X = p.rvs(size=100); X
Out[5]:
In [6]:
sp.mean(X)
Out[6]:
In [7]:
sp.var(X)
Out[7]:
In [8]:
xs = sp.linspace(0, 1)
plt.plot(xs, xs * (1 - xs));   # variance of a Bernoulli variable as a function of mu
$\mathbf{x}=(x_1,x_2,\dots,x_K)^\mathrm{T}$ is a binary vector of indicator variables
Probabilities of the individual values: $\boldsymbol{\mu}=(\mu_1,\dots,\mu_K)^\mathrm{T}$, $\sum_k \mu_k=1$, $\mu_k\geq 0$
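Concretely, one-hot vectors $\mathbf{x}$ can be sampled as single draws from a multinomial distribution (a minimal sketch):
In [ ]:
mu = sp.array([0.2, 0.5, 0.3])               # K = 3 values, probabilities sum to 1
X = sp.random.multinomial(1, mu, size=10)    # ten one-hot indicator vectors
print(X)
print(X.mean(axis=0))                        # column means approximate mu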
In [9]:
xs = sp.linspace(-5, 5)
for s in range(1, 5):
    plt.plot(xs, stats.norm.pdf(xs, 0, s), label='$\sigma=%d$' % s)
plt.legend()
plt.show()
In [10]:
# probability mass within 1, 2, and 3 standard deviations (the 68-95-99.7 rule)
print(stats.norm.cdf(1, 0, 1) - stats.norm.cdf(-1, 0, 1))
print(stats.norm.cdf(2, 0, 1) - stats.norm.cdf(-2, 0, 1))
print(stats.norm.cdf(3, 0, 1) - stats.norm.cdf(-3, 0, 1))
In [11]:
p = stats.norm(loc=5, scale=3)
In [12]:
X = p.rvs(size=30); X
Out[12]:
In [13]:
sp.mean(X)
Out[13]:
In [14]:
sp.var(X)
Out[14]:
In [15]:
mu = [0, 1]
covm = sp.array([[1, 1], [1, 3]])
p = stats.multivariate_normal(mu, covm)
In [16]:
print(covm)
In [17]:
x = np.linspace(-2, 2)
y = np.linspace(-2, 2)
X, Y = np.meshgrid(x, y)
XY = np.dstack((X,Y))
In [18]:
plt.contour(X, Y, p.pdf(XY));
In [19]:
covm1 = sp.array([[1, 0], [0, 5]])
print(covm1)
In [20]:
plt.contour(X, Y, stats.multivariate_normal.pdf(XY, mean=mu, cov=covm1 ));
In [21]:
covm2 = sp.array([[5, 0], [0, 5]])
print(covm2)
In [22]:
plt.contour(X, Y, stats.multivariate_normal.pdf(XY, mean=mu, cov=covm2 ));
In [23]:
plt.contour(X, Y, stats.multivariate_normal.pdf(XY, mean=mu, cov=[[1,0],[0,1]] ));
In [24]:
from scipy import linalg
# four points: the corners of the unit square
x00 = sp.array([0, 0])
x01 = sp.array([0, 1])
x10 = sp.array([1, 0])
x11 = sp.array([1, 1])
In [25]:
linalg.norm(x00 - x01, ord=2)
Out[25]:
In [26]:
linalg.norm(x00 - x10, ord=2)
Out[26]:
In [27]:
linalg.norm(x00 - x11, ord=2)
Out[27]:
In [28]:
sp.sqrt(sp.dot((x00 - x11), (x00 - x11)))   # the same as the L2 norm above
Out[28]:
In [29]:
def mahalanobis(x1, x2, covm):
    # Mahalanobis distance: sqrt((x1 - x2)^T S^{-1} (x1 - x2))
    return sp.sqrt(sp.dot(sp.dot((x1 - x2), linalg.inv(covm)), (x1 - x2)))
# or: from scipy.spatial.distance import mahalanobis
# (note: scipy's version expects the *inverse* covariance matrix as its third argument)
In [30]:
covm1 = sp.array([[1, 0], [0, 5]])
mahalanobis(x00, x01, covm1)
Out[30]:
In [31]:
mahalanobis(x00, x10, covm1)
Out[31]:
In [32]:
mahalanobis(x00, x11, covm1)
Out[32]:
In [33]:
mahalanobis(x00, x11, sp.eye(2))
Out[33]:
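As a sanity check, the same distance is given by scipy's built-in function, which expects the inverse covariance matrix (a minimal sketch):
In [ ]:
from scipy.spatial.distance import mahalanobis as mahalanobis_scipy
mahalanobis_scipy(x00, x11, linalg.inv(covm1))   # matches mahalanobis(x00, x11, covm1) above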
An estimator $\Theta$ is an unbiased estimator of the parameter $\theta$ iff $$ \mathbb{E}[\Theta]=\theta $$
The bias of an estimator: $$ b_\theta(\Theta) = \mathbb{E}[\Theta]-\theta $$
In [34]:
X = stats.norm.rvs(size=10, loc=0, scale=1) # mean=0, stdev=var=1
sp.mean(X)
Out[34]:
In [35]:
mean = 0
n = 10
N = 10000
for i in range(N):
    X = stats.norm.rvs(size=n)
    mean += sp.sum(X) / len(X)
mean / N   # averages to ~0: the sample mean is an unbiased estimator
Out[35]:
In [36]:
def st_dev(X):
    # biased (MLE) variance estimator: divides by n rather than n - 1
    # (despite its name, this returns the variance, not the standard deviation)
    n = len(X)
    mean = sp.sum(X) / n
    s = 0
    for i in range(len(X)):
        s += (X[i] - mean)**2
    return s / n
In [37]:
X = stats.norm.rvs(size=10, loc=0, scale=1) # mean=0, stdev=var=1
st_dev(X)
Out[37]:
In [38]:
stdev = 0
n = 10
N = 10000
for i in range(N):
    X = stats.norm.rvs(size=n)
    stdev += st_dev(X)
stdev / N   # averages to ~(n - 1)/n = 0.9, not 1: the estimator is biased
Out[38]:
In [40]:
def likelihood(mu, m, N):
    # Bernoulli likelihood: m successes in N trials
    return mu**m * (1 - mu)**(N - m)
In [41]:
xs = sp.linspace(0, 1)
plt.plot(xs, likelihood(xs, 8, 10));
In [42]:
xs = sp.linspace(0, 1)
plt.plot(xs, likelihood(xs, 5, 10));
In [43]:
xs = sp.linspace(0, 1)
plt.plot(xs, likelihood(xs, 10, 10));
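The maximum of each curve sits at $\hat{\mu} = m/N$, the MLE of the Bernoulli parameter; a quick numeric check (a minimal sketch):
In [ ]:
xs = sp.linspace(0, 1, 1001)
for m in [8, 5, 10]:
    print(xs[sp.argmax(likelihood(xs, m, 10))])   # approximately m / 10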
In [44]:
p = stats.norm(5, 2)
X = sp.sort(p.rvs(30))
plt.scatter(X, sp.zeros(len(X)));
In [45]:
mean_mle = sp.mean(X); mean_mle
Out[45]:
In [46]:
var_mle = np.var(X, axis=0, ddof=0); var_mle   # the MLE of the variance divides by N (ddof=0), not N - 1
Out[46]:
In [47]:
p_mle = stats.norm(mean_mle, sp.sqrt(var_mle))
In [48]:
plt.scatter(X, p_mle.pdf(X))
plt.plot(X, p.pdf(X), c='gray');
plt.plot(X, p_mle.pdf(X), c='blue', linewidth=2)
plt.vlines(X, 0, p_mle.pdf(X), colors='b', lw=2, alpha=0.2)
plt.show()
In [60]:
mu = [3, 2]
covm = sp.array([[5, 2], [2, 10]])
p = stats.multivariate_normal(mu, covm)
In [61]:
x = np.linspace(-10, 10)
y = np.linspace(-10, 10)
X, Y = np.meshgrid(x, y)
XY = np.dstack((X,Y))
plt.contour(X, Y, p.pdf(XY))
plt.show()
In [62]:
D = p.rvs(100)
plt.contour(X, Y, p.pdf(XY), cmap='binary', alpha=0.5)
plt.scatter(D[:,0], D[:,1])
plt.show()
In [63]:
mean_mle = sp.mean(D, axis=0); mean_mle
Out[63]:
In [64]:
s = 0
for x in D:
    s += sp.outer(x - mean_mle, x - mean_mle)
cov_mle = s / len(D)   # the MLE of the covariance matrix divides by N
In [65]:
cov_mle
Out[65]:
In [66]:
sp.cov(D, rowvar=0, bias=0)   # built-in estimate; bias=0 divides by N - 1, so it differs slightly from cov_mle
Out[66]:
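Equivalently, the covariance MLE can be computed without the explicit loop (a minimal sketch):
In [ ]:
Dc = D - mean_mle             # centered data
sp.dot(Dc.T, Dc) / len(D)     # same result as cov_mle above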
In [67]:
p_mle = stats.multivariate_normal(mean_mle, cov_mle)
plt.contour(X, Y, p_mle.pdf(XY));
In [68]:
plt.contour(X, Y, p.pdf(XY), cmap='binary', alpha=0.5)
plt.scatter(D[:,0], D[:,1], c='gray', alpha=0.5)
plt.contour(X, Y, p_mle.pdf(XY), cmap='Blues', linewidths=2);
In [ ]:
TODO
In [4]:
xs = sp.linspace(0,1)
beta = stats.beta(1,1)
In [30]:
plt.plot(xs,stats.beta.pdf(xs,1,1), label='a=1,b=1')
plt.plot(xs,stats.beta.pdf(xs,2,2), label='a=2,b=2')
plt.plot(xs,stats.beta.pdf(xs,4,2), label='a=4,b=2')
plt.plot(xs,stats.beta.pdf(xs,2,4), label='a=2,b=4')
plt.legend()
plt.show()
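The Beta distribution is the conjugate prior for the Bernoulli parameter $\mu$: with a $\mathrm{Beta}(a,b)$ prior and $m$ successes in $N$ trials, the posterior is $\mathrm{Beta}(a+m,\,b+N-m)$. A minimal sketch reusing the counts $m=8$, $N=10$ from the likelihood plots above:
In [ ]:
a, b, m, N = 2, 2, 8, 10
xs = sp.linspace(0, 1)
plt.plot(xs, stats.beta.pdf(xs, a, b), label='prior: Beta(2,2)')
plt.plot(xs, stats.beta.pdf(xs, a + m, b + N - m), label='posterior: Beta(10,4)')
plt.legend()
plt.show()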