Kaggle https://www.kaggle.com/datasets
UCI Machine Learning Repository https://archive.ics.uci.edu/ml/datasets.html
Scikit-learn http://scikit-learn.org/stable/datasets/
Türkiye İstatistik Kurumu http://www.tuik.gov.tr
A Handbook of Small Data Sets, edited by D.J. Hand, F. Daly, A.D. Lunn, K.J. McConway and E. Ostrowski, Chapman and Hall, ISBN 0-412-39920-2. https://www.stat.ncsu.edu/research/sas/sicl/data/
(Fisher, 1936)
Class 1 | Class 2 | Class 3
---|---|---
Iris Setosa | Iris Versicolour | Iris Virginica
Features
In [5]:
import numpy as np
import pandas as pd
# Load the iris dataset (space-separated text file)
df_iris = pd.read_csv('data/iris.txt', sep=' ')

# Print each row of the dataset
for i in range(len(df_iris)):
    print(df_iris[i:i+1])
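If data/iris.txt is not available locally, the same dataset also ships with scikit-learn; a minimal alternative sketch (assumes scikit-learn is installed):
In [ ]:
from sklearn.datasets import load_iris
import pandas as pd

# Load the iris data bundled with scikit-learn
iris = load_iris()
df_iris = pd.DataFrame(iris.data, columns=iris.feature_names)
df_iris['class'] = iris.target  # 0: setosa, 1: versicolour, 2: virginica
print(df_iris.head())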
Classification is not very different from regression; only the outputs are categorical.
Class | Feature 1 | Feature 2
---|---|---
0 | 5.7 | 3.1
1 | -0.3 | 2
⋮ | ⋮ | ⋮
$y_i$ | $x_{i,1}$ | $x_{i,2}$
⋮ | ⋮ | ⋮
1 | 0.4 | 5
New data:
Class | Feature 1 | Feature 2
---|---|---
? | 4.8 | 3.2
? | -0.7 | 2.4
Two classes: $y_i \in \{0,1\}$.
$$ \Pr\{y_i = 1\} = \pi_i = \sigma(x_i^\top w) $$

where $\sigma(x)$ is the sigmoid function:

\begin{eqnarray} \sigma(x) & = & \frac{1}{1+e^{-x}} \end{eqnarray}
\begin{eqnarray} \pi_i & = & \sigma(x_i^\top w) \\ y_i & \sim & \mathcal{BE}(\pi_i) \end{eqnarray}

For each label we toss a coin, and we tie the probability of the coin landing $1$ to the features.
\begin{eqnarray} p(y|\pi) = \pi^y(1-\pi)^{1-y} \end{eqnarray}
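A small numerical sketch of this generative story (the feature vector and weights below are made up for illustration):
In [ ]:
import numpy as np

def sigmoid(x):
    return 1/(1 + np.exp(-x))

# A hypothetical feature vector and weight vector
x = np.array([1.0, 0.4, 5.0])   # first entry is the constant bias feature
w = np.array([-0.5, 1.0, 0.3])

pi = sigmoid(x @ w)              # probability that y = 1
y = np.random.rand() < pi        # toss the coin
print(pi, int(y))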
In [46]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import scipy as sc
import scipy.optimize as opt
import pandas as pd
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets
from IPython.display import clear_output, display, HTML
from matplotlib import rc
def sigmoid(x):
    return 1/(1 + np.exp(-x))

def inv_sigmoid(p=0.5):
    # Invert the sigmoid numerically by bisection
    return opt.bisect(lambda x: sigmoid(x) - p, a=-100, b=100)

def inv_sigmoid1D(w, b, p=0.5):
    # Solve sigmoid(w*x + b) = p for x by bisection
    return opt.bisect(lambda x: sigmoid(w*x + b) - p, a=-100, b=100)
fig = plt.figure(figsize=(6,4))
ax = fig.gca()
ax.set_ylim([-0.1, 1.1])
x = np.linspace(-10, 10, 100)
ax.set_xlim([-10, 10])
ln = plt.Line2D(x, sigmoid(x))
ln2 = plt.axvline(0, ls=':', color='k')
ln_left = plt.axvline(0, ls=':', color='b')
ln_right = plt.axvline(0, ls=':', color='r')
ax.add_line(ln)
plt.close(fig)
ax.set_xlabel('$x$')
ax.set_ylabel(r'$\sigma(wx + b)$')
def plot_fun(w, b):
    ln.set_ydata(sigmoid(w*x + b))
    if np.abs(w) > 0.00001:
        # Mark the x values where the sigmoid crosses 0.5, 0.25 and 0.75
        ln2.set_xdata(2*[inv_sigmoid1D(w, b, 0.5)])
        ln_left.set_xdata(2*[inv_sigmoid1D(w, b, 0.25)])
        ln_right.set_xdata(2*[inv_sigmoid1D(w, b, 0.75)])
    display(fig)

res = interact(plot_fun, w=(-3, 3, 0.1), b=(-10.0, 10.0, 0.1))
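Incidentally, the inverse of the sigmoid also has a closed form, the logit function $\sigma^{-1}(p) = \log(p/(1-p))$; the bisection above is just a generic way to get the same number. A quick check (a sketch reusing inv_sigmoid from the cell above):
In [ ]:
import numpy as np

def logit(p):
    # Closed-form inverse of the sigmoid
    return np.log(p/(1 - p))

# Should agree with the bisection-based inverse up to solver tolerance
print(logit(0.25), inv_sigmoid(0.25))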
The probability of observing the labels given the features:
$\begin{eqnarray} p(y_1, y_2, \dots, y_N|w, X ) &=& \left(\prod_{i : y_i=1} \sigma(x_i^\top w) \right) \left(\prod_{i : y_i=0}(1- \sigma(x_i^\top w)) \right) \end{eqnarray} $
$ \begin{eqnarray} {\cal L}(w) & = & \log p(y_1, y_2, \dots, y_N|w, x_1, x_2, \dots, x_N ) \\ & = & \sum_{i : y_i=1} \log \sigma(x_i^\top w) + \sum_{i : y_i=0} \log (1- \sigma(x_i^\top w)) \\ & = & \sum_{i : y_i=1} x_i^\top w - \sum_{i : y_i=1} \log(1+e^{x_i^\top w}) - \sum_{i : y_i=0}\log({1+e^{x_i^\top w}}) \\ & = & \sum_i y_i x_i^\top w - \sum_{i} \log(1+e^{x_i^\top w}) \\ & = & y^\top X w - \mathbf{1}^\top \text{logsumexp}(0, X w) \end{eqnarray} $
$ \begin{eqnarray} w^* & = & \arg\max_{w} {\cal L}(w) \end{eqnarray} $
Here $\mathbf{1}^\top v = \sum_i v_i$.
$$ f = \text{logsumexp}(a, b) \equiv \log(e^a + e^b) $$

Applied elementwise to vectors:

$$ f_i = \text{logsumexp}(a_i, b_i) \equiv \log(e^{a_i} + e^{b_i}) $$

Unlike the least-squares approach, there is no closed-form solution here, so we really do need gradient ascent.
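A small sketch verifying the vectorized form of ${\cal L}(w)$ against the naive sum of Bernoulli log-probabilities on random data (all names here are illustrative; np.logaddexp is NumPy's stable two-argument logsumexp):
In [ ]:
import numpy as np

def sigmoid(x):
    return 1/(1 + np.exp(-x))

N, D = 10, 3
X = np.random.randn(N, D)
w = np.random.randn(D)
y = (np.random.rand(N) < 0.5).astype(float)

# Naive form: sum of Bernoulli log-probabilities
pi = sigmoid(X @ w)
L_naive = np.sum(y*np.log(pi) + (1 - y)*np.log(1 - pi))

# Vectorized form: y^T X w - 1^T logsumexp(0, Xw)
L_vec = y @ (X @ w) - np.sum(np.logaddexp(0, X @ w))

print(L_naive, L_vec)  # should match to numerical precision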
The hinge loss function: $$ h(x) = \left\{ \begin{array}{cc} 0 & x < 0 \\ x & x \geq 0 \end{array} \right. $$
$$ f_\alpha(x) = \frac{1}{\alpha}\text{logsumexp}(0, \alpha x) $$

For $\alpha = 1$ this is the original logsumexp; as $\alpha$ grows it approaches the hinge.
In [40]:
%matplotlib inline
import numpy as np
import matplotlib as mpl
import matplotlib.pylab as plt
def logsumexp(a, b):
    # Numerically stable log(exp(a) + exp(b))
    m = np.max([a, b])
    return m + np.log(np.exp(a-m) + np.exp(b-m))

def hinge(x):
    return x if x > 0 else 0
xx = np.arange(-5,3,0.1)
plt.figure(figsize=(12,10))
for i, alpha in enumerate([1, 2, 5, 10]):
    f = [logsumexp(0, alpha*z)/alpha for z in xx]
    h = [hinge(z) for z in xx]
    plt.subplot(2, 2, i+1)
    plt.plot(xx, f, 'r')
    plt.plot(xx, h, 'k:')
    plt.xlabel('z')
    if alpha == 1:
        plt.legend(['logsumexp(0,z)', 'hinge(z)'], loc=2)
    else:
        plt.legend(['logsumexp(0,{a} z)/{a}'.format(a=alpha), 'hinge(z)'], loc=2)
plt.show()
Minimizing the negative log-likelihood = minimizing the error

$$ - \mathcal{L}(w) = - \sum_i l_i(w) $$

$$ E_i(w) \equiv -l_i(w) = - y_i x_i^\top w + \text{logsumexp}(0, x_i^\top w) = - y_i z_i + \text{logsumexp}(0, z_i) $$

with $z_i = x_i^\top w$.
In [21]:
xx = np.arange(-10,10,0.1)
y = 1
f = [-y*z + logsumexp(0, z) for z in xx]
f0 = [logsumexp(0, z) for z in xx]
plt.figure(figsize=(12,5))
plt.subplot(1,2,1)
plt.plot(xx, f, 'r')
plt.xlabel('$z_i$')
plt.ylabel('$-l_i$')
plt.title('Error for positive examples. $y = $' + str(y))
plt.subplot(1,2,2)
plt.plot(xx, f0, 'r')
plt.xlabel('$z_i$')
plt.ylabel('$-l_i$')
plt.title('Error for negative examples. $y = 0$')
plt.show()
Demonstration on a synthetic dataset
In [47]:
%matplotlib inline
import numpy as np
import matplotlib as mpl
import matplotlib.pylab as plt
# Generate a random logistic regression problem
def sigmoid(t):
    return np.exp(t)/(1 + np.exp(t))

def generate_toy_dataset(number_of_features=3, number_of_datapoints=20):
    D = number_of_features
    N = number_of_datapoints
    # Some random features; the first column is a constant bias feature
    X = 2*np.random.rand(N, D) - 1
    X[:, 0] = 1
    # Generate a random parameter vector
    w_true = np.random.randn(D, 1)
    # Generate class labels by tossing a coin with probability sigmoid(X w)
    pi = sigmoid(np.dot(X, w_true))
    y = (np.random.rand(N, 1) < pi).astype(int).reshape(N)
    return X, y, w_true, D, N
In [48]:
styles = ['ob', 'xr']
X, y, w_true, D, N = generate_toy_dataset(number_of_features=3, number_of_datapoints=20)
xl = -1.5; xr = 1.5; yl = -1.5; yr = 1.5
fig = plt.figure(figsize=(5,5))
plt.plot(X[y==1,1],X[y==1,2],styles[1])
plt.plot(X[y==0,1],X[y==0,2],styles[0])
ax = fig.gca()
ax.set_ylim([yl, yr])
ax.set_xlim([xl, xr])
plt.show()
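Before coding the update, note the gradient of the log-likelihood. Differentiating ${\cal L}(w) = y^\top X w - \mathbf{1}^\top \text{logsumexp}(0, Xw)$ and using $\frac{d}{dz}\text{logsumexp}(0, z) = \sigma(z)$ gives

$$ \nabla_w {\cal L}(w) = \sum_i \left(y_i - \sigma(x_i^\top w)\right) x_i = X^\top \left(y - \sigma(Xw)\right) $$

This is exactly the dL computed in the loop below.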
In [49]:
# Gradient ascent on the log-likelihood
w = np.random.randn(D)
# Learning rate
eta = 0.05
W = []
MAX_ITER = 200
for epoch in range(MAX_ITER):
    W.append(w)
    # dL = X^T (y - sigmoid(X w)), the gradient of the log-likelihood
    dL = np.dot(X.T, y - sigmoid(np.dot(X, w)))
    w = w + eta*dL
In [50]:
xl = -1.5
xr = 1.5
yl = -1.5
yr = 1.5
fig = plt.figure(figsize=(5,5))
ax = fig.gca()
ax.set_ylim([yl, yr])
ax.set_xlim([xl, xr])
plt.plot(X[y==1,1],X[y==1,2],styles[1])
plt.plot(X[y==0,1],X[y==0,2],styles[0])
ln = plt.Line2D([],[],color='k')
ln_left = plt.Line2D([],[],ls= ':', color=styles[0][1])
ln_right = plt.Line2D([],[],ls= ':', color=styles[1][1])
ax.add_line(ln)
ax.add_line(ln_left)
ax.add_line(ln_right)
plt.close(fig)
ax.set_xlabel('$x_1$')
ax.set_ylabel('$x_2$')
ax.set_xticks(np.arange(xl,xr))
ax.set_yticks(np.arange(yl,yr))
ax.grid(True)
def plot_boundary(w0, w1, w2):
    # Draw the decision boundary sigma(w0 + w1*x1 + w2*x2) = 0.5
    # together with the 0.25 and 0.75 level curves
    if w1 != 0:
        xa = -(w0 + w2*yl)/w1
        xb = -(w0 + w2*yr)/w1
        ln.set_xdata([xa, xb])
        ln.set_ydata([yl, yr])
        xa = -(-inv_sigmoid(0.25) + w0 + w2*yl)/w1
        xb = -(-inv_sigmoid(0.25) + w0 + w2*yr)/w1
        ln_left.set_xdata([xa, xb])
        ln_left.set_ydata([yl, yr])
        xa = -(-inv_sigmoid(0.75) + w0 + w2*yl)/w1
        xb = -(-inv_sigmoid(0.75) + w0 + w2*yr)/w1
        ln_right.set_xdata([xa, xb])
        ln_right.set_ydata([yl, yr])
    elif w2 != 0:
        ya = -(w0 + w1*xl)/w2
        yb = -(w0 + w1*xr)/w2
        ln.set_xdata([xl, xr])
        ln.set_ydata([ya, yb])
        ya = -(-inv_sigmoid(0.25) + w0 + w1*xl)/w2
        yb = -(-inv_sigmoid(0.25) + w0 + w1*xr)/w2
        ln_left.set_xdata([xl, xr])
        ln_left.set_ydata([ya, yb])
        ya = -(-inv_sigmoid(0.75) + w0 + w1*xl)/w2
        yb = -(-inv_sigmoid(0.75) + w0 + w1*xr)/w2
        ln_right.set_xdata([xl, xr])
        ln_right.set_ydata([ya, yb])
    else:
        ln.set_xdata([])
        ln.set_ydata([])
    display(fig)

def plot_boundary_of_weight(iteration=0):
    w = W[iteration]
    plot_boundary(w[0], w[1], w[2])

interact(plot_boundary_of_weight, iteration=(0, len(W)-1))
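As a sanity check, a sketch reusing X, y and the weight trajectory W stored above: for a small enough step size the training log-likelihood should increase along the gradient-ascent iterations.
In [ ]:
# Training log-likelihood along the gradient-ascent trajectory
LL = [y @ (X @ w_) - np.sum(np.logaddexp(0, X @ w_)) for w_ in W]
plt.figure(figsize=(5,3))
plt.plot(LL)
plt.xlabel('iteration')
plt.ylabel('log-likelihood')
plt.show()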