Sveučilište u Zagrebu
Fakultet elektrotehnike i računarstva
http://www.fer.unizg.hr/predmet/su
Ak. god. 2015./2016.
(c) 2015 Jan Šnajder
Verzija: 0.5 (2015-10-15)
NEPOTPUNO
Glavni paketi (core packages):
Dodatni paketi:
Ćelije se evaluiraju sa SHIFT+ENTER
Markdown tekst s posebnim formatiranjem i kodom u $\LaTeX$-u: $f(\mathbf{x}) = \sum_{i=1}^n \ln \frac{P(x)P(y)}{P(x, y)}$
In [1]:
10
Out[1]:
In [2]:
_
Out[2]:
In [3]:
Out[3]:
In [4]:
Out[4]:
In [5]:
?
In [6]:
%quickref
In [7]:
x = 5
In [8]:
x
Out[8]:
In [9]:
print(x)
In [10]:
print x
In [11]:
type(x)
Out[11]:
In [12]:
(x + 1) ** 2
Out[12]:
In [13]:
x += 1; x
Out[13]:
In [14]:
?x
In [15]:
del x
In [16]:
x
In [ ]:
X=7; varijabla_s_vrlo_dugackim_imenom = 747
In [ ]:
x=1; y=-2
In [ ]:
x==y
In [ ]:
(x==y)==False
In [ ]:
x!=y
In [ ]:
x==y or (x>0 and not y>0)
In [ ]:
z = 42 if x==y else 66
In [ ]:
z
In [ ]:
moj_string = 'ringe ringe'
In [ ]:
'hopa' + ' ' + "cupa"
In [ ]:
moj_string += ' raja'; moj_string
In [ ]:
len(moj_string)
In [ ]:
print "X=%0.2f y=%d, s='%s'" % (x, y, moj_string)
In [ ]:
1/2
In [ ]:
1/2.0
In [ ]:
1/float(2)
In [ ]:
round(0.5)
In [ ]:
import math
In [ ]:
math.sqrt(68)
In [ ]:
math.exp(1)
In [ ]:
math.log(_)
In [ ]:
math.log(100, 2)
In [ ]:
xs = [5, 6, 2, 3] # Stvara listu
In [ ]:
xs
In [ ]:
xs[0] # Zero-based indeksiranje
In [ ]:
xs[-1] # Negativni indeksi broje od kraja liste
In [ ]:
xs[1] = 100 # Ažuriranje liste
xs
In [ ]:
xs[1] = 'foo' # Liste mogu biti heterogene
xs
In [ ]:
xs[3] = [1,2]
xs
In [ ]:
xs.append(x) # Dodaje na kraj
xs
In [ ]:
xs + [77, 88]
In [ ]:
xs.extend([77, 88]); xs
In [ ]:
xs.pop() # Skida zadnji element liste
In [ ]:
xs
In [ ]:
xs[0:2]
In [ ]:
xs[1:]
In [ ]:
xs[:3]
In [ ]:
xs[:]
In [ ]:
xs[:-2] # Sve osim zadnja dva
In [ ]:
xs[0:2] = [1,2]
xs
In [ ]:
range(10)
In [ ]:
range(1, 10)
In [ ]:
range(0, 51, 5)
In [ ]:
for x in range(5):
print x
In [ ]:
for x in xs: print x
In [ ]:
for ix, x in enumerate(range(0, 51, 5)):
print ix, x
In [ ]:
xs = []
for x in range(10):
xs.append(x ** 2)
xs
In [ ]:
[x ** 2 for x in range(10)]
In [ ]:
[x ** 2 for x in range(10) if x % 2 == 0]
In [ ]:
[(x, x ** 2) for x in range(10)]
In [ ]:
zip([1, 2, 3], [4, 5, 6])
In [ ]:
zip(*[(1, 4), (2, 5), (3, 6)])
In [ ]:
xs, ys = zip(*[(1, 4), (2, 5), (3, 6)])
In [ ]:
xs
In [ ]:
map(lambda x : x + 1, xs)
In [ ]:
[ x + 1 for x in xs ]
In [ ]:
ys = []
for x in xs :
ys.append(x + 1)
ys
In [ ]:
sum(ys)
In [17]:
d = {'zagreb' : 790017, 'split' : 178102, 'rijeka' : 128624}
In [18]:
d['split']
Out[18]:
In [19]:
d['osijek']
In [20]:
d.get('osijek', 0)
Out[20]:
In [21]:
d['osijek'] = 108048; d
Out[21]:
In [22]:
'rijeka' in d
Out[22]:
In [23]:
d['zagreb'] = 790200; d
Out[23]:
In [24]:
del d['rijeka']; d
Out[24]:
Iteriranje po rječniku:
In [25]:
for grad in d:
print 'Grad %s ima %d stanovnika' % (grad, d[grad])
Iteriranje po ključevima i po vrijednostima:
In [26]:
for grad, stanovnici in d.iteritems():
print 'Grad %s ima %d stanovnika' % (grad, stanovnici)
Ugniježđeni rječnici:
In [27]:
d2 = {'zagreb' : {'trešnjevka' : 120240, 'centar' : 145302}}
d2 ['zagreb']['trešnjevka']
Out[27]:
In [28]:
def inc(x):
    """Return ``x`` increased by one."""
    incremented = x + 1
    return incremented
In [29]:
def sign(x):
    """Describe the sign of ``x`` with a Croatian word.

    Returns:
        'pozitivno' when ``x > 0``, 'negativno' when ``x < 0``,
        and 'nula' in every other case.
    """
    if x > 0:
        return 'pozitivno'
    if x < 0:
        return 'negativno'
    return 'nula'


# Try the function on a negative, a zero and a positive input.
# The parenthesised single-argument print works on both Python 2 and 3.
for x in [-1, 0, 1]:
    print(sign(x))
Podrazumijevani argumenti:
In [30]:
def broj_stanovnika(grad, godina=2015):
    """Estimate the population of city ``grad`` in year ``godina``.

    The base figure is read from the global dictionary ``d``; a rounded
    linear correction of ``10000 * (-1.2)`` inhabitants per year elapsed
    since 2015 is added to it.

    Args:
        grad: City name, expected to be a key of ``d``.
        godina: Target year (defaults to 2015, i.e. no correction).

    Raises:
        ValueError: If ``grad`` is not a key of ``d``.
    """
    if grad not in d:
        raise ValueError('Nepoznat neki grad')
    return d[grad] + round((godina - 2015) * 10000 * (-1.2))
In [31]:
broj_stanovnika('zagreb')
Out[31]:
In [32]:
broj_stanovnika('zagreb', godina=2020)
Out[32]:
In [33]:
broj_stanovnika('zadar')
In [34]:
class RegistarStanovnika:
    """Population register of a country: city name -> number of inhabitants."""

    # Class variable (shared by all instances): growth factor that
    # broj_stanovnika multiplies by 10000 for each year since 2015.
    prirast = -1.2

    def __init__(self, drzava, d):
        """Constructor.

        Args:
            drzava: Name of the country.
            d: Dictionary mapping city names to population counts.
        """
        self.drzava = drzava  # Instance variable (different for each instance)
        self.d = d

    def broj_stanovnika(self, grad, godina=2015):
        """Estimate the population of city ``grad`` in year ``godina``.

        Adds a rounded linear correction of ``10000 * self.prirast`` per year
        elapsed since 2015 to the figure stored in ``self.d``.

        Raises:
            ValueError: If ``grad`` is not a key of ``self.d``.
        """
        if grad not in self.d:
            raise ValueError('Nepoznat neki grad')
        return self.d[grad] + round((godina - 2015) * 10000 * self.prirast)

    def ukupno_stanovnika(self):
        """Return the sum of the populations of all registered cities."""
        return sum(self.d.values())
In [35]:
reg = RegistarStanovnika('Hrvatska', {'zagreb' : 790017, 'split' : 178102, 'rijeka' : 128624})
In [36]:
reg.broj_stanovnika('split')
Out[36]:
In [37]:
reg.ukupno_stanovnika()
Out[37]:
In [1]:
import numpy as np
In [39]:
?np
In [40]:
np.__version__
Out[40]:
Jednodimenzijsko polje (polje ranga 1):
In [2]:
a = np.array([1, 2, 3])
In [42]:
a
Out[42]:
In [43]:
print a
In [44]:
type(a)
Out[44]:
In [45]:
a = np.array([1, 2, 3], dtype=np.float64)
In [46]:
a
Out[46]:
In [47]:
a[0]
Out[47]:
In [48]:
a[0] = 100; a
Out[48]:
In [49]:
a.shape
Out[49]:
In [50]:
len(a)
Out[50]:
In [51]:
np.array([1,'a',2])
Out[51]:
Matrica (dvodimenzijsko polje, polje ranga 2):
In [52]:
m = np.array([[1,2,3],[4,5,6]])
In [53]:
print m
In [54]:
m[1]
Out[54]:
In [55]:
m[1,1]
Out[55]:
In [56]:
m[1][1]
Out[56]:
In [57]:
m.shape
Out[57]:
In [58]:
m2 = np.array([[1,2,3],[4,5]])
In [59]:
print m2
Izrezivanje (engl. slicing):
In [60]:
print m
In [61]:
m[:,1]
Out[61]:
In [62]:
m[0,1:3]
Out[62]:
In [63]:
m[1,:2] = [77, 78]
In [64]:
m
Out[64]:
Uočiti razliku:
In [65]:
m[:,0] # daje polje ranga 1
Out[65]:
In [66]:
m[:,0:1] # daje polje ranga 2
Out[66]:
Trodimenzijsko polje (tenzor ranga 3):
In [67]:
t = np.array([[[1,2],[3,4]],[[4,5],[6,7]]])
In [68]:
t.shape
Out[68]:
In [69]:
t[0,1,1]
Out[69]:
In [70]:
t[0]
Out[70]:
In [71]:
t[0,:,1]
Out[71]:
In [6]:
np.zeros((5,5))
Out[6]:
In [7]:
np.ones((3,1))
Out[7]:
In [10]:
np.full((5,5), 55)
Out[10]:
In [11]:
np.eye(6)
Out[11]:
In [14]:
np.random.random((4,4))
Out[14]:
In [18]:
np.arange(1, 10)
Out[18]:
In [19]:
np.arange(1, 10, 2)
Out[19]:
In [20]:
np.linspace(1, 10, 5)
Out[20]:
In [23]:
np.linspace(1, 10)
Out[23]:
Indeksiranje poljem brojeva:
In [26]:
a = np.array([[1,2], [3, 4], [5, 6]]); a
Out[26]:
In [28]:
a[0,1]
Out[28]:
In [29]:
a[[0,2]] # Nije isto kao a[0,2] !
Out[29]:
In [30]:
a[[0,1,2], [0,1,0]] # Isto kao: np.array([a[0,0], a[1,1], a[2,0]])
Out[30]:
Indeksiranje Booleovim poljem:
In [31]:
a
Out[31]:
In [32]:
bool_ix = a > 2
bool_ix
Out[32]:
In [33]:
a[bool_ix]
Out[33]:
In [34]:
a[a > 2]
Out[34]:
Širenje (eng. broadcasting):
In [35]:
x = np.array([[1, 2], [3, 4]])
v = np.array([1, 2])
In [37]:
print x
In [89]:
x + v
Out[89]:
In [90]:
np.ones((2,2,3)) * 5
Out[90]:
Naslagivanje (engl. stacking):
In [38]:
v
Out[38]:
In [40]:
np.vstack([v, v])
Out[40]:
In [41]:
np.vstack([x, x])
Out[41]:
In [42]:
np.vstack((v, x))
Out[42]:
In [43]:
np.hstack((v, v))
Out[43]:
In [44]:
np.hstack((x, x))
Out[44]:
In [45]:
np.hstack((v, x))
In [46]:
np.column_stack((v, x))
Out[46]:
In [47]:
x
Out[47]:
In [98]:
np.dstack((x, x))
Out[98]:
In [99]:
np.shape(_)
Out[99]:
Preoblikovanje polja:
In [50]:
m = np.array([[ 1, 2, 3], [77, 78, 6]])
m.reshape(3, 2)
Out[50]:
In [51]:
x = np.array([[1,2],[3,4]])
y = np.array([[5,6],[7,8]])
In [102]:
print x; print y
Operacije "po elementima" (element-wise):
In [53]:
x + y
Out[53]:
In [54]:
x - y
Out[54]:
In [55]:
x / 2.0
Out[55]:
In [56]:
x.dtype
Out[56]:
In [57]:
(x/2.0).dtype
Out[57]:
In [58]:
x * y
Out[58]:
In [59]:
# Convert the integer arrays to float64. Assigning to the .dtype attribute
# would only reinterpret the existing integer bytes as floats (producing
# meaningless values); astype() converts the values themselves.
x = x.astype('float64')
y = y.astype('float64')
In [60]:
x / y
Out[60]:
In [111]:
np.sqrt(x)
Out[111]:
Vektorske/matrične operacije:
In [61]:
x = np.array([[1,2],[3,4]])
y = np.array([[5,6],[7,8]])
v = np.array([1,2])
w = np.array([5,3])
Skalarni (unutarnji, dot) umnožak vektora: $ \begin{pmatrix} 1 & 2 \\ \end{pmatrix} \cdot \begin{pmatrix} 5\\ 3\\ \end{pmatrix} = 11 $
In [113]:
print v.dot(w)
print w.dot(v)
print np.dot(v, w)
In [62]:
x.dot(v)
Out[62]:
In [63]:
np.dot(x, v)
Out[63]:
In [120]:
v.dot(x)
Out[120]:
In [119]:
np.dot(v,x)
Out[119]:
Primijetite da nema razlike između vektor-stupca i vektor-retka.
In [122]:
x.dot(y)
Out[122]:
In [121]:
np.dot(x, y)
Out[121]:
In [64]:
np.outer(v, w)
Out[64]:
Ostale operacije:
In [65]:
x = np.array([0, 2, 4, 1])
In [66]:
np.max(x)
Out[66]:
In [67]:
np.argmax(x)
Out[67]:
In [76]:
x = np.random.random(10); x
Out[76]:
In [69]:
np.mean(x)
Out[69]:
In [70]:
np.median(x)
Out[70]:
In [71]:
np.var(x)
Out[71]:
In [72]:
np.std(x)
Out[72]:
In [73]:
x = np.array([1, 2, np.nan])
np.mean(x)
Out[73]:
In [74]:
np.nanmean(x)
Out[74]:
In [77]:
np.ptp(x)
Out[77]:
In [85]:
X = np.array([[1,2],[3,4]])
print X
In [79]:
np.mean(X)
Out[79]:
In [83]:
np.mean(X, axis=0)
Out[83]:
In [86]:
np.cov(X)
Out[86]:
In [87]:
x = np.random.random(10000); x
Out[87]:
In [88]:
np.histogram(x)
Out[88]:
In [89]:
x = np.array([[1,2],[3,4]]); x
Out[89]:
In [90]:
np.sum(x)
Out[90]:
In [143]:
np.sum(x, axis=0)
Out[143]:
In [144]:
np.sum(x, axis=1)
Out[144]:
In [91]:
x.T
Out[91]:
In [92]:
v
Out[92]:
In [93]:
v.T
Out[93]:
In [94]:
x.diagonal()
Out[94]:
In [95]:
x.trace() # == x.diagonal().sum()
Out[95]:
Aplikacija funkcije na polje:
In [97]:
x
Out[97]:
In [99]:
np.apply_along_axis(sum, 1, x)
Out[99]:
In [100]:
np.apply_along_axis(len, 1, x)
Out[100]:
Većina ugrađenih funkcija je vektorizirana, tj. moguće ih je primijeniti na cijelo polje tako da provode operaciju nad pojedinačnim elementima polja. Npr.:
In [101]:
np.sign(x)
Out[101]:
In [102]:
np.log(x)
Out[102]:
Isto vrijedi i za korisnički definirane funkcije koje su definirane pomoću vektoriziranih ugrađenih funkcija:
In [103]:
def inc(x):
    """Return ``x`` increased by one."""
    incremented = x + 1
    return incremented
In [104]:
inc(x)
Out[104]:
Složenije funkcije treba eksplicitno vektorizirati pomoću `numpy.vectorize` (ili jednostavno aplicirati funkciju u `for` petlji, što funkcija `vectorize` zapravo i radi).
Permutacije:
In [105]:
x = np.arange(0,10); x
Out[105]:
In [108]:
np.random.permutation(x)
Out[108]:
In [109]:
x
Out[109]:
In [110]:
np.random.shuffle(x); x
Out[110]:
In [111]:
x
Out[111]:
In [112]:
l = [1, 2, 3]
a = np.array(l); a
Out[112]:
In [113]:
list(a)
Out[113]:
In [114]:
a.tolist()
Out[114]:
In [115]:
l = [[1, 2, 3], [4,5,6]]
a = np.array(l); a
Out[115]:
In [116]:
list(a)
Out[116]:
In [117]:
a.tolist()
Out[117]:
In [118]:
import scipy as sp
In [119]:
sp.__version__
Out[119]:
SciPy importa NumPy. Npr.:
In [166]:
x = sp.array([1,2,3])
Iz biblioteke SciPy interesantni su nam moduli `scipy.linalg` i `scipy.stats`.
In [122]:
from scipy import linalg
Inverz matrice:
In [123]:
y
Out[123]:
In [124]:
y_inv = linalg.inv(y); y_inv
Out[124]:
In [125]:
sp.dot(y, y_inv)
Out[125]:
Determinanta:
In [126]:
linalg.det(y)
Out[126]:
Euklidska norma ($l_2$-norma) vektora: $\|\mathbf{x}\|_2 = \sqrt{\sum_i x_i^2}$
In [127]:
w
Out[127]:
In [128]:
linalg.norm(w)
Out[128]:
Općenita $p$-norma: $\|\mathbf{x}\|_p = \big(\sum_i |x_i|^p\big)^{1/p}$
In [130]:
linalg.norm(w, ord=1)
Out[130]:
In [131]:
linalg.norm(w, ord=sp.inf)
Out[131]:
In [132]:
from scipy import stats
In [133]:
stats.norm
Out[133]:
In [136]:
stats.norm.pdf(0)
Out[136]:
In [137]:
xs = sp.linspace(-2, 2, 10);
In [138]:
stats.norm.pdf(xs)
Out[138]:
In [139]:
stats.norm.pdf(xs, loc=1, scale=2)
Out[139]:
Uzorkovanje iz normalne distribucije:
In [140]:
stats.norm.rvs(loc=1, scale=2, size=10)
Out[140]:
"Zamrzavanje" distribucije:
In [141]:
normal = stats.norm(1, 2)
In [142]:
normal.pdf(xs)
Out[142]:
In [144]:
normal.rvs(size=5)
Out[144]:
Multivarijatna Gaussova distribucija:
In [145]:
?stats.multivariate_normal
In [146]:
mean = sp.array([1.0, 3.0])
cov = sp.array([[2.0, 0.3], [0.5, 0.7]])
mnormal = stats.multivariate_normal(mean, cov)
In [148]:
mnormal.pdf([1, 0])
Out[148]:
In [149]:
np.random.seed(42) # Radi reproducibilnosti rezultata
mnormal.rvs(size=5)
Out[149]:
Koeficijent korelacije:
In [150]:
x, y = np.random.random((2, 10))
In [152]:
y
Out[152]:
In [153]:
stats.pearsonr(x, y)
Out[153]:
`matplotlib` sadrži više modula: `pyplot`, `image`, `matplot3d`, ...
In [160]:
import matplotlib.pyplot as plt
import matplotlib
In [161]:
matplotlib.__version__
Out[161]:
In [162]:
%pylab inline
`pylab` kombinira `pyplot` i `numpy`. Gornja naredba (ipython magic) osigurava da plotovi budu renderirani direktno u bilježnicu, umjesto da otvaraju zaseban prozor.
In [165]:
plt.plot([1,2,3,4,5], [4,5,5,7,3])
plt.show()
In [262]:
plt.plot([4,5,5,7,3]);
In [263]:
plt.plot([4,5,5,7,3], 'ro');
In [166]:
def f(x):
    """Return the square of ``x``."""
    squared = x ** 2
    return squared
In [167]:
xs = linspace(0,100); xs
Out[167]:
In [266]:
f(xs)
Out[266]:
In [168]:
plt.plot(xs, f(xs));
In [268]:
plt.plot(xs, f(xs), 'bo');
In [269]:
plt.plot(xs, f(xs), 'r+');
In [169]:
plt.plot(xs, 1 - f(xs), 'b', xs, f(xs)/2 - 1000, 'r--');
In [170]:
plt.plot(xs, f(xs), label='f(x)')
plt.plot(xs, 1 - f(xs), label='1-f(x)')
plt.legend()
plt.show()
In [171]:
xs = linspace(-5,5)
plt.plot(xs, stats.norm.pdf(xs), 'g--');
plt.plot(xs, stats.norm.pdf(xs, loc=1, scale=2), 'r', linewidth=3);
In [173]:
plt.scatter([0, 1, 2, 0], [4, 5, 2, 1])
plt.show()
In [274]:
plt.scatter([0,1,2,0], [4, 5, 2, 1], s=200, marker='s');
In [174]:
np.random.random(10)
Out[174]:
In [175]:
for c in 'rgb':
plt.scatter(sp.random.random(100), sp.random.random(100), s=200, alpha=0.5, marker='o', c=c)
In [178]:
x = np.linspace(1,5,5); x
Out[178]:
In [179]:
X, Y = np.meshgrid(x, x)
In [180]:
X
Out[180]:
In [181]:
Y
Out[181]:
In [182]:
Z = 10 * X + Y
Z
Out[182]:
In [183]:
plt.pcolormesh(X, Y, Z, cmap='gray')
plt.show()
In [184]:
mnormal = stats.multivariate_normal([0, 1], [[1, 1], [0.2, 3]])
In [185]:
mnormal.pdf([1,1])
Out[185]:
In [186]:
x = np.linspace(-1, 1)
y = np.linspace(-2, 2)
X, Y = np.meshgrid(x, y)
In [187]:
shape(X)
Out[187]:
In [188]:
In [189]:
shape(XY)
Out[189]:
In [190]:
mnormal.pdf(XY)
Out[190]:
In [191]:
plt.pcolormesh(X, Y, mnormal.pdf(XY))
plt.show()
In [291]:
plt.contourf(X, Y, mnormal.pdf(XY));
In [292]:
plt.contourf(X, Y, mnormal.pdf(XY), levels=[0,0.06, 0.07]);
In [193]:
plt.contour(X, Y, mnormal.pdf(XY));
In [194]:
x = linspace(-10,10)
X, Y = np.meshgrid(x, x)
Z = X*3 + Y
In [195]:
plt.contour(X, Y, Z);
In [296]:
plt.contour(X, Y, Z, levels=[0]);
Kombinacija više grafikona:
In [297]:
plt.contour(X, Y, Z, levels=[0])
plt.scatter([-5,-3,2,5], [4, 5, 2, 1])
plt.show()
In [298]:
np.random.seed(42)
x = stats.norm.rvs(size=1000)
In [299]:
plt.hist(x);
Više-manje istovjetno s:
In [300]:
hist, bins = np.histogram(x)
centers = (bins[:-1] + bins[1:]) / 2
plt.bar(centers, hist);
TODO
In [301]:
import pandas as pd
pd.__version__
Out[301]:
TODO
In [302]:
import sklearn
sklearn.__version__
Out[302]:
TODO