Load NumPy


In [87]:
import numpy as np

In [88]:
# Report which NumPy release this notebook is running against.
np.__version__


Out[88]:
'1.13.1'

In [89]:
# IPython magic: show the kernel's current working directory.
# NOTE(review): the recorded output is an absolute, machine-specific path.
%pwd


Out[89]:
u'/workshop/matrix/mProto/ml-workshop/src'

Basic Array Operations


In [90]:
# Build a 1-D integer array from a plain Python list.
# Alternative with an explicit 32-bit dtype:
#   np.array(range(0, 6), dtype=np.int32)
a = np.array([0, 1, 2, 3, 4, 5])
a


Out[90]:
array([0, 1, 2, 3, 4, 5])

In [91]:
# Number of dimensions (axes) of the array; 1 for the flat array built above.
a.ndim


Out[91]:
1

In [92]:
# Length along each axis, as a tuple; a 1-D array of six items gives (6,).
a.shape


Out[92]:
(6,)

In [93]:
# Element type shared by every item in the array (int64 in the recorded run).
a.dtype


Out[93]:
dtype('int64')

In [94]:
# astype builds a converted array and leaves `a` itself unchanged:
# the two prints below show a's original dtype next to b's new one.
b = a.astype(np.float32)
# Single-argument print(...) runs on both Python 2 and Python 3.
print(a.dtype)
print(b.dtype)


int64
float32

Reshape


In [95]:
# Reinterpret the six elements of `a` as 3 rows x 2 columns.
b = a.reshape((3, 2))
# Single-argument print(...) runs on both Python 2 and Python 3.
print(b)
print(b.ndim)
print(b.shape)


[[0 1]
 [2 3]
 [4 5]]
2
(3, 2)

Manipulate Elements


In [96]:
# Write row 1, column 0 with a single tuple index instead of chained b[1][0],
# which first materialises the intermediate row b[1].
# `b` comes from a.reshape(...), so this write is also visible through `a`
# (see the next cell).
b[1, 0] = 77
print(b)


[[ 0  1]
 [77  3]
 [ 4  5]]

In [97]:
# `a` shows the 77 written through the reshaped array `b`.
print(a)


[ 0  1 77  3  4  5]

In [98]:
# .copy() gives `c` its own data, so the edit below does not reach `a`
# (confirmed by printing `a` in the next cell).
c = a.reshape((3, 2)).copy()
print(c)
# Tuple indexing addresses the element directly, without the intermediate row c[0].
c[0, 0] = -99
print(c)


[[ 0  1]
 [77  3]
 [ 4  5]]
[[-99   1]
 [ 77   3]
 [  4   5]]

In [99]:
# `a` is unchanged by the write to the copy `c`.
print(a)


[ 0  1 77  3  4  5]

Element-wise Mathematics


In [100]:
# Arithmetic on an array applies to every element: each value is doubled.
d = a * 2
print(d)


[  0   2 154   6   8  10]

In [101]:
# Element-wise power: each value is squared.
e = a ** 2
print(e)


[   0    1 5929    9   16   25]

In [102]:
# Contrast with a plain list: `* 2` repeats the list instead of doubling each item.
print([1, 2, 3, 4, 5] * 2)


[1, 2, 3, 4, 5, 1, 2, 3, 4, 5]

In [103]:
# Left disabled on purpose: plain lists do not support `**`, so running the
# line below raises a TypeError instead of squaring each item.
# print [1,2,3,4,5] ** 2

Indexing & Conditional Indexing


In [104]:
# Slice out the first two elements (indices 0 and 1).
b = a[:2]
b


Out[104]:
array([0, 1])

In [105]:
# Index with a list of positions to pick several elements at once.
print(a[[2, 3, 4]])


[77  3  4]

In [106]:
# A comparison on an array yields a boolean array, one flag per element.
print(a > 4)


[False False  True False False  True]

In [107]:
# Indexing with the boolean mask keeps only the elements where it is True.
print(a[a > 4])


[77  5]

In [108]:
# np.where with only a condition returns a tuple of index arrays, one per axis,
# locating the True entries.
idx = np.where(a > 4)
print(idx)


(array([2, 5]),)

In [109]:
# `~` inverts the boolean mask, selecting the elements that are NOT greater than 4.
print(a[~(a > 4)])


[0 1 3 4]

In [110]:
# Count how many elements satisfy the condition.
# np.count_nonzero counts the True entries inside NumPy and gives a single total
# for a mask of any shape; the builtin sum() walks the array one element at a
# time in Python and, on a 2-D mask, would return per-column sums instead.
np.count_nonzero(a > 4)


Out[110]:
2

In [111]:
# On a 2-D array, np.where returns one index array per axis:
# row indices first, then column indices, of the matching elements.
b = np.reshape(a, (3, 2))
np.where(b > 4)


Out[111]:
(array([1, 2]), array([0, 1]))

Descriptive Statistics First-Time


In [112]:
from scipy.stats import mode

# Sample data for the descriptive statistics below (rebinds `a`).
a = np.array([3, 2, 3, 4, 5, 1, 0, 3, 7, 3])

# Compute the mode once and reuse it; item 0 is the modal value and
# item 1 is how many times it occurs.
mode_result = mode(a)

# Single-argument print(...) runs on both Python 2 and Python 3; the two-value
# line is formatted explicitly so it prints "value count" on either interpreter.
print(np.mean(a))
print(np.median(a))
print("{} {}".format(mode_result[0], mode_result[1]))
print(np.std(a))
print(np.var(a))
# Peak-to-peak: maximum minus minimum.
print(np.ptp(a))


3.1
3.0
[3] [4]
1.86815416923
3.49
7

For the mode function, if more than one value is tied for most frequent, only the first (smallest) of them is returned.


In [113]:
# 3 and 4 each occur twice here, so this input has two candidate modes.
m = np.array([0, 1, 77, 3, 4, 5, 3, 4])
mode(m)


Out[113]:
ModeResult(mode=array([3]), count=array([2]))

Performance


In [114]:
# Three ways to compute the sum of squares of 0..999, timed with %timeit.
# 1) Pure Python: list comprehension reduced with the builtin sum.
%timeit sum([x * x for x in xrange(1000)])
# 2) NumPy element-wise multiply, but still reduced with the builtin sum;
#    in the recorded run this is slower than the pure-Python version.
#    NOTE(review): the timed statement also includes building np.arange(1000).
%timeit na = np.arange(1000); sum(na * na)
# 3) NumPy dot product of the vector with itself; fastest in the recorded run.
%timeit na = np.arange(1000); na.dot(na)


The slowest run took 4.46 times longer than the fastest. This could mean that an intermediate result is being cached.
10000 loops, best of 3: 45 µs per loop
10000 loops, best of 3: 72.1 µs per loop
The slowest run took 7.52 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 2.66 µs per loop