NumPy provides an efficient way to store and manipulate arrays. NumPy is all about VECTORIZATION. The mental model is different from regular Python and works with:
In [ ]:
In [5]:
import random
class RandomWalker(object):
    """One-dimensional random walker that moves one unit left or right per step."""

    def __init__(self):
        # Current position on the integer line; every walk starts from 0.
        self.position = 0

    def walk(self, n):
        """Lazily generate the positions visited during an ``n``-step walk.

        The walker is first reset to 0. Each iteration yields the current
        position and only then takes a step, so the ``n`` yielded values are
        the positions *before* each step, beginning with 0.
        """
        self.position = 0
        for _ in range(n):
            yield self.position
            # randint(0, 1) is 0 or 1; map it onto a step of -1 or +1.
            self.position += random.randint(0, 1) * 2 - 1
In [10]:
%%%timeit
walker = RandomWalker()
walk = [position for position in walker.walk(10000)]
In [12]:
def random_walk_f(n):
    """Return a one-dimensional random walk as a list of positions.

    The walk starts at 0 and takes ``n`` steps, each of which is +1 or -1.

    Args:
        n: Number of steps to take.

    Returns:
        A list of ``n + 1`` integers: the starting position 0 followed by
        the position reached after each step.
    """
    position = 0
    walk = [position]
    for _ in range(n):
        # Accumulate the step onto the running position; a plain assignment
        # would record the individual +1/-1 steps instead of the walk.
        position += 2 * random.randint(0, 1) - 1
        walk.append(position)
    return walk
In [13]:
%%%timeit
walk = random_walk_f(10000)
Small improvement in time over the object-oriented version.
In [14]:
from itertools import accumulate
def random_walker_v(n):
    """Return the positions of an ``n``-step random walk, built without an explicit loop.

    All steps are generated in one call and then turned into positions with
    a running sum.

    Args:
        n: Number of steps to take.

    Returns:
        A list of ``n`` integers: the position after each step (the starting
        position 0 is not included).
    """
    # Draw the steps independently (with replacement). Sampling without
    # replacement from a pool of n +1s and n -1s would make each step depend
    # on the ones already drawn, and would need a 2n-element list.
    steps = random.choices((1, -1), k=n)
    return list(accumulate(steps))
In [16]:
%%%timeit
walk = random_walker_v(10000)
Wow, about 2x as fast.
In [17]:
import numpy as np
def random_walker_np(n):
    """Return the positions of an ``n``-step random walk as a NumPy array.

    Args:
        n: Number of steps to take.

    Returns:
        A 1-D integer array of length ``n`` holding the running sum of the
        +1/-1 steps (the starting position 0 is not included).
    """
    # One vectorized draw of n values, each 0 or 1 (the upper bound 2 is exclusive).
    coin_flips = np.random.randint(0, 2, size=n)
    # Map {0, 1} onto {-1, +1}.
    steps = coin_flips * 2 - 1
    return steps.cumsum()
In [18]:
%%%timeit
walk = random_walker_np(10000)
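Clobbering the namespace (`from numpy import *`) would mean we don't have to type `np.` everywhere, but it hides where names come from; here NumPy is imported under the conventional `np` alias.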
In [1]:
import numpy as np
Create a NumPy array. You can pass any kind of Python sequence: lists, tuples, etc.
In [25]:
# Build a 1-D array from a plain Python list of six integers.
a = np.array([0, 1, 2, 3, 4, 5])
In [26]:
# Echo the array: the value of a cell's last expression is shown as its output.
a
Out[26]:
In [3]:
# A nested list produces a 2-D array: two rows of three elements each.
m = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
In [5]:
# Size of m along each dimension, as a tuple.
m.shape
Out[5]:
In [ ]:
In [ ]:
In [27]:
# `a.data` is the Python buffer object that exposes the array's underlying
# memory; list() iterates over it to show the stored elements.
ad = a.data
list(ad)
Out[27]:
In [28]:
# What type is `a`?
type(a)
Out[28]:
In [29]:
# What is the numeric type of the elements in the array?
a.dtype
Out[29]:
In [30]:
# What shape (size along each dimension) does the array have?
a.shape
Out[30]:
In [33]:
# Bytes per element. A 32-bit integer takes 4 bytes (a 64-bit integer takes 8);
# which one you see depends on the array's dtype.
a.itemsize
Out[33]:
In [34]:
# Total size in bytes of the array's elements (itemsize times the number of elements).
a.nbytes
Out[34]:
In [35]:
# Beware of type coercion: assigning into an existing array never changes its dtype.
# `a` holds integers (the original note says int32; the exact width is
# platform-dependent -- check a.dtype).
print(a)
a[0] = 10.38383  # the float is converted to the array's integer dtype, so the fraction is dropped
print(a)
In [37]:
# Two equal-length 1-D arrays for the element-wise examples.
# x mixes ints and a float, so it becomes a float array; y stays integer.
x = np.array([0, 1, 1.5, 3])
y = np.array([1, 2, 3, 1])
In [7]:
# Element-wise addition: each element of x is added to the element of y
# at the same index.
x + y
In [8]:
# Element-wise subtraction: each element of y is subtracted from the element
# of x at the same index.
x - y
In [ ]:
In [43]:
%%%timeit
dy = y[1:] - y[:-1]
`%%capture` runs a cell and stores its output (stdout/stderr) in a variable instead of displaying it.
In [40]:
%%capture timeit_result
%timeit python_list1 = range(1,1000)
%timeit python_list2 = np.arange(1,1000)
In [41]:
# Show the timing text stored in timeit_result by the %%capture cell.
print(timeit_result)
In [19]:
# Draw a 2x3 array of uniform random floats in [0, 1).
# This must go through NumPy's random module: the standard-library
# random.random() takes no arguments and cannot produce an array.
data_set = np.random.random((2, 3))
print(data_set)
In [ ]:
In [18]:
# Namespace example: a bare `max` does not refer to np.max, so Python's builtin max is the one being used.
In [ ]:
In [17]:
# Plain `max` here is Python's builtin, applied to the first row of data_set.
max(data_set[0])
Out[17]: