In [1]:
# Setup: true division on Python 2, NumPy/pandas imports, 4-digit array printing.
from __future__ import division
from numpy.random import randn
from pandas import Series
import numpy as np
np.set_printoptions(precision=4)
# Add the 'book_scripts' directory to the import path, then change into it.
import sys; sys.path.append('book_scripts')
%cd book_scripts
In [2]:
# np.issubdtype tests whether a dtype belongs to a branch of NumPy's type hierarchy.
ints = np.ones(10, dtype=np.uint16)
floats = np.ones(10, dtype=np.float32)
np.issubdtype(ints.dtype, np.integer)
np.issubdtype(floats.dtype, np.floating)
Out[2]:
In [3]:
# mro() lists a scalar type's parent classes, most specific first.
np.float64.mro()
Out[3]:
In [4]:
# Reshape a 1-D array of 8 elements into 4 rows by 2 columns.
arr = np.arange(8)
arr
arr.reshape((4, 2))
Out[4]:
In [5]:
# Reshapes can be chained; the final shape here is (2, 4).
arr.reshape((4, 2)).reshape((2, 4))
Out[5]:
In [6]:
# -1 lets NumPy infer that dimension from the array's size (15 / 5 = 3).
arr = np.arange(15)
arr.reshape((5, -1))
Out[6]:
In [7]:
# A shape tuple taken from another array can be passed straight to reshape.
other_arr = np.ones((3, 5))
other_arr.shape
arr.reshape(other_arr.shape)
Out[7]:
In [8]:
# ravel flattens to 1-D, returning a view when possible.
arr = np.arange(15).reshape((5, 3))
arr
arr.ravel()
Out[8]:
In [9]:
# flatten also returns a 1-D result, but always as a copy.
arr.flatten()
Out[9]:
In [10]:
# 'F' reads the elements in column-major (Fortran) order instead of row-major.
arr = np.arange(12).reshape((3, 4))
arr
arr.ravel()
arr.ravel('F')
Out[10]:
In [11]:
# Join two (2, 3) arrays along rows (axis=0) and then along columns (axis=1).
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])
np.concatenate([arr1, arr2], axis=0)
np.concatenate([arr1, arr2], axis=1)
Out[11]:
In [12]:
# vstack / hstack are the row-wise / column-wise shorthands for the same joins.
np.vstack((arr1, arr2))
np.hstack((arr1, arr2))
Out[12]:
In [13]:
# NOTE: randn is already imported in the first cell; the import is repeated here.
from numpy.random import randn
arr = randn(5, 2)
arr
# Splitting at indices [1, 3] yields rows [0:1], [1:3] and [3:].
first, second, third = np.split(arr, [1, 3])
first
second
third
Out[13]:
In [14]:
# np.r_ stacks row-wise; np.c_ stacks column-wise (the 1-D arr becomes a column).
arr = np.arange(6)
arr1 = arr.reshape((3, 2))
arr2 = randn(3, 2)
np.r_[arr1, arr2]
np.c_[np.r_[arr1, arr2], arr]
Out[14]:
In [15]:
# Slices inside np.c_ are expanded to ranges, one column per slice.
np.c_[1:6, -10:-5]
Out[15]:
In [16]:
# repeat replicates each element the given number of times.
arr = np.arange(3)
arr.repeat(3)
Out[16]:
In [17]:
# A list gives a separate repeat count for each element.
arr.repeat([2, 3, 4])
Out[17]:
In [18]:
# For a 2-D array, axis selects the direction of repetition (rows here).
arr = randn(2, 2)
arr
arr.repeat(2, axis=0)
Out[18]:
In [19]:
# Per-slice repeat counts along axis 0 (rows) and then axis 1 (columns).
arr.repeat([2, 3], axis=0)
arr.repeat([2, 3], axis=1)
Out[19]:
In [20]:
# tile lays out whole copies of the array; a scalar count tiles along the last axis.
arr
np.tile(arr, 2)
Out[20]:
In [21]:
# A tuple gives the number of copies along each axis.
arr
np.tile(arr, (2, 1))
np.tile(arr, (3, 2))
Out[21]:
In [22]:
# Fancy indexing with a list of integer positions.
arr = np.arange(10) * 100
inds = [7, 1, 2, 6]
arr[inds]
Out[22]:
In [23]:
# take reads the same positions; put writes to them in place
# (first a single scalar, then one value per index).
arr.take(inds)
arr.put(inds, 42)
arr
arr.put(inds, [40, 41, 42, 43])
arr
Out[23]:
In [24]:
# take along axis=1 selects (and here repeats) columns.
inds = [2, 0, 2, 1]
arr = randn(2, 4)
arr
arr.take(inds, axis=1)
Out[24]:
In [25]:
# The scalar 4 is broadcast to every element of arr.
arr = np.arange(5)
arr
arr * 4
Out[25]:
In [26]:
# arr.mean(0) has shape (3,), which broadcasts across the rows of the (4, 3) array.
arr = randn(4, 3)
arr.mean(0)
demeaned = arr - arr.mean(0)
demeaned
# Column means of the demeaned array should be zero up to floating-point error.
demeaned.mean(0)
Out[26]:
In [27]:
# Row means have shape (4,); reshaping them to (4, 1) lets them broadcast
# across the columns.
arr
row_means = arr.mean(1)
row_means.reshape((4, 1))
demeaned = arr - row_means.reshape((4, 1))
demeaned.mean(1)
Out[27]:
In [28]:
# arr is (4, 3) while arr.mean(1) is (4,): the trailing dimensions (3 vs 4) do
# not match, so this subtraction cannot broadcast. The error is the point of
# this cell, so it is caught and shown instead of aborting a "Run All".
try:
    arr - arr.mean(1)
except ValueError as exc:
    print(exc)
In [29]:
# The same subtraction with the means reshaped to (4, 1), which does broadcast.
arr - arr.mean(1).reshape((4, 1))
Out[29]:
In [30]:
# Indexing with np.newaxis inserts a length-1 axis: (4, 4) -> (4, 1, 4).
arr = np.zeros((4, 4))
arr_3d = arr[:, np.newaxis, :]
arr_3d.shape
Out[30]:
In [31]:
# Turn a 1-D array into a (3, 1) column and into a (1, 3) row.
arr_1d = np.random.normal(size=3)
arr_1d[:, np.newaxis]
arr_1d[np.newaxis, :]
Out[31]:
In [32]:
# Demean over axis 2: the (3, 4) means need a trailing length-1 axis to
# broadcast against the (3, 4, 5) array.
arr = randn(3, 4, 5)
depth_means = arr.mean(2)
depth_means
demeaned = arr - depth_means[:, :, np.newaxis]
demeaned.mean(2)
Out[32]:
In [34]:
def demean_axis(arr, axis=0):
    """Subtract from ``arr`` its mean taken along ``axis``.

    Parameters
    ----------
    arr : ndarray
        Array to demean.
    axis : int, default 0
        Axis along which the mean is computed and removed.

    Returns
    -------
    ndarray
        Array with the same shape as ``arr`` whose mean along ``axis`` is
        zero up to floating-point error.
    """
    means = arr.mean(axis)
    # This generalizes things like [:, :, np.newaxis] to N dimensions
    indexer = [slice(None)] * arr.ndim
    indexer[axis] = np.newaxis
    # A multidimensional index must be a tuple: current NumPy treats a list of
    # slices/None as an (invalid) index array and raises IndexError.
    return arr - means[tuple(indexer)]
In [35]:
# Assigning a scalar through a full slice broadcasts it into every element.
arr = np.zeros((4, 3))
arr[:] = 5
arr
Out[35]:
In [36]:
# A (4, 1) column broadcasts across the 3 columns of arr.
col = np.array([1.28, -0.42, 0.44, 1.6])
arr[:] = col[:, np.newaxis]
arr
# A (2, 1) nested list fills the first two rows, one value per row.
arr[:2] = [[-1.37], [0.509]]
arr
Out[36]:
In [37]:
# ufunc.reduce aggregates with the binary operation; np.add.reduce matches sum().
arr = np.arange(10)
np.add.reduce(arr)
arr.sum()
Out[37]:
In [38]:
# Fix the seed so the random example in the next cell is repeatable.
np.random.seed(12346)
In [39]:
# logical_and.reduce over axis 1 checks, per row, whether every element is
# less than its right-hand neighbour (i.e. whether the row is sorted).
arr = randn(5, 5)
arr[::2].sort(1) # sort a few rows
arr[:, :-1] < arr[:, 1:]
np.logical_and.reduce(arr[:, :-1] < arr[:, 1:], axis=1)
Out[39]:
In [40]:
# accumulate keeps the intermediate results: a running sum along each row.
arr = np.arange(15).reshape((3, 5))
np.add.accumulate(arr, axis=1)
Out[40]:
In [41]:
# outer applies the operation to every pair of elements from the two arrays.
arr = np.arange(3).repeat([1, 2, 2])
arr
np.multiply.outer(arr, np.arange(5))
Out[41]:
In [42]:
# The result's shape is the input shapes joined: (3, 4) + (5,) -> (3, 4, 5).
result = np.subtract.outer(randn(3, 4), randn(5))
result.shape
Out[42]:
In [43]:
# reduceat reduces over slices starting at the given indices:
# arr[0:5], arr[5:8] and arr[8:].
arr = np.arange(10)
np.add.reduceat(arr, [0, 5, 8])
Out[43]:
In [44]:
# With axis=1 the slice boundaries are applied along the columns.
arr = np.multiply.outer(np.arange(4), np.arange(5))
arr
np.add.reduceat(arr, [0, 2, 4], axis=1)
Out[44]:
In [45]:
def add_elements(x, y):
    """Return the result of ``x + y``."""
    total = x + y
    return total
# frompyfunc wraps a Python function as a ufunc taking 2 inputs and returning
# 1 output; its results are object arrays.
add_them = np.frompyfunc(add_elements, 2, 1)
add_them(np.arange(8), np.arange(8))
Out[45]:
In [46]:
# vectorize is similar but lets the output dtype be specified via otypes.
add_them = np.vectorize(add_elements, otypes=[np.float64])
add_them(np.arange(8), np.arange(8))
Out[46]:
In [47]:
# Time the Python-function-backed add_them against the native np.add.
arr = randn(10000)
%timeit add_them(arr, arr)
%timeit np.add(arr, arr)
In [48]:
# A structured dtype given as a list of (field_name, field_type) tuples.
dtype = [('x', np.float64), ('y', np.int32)]
sarr = np.array([(1.5, 6), (np.pi, -2)], dtype=dtype)
sarr
Out[48]:
In [49]:
# Each element is a record whose fields are accessed by name.
sarr[0]
sarr[0]['y']
Out[49]:
In [50]:
# Indexing the array by field name gives that field's values for all records.
sarr['x']
Out[50]:
In [51]:
# A third tuple entry gives the field a shape: 'x' holds 3 int64 values per record.
dtype = [('x', np.int64, 3), ('y', np.int32)]
arr = np.zeros(4, dtype=dtype)
arr
Out[51]:
In [52]:
# The 'x' field of a single record is a length-3 array.
arr[0]['x']
Out[52]:
In [53]:
# The 'x' field of all 4 records is a (4, 3) array.
arr['x']
Out[53]:
In [54]:
# A field's type may itself be a structured dtype, giving nested fields.
dtype = [('x', [('a', 'f8'), ('b', 'f4')]), ('y', np.int32)]
data = np.array([((1, 2), 5), ((3, 4), 6)], dtype=dtype)
data['x']
data['y']
data['x']['a']
Out[54]:
In [55]:
# ndarray.sort sorts the array in place.
arr = randn(6)
arr.sort()
arr
Out[55]:
In [56]:
# arr[:, 0] is a view, so sorting it modifies arr itself.
arr = randn(3, 5)
arr
arr[:, 0].sort() # Sort first column values in-place
arr
Out[56]:
In [57]:
# np.sort returns a sorted copy and leaves arr unchanged.
arr = randn(5)
arr
np.sort(arr)
arr
Out[57]:
In [58]:
# Sort each row independently (along axis 1).
arr = randn(3, 5)
arr
arr.sort(axis=1)
arr
Out[58]:
In [59]:
# Reversing the column order of the row-sorted array gives descending rows.
arr[:, ::-1]
Out[59]:
In [60]:
# argsort returns the indices that would sort the array.
values = np.array([5, 0, 1, 3, 2])
indexer = values.argsort()
indexer
values[indexer]
Out[60]:
In [61]:
# Reorder the columns of arr by the sorted order of its first row.
arr = randn(3, 5)
arr[0] = values
arr
arr[:, arr[0].argsort()]
Out[61]:
In [62]:
first_name = np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])
last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jones', 'Walters'])
# np.lexsort uses the LAST key passed as the primary sort key, so this orders
# by last name and breaks ties by first name.
sorter = np.lexsort((first_name, last_name))
# On Python 3 zip() is lazy and would display only as a zip object; build a
# list so the sorted (last, first) pairs are actually shown. On Python 2 the
# result is the same list as before.
sorted_names = list(zip(last_name[sorter], first_name[sorter]))
sorted_names
Out[62]:
In [63]:
# mergesort is stable: elements with equal keys keep their original relative order.
values = np.array(['2:first', '2:second', '1:first', '1:second', '1:third'])
key = np.array([2, 2, 1, 1, 1])
indexer = key.argsort(kind='mergesort')
indexer
values.take(indexer)
Out[63]:
In [64]:
# searchsorted does a binary search on a sorted array and returns the
# position where the value could be inserted to keep it sorted.
arr = np.array([0, 1, 7, 12, 15])
arr.searchsorted(9)
Out[64]:
In [65]:
# An array of values returns an array of positions.
arr.searchsorted([0, 8, 11, 16])
Out[65]:
In [66]:
# With ties, the default returns the left-most position; side='right'
# returns the position just past the last equal element.
arr = np.array([0, 0, 0, 1, 1, 1, 1])
arr.searchsorted([0, 1])
arr.searchsorted([0, 1], side='right')
Out[66]:
In [67]:
# 50 random whole-number values drawn from [0, 10000) and a set of bin edges.
data = np.floor(np.random.uniform(0, 10000, size=50))
bins = np.array([0, 100, 1000, 5000, 10000])
data
Out[67]:
In [68]:
# Label each data point with the index of the interval it falls into.
labels = bins.searchsorted(data)
labels
Out[68]:
In [69]:
# Group the data by bin label with pandas and take each group's mean.
Series(data).groupby(labels).mean()
Out[69]:
In [70]:
# np.digitize computes bin indices for the same data and edges.
np.digitize(data, bins)
Out[70]:
In [71]:
# Integer indexing drops an axis, while a length-1 slice keeps the column 2-D.
X = np.array([[ 8.82768214, 3.82222409, -1.14276475, 2.04411587],
[ 3.82222409, 6.75272284, 0.83909108, 2.08293758],
[-1.14276475, 0.83909108, 5.01690521, 0.79573241],
[ 2.04411587, 2.08293758, 0.79573241, 6.24095859]])
X[:, 0] # one-dimensional
y = X[:, :1] # two-dimensional by slicing
X
y
Out[71]:
In [72]:
# The quadratic form y^T X y written with nested np.dot calls.
np.dot(y.T, np.dot(X, y))
Out[72]:
In [73]:
# With np.matrix, * is matrix multiplication and selecting one column stays
# 2-D, so the same product reads ym.T * Xm * ym.
Xm = np.matrix(X)
ym = Xm[:, 0]
Xm
ym
ym.T * Xm * ym
Out[73]:
In [74]:
# .I is the matrix inverse; multiplying it by X should give (approximately)
# the identity.
Xm.I * X
Out[74]:
In [75]:
# mode='w+' creates (or overwrites) the file 'mymmap' and maps it as a
# 10000 x 10000 float64 array.
mmap = np.memmap('mymmap', dtype='float64', mode='w+', shape=(10000, 10000))
mmap
Out[75]:
In [76]:
# Slicing a memmap returns a view onto the file-backed data.
section = mmap[:5]
In [77]:
# Assigning to the slice writes into the mapped buffer; flush() writes
# pending changes to disk.
section[:] = np.random.randn(5, 10000)
mmap.flush()
mmap
# Drop this reference to the memmap object.
del mmap
In [78]:
# Reopening needs the dtype and shape again: the file holds only raw bytes.
mmap = np.memmap('mymmap', dtype='float64', shape=(10000, 10000))
mmap
Out[78]:
In [79]:
# Delete the variable and remove the backing file with the shell's rm command.
%xdel mmap
!rm mymmap
In [80]:
# Same shape and values, stored in row-major (C) vs column-major (Fortran)
# order; .flags reports the memory layout.
arr_c = np.ones((1000, 1000), order='C')
arr_f = np.ones((1000, 1000), order='F')
arr_c.flags
arr_f.flags
arr_f.flags.f_contiguous
Out[80]:
In [81]:
# Time a sum along axis 1 (over each row) for the two layouts.
%timeit arr_c.sum(1)
%timeit arr_f.sum(1)
In [82]:
# copy('C') returns a copy laid out in C order.
arr_f.copy('C').flags
Out[82]:
In [83]:
# A slice of leading rows stays contiguous; a slice of columns does not.
arr_c[:50].flags.contiguous
arr_c[:, :50].flags
Out[83]:
In [84]:
# Delete the large arrays and change back to the parent directory.
%xdel arr_c
%xdel arr_f
%cd ..
from numpy cimport ndarray, float64_t
def sum_elements(ndarray[float64_t] arr):
    """Return the sum of the elements of a float64 ndarray buffer."""
    cdef Py_ssize_t idx
    cdef Py_ssize_t length = len(arr)
    cdef float64_t total = 0
    # Index and accumulator are C-typed, so the loop body works on C values.
    for idx in range(length):
        total += arr[idx]
    return total