In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from pandas import DataFrame, Series

In [2]:
arr = np.arange(3)

arr.repeat(3)


Out[2]:
array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [3]:
arr.repeat([2, 3, 4])


Out[3]:
array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [4]:
from numpy.random import randn

In [5]:
arr = randn(2, 2)
arr


Out[5]:
array([[ 1.63601648, -0.83344146],
       [ 0.47724003, -0.08775426]])

In [6]:
arr.repeat(2, axis=0)


Out[6]:
array([[ 1.63601648, -0.83344146],
       [ 1.63601648, -0.83344146],
       [ 0.47724003, -0.08775426],
       [ 0.47724003, -0.08775426]])

In [7]:
arr.repeat([2, 3], axis=0)


Out[7]:
array([[ 1.63601648, -0.83344146],
       [ 1.63601648, -0.83344146],
       [ 0.47724003, -0.08775426],
       [ 0.47724003, -0.08775426],
       [ 0.47724003, -0.08775426]])

In [8]:
arr.repeat([2, 3], axis=1)


Out[8]:
array([[ 1.63601648,  1.63601648, -0.83344146, -0.83344146, -0.83344146],
       [ 0.47724003,  0.47724003, -0.08775426, -0.08775426, -0.08775426]])

In [9]:
arr


Out[9]:
array([[ 1.63601648, -0.83344146],
       [ 0.47724003, -0.08775426]])

In [10]:
np.tile(arr, 2)


Out[10]:
array([[ 1.63601648, -0.83344146,  1.63601648, -0.83344146],
       [ 0.47724003, -0.08775426,  0.47724003, -0.08775426]])

In [11]:
np.tile(arr, (2, 1))


Out[11]:
array([[ 1.63601648, -0.83344146],
       [ 0.47724003, -0.08775426],
       [ 1.63601648, -0.83344146],
       [ 0.47724003, -0.08775426]])

In [12]:
np.tile(arr, (3, 2))


Out[12]:
array([[ 1.63601648, -0.83344146,  1.63601648, -0.83344146],
       [ 0.47724003, -0.08775426,  0.47724003, -0.08775426],
       [ 1.63601648, -0.83344146,  1.63601648, -0.83344146],
       [ 0.47724003, -0.08775426,  0.47724003, -0.08775426],
       [ 1.63601648, -0.83344146,  1.63601648, -0.83344146],
       [ 0.47724003, -0.08775426,  0.47724003, -0.08775426]])

In [13]:
## Equivalent functions for fancy indexing: take and put

In [14]:
arr = np.arange(10) * 100
inds = [7, 1, 2, 6]
arr[inds]


Out[14]:
array([700, 100, 200, 600])

In [15]:
arr.take(inds)


Out[15]:
array([700, 100, 200, 600])

In [17]:
arr.put(inds, 42)
arr


Out[17]:
array([  0,  42,  42, 300, 400, 500,  42,  42, 800, 900])

In [18]:
arr.put(inds, [40, 41, 42, 43])
arr


Out[18]:
array([  0,  41,  42, 300, 400, 500,  43,  40, 800, 900])

In [19]:
inds = [2, 0, 2, 1]
arr = randn(2, 4)
arr


Out[19]:
array([[ 0.93129879, -0.35411414,  0.57702374,  0.24116572],
       [-0.83636279,  0.91580841,  0.31046141,  1.75450793]])

In [20]:
arr.take(inds, axis=1)


Out[20]:
array([[ 0.57702374,  0.93129879,  0.57702374, -0.35411414],
       [ 0.31046141, -0.83636279,  0.31046141,  0.91580841]])

In [21]:
arr = randn(1000, 50)
# random sample of 500 rows
inds = np.random.permutation(1000)[:500]
%timeit arr[inds]


The slowest run took 4.15 times longer than the fastest. This could mean that an intermediate result is being cached.
10000 loops, best of 3: 34 µs per loop

In [22]:
%timeit arr.take(inds, axis=0)


10000 loops, best of 3: 25.9 µs per loop

In [23]:
arr = np.arange(5)
arr


Out[23]:
array([0, 1, 2, 3, 4])

In [24]:
arr * 4


Out[24]:
array([ 0,  4,  8, 12, 16])

In [30]:
arr = randn(4, 3)
arr.mean(0)


Out[30]:
array([ 0.19983727,  0.37342783, -0.05995232])

In [31]:
demeaned = arr - arr.mean(0)
demeaned


Out[31]:
array([[-0.27610684, -0.21123702, -0.16088269],
       [-0.84854155,  0.50728507,  0.07791194],
       [-0.27615565,  0.76700785, -0.69072193],
       [ 1.40080404, -1.0630559 ,  0.77369269]])

In [32]:
demeaned.mean(0)


Out[32]:
array([ -5.55111512e-17,   0.00000000e+00,   0.00000000e+00])

In [35]:
row_means = arr.mean(1)
row_means.reshape((4, 1))


Out[35]:
array([[-0.04497126],
       [ 0.08332275],
       [ 0.10448102],
       [ 0.54158454]])

In [36]:
demeaned = arr - row_means.reshape((4, 1))
demeaned.mean(1)


Out[36]:
array([ -9.25185854e-18,   3.23815049e-17,   0.00000000e+00,
        -3.70074342e-17])

In [37]:
arr - arr.mean(1)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-37-7b87b85a20b2> in <module>()
----> 1 arr - arr.mean(1)

ValueError: operands could not be broadcast together with shapes (4,3) (4,) 

In [38]:
arr - arr.mean(1).reshape((4, 1))


Out[38]:
array([[-0.03129831,  0.20716207, -0.17586375],
       [-0.73202702,  0.79739015, -0.06536313],
       [-0.1807994 ,  1.03595467, -0.85515527],
       [ 1.05905677, -1.2312126 ,  0.17215583]])

In [40]:
arr = np.zeros((4, 4))
arr_3d = arr[:, np.newaxis, :]
arr_3d.shape


Out[40]:
(4L, 1L, 4L)

In [41]:
arr_1d = np.random.normal(size=3)
arr_1d[:, np.newaxis]


Out[41]:
array([[ 0.31555779],
       [ 0.17220104],
       [ 1.97139767]])

In [42]:
arr_1d[np.newaxis, :]


Out[42]:
array([[ 0.31555779,  0.17220104,  1.97139767]])

In [43]:
arr = randn(3, 4, 5)
depth_means = arr.mean(2)
depth_means


Out[43]:
array([[ 0.17218435, -0.34911607,  0.58013683,  0.41087967],
       [-0.61084914, -0.37566525,  0.29728931,  0.55414718],
       [ 0.32010987,  0.15772747, -0.60769567, -0.57177377]])

In [44]:
demeaned = arr - depth_means[:, :, np.newaxis]
demeaned.mean(2)


Out[44]:
array([[  1.11022302e-17,  -8.88178420e-17,   4.44089210e-17,
          0.00000000e+00],
       [ -2.22044605e-17,  -3.33066907e-17,   0.00000000e+00,
          0.00000000e+00],
       [  2.22044605e-17,  -1.11022302e-17,   6.66133815e-17,
         -4.44089210e-17]])

In [ ]:
def demean_axis(arr, axis=0):
    """Return ``arr`` with the mean along ``axis`` subtracted out.

    Parameters
    ----------
    arr : numpy.ndarray
        Input array with at least one dimension.
    axis : int, default 0
        Axis along which the mean is computed and removed. Negative
        values count from the last axis.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``arr``; its mean along ``axis`` is
        zero up to floating-point rounding.
    """
    means = arr.mean(axis)

    # N-dimensional generalization of means[:, :, np.newaxis]: keep every
    # remaining axis whole and re-insert a length-1 axis where the reduced
    # one used to be, so that `means` broadcasts against `arr`.
    indexer = [slice(None)] * arr.ndim
    indexer[axis] = np.newaxis

    # The index must be a tuple. A list here is interpreted as a fancy
    # (integer-array) index by current NumPy and raises IndexError.
    return arr - means[tuple(indexer)]

In [45]:
arr = np.zeros((4, 3))
arr[:] = 5
arr


Out[45]:
array([[ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.]])

In [46]:
col = np.array([1.28, -0.42, 0.44, 1.6])
arr[:] = col[:, np.newaxis]
arr


Out[46]:
array([[ 1.28,  1.28,  1.28],
       [-0.42, -0.42, -0.42],
       [ 0.44,  0.44,  0.44],
       [ 1.6 ,  1.6 ,  1.6 ]])

In [47]:
arr[:2] = [[-1.37], [0.509]]
arr


Out[47]:
array([[-1.37 , -1.37 , -1.37 ],
       [ 0.509,  0.509,  0.509],
       [ 0.44 ,  0.44 ,  0.44 ],
       [ 1.6  ,  1.6  ,  1.6  ]])

In [ ]: