In [1]:
# Create an ndarray from a plain Python list.
# NumPy is imported here because no other cell in this notebook imports it,
# yet every later cell relies on the `np` alias.
import numpy as np

data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)  # mixed int/float input is stored as a float64 array

In [2]:
arr1


Out[2]:
array([ 6. ,  7.5,  8. ,  0. ,  1. ])

In [3]:
data2 = [[1,2,3,4], [5,6,7,8]]

In [4]:
arr2 = np.array(data2)

In [5]:
arr2


Out[5]:
array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [6]:
arr2.ndim


Out[6]:
2

In [7]:
arr2.shape


Out[7]:
(2, 4)

In [8]:
arr1.dtype


Out[8]:
dtype('float64')

In [9]:
arr2.dtype


Out[9]:
dtype('int32')

In [10]:
np.zeros(10)


Out[10]:
array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [11]:
np.zeros((3,6))


Out[11]:
array([[ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.]])

In [12]:
np.empty((2,3,2))


Out[12]:
array([[[  6.55260007e-261,  -4.22059888e-089],
        [ -1.16987348e-224,   7.57642993e-053],
        [  1.52117017e-183,  -9.80277240e-012]],

       [[ -2.71905602e-147,   1.75813789e+025],
        [  5.38839532e-111,  -2.27677482e+066],
        [ -6.31963153e-070,   4.08035250e+102]]])

In [13]:
# ndarray data types: build the same values twice, once per explicit dtype.
arr1, arr2 = (np.array([1, 2, 3], dtype=kind) for kind in (np.float64, np.int32))

In [14]:
arr1.dtype


Out[14]:
dtype('float64')

In [15]:
arr2.dtype


Out[15]:
dtype('int32')

In [16]:
arr = np.array([1,2,3,4,5])

In [17]:
arr


Out[17]:
array([1, 2, 3, 4, 5])

In [18]:
arr.dtype


Out[18]:
dtype('int32')

In [19]:
float_arr = arr.astype(np.float64)

In [20]:
float_arr.dtype


Out[20]:
dtype('float64')

In [21]:
float_arr


Out[21]:
array([ 1.,  2.,  3.,  4.,  5.])

In [22]:
arr = np.array([3.7,-1.2,-2.6,0.5,12.9,10.1])

In [23]:
arr


Out[23]:
array([  3.7,  -1.2,  -2.6,   0.5,  12.9,  10.1])

In [24]:
arr.astype(np.int32)


Out[24]:
array([ 3, -1, -2,  0, 12, 10])

In [25]:
# Fixed-width byte-string array holding numeric text.
# np.bytes_ names the same 'S' dtype that np.string_ did; the np.string_
# alias was removed in NumPy 2.0, so np.bytes_ keeps this cell runnable.
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)

In [26]:
numeric_strings


Out[26]:
array(['1.25', '-9.6', '42'], 
      dtype='|S4')

In [27]:
numeric_strings.astype(float)


Out[27]:
array([  1.25,  -9.6 ,  42.  ])

In [28]:
numeric_strings.dtype


Out[28]:
dtype('|S4')

In [29]:
int_array = np.arange(10)

In [30]:
int_array


Out[30]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [31]:
calibers = np.array([.22,.270,.357,.380,.44,.50], dtype=np.float64)

In [32]:
int_array.astype(calibers.dtype)


Out[32]:
array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.])

In [33]:
empty_unit32 = np.empty(8, dtype='u4')

In [34]:
empty_unit32


Out[34]:
array([82247777, 56265304, 53656512,  5065924, 45886880, 82317536,
        4539816, 81850904], dtype=uint32)

In [35]:
# Arithmetic between arrays and scalars: a 2x3 float array holding 1.0 .. 6.0.
arr = np.arange(1., 7.).reshape(2, 3)

In [36]:
arr


Out[36]:
array([[ 1.,  2.,  3.],
       [ 4.,  5.,  6.]])

In [37]:
arr * arr


Out[37]:
array([[  1.,   4.,   9.],
       [ 16.,  25.,  36.]])

In [38]:
arr - arr


Out[38]:
array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [39]:
1/arr


Out[39]:
array([[ 1.        ,  0.5       ,  0.33333333],
       [ 0.25      ,  0.2       ,  0.16666667]])

In [40]:
arr ** 0.5


Out[40]:
array([[ 1.        ,  1.41421356,  1.73205081],
       [ 2.        ,  2.23606798,  2.44948974]])

In [41]:
arr = np.arange(10)

In [42]:
arr


Out[42]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [43]:
arr[5]


Out[43]:
5

In [44]:
arr[5:8]


Out[44]:
array([5, 6, 7])

In [45]:
arr[5:8] = 12

In [46]:
arr


Out[46]:
array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [47]:
arr_slice = arr[5:8]

In [48]:
arr_slice


Out[48]:
array([12, 12, 12])

In [49]:
arr_slice[1] = 12345

In [50]:
arr_slice


Out[50]:
array([   12, 12345,    12])

In [51]:
arr


Out[51]:
array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,     9])

In [52]:
arr2d = np.array([[1,2,3],[4,5,6],[7,8,9]])

In [53]:
arr2d


Out[53]:
array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [54]:
arr2d[2]


Out[54]:
array([7, 8, 9])

In [55]:
arr2d[0][2]


Out[55]:
3

In [56]:
arr2d[0,2]


Out[56]:
3

In [57]:
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])

In [58]:
arr3d


Out[58]:
array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [59]:
arr3d[0]


Out[59]:
array([[1, 2, 3],
       [4, 5, 6]])

In [60]:
old_values = arr3d[0].copy()

In [61]:
arr3d[0] = 42

In [62]:
arr3d


Out[62]:
array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [63]:
arr3d[0] = old_values

In [64]:
arr3d


Out[64]:
array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [65]:
arr3d[1,0]


Out[65]:
array([7, 8, 9])

In [66]:
arr[1:6]


Out[66]:
array([ 1,  2,  3,  4, 12])

In [67]:
arr2d


Out[67]:
array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [68]:
arr2d


Out[68]:
array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [69]:
arr2d[:2]


Out[69]:
array([[1, 2, 3],
       [4, 5, 6]])

In [70]:
arr2d[:2,:1]


Out[70]:
array([[1],
       [4]])

In [71]:
arr2d[1,:2]


Out[71]:
array([4, 5])

In [72]:
arr2d[2,:1]


Out[72]:
array([7])

In [73]:
arr2d[:, :1]


Out[73]:
array([[1],
       [4],
       [7]])

In [74]:
arr2d[:2,1:]


Out[74]:
array([[2, 3],
       [5, 6]])

In [75]:
arr2d[:2,1:] = 0

In [76]:
arr2d


Out[76]:
array([[1, 0, 0],
       [4, 0, 0],
       [7, 8, 9]])

In [77]:
# Boolean indexing: seven name labels, later compared against a value to
# build boolean masks for selecting rows of `data`.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
names


Out[77]:
array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], 
      dtype='|S4')

In [78]:
# 7x4 array of random normal values. randn is reached through np.random
# (as other cells in this notebook do) because the bare name `randn` is
# never imported here.
data = np.random.randn(7, 4)

In [79]:
data


Out[79]:
array([[ 0.81110363,  1.45816609,  0.26587949, -1.94735035],
       [-1.26613907, -0.11271029, -0.29527518,  1.40822264],
       [ 0.74703127,  1.14987524, -0.97529476,  0.45439644],
       [ 0.08794572,  0.16219383, -0.53724172,  0.8295287 ],
       [ 0.63542807,  0.42884396,  0.82052316,  1.05402477],
       [ 1.70713119,  2.45764633,  1.18974952,  0.04223886],
       [ 0.7608705 , -0.27126957,  1.04069859,  1.86033599]])

In [80]:
names == 'Bob'


Out[80]:
array([ True, False, False,  True, False, False, False], dtype=bool)

In [81]:
data[names == 'Bob']


Out[81]:
array([[ 0.81110363,  1.45816609,  0.26587949, -1.94735035],
       [ 0.08794572,  0.16219383, -0.53724172,  0.8295287 ]])

In [82]:
data[names == 'Bob', 2:]


Out[82]:
array([[ 0.26587949, -1.94735035],
       [-0.53724172,  0.8295287 ]])

In [83]:
data[names == 'Bob', 3]


Out[83]:
array([-1.94735035,  0.8295287 ])

In [84]:
names != 'Bob'


Out[84]:
array([False,  True,  True, False,  True,  True,  True], dtype=bool)

In [85]:
# Select the rows whose name is NOT 'Bob'. A boolean mask is inverted with ~;
# unary minus on a boolean array is rejected by current NumPy releases.
data[~(names == 'Bob')]


Out[85]:
array([[-1.26613907, -0.11271029, -0.29527518,  1.40822264],
       [ 0.74703127,  1.14987524, -0.97529476,  0.45439644],
       [ 0.63542807,  0.42884396,  0.82052316,  1.05402477],
       [ 1.70713119,  2.45764633,  1.18974952,  0.04223886],
       [ 0.7608705 , -0.27126957,  1.04069859,  1.86033599]])

In [86]:
mask = (names == 'Bob') | (names == 'Will')

In [87]:
mask


Out[87]:
array([ True, False,  True,  True,  True, False, False], dtype=bool)

In [88]:
data[mask]


Out[88]:
array([[ 0.81110363,  1.45816609,  0.26587949, -1.94735035],
       [ 0.74703127,  1.14987524, -0.97529476,  0.45439644],
       [ 0.08794572,  0.16219383, -0.53724172,  0.8295287 ],
       [ 0.63542807,  0.42884396,  0.82052316,  1.05402477]])

In [89]:
data


Out[89]:
array([[ 0.81110363,  1.45816609,  0.26587949, -1.94735035],
       [-1.26613907, -0.11271029, -0.29527518,  1.40822264],
       [ 0.74703127,  1.14987524, -0.97529476,  0.45439644],
       [ 0.08794572,  0.16219383, -0.53724172,  0.8295287 ],
       [ 0.63542807,  0.42884396,  0.82052316,  1.05402477],
       [ 1.70713119,  2.45764633,  1.18974952,  0.04223886],
       [ 0.7608705 , -0.27126957,  1.04069859,  1.86033599]])

In [90]:
data[data < 0] = 0

In [91]:
data


Out[91]:
array([[ 0.81110363,  1.45816609,  0.26587949,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  1.40822264],
       [ 0.74703127,  1.14987524,  0.        ,  0.45439644],
       [ 0.08794572,  0.16219383,  0.        ,  0.8295287 ],
       [ 0.63542807,  0.42884396,  0.82052316,  1.05402477],
       [ 1.70713119,  2.45764633,  1.18974952,  0.04223886],
       [ 0.7608705 ,  0.        ,  1.04069859,  1.86033599]])

In [92]:
data[names != 'Joe'] = 7

In [93]:
data


Out[93]:
array([[ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 0.        ,  0.        ,  0.        ,  1.40822264],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 7.        ,  7.        ,  7.        ,  7.        ],
       [ 1.70713119,  2.45764633,  1.18974952,  0.04223886],
       [ 0.7608705 ,  0.        ,  1.04069859,  1.86033599]])

In [94]:
arr = np.empty((8, 4))

In [95]:
# Fill every row of arr with its own row index.
for i in range(len(arr)):
    arr[i, :] = i

In [96]:
arr


Out[96]:
array([[ 0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  1.],
       [ 2.,  2.,  2.,  2.],
       [ 3.,  3.,  3.,  3.],
       [ 4.,  4.,  4.,  4.],
       [ 5.,  5.,  5.,  5.],
       [ 6.,  6.,  6.,  6.],
       [ 7.,  7.,  7.,  7.]])

In [97]:
arr[[4,3,0,6]]


Out[97]:
array([[ 4.,  4.,  4.,  4.],
       [ 3.,  3.,  3.,  3.],
       [ 0.,  0.,  0.,  0.],
       [ 6.,  6.,  6.,  6.]])

In [98]:
arr[[-3,-5,-7]]


Out[98]:
array([[ 5.,  5.,  5.,  5.],
       [ 3.,  3.,  3.,  3.],
       [ 1.,  1.,  1.,  1.]])

In [99]:
# reshape itself is covered in chapter 12; here it just lays the integers
# 0..31 out as an 8x4 array.
arr = np.arange(32).reshape((8, 4))

In [100]:
arr


Out[100]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [101]:
# Passing two index lists pairs them up element-wise, so this selects the
# elements (1,0), (5,3), (7,1) and (2,2) and returns a 1-D array.
arr[[1,5,7,2], [0,3,1,2]]


Out[101]:
array([ 4, 23, 29, 10])

In [102]:
# Two-step selection: the first index picks rows 1, 5, 7 and 2 (in that order),
# then [:, [0,3,1,2]] reorders the columns of that intermediate result. The
# outcome is the full 4x4 block of those rows and columns, not four elements.
arr[[1,5,7,2]][:, [0,3,1,2]]


Out[102]:
array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

In [103]:
# np.ix_ turns the two index lists into an indexer that selects the
# rectangular block formed by rows [1,5,7,2] and columns [0,3,1,2] in a
# single indexing step.
arr[np.ix_([1,5,7,2], [0,3,1,2])]


Out[103]:
array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

In [104]:
# Transposing arrays and swapping axes.
arr = np.arange(15).reshape((3, 5))

In [105]:
arr


Out[105]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [106]:
arr.T


Out[106]:
array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [107]:
arr = np.random.randn(6, 3)

In [108]:
arr


Out[108]:
array([[-1.02522473,  1.236374  , -0.07913632],
       [ 0.73460909, -0.22349927,  1.41921032],
       [ 1.21581091, -0.66043541,  0.00610092],
       [-0.98124997,  1.19306594, -1.07562935],
       [ 0.29234749, -1.01693436, -0.77043462],
       [ 0.7459582 ,  1.08795916,  1.23048189]])

In [109]:
arr.T


Out[109]:
array([[-1.02522473,  0.73460909,  1.21581091, -0.98124997,  0.29234749,
         0.7459582 ],
       [ 1.236374  , -0.22349927, -0.66043541,  1.19306594, -1.01693436,
         1.08795916],
       [-0.07913632,  1.41921032,  0.00610092, -1.07562935, -0.77043462,
         1.23048189]])

In [110]:
np.dot(arr.T, arr)


Out[110]:
array([[ 4.67370463, -2.89113243,  2.87922959],
       [-2.89113243,  5.65596449,  0.41983492],
       [ 2.87922959,  0.41983492,  5.2850914 ]])

In [111]:
arr = np.arange(16).reshape((2, 2, 4))

In [112]:
arr


Out[112]:
array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [113]:
arr.transpose((1, 0, 2))


Out[113]:
array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])

In [114]:
arr


Out[114]:
array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [115]:
arr.swapaxes(1,2 )


Out[115]:
array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])

In [116]:
# Universal functions (ufuncs): fast element-wise array functions.
arr = np.arange(10)
arr


Out[116]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [117]:
# Element-wise square root.
np.sqrt(arr)


Out[117]:
array([ 0.        ,  1.        ,  1.41421356,  1.73205081,  2.        ,
        2.23606798,  2.44948974,  2.64575131,  2.82842712,  3.        ])

In [118]:
np.exp(arr)


Out[118]:
array([  1.00000000e+00,   2.71828183e+00,   7.38905610e+00,
         2.00855369e+01,   5.45981500e+01,   1.48413159e+02,
         4.03428793e+02,   1.09663316e+03,   2.98095799e+03,
         8.10308393e+03])

In [119]:
# Eight random normal draws; np.random.randn is used because the bare name
# `randn` is never imported in this notebook.
x = np.random.randn(8)

In [120]:
# A second set of eight random normal draws, called via np.random since the
# bare name `randn` is never imported in this notebook.
y = np.random.randn(8)

In [121]:
x


Out[121]:
array([ 0.43481702,  0.57140437,  0.9165387 ,  0.94950625, -0.06333394,
        1.79515454, -0.81781857,  0.97588037])

In [122]:
y


Out[122]:
array([ 1.29777267,  1.87283897,  1.30990092, -0.41159583,  0.55428726,
       -1.23498834, -1.78549484,  0.4566318 ])

In [123]:
# Element-wise maximum of x and y.
np.maximum(x, y)


Out[123]:
array([ 1.29777267,  1.87283897,  1.30990092,  0.94950625,  0.55428726,
        1.79515454, -0.81781857,  0.97588037])

In [124]:
# Seven random normal draws scaled by 5; np.random.randn replaces the bare
# `randn`, which is never imported in this notebook.
arr = np.random.randn(7) * 5

In [125]:
arr


Out[125]:
array([ 1.18707352,  4.43644337, -0.06806101,  2.95240127, -2.99881655,
        0.67637143,  6.34281466])

In [126]:
np.modf(arr)


Out[126]:
(array([ 0.18707352,  0.43644337, -0.06806101,  0.95240127, -0.99881655,
        0.67637143,  0.34281466]),
 array([ 1.,  4., -0.,  2., -2.,  0.,  6.]))

In [127]:
# Data processing using arrays.
points = np.arange(-5,5,0.01) # 1000 equally spaced points

In [128]:
xs, ys = np.meshgrid(points, points)

In [129]:
ys


Out[129]:
array([[-5.  , -5.  , -5.  , ..., -5.  , -5.  , -5.  ],
       [-4.99, -4.99, -4.99, ..., -4.99, -4.99, -4.99],
       [-4.98, -4.98, -4.98, ..., -4.98, -4.98, -4.98],
       ..., 
       [ 4.97,  4.97,  4.97, ...,  4.97,  4.97,  4.97],
       [ 4.98,  4.98,  4.98, ...,  4.98,  4.98,  4.98],
       [ 4.99,  4.99,  4.99, ...,  4.99,  4.99,  4.99]])

In [130]:
xs


Out[130]:
array([[-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       ..., 
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99]])

In [131]:
import matplotlib.pyplot as plt

In [132]:
z = np.sqrt(xs ** 2 + ys ** 2)

In [133]:
z


Out[133]:
array([[ 7.07106781,  7.06400028,  7.05693985, ...,  7.04988652,
         7.05693985,  7.06400028],
       [ 7.06400028,  7.05692568,  7.04985815, ...,  7.04279774,
         7.04985815,  7.05692568],
       [ 7.05693985,  7.04985815,  7.04278354, ...,  7.03571603,
         7.04278354,  7.04985815],
       ..., 
       [ 7.04988652,  7.04279774,  7.03571603, ...,  7.0286414 ,
         7.03571603,  7.04279774],
       [ 7.05693985,  7.04985815,  7.04278354, ...,  7.03571603,
         7.04278354,  7.04985815],
       [ 7.06400028,  7.05692568,  7.04985815, ...,  7.04279774,
         7.04985815,  7.05692568]])

In [134]:
# Show the function values evaluated on the grid as a grayscale image.
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
# Raw string: "\s" is not a valid Python escape sequence, so the backslash of
# the LaTeX \sqrt must be kept literally rather than passed through by luck.
plt.title(r"Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")


Out[134]:
<matplotlib.text.Text at 0x4f2d1d0>

In [135]:
# Expressing conditional logic as array operations.
xarr = np.array([1.1,1.2,1.3,1.4,1.5])
yarr = np.array([2.1,2.2,2.3,2.4,2.5])
cond = np.array([True,False,True,True,False])

In [136]:
# List-comprehension version: take x where c is true, otherwise y.
result = [(x if c else y)
          for x,y,c in zip(xarr,yarr,cond)]

In [137]:
result


Out[137]:
[1.1000000000000001, 2.2000000000000002, 1.3, 1.3999999999999999, 2.5]

In [138]:
# The same selection written with np.where, which is much more concise.
result = np.where(cond, xarr, yarr)

In [139]:
result


Out[139]:
array([ 1.1,  2.2,  1.3,  1.4,  2.5])

In [140]:
# 4x4 array of random normal values, drawn through np.random because the bare
# name `randn` is never imported in this notebook.
arr = np.random.randn(4, 4)

In [141]:
arr


Out[141]:
array([[-0.75274026, -1.15958638,  0.21931505, -1.35652414],
       [-1.27364132, -0.33634098, -0.01696858, -1.66114499],
       [-0.19572556, -0.76036435, -2.15367228,  1.3391826 ],
       [-0.34657113,  1.93961494,  0.25510572,  0.86166626]])

In [142]:
np.where(arr > 0, 2, -2)


Out[142]:
array([[-2, -2,  2, -2],
       [-2, -2, -2, -2],
       [-2, -2, -2,  2],
       [-2,  2,  2,  2]])

In [143]:
np.where(arr > 0, 2, arr) # set only the positive values to 2; others are kept


Out[143]:
array([[-0.75274026, -1.15958638,  2.        , -1.35652414],
       [-1.27364132, -0.33634098, -0.01696858, -1.66114499],
       [-0.19572556, -0.76036435, -2.15367228,  2.        ],
       [-0.34657113,  2.        ,  2.        ,  2.        ]])

In [144]:
#数学和统计方法

In [145]:
arr = np.random.randn(5,4) # normally distributed data
arr


Out[145]:
array([[ 0.04285724, -0.46979914, -1.45368614, -0.16900705],
       [ 0.05571362, -0.27033122,  0.9435601 , -0.83312733],
       [ 0.02831579, -0.07000229,  1.35356731, -0.92777504],
       [-0.38719936,  0.07446213, -0.70089881, -1.60831783],
       [ 0.16189536, -1.48119468,  1.63450262, -0.70733943]])

In [146]:
arr.mean()


Out[146]:
-0.23919020735342436

In [147]:
np.mean(arr)


Out[147]:
-0.23919020735342436

In [148]:
arr.sum()


Out[148]:
-4.7838041470684871

In [149]:
arr.mean(axis=1)


Out[149]:
array([-0.51240877, -0.02604621,  0.09602644, -0.65548847, -0.09803403])

In [150]:
arr.sum(0)


Out[150]:
array([-0.09841734, -2.21686519,  1.77704508, -4.24556668])

In [151]:
arr = np.array([[0,1,2], [3,4,5], [6,7,8]])

In [152]:
arr


Out[152]:
array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [153]:
# Cumulative sum along axis 0 (running totals down each column), not a
# running total over all elements.
arr.cumsum(0)


Out[153]:
array([[ 0,  1,  2],
       [ 3,  5,  7],
       [ 9, 12, 15]])

In [154]:
# Cumulative product along axis 1 (running products across each row), not a
# running product over all elements.
arr.cumprod(1)


Out[154]:
array([[  0,   0,   0],
       [  3,  12,  60],
       [  6,  42, 336]])

In [155]:
#用于布尔型数组的方法

In [156]:
# 100 random normal draws; np.random.randn is used since the bare name
# `randn` is never imported in this notebook.
arr = np.random.randn(100)

In [157]:
(arr > 0).sum() # number of positive values (True counts as 1 in the sum)


Out[157]:
51

In [158]:
bools = np.array([False, False, True, False])

In [159]:
bools.any()


Out[159]:
True

In [160]:
bools.all()


Out[160]:
False

In [161]:
# Eight random normal draws to be sorted in the following cells; called via
# np.random because the bare name `randn` is never imported in this notebook.
arr = np.random.randn(8)

In [162]:
arr


Out[162]:
array([ 0.76778018,  1.14494394, -1.57795749,  0.62891676, -0.76375674,
       -0.25677015, -0.10815103, -0.89523133])

In [163]:
arr.sort()

In [164]:
arr


Out[164]:
array([-1.57795749, -0.89523133, -0.76375674, -0.25677015, -0.10815103,
        0.62891676,  0.76778018,  1.14494394])

In [165]:
# 5x3 array of random normal values; np.random.randn replaces the bare
# `randn`, which is never imported in this notebook.
arr = np.random.randn(5, 3)

In [166]:
arr


Out[166]:
array([[-0.5501172 , -0.37133664, -0.43584403],
       [-0.29823443, -0.84254986, -0.20266977],
       [-0.85781095,  0.69607031, -1.03768108],
       [ 1.32678068,  0.60986752, -0.34480666],
       [ 0.38689584, -0.5707471 ,  1.5483942 ]])

In [167]:
arr.sort(1)

In [168]:
arr


Out[168]:
array([[-0.5501172 , -0.43584403, -0.37133664],
       [-0.84254986, -0.29823443, -0.20266977],
       [-1.03768108, -0.85781095,  0.69607031],
       [-0.34480666,  0.60986752,  1.32678068],
       [-0.5707471 ,  0.38689584,  1.5483942 ]])

In [169]:
# 1000 random normal draws used for the quantile example; drawn through
# np.random because the bare name `randn` is never imported in this notebook.
large_arr = np.random.randn(1000)

In [170]:
large_arr.sort()

In [171]:
large_arr[int(0.05 * len(large_arr))] # 5% quantile of the sorted array


Out[171]:
-1.6295263687286095

In [172]:
#唯一化以及其他的集合逻辑

In [173]:
names= np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])

In [174]:
# Find the unique values in the array and return them sorted.
np.unique(names)


Out[174]:
array(['Bob', 'Joe', 'Will'], 
      dtype='|S4')

In [175]:
ints = np.array([3,3,3,2,2,1,1,4,4])

In [176]:
np.unique(ints)


Out[176]:
array([1, 2, 3, 4])

In [177]:
sorted(set(names))


Out[177]:
['Bob', 'Joe', 'Will']

In [178]:
values = np.array([6,0,0,3,2,5,6])

In [179]:
# Membership test: for each element of values, report whether it occurs in
# the second sequence; the result is a boolean array.
# NOTE(review): newer NumPy releases recommend np.isin for this — confirm
# against the NumPy version this notebook targets.
np.in1d(values, [2,3,6])


Out[179]:
array([ True, False, False,  True,  True, False,  True], dtype=bool)

In [180]:
# np.setxor1d(x, y): symmetric difference of the two sets, i.e. the elements
# that appear in exactly one of the two arrays but not in both.
arr_x = np.array([1,4,5,7,9])
arr_y = np.array([1,3,6,7,9])

In [181]:
np.setxor1d(arr_x, arr_y)


Out[181]:
array([3, 4, 5, 6])

In [182]:
arr_x


Out[182]:
array([1, 4, 5, 7, 9])

In [183]:
arr_y


Out[183]:
array([1, 3, 6, 7, 9])

In [184]:
# np.intersect1d(x, y): the elements common to x and y, returned sorted.
arr_x_y_common = np.intersect1d(arr_x, arr_y)

In [185]:
arr_x_y_common


Out[185]:
array([1, 7, 9])

In [186]:
# Remove from arr_x every element that also occurs in arr_y.
# np.setdiff1d states this set difference directly; the symmetric difference
# used before was only correct because its second operand was a subset of
# arr_x, and the result (sorted unique values) is the same.
arr_final = np.setdiff1d(arr_x, arr_y)

In [187]:
arr_final


Out[187]:
array([4, 5])

In [188]:
#用于数组的文件输入输出

In [189]:
# Save the data to disk in binary format

In [190]:
arr =  np.arange(10)

In [191]:
np.save('book_scripts/ch04/some_array', arr)

In [192]:
np.load('book_scripts/ch04/some_array.npy')


Out[192]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [193]:
np.savez('book_scripts/ch04/array_archive.npz', x=arr_x, y=arr_y)

In [194]:
arch = np.load('book_scripts/ch04/array_archive.npz')

In [195]:
arch['x']


Out[195]:
array([1, 4, 5, 7, 9])

In [196]:
arch['y']


Out[196]:
array([1, 3, 6, 7, 9])

In [197]:
#存取文本文件

In [198]:
#############################################
#  Remove from new_arr_x the elements that also appear in new_arr_y;
#  elements of new_arr_y that are not in new_arr_x are ignored.
#np.savetxt('book_scripts/ch04/arr_x.txt', arr_x, fmt='%d')
#np.savetxt('book_scripts/ch04/arr_y.txt', arr_y, fmt='%d')

In [199]:
new_arr_x = np.loadtxt('book_scripts/ch04/arr_x.txt', dtype='int')
new_arr_x


Out[199]:
array([1653, 1119, 1326, 1324, 1308, 1242, 1226, 1148, 1096, 1089, 1046,
       1044])

In [200]:
new_arr_y = np.loadtxt('book_scripts/ch04/arr_y.txt', dtype='int')
new_arr_y


Out[200]:
array([1111, 1324, 1212, 1148, 1313, 1044])

In [201]:
# np.intersect1d(x, y): the elements common to x and y, returned sorted.
new_arr_x_y_common = np.intersect1d(new_arr_x, new_arr_y)
new_arr_x_y_common


Out[201]:
array([1044, 1148, 1324])

In [202]:
# Remove from new_arr_x every element that also occurs in new_arr_y; values
# of new_arr_y that are absent from new_arr_x are simply ignored.
# np.setdiff1d expresses this directly and returns the same sorted unique
# values as the earlier symmetric difference against the intersection.
new_arr_final = np.setdiff1d(new_arr_x, new_arr_y)
new_arr_final


Out[202]:
array([1046, 1089, 1096, 1119, 1226, 1242, 1308, 1326, 1653])

In [203]:
np.savetxt('book_scripts/ch04/arr_final.txt', new_arr_final, fmt='%d')

In [204]:
#线性代数

In [205]:
x = np.array([[1.,2.,3.], [4.,5.,6.]])

In [206]:
y = np.array([[6.,23.], [-1, 7], [8,9]])

In [207]:
x


Out[207]:
array([[ 1.,  2.,  3.],
       [ 4.,  5.,  6.]])

In [208]:
y


Out[208]:
array([[  6.,  23.],
       [ -1.,   7.],
       [  8.,   9.]])

In [209]:
x.dot(y)


Out[209]:
array([[  28.,   64.],
       [  67.,  181.]])

In [210]:
np.ones(3)


Out[210]:
array([ 1.,  1.,  1.])

In [211]:
np.dot(x, np.ones(3))


Out[211]:
array([  6.,  15.])

In [212]:
from numpy.linalg import inv, qr

In [213]:
# 5x5 matrix of random normal values for the linear-algebra examples; called
# via np.random because the bare name `randn` is never imported in this notebook.
X = np.random.randn(5, 5)

In [214]:
X


Out[214]:
array([[ 1.00141091,  0.56001421, -0.94141183, -1.21675986,  1.18494845],
       [-1.60438795,  1.02195801, -0.31412022, -0.4555567 , -0.79154916],
       [-0.01429437, -1.23977569, -1.21346572,  1.0819347 , -0.21664157],
       [-1.07121549,  1.8521514 ,  0.52019473, -0.36186728,  0.54315857],
       [ 1.23612482, -0.34789404,  0.50262863,  0.71903009,  0.54005797]])

In [215]:
mat = X.T.dot(X)

In [216]:
mat


Out[216]:
array([[ 6.25259604, -3.4751847 , -0.3573526 ,  0.77339629,  2.54540819],
       [-3.4751847 ,  6.44655293,  1.44482151, -3.40869825,  0.94137387],
       [-0.3573526 ,  1.44482151,  2.98066489,  0.14884469, -0.04999896],
       [ 0.77339629, -3.40869825,  0.14884469,  3.50657136, -1.12382759],
       [ 2.54540819,  0.94137387, -0.04999896, -1.12382759,  2.66427032]])

In [217]:
inv(mat)


Out[217]:
array([[ 3.98262637,  4.08666658, -1.66501429,  1.70284449, -4.56186303],
       [ 4.08666658,  4.63321654, -1.94351609,  2.19398829, -4.6524306 ],
       [-1.66501429, -1.94351609,  1.15787702, -0.96474657,  1.89222546],
       [ 1.70284449,  2.19398829, -0.96474657,  1.51207153, -1.78237401],
       [-4.56186303, -4.6524306 ,  1.89222546, -1.78237401,  5.66121547]])

In [218]:
mat.dot(inv(mat))


Out[218]:
array([[  1.00000000e+00,  -3.55271368e-15,   0.00000000e+00,
          0.00000000e+00,   1.77635684e-15],
       [  8.88178420e-16,   1.00000000e+00,   0.00000000e+00,
         -8.88178420e-16,   8.88178420e-16],
       [  3.05311332e-16,   2.49800181e-16,   1.00000000e+00,
          1.52655666e-16,   1.38777878e-15],
       [ -1.77635684e-15,   0.00000000e+00,   8.88178420e-16,
          1.00000000e+00,   1.77635684e-15],
       [  0.00000000e+00,   0.00000000e+00,   8.88178420e-16,
          8.88178420e-16,   1.00000000e+00]])

In [219]:
q,r=qr(mat)

In [220]:
r


Out[220]:
array([[-7.64047195,  5.87506808,  1.09060059, -2.15690471, -2.43104351],
       [ 0.        , -5.8061559 , -1.36074994,  4.26888612, -3.06089319],
       [ 0.        ,  0.        , -2.84309301, -1.40046534,  0.53210374],
       [ 0.        ,  0.        ,  0.        , -0.98062315, -0.39323328],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.11093155]])

In [221]:
# Random number generation (TODO: revisit — this section is not yet fully understood)

In [222]:
samples = np.random.normal(size=(4, 4))

In [223]:
samples


Out[223]:
array([[-1.16324779,  0.64329938, -2.72770899,  0.35688054],
       [ 0.45800554, -0.73873085,  1.27307117, -0.32289307],
       [-0.04590176, -0.95087958, -1.03833333, -0.93123995],
       [-1.07581859,  1.00599811,  0.9297899 ,  0.29705384]])

In [224]:
from random import normalvariate

In [225]:
N = 1000000

In [226]:
%timeit samples = [normalvariate(0, 1) for _ in xrange(N)]


1 loops, best of 3: 759 ms per loop

In [227]:
%timeit np.random.normal(size=N)


10 loops, best of 3: 42.8 ms per loop

In [228]:
#范例:随机漫步

In [231]:
# Pure-Python random walk: start at 0 and take `steps` moves of +1 or -1,
# recording the position after every move (walk has steps + 1 entries).
import random
position = 0
walk = [position]
steps = 1000
# range() instead of the Python-2-only xrange() so the loop runs on Python 3 too.
for i in range(steps):
    # randint(0, 1) is a fair coin: 1 -> step up, 0 -> step down.
    step = 1 if random.randint(0, 1) else -1
    position += step
    walk.append(position)

In [232]:
walk[:10]


Out[232]:
[0, 1, 0, -1, 0, 1, 0, -1, -2, -1]

In [233]:
nsteps = 1000

In [234]:
draws = np.random.randint(0, 2, size=nsteps)

In [235]:
steps = np.where(draws > 0, 1, -1)

In [236]:
walk = steps.cumsum()

In [237]:
walk.min()


Out[237]:
-17

In [238]:
walk.max()


Out[238]:
18

In [239]:
(np.abs(walk) >= 10).argmax()


Out[239]:
109

In [240]:
nwalks = 5000

In [241]:
nsteps = 1000

In [242]:
draws = np.random.randint(0, 2, size=(nwalks, nsteps)) # each draw is 0 or 1

In [243]:
steps = np.where(draws > 0, 1, -1)

In [244]:
walks = steps.cumsum(1)

In [245]:
walks


Out[245]:
array([[  1,   0,   1, ...,  16,  15,  14],
       [ -1,  -2,  -3, ...,  74,  75,  76],
       [  1,   0,   1, ..., -38, -39, -40],
       ..., 
       [ -1,   0,  -1, ..., -16, -17, -18],
       [  1,   2,   1, ..., -34, -33, -34],
       [  1,   0,   1, ...,  38,  39,  38]])

In [246]:
walks.max()


Out[246]:
125

In [248]:
walks.min()


Out[248]:
-117

In [250]:
hits30 = (np.abs(walks) >= 30).any(1)

In [251]:
hits30


Out[251]:
array([False,  True,  True, ...,  True,  True,  True], dtype=bool)

In [252]:
hits30.sum() # number of walks that reach 30 or -30


Out[252]:
3367

In [253]:
crossing_times = (np.abs(walks[hits30]) >= 30).argmax(1)

In [254]:
crossing_times.mean()


Out[254]:
507.12058212058213

In [255]:
steps = np.random.normal(loc=0, scale=0.25, size=(nwalks, nsteps))

In [256]:
steps


Out[256]:
array([[ 0.26326377, -0.26071554, -0.35678889, ..., -0.14108572,
        -0.05920089, -0.1865041 ],
       [-0.01613575,  0.02241958,  0.3545697 , ..., -0.19637899,
         0.2418674 ,  0.01486891],
       [-0.16546889, -0.46850467, -0.0294241 , ...,  0.03970497,
        -0.22021778,  0.04250674],
       ..., 
       [-0.56555492,  0.08620911, -0.30094671, ...,  0.04304703,
        -0.17696317,  0.07054844],
       [-0.19303453, -0.02150201,  0.46550494, ...,  0.29255065,
         0.28999259, -0.32506304],
       [-0.18365136, -0.13764277, -0.07703957, ...,  0.13765976,
        -0.67951398,  0.28733688]])

In [ ]: