In [1]:
# Create an ndarray
import numpy as np  # explicit import: no earlier cell brings `np` into scope

# A flat list mixing ints and a float is converted to a 1-D floating-point array.
data1 = [6,7.5,8,0,1]
arr1 = np.array(data1)
In [2]:
arr1
Out[2]:
In [3]:
data2 = [[1,2,3,4], [5,6,7,8]]
In [4]:
arr2 = np.array(data2)
In [5]:
arr2
Out[5]:
In [6]:
arr2.ndim
Out[6]:
In [7]:
arr2.shape
Out[7]:
In [8]:
arr1.dtype
Out[8]:
In [9]:
arr2.dtype
Out[9]:
In [10]:
np.zeros(10)
Out[10]:
In [11]:
np.zeros((3,6))
Out[11]:
In [12]:
np.empty((2,3,2))
Out[12]:
In [13]:
# ndarray data types: the same values stored under two explicitly requested dtypes
arr1 = np.array([1, 2, 3], dtype='float64')
arr2 = np.array([1, 2, 3], dtype='int32')
In [14]:
arr1.dtype
Out[14]:
In [15]:
arr2.dtype
Out[15]:
In [16]:
arr = np.array([1,2,3,4,5])
In [17]:
arr
Out[17]:
In [18]:
arr.dtype
Out[18]:
In [19]:
float_arr = arr.astype(np.float64)
In [20]:
float_arr.dtype
Out[20]:
In [21]:
float_arr
Out[21]:
In [22]:
arr = np.array([3.7,-1.2,-2.6,0.5,12.9,10.1])
In [23]:
arr
Out[23]:
In [24]:
arr.astype(np.int32)
Out[24]:
In [25]:
# Byte strings holding numeric text. np.bytes_ is the supported name for this
# dtype; the old alias np.string_ was removed in NumPy 2.0.
numeric_strings = np.array(['1.25','-9.6','42'], dtype=np.bytes_)
In [26]:
numeric_strings
Out[26]:
In [27]:
numeric_strings.astype(float)
Out[27]:
In [28]:
numeric_strings.dtype
Out[28]:
In [29]:
int_array = np.arange(10)
In [30]:
int_array
Out[30]:
In [31]:
calibers = np.array([.22,.270,.357,.380,.44,.50], dtype=np.float64)
In [32]:
int_array.astype(calibers.dtype)
Out[32]:
In [33]:
empty_unit32 = np.empty(8, dtype='u4')
In [34]:
empty_unit32
Out[34]:
In [35]:
# Arithmetic between arrays and scalars
arr = np.array([[1.,2.,3.], [4.,5.,6.]])
In [36]:
arr
Out[36]:
In [37]:
arr * arr
Out[37]:
In [38]:
arr - arr
Out[38]:
In [39]:
1/arr
Out[39]:
In [40]:
arr ** 0.5
Out[40]:
In [41]:
arr = np.arange(10)
In [42]:
arr
Out[42]:
In [43]:
arr[5]
Out[43]:
In [44]:
arr[5:8]
Out[44]:
In [45]:
arr[5:8] = 12
In [46]:
arr
Out[46]:
In [47]:
arr_slice = arr[5:8]
In [48]:
arr_slice
Out[48]:
In [49]:
arr_slice[1] = 12345
In [50]:
arr_slice
Out[50]:
In [51]:
arr
Out[51]:
In [52]:
arr2d = np.array([[1,2,3],[4,5,6],[7,8,9]])
In [53]:
arr2d
Out[53]:
In [54]:
arr2d[2]
Out[54]:
In [55]:
arr2d[0][2]
Out[55]:
In [56]:
arr2d[0,2]
Out[56]:
In [57]:
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
In [58]:
arr3d
Out[58]:
In [59]:
arr3d[0]
Out[59]:
In [60]:
old_values = arr3d[0].copy()
In [61]:
arr3d[0] = 42
In [62]:
arr3d
Out[62]:
In [63]:
arr3d[0] = old_values
In [64]:
arr3d
Out[64]:
In [65]:
arr3d[1,0]
Out[65]:
In [66]:
arr[1:6]
Out[66]:
In [67]:
arr2d
Out[67]:
In [68]:
arr2d
Out[68]:
In [69]:
arr2d[:2]
Out[69]:
In [70]:
arr2d[:2,:1]
Out[70]:
In [71]:
arr2d[1,:2]
Out[71]:
In [72]:
arr2d[2,:1]
Out[72]:
In [73]:
arr2d[:, :1]
Out[73]:
In [74]:
arr2d[:2,1:]
Out[74]:
In [75]:
arr2d[:2,1:] = 0
In [76]:
arr2d
Out[76]:
In [77]:
# Boolean indexing
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
names
Out[77]:
In [78]:
# Qualified call: a bare `randn` is not defined by any import in this notebook.
data = np.random.randn(7, 4)
In [79]:
data
Out[79]:
In [80]:
names == 'Bob'
Out[80]:
In [81]:
data[names == 'Bob']
Out[81]:
In [82]:
data[names == 'Bob', 2:]
Out[82]:
In [83]:
data[names == 'Bob', 3]
Out[83]:
In [84]:
names != 'Bob'
Out[84]:
In [85]:
# Rows whose name is NOT 'Bob'. A boolean mask is inverted with `~`;
# unary minus on a boolean array raises TypeError in current NumPy.
data[~(names == 'Bob')]
Out[85]:
In [86]:
mask = (names == 'Bob') | (names == 'Will')
In [87]:
mask
Out[87]:
In [88]:
data[mask]
Out[88]:
In [89]:
data
Out[89]:
In [90]:
data[data < 0] = 0
In [91]:
data
Out[91]:
In [92]:
data[names != 'Joe'] = 7
In [93]:
data
Out[93]:
In [94]:
arr = np.empty((8, 4))
In [95]:
# Fill row i with the value i (the scalar is broadcast across the whole row).
for i in range(8):
    arr[i] = i
In [96]:
arr
Out[96]:
In [97]:
arr[[4,3,0,6]]
Out[97]:
In [98]:
arr[[-3,-5,-7]]
Out[98]:
In [99]:
# reshape is covered in detail in chapter 12
arr = np.arange(32).reshape((8, 4))
In [100]:
arr
Out[100]:
In [101]:
# Picks the individual elements at (1,0), (5,3), (7,1) and (2,2):
# the two index lists are paired up position by position.
arr[[1,5,7,2], [0,3,1,2]]
Out[101]:
In [102]:
# Two-step selection: first take rows 1, 5, 7, 2 (in that order), then
# reorder the columns of that intermediate result as 0, 3, 1, 2.
# The result is a rectangular block, not a list of single elements.
arr[[1,5,7,2]][:, [0,3,1,2]]
Out[102]:
In [103]:
# np.ix_ turns the two index lists into an open mesh, so every listed row is
# combined with every listed column. This gives the same rectangular block as
# selecting the rows first and then the columns, in a single indexing step.
arr[np.ix_([1,5,7,2], [0,3,1,2])]
Out[103]:
In [104]:
# Transposing arrays and swapping axes
arr = np.arange(15).reshape((3, 5))
In [105]:
arr
Out[105]:
In [106]:
arr.T
Out[106]:
In [107]:
arr = np.random.randn(6, 3)
In [108]:
arr
Out[108]:
In [109]:
arr.T
Out[109]:
In [110]:
np.dot(arr.T, arr)
Out[110]:
In [111]:
arr = np.arange(16).reshape((2, 2, 4))
In [112]:
arr
Out[112]:
In [113]:
arr.transpose((1, 0, 2))
Out[113]:
In [114]:
arr
Out[114]:
In [115]:
arr.swapaxes(1,2 )
Out[115]:
In [116]:
# Universal functions: fast element-wise array functions
arr = np.arange(10)
arr
Out[116]:
In [117]:
# Element-wise square root
np.sqrt(arr)
Out[117]:
In [118]:
np.exp(arr)
Out[118]:
In [119]:
# Qualified call: a bare `randn` is not defined by any import in this notebook.
x = np.random.randn(8)
In [120]:
# Qualified call: a bare `randn` is not defined by any import in this notebook.
y = np.random.randn(8)
In [121]:
x
Out[121]:
In [122]:
y
Out[122]:
In [123]:
# Element-wise maximum of the two arrays
np.maximum(x, y)
Out[123]:
In [124]:
# Qualified call: a bare `randn` is not defined by any import in this notebook.
# Scaling by 5 gives values with non-trivial integer parts.
arr = np.random.randn(7) * 5
In [125]:
arr
Out[125]:
In [126]:
np.modf(arr)
Out[126]:
In [127]:
# Data processing using arrays
points = np.arange(-5,5,0.01) # 1000 equally spaced points
In [128]:
xs, ys = np.meshgrid(points, points)
In [129]:
ys
Out[129]:
In [130]:
xs
Out[130]:
In [131]:
import matplotlib.pyplot as plt
In [132]:
z = np.sqrt(xs ** 2 + ys ** 2)
In [133]:
z
Out[133]:
In [134]:
# Plot the function values evaluated over the grid as a grayscale image.
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
# Raw string: "\s" is not a valid Python escape sequence, and the backslash has
# to reach matplotlib's math-text renderer unchanged.
plt.title(r"Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")
Out[134]:
In [135]:
# Expressing conditional logic as array operations
xarr = np.array([1.1,1.2,1.3,1.4,1.5])
yarr = np.array([2.1,2.2,2.3,2.4,2.5])
cond = np.array([True,False,True,True,False])
In [136]:
# Pure-Python version: walk the three arrays in lockstep and keep x when the
# flag is true, y otherwise.
result = [x if c else y for x, y, c in zip(xarr, yarr, cond)]
In [137]:
result
Out[137]:
In [138]:
# The same selection with np.where -- much more concise
result = np.where(cond, xarr, yarr)
In [139]:
result
Out[139]:
In [140]:
# Qualified call: a bare `randn` is not defined by any import in this notebook.
arr = np.random.randn(4, 4)
In [141]:
arr
Out[141]:
In [142]:
np.where(arr > 0, 2, -2)
Out[142]:
In [143]:
np.where(arr > 0, 2, arr) # set only the positive values to 2
Out[143]:
In [144]:
# Mathematical and statistical methods
In [145]:
arr = np.random.randn(5,4) # normally distributed data
arr
Out[145]:
In [146]:
arr.mean()
Out[146]:
In [147]:
np.mean(arr)
Out[147]:
In [148]:
arr.sum()
Out[148]:
In [149]:
arr.mean(axis=1)
Out[149]:
In [150]:
arr.sum(0)
Out[150]:
In [151]:
arr = np.array([[0,1,2], [3,4,5], [6,7,8]])
In [152]:
arr
Out[152]:
In [153]:
# Cumulative sum along axis 0 (a running total down each column)
arr.cumsum(0)
Out[153]:
In [154]:
# Cumulative product along axis 1 (a running product across each row)
arr.cumprod(1)
Out[154]:
In [155]:
# Methods for boolean arrays
In [156]:
# Qualified call: a bare `randn` is not defined by any import in this notebook.
arr = np.random.randn(100)
In [157]:
(arr > 0).sum() # number of positive values
Out[157]:
In [158]:
bools = np.array([False, False, True, False])
In [159]:
bools.any()
Out[159]:
In [160]:
bools.all()
Out[160]:
In [161]:
# Qualified call: a bare `randn` is not defined by any import in this notebook.
arr = np.random.randn(8)
In [162]:
arr
Out[162]:
In [163]:
arr.sort()
In [164]:
arr
Out[164]:
In [165]:
# Qualified call: a bare `randn` is not defined by any import in this notebook.
arr = np.random.randn(5, 3)
In [166]:
arr
Out[166]:
In [167]:
arr.sort(1)
In [168]:
arr
Out[168]:
In [169]:
# Qualified call: a bare `randn` is not defined by any import in this notebook.
large_arr = np.random.randn(1000)
In [170]:
large_arr.sort()
In [171]:
large_arr[int(0.05 * len(large_arr))] # 5% quantile
Out[171]:
In [172]:
# Unique values and other set logic
In [173]:
names= np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
In [174]:
# Find the unique values in the array and return them sorted
np.unique(names)
Out[174]:
In [175]:
ints = np.array([3,3,3,2,2,1,1,4,4])
In [176]:
np.unique(ints)
Out[176]:
In [177]:
sorted(set(names))
Out[177]:
In [178]:
values = np.array([6,0,0,3,2,5,6])
In [179]:
# Membership test: for each element of `values`, is it one of 2, 3 or 6?
# Returns a boolean array. np.isin supersedes np.in1d, which NumPy 2.0 deprecates.
np.isin(values, [2,3,6])
Out[179]:
In [180]:
# np.setxor1d(x, y): symmetric difference of two sets, i.e. the elements that
# are in exactly one of the two arrays but not in both
arr_x = np.array([1,4,5,7,9])
arr_y = np.array([1,3,6,7,9])
In [181]:
np.setxor1d(arr_x, arr_y)
Out[181]:
In [182]:
arr_x
Out[182]:
In [183]:
arr_y
Out[183]:
In [184]:
# np.intersect1d(x, y) computes the elements common to x and y, returned sorted
arr_x_y_common = np.intersect1d(arr_x, arr_y)
In [185]:
arr_x_y_common
Out[185]:
In [186]:
# Drop from arr_x every element that also appears in arr_y.
# np.setdiff1d expresses this directly (sorted, unique result) instead of
# XOR-ing arr_x with a separately computed intersection.
arr_final = np.setdiff1d(arr_x, arr_y)
In [187]:
arr_final
Out[187]:
In [188]:
# File input and output with arrays
In [189]:
# Save array data to disk in binary format
In [190]:
arr = np.arange(10)
In [191]:
np.save('book_scripts/ch04/some_array', arr)
In [192]:
np.load('book_scripts/ch04/some_array.npy')
Out[192]:
In [193]:
np.savez('book_scripts/ch04/array_archive.npz', x=arr_x, y=arr_y)
In [194]:
arch = np.load('book_scripts/ch04/array_archive.npz')
In [195]:
arch['x']
Out[195]:
In [196]:
arch['y']
Out[196]:
In [197]:
# Saving and loading text files
In [198]:
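#############################################
# Remove from new_arr_x the elements that also appear in new_arr_y;
# elements of new_arr_y that are not in new_arr_x are ignored.
# The two disabled savetxt calls below write the text files that the following
# cells load; re-enable them once if those files do not exist yet.
#np.savetxt('book_scripts/ch04/arr_x.txt', arr_x, fmt='%d')
#np.savetxt('book_scripts/ch04/arr_y.txt', arr_y, fmt='%d')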
In [199]:
new_arr_x = np.loadtxt('book_scripts/ch04/arr_x.txt', dtype='int')
new_arr_x
Out[199]:
In [200]:
new_arr_y = np.loadtxt('book_scripts/ch04/arr_y.txt', dtype='int')
new_arr_y
Out[200]:
In [201]:
# np.intersect1d(x, y) computes the elements common to x and y, returned sorted
new_arr_x_y_common = np.intersect1d(new_arr_x, new_arr_y)
new_arr_x_y_common
Out[201]:
In [202]:
# Drop from new_arr_x every element that also appears in new_arr_y; values that
# occur only in new_arr_y are ignored. np.setdiff1d does this in one step
# (sorted, unique result) instead of XOR-ing with a precomputed intersection.
new_arr_final = np.setdiff1d(new_arr_x, new_arr_y)
new_arr_final
Out[202]:
In [203]:
np.savetxt('book_scripts/ch04/arr_final.txt', new_arr_final, fmt='%d')
In [204]:
# Linear algebra
In [205]:
x = np.array([[1.,2.,3.], [4.,5.,6.]])
In [206]:
y = np.array([[6.,23.], [-1, 7], [8,9]])
In [207]:
x
Out[207]:
In [208]:
y
Out[208]:
In [209]:
x.dot(y)
Out[209]:
In [210]:
np.ones(3)
Out[210]:
In [211]:
np.dot(x, np.ones(3))
Out[211]:
In [212]:
from numpy.linalg import inv, qr
In [213]:
# Qualified call: a bare `randn` is not defined by any import in this notebook.
X = np.random.randn(5, 5)
In [214]:
X
Out[214]:
In [215]:
mat = X.T.dot(X)
In [216]:
mat
Out[216]:
In [217]:
inv(mat)
Out[217]:
In [218]:
mat.dot(inv(mat))
Out[218]:
In [219]:
q,r=qr(mat)
In [220]:
r
Out[220]:
In [221]:
# Random number generation (TODO: not fully understood yet -- revisit)
In [222]:
samples = np.random.normal(size=(4, 4))
In [223]:
samples
Out[223]:
In [224]:
from random import normalvariate
In [225]:
N = 1000000
In [226]:
%timeit samples = [normalvariate(0, 1) for _ in xrange(N)]
In [227]:
%timeit np.random.normal(size=N)
In [228]:
# Example: random walks
In [231]:
import random

# Pure-Python random walk: start at 0 and move +1 or -1 on every step.
# `walk` records the position before the first step and after each step.
position = 0
walk = [position]
steps = 1000
for i in range(steps):  # range, not the Python 2-only xrange
    # randint(0, 1) includes both endpoints, so each direction is equally likely.
    step = 1 if random.randint(0, 1) else -1
    position += step
    walk.append(position)
In [232]:
walk[:10]
Out[232]:
In [233]:
nsteps = 1000
In [234]:
draws = np.random.randint(0, 2, size=nsteps)
In [235]:
steps = np.where(draws > 0, 1, -1)
In [236]:
walk = steps.cumsum()
In [237]:
walk.min()
Out[237]:
In [238]:
walk.max()
Out[238]:
In [239]:
(np.abs(walk) >= 10).argmax()
Out[239]:
In [240]:
nwalks = 5000
In [241]:
nsteps = 1000
In [242]:
draws = np.random.randint(0, 2, size=(nwalks, nsteps)) # 0 or 1
In [243]:
steps = np.where(draws > 0, 1, -1)
In [244]:
walks = steps.cumsum(1)
In [245]:
walks
Out[245]:
In [246]:
walks.max()
Out[246]:
In [248]:
walks.min()
Out[248]:
In [250]:
hits30 = (np.abs(walks) >= 30).any(1)
In [251]:
hits30
Out[251]:
In [252]:
hits30.sum() # number of walks that reach 30 or -30
Out[252]:
In [253]:
crossing_times = (np.abs(walks[hits30]) >= 30).argmax(1)
In [254]:
crossing_times.mean()
Out[254]:
In [255]:
steps = np.random.normal(loc=0, scale=0.25, size=(nwalks, nsteps))
In [256]:
steps
Out[256]:
In [ ]: