In [1]:
%matplotlib inline
In [2]:
import numpy as np
In [4]:
import numpy as np
arr = np.arange(1000)
def get_sum(arr):
    """Sum the items of ``arr`` with an explicit Python-level loop.

    Deliberately naive: it is the slow baseline that the next cells time
    against the built-in ``sum`` and ``np.sum``.

    Parameters
    ----------
    arr : iterable of numbers
        Values to add up. An empty iterable yields 0.

    Returns
    -------
    The running total, starting from the integer 0.
    """
    acc = 0
    for item in arr:
        acc += item
    return acc
%timeit get_sum(arr)
In [5]:
%timeit sum(arr)
In [6]:
%timeit np.sum(arr)
In [11]:
# Build arrays from Python sequences: np.array accepts a list or a tuple.
a = np.array([1, 2, 3, 4])  # from a list
b = np.array((5, 6, 7, 8))  # from a tuple
# Nested lists give a 2-D array (3 rows of 4); the dtype is requested explicitly.
c = np.array([[1, 2, 3, 4],[4, 5, 6, 7], [7, 8, 9, 10]], dtype=int)
b
Out[11]:
In [12]:
c
Out[12]:
In [13]:
c.dtype
Out[13]:
In [14]:
# The shape attribute: a tuple holding the length of each dimension.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(a.shape)
print(c.shape)
In [15]:
a.itemsize
Out[15]:
In [16]:
a.ndim
Out[16]:
In [17]:
a.size
Out[17]:
In [18]:
a = np.random.random(4)
In [19]:
a.dtype
Out[19]:
In [20]:
print(a)
In [21]:
# Assigning to .dtype reinterprets the existing buffer in place; no values
# are converted. NOTE(review): `a` comes from np.random.random(4), presumably
# float64, so this should yield 8 meaningless float32 values -- confirm.
# a.astype(...) (used in the following cells) performs a real conversion.
a.dtype = 'float32'
In [22]:
print(a)
In [23]:
c = a.astype(np.float32)
c
Out[23]:
In [24]:
c = a.astype(np.int32)
c
Out[24]:
In [25]:
# Create a 1-D array from a start value, an end value (excluded) and a step.
np.arange(0,1,0.1)
Out[25]:
In [28]:
np.linspace(0, 1, 12)
Out[28]:
In [29]:
np.logspace(0, 2, 20)
Out[29]:
In [30]:
np.empty([2, 2])
Out[30]:
In [31]:
np.empty([2, 2], dtype=int)
Out[31]:
In [32]:
# empty_like allocates an array with the same shape and dtype as `a`
# without initialising it, so the printed values are arbitrary.
a = np.array([[1., 2., 3.], [4., 5., 6.]])
print(np.empty_like(a))
In [33]:
np.eye(2, dtype=int)
Out[33]:
In [34]:
np.identity(3)
Out[34]:
In [35]:
np.ones(5)
Out[35]:
In [36]:
# Use the builtin `int` as the dtype: the `np.int` alias was deprecated and
# later removed from NumPy, and the other cells here already pass dtype=int.
np.ones((5,), dtype=int)
Out[36]:
In [37]:
np.ones((2, 1))
Out[37]:
In [38]:
np.ones((2,2))
Out[38]:
In [39]:
a = np.array([1.0,2.0,3.0])
b = np.array([2.0,2.0,2.0])
a * b
Out[39]:
In [40]:
## When the operand shapes differ, NumPy applies its broadcasting rules to
## align them: here the scalar b is combined with every element of a.
a = np.array([1.0,2.0,3.0])
b = 2
a * b
Out[40]:
In [42]:
a = np.arange(0, 6).reshape(6, 1)
a.shape
a
Out[42]:
In [44]:
b = np.arange(0, 5)
b.shape
b
Out[44]:
In [45]:
c = a + b
In [46]:
print(c)
In [48]:
a = np.array([1, 2, 3, 4])
d = a.reshape((2,2))
d
Out[48]:
In [49]:
a = np.arange(6).reshape((3, 2))
a
Out[49]:
In [50]:
a=np.array([[0,1],[2,3]])
np.resize(a,(2,3))
Out[50]:
In [51]:
np.resize(a,(1,4))
Out[51]:
In [52]:
np.resize(a,(2,4))
Out[52]:
In [55]:
arr = np.arange(10)
arr[5]
Out[55]:
In [56]:
arr[5:8]
Out[56]:
In [57]:
arr_slice = arr[5:8]
arr_slice[1] = 12345
In [58]:
arr
Out[58]:
In [59]:
arr_slice[:] = 64
In [60]:
arr
Out[60]:
In [61]:
arr_copy = arr[5:8].copy()
In [62]:
arr2d = np.array([[1,2,3],[4,5,6],[7,8,9]])
In [63]:
arr2d[2]
Out[63]:
In [64]:
arr2d[0][2]
Out[64]:
In [65]:
arr2d[0,2]
Out[65]:
In [66]:
arr3d = np.array([[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]]])
arr3d
Out[66]:
In [67]:
arr3d.shape
Out[67]:
In [68]:
arr3d[0]
Out[68]:
In [69]:
old_values = arr3d[0].copy()
arr3d[0]= 42
arr3d
Out[69]:
In [70]:
arr2d
Out[70]:
In [71]:
arr2d[:2]
Out[71]:
In [72]:
arr2d[:2,1:]
Out[72]:
In [73]:
arr2d[1,:2]
Out[73]:
In [74]:
arr2d[2,:1]
Out[74]:
In [75]:
arr2d[:,:1]
Out[75]:
In [76]:
arr2d[:2,1:] = 0
arr2d
Out[76]:
In [77]:
names = np.array(['Bob','Joe','Will','Bob','Will','Joe','Joe'])
In [78]:
import numpy.random
data = numpy.random.randn(7,4)
data
Out[78]:
In [79]:
names
Out[79]:
In [80]:
names == 'Bob'
Out[80]:
In [81]:
data[names == 'Bob']
Out[81]:
In [82]:
x = np.linspace(0, 2*np.pi, 10)
x
Out[82]:
In [83]:
y = np.sin(x)
In [84]:
print(y)
In [85]:
t = np.sin(x,x)
In [86]:
id(t) == id(x)
Out[86]:
In [87]:
import time
import math
import numpy as np

N = 1000000  # number of samples fed to each sine implementation

# time.time() replaces time.clock(), which no longer exists on Python 3.8+;
# range() replaces the Python 2-only xrange(). Both work on Python 2 and 3.

# Pure-Python route: call math.sin once per element of a list.
x = [i * 0.001 for i in range(N)]
start = time.time()
for i, t in enumerate(x):
    x[i] = math.sin(t)
print("math.sin: %s" % (time.time() - start))

# Vectorised route: a single np.sin call whose second argument is the output
# array, so the result is written back into x without a temporary.
x = [i * 0.001 for i in range(N)]
x = np.array(x)
start = time.time()
np.sin(x, x)
print("numpy.sin: %s" % (time.time() - start))
In [88]:
# Element-wise addition through the np.add ufunc.
a = np.arange(0, 4)
print(a)
b = np.arange(1, 5)
print(b)
np.add(a, b)
Out[88]:
In [90]:
def triangle_wave(x, c, c0, hc):
    """Evaluate a triangle wave at the scalar ``x``.

    Only the fractional part ``x - int(x)`` is used, so for non-negative
    ``x`` the wave repeats with period 1.

    Parameters
    ----------
    x : float
        Sample position.
    c : float
        Width of the triangle; the wave is 0 for fractional parts >= ``c``.
    c0 : float
        Position of the peak; the wave rises linearly on ``[0, c0)``.
    hc : float
        Height of the peak.

    Returns
    -------
    float
        The wave value at ``x``.
    """
    x = x - int(x)  # keep only the fractional part
    if x >= c:
        r = 0.0
    elif x < c0:
        r = x / c0 * hc
    else:
        r = (c - x) / (c - c0) * hc
    return r
In [91]:
x = np.linspace(0, 2, 1000)
y = np.array([triangle_wave(t, 0.6, 0.4, 1.0) for t in x])
In [92]:
# frompyfunc wraps the scalar function as a ufunc taking 1 input and giving
# 1 output; the lambda fixes the wave parameters.
triangle_ufunc = np.frompyfunc(lambda x: triangle_wave(x, 0.6, 0.4, 1.0), 1, 1)
# ufuncs built by frompyfunc always return object arrays, so convert the
# result to float to get an ordinary numeric array.
y2 = triangle_ufunc(x).astype(float)
In [93]:
# Small 4x3 integer matrix used by the reduction examples that follow.
# The pasted REPL "..." continuation prompts were removed: outside IPython's
# prompt stripping they are not valid list syntax.
data = np.array([
    [1, 2, 1],
    [0, 3, 1],
    [2, 1, 4],
    [1, 3, 1]])
In [94]:
data
Out[94]:
In [95]:
np.sum(data, axis=1)
Out[95]:
In [96]:
np.min(data, axis=0)
Out[96]:
In [97]:
np.average(data)
Out[97]:
In [98]:
data = np.random.randint(0, 5, [4,3,2,3])
In [99]:
data
Out[99]:
In [100]:
data.sum(axis=0)
Out[100]:
In [101]:
data = np.random.randint(0, 5, [3,2,3])
In [102]:
data
Out[102]:
In [103]:
np.sort(data) ## by default sorts along the last axis (axis=-1), which is axis=2 for this 3-D array
Out[103]:
In [104]:
np.sort(data, axis=0)
Out[104]:
In [105]:
np.sort(data, axis=1)
Out[105]:
In [106]:
np.sort(data, axis=2)
Out[106]:
In [107]:
np.sort(data, axis=None)
Out[107]:
In [108]:
import os
from tempfile import TemporaryFile, gettempdir

import numpy as np

# Round-trip an array through NumPy's binary .npy format. The file goes in
# the platform's temporary directory rather than a hard-coded /tmp path,
# which does not exist on every OS.
origin_array = np.array([1, 2, 3, 4])
array_path = os.path.join(gettempdir(), 'array.npy')
np.save(array_path, origin_array)
array_from_file = np.load(array_path)
print(array_from_file)
In [109]:
import numpy as np

# Round-trip through a plain-text file. savetxt writes the values as
# floating-point text by default, so loadtxt returns a float array.
origin_array = np.array([1, 2, 3, 4])
np.savetxt('array.txt', origin_array)
array_from_file = np.loadtxt('array.txt')
print(array_from_file)
In [110]:
# Parse whitespace-separated text into a float array (text mode, because a
# separator is given).
array = np.fromstring('1 2 3 4', dtype=float, sep=' ')
print(array)
In [111]:
# Pitfall: with no dtype the raw bytes are decoded as float64, so the
# integers written out come back as meaningless floats.
# tobytes()/frombuffer replace the deprecated tostring() and binary-mode
# fromstring().
array = np.array([1, 2, 3, 4], dtype=int)
print(np.frombuffer(array.tobytes()))
In [112]:
# Passing the original dtype decodes the bytes back into the same integers.
print(np.frombuffer(array.tobytes(), dtype=int))
In [113]:
import pandas as pd
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
In [114]:
s
Out[114]:
In [115]:
s.index
Out[115]:
In [116]:
pd.Series(np.random.randn(5))
Out[116]:
In [ ]:
### for dict
In [118]:
d = {'a' : 0., 'b' : 1., 'c' : 2.}
In [119]:
pd.Series(d)
Out[119]:
In [120]:
pd.Series(d, index=['b', 'c', 'd', 'a'])
Out[120]:
In [ ]:
## Constructing from a scalar constant
In [121]:
pd.Series(5., index=['a', 'b', 'c', 'd', 'e'])
Out[121]:
In [122]:
# First element by position. s has string labels, so .iloc makes the
# positional lookup explicit; plain s[0] is deprecated for such an index.
s.iloc[0]
Out[122]:
In [123]:
s[:3]
Out[123]:
In [124]:
s[s > s.median()]
Out[124]:
In [125]:
# Pick elements by position (5th, 4th, 2nd); .iloc keeps the lookup
# positional on a string-labelled Series.
s.iloc[[4, 3, 1]]
Out[125]:
In [126]:
np.exp(s)
Out[126]:
In [ ]:
### Index Label
In [127]:
s['a']
Out[127]:
In [128]:
s['e'] = 12.
In [129]:
s
Out[129]:
In [130]:
'e' in s
Out[130]:
In [131]:
't' in s
Out[131]:
In [ ]:
### Vectorized operations
In [132]:
s + s
Out[132]:
In [133]:
s * 2
Out[133]:
In [134]:
np.exp(s)
Out[134]:
In [135]:
s[1:] + s[:-1]
Out[135]:
In [ ]:
## DataFrame
In [136]:
d = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
'two' : pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])}
In [137]:
df = pd.DataFrame(d)
In [138]:
df
Out[138]:
In [139]:
pd.DataFrame(d, index=['d', 'b', 'a'])
Out[139]:
In [140]:
pd.DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three'])
Out[140]:
In [141]:
### Construct from a dict of equal-length lists (passed to pd.DataFrame next)
d = {'one' : [1., 2., 3., 4.],
'two' : [4., 3., 2., 1.]}
In [142]:
pd.DataFrame(d)
Out[142]:
In [143]:
pd.DataFrame(d, index=['a', 'b', 'c', 'd'])
Out[143]:
In [ ]:
## Structured data
In [144]:
# Structured array with three fields: int32 'A', float32 'B' and a 10-byte
# string 'C'. 'S10' is the supported spelling of the deprecated 'a10' code.
data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'S10')])
In [145]:
data
Out[145]:
In [146]:
data[:] = [(1,2.,'Hello'), (2,3.,"World")]
In [147]:
data
Out[147]:
In [148]:
pd.DataFrame(data)
Out[148]:
In [149]:
pd.DataFrame(data, index=['first', 'second'])
Out[149]:
In [150]:
pd.DataFrame(data, columns=['C', 'A', 'B'])
Out[150]:
In [151]:
data2 = [{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}]
In [152]:
pd.DataFrame(data2)
Out[152]:
In [153]:
pd.DataFrame(data2, index=['first', 'second'])
Out[153]:
In [154]:
pd.DataFrame(data2, columns=['a', 'b'])
Out[154]:
In [ ]:
#### records
In [155]:
from collections import OrderedDict

# Build a frame from (row label, values) pairs. DataFrame.from_items was
# removed in pandas 1.0; from_dict with orient='index' is its replacement,
# and OrderedDict keeps the rows in the order given.
pd.DataFrame.from_dict(OrderedDict([('A', [1, 2, 3]), ('B', [4, 5, 6])]),
                       orient='index', columns=['one', 'two', 'three'])
Out[155]:
In [156]:
df['one']
Out[156]:
In [157]:
df['three'] = df['one'] * df['two']
In [158]:
df['flag'] = df['one'] > 2
In [159]:
df
Out[159]:
In [160]:
del df['two']
In [161]:
three = df.pop('three')
In [162]:
df
Out[162]:
In [163]:
df['foo'] = 'bar'
In [164]:
df['one_trunc'] = df['one'][:2]
In [165]:
df
Out[165]:
In [166]:
df.insert(1, 'bar', df['one'])
In [167]:
df
Out[167]:
In [168]:
df.loc['b']
Out[168]:
In [169]:
df.iloc[2]
Out[169]:
In [170]:
df = pd.DataFrame(np.random.randn(5, 3), index=['a', 'c', 'e', 'f', 'h'],
columns=['one', 'two', 'three'])
In [171]:
df
Out[171]:
In [172]:
df['four'] = 'bar'
In [173]:
df['five'] = df['one'] > 0
In [174]:
df
Out[174]:
In [175]:
df2 = df.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])
In [176]:
df2
Out[176]:
In [177]:
df2['one']
Out[177]:
In [178]:
pd.isnull(df2['one'])
Out[178]:
In [179]:
df2['four'].notnull()
Out[179]:
In [180]:
df2.isnull()
Out[180]:
In [181]:
s = pd.Series([1, 2, 3])
s.loc[0] = None
In [182]:
s
Out[182]:
In [183]:
df2
Out[183]:
In [184]:
df2.fillna(0)
Out[184]:
In [185]:
df2['four'].fillna('missing')
Out[185]:
In [186]:
# Forward-fill missing values; ffill() replaces the deprecated
# fillna(method='pad').
# NOTE(review): df as built a few cells earlier has no missing values, so
# this is a no-op here -- df2 may have been the intended frame; confirm.
df.ffill()
Out[186]:
In [187]:
# Forward-fill at most one consecutive missing value per gap; ffill(limit=1)
# replaces the deprecated fillna(method='pad', limit=1).
df.ffill(limit=1)
Out[187]:
In [188]:
df1=pd.DataFrame({'key':['a','b','b'],'data1':range(3)})
In [189]:
df2=pd.DataFrame({'key':['a','b','c'],'data2':range(3)})
In [190]:
df1
Out[190]:
In [191]:
df2
Out[191]:
In [192]:
df1.merge(df2)
Out[192]:
In [193]:
pd.merge(df2,df1,how='left')
Out[193]:
In [195]:
left=pd.DataFrame({'key1':['foo','foo','bar'],
'key2':['one','two','one'],
'lval':[1,2,3]})
right=pd.DataFrame({'key1':['foo','foo','bar','bar'],
'key2':['one','one','one','two'],
'lval':[4,5,6,7]})
pd.merge(left,right,on=['key1','key2'],how='outer')
Out[195]:
In [196]:
df3=pd.DataFrame({'key3':['foo','foo','bar','bar'], #将上面的right的key 改了名字 如果两个对象的列名不同,可以分别指定
'key4':['one','one','one','two'],
'lval':[4,5,6,7]})
In [197]:
pd.merge(left,df3,left_on='key1',right_on='key3')
Out[197]:
In [198]:
df1=pd.DataFrame(np.random.randn(3,4),columns=['a','b','c','d'])
In [199]:
df2=pd.DataFrame(np.random.randn(2,3),columns=['b','d','a'])
In [200]:
pd.concat([df1,df2])
Out[200]:
In [201]:
pd.concat([df1,df2],ignore_index=True)
Out[201]:
In [202]:
df = pd.DataFrame({'key1':['a','a','b','b','a'],
'key2':['one','two','one','two','one'],
'data1':np.random.randn(5),
'data2':np.random.randn(5)})
In [203]:
df
Out[203]:
In [204]:
grouped = df.groupby(df['key1'])
In [205]:
grouped
Out[205]:
In [206]:
# Per-group mean of the numeric columns only: the frame also holds the
# string column key2, which recent pandas refuses to average.
grouped.mean(numeric_only=True)
Out[206]:
In [207]:
df['data1'].groupby(df['key1']).mean()
Out[207]:
In [208]:
df.groupby(df['key2'])['data2'].mean()
Out[208]:
In [209]:
# Range (max - min) of each data column within every key1 group. Several
# columns must be selected with a list; tuple-style selection was removed.
df.groupby('key1')[['data1', 'data2']].agg(lambda arr: arr.max() - arr.min())
Out[209]:
In [210]:
# Min and max of each data column within every key1 group. Several columns
# must be selected with a list; tuple-style selection was removed.
df.groupby('key1')[['data1', 'data2']].agg(['min', 'max'])
Out[210]:
In [211]:
np.random.seed(2)
In [212]:
ser = pd.Series(np.arange(1, 10.1, .25)**2 + np.random.randn(37))
In [213]:
bad = np.array([4, 13, 14, 15, 16, 17, 18, 20, 29])
In [214]:
ser[bad] = np.nan
In [215]:
methods = ['linear', 'quadratic', 'cubic']
In [216]:
df = pd.DataFrame({m: ser.interpolate(method=m) for m in methods})
In [217]:
df.plot()
Out[217]:
In [218]:
import numpy as np
import matplotlib.pyplot as plt
In [219]:
X = np.linspace(-np.pi, np.pi, 256,endpoint=True)
C,S = np.cos(X), np.sin(X)
plt.plot(X,C)
plt.plot(X,S)
# plt.show()
Out[219]:
In [ ]:
plt.figure(figsize=(8,6), dpi=80)
# Create a new 1 x 1 grid of subplots and draw into its first (and only) cell
plt.subplot(1,1,1)
X = np.linspace(-np.pi, np.pi, 256,endpoint=True)
C,S = np.cos(X), np.sin(X)
# Plot the cosine curve as a blue, solid line of width 1.0
plt.plot(X, C, color="blue", linewidth=1.0, linestyle="-")
# Plot the sine curve as a green, solid line of width 1.0
plt.plot(X, S, color="green", linewidth=1.0, linestyle="-")
# Set the limits of the x axis
plt.xlim(-4.0,4.0)
# Set the x-axis ticks
plt.xticks(np.linspace(-4,4,9,endpoint=True))
# Set the limits of the y axis
plt.ylim(-1.0,1.0)
# Set the y-axis ticks
plt.yticks(np.linspace(-1,1,5,endpoint=True))
# Save the figure at a resolution of 72 dpi
# savefig("exercice_2.png",dpi=72)
# Show the figure on screen
# plt.show()
In [220]:
plt.figure(figsize=(10,6), dpi=80)
plt.plot(X, C, color="blue", linewidth=2.5, linestyle="-")
plt.plot(X, S, color="red", linewidth=2.5, linestyle="-")
Out[220]:
In [221]:
plt.xlim(X.min()*1.1, X.max()*1.1)
plt.ylim(C.min()*1.1, C.max()*1.1)
plt.plot(X, C, color="blue", linewidth=2.5, linestyle="-")
plt.plot(X, S, color="red", linewidth=2.5, linestyle="-")
Out[221]:
In [222]:
plt.xticks( [-np.pi, -np.pi/2, 0, np.pi/2, np.pi])
plt.yticks([-1, 0, +1])
plt.plot(X, C, color="blue", linewidth=2.5, linestyle="-")
plt.plot(X, S, color="red", linewidth=2.5, linestyle="-")
Out[222]:
In [223]:
plt.xticks([-np.pi, -np.pi/2, 0, np.pi/2, np.pi],
[r'$-\pi$', r'$-\pi/2$', r'$0$', r'$+\pi/2$', r'$+\pi$'])
plt.yticks([-1, 0, +1],
[r'$-1$', r'$0$', r'$+1$'])
plt.plot(X, C, color="blue", linewidth=2.5, linestyle="-")
plt.plot(X, S, color="red", linewidth=2.5, linestyle="-")
Out[223]:
In [224]:
ax = plt.gca()
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
ax.xaxis.set_ticks_position('bottom')
ax.spines['bottom'].set_position(('data',0))
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data',0))
plt.plot(X, C, color="blue", linewidth=2.5, linestyle="-")
plt.plot(X, S, color="red", linewidth=2.5, linestyle="-")
Out[224]:
In [225]:
plt.plot(X, C, color="blue", linewidth=2.5, linestyle="-", label="cosine")
plt.plot(X, S, color="red", linewidth=2.5, linestyle="-", label="sine")
plt.legend(loc='upper left')
Out[225]:
In [226]:
t = 2*np.pi/3
plt.plot([t,t],[0,np.cos(t)], color ='blue', linewidth=2.5, linestyle="--")
plt.scatter([t,],[np.cos(t),], 50, color ='blue')
plt.annotate(r'$\sin(\frac{2\pi}{3})=\frac{\sqrt{3}}{2}$',
xy=(t, np.sin(t)), xycoords='data',
xytext=(+10, +30), textcoords='offset points', fontsize=16,
arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=.2"))
plt.plot([t,t],[0,np.sin(t)], color ='red', linewidth=2.5, linestyle="--")
plt.scatter([t,],[np.sin(t),], 50, color ='red')
plt.annotate(r'$\cos(\frac{2\pi}{3})=-\frac{1}{2}$',
xy=(t, np.cos(t)), xycoords='data',
xytext=(-90, -50), textcoords='offset points', fontsize=16,
arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=.2"))
plt.plot(X, C, color="blue", linewidth=2.5, linestyle="-", label="cosine")
plt.plot(X, S, color="red", linewidth=2.5, linestyle="-", label="sine")
Out[226]:
In [227]:
# Two overlapping axes, each given as [left, bottom, width, height] in
# figure fractions; the small one gets a green background.
# `facecolor` replaces the `axisbg` keyword, which matplotlib removed.
plt.axes([.1, .1, 1, 1])
plt.axes([.2, .2, .3, .3], facecolor='green')
Out[227]:
In [228]:
# Two vertically stacked subplots; the upper one gets a yellow ('y')
# background. `facecolor` replaces the removed `axisbg` keyword.
plt.subplot(2, 1, 1, facecolor='y')
plt.subplot(2, 1, 2)
Out[228]:
In [229]:
# One subplot per colour code in a 3 x 2 grid, each using that colour as its
# background. `facecolor` replaces the removed `axisbg` keyword.
for idx, color in enumerate('rgbyck'):
    plt.subplot(3, 2, 1 + idx, facecolor=color)
In [ ]:
In [230]:
n = 1024
X = np.random.normal(0,1,n)
Y = np.random.normal(0,1,n)
T = np.arctan2(Y,X)
plt.scatter(X,Y,s=75,c=T,alpha=.5)
Out[230]:
In [231]:
n = 12
X = np.arange(n)
Y1 = (1 - X / float(n)) * np.random.uniform(0.5, 1.0, n)
Y2 = (1 - X / float(n)) * np.random.uniform(0.5, 1.0, n)
# align='edge' makes each bar start at its x value whatever the matplotlib
# default is, so the x + 0.4 offset below lands on the centre of the
# default 0.8-wide bar.
plt.bar(X, +Y1, align='edge', facecolor='#9999ff', edgecolor='white')
plt.bar(X, -Y2, align='edge', facecolor='#ff9999', edgecolor='white')
# Label each upper bar with its height, just above the bar.
for x, y in zip(X, Y1):
    plt.text(x + 0.4, y + 0.05, '%.2f' % y, ha='center', va='bottom')
plt.ylim(-1.25, +1.25)
Out[231]:
In [232]:
def f(x,y):
    """Height field drawn by the contour plot in this cell.

    Evaluates ``(1 - x/2 + x**5 + y**3) * exp(-x**2 - y**2)`` element-wise,
    so ``x`` and ``y`` may be scalars or NumPy arrays.
    """
    polynomial = 1-x/2+x**5+y**3
    envelope = np.exp(-x**2-y**2)
    return polynomial * envelope
n = 256
x = np.linspace(-3, 3, n)
y = np.linspace(-3, 3, n)
X, Y = np.meshgrid(x, y)
# Filled contours first, then thin black outlines on top of them.
plt.contourf(X, Y, f(X, Y), 8, alpha=.75, cmap='jet')
# The contour keyword is `linewidths`; the singular `linewidth` is not
# applied to the contour lines.
C = plt.contour(X, Y, f(X, Y), 8, colors='black', linewidths=.5)
In [ ]: