### Numpy

NumPy提供了一个genfromtxt函数，可以从表格数据中创建数组；数据存放到NumPy数组中后，后续处理就轻松得多。

``````

In [1]:

# Wrap the CSV text in a file-like object so that genfromtxt can read it.
import numpy as np

try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3: StringIO lives in the io module

in_data = StringIO("10,20,30\n45,65,23\n33,54,62")

``````
``````

In [2]:

# Parse the in-memory CSV with NumPy's genfromtxt, producing an integer array.
data = np.genfromtxt(in_data, delimiter=",", dtype=int)
data

``````
``````

Out[2]:

array([[10, 20, 30],
[45, 65, 23],
[33, 54, 62]])

``````
``````

In [3]:

# Keep only the columns that are needed (the first two); the third is dropped.
in_data = StringIO("10,20,30\n45,65,23\n33,54,62")
data = np.genfromtxt(in_data, delimiter=",", usecols=(0, 1), dtype=int)
data

``````
``````

Out[3]:

array([[10, 20],
[45, 65],
[33, 54]])

``````
``````

In [4]:

# Name the columns explicitly; the result is a structured array with
# fields a, b and c.
in_data = StringIO("10,20,30\n45,65,23\n33,54,62")
data = np.genfromtxt(in_data, delimiter=',', names="a,b,c", dtype=int)
data

``````
``````

Out[4]:

array([(10, 20, 30), (45, 65, 23), (33, 54, 62)],
dtype=[('a', '<i4'), ('b', '<i4'), ('c', '<i4')])

``````

``````

In [5]:

# Take the column names from the first line of the data (names=True).
in_data = StringIO("a,b,c\n10,20,30\n45,65,23\n33,54,62")
data = np.genfromtxt(in_data, delimiter=',', names=True, dtype=int)
data

``````
``````

Out[5]:

array([(10, 20, 30), (45, 65, 23), (33, 54, 62)],
dtype=[('a', '<i4'), ('b', '<i4'), ('c', '<i4')])

``````

### 对列进行预处理

``````

In [6]:

# First, look at the result when no preprocessing is applied.
# Sample rows:
#   30kg,inr2000,31.13,56.45,1
#   45kg,inr3000,34.34,346.2,2
# The first two columns carry unit text, so they cannot be parsed as floats
# and come back as nan.

in_data = StringIO('30kg,inr2000,31.13,56.45,1\n45kg,inr3000,34.34,346.2,2')
data = np.genfromtxt(
    in_data,
    delimiter=',',
)
data

``````
``````

Out[6]:

array([[    nan,     nan,   31.13,   56.45,    1.  ],
[    nan,     nan,   34.34,  346.2 ,    2.  ]])

``````

``````

In [7]:

import numpy as np

try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3: StringIO lives in the io module

# Sample data set: the first two columns carry unit text ("kg", "inr").
in_data = StringIO('30kg,inr2000,31.13,56.45,1\n45kg,inr3000,34.34,346.2,2')


def _as_text(field):
    """Return *field* as text; genfromtxt may hand converters bytes."""
    if isinstance(field, bytes):
        return field.decode('latin1')
    return field


# Per-column preprocessing: drop the unit text, then convert to float.
# Note that rstrip/lstrip remove any of the listed characters rather than a
# fixed suffix/prefix; that is sufficient for the sample values used here.
strip_func_1 = lambda x: float(_as_text(x).rstrip('kg'))
strip_func_2 = lambda x: float(_as_text(x).lstrip('inr'))

# Map column index -> converter function.
convert_funcs = {0: strip_func_1, 1: strip_func_2}

# Hand the converters to genfromtxt.
data = np.genfromtxt(in_data, delimiter=',', converters=convert_funcs)
data

``````
``````

Out[7]:

array([[  3.00000000e+01,   2.00000000e+03,   3.11300000e+01,
5.64500000e+01,   1.00000000e+00],
[  4.50000000e+01,   3.00000000e+03,   3.43400000e+01,
3.46200000e+02,   2.00000000e+00]])

``````

``````

In [8]:

# Column 1 has an empty field in the second row; its converter maps a blank
# field to -1.
in_data = StringIO('10,20,30\n23,,34\n36,31,76')


def miss_func(x):
    """Convert a field to float, substituting -1 when it is blank."""
    stripped = x.strip()
    return float(stripped or -1)


data = np.genfromtxt(in_data, delimiter=',', converters={1: miss_func})
data

``````
``````

Out[8]:

array([[ 10.,  20.,  30.],
[ 23.,  -1.,  34.],
[ 36.,  31.,  76.]])

``````

NumPy 是一个用来高效地处理数组和矩阵的 Python 库。

``````

In [1]:

import numpy as np

# Create an array from a Python list.
a_list = [1, 2, 3]
an_array = np.array(a_list)
print(an_array)
# Create it again with an explicit element type.
an_array = np.array(a_list, dtype=float)
print(an_array)

``````
``````

[1 2 3]
[ 1.  2.  3.]

``````
``````

In [2]:

# Create a matrix from a list of lists.
a_listoflist = [[1, 2, 3], [5, 6, 7], [8, 9, 10]]
a_matrix = np.matrix(a_listoflist, dtype=float)
print(a_matrix)

``````
``````

[[  1.   2.   3.]
[  5.   6.   7.]
[  8.   9.  10.]]

``````
``````

In [3]:

# Total number of elements in the matrix.
print(a_matrix.size)

``````
``````

9

``````
``````

In [4]:

# Number of dimensions of the matrix.
print(a_matrix.ndim)

``````
``````

2

``````
``````

In [6]:

# Shape of the matrix as (rows, columns).
print(a_matrix.shape)

``````
``````

(3, 3)

``````
``````

In [9]:

def display_shape(a):
    """Print an array followed by its size, dimension count and shape.

    Args:
        a: Object exposing ``size``, ``ndim`` and ``shape`` attributes,
            such as a NumPy array or matrix.
    """
    print(a)
    print('Number of elements in a=%d' % (a.size))
    print('Number of dimensions in a = %d' % (a.ndim))
    # Format the whole line first so the output is the same whether print is
    # the Python 2 statement or the Python 3 function.
    print('Rows and cols in a %s' % (a.shape,))

``````

### numpy创建数组的方式

``````

In [10]:

# Build an array with np.arange: the floats 1.0 through 9.0 (the stop
# value, 10, is not included).
created_array = np.arange(1, 10, dtype=float)
display_shape(created_array)

``````
``````

[ 1.  2.  3.  4.  5.  6.  7.  8.  9.]
Number of elements in a=9
Number of dimensions in a = 1
Rows and cols in a (9,)

``````
``````

In [11]:

# Build an array with np.linspace.
# linspace is driven by how many samples the range should contain rather
# than by a step size; when no count is given it returns 50 samples.
created_array = np.linspace(1, 10)
display_shape(created_array)

``````
``````

[  1.           1.18367347   1.36734694   1.55102041   1.73469388
1.91836735   2.10204082   2.28571429   2.46938776   2.65306122
2.83673469   3.02040816   3.20408163   3.3877551    3.57142857
3.75510204   3.93877551   4.12244898   4.30612245   4.48979592
4.67346939   4.85714286   5.04081633   5.2244898    5.40816327
5.59183673   5.7755102    5.95918367   6.14285714   6.32653061
6.51020408   6.69387755   6.87755102   7.06122449   7.24489796
7.42857143   7.6122449    7.79591837   7.97959184   8.16326531
8.34693878   8.53061224   8.71428571   8.89795918   9.08163265
9.26530612   9.44897959   9.63265306   9.81632653  10.        ]
Number of elements in a=50
Number of dimensions in a = 1
Rows and cols in a (50,)

``````
``````

In [12]:

# Build an array with np.logspace: samples spaced evenly on a log scale,
# here running from 10**1 to 10**10.
created_array = np.logspace(1, 10, base=10.0)
display_shape(created_array)

``````
``````

[  1.00000000e+01   1.52641797e+01   2.32995181e+01   3.55648031e+01
5.42867544e+01   8.28642773e+01   1.26485522e+02   1.93069773e+02
2.94705170e+02   4.49843267e+02   6.86648845e+02   1.04811313e+03
1.59985872e+03   2.44205309e+03   3.72759372e+03   5.68986603e+03
8.68511374e+03   1.32571137e+04   2.02358965e+04   3.08884360e+04
4.71486636e+04   7.19685673e+04   1.09854114e+05   1.67683294e+05
2.55954792e+05   3.90693994e+05   5.96362332e+05   9.10298178e+05
1.38949549e+06   2.12095089e+06   3.23745754e+06   4.94171336e+06
7.54312006e+06   1.15139540e+07   1.75751062e+07   2.68269580e+07
4.09491506e+07   6.25055193e+07   9.54095476e+07   1.45634848e+08
2.22299648e+08   3.39322177e+08   5.17947468e+08   7.90604321e+08
1.20679264e+09   1.84206997e+09   2.81176870e+09   4.29193426e+09
6.55128557e+09   1.00000000e+10]
Number of elements in a=50
Number of dimensions in a = 1
Rows and cols in a (50,)

``````
``````

In [13]:

# np.arange with an explicit step of 2.
created_array = np.arange(1, 10, 2, dtype=int)
display_shape(created_array)

``````
``````

[1 3 5 7 9]
Number of elements in a=5
Number of dimensions in a = 1
Rows and cols in a (5,)

``````
``````

In [14]:

# Create a special 3x3 matrix in which every element is 1.
ones_matrix = np.ones((3, 3))
display_shape(ones_matrix)

``````
``````

[[ 1.  1.  1.]
[ 1.  1.  1.]
[ 1.  1.  1.]]
Number of elements in a=9
Number of dimensions in a = 2
Rows and cols in a (3, 3)

``````
``````

In [15]:

# Create a 3x3 matrix in which every element is 0.
zeros_matrix = np.zeros((3, 3))
display_shape(zeros_matrix)

``````
``````

[[ 0.  0.  0.]
[ 0.  0.  0.]
[ 0.  0.  0.]]
Number of elements in a=9
Number of dimensions in a = 2
Rows and cols in a (3, 3)

``````
``````

In [16]:

# Identity matrix: ones on the main diagonal (k=0), zeros everywhere else.
identity_matrix = np.eye(N=3, M=3, k=0)
display_shape(identity_matrix)

``````
``````

[[ 1.  0.  0.]
[ 0.  1.  0.]
[ 0.  0.  1.]]
Number of elements in a=9
Number of dimensions in a = 2
Rows and cols in a (3, 3)

``````

``````

In [17]:

# k=1 moves the diagonal of ones one position above the main diagonal.
identity_matrix = np.eye(N=3, M=3, k=1)
display_shape(identity_matrix)

``````
``````

[[ 0.  1.  0.]
[ 0.  0.  1.]
[ 0.  0.  0.]]
Number of elements in a=9
Number of dimensions in a = 2
Rows and cols in a (3, 3)

``````

reshape函数可以控制数组的形态

``````

In [18]:

# Lay out the integers 0 through 8 as a 3x3 array.
a_matrix = np.reshape(np.arange(9), (3, 3))
display_shape(a_matrix)

``````
``````

[[0 1 2]
[3 4 5]
[6 7 8]]
Number of elements in a=9
Number of dimensions in a = 2
Rows and cols in a (3, 3)

``````

ravel和flatten函数可以将矩阵转化为一维数组

### 随机数

``````

In [19]:

# Draw 10 random integers from 1 to 99; randint's upper bound (100) is
# exclusive.
general_random_numbers = np.random.randint(1, 100, size=10)
print(general_random_numbers)

``````
``````

[43 37 68 28 70 43 44 13 17 95]

``````
``````

In [20]:

# Draw 10 samples from a normal distribution with mean 0.2 and standard
# deviation 0.2, using the np.random.normal function.
# NOTE: despite the variable's name, these values are normally distributed,
# not uniform.
uniform_rnd_numbers = np.random.normal(loc=0.2, scale=0.2, size=10)
print(uniform_rnd_numbers)

``````
``````

[ 0.31560127  0.38196501 -0.09393321  0.39450662  0.36126174 -0.02476259
0.13728167  0.19001759  0.24057992 -0.27137875]

``````
``````

In [21]:

# Draw from the normal distribution (mean 0.2, standard deviation 0.2) again,
# this time as a 3x3 array.
uniform_rnd_numbers = np.random.normal(loc=0.2, scale=0.2, size=(3, 3))
print(uniform_rnd_numbers)

``````
``````

[[ 0.33783552  0.21471375  0.60085734]
[ 0.10925539 -0.0676383   0.09488882]
[ 0.02735518  0.16188257  0.09601844]]

``````