In [4]:
from sklearn.datasets import load_iris
# Load the Iris dataset. The returned object exposes the feature matrix as
# `iris.data` and the target vector as `iris.target`, which the cells below use.
iris = load_iris()
Out[4]:
(150, 4)
In [7]:
# Report the dimensions of the feature matrix (iris.data) and of the target
# vector (iris.target).
# The bare `iris.data` / `iris.target` expression statements that used to sit
# in this cell were removed: a notebook only displays the value of a cell's
# last expression, so they were evaluated and silently discarded.
print('特征 shape: ', iris.data.shape)
print('目标 shape: ', iris.target.shape)
特征 shape: (150, 4)
目标 shape: (150,)
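通过特征提取,我们能得到未经处理的特征,这时的特征可能有以下问题:
- 不属于同一量纲:特征的规格不一样,不能够放在一起比较。
- 信息冗余:对于某些定量特征,其包含的有效信息为区间划分,可以通过二值化来处理。
- 定性特征不能直接使用:某些机器学习算法和模型只能接受定量特征的输入,需要将定性特征转换为定量特征。
- 存在缺失值:缺失值需要补充。
- 信息利用率低:不同的机器学习算法和模型对数据中信息的利用是不同的,可以通过数据变换提高信息利用率。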
我们使用sklearn中的preprocessing库来进行数据预处理,可以覆盖以上问题的解决方案。
In [8]:
# Display the raw feature matrix as loaded, before any scaling is applied.
# NOTE(review): this prints all 150 rows; a slice such as iris.data[:5] would
# keep the notebook shorter if the full dump is not needed.
iris.data
Out[8]:
array([[ 5.1, 3.5, 1.4, 0.2],
[ 4.9, 3. , 1.4, 0.2],
[ 4.7, 3.2, 1.3, 0.2],
[ 4.6, 3.1, 1.5, 0.2],
[ 5. , 3.6, 1.4, 0.2],
[ 5.4, 3.9, 1.7, 0.4],
[ 4.6, 3.4, 1.4, 0.3],
[ 5. , 3.4, 1.5, 0.2],
[ 4.4, 2.9, 1.4, 0.2],
[ 4.9, 3.1, 1.5, 0.1],
[ 5.4, 3.7, 1.5, 0.2],
[ 4.8, 3.4, 1.6, 0.2],
[ 4.8, 3. , 1.4, 0.1],
[ 4.3, 3. , 1.1, 0.1],
[ 5.8, 4. , 1.2, 0.2],
[ 5.7, 4.4, 1.5, 0.4],
[ 5.4, 3.9, 1.3, 0.4],
[ 5.1, 3.5, 1.4, 0.3],
[ 5.7, 3.8, 1.7, 0.3],
[ 5.1, 3.8, 1.5, 0.3],
[ 5.4, 3.4, 1.7, 0.2],
[ 5.1, 3.7, 1.5, 0.4],
[ 4.6, 3.6, 1. , 0.2],
[ 5.1, 3.3, 1.7, 0.5],
[ 4.8, 3.4, 1.9, 0.2],
[ 5. , 3. , 1.6, 0.2],
[ 5. , 3.4, 1.6, 0.4],
[ 5.2, 3.5, 1.5, 0.2],
[ 5.2, 3.4, 1.4, 0.2],
[ 4.7, 3.2, 1.6, 0.2],
[ 4.8, 3.1, 1.6, 0.2],
[ 5.4, 3.4, 1.5, 0.4],
[ 5.2, 4.1, 1.5, 0.1],
[ 5.5, 4.2, 1.4, 0.2],
[ 4.9, 3.1, 1.5, 0.1],
[ 5. , 3.2, 1.2, 0.2],
[ 5.5, 3.5, 1.3, 0.2],
[ 4.9, 3.1, 1.5, 0.1],
[ 4.4, 3. , 1.3, 0.2],
[ 5.1, 3.4, 1.5, 0.2],
[ 5. , 3.5, 1.3, 0.3],
[ 4.5, 2.3, 1.3, 0.3],
[ 4.4, 3.2, 1.3, 0.2],
[ 5. , 3.5, 1.6, 0.6],
[ 5.1, 3.8, 1.9, 0.4],
[ 4.8, 3. , 1.4, 0.3],
[ 5.1, 3.8, 1.6, 0.2],
[ 4.6, 3.2, 1.4, 0.2],
[ 5.3, 3.7, 1.5, 0.2],
[ 5. , 3.3, 1.4, 0.2],
[ 7. , 3.2, 4.7, 1.4],
[ 6.4, 3.2, 4.5, 1.5],
[ 6.9, 3.1, 4.9, 1.5],
[ 5.5, 2.3, 4. , 1.3],
[ 6.5, 2.8, 4.6, 1.5],
[ 5.7, 2.8, 4.5, 1.3],
[ 6.3, 3.3, 4.7, 1.6],
[ 4.9, 2.4, 3.3, 1. ],
[ 6.6, 2.9, 4.6, 1.3],
[ 5.2, 2.7, 3.9, 1.4],
[ 5. , 2. , 3.5, 1. ],
[ 5.9, 3. , 4.2, 1.5],
[ 6. , 2.2, 4. , 1. ],
[ 6.1, 2.9, 4.7, 1.4],
[ 5.6, 2.9, 3.6, 1.3],
[ 6.7, 3.1, 4.4, 1.4],
[ 5.6, 3. , 4.5, 1.5],
[ 5.8, 2.7, 4.1, 1. ],
[ 6.2, 2.2, 4.5, 1.5],
[ 5.6, 2.5, 3.9, 1.1],
[ 5.9, 3.2, 4.8, 1.8],
[ 6.1, 2.8, 4. , 1.3],
[ 6.3, 2.5, 4.9, 1.5],
[ 6.1, 2.8, 4.7, 1.2],
[ 6.4, 2.9, 4.3, 1.3],
[ 6.6, 3. , 4.4, 1.4],
[ 6.8, 2.8, 4.8, 1.4],
[ 6.7, 3. , 5. , 1.7],
[ 6. , 2.9, 4.5, 1.5],
[ 5.7, 2.6, 3.5, 1. ],
[ 5.5, 2.4, 3.8, 1.1],
[ 5.5, 2.4, 3.7, 1. ],
[ 5.8, 2.7, 3.9, 1.2],
[ 6. , 2.7, 5.1, 1.6],
[ 5.4, 3. , 4.5, 1.5],
[ 6. , 3.4, 4.5, 1.6],
[ 6.7, 3.1, 4.7, 1.5],
[ 6.3, 2.3, 4.4, 1.3],
[ 5.6, 3. , 4.1, 1.3],
[ 5.5, 2.5, 4. , 1.3],
[ 5.5, 2.6, 4.4, 1.2],
[ 6.1, 3. , 4.6, 1.4],
[ 5.8, 2.6, 4. , 1.2],
[ 5. , 2.3, 3.3, 1. ],
[ 5.6, 2.7, 4.2, 1.3],
[ 5.7, 3. , 4.2, 1.2],
[ 5.7, 2.9, 4.2, 1.3],
[ 6.2, 2.9, 4.3, 1.3],
[ 5.1, 2.5, 3. , 1.1],
[ 5.7, 2.8, 4.1, 1.3],
[ 6.3, 3.3, 6. , 2.5],
[ 5.8, 2.7, 5.1, 1.9],
[ 7.1, 3. , 5.9, 2.1],
[ 6.3, 2.9, 5.6, 1.8],
[ 6.5, 3. , 5.8, 2.2],
[ 7.6, 3. , 6.6, 2.1],
[ 4.9, 2.5, 4.5, 1.7],
[ 7.3, 2.9, 6.3, 1.8],
[ 6.7, 2.5, 5.8, 1.8],
[ 7.2, 3.6, 6.1, 2.5],
[ 6.5, 3.2, 5.1, 2. ],
[ 6.4, 2.7, 5.3, 1.9],
[ 6.8, 3. , 5.5, 2.1],
[ 5.7, 2.5, 5. , 2. ],
[ 5.8, 2.8, 5.1, 2.4],
[ 6.4, 3.2, 5.3, 2.3],
[ 6.5, 3. , 5.5, 1.8],
[ 7.7, 3.8, 6.7, 2.2],
[ 7.7, 2.6, 6.9, 2.3],
[ 6. , 2.2, 5. , 1.5],
[ 6.9, 3.2, 5.7, 2.3],
[ 5.6, 2.8, 4.9, 2. ],
[ 7.7, 2.8, 6.7, 2. ],
[ 6.3, 2.7, 4.9, 1.8],
[ 6.7, 3.3, 5.7, 2.1],
[ 7.2, 3.2, 6. , 1.8],
[ 6.2, 2.8, 4.8, 1.8],
[ 6.1, 3. , 4.9, 1.8],
[ 6.4, 2.8, 5.6, 2.1],
[ 7.2, 3. , 5.8, 1.6],
[ 7.4, 2.8, 6.1, 1.9],
[ 7.9, 3.8, 6.4, 2. ],
[ 6.4, 2.8, 5.6, 2.2],
[ 6.3, 2.8, 5.1, 1.5],
[ 6.1, 2.6, 5.6, 1.4],
[ 7.7, 3. , 6.1, 2.3],
[ 6.3, 3.4, 5.6, 2.4],
[ 6.4, 3.1, 5.5, 1.8],
[ 6. , 3. , 4.8, 1.8],
[ 6.9, 3.1, 5.4, 2.1],
[ 6.7, 3.1, 5.6, 2.4],
[ 6.9, 3.1, 5.1, 2.3],
[ 5.8, 2.7, 5.1, 1.9],
[ 6.8, 3.2, 5.9, 2.3],
[ 6.7, 3.3, 5.7, 2.5],
[ 6.7, 3. , 5.2, 2.3],
[ 6.3, 2.5, 5. , 1.9],
[ 6.5, 3. , 5.2, 2. ],
[ 6.2, 3.4, 5.4, 2.3],
[ 5.9, 3. , 5.1, 1.8]])
In [9]:
from sklearn.preprocessing import StandardScaler
# Standardization: fit_transform returns the standardized data, with each
# feature (column) rescaled to zero mean and unit variance.
# The result is bound to a new name rather than overwriting iris.data.
iris_standard = StandardScaler().fit_transform(iris.data)
In [11]:
from sklearn.preprocessing import MinMaxScaler
# Min-max (interval) scaling: fit_transform returns the data with each
# feature (column) rescaled into the [0, 1] range.
iris_minmax = MinMaxScaler().fit_transform(iris.data)
In [13]:
from sklearn.preprocessing import Normalizer
# Normalization: fit_transform returns the normalized data.
# Unlike column-wise scalers, Normalizer operates on each sample (row)
# independently, rescaling the row to unit norm (L2 by default), so the
# squared entries of every output row sum to 1.
iris_norm = Normalizer().fit_transform(iris.data)
In [14]:
# Display the row-normalized feature matrix produced by Normalizer.
# NOTE(review): this prints all 150 rows; iris_norm[:5] would be enough to
# illustrate the effect.
iris_norm
Out[14]:
array([[ 0.80377277, 0.55160877, 0.22064351, 0.0315205 ],
[ 0.82813287, 0.50702013, 0.23660939, 0.03380134],
[ 0.80533308, 0.54831188, 0.2227517 , 0.03426949],
[ 0.80003025, 0.53915082, 0.26087943, 0.03478392],
[ 0.790965 , 0.5694948 , 0.2214702 , 0.0316386 ],
[ 0.78417499, 0.5663486 , 0.2468699 , 0.05808704],
[ 0.78010936, 0.57660257, 0.23742459, 0.0508767 ],
[ 0.80218492, 0.54548574, 0.24065548, 0.0320874 ],
[ 0.80642366, 0.5315065 , 0.25658935, 0.03665562],
[ 0.81803119, 0.51752994, 0.25041771, 0.01669451],
[ 0.80373519, 0.55070744, 0.22325977, 0.02976797],
[ 0.786991 , 0.55745196, 0.26233033, 0.03279129],
[ 0.82307218, 0.51442011, 0.24006272, 0.01714734],
[ 0.8025126 , 0.55989251, 0.20529392, 0.01866308],
[ 0.81120865, 0.55945424, 0.16783627, 0.02797271],
[ 0.77381111, 0.59732787, 0.2036345 , 0.05430253],
[ 0.79428944, 0.57365349, 0.19121783, 0.05883625],
[ 0.80327412, 0.55126656, 0.22050662, 0.04725142],
[ 0.8068282 , 0.53788547, 0.24063297, 0.04246464],
[ 0.77964883, 0.58091482, 0.22930848, 0.0458617 ],
[ 0.8173379 , 0.51462016, 0.25731008, 0.03027177],
[ 0.78591858, 0.57017622, 0.23115252, 0.06164067],
[ 0.77577075, 0.60712493, 0.16864581, 0.03372916],
[ 0.80597792, 0.52151512, 0.26865931, 0.07901744],
[ 0.776114 , 0.54974742, 0.30721179, 0.03233808],
[ 0.82647451, 0.4958847 , 0.26447184, 0.03305898],
[ 0.79778206, 0.5424918 , 0.25529026, 0.06382256],
[ 0.80641965, 0.54278246, 0.23262105, 0.03101614],
[ 0.81609427, 0.5336001 , 0.21971769, 0.03138824],
[ 0.79524064, 0.54144043, 0.27072022, 0.03384003],
[ 0.80846584, 0.52213419, 0.26948861, 0.03368608],
[ 0.82225028, 0.51771314, 0.22840286, 0.06090743],
[ 0.76578311, 0.60379053, 0.22089897, 0.0147266 ],
[ 0.77867447, 0.59462414, 0.19820805, 0.02831544],
[ 0.81803119, 0.51752994, 0.25041771, 0.01669451],
[ 0.82512295, 0.52807869, 0.19802951, 0.03300492],
[ 0.82699754, 0.52627116, 0.19547215, 0.03007264],
[ 0.81803119, 0.51752994, 0.25041771, 0.01669451],
[ 0.80212413, 0.54690282, 0.23699122, 0.03646019],
[ 0.80779568, 0.53853046, 0.23758697, 0.03167826],
[ 0.80033301, 0.56023311, 0.20808658, 0.04801998],
[ 0.86093857, 0.44003527, 0.24871559, 0.0573959 ],
[ 0.78609038, 0.57170209, 0.23225397, 0.03573138],
[ 0.78889479, 0.55222635, 0.25244633, 0.09466737],
[ 0.76693897, 0.57144472, 0.28572236, 0.06015208],
[ 0.82210585, 0.51381615, 0.23978087, 0.05138162],
[ 0.77729093, 0.57915795, 0.24385598, 0.030482 ],
[ 0.79594782, 0.55370283, 0.24224499, 0.03460643],
[ 0.79837025, 0.55735281, 0.22595384, 0.03012718],
[ 0.81228363, 0.5361072 , 0.22743942, 0.03249135],
[ 0.76701103, 0.35063361, 0.51499312, 0.15340221],
[ 0.74549757, 0.37274878, 0.52417798, 0.17472599],
[ 0.75519285, 0.33928954, 0.53629637, 0.16417236],
[ 0.75384916, 0.31524601, 0.54825394, 0.17818253],
[ 0.7581754 , 0.32659863, 0.5365549 , 0.17496355],
[ 0.72232962, 0.35482858, 0.57026022, 0.16474184],
[ 0.72634846, 0.38046824, 0.54187901, 0.18446945],
[ 0.75916547, 0.37183615, 0.51127471, 0.15493173],
[ 0.76301853, 0.33526572, 0.53180079, 0.15029153],
[ 0.72460233, 0.37623583, 0.54345175, 0.19508524],
[ 0.76923077, 0.30769231, 0.53846154, 0.15384615],
[ 0.73923462, 0.37588201, 0.52623481, 0.187941 ],
[ 0.78892752, 0.28927343, 0.52595168, 0.13148792],
[ 0.73081412, 0.34743622, 0.56308629, 0.16772783],
[ 0.75911707, 0.3931142 , 0.48800383, 0.17622361],
[ 0.76945444, 0.35601624, 0.50531337, 0.16078153],
[ 0.70631892, 0.37838513, 0.5675777 , 0.18919257],
[ 0.75676497, 0.35228714, 0.53495455, 0.13047672],
[ 0.76444238, 0.27125375, 0.55483721, 0.18494574],
[ 0.76185188, 0.34011245, 0.53057542, 0.14964948],
[ 0.6985796 , 0.37889063, 0.56833595, 0.21312598],
[ 0.77011854, 0.35349703, 0.50499576, 0.16412362],
[ 0.74143307, 0.29421947, 0.57667016, 0.17653168],
[ 0.73659895, 0.33811099, 0.56754345, 0.14490471],
[ 0.76741698, 0.34773582, 0.51560829, 0.15588157],
[ 0.76785726, 0.34902603, 0.51190484, 0.16287881],
[ 0.76467269, 0.31486523, 0.53976896, 0.15743261],
[ 0.74088576, 0.33173989, 0.55289982, 0.18798594],
[ 0.73350949, 0.35452959, 0.55013212, 0.18337737],
[ 0.78667474, 0.35883409, 0.48304589, 0.13801311],
[ 0.76521855, 0.33391355, 0.52869645, 0.15304371],
[ 0.77242925, 0.33706004, 0.51963422, 0.14044168],
[ 0.76434981, 0.35581802, 0.51395936, 0.15814134],
[ 0.70779525, 0.31850786, 0.60162596, 0.1887454 ],
[ 0.69333409, 0.38518561, 0.57777841, 0.1925928 ],
[ 0.71524936, 0.40530797, 0.53643702, 0.19073316],
[ 0.75457341, 0.34913098, 0.52932761, 0.16893434],
[ 0.77530021, 0.28304611, 0.54147951, 0.15998258],
[ 0.72992443, 0.39103094, 0.53440896, 0.16944674],
[ 0.74714194, 0.33960997, 0.54337595, 0.17659719],
[ 0.72337118, 0.34195729, 0.57869695, 0.15782644],
[ 0.73260391, 0.36029701, 0.55245541, 0.1681386 ],
[ 0.76262994, 0.34186859, 0.52595168, 0.1577855 ],
[ 0.76986879, 0.35413965, 0.5081134 , 0.15397376],
[ 0.73544284, 0.35458851, 0.55158213, 0.1707278 ],
[ 0.73239618, 0.38547167, 0.53966034, 0.15418867],
[ 0.73446047, 0.37367287, 0.5411814 , 0.16750853],
[ 0.75728103, 0.3542121 , 0.52521104, 0.15878473],
[ 0.78258054, 0.38361791, 0.4603415 , 0.16879188],
[ 0.7431482 , 0.36505526, 0.5345452 , 0.16948994],
[ 0.65387747, 0.34250725, 0.62274045, 0.25947519],
[ 0.69052512, 0.32145135, 0.60718588, 0.22620651],
[ 0.71491405, 0.30207636, 0.59408351, 0.21145345],
[ 0.69276796, 0.31889319, 0.61579374, 0.1979337 ],
[ 0.68619022, 0.31670318, 0.61229281, 0.232249 ],
[ 0.70953708, 0.28008043, 0.61617694, 0.1960563 ],
[ 0.67054118, 0.34211284, 0.61580312, 0.23263673],
[ 0.71366557, 0.28351098, 0.61590317, 0.17597233],
[ 0.71414125, 0.26647062, 0.61821183, 0.19185884],
[ 0.69198788, 0.34599394, 0.58626751, 0.24027357],
[ 0.71562645, 0.3523084 , 0.56149152, 0.22019275],
[ 0.71576546, 0.30196356, 0.59274328, 0.21249287],
[ 0.71718148, 0.31640359, 0.58007326, 0.22148252],
[ 0.6925518 , 0.30375079, 0.60750157, 0.24300063],
[ 0.67767924, 0.32715549, 0.59589036, 0.28041899],
[ 0.69589887, 0.34794944, 0.57629125, 0.25008866],
[ 0.70610474, 0.3258945 , 0.59747324, 0.1955367 ],
[ 0.69299099, 0.34199555, 0.60299216, 0.19799743],
[ 0.70600618, 0.2383917 , 0.63265489, 0.21088496],
[ 0.72712585, 0.26661281, 0.60593821, 0.18178146],
[ 0.70558934, 0.32722984, 0.58287815, 0.23519645],
[ 0.68307923, 0.34153961, 0.59769433, 0.24395687],
[ 0.71486543, 0.25995106, 0.62202576, 0.18567933],
[ 0.73122464, 0.31338199, 0.56873028, 0.20892133],
[ 0.69595601, 0.3427843 , 0.59208198, 0.21813547],
[ 0.71529453, 0.31790868, 0.59607878, 0.17882363],
[ 0.72785195, 0.32870733, 0.56349829, 0.21131186],
[ 0.71171214, 0.35002236, 0.57170319, 0.21001342],
[ 0.69594002, 0.30447376, 0.60894751, 0.22835532],
[ 0.73089855, 0.30454106, 0.58877939, 0.1624219 ],
[ 0.72766159, 0.27533141, 0.59982915, 0.18683203],
[ 0.71578999, 0.34430405, 0.5798805 , 0.18121266],
[ 0.69417747, 0.30370264, 0.60740528, 0.2386235 ],
[ 0.72366005, 0.32162669, 0.58582004, 0.17230001],
[ 0.69385414, 0.29574111, 0.63698085, 0.15924521],
[ 0.73154399, 0.28501714, 0.57953485, 0.21851314],
[ 0.67017484, 0.36168166, 0.59571097, 0.2553047 ],
[ 0.69804799, 0.338117 , 0.59988499, 0.196326 ],
[ 0.71066905, 0.35533453, 0.56853524, 0.21320072],
[ 0.72415258, 0.32534391, 0.56672811, 0.22039426],
[ 0.69997037, 0.32386689, 0.58504986, 0.25073566],
[ 0.73337886, 0.32948905, 0.54206264, 0.24445962],
[ 0.69052512, 0.32145135, 0.60718588, 0.22620651],
[ 0.69193502, 0.32561648, 0.60035539, 0.23403685],
[ 0.68914871, 0.33943145, 0.58629069, 0.25714504],
[ 0.72155725, 0.32308533, 0.56001458, 0.24769876],
[ 0.72965359, 0.28954508, 0.57909015, 0.22005426],
[ 0.71653899, 0.3307103 , 0.57323119, 0.22047353],
[ 0.67467072, 0.36998072, 0.58761643, 0.25028107],
[ 0.69025916, 0.35097923, 0.5966647 , 0.21058754]])
In [ ]: