In [18]:
from sklearn.feature_selection import SelectKBest
from scipy.stats import pearsonr
from sklearn.datasets import load_iris
# Load the bundled iris dataset; the cells below read iris.data and iris.target.
iris = load_iris()
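# SelectKBest keeps the K best features and returns the data restricted to them.
# Its first argument is the scoring function used to judge each feature: it takes the
# feature matrix and the target vector and returns a pair of arrays (scores, p-values),
# where the i-th entry of each array belongs to the i-th feature.
# Here the score is defined as the absolute Pearson correlation coefficient.
# The parameter k is the number of features to select.
# Define the scoring function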
def multivariate_pearsonr(X, y):
    """Score every feature column by its absolute Pearson correlation with ``y``.

    Intended as a ``score_func`` for ``SelectKBest``: larger scores mean a
    stronger (positive or negative) linear relationship with the target.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix; each column is correlated with ``y`` separately.
    y : array-like of shape (n_samples,)
        Target vector.

    Returns
    -------
    (scores, pvalues) : tuple of two 1-D numpy arrays
        ``scores[i]`` is ``abs(r)`` and ``pvalues[i]`` is the p-value that
        ``pearsonr`` reports for feature ``i``.
    """
    # Imported locally so the function does not depend on an earlier cell
    # having imported numpy (no cell in this section does).
    import numpy as np

    # asarray lets plain nested lists be passed as well as ndarrays;
    # iterating over the transpose walks the matrix column by column.
    results = [pearsonr(column, y) for column in np.asarray(X).T]
    # The sign of the correlation is irrelevant for ranking, hence abs().
    scores = np.array([abs(r) for r, _ in results])
    pvalues = np.array([p for _, p in results])
    return (scores, pvalues)
# Keep the two features with the highest score from multivariate_pearsonr.
transformer = SelectKBest(multivariate_pearsonr, k=2)
Xt_pearson = transformer.fit_transform(X=iris.data, y=iris.target)
print(Xt_pearson)
[[ 1.4 0.2]
[ 1.4 0.2]
[ 1.3 0.2]
[ 1.5 0.2]
[ 1.4 0.2]
[ 1.7 0.4]
[ 1.4 0.3]
[ 1.5 0.2]
[ 1.4 0.2]
[ 1.5 0.1]
[ 1.5 0.2]
[ 1.6 0.2]
[ 1.4 0.1]
[ 1.1 0.1]
[ 1.2 0.2]
[ 1.5 0.4]
[ 1.3 0.4]
[ 1.4 0.3]
[ 1.7 0.3]
[ 1.5 0.3]
[ 1.7 0.2]
[ 1.5 0.4]
[ 1. 0.2]
[ 1.7 0.5]
[ 1.9 0.2]
[ 1.6 0.2]
[ 1.6 0.4]
[ 1.5 0.2]
[ 1.4 0.2]
[ 1.6 0.2]
[ 1.6 0.2]
[ 1.5 0.4]
[ 1.5 0.1]
[ 1.4 0.2]
[ 1.5 0.1]
[ 1.2 0.2]
[ 1.3 0.2]
[ 1.5 0.1]
[ 1.3 0.2]
[ 1.5 0.2]
[ 1.3 0.3]
[ 1.3 0.3]
[ 1.3 0.2]
[ 1.6 0.6]
[ 1.9 0.4]
[ 1.4 0.3]
[ 1.6 0.2]
[ 1.4 0.2]
[ 1.5 0.2]
[ 1.4 0.2]
[ 4.7 1.4]
[ 4.5 1.5]
[ 4.9 1.5]
[ 4. 1.3]
[ 4.6 1.5]
[ 4.5 1.3]
[ 4.7 1.6]
[ 3.3 1. ]
[ 4.6 1.3]
[ 3.9 1.4]
[ 3.5 1. ]
[ 4.2 1.5]
[ 4. 1. ]
[ 4.7 1.4]
[ 3.6 1.3]
[ 4.4 1.4]
[ 4.5 1.5]
[ 4.1 1. ]
[ 4.5 1.5]
[ 3.9 1.1]
[ 4.8 1.8]
[ 4. 1.3]
[ 4.9 1.5]
[ 4.7 1.2]
[ 4.3 1.3]
[ 4.4 1.4]
[ 4.8 1.4]
[ 5. 1.7]
[ 4.5 1.5]
[ 3.5 1. ]
[ 3.8 1.1]
[ 3.7 1. ]
[ 3.9 1.2]
[ 5.1 1.6]
[ 4.5 1.5]
[ 4.5 1.6]
[ 4.7 1.5]
[ 4.4 1.3]
[ 4.1 1.3]
[ 4. 1.3]
[ 4.4 1.2]
[ 4.6 1.4]
[ 4. 1.2]
[ 3.3 1. ]
[ 4.2 1.3]
[ 4.2 1.2]
[ 4.2 1.3]
[ 4.3 1.3]
[ 3. 1.1]
[ 4.1 1.3]
[ 6. 2.5]
[ 5.1 1.9]
[ 5.9 2.1]
[ 5.6 1.8]
[ 5.8 2.2]
[ 6.6 2.1]
[ 4.5 1.7]
[ 6.3 1.8]
[ 5.8 1.8]
[ 6.1 2.5]
[ 5.1 2. ]
[ 5.3 1.9]
[ 5.5 2.1]
[ 5. 2. ]
[ 5.1 2.4]
[ 5.3 2.3]
[ 5.5 1.8]
[ 6.7 2.2]
[ 6.9 2.3]
[ 5. 1.5]
[ 5.7 2.3]
[ 4.9 2. ]
[ 6.7 2. ]
[ 4.9 1.8]
[ 5.7 2.1]
[ 6. 1.8]
[ 4.8 1.8]
[ 4.9 1.8]
[ 5.6 2.1]
[ 5.8 1.6]
[ 6.1 1.9]
[ 6.4 2. ]
[ 5.6 2.2]
[ 5.1 1.5]
[ 5.6 1.4]
[ 6.1 2.3]
[ 5.6 2.4]
[ 5.5 1.8]
[ 4.8 1.8]
[ 5.4 2.1]
[ 5.6 2.4]
[ 5.1 2.3]
[ 5.1 1.9]
[ 5.9 2.3]
[ 5.7 2.5]
[ 5.2 2.3]
[ 5. 1.9]
[ 5.2 2. ]
[ 5.4 2.3]
[ 5.1 1.8]]
In [19]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import mutual_info_classif
# Select the K best features and return the data restricted to them.
# mutual_info_classif adds small random noise to continuous features, so its
# seed is pinned here to make the feature ranking reproducible across runs.
def seeded_mutual_info(X, y):
    """Score features by mutual information with the target, using a fixed seed."""
    return mutual_info_classif(X, y, random_state=0)

SelectKBest(seeded_mutual_info, k=2).fit_transform(iris.data, iris.target)
Out[19]:
array([[ 1.4, 0.2],
[ 1.4, 0.2],
[ 1.3, 0.2],
[ 1.5, 0.2],
[ 1.4, 0.2],
[ 1.7, 0.4],
[ 1.4, 0.3],
[ 1.5, 0.2],
[ 1.4, 0.2],
[ 1.5, 0.1],
[ 1.5, 0.2],
[ 1.6, 0.2],
[ 1.4, 0.1],
[ 1.1, 0.1],
[ 1.2, 0.2],
[ 1.5, 0.4],
[ 1.3, 0.4],
[ 1.4, 0.3],
[ 1.7, 0.3],
[ 1.5, 0.3],
[ 1.7, 0.2],
[ 1.5, 0.4],
[ 1. , 0.2],
[ 1.7, 0.5],
[ 1.9, 0.2],
[ 1.6, 0.2],
[ 1.6, 0.4],
[ 1.5, 0.2],
[ 1.4, 0.2],
[ 1.6, 0.2],
[ 1.6, 0.2],
[ 1.5, 0.4],
[ 1.5, 0.1],
[ 1.4, 0.2],
[ 1.5, 0.1],
[ 1.2, 0.2],
[ 1.3, 0.2],
[ 1.5, 0.1],
[ 1.3, 0.2],
[ 1.5, 0.2],
[ 1.3, 0.3],
[ 1.3, 0.3],
[ 1.3, 0.2],
[ 1.6, 0.6],
[ 1.9, 0.4],
[ 1.4, 0.3],
[ 1.6, 0.2],
[ 1.4, 0.2],
[ 1.5, 0.2],
[ 1.4, 0.2],
[ 4.7, 1.4],
[ 4.5, 1.5],
[ 4.9, 1.5],
[ 4. , 1.3],
[ 4.6, 1.5],
[ 4.5, 1.3],
[ 4.7, 1.6],
[ 3.3, 1. ],
[ 4.6, 1.3],
[ 3.9, 1.4],
[ 3.5, 1. ],
[ 4.2, 1.5],
[ 4. , 1. ],
[ 4.7, 1.4],
[ 3.6, 1.3],
[ 4.4, 1.4],
[ 4.5, 1.5],
[ 4.1, 1. ],
[ 4.5, 1.5],
[ 3.9, 1.1],
[ 4.8, 1.8],
[ 4. , 1.3],
[ 4.9, 1.5],
[ 4.7, 1.2],
[ 4.3, 1.3],
[ 4.4, 1.4],
[ 4.8, 1.4],
[ 5. , 1.7],
[ 4.5, 1.5],
[ 3.5, 1. ],
[ 3.8, 1.1],
[ 3.7, 1. ],
[ 3.9, 1.2],
[ 5.1, 1.6],
[ 4.5, 1.5],
[ 4.5, 1.6],
[ 4.7, 1.5],
[ 4.4, 1.3],
[ 4.1, 1.3],
[ 4. , 1.3],
[ 4.4, 1.2],
[ 4.6, 1.4],
[ 4. , 1.2],
[ 3.3, 1. ],
[ 4.2, 1.3],
[ 4.2, 1.2],
[ 4.2, 1.3],
[ 4.3, 1.3],
[ 3. , 1.1],
[ 4.1, 1.3],
[ 6. , 2.5],
[ 5.1, 1.9],
[ 5.9, 2.1],
[ 5.6, 1.8],
[ 5.8, 2.2],
[ 6.6, 2.1],
[ 4.5, 1.7],
[ 6.3, 1.8],
[ 5.8, 1.8],
[ 6.1, 2.5],
[ 5.1, 2. ],
[ 5.3, 1.9],
[ 5.5, 2.1],
[ 5. , 2. ],
[ 5.1, 2.4],
[ 5.3, 2.3],
[ 5.5, 1.8],
[ 6.7, 2.2],
[ 6.9, 2.3],
[ 5. , 1.5],
[ 5.7, 2.3],
[ 4.9, 2. ],
[ 6.7, 2. ],
[ 4.9, 1.8],
[ 5.7, 2.1],
[ 6. , 1.8],
[ 4.8, 1.8],
[ 4.9, 1.8],
[ 5.6, 2.1],
[ 5.8, 1.6],
[ 6.1, 1.9],
[ 6.4, 2. ],
[ 5.6, 2.2],
[ 5.1, 1.5],
[ 5.6, 1.4],
[ 6.1, 2.3],
[ 5.6, 2.4],
[ 5.5, 1.8],
[ 4.8, 1.8],
[ 5.4, 2.1],
[ 5.6, 2.4],
[ 5.1, 2.3],
[ 5.1, 1.9],
[ 5.9, 2.3],
[ 5.7, 2.5],
[ 5.2, 2.3],
[ 5. , 1.9],
[ 5.2, 2. ],
[ 5.4, 2.3],
[ 5.1, 1.8]])
In [20]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
# Recursive feature elimination (RFE); returns the data after feature selection.
# estimator: the base model used to rank the features.
# n_features_to_select: the number of features to keep.
RFE(
    estimator=LogisticRegression(),
    n_features_to_select=2,
).fit_transform(iris.data, iris.target)
Out[20]:
array([[ 3.5, 0.2],
[ 3. , 0.2],
[ 3.2, 0.2],
[ 3.1, 0.2],
[ 3.6, 0.2],
[ 3.9, 0.4],
[ 3.4, 0.3],
[ 3.4, 0.2],
[ 2.9, 0.2],
[ 3.1, 0.1],
[ 3.7, 0.2],
[ 3.4, 0.2],
[ 3. , 0.1],
[ 3. , 0.1],
[ 4. , 0.2],
[ 4.4, 0.4],
[ 3.9, 0.4],
[ 3.5, 0.3],
[ 3.8, 0.3],
[ 3.8, 0.3],
[ 3.4, 0.2],
[ 3.7, 0.4],
[ 3.6, 0.2],
[ 3.3, 0.5],
[ 3.4, 0.2],
[ 3. , 0.2],
[ 3.4, 0.4],
[ 3.5, 0.2],
[ 3.4, 0.2],
[ 3.2, 0.2],
[ 3.1, 0.2],
[ 3.4, 0.4],
[ 4.1, 0.1],
[ 4.2, 0.2],
[ 3.1, 0.1],
[ 3.2, 0.2],
[ 3.5, 0.2],
[ 3.1, 0.1],
[ 3. , 0.2],
[ 3.4, 0.2],
[ 3.5, 0.3],
[ 2.3, 0.3],
[ 3.2, 0.2],
[ 3.5, 0.6],
[ 3.8, 0.4],
[ 3. , 0.3],
[ 3.8, 0.2],
[ 3.2, 0.2],
[ 3.7, 0.2],
[ 3.3, 0.2],
[ 3.2, 1.4],
[ 3.2, 1.5],
[ 3.1, 1.5],
[ 2.3, 1.3],
[ 2.8, 1.5],
[ 2.8, 1.3],
[ 3.3, 1.6],
[ 2.4, 1. ],
[ 2.9, 1.3],
[ 2.7, 1.4],
[ 2. , 1. ],
[ 3. , 1.5],
[ 2.2, 1. ],
[ 2.9, 1.4],
[ 2.9, 1.3],
[ 3.1, 1.4],
[ 3. , 1.5],
[ 2.7, 1. ],
[ 2.2, 1.5],
[ 2.5, 1.1],
[ 3.2, 1.8],
[ 2.8, 1.3],
[ 2.5, 1.5],
[ 2.8, 1.2],
[ 2.9, 1.3],
[ 3. , 1.4],
[ 2.8, 1.4],
[ 3. , 1.7],
[ 2.9, 1.5],
[ 2.6, 1. ],
[ 2.4, 1.1],
[ 2.4, 1. ],
[ 2.7, 1.2],
[ 2.7, 1.6],
[ 3. , 1.5],
[ 3.4, 1.6],
[ 3.1, 1.5],
[ 2.3, 1.3],
[ 3. , 1.3],
[ 2.5, 1.3],
[ 2.6, 1.2],
[ 3. , 1.4],
[ 2.6, 1.2],
[ 2.3, 1. ],
[ 2.7, 1.3],
[ 3. , 1.2],
[ 2.9, 1.3],
[ 2.9, 1.3],
[ 2.5, 1.1],
[ 2.8, 1.3],
[ 3.3, 2.5],
[ 2.7, 1.9],
[ 3. , 2.1],
[ 2.9, 1.8],
[ 3. , 2.2],
[ 3. , 2.1],
[ 2.5, 1.7],
[ 2.9, 1.8],
[ 2.5, 1.8],
[ 3.6, 2.5],
[ 3.2, 2. ],
[ 2.7, 1.9],
[ 3. , 2.1],
[ 2.5, 2. ],
[ 2.8, 2.4],
[ 3.2, 2.3],
[ 3. , 1.8],
[ 3.8, 2.2],
[ 2.6, 2.3],
[ 2.2, 1.5],
[ 3.2, 2.3],
[ 2.8, 2. ],
[ 2.8, 2. ],
[ 2.7, 1.8],
[ 3.3, 2.1],
[ 3.2, 1.8],
[ 2.8, 1.8],
[ 3. , 1.8],
[ 2.8, 2.1],
[ 3. , 1.6],
[ 2.8, 1.9],
[ 3.8, 2. ],
[ 2.8, 2.2],
[ 2.8, 1.5],
[ 2.6, 1.4],
[ 3. , 2.3],
[ 3.4, 2.4],
[ 3.1, 1.8],
[ 3. , 1.8],
[ 3.1, 2.1],
[ 3.1, 2.4],
[ 3.1, 2.3],
[ 2.7, 1.9],
[ 3.2, 2.3],
[ 3.3, 2.5],
[ 3. , 2.3],
[ 2.5, 1.9],
[ 3. , 2. ],
[ 3.4, 2.3],
[ 3. , 1.8]])
In [21]:
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import GradientBoostingClassifier
# Feature selection with a gradient-boosted decision tree (GBDT) as the base model.
# random_state is fixed so that the fitted feature importances, and therefore the
# selected columns, are reproducible across runs.
SelectFromModel(GradientBoostingClassifier(random_state=0)).fit_transform(iris.data, iris.target)
Out[21]:
array([[ 1.4, 0.2],
[ 1.4, 0.2],
[ 1.3, 0.2],
[ 1.5, 0.2],
[ 1.4, 0.2],
[ 1.7, 0.4],
[ 1.4, 0.3],
[ 1.5, 0.2],
[ 1.4, 0.2],
[ 1.5, 0.1],
[ 1.5, 0.2],
[ 1.6, 0.2],
[ 1.4, 0.1],
[ 1.1, 0.1],
[ 1.2, 0.2],
[ 1.5, 0.4],
[ 1.3, 0.4],
[ 1.4, 0.3],
[ 1.7, 0.3],
[ 1.5, 0.3],
[ 1.7, 0.2],
[ 1.5, 0.4],
[ 1. , 0.2],
[ 1.7, 0.5],
[ 1.9, 0.2],
[ 1.6, 0.2],
[ 1.6, 0.4],
[ 1.5, 0.2],
[ 1.4, 0.2],
[ 1.6, 0.2],
[ 1.6, 0.2],
[ 1.5, 0.4],
[ 1.5, 0.1],
[ 1.4, 0.2],
[ 1.5, 0.1],
[ 1.2, 0.2],
[ 1.3, 0.2],
[ 1.5, 0.1],
[ 1.3, 0.2],
[ 1.5, 0.2],
[ 1.3, 0.3],
[ 1.3, 0.3],
[ 1.3, 0.2],
[ 1.6, 0.6],
[ 1.9, 0.4],
[ 1.4, 0.3],
[ 1.6, 0.2],
[ 1.4, 0.2],
[ 1.5, 0.2],
[ 1.4, 0.2],
[ 4.7, 1.4],
[ 4.5, 1.5],
[ 4.9, 1.5],
[ 4. , 1.3],
[ 4.6, 1.5],
[ 4.5, 1.3],
[ 4.7, 1.6],
[ 3.3, 1. ],
[ 4.6, 1.3],
[ 3.9, 1.4],
[ 3.5, 1. ],
[ 4.2, 1.5],
[ 4. , 1. ],
[ 4.7, 1.4],
[ 3.6, 1.3],
[ 4.4, 1.4],
[ 4.5, 1.5],
[ 4.1, 1. ],
[ 4.5, 1.5],
[ 3.9, 1.1],
[ 4.8, 1.8],
[ 4. , 1.3],
[ 4.9, 1.5],
[ 4.7, 1.2],
[ 4.3, 1.3],
[ 4.4, 1.4],
[ 4.8, 1.4],
[ 5. , 1.7],
[ 4.5, 1.5],
[ 3.5, 1. ],
[ 3.8, 1.1],
[ 3.7, 1. ],
[ 3.9, 1.2],
[ 5.1, 1.6],
[ 4.5, 1.5],
[ 4.5, 1.6],
[ 4.7, 1.5],
[ 4.4, 1.3],
[ 4.1, 1.3],
[ 4. , 1.3],
[ 4.4, 1.2],
[ 4.6, 1.4],
[ 4. , 1.2],
[ 3.3, 1. ],
[ 4.2, 1.3],
[ 4.2, 1.2],
[ 4.2, 1.3],
[ 4.3, 1.3],
[ 3. , 1.1],
[ 4.1, 1.3],
[ 6. , 2.5],
[ 5.1, 1.9],
[ 5.9, 2.1],
[ 5.6, 1.8],
[ 5.8, 2.2],
[ 6.6, 2.1],
[ 4.5, 1.7],
[ 6.3, 1.8],
[ 5.8, 1.8],
[ 6.1, 2.5],
[ 5.1, 2. ],
[ 5.3, 1.9],
[ 5.5, 2.1],
[ 5. , 2. ],
[ 5.1, 2.4],
[ 5.3, 2.3],
[ 5.5, 1.8],
[ 6.7, 2.2],
[ 6.9, 2.3],
[ 5. , 1.5],
[ 5.7, 2.3],
[ 4.9, 2. ],
[ 6.7, 2. ],
[ 4.9, 1.8],
[ 5.7, 2.1],
[ 6. , 1.8],
[ 4.8, 1.8],
[ 4.9, 1.8],
[ 5.6, 2.1],
[ 5.8, 1.6],
[ 6.1, 1.9],
[ 6.4, 2. ],
[ 5.6, 2.2],
[ 5.1, 1.5],
[ 5.6, 1.4],
[ 6.1, 2.3],
[ 5.6, 2.4],
[ 5.5, 1.8],
[ 4.8, 1.8],
[ 5.4, 2.1],
[ 5.6, 2.4],
[ 5.1, 2.3],
[ 5.1, 1.9],
[ 5.9, 2.3],
[ 5.7, 2.5],
[ 5.2, 2.3],
[ 5. , 1.9],
[ 5.2, 2. ],
[ 5.4, 2.3],
[ 5.1, 1.8]])
In [ ]:
Content source: jacksu/machine-learning
Similar notebooks: