In [18]:
from sklearn.feature_selection import SelectKBest
from scipy.stats import pearsonr
from sklearn.datasets import load_iris

# Load the iris dataset; the feature-selection cells below read iris.data and iris.target.
iris = load_iris()
#选择K个最好的特征,返回选择特征后的数据

#第一个参数为计算评估特征是否好的函数,该函数输入特征矩阵和目标向量,输出二元组(评分数组,P值数组),两个数组的第i项分别为第i个特征的评分和P值。在此定义为计算相关系数
#参数k为选择的特征个数
# 定义函数
def multivariate_pearsonr(X, y):
    """Score each feature column of ``X`` by its Pearson correlation with ``y``.

    Written to be passed as the ``score_func`` of ``SelectKBest``.

    Parameters
    ----------
    X : array-like, 2-D
        Feature matrix; each column is treated as one feature.
    y : array-like, 1-D
        Target vector, one entry per row of ``X``.

    Returns
    -------
    scores : numpy.ndarray
        Absolute value of the Pearson correlation coefficient of each column
        with ``y``; the sign is dropped so that strong negative correlations
        rank as highly as strong positive ones.
    pvalues : numpy.ndarray
        The p-value reported by ``pearsonr`` for each column.
    """
    # np.asarray lets nested lists and other array-likes work as well;
    # a bare ``X.T`` would only work on objects that already have ``.T``.
    columns = np.asarray(X).T
    # Each pearsonr result unpacks as (correlation, p-value).
    results = [pearsonr(column, y) for column in columns]
    scores = np.array([abs(r) for r, _ in results])
    pvalues = np.array([p for _, p in results])
    return scores, pvalues

# Keep the two features that score highest under multivariate_pearsonr.
transformer = SelectKBest(score_func=multivariate_pearsonr, k=2)
transformer.fit(iris.data, iris.target)
Xt_pearson = transformer.transform(iris.data)
print(Xt_pearson)


[[ 1.4  0.2]
 [ 1.4  0.2]
 [ 1.3  0.2]
 [ 1.5  0.2]
 [ 1.4  0.2]
 [ 1.7  0.4]
 [ 1.4  0.3]
 [ 1.5  0.2]
 [ 1.4  0.2]
 [ 1.5  0.1]
 [ 1.5  0.2]
 [ 1.6  0.2]
 [ 1.4  0.1]
 [ 1.1  0.1]
 [ 1.2  0.2]
 [ 1.5  0.4]
 [ 1.3  0.4]
 [ 1.4  0.3]
 [ 1.7  0.3]
 [ 1.5  0.3]
 [ 1.7  0.2]
 [ 1.5  0.4]
 [ 1.   0.2]
 [ 1.7  0.5]
 [ 1.9  0.2]
 [ 1.6  0.2]
 [ 1.6  0.4]
 [ 1.5  0.2]
 [ 1.4  0.2]
 [ 1.6  0.2]
 [ 1.6  0.2]
 [ 1.5  0.4]
 [ 1.5  0.1]
 [ 1.4  0.2]
 [ 1.5  0.1]
 [ 1.2  0.2]
 [ 1.3  0.2]
 [ 1.5  0.1]
 [ 1.3  0.2]
 [ 1.5  0.2]
 [ 1.3  0.3]
 [ 1.3  0.3]
 [ 1.3  0.2]
 [ 1.6  0.6]
 [ 1.9  0.4]
 [ 1.4  0.3]
 [ 1.6  0.2]
 [ 1.4  0.2]
 [ 1.5  0.2]
 [ 1.4  0.2]
 [ 4.7  1.4]
 [ 4.5  1.5]
 [ 4.9  1.5]
 [ 4.   1.3]
 [ 4.6  1.5]
 [ 4.5  1.3]
 [ 4.7  1.6]
 [ 3.3  1. ]
 [ 4.6  1.3]
 [ 3.9  1.4]
 [ 3.5  1. ]
 [ 4.2  1.5]
 [ 4.   1. ]
 [ 4.7  1.4]
 [ 3.6  1.3]
 [ 4.4  1.4]
 [ 4.5  1.5]
 [ 4.1  1. ]
 [ 4.5  1.5]
 [ 3.9  1.1]
 [ 4.8  1.8]
 [ 4.   1.3]
 [ 4.9  1.5]
 [ 4.7  1.2]
 [ 4.3  1.3]
 [ 4.4  1.4]
 [ 4.8  1.4]
 [ 5.   1.7]
 [ 4.5  1.5]
 [ 3.5  1. ]
 [ 3.8  1.1]
 [ 3.7  1. ]
 [ 3.9  1.2]
 [ 5.1  1.6]
 [ 4.5  1.5]
 [ 4.5  1.6]
 [ 4.7  1.5]
 [ 4.4  1.3]
 [ 4.1  1.3]
 [ 4.   1.3]
 [ 4.4  1.2]
 [ 4.6  1.4]
 [ 4.   1.2]
 [ 3.3  1. ]
 [ 4.2  1.3]
 [ 4.2  1.2]
 [ 4.2  1.3]
 [ 4.3  1.3]
 [ 3.   1.1]
 [ 4.1  1.3]
 [ 6.   2.5]
 [ 5.1  1.9]
 [ 5.9  2.1]
 [ 5.6  1.8]
 [ 5.8  2.2]
 [ 6.6  2.1]
 [ 4.5  1.7]
 [ 6.3  1.8]
 [ 5.8  1.8]
 [ 6.1  2.5]
 [ 5.1  2. ]
 [ 5.3  1.9]
 [ 5.5  2.1]
 [ 5.   2. ]
 [ 5.1  2.4]
 [ 5.3  2.3]
 [ 5.5  1.8]
 [ 6.7  2.2]
 [ 6.9  2.3]
 [ 5.   1.5]
 [ 5.7  2.3]
 [ 4.9  2. ]
 [ 6.7  2. ]
 [ 4.9  1.8]
 [ 5.7  2.1]
 [ 6.   1.8]
 [ 4.8  1.8]
 [ 4.9  1.8]
 [ 5.6  2.1]
 [ 5.8  1.6]
 [ 6.1  1.9]
 [ 6.4  2. ]
 [ 5.6  2.2]
 [ 5.1  1.5]
 [ 5.6  1.4]
 [ 6.1  2.3]
 [ 5.6  2.4]
 [ 5.5  1.8]
 [ 4.8  1.8]
 [ 5.4  2.1]
 [ 5.6  2.4]
 [ 5.1  2.3]
 [ 5.1  1.9]
 [ 5.9  2.3]
 [ 5.7  2.5]
 [ 5.2  2.3]
 [ 5.   1.9]
 [ 5.2  2. ]
 [ 5.4  2.3]
 [ 5.1  1.8]]

In [19]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import mutual_info_classif

# Select the k best features and return the data restricted to those features.
# mutual_info_classif accepts a random_state; it is pinned here so the scores
# (and therefore the selected columns) are the same on every run.
SelectKBest(
    lambda X, y: mutual_info_classif(X, y, random_state=0),
    k=2,
).fit_transform(iris.data, iris.target)


Out[19]:
array([[ 1.4,  0.2],
       [ 1.4,  0.2],
       [ 1.3,  0.2],
       [ 1.5,  0.2],
       [ 1.4,  0.2],
       [ 1.7,  0.4],
       [ 1.4,  0.3],
       [ 1.5,  0.2],
       [ 1.4,  0.2],
       [ 1.5,  0.1],
       [ 1.5,  0.2],
       [ 1.6,  0.2],
       [ 1.4,  0.1],
       [ 1.1,  0.1],
       [ 1.2,  0.2],
       [ 1.5,  0.4],
       [ 1.3,  0.4],
       [ 1.4,  0.3],
       [ 1.7,  0.3],
       [ 1.5,  0.3],
       [ 1.7,  0.2],
       [ 1.5,  0.4],
       [ 1. ,  0.2],
       [ 1.7,  0.5],
       [ 1.9,  0.2],
       [ 1.6,  0.2],
       [ 1.6,  0.4],
       [ 1.5,  0.2],
       [ 1.4,  0.2],
       [ 1.6,  0.2],
       [ 1.6,  0.2],
       [ 1.5,  0.4],
       [ 1.5,  0.1],
       [ 1.4,  0.2],
       [ 1.5,  0.1],
       [ 1.2,  0.2],
       [ 1.3,  0.2],
       [ 1.5,  0.1],
       [ 1.3,  0.2],
       [ 1.5,  0.2],
       [ 1.3,  0.3],
       [ 1.3,  0.3],
       [ 1.3,  0.2],
       [ 1.6,  0.6],
       [ 1.9,  0.4],
       [ 1.4,  0.3],
       [ 1.6,  0.2],
       [ 1.4,  0.2],
       [ 1.5,  0.2],
       [ 1.4,  0.2],
       [ 4.7,  1.4],
       [ 4.5,  1.5],
       [ 4.9,  1.5],
       [ 4. ,  1.3],
       [ 4.6,  1.5],
       [ 4.5,  1.3],
       [ 4.7,  1.6],
       [ 3.3,  1. ],
       [ 4.6,  1.3],
       [ 3.9,  1.4],
       [ 3.5,  1. ],
       [ 4.2,  1.5],
       [ 4. ,  1. ],
       [ 4.7,  1.4],
       [ 3.6,  1.3],
       [ 4.4,  1.4],
       [ 4.5,  1.5],
       [ 4.1,  1. ],
       [ 4.5,  1.5],
       [ 3.9,  1.1],
       [ 4.8,  1.8],
       [ 4. ,  1.3],
       [ 4.9,  1.5],
       [ 4.7,  1.2],
       [ 4.3,  1.3],
       [ 4.4,  1.4],
       [ 4.8,  1.4],
       [ 5. ,  1.7],
       [ 4.5,  1.5],
       [ 3.5,  1. ],
       [ 3.8,  1.1],
       [ 3.7,  1. ],
       [ 3.9,  1.2],
       [ 5.1,  1.6],
       [ 4.5,  1.5],
       [ 4.5,  1.6],
       [ 4.7,  1.5],
       [ 4.4,  1.3],
       [ 4.1,  1.3],
       [ 4. ,  1.3],
       [ 4.4,  1.2],
       [ 4.6,  1.4],
       [ 4. ,  1.2],
       [ 3.3,  1. ],
       [ 4.2,  1.3],
       [ 4.2,  1.2],
       [ 4.2,  1.3],
       [ 4.3,  1.3],
       [ 3. ,  1.1],
       [ 4.1,  1.3],
       [ 6. ,  2.5],
       [ 5.1,  1.9],
       [ 5.9,  2.1],
       [ 5.6,  1.8],
       [ 5.8,  2.2],
       [ 6.6,  2.1],
       [ 4.5,  1.7],
       [ 6.3,  1.8],
       [ 5.8,  1.8],
       [ 6.1,  2.5],
       [ 5.1,  2. ],
       [ 5.3,  1.9],
       [ 5.5,  2.1],
       [ 5. ,  2. ],
       [ 5.1,  2.4],
       [ 5.3,  2.3],
       [ 5.5,  1.8],
       [ 6.7,  2.2],
       [ 6.9,  2.3],
       [ 5. ,  1.5],
       [ 5.7,  2.3],
       [ 4.9,  2. ],
       [ 6.7,  2. ],
       [ 4.9,  1.8],
       [ 5.7,  2.1],
       [ 6. ,  1.8],
       [ 4.8,  1.8],
       [ 4.9,  1.8],
       [ 5.6,  2.1],
       [ 5.8,  1.6],
       [ 6.1,  1.9],
       [ 6.4,  2. ],
       [ 5.6,  2.2],
       [ 5.1,  1.5],
       [ 5.6,  1.4],
       [ 6.1,  2.3],
       [ 5.6,  2.4],
       [ 5.5,  1.8],
       [ 4.8,  1.8],
       [ 5.4,  2.1],
       [ 5.6,  2.4],
       [ 5.1,  2.3],
       [ 5.1,  1.9],
       [ 5.9,  2.3],
       [ 5.7,  2.5],
       [ 5.2,  2.3],
       [ 5. ,  1.9],
       [ 5.2,  2. ],
       [ 5.4,  2.3],
       [ 5.1,  1.8]])

In [20]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

# Recursive feature elimination; returns the data after feature selection.
#   estimator            -- the base model
#   n_features_to_select -- the number of features to select
RFE(
    estimator=LogisticRegression(),
    n_features_to_select=2,
).fit_transform(
    iris.data,
    iris.target,
)


Out[20]:
array([[ 3.5,  0.2],
       [ 3. ,  0.2],
       [ 3.2,  0.2],
       [ 3.1,  0.2],
       [ 3.6,  0.2],
       [ 3.9,  0.4],
       [ 3.4,  0.3],
       [ 3.4,  0.2],
       [ 2.9,  0.2],
       [ 3.1,  0.1],
       [ 3.7,  0.2],
       [ 3.4,  0.2],
       [ 3. ,  0.1],
       [ 3. ,  0.1],
       [ 4. ,  0.2],
       [ 4.4,  0.4],
       [ 3.9,  0.4],
       [ 3.5,  0.3],
       [ 3.8,  0.3],
       [ 3.8,  0.3],
       [ 3.4,  0.2],
       [ 3.7,  0.4],
       [ 3.6,  0.2],
       [ 3.3,  0.5],
       [ 3.4,  0.2],
       [ 3. ,  0.2],
       [ 3.4,  0.4],
       [ 3.5,  0.2],
       [ 3.4,  0.2],
       [ 3.2,  0.2],
       [ 3.1,  0.2],
       [ 3.4,  0.4],
       [ 4.1,  0.1],
       [ 4.2,  0.2],
       [ 3.1,  0.1],
       [ 3.2,  0.2],
       [ 3.5,  0.2],
       [ 3.1,  0.1],
       [ 3. ,  0.2],
       [ 3.4,  0.2],
       [ 3.5,  0.3],
       [ 2.3,  0.3],
       [ 3.2,  0.2],
       [ 3.5,  0.6],
       [ 3.8,  0.4],
       [ 3. ,  0.3],
       [ 3.8,  0.2],
       [ 3.2,  0.2],
       [ 3.7,  0.2],
       [ 3.3,  0.2],
       [ 3.2,  1.4],
       [ 3.2,  1.5],
       [ 3.1,  1.5],
       [ 2.3,  1.3],
       [ 2.8,  1.5],
       [ 2.8,  1.3],
       [ 3.3,  1.6],
       [ 2.4,  1. ],
       [ 2.9,  1.3],
       [ 2.7,  1.4],
       [ 2. ,  1. ],
       [ 3. ,  1.5],
       [ 2.2,  1. ],
       [ 2.9,  1.4],
       [ 2.9,  1.3],
       [ 3.1,  1.4],
       [ 3. ,  1.5],
       [ 2.7,  1. ],
       [ 2.2,  1.5],
       [ 2.5,  1.1],
       [ 3.2,  1.8],
       [ 2.8,  1.3],
       [ 2.5,  1.5],
       [ 2.8,  1.2],
       [ 2.9,  1.3],
       [ 3. ,  1.4],
       [ 2.8,  1.4],
       [ 3. ,  1.7],
       [ 2.9,  1.5],
       [ 2.6,  1. ],
       [ 2.4,  1.1],
       [ 2.4,  1. ],
       [ 2.7,  1.2],
       [ 2.7,  1.6],
       [ 3. ,  1.5],
       [ 3.4,  1.6],
       [ 3.1,  1.5],
       [ 2.3,  1.3],
       [ 3. ,  1.3],
       [ 2.5,  1.3],
       [ 2.6,  1.2],
       [ 3. ,  1.4],
       [ 2.6,  1.2],
       [ 2.3,  1. ],
       [ 2.7,  1.3],
       [ 3. ,  1.2],
       [ 2.9,  1.3],
       [ 2.9,  1.3],
       [ 2.5,  1.1],
       [ 2.8,  1.3],
       [ 3.3,  2.5],
       [ 2.7,  1.9],
       [ 3. ,  2.1],
       [ 2.9,  1.8],
       [ 3. ,  2.2],
       [ 3. ,  2.1],
       [ 2.5,  1.7],
       [ 2.9,  1.8],
       [ 2.5,  1.8],
       [ 3.6,  2.5],
       [ 3.2,  2. ],
       [ 2.7,  1.9],
       [ 3. ,  2.1],
       [ 2.5,  2. ],
       [ 2.8,  2.4],
       [ 3.2,  2.3],
       [ 3. ,  1.8],
       [ 3.8,  2.2],
       [ 2.6,  2.3],
       [ 2.2,  1.5],
       [ 3.2,  2.3],
       [ 2.8,  2. ],
       [ 2.8,  2. ],
       [ 2.7,  1.8],
       [ 3.3,  2.1],
       [ 3.2,  1.8],
       [ 2.8,  1.8],
       [ 3. ,  1.8],
       [ 2.8,  2.1],
       [ 3. ,  1.6],
       [ 2.8,  1.9],
       [ 3.8,  2. ],
       [ 2.8,  2.2],
       [ 2.8,  1.5],
       [ 2.6,  1.4],
       [ 3. ,  2.3],
       [ 3.4,  2.4],
       [ 3.1,  1.8],
       [ 3. ,  1.8],
       [ 3.1,  2.1],
       [ 3.1,  2.4],
       [ 3.1,  2.3],
       [ 2.7,  1.9],
       [ 3.2,  2.3],
       [ 3.3,  2.5],
       [ 3. ,  2.3],
       [ 2.5,  1.9],
       [ 3. ,  2. ],
       [ 3.4,  2.3],
       [ 3. ,  1.8]])

In [21]:
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import GradientBoostingClassifier

# Feature selection with a GBDT (gradient-boosted decision trees) base model.
# random_state is fixed so that repeated runs fit the same model and select
# the same features.
SelectFromModel(
    GradientBoostingClassifier(random_state=0)
).fit_transform(iris.data, iris.target)


Out[21]:
array([[ 1.4,  0.2],
       [ 1.4,  0.2],
       [ 1.3,  0.2],
       [ 1.5,  0.2],
       [ 1.4,  0.2],
       [ 1.7,  0.4],
       [ 1.4,  0.3],
       [ 1.5,  0.2],
       [ 1.4,  0.2],
       [ 1.5,  0.1],
       [ 1.5,  0.2],
       [ 1.6,  0.2],
       [ 1.4,  0.1],
       [ 1.1,  0.1],
       [ 1.2,  0.2],
       [ 1.5,  0.4],
       [ 1.3,  0.4],
       [ 1.4,  0.3],
       [ 1.7,  0.3],
       [ 1.5,  0.3],
       [ 1.7,  0.2],
       [ 1.5,  0.4],
       [ 1. ,  0.2],
       [ 1.7,  0.5],
       [ 1.9,  0.2],
       [ 1.6,  0.2],
       [ 1.6,  0.4],
       [ 1.5,  0.2],
       [ 1.4,  0.2],
       [ 1.6,  0.2],
       [ 1.6,  0.2],
       [ 1.5,  0.4],
       [ 1.5,  0.1],
       [ 1.4,  0.2],
       [ 1.5,  0.1],
       [ 1.2,  0.2],
       [ 1.3,  0.2],
       [ 1.5,  0.1],
       [ 1.3,  0.2],
       [ 1.5,  0.2],
       [ 1.3,  0.3],
       [ 1.3,  0.3],
       [ 1.3,  0.2],
       [ 1.6,  0.6],
       [ 1.9,  0.4],
       [ 1.4,  0.3],
       [ 1.6,  0.2],
       [ 1.4,  0.2],
       [ 1.5,  0.2],
       [ 1.4,  0.2],
       [ 4.7,  1.4],
       [ 4.5,  1.5],
       [ 4.9,  1.5],
       [ 4. ,  1.3],
       [ 4.6,  1.5],
       [ 4.5,  1.3],
       [ 4.7,  1.6],
       [ 3.3,  1. ],
       [ 4.6,  1.3],
       [ 3.9,  1.4],
       [ 3.5,  1. ],
       [ 4.2,  1.5],
       [ 4. ,  1. ],
       [ 4.7,  1.4],
       [ 3.6,  1.3],
       [ 4.4,  1.4],
       [ 4.5,  1.5],
       [ 4.1,  1. ],
       [ 4.5,  1.5],
       [ 3.9,  1.1],
       [ 4.8,  1.8],
       [ 4. ,  1.3],
       [ 4.9,  1.5],
       [ 4.7,  1.2],
       [ 4.3,  1.3],
       [ 4.4,  1.4],
       [ 4.8,  1.4],
       [ 5. ,  1.7],
       [ 4.5,  1.5],
       [ 3.5,  1. ],
       [ 3.8,  1.1],
       [ 3.7,  1. ],
       [ 3.9,  1.2],
       [ 5.1,  1.6],
       [ 4.5,  1.5],
       [ 4.5,  1.6],
       [ 4.7,  1.5],
       [ 4.4,  1.3],
       [ 4.1,  1.3],
       [ 4. ,  1.3],
       [ 4.4,  1.2],
       [ 4.6,  1.4],
       [ 4. ,  1.2],
       [ 3.3,  1. ],
       [ 4.2,  1.3],
       [ 4.2,  1.2],
       [ 4.2,  1.3],
       [ 4.3,  1.3],
       [ 3. ,  1.1],
       [ 4.1,  1.3],
       [ 6. ,  2.5],
       [ 5.1,  1.9],
       [ 5.9,  2.1],
       [ 5.6,  1.8],
       [ 5.8,  2.2],
       [ 6.6,  2.1],
       [ 4.5,  1.7],
       [ 6.3,  1.8],
       [ 5.8,  1.8],
       [ 6.1,  2.5],
       [ 5.1,  2. ],
       [ 5.3,  1.9],
       [ 5.5,  2.1],
       [ 5. ,  2. ],
       [ 5.1,  2.4],
       [ 5.3,  2.3],
       [ 5.5,  1.8],
       [ 6.7,  2.2],
       [ 6.9,  2.3],
       [ 5. ,  1.5],
       [ 5.7,  2.3],
       [ 4.9,  2. ],
       [ 6.7,  2. ],
       [ 4.9,  1.8],
       [ 5.7,  2.1],
       [ 6. ,  1.8],
       [ 4.8,  1.8],
       [ 4.9,  1.8],
       [ 5.6,  2.1],
       [ 5.8,  1.6],
       [ 6.1,  1.9],
       [ 6.4,  2. ],
       [ 5.6,  2.2],
       [ 5.1,  1.5],
       [ 5.6,  1.4],
       [ 6.1,  2.3],
       [ 5.6,  2.4],
       [ 5.5,  1.8],
       [ 4.8,  1.8],
       [ 5.4,  2.1],
       [ 5.6,  2.4],
       [ 5.1,  2.3],
       [ 5.1,  1.9],
       [ 5.9,  2.3],
       [ 5.7,  2.5],
       [ 5.2,  2.3],
       [ 5. ,  1.9],
       [ 5.2,  2. ],
       [ 5.4,  2.3],
       [ 5.1,  1.8]])

In [ ]: