In [18]:
from sklearn.datasets import load_iris
import numpy as np
dataset = load_iris()

X = dataset.data
Y = dataset.target

#print(dataset.DESCR)

attribute_means = X.mean(axis=0)

print(attribute_means)


[ 5.84333333  3.054       3.75866667  1.19866667]

In [16]:
X_d = np.array(X >= attribute_means, dtype='int')
print (X_d[0:5])


[[0 1 0 0]
 [0 0 0 0]
 [0 1 0 0]
 [0 1 0 0]
 [0 1 0 0]]

In [23]:
from collections import defaultdict
from operator import itemgetter

#下面创建函数声明,参数分别是数据集、类别数组、选好的特征索引值、特征值。
def train_feature_value(X, y_true, feature_index, value):
    class_counts = defaultdict(int)
    for sample, y in zip(X, y_true):
        if sample[feature_index] == value:
            class_counts[y] += 1
    sorted_class_counts = sorted(class_counts.items(), key=lambda k:k[1], reverse=True)
    most_frequent_class = sorted_class_counts[0][0]
    print (most_frequent_class0
    incorrect_predictions = [class_count for class_value, class_countin class_counts.items()
                             if class_value != most_frequent_class]
    error = sum(incorrect_predictions)
    return most_frequent_class, error


  File "<ipython-input-23-0c140216535c>", line 13
    incorrect_predictions = [class_count for class_value, class_countin class_counts.items()
                        ^
SyntaxError: invalid syntax

In [ ]: