# In [18]:
from sklearn.datasets import load_iris
import numpy as np
# Load the iris dataset that ships with scikit-learn.
dataset = load_iris()
# X holds the per-sample attribute values, Y the corresponding target classes.
X, Y = dataset.data, dataset.target
# Uncomment to read the dataset's own description: print(dataset.DESCR)
# Mean of every attribute, taken down the rows (axis 0).
attribute_means = np.mean(X, axis=0)
print(attribute_means)
# In [16]:
# Discretise each attribute: 1 where the value is at least that attribute's
# mean, 0 otherwise.
X_d = (X >= attribute_means).astype('int')
# Show the first five discretised samples.
print(X_d[:5])
# In [23]:
from collections import defaultdict
from operator import itemgetter
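# Define the training function; its parameters are the dataset, the array of classes, the index of the chosen feature, and the feature value.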
def train_feature_value(X, y_true, feature_index, value):
    """Find the majority class among samples whose feature equals ``value``.

    Only samples for which ``sample[feature_index] == value`` are considered.
    The most frequent class among them is the prediction for that feature
    value; every matching sample of any other class counts as one error.

    Args:
        X: Iterable of samples; each sample must be indexable by
            ``feature_index``.
        y_true: Iterable of class labels, paired with ``X`` by position.
            Labels must be hashable.
        feature_index: Index of the feature to inspect in each sample.
        value: Feature value to select samples by.

    Returns:
        A ``(most_frequent_class, error)`` tuple, where ``error`` is the
        number of matching samples that do not belong to
        ``most_frequent_class``.

    Raises:
        ValueError: If no sample has ``value`` at ``feature_index``, so there
            is no class to predict.
    """
    # Tally how often each class occurs among the matching samples.
    class_counts = defaultdict(int)
    for sample, y in zip(X, y_true):
        if sample[feature_index] == value:
            class_counts[y] += 1

    if not class_counts:
        raise ValueError(
            "no sample has value {!r} for feature {!r}".format(
                value, feature_index
            )
        )

    # max() returns the first maximal item, so ties resolve to the class seen
    # first, exactly as a stable descending sort would.
    most_frequent_class, best_count = max(
        class_counts.items(), key=itemgetter(1)
    )
    print(most_frequent_class)

    # Every matching sample outside the majority class is a misprediction.
    error = sum(class_counts.values()) - best_count
    return most_frequent_class, error
# In [ ]: