In [15]:
# -*- coding: utf-8 -*-
from matplotlib import pyplot as plt
import numpy as np
from sklearn.datasets import load_iris

data = load_iris()
# load_iris returns an object with several fields; pull out the ones used
# below.
features = data.data
feature_names = data.feature_names
target = data.target
target_names = data.target_names

# Scatter the first two feature columns, one series per class index.
# Each class gets its own (marker, colour) pair; driving the loop from this
# table keeps a class id and its style together, so a class can never be
# drawn with a style left over from the previous iteration.
class_styles = [('>', 'r'), ('o', 'g'), ('x', 'b')]
for t, (marker, c) in enumerate(class_styles):
    in_class = (target == t)
    plt.scatter(
        features[in_class, 0],
        features[in_class, 1],
        marker=marker,
        c=c,
        label=target_names[t],
    )
# Name the axes after the two plotted columns and identify each series so
# the figure can be read on its own.
plt.xlabel(feature_names[0])
plt.ylabel(feature_names[1])
plt.legend()
# ========== First classifier model
# Index the array of class names with the integer targets to get one
# string label per sample.
labels = target_names[target]
# Petal length is stored at column position 2.
plength = features[:, 2]
is_setosa = (labels == 'setosa')
# Largest petal length among setosa samples vs. smallest among all others.
max_setosa = plength[is_setosa].max()
min_non_setosa = plength[~is_setosa].min()
# A parenthesised single-argument print emits the same text whether it is
# parsed as the Python 2 print statement or the Python 3 print function.
print('maximum of setosa: %s' % max_setosa)
print('minimum of others: %s' % min_non_setosa)


maximum of setosa: 1.9
minimum of others: 3.0

In [ ]:
"""
virginica, versicolor
"""

In [16]:
# Keep only the non-setosa samples. Both arrays are rebuilt from the
# unfiltered dataset instead of from their own previous values, so running
# this cell a second time yields the same result rather than applying the
# full-length `is_setosa` mask to already-filtered arrays.
features = data.data[~is_setosa]
labels = target_names[target][~is_setosa]
is_virginica = (labels == 'virginica')

# Exhaustive search: for every feature column, try every observed value as
# a threshold and keep the (feature, threshold, direction) combination with
# the highest accuracy.
best_acc = -1.0
for fi in range(features.shape[1]):
    # The column is the same for every candidate threshold, so extract it
    # once per feature.
    feature_i = features[:, fi]
    # Candidate thresholds are the observed values of this feature.
    for t in feature_i:
        pred = (feature_i > t)
        acc = (pred == is_virginica).mean()
        # Accuracy of the opposite rule (predict virginica when the
        # feature is NOT above the threshold).
        rev_acc = (pred == ~is_virginica).mean()
        if rev_acc > acc:
            reverse = True
            acc = rev_acc
        else:
            reverse = False
        # Strict comparison: on ties the first combination found is kept.
        if acc > best_acc:
            best_acc = acc
            best_fi = fi
            best_t = t
            best_reverse = reverse

# Single-argument form prints identically under Python 2 and Python 3; the
# two spaces reproduce the separator of the original `print 'best: ', x`.
print('best:  %s' % best_acc)


best:  0.94

In [1]:
def is_virginica_test(fi, t, reverse, example):
    """Apply the threshold model to a new example.

    Parameters
    ----------
    fi : index of the entry of ``example`` to compare.
    t : threshold; the comparison is a strict ``example[fi] > t``.
    reverse : when truthy, the comparison result is negated.
    example : indexable container holding the example's values.

    Returns
    -------
    The comparison result, or its logical negation when ``reverse`` is
    truthy.
    """
    above_threshold = example[fi] > t
    return (not above_threshold) if reverse else above_threshold

In [ ]: