In [15]:
# -*- coding: utf-8 -*-
from matplotlib import pyplot as plt
import numpy as np
from sklearn.datasets import load_iris
# Unpack the fields of the object returned by load_iris into the arrays
# used by the rest of the notebook.
data = load_iris()
features, target = data.data, data.target
feature_names, target_names = data.feature_names, data.target_names

# Scatter feature column 0 against feature column 1, one colour/marker pair
# per class index: 0 -> red '>', 1 -> green 'o', 2 -> blue 'x'.
for t, c, marker in zip(range(3), 'rgb', '>ox'):
    plt.scatter(features[target == t, 0], features[target == t, 1],
                marker=marker, c=c)
# ========== First classifier model
# Index the class-name array with the integer targets to get one name string
# per sample.
labels = target_names[target]
# Petal length is stored in feature column 2.
plength = features[:, 2]
is_setosa = (labels == 'setosa')
# Largest value among setosa samples vs. smallest value among all other
# samples, for comparison.
max_setosa = plength[is_setosa].max()
min_non_setosa = plength[~is_setosa].min()
# Parenthesised single-argument print: same output as the Python 2 print
# statement, and valid syntax on Python 3 as well.
print('maximum of setosa: %s' % max_setosa)
print('minimum of others: %s' % min_non_setosa)
In [ ]:
"""
Remaining task: tell the two non-setosa classes apart (virginica vs. versicolor).
"""
In [16]:
# Keep only the non-setosa samples. The subset is rebuilt from the untouched
# `data.data` / `target_names[target]` arrays rather than from `features` /
# `labels` themselves, so re-running this cell does not apply the
# full-length `is_setosa` mask to arrays that were already filtered.
features = data.data[~is_setosa]
labels = target_names[target][~is_setosa]
is_virginica = (labels == 'virginica')

# Exhaustive search: try every observed value of every feature as a
# threshold and keep the (feature, threshold, direction) combination with
# the highest accuracy.
best_acc = -1.0
for fi in range(features.shape[1]):
    # Column under test; it is the same for every candidate threshold.
    feature_i = features[:, fi]
    # Candidate thresholds are the observed values of this feature.
    thresh = feature_i
    for t in thresh:
        pred = (feature_i > t)
        acc = (pred == is_virginica).mean()
        # Accuracy of the inverted rule (prediction flipped).
        rev_acc = (pred == ~is_virginica).mean()
        if rev_acc > acc:
            reverse = True
            acc = rev_acc
        else:
            reverse = False
        # Strict '>' keeps the first combination that reaches the best score.
        if acc > best_acc:
            best_acc = acc
            best_fi = fi
            best_t = t
            best_reverse = reverse
# Single pre-formatted argument (including the separator space the Python 2
# print statement inserted) so the line prints identically on Python 2 and 3.
print('best:  %s' % best_acc)
In [1]:
def is_virginica_test(fi, t, reverse, example):
    """Apply threshold model to a new example.

    Evaluates ``example[fi] > t`` and returns that result, logically
    negated when ``reverse`` is truthy.

    Parameters:
        fi: index into ``example`` of the value to compare.
        t: threshold the value is compared against.
        reverse: when truthy, the comparison result is inverted.
        example: indexable collection of feature values.
    """
    above = example[fi] > t
    return (not above) if reverse else above
In [ ]: