In [33]:
import numpy as np
from collections import defaultdict

dataset_filename = "affinity_dataset.txt"
X = np.loadtxt(dataset_filename)
n_samples, n_features = X.shape

# Count how many transactions include apples (feature index 3)
num_apple_purchases = 0
for sample in X:
    if sample[3] == 1:
        num_apple_purchases += 1
print("%d people bought apples" % num_apple_purchases)

# Rules where the conclusion also holds (valid)
valid_rules = defaultdict(int)
# Rules where the conclusion does not hold (invalid)
invalid_rules = defaultdict(int)
# How many times each premise occurs on its own
num_occurances = defaultdict(int)
for sample in X:
    for premise in range(n_features):
        if sample[premise] == 0:
            continue
        num_occurances[premise] += 1
        for conclusion in range(n_features):
            if premise == conclusion:
                continue
            if sample[conclusion] == 1:
                valid_rules[(premise, conclusion)] += 1
            else:
                invalid_rules[(premise, conclusion)] += 1
print(num_occurances)
#print(invalid_rules)
#print(valid_rules)

# Support is the number of transactions in which the rule holds;
# confidence is that count divided by how often the premise occurs.
support = valid_rules
confidence = defaultdict(float)
for premise, conclusion in valid_rules.keys():
    rule = (premise, conclusion)
    confidence[rule] = support[rule] / num_occurances[premise]
print(confidence)
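As a quick sanity check on these definitions, consider some hypothetical counts (illustrative only, not values from affinity_dataset.txt): if milk appears in 50 transactions and 25 of those also contain cheese, then the support of the rule "milk -> cheese" is 25 and its confidence is 25 / 50 = 0.5.

toy_premise_count = 50        # hypothetical: milk bought in 50 transactions
toy_rule_count = 25           # hypothetical: milk and cheese bought together 25 times
print(toy_rule_count / toy_premise_count)   # 0.5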
In [34]:
# Feature names for the five columns of the dataset
features = ["bread", "milk", "cheese", "apples", "bananas"]

def print_rule(premise, conclusion, features):
    premise_name = features[premise]
    conclusion_name = features[conclusion]
    print("Rule: If a person buys {0} they will also buy {1}".format(premise_name, conclusion_name))
    print(" - Support: {0}".format(support[(premise, conclusion)]))
    print(" - Confidence: %.3f" % confidence[(premise, conclusion)])

premise = 1
conclusion = 2
print_rule(premise, conclusion, features)
In [39]:
# Rank the rules by confidence and print the top five
sorted_confidence = sorted(confidence.items(), key=lambda item: item[1], reverse=True)
for index in range(5):
    print("Rule #{0}".format(index + 1))
    premise, conclusion = sorted_confidence[index][0]
    print_rule(premise, conclusion, features)
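The same rules can also be ranked by raw support. A minimal variation (not in the original notebook) using operator.itemgetter in place of the lambda above:

from operator import itemgetter

# Rank by support count instead of confidence
sorted_support = sorted(support.items(), key=itemgetter(1), reverse=True)
for index in range(5):
    print("Rule #{0}".format(index + 1))
    premise, conclusion = sorted_support[index][0]
    print_rule(premise, conclusion, features)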
In [ ]: