In [19]:
# Generating Association Rules from Frequent Itemsets
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
In [22]:
# Download the crimes CSV used for the association-rule examples.
df = pd.read_csv(
    'https://raw.githubusercontent.com/fclesio/learning-space/master/Datasets/01%20-%20Association%20Rules/Crimes.csv'
)
# Keep the frame's underlying array of values; this is what gets handed to
# the transaction encoder in the next step.
# NOTE(review): assumes each CSV row is one transaction -- confirm against the data.
dataset = df.values
In [24]:
# Encode the raw transactions into the one-hot item array that the mlxtend
# frequent-pattern functions take as input.
te = TransactionEncoder()
te_ary = te.fit_transform(dataset)
# Wrap the encoded array in a DataFrame whose columns are the item labels
# learned by the encoder. Note that this rebinds `df`, replacing the frame
# that was read from the CSV.
df = pd.DataFrame(data=te_ary, columns=te.columns_)
# Run Apriori with a minimum support of 0.5 (50%) and report each itemset
# by its column names instead of column indices.
frequent_itemsets = apriori(df, min_support=0.5, use_colnames=True)
frequent_itemsets
Out[24]:
In [25]:
from mlxtend.frequent_patterns import association_rules

# Derive rules from the frequent itemsets, keeping those whose confidence
# reaches the 0.6 threshold. The result is displayed, not stored.
association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.6,
)
Out[25]:
In [26]:
# Rule Generation and Selection Criteria
# Build the rule table again, this time selecting on lift with a threshold
# of 1.2, and keep it in `rules` for the filtering steps that follow.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1.2,
)
rules
Out[26]:
In [27]:
# Add a column holding the number of items in each rule's antecedent set,
# so rules can be filtered by antecedent size.
rules["antecedent_len"] = rules["antecedents"].apply(len)
rules
Out[27]:
In [28]:
# Select rules that have at least two antecedent items, a confidence above
# 0.75 and a lift above 1.2.
rules[
    rules['antecedent_len'].ge(2)
    & rules['confidence'].gt(0.75)
    & rules['lift'].gt(1.2)
]
Out[28]:
In [29]:
# Select the rules whose antecedent set is exactly {'Eggs', 'Kidney Beans'}.
# NOTE(review): these item labels are not visibly tied to the CSV loaded
# above -- confirm they occur in the data; if they do not, the selection
# is simply empty.
rules[rules['antecedents'] == {'Eggs', 'Kidney Beans'}]
Out[29]:
In [30]:
# Frequent Itemsets with Incomplete Antecedent and Consequent Information
import pandas as pd
# Hand-built frequent-itemset table: seven itemsets with their support.
# Only these itemsets are listed -- supports of their sub-itemsets (for
# example '177' on its own) are absent, which is the "incomplete
# information" case this section demonstrates.
# The data is bound to `itemset_data` rather than `dict` so the builtin
# `dict` type is not shadowed for the rest of the kernel session.
itemset_data = {
    'itemsets': [
        ['177', '176'], ['177', '179'],
        ['176', '178'], ['176', '179'],
        ['93', '100'], ['177', '178'],
        ['177', '176', '178'],
    ],
    'support': [
        0.253623, 0.253623, 0.217391,
        0.217391, 0.181159, 0.108696, 0.108696,
    ],
}
freq_itemsets = pd.DataFrame(itemset_data)
# Rendered by the notebook only when this is the last statement in the cell.
freq_itemsets
from mlxtend.frequent_patterns import association_rules
In [31]:
# Generate rules from the hand-built itemset table. Per the mlxtend
# documentation, support_only=True computes only the rule support and
# leaves the other metric columns as NaN, which suits itemset tables that
# lack support values for every sub-itemset. Rules are kept at a threshold
# of 0.1.
res = association_rules(
    freq_itemsets,
    support_only=True,
    min_threshold=0.1,
)
res
Out[31]:
In [32]:
# Narrow the rule table to the three columns of interest: the antecedents,
# the consequents and the rule support.
res = res.loc[:, ['antecedents', 'consequents', 'support']]
res
Out[32]: