In [19]:
# Generating Association Rules from Frequent Itemsets
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori

In [22]:
# Load the crimes transactions dataset (one row per transaction).
# Source: fclesio/learning-space GitHub repository.
df = pd.read_csv('https://raw.githubusercontent.com/fclesio/learning-space/master/Datasets/01%20-%20Association%20Rules/Crimes.csv')

# TransactionEncoder expects an iterable of transactions, so convert the
# frame to a plain 2-D array; .to_numpy() is the pandas-recommended
# replacement for the legacy .values attribute and returns the same array here.
dataset = df.to_numpy()

In [24]:
# One-hot encode the transactions: each unique item becomes a boolean column,
# which is the input format the mlxtend frequent-pattern APIs require.
te = TransactionEncoder()

te_ary = te.fit(dataset).transform(dataset)

# Rebuild a DataFrame from the boolean array, labelling columns with the
# item names the encoder learned (te.columns_).
df = pd.DataFrame(te_ary, columns=te.columns_)

# Apply the Apriori algorithm with a minimum support of 50%
# (min_support=0.5; the original comment incorrectly said 60%) and keep
# the item names instead of column indices in the itemsets.
frequent_itemsets = apriori(df, min_support=0.5, use_colnames=True)
frequent_itemsets


Out[24]:
support itemsets
0 0.539744 (Altissimo)
1 0.929487 (Nao)
2 1.000000 (Sim)
3 0.853846 (Zona_Sul)
4 0.500000 (Nao, Altissimo)
5 0.539744 (Altissimo, Sim)
6 0.929487 (Nao, Sim)
7 0.787179 (Zona_Sul, Nao)
8 0.853846 (Zona_Sul, Sim)
9 0.500000 (Nao, Altissimo, Sim)
10 0.787179 (Zona_Sul, Nao, Sim)

In [25]:
from mlxtend.frequent_patterns import association_rules

# Derive association rules from the frequent itemsets, keeping only those
# with confidence of at least 0.6; the bare trailing expression renders
# the resulting rule table.
rules_by_confidence = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)
rules_by_confidence


Out[25]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction
0 (Altissimo) (Nao) 0.539744 0.929487 0.500000 0.926366 0.996642 -0.001685 0.957610
1 (Altissimo) (Sim) 0.539744 1.000000 0.539744 1.000000 1.000000 0.000000 inf
2 (Nao) (Sim) 0.929487 1.000000 0.929487 1.000000 1.000000 0.000000 inf
3 (Sim) (Nao) 1.000000 0.929487 0.929487 0.929487 1.000000 0.000000 1.000000
4 (Zona_Sul) (Nao) 0.853846 0.929487 0.787179 0.921922 0.991861 -0.006460 0.903107
5 (Nao) (Zona_Sul) 0.929487 0.853846 0.787179 0.846897 0.991861 -0.006460 0.954608
6 (Zona_Sul) (Sim) 0.853846 1.000000 0.853846 1.000000 1.000000 0.000000 inf
7 (Sim) (Zona_Sul) 1.000000 0.853846 0.853846 0.853846 1.000000 0.000000 1.000000
8 (Nao, Altissimo) (Sim) 0.500000 1.000000 0.500000 1.000000 1.000000 0.000000 inf
9 (Altissimo, Sim) (Nao) 0.539744 0.929487 0.500000 0.926366 0.996642 -0.001685 0.957610
10 (Altissimo) (Nao, Sim) 0.539744 0.929487 0.500000 0.926366 0.996642 -0.001685 0.957610
11 (Zona_Sul, Nao) (Sim) 0.787179 1.000000 0.787179 1.000000 1.000000 0.000000 inf
12 (Zona_Sul, Sim) (Nao) 0.853846 0.929487 0.787179 0.921922 0.991861 -0.006460 0.903107
13 (Nao, Sim) (Zona_Sul) 0.929487 0.853846 0.787179 0.846897 0.991861 -0.006460 0.954608
14 (Zona_Sul) (Nao, Sim) 0.853846 0.929487 0.787179 0.921922 0.991861 -0.006460 0.903107
15 (Nao) (Zona_Sul, Sim) 0.929487 0.853846 0.787179 0.846897 0.991861 -0.006460 0.954608
16 (Sim) (Zona_Sul, Nao) 1.000000 0.787179 0.787179 0.787179 1.000000 0.000000 1.000000

In [26]:
# Rule Generation and Selection Criteria
# NOTE(review): per Out[25] above, every rule in this dataset has lift <= 1.0,
# so min_threshold=1.2 on lift filters out everything and `rules` is an
# empty DataFrame — which is why the filtering cells below all display
# empty results. Lower the threshold (or use another metric) to get rules.
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.2)
rules


Out[26]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction

In [27]:
# Add a helper column with the number of items in each antecedent so rules
# can later be filtered by antecedent size; passing `len` directly to
# .apply is the idiomatic form of `lambda x: len(x)`.
rules["antecedent_len"] = rules["antecedents"].apply(len)
rules


Out[27]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction antecedent_len

In [28]:
# Keep only rules with a multi-item antecedent, high confidence, and a
# lift comfortably above independence; named masks keep the filter readable.
multi_item_antecedent = rules['antecedent_len'] >= 2
high_confidence = rules['confidence'] > 0.75
high_lift = rules['lift'] > 1.2
rules[multi_item_antecedent & high_confidence & high_lift]


Out[28]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction antecedent_len

In [29]:
# Look up the rules produced by one specific antecedent itemset.
# NOTE(review): the original filtered on {'Eggs', 'Kidney Beans'} — items
# copied from the mlxtend documentation example that never occur in this
# crimes dataset — so it could only ever match nothing. Use an itemset that
# actually appears in Out[25] instead (still empty in this run, because
# `rules` itself is empty under the lift >= 1.2 threshold above).
rules[rules['antecedents'] == frozenset({'Zona_Sul', 'Nao'})]


Out[29]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction antecedent_len

In [30]:
# Frequent Itemsets with Incomplete Antecedent and Consequent Information
import pandas as pd
from mlxtend.frequent_patterns import association_rules

# Hand-built frequent-itemset table: only itemsets and their supports are
# known (no per-subset supports, so downstream metrics cannot be derived).
# Renamed from `dict`, which shadowed the Python builtin of the same name.
itemset_data = {'itemsets': [['177', '176'], ['177', '179'],
                             ['176', '178'], ['176', '179'],
                             ['93', '100'], ['177', '178'],
                             ['177', '176', '178']],
                'support': [0.253623, 0.253623, 0.217391,
                            0.217391, 0.181159, 0.108696, 0.108696]}

freq_itemsets = pd.DataFrame(itemset_data)
# Keep the frame as the cell's LAST expression so the notebook renders it;
# in the original, the trailing import statement suppressed this display.
freq_itemsets

In [31]:
# With support_only=True, association_rules computes only each rule's
# support. Antecedent/consequent support, confidence, lift, leverage and
# conviction come back as NaN because the itemset table above lacks the
# per-subset supports needed to derive them (matching Out[31] below).
res = association_rules(freq_itemsets, support_only=True, min_threshold=0.1)
res


Out[31]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction
0 (176) (177) NaN NaN 0.253623 NaN NaN NaN NaN
1 (177) (176) NaN NaN 0.253623 NaN NaN NaN NaN
2 (179) (177) NaN NaN 0.253623 NaN NaN NaN NaN
3 (177) (179) NaN NaN 0.253623 NaN NaN NaN NaN
4 (176) (178) NaN NaN 0.217391 NaN NaN NaN NaN
5 (178) (176) NaN NaN 0.217391 NaN NaN NaN NaN
6 (176) (179) NaN NaN 0.217391 NaN NaN NaN NaN
7 (179) (176) NaN NaN 0.217391 NaN NaN NaN NaN
8 (100) (93) NaN NaN 0.181159 NaN NaN NaN NaN
9 (93) (100) NaN NaN 0.181159 NaN NaN NaN NaN
10 (177) (178) NaN NaN 0.108696 NaN NaN NaN NaN
11 (178) (177) NaN NaN 0.108696 NaN NaN NaN NaN
12 (176, 177) (178) NaN NaN 0.108696 NaN NaN NaN NaN
13 (176, 178) (177) NaN NaN 0.108696 NaN NaN NaN NaN
14 (177, 178) (176) NaN NaN 0.108696 NaN NaN NaN NaN
15 (176) (177, 178) NaN NaN 0.108696 NaN NaN NaN NaN
16 (177) (176, 178) NaN NaN 0.108696 NaN NaN NaN NaN
17 (178) (176, 177) NaN NaN 0.108696 NaN NaN NaN NaN

In [32]:
# Project away the all-NaN metric columns, keeping only the meaningful ones.
kept_columns = ['antecedents', 'consequents', 'support']
res = res[kept_columns]
res


Out[32]:
antecedents consequents support
0 (176) (177) 0.253623
1 (177) (176) 0.253623
2 (179) (177) 0.253623
3 (177) (179) 0.253623
4 (176) (178) 0.217391
5 (178) (176) 0.217391
6 (176) (179) 0.217391
7 (179) (176) 0.217391
8 (100) (93) 0.181159
9 (93) (100) 0.181159
10 (177) (178) 0.108696
11 (178) (177) 0.108696
12 (176, 177) (178) 0.108696
13 (176, 178) (177) 0.108696
14 (177, 178) (176) 0.108696
15 (176) (177, 178) 0.108696
16 (177) (176, 178) 0.108696
17 (178) (176, 177) 0.108696