In [19]:
# Generating Association Rules from Frequent Itemsets
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori

In [22]:
# Load the crimes transactions dataset (one row per transaction).
# Source: fclesio/learning-space GitHub repository.
df = pd.read_csv('https://raw.githubusercontent.com/fclesio/learning-space/master/Datasets/01%20-%20Association%20Rules/Crimes.csv')

# TransactionEncoder expects an iterable of transactions, so convert the
# frame to a plain 2-D array; .to_numpy() is the pandas-recommended
# replacement for the legacy .values attribute and returns the same array here.
dataset = df.to_numpy()

In [24]:
# One-hot encode the transactions: each unique item becomes a boolean column,
# which is the input format the mlxtend frequent-pattern APIs require.
te = TransactionEncoder()

te_ary = te.fit(dataset).transform(dataset)

# Rebuild a DataFrame from the boolean array, labelling columns with the
# item names the encoder learned (te.columns_).
df = pd.DataFrame(te_ary, columns=te.columns_)

# Apply the Apriori algorithm with a minimum support of 50%
# (min_support=0.5; the original comment incorrectly said 60%) and keep
# the item names instead of column indices in the itemsets.
frequent_itemsets = apriori(df, min_support=0.5, use_colnames=True)
frequent_itemsets


Out[24]:
support itemsets
0 0.539744 (Altissimo)
1 0.929487 (Nao)
2 1.000000 (Sim)
3 0.853846 (Zona_Sul)
4 0.500000 (Nao, Altissimo)
5 0.539744 (Altissimo, Sim)
6 0.929487 (Nao, Sim)
7 0.787179 (Zona_Sul, Nao)
8 0.853846 (Zona_Sul, Sim)
9 0.500000 (Nao, Altissimo, Sim)
10 0.787179 (Zona_Sul, Nao, Sim)

In [25]:
from mlxtend.frequent_patterns import association_rules

# Derive association rules from the frequent itemsets, keeping only those
# with confidence of at least 0.6; the bare trailing expression renders
# the resulting rule table.
rules_by_confidence = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)
rules_by_confidence


Out[25]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction
0 (Altissimo) (Nao) 0.539744 0.929487 0.500000 0.926366 0.996642 -0.001685 0.957610
1 (Altissimo) (Sim) 0.539744 1.000000 0.539744 1.000000 1.000000 0.000000 inf
2 (Nao) (Sim) 0.929487 1.000000 0.929487 1.000000 1.000000 0.000000 inf
3 (Sim) (Nao) 1.000000 0.929487 0.929487 0.929487 1.000000 0.000000 1.000000
4 (Zona_Sul) (Nao) 0.853846 0.929487 0.787179 0.921922 0.991861 -0.006460 0.903107
5 (Nao) (Zona_Sul) 0.929487 0.853846 0.787179 0.846897 0.991861 -0.006460 0.954608
6 (Zona_Sul) (Sim) 0.853846 1.000000 0.853846 1.000000 1.000000 0.000000 inf
7 (Sim) (Zona_Sul) 1.000000 0.853846 0.853846 0.853846 1.000000 0.000000 1.000000
8 (Nao, Altissimo) (Sim) 0.500000 1.000000 0.500000 1.000000 1.000000 0.000000 inf
9 (Altissimo, Sim) (Nao) 0.539744 0.929487 0.500000 0.926366 0.996642 -0.001685 0.957610
10 (Altissimo) (Nao, Sim) 0.539744 0.929487 0.500000 0.926366 0.996642 -0.001685 0.957610
11 (Zona_Sul, Nao) (Sim) 0.787179 1.000000 0.787179 1.000000 1.000000 0.000000 inf
12 (Zona_Sul, Sim) (Nao) 0.853846 0.929487 0.787179 0.921922 0.991861 -0.006460 0.903107
13 (Nao, Sim) (Zona_Sul) 0.929487 0.853846 0.787179 0.846897 0.991861 -0.006460 0.954608
14 (Zona_Sul) (Nao, Sim) 0.853846 0.929487 0.787179 0.921922 0.991861 -0.006460 0.903107
15 (Nao) (Zona_Sul, Sim) 0.929487 0.853846 0.787179 0.846897 0.991861 -0.006460 0.954608
16 (Sim) (Zona_Sul, Nao) 1.000000 0.787179 0.787179 0.787179 1.000000 0.000000 1.000000

In [26]:
# Rule Generation and Selection Criteria
# NOTE(review): per Out[25] above, every rule in this dataset has lift <= 1.0,
# so min_threshold=1.2 on lift filters out everything and `rules` is an
# empty DataFrame — which is why the filtering cells below all display
# empty results. Lower the threshold (or use another metric) to get rules.
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.2)
rules


Out[26]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction

In [27]:
# Add a helper column with the number of items in each antecedent so rules
# can later be filtered by antecedent size; passing `len` directly to
# .apply is the idiomatic form of `lambda x: len(x)`.
rules["antecedent_len"] = rules["antecedents"].apply(len)
rules


Out[27]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction antecedent_len

In [28]:
# Keep only rules with a multi-item antecedent, high confidence, and a
# lift comfortably above independence; named masks keep the filter readable.
multi_item_antecedent = rules['antecedent_len'] >= 2
high_confidence = rules['confidence'] > 0.75
high_lift = rules['lift'] > 1.2
rules[multi_item_antecedent & high_confidence & high_lift]


Out[28]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction antecedent_len

In [29]:
# Look up the rules produced by one specific antecedent itemset.
# NOTE(review): the original filtered on {'Eggs', 'Kidney Beans'} — items
# copied from the mlxtend documentation example that never occur in this
# crimes dataset — so it could only ever match nothing. Use an itemset that
# actually appears in Out[25] instead (still empty in this run, because
# `rules` itself is empty under the lift >= 1.2 threshold above).
rules[rules['antecedents'] == frozenset({'Zona_Sul', 'Nao'})]


Out[29]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction antecedent_len

In [30]:
# Frequent Itemsets with Incomplete Antecedent and Consequent Information
import pandas as pd
from mlxtend.frequent_patterns import association_rules

# Hand-built frequent-itemset table: only itemsets and their supports are
# known (no per-subset supports, so downstream metrics cannot be derived).
# Renamed from `dict`, which shadowed the Python builtin of the same name.
itemset_data = {'itemsets': [['177', '176'], ['177', '179'],
                             ['176', '178'], ['176', '179'],
                             ['93', '100'], ['177', '178'],
                             ['177', '176', '178']],
                'support': [0.253623, 0.253623, 0.217391,
                            0.217391, 0.181159, 0.108696, 0.108696]}

freq_itemsets = pd.DataFrame(itemset_data)
# Keep the frame as the cell's LAST expression so the notebook renders it;
# in the original, the trailing import statement suppressed this display.
freq_itemsets

In [31]:
# With support_only=True, association_rules computes only each rule's
# support. Antecedent/consequent support, confidence, lift, leverage and
# conviction come back as NaN because the itemset table above lacks the
# per-subset supports needed to derive them (matching Out[31] below).
res = association_rules(freq_itemsets, support_only=True, min_threshold=0.1)
res


Out[31]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction
0 (176) (177) NaN NaN 0.253623 NaN NaN NaN NaN
1 (177) (176) NaN NaN 0.253623 NaN NaN NaN NaN
2 (179) (177) NaN NaN 0.253623 NaN NaN NaN NaN
3 (177) (179) NaN NaN 0.253623 NaN NaN NaN NaN
4 (176) (178) NaN NaN 0.217391 NaN NaN NaN NaN
5 (178) (176) NaN NaN 0.217391 NaN NaN NaN NaN
6 (176) (179) NaN NaN 0.217391 NaN NaN NaN NaN
7 (179) (176) NaN NaN 0.217391 NaN NaN NaN NaN
8 (100) (93) NaN NaN 0.181159 NaN NaN NaN NaN
9 (93) (100) NaN NaN 0.181159 NaN NaN NaN NaN
10 (177) (178) NaN NaN 0.108696 NaN NaN NaN NaN
11 (178) (177) NaN NaN 0.108696 NaN NaN NaN NaN
12 (176, 177) (178) NaN NaN 0.108696 NaN NaN NaN NaN
13 (176, 178) (177) NaN NaN 0.108696 NaN NaN NaN NaN
14 (177, 178) (176) NaN NaN 0.108696 NaN NaN NaN NaN
15 (176) (177, 178) NaN NaN 0.108696 NaN NaN NaN NaN
16 (177) (176, 178) NaN NaN 0.108696 NaN NaN NaN NaN
17 (178) (176, 177) NaN NaN 0.108696 NaN NaN NaN NaN

In [32]:
# Project away the all-NaN metric columns, keeping only the meaningful ones.
kept_columns = ['antecedents', 'consequents', 'support']
res = res[kept_columns]
res


Out[32]:
antecedents consequents support
0 (176) (177) 0.253623
1 (177) (176) 0.253623
2 (179) (177) 0.253623
3 (177) (179) 0.253623
4 (176) (178) 0.217391
5 (178) (176) 0.217391
6 (176) (179) 0.217391
7 (179) (176) 0.217391
8 (100) (93) 0.181159
9 (93) (100) 0.181159
10 (177) (178) 0.108696
11 (178) (177) 0.108696
12 (176, 177) (178) 0.108696
13 (176, 178) (177) 0.108696
14 (177, 178) (176) 0.108696
15 (176) (177, 178) 0.108696
16 (177) (176, 178) 0.108696
17 (178) (176, 177) 0.108696