In [1]:
import fim
In [2]:
import pandas as pd
In [13]:
# Load the clause/segment/relation table; the first CSV column is used as
# the row index.
df = pd.read_csv(
    'clause-segment-relation.csv',
    index_col=0,
)
In [14]:
# Preview the first five rows of the loaded table.
df.head(n=5)
Out[14]:
In [22]:
# Positional lookup of the first row.
row0 = df.iloc[0]
# Number of rows in the DataFrame.
nrows = df.shape[0]
In [58]:
# One entry per DataFrame row, in row order; each entry is the array of
# that row's column values (column labels are not included).
tracts = [
    df.iloc[pos].values
    for pos in range(df.shape[0])
]
In [84]:
import fim
def dataframe2arules(dataframe, min_support=10, min_confidence=80):
    """Extract association rules from a DataFrame.

    Each row of ``dataframe`` is handed to ``fim.arules`` as one
    transaction whose items are that row's column values. Column labels
    are not part of the items.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        table to mine; every row becomes one transaction
    min_support : int or float
        minimum support, passed unchanged as ``supp`` to ``fim.arules``
        (default: 10). PyFIM reads a positive value as a percentage of
        the transactions and a negative value as an absolute number of
        transactions, so the default means 10%, NOT 10 rows. Pass a
        negative number (e.g. ``-10``) to require at least that many rows.
    min_confidence : int or float
        minimum confidence of an assoc. rule, passed unchanged as
        ``conf`` to ``fim.arules`` (default: 80%)

    Returns
    -------
    Whatever ``fim.arules`` returns for these transactions.
    """
    # Lazily yield one array of column values per row, so the whole
    # transaction list is never materialised here.
    tracts = (dataframe.iloc[i].values
              for i in range(len(dataframe)))
    return fim.arules(tracts, supp=min_support, conf=min_confidence)
In [76]:
# Mine rules over every row of the table, using the default thresholds.
dataframe2arules(dataframe=df)
# Notes:
#   168 satellites
#   127 S-clause satellites
#   60 (S-clause) condition satellites
#   21 circumstance satellites
Out[76]:
In [63]:
# Mine rules over the rows whose 'clause' value is anything other than 'S'.
dataframe2arules(df.loc[df['clause'].ne('S')])
Out[63]:
In [77]:
# Mine rules over the rows whose 'segment' value is anything other than
# 'satellite'.
dataframe2arules(df.loc[df['segment'].ne('satellite')])
# Notes:
#   24 nuclei
#   8 NP-clause nuclei
Out[77]:
In [83]:
# Mine rules over the rows whose 'relation' value is anything other than
# 'condition'.
dataframe2arules(df.loc[df['relation'].ne('condition')])
Out[83]:
In [ ]: