In [1]:
# Pull in helper definitions from a remote script. `loadlibrary` is not defined
# anywhere in this notebook, so it presumably comes from this file.
# NOTE(review): this executes code fetched from a mutable branch at run time;
# confirm that is acceptable, or pin the URL to a specific commit.
source("https://raw.githubusercontent.com/eogasawara/mylibrary/master/myPreprocessing.R")
# Make the three mining packages available to the cells below: arules
# (apriori, interest measures), arulesViz (rule plots) and arulesSequences
# (read_baskets, cspade).
# NOTE(review): presumably `loadlibrary` attaches the package and may install
# it when missing; verify against the sourced script.
loadlibrary("arules")
loadlibrary("arulesViz")
loadlibrary("arulesSequences")
In [2]:
# Load the AdultUCI example data set into the workspace, then display its
# dimensions and its first six rows.
data("AdultUCI")
dim(AdultUCI)
head(AdultUCI, n = 6L)
In [3]:
# Drop two columns before the remaining numeric columns are discretised and
# the data frame is converted to transactions.
AdultUCI[["fnlwgt"]] <- NULL
AdultUCI[["education-num"]] <- NULL
In [4]:
# Discretise the four remaining numeric columns into ordered factors so that
# every column is categorical before the conversion to transactions.
#
# The labels are handed to cut() directly (with ordered_result = TRUE) instead
# of relabelling afterwards through ordered(cut(...), labels = ...): factor()
# drops levels that do not occur in the data, so the relabelling form fails
# with "invalid 'labels'" as soon as one bin happens to be empty. For data in
# which every bin is populated both forms yield the same ordered factor.

# Age bands: (15,25], (25,45], (45,65], (65,100].
AdultUCI[["age"]] <- cut(
  AdultUCI[["age"]],
  breaks = c(15, 25, 45, 65, 100),
  labels = c("Young", "Middle-aged", "Senior", "Old"),
  ordered_result = TRUE
)

# Weekly working hours: (0,25], (25,40], (40,60], (60,168].
AdultUCI[["hours-per-week"]] <- cut(
  AdultUCI[["hours-per-week"]],
  breaks = c(0, 25, 40, 60, 168),
  labels = c("Part-time", "Full-time", "Over-time", "Workaholic"),
  ordered_result = TRUE
)

# Capital gain: values <= 0 are "None"; the median of the strictly positive
# values separates "Low" from "High".
AdultUCI[["capital-gain"]] <- cut(
  AdultUCI[["capital-gain"]],
  breaks = c(
    -Inf, 0,
    median(AdultUCI[["capital-gain"]][AdultUCI[["capital-gain"]] > 0]),
    Inf
  ),
  labels = c("None", "Low", "High"),
  ordered_result = TRUE
)

# Capital loss: same scheme as capital gain.
AdultUCI[["capital-loss"]] <- cut(
  AdultUCI[["capital-loss"]],
  breaks = c(
    -Inf, 0,
    median(AdultUCI[["capital-loss"]][AdultUCI[["capital-loss"]] > 0]),
    Inf
  ),
  labels = c("None", "Low", "High"),
  ordered_result = TRUE
)

head(AdultUCI)
In [5]:
# Coerce the discretised data frame into an arules "transactions" object; the
# rule-mining cells below operate on this object rather than on the data frame.
AdultTrans <- methods::as(AdultUCI, Class = "transactions")
In [6]:
# Mine association rules with apriori. Only the item "capital-gain=None" may
# appear on the right-hand side; every other item is allowed on the left-hand
# side only. Thresholds: support 0.5, confidence 0.9, rule length 2 to 10.
rules <- apriori(
  AdultTrans,
  parameter = list(
    support = 0.5,
    confidence = 0.9,
    minlen = 2,
    maxlen = 10,
    target = "rules"
  ),
  appearance = list(rhs = "capital-gain=None", default = "lhs"),
  control = NULL
)
inspect(rules)
In [7]:
# Flatten the mined rules into a plain data frame and preview the first rows.
rules_a <- methods::as(rules, Class = "data.frame")
head(rules_a, n = 6L)
In [8]:
# Compute interest measures for the mined rules against the transactions they
# were mined from, and preview the first rows of the result.
imrules <- interestMeasure(x = rules, transactions = AdultTrans)
head(imrules, n = 6L)
In [9]:
# Keep only the rules that is.redundant() does not flag as redundant.
redundant_mask <- is.redundant(rules)
nrules <- rules[!redundant_mask]
In [10]:
# Show the non-redundant rules. The arules:: prefix pins the call to the
# arules generic even if another attached package also exports `inspect`.
arules::inspect(x = nrules)
In [16]:
# Collect the transactions that support the first non-redundant rule.
st <- supportingTransactions(nrules[1], transactions = AdultTrans)
# `st@data@i` holds the sparse-matrix row indices of the supporting
# transactions; unique() guards against an index being listed more than once.
trans <- unique(st@data@i)
length(trans)
# The fraction of supporting transactions, printed next to the support that
# apriori reported for the rule; the two values should agree.
print(c(length(trans) / length(AdultTrans), quality(nrules[1])$support))
Next we collect the transactions (trans) that support rule 1 or rule 2. Note that the combined support of the two rules is not the sum of their individual supports, because a transaction that supports both rules is counted only once.
In [17]:
# Collect the transactions that support either of the first two non-redundant
# rules.
st <- supportingTransactions(nrules[1:2], transactions = AdultTrans)
# unique() collapses transactions that are listed for both rules, so each
# transaction is counted once.
trans <- unique(st@data@i)
length(trans)
# The fraction of transactions supporting at least one of the two rules,
# followed by the individual support of each rule.
print(c(length(trans) / length(AdultTrans), quality(nrules[1:2])$support))
In [18]:
# Set the notebook figure size (7 x 4), then draw the mined rules with the
# default arulesViz plot method.
options(repr.plot.width = 7, repr.plot.height = 4)
plot(x = rules)
In [19]:
# Set the notebook figure size (7 x 4), then draw the mined rules as a
# parallel-coordinates plot with reordering enabled.
options(repr.plot.width = 7, repr.plot.height = 4)
plot(
  rules,
  method = "paracoord",
  control = list(reorder = TRUE)
)
In [20]:
# Read the "zaki.txt" example file shipped with arulesSequences as baskets;
# the leading fields of each line are taken as sequenceID, eventID and SIZE.
zaki_path <- system.file("misc", "zaki.txt", package = "arulesSequences")
x <- read_baskets(
  con = zaki_path,
  info = c("sequenceID", "eventID", "SIZE")
)
# Display the parsed baskets as a data frame.
methods::as(x, Class = "data.frame")
In [21]:
# Mine frequent sequences from `x` with cSPADE at a minimum support of 0.4,
# with verbose progress output enabled.
s1 <- cspade(
  x,
  parameter = list(support = 0.4),
  control = list(verbose = TRUE)
)
# Display the mined sequences as a data frame.
methods::as(s1, Class = "data.frame")
In [ ]: