In [1]:
%load_ext rmagic
In [2]:
%%R
# e1071 is attached up front; presumably it supplies the predict method for the
# saved SVM objects loaded below -- confirm against how the models were trained.
library("e1071")

# Snapshot directory: every data file and saved model in this notebook is
# addressed relative to it. NOTE: the name shadows base::dir(); it is kept
# because later cells refer to `dir`.
dir <- "19-05-2014"

# Restore the saved objects into the workspace. load() recreates them under
# the names they were saved with (presumably `m` for the classifier and
# `m_vars` for the variable list, as those names are used later -- confirm).
load(file = file.path(dir, "svms", "mutation-event-classifier.svm"))
load(file = file.path(dir, "svms", "mvars.data"))
# Not loaded at the moment:
#load(file = paste(dir, "svms", "mprogs.data", sep="/"))
In [3]:
%%R
# Read the gzip-compressed, tab-separated trace table (no header row) and
# derive the class label of every row from its 4th column.
#
# Produces:
#   more_program_events - data frame with one row per trace
#   cats                - factor of labels with levels "R" and "B"
options(stringsAsFactors = FALSE)

# A gzcon() wrapper has to be closed explicitly (closing it also releases the
# wrapped gzfile connection), so the read is guarded with `finally`.
mycon = gzcon(gzfile(file.path(dir, "filtered_traces.csv.gz"), open = "r"))
trace_lines = tryCatch(readLines(mycon), finally = close(mycon))

# Same for the in-memory text connection handed to read.csv(): read.csv() does
# not close a connection that was already open when it received it.
trace_con = textConnection(trace_lines)
more_program_events = tryCatch(
    read.csv(trace_con, sep = "\t", header = FALSE),
    finally = close(trace_con)
)

# factor() silently maps any value outside `levels` to NA; surface that
# instead of letting unlabeled rows flow unnoticed into the later cells.
cats = factor(more_program_events[, 4], levels = c("R", "B"))
if (any(is.na(cats))) {
    warning(sum(is.na(cats)),
            " trace row(s) have a label other than \"R\" or \"B\"; their class is NA")
}

print(nrow(more_program_events))
In [4]:
%%R
library(tm)

# One corpus per text column of the trace table: column 2 feeds the "mut"
# corpus and column 3 the "evs" corpus. Each row becomes one document, so
# document i in either corpus corresponds to row i of more_program_events.
mut_more_corpus <- Corpus(VectorSource(more_program_events[[2]]))
evs_more_corpus <- Corpus(VectorSource(more_program_events[[3]]))

# Show the corpus summaries as a quick sanity check.
print(mut_more_corpus)
print(evs_more_corpus)
In [5]:
%%R
# Turn each corpus into a document-term matrix and then into a dense data
# frame (one row per document, one column per term) with the class label
# appended as a "class" column.
#
# Produces: mut_more_dm, evs_more_dm (document-term matrices) and
#           mut_more_dm_df, evs_more_dm_df (data frames with "class").
mut_more_dm = DocumentTermMatrix(mut_more_corpus)
evs_more_dm = DocumentTermMatrix(evs_more_corpus)

# as.matrix() is the direct way to densify a document-term matrix. inspect()
# is a display helper: it prints its argument (which is why the output used to
# be diverted with sink("/dev/null"), a path that does not exist on every
# platform) and is not guaranteed to return the complete matrix.
mut_more_dm_df = as.data.frame(as.matrix(mut_more_dm))
evs_more_dm_df = as.data.frame(as.matrix(evs_more_dm))

# NOTE(review): if a corpus ever contains the literal term "class", the
# assignment below overwrites that term's column -- confirm this cannot happen.
mut_more_dm_df["class"] = cats
evs_more_dm_df["class"] = cats
In [6]:
%%R
# Draw a class-balanced random sample from the mutation data frame: the same
# number of robust ("R") and buggy ("B") rows, in random order.
#
# Produces:
#   n            - number of rows kept per class
#   robust_cases - n randomly chosen rows with class "R"
#   buggy_cases  - n randomly chosen rows with class "B"

# Fix the RNG state so that Restart & Run All selects the same rows each time.
set.seed(42)

# which() drops rows whose class is NA; plain logical indexing would instead
# insert an all-NA row for every NA label.
robust_cases = mut_more_dm_df[which(mut_more_dm_df$class == "R"), ]
buggy_cases = mut_more_dm_df[which(mut_more_dm_df$class == "B"), ]

# Balance on the smaller class. Sizing by the robust class alone produced
# all-NA rows whenever there were fewer buggy than robust cases.
n = min(nrow(robust_cases), nrow(buggy_cases))

# sample.int() + seq_len() stay correct for empty classes, where sample(0)
# and 1:0 do not yield empty selections.
rsample = sample.int(nrow(robust_cases))
robust_cases = robust_cases[rsample[seq_len(n)], ]
rsample = sample.int(nrow(buggy_cases))
buggy_cases = buggy_cases[rsample[seq_len(n)], ]

print(nrow(robust_cases))
print(nrow(buggy_cases))
In [7]:
%%R
library("caret")

# Evaluation set: the balanced robust and buggy samples stacked together,
# split into the feature columns and the reference labels.
test = rbind(robust_cases, buggy_cases)
x_test = test[, !(names(test) == "class")]
y_test = test[["class"]]

# Align the feature columns with the variable list in m_vars: every listed
# variable that does not occur in the test data is added as an all-zero column.
x_vars = names(x_test)
m_vars = m_vars[!(m_vars == "class")]
missing_vars = m_vars[is.na(match(m_vars, x_vars))]
x_test[, missing_vars] = 0

# Test data summary
print(table(Reference = y_test))

# Score the same test set with each saved classifier in turn. Every load() is
# expected to (re)define `m`, which predict() then uses -- TODO confirm the
# object name stored in the .svm files.
# NOTE(review): x_test is built from the mutation features only; whether the
# mutation-event model needs additional event features cannot be told from
# this notebook -- confirm.
classifier_files = c(
    "Mutation only classifier:"  = "mutation-classifier.svm",
    "Mutation-event classifier:" = "mutation-event-classifier.svm"
)
for (heading in names(classifier_files)) {
    load(file = file.path(dir, "svms", classifier_files[[heading]]))
    z = predict(m, x_test)
    print(heading)
    print(confusionMatrix(z, y_test))
}
In [ ]: