In [1]:
%load_ext rmagic


During startup - Warning messages:
1: Setting LC_TIME failed, using "C" 
2: Setting LC_MONETARY failed, using "C" 
3: Setting LC_PAPER failed, using "C" 
4: Setting LC_MEASUREMENT failed, using "C" 

In [2]:
%%R
library("e1071")

# Root folder of the data snapshot; every path in this notebook is built from it.
dir = "19-05-2014"

# Restore the saved R objects from the snapshot's "svms" folder into the workspace.
# NOTE(review): presumably these files provide `m` and `m_vars`, which the
# evaluation cell reads -- confirm against the script that saved them.
load(file = file.path(dir, "svms", "mutation-event-classifier.svm"))
load(file = file.path(dir, "svms", "mvars.data"))
# Left disabled by the author: load(file = file.path(dir, "svms", "mprogs.data"))

In [3]:
%%R

# Keep text columns as plain character vectors instead of factors.
options(stringsAsFactors=F)

# Read the tab-separated, header-less trace table directly from the gzip file.
# gzfile() already decompresses, so no gzcon() wrapper is needed; because the
# connection is handed over unopened, read.csv opens it and closes it again,
# so no connection is left dangling in the R session.
traces_path = paste(dir, "filtered_traces.csv.gz", sep="/")
more_program_events = read.csv(gzfile(traces_path), sep="\t", header = F)

# Column 4 carries the class label. Only "R" and "B" are accepted as levels;
# any other value becomes NA, which is reported instead of passing silently.
cats = factor(more_program_events[,4], levels = c("R","B"))
unknown_labels = sum(is.na(cats))
if (unknown_labels > 0) {
    warning(unknown_labels, " row(s) have a class label other than \"R\" or \"B\"; they are NA in `cats`")
}

print(nrow(more_program_events))


[1] 2333

In [4]:
%%R

library(tm)

# One text document per row of the trace table: column 2 feeds the "mut"
# corpus and column 3 the "evs" corpus.
# NOTE(review): going by the variable names these are mutation and event
# traces respectively -- confirm against the file layout.
mut_more_corpus <- Corpus(VectorSource(more_program_events[, 2]))
evs_more_corpus <- Corpus(VectorSource(more_program_events[, 3]))

# Show the summary of each corpus, mutation corpus first.
invisible(lapply(list(mut_more_corpus, evs_more_corpus), print))


A corpus with 2333 text documents
A corpus with 2333 text documents

In [5]:
%%R

# Term counts per document for each corpus.
mut_more_dm  = DocumentTermMatrix(mut_more_corpus)
evs_more_dm  = DocumentTermMatrix(evs_more_corpus)

# Densify with as.matrix() rather than capturing the value of inspect():
# inspect() is a display function, so relying on it needed a
# sink("/dev/null") to hide its printout (not portable, and output stays
# diverted if anything fails before the matching sink()), and what it
# returns is not guaranteed to be the full matrix.
mut_more_dm_df = as.data.frame(as.matrix(mut_more_dm))
# One label per document, in the same row order as the trace table.
# Note: a term literally named "class" would be overwritten by this column.
mut_more_dm_df["class"] = cats

evs_more_dm_df = as.data.frame(as.matrix(evs_more_dm))
evs_more_dm_df["class"] = cats

In [6]:
%%R

# Build a class-balanced evaluation set from the mutation term frame.
# NOTE(review): evs_more_dm_df is not used here, so only mutation terms reach
# the evaluation set -- confirm this is intended for every classifier scored on it.

# Fixed seed so the random subsample (and every metric derived from it) is
# reproducible on Restart & Run All. The value itself is arbitrary.
set.seed(20140519)

# which() drops rows whose class is NA; plain logical indexing would turn
# each of them into an all-NA row.
robust_cases = mut_more_dm_df[which(mut_more_dm_df$class == "R"),]
buggy_cases = mut_more_dm_df[which(mut_more_dm_df$class == "B"),]

# Size of each class after balancing: the smaller of the two. Using the robust
# count unconditionally would index past the end of buggy_cases whenever it is
# the smaller class, silently padding it with NA rows.
n = min(nrow(robust_cases), nrow(buggy_cases))

# Shuffle each class and keep the first n rows; seq_len() keeps n == 0 safe
# (1:0 would count down and select row 1).
robust_cases = robust_cases[sample(nrow(robust_cases))[seq_len(n)],]
buggy_cases = buggy_cases[sample(nrow(buggy_cases))[seq_len(n)],]

print(nrow(robust_cases))
print(nrow(buggy_cases))


[1] 837
[1] 837

In [7]:
%%R

library("caret")

# Evaluation set: the balanced robust and buggy samples stacked together,
# split into predictors (everything except "class") and reference labels.
test = rbind(robust_cases, buggy_cases)

x_test = test[,names(test) != "class"]
y_test  = test[,"class"]

x_vars = names(x_test)

# Add, as all-zero columns, every variable listed in m_vars that this data
# set does not contain, so the predictors cover the names in m_vars.
m_vars = m_vars[m_vars != "class"]
missing_vars = m_vars[! m_vars %in% x_vars]
if (length(missing_vars) > 0) {
    x_test[,missing_vars] = 0
}

#Test data summary
print(table(Reference=y_test))

# NOTE(review): both classifiers are scored on the same x_test, which is built
# from the mutation term frame only; event terms appear solely as the
# zero-filled columns above. Confirm this is the intended input for the
# mutation-event classifier.
classifiers = list(
    list(file = "mutation-classifier.svm",       label = "Mutation only classifier:"),
    list(file = "mutation-event-classifier.svm", label = "Mutation-event classifier:")
)

for (clf in classifiers) {
    # load() returns the names of the objects it restored. Require `m` to be
    # among them; otherwise predict() would silently reuse whatever `m` an
    # earlier cell left behind and report it under the wrong label.
    loaded = load(file = paste(dir, "svms", clf$file, sep="/"))
    if (!("m" %in% loaded)) {
        stop("'", clf$file, "' does not contain an object named 'm'")
    }

    z = predict(m,x_test)

    print(clf$label)
    print(confusionMatrix(z, y_test))
}


Loading required package: lattice
Loading required package: ggplot2
Reference
  R   B 
837 837 
[1] "Mutation only classifier:"
Confusion Matrix and Statistics

          Reference
Prediction   R   B
         R 496 202
         B 341 635
                                         
               Accuracy : 0.6756         
                 95% CI : (0.6526, 0.698)
    No Information Rate : 0.5            
    P-Value [Acc > NIR] : < 2.2e-16      
                                         
                  Kappa : 0.3513         
 Mcnemar's Test P-Value : 3.178e-09      
                                         
            Sensitivity : 0.5926         
            Specificity : 0.7587         
         Pos Pred Value : 0.7106         
         Neg Pred Value : 0.6506         
             Prevalence : 0.5000         
         Detection Rate : 0.2963         
   Detection Prevalence : 0.4170         
      Balanced Accuracy : 0.6756         
                                         
       'Positive' Class : R              
                                         
[1] "Mutation-event classifier:"
Confusion Matrix and Statistics

          Reference
Prediction   R   B
         R 394  88
         B 443 749
                                          
               Accuracy : 0.6828          
                 95% CI : (0.6599, 0.7051)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : < 2.2e-16       
                                          
                  Kappa : 0.3656          
 Mcnemar's Test P-Value : < 2.2e-16       
                                          
            Sensitivity : 0.4707          
            Specificity : 0.8949          
         Pos Pred Value : 0.8174          
         Neg Pred Value : 0.6284          
             Prevalence : 0.5000          
         Detection Rate : 0.2354          
   Detection Prevalence : 0.2879          
      Balanced Accuracy : 0.6828          
                                          
       'Positive' Class : R               
                                          

In [ ]: