In [1]:
suppressMessages(library(e1071))
suppressMessages(library(fastAdaboost))
suppressMessages(library(caret))
suppressMessages(library(sparsediscrim))
suppressMessages(library(tree))
suppressMessages(library(fastAdaboost))
suppressMessages(library(bnlearn))
library(ropls)
library(MASS)
# Seed the RNG once so the randomized steps later in this notebook
# (e.g. sample() and runif()) are repeatable on a fresh run.
set.seed(201703)
In [2]:
# Load the preprocessed data. The rest of the notebook expects this file to
# provide: train_kmeans, golub_train_r, test_kmeans, golub_test_r
# (presumably feature matrices and AML/ALL label vectors -- TODO confirm).
load("../transformed data/paper29.rda")
In [3]:
# Per-column class-separation score.
#   train_d: samples in rows, features in columns.
#   train_r: class label per row; rows labelled "AML" and "ALL" are used.
# Returns, for each column, (mean_AML - mean_ALL) / (sd_AML + sd_ALL).
get_p = function(train_d, train_r){
  aml_rows = train_d[train_r == "AML",]
  all_rows = train_d[train_r == "ALL",]
  mean_gap = colMeans(aml_rows) - colMeans(all_rows)
  sd_total = apply(aml_rows, 2, sd) + apply(all_rows, 2, sd)
  mean_gap/sd_total
}
# Weighted-vote classifier.
# Each feature votes with weight p (from get_p) times its offset from b, the
# midpoint of the two class means. Positive votes are summed into V1, the
# non-positive ones (absolute value) into V2, and the prediction strength is
# (V1 - V2) / (V1 + V2). A sample is called "AML"/"ALL" by the sign of the
# strength when |strength| > 0.3, otherwise "Uncertain".
# Returns list(train = <confusion table>, test = <confusion table>).
classifier1 = function(train_p, train_r, test_p, test_r){
  class_mean_aml = colMeans(train_p[train_r == "AML",])
  class_mean_all = colMeans(train_p[train_r == "ALL",])
  b = (class_mean_aml + class_mean_all)/2
  p = get_p(train_p, train_r)

  # Shared scoring routine: the same weights and midpoints are applied to
  # both the training and the test samples.
  label_samples = function(expr){
    votes = t(p*t(sweep(expr, 2, b)))
    v_pos = apply(votes, 1, function(v) sum(v[v > 0]))
    v_neg = abs(apply(votes, 1, function(v) sum(v[v <= 0])))
    strength = (v_pos - v_neg)/(v_pos + v_neg)
    ifelse(abs(strength) > 0.3, ifelse(strength > 0, "AML", "ALL"), "Uncertain")
  }

  train_calls = label_samples(train_p)
  test_calls = label_samples(test_p)
  list(
    train = table(Train_Predict = train_calls, Train_Actual = train_r),
    test = table(Test_Predict = test_calls, Test_Actual = test_r)
  )
}
In [4]:
# Run the weighted-vote classifier on the loaded data and display both tables.
paper1 <- classifier1(train_p = train_kmeans, train_r = golub_train_r,
                      test_p = test_kmeans, test_r = golub_test_r)
paper1[["train"]]
paper1[["test"]]
In [5]:
# 1-nearest-neighbour helper.
#   new_s:       feature vector of the sample to classify.
#   train:       reference samples in rows.
#   train_label: label for each row of `train`.
# Returns the label of the single reference row with the highest Pearson
# correlation to `new_s`.
cl_nn_helper = function(new_s, train, train_label){
  # use Pearson correlation (the default method of cor())
  corr = apply(train, 1, cor, new_s)
  # which.max() yields exactly one index: the first maximum on ties, with NA
  # correlations ignored. Comparing with corr == max(corr) could return
  # several labels (ties) or only NAs (any NA correlation), which breaks the
  # apply()/table() calls in the caller.
  train_label[which.max(corr)]
}
# Nearest-neighbour classifier built on cl_nn_helper.
# Every row of train_p and test_p is labelled using the training set as the
# reference. Note the training rows are part of their own reference set.
# Returns list(train = <confusion table>, test = <confusion table>).
classifier3nn = function(train_p, train_r, test_p, test_r){
  nearest_label = function(sample_row) cl_nn_helper(sample_row, train_p, train_r)
  fitted_train = apply(train_p, 1, nearest_label)
  fitted_test = apply(test_p, 1, nearest_label)
  list(
    train = table(Train_Predict = fitted_train, Train_Actual = train_r),
    test = table(Test_Predict = fitted_test, Test_Actual = test_r)
  )
}
# Run the nearest-neighbour classifier and display both tables.
paper3nn <- classifier3nn(train_p = train_kmeans, train_r = golub_train_r,
                          test_p = test_kmeans, test_r = golub_test_r)
paper3nn[["train"]]
paper3nn[["test"]]
In [6]:
# linear SVM
# Fits an e1071 SVM with a linear kernel on the training data and tabulates
# its predictions on the training and test sets.
# Returns list(train = <confusion table>, test = <confusion table>).
classifier3lsvm = function(train_p, train_r, test_p, test_r){
  r_train = data.frame(train_p, Y = factor(train_r))
  r_test = data.frame(test_p, Y = factor(test_r))
  # kernel must be set explicitly: svm() defaults to the radial kernel, so
  # omitting it does not give a linear SVM.
  svm_linear = svm(Y~., data = r_train, kernel = "linear")
  svm_l_trpr = predict(svm_linear, r_train)
  svm_l_tepr = predict(svm_linear, newdata = r_test)
  train_table = table(Train_Predicted = svm_l_trpr, Train_Actual = train_r)
  test_table = table(Test_Predicted = svm_l_tepr, Test_Actual = test_r)
  return(list(train = train_table, test = test_table))
}
# Run classifier3lsvm and display both tables.
paper3lsvm <- classifier3lsvm(train_p = train_kmeans, train_r = golub_train_r,
                              test_p = test_kmeans, test_r = golub_test_r)
paper3lsvm[["train"]]
paper3lsvm[["test"]]
In [7]:
# SVM with a degree-2 polynomial kernel (gamma = 0.01, coef0 = 100).
# Fits on the training data and tabulates predictions for both sets.
# Returns list(train = <confusion table>, test = <confusion table>).
classifier3qsvm = function(train_p, train_r, test_p, test_r){
  fit_frame = data.frame(train_p, Y = factor(train_r))
  eval_frame = data.frame(test_p, Y = factor(test_r))
  quad_fit = svm(Y ~ ., data = fit_frame,
                 kernel = "polynomial", degree = 2, gamma = 0.01, coef0 = 100)
  fitted_train = predict(quad_fit, fit_frame)
  fitted_test = predict(quad_fit, newdata = eval_frame)
  list(
    train = table(Train_Predicted = fitted_train, Train_Actual = train_r),
    test = table(Test_Predicted = fitted_test, Test_Actual = test_r)
  )
}
# Run the quadratic-kernel SVM and display both tables.
paper3qsvm <- classifier3qsvm(train_p = train_kmeans, train_r = golub_train_r,
                              test_p = test_kmeans, test_r = golub_test_r)
paper3qsvm[["train"]]
paper3qsvm[["test"]]
In [8]:
# AdaBoost classifier (fastAdaboost::adaboost, third argument 100).
# predict() on the fitted model returns a list; its $class element is
# tabulated against the actual labels.
# Returns list(train = <confusion table>, test = <confusion table>).
classifier3Ada = function(train_p, train_r, test_p, test_r){
  fit_frame = data.frame(train_p, Y = factor(train_r))
  eval_frame = data.frame(test_p, Y = factor(test_r))
  boosted = adaboost(Y ~ ., data = fit_frame, 100)
  out_train = predict(boosted, fit_frame)
  out_test = predict(boosted, newdata = eval_frame)
  list(
    train = table(Train_Predict = out_train$class, Train_Actual = train_r),
    test = table(Test_Predict = out_test$class, Test_Actual = test_r)
  )
}
# Run the AdaBoost classifier and display both tables.
paper3Ada <- classifier3Ada(train_p = train_kmeans, train_r = golub_train_r,
                            test_p = test_kmeans, test_r = golub_test_r)
paper3Ada[["train"]]
paper3Ada[["test"]]
In [9]:
# PCA-based dimension reduction.
# Fits ropls::opls on train_p alone with K components, takes the loading
# matrix, and multiplies both data sets by it. train_r and test_r are
# accepted but not used here.
# Returns list(pca_train = <train scores>, pca_test = <test scores>).
pca_helper = function(train_p, train_r, test_p, test_r, K=3){
  loadings = getLoadingMN(opls(train_p, printL = FALSE, predI = K))
  # Same projection for both sets: transpose of (loadings' %*% data').
  project = function(x) t(t(loadings) %*% t(x))
  list(pca_train = project(train_p), pca_test = project(test_p))
}
# PLS-based dimension reduction.
# Fits ropls::opls on train_p with train_r as the response and K components,
# takes the loading matrix, and multiplies both data sets by it. test_r is
# accepted but not used here.
# Returns list(pls_train = <train scores>, pls_test = <test scores>).
pls_helper = function(train_p, train_r, test_p, test_r, K=3){
  loadings = getLoadingMN(opls(train_p, train_r, printL = FALSE, predI = K))
  # Same projection for both sets: transpose of (loadings' %*% data').
  project = function(x) t(t(loadings) %*% t(x))
  list(pls_train = project(train_p), pls_test = project(test_p))
}
# Logistic regression on reduced features.
#   dr: "pca" uses pca_helper; any other value uses pls_helper.
# The model is fitted through caret::train (method "glm", binomial family)
# with leave-one-out cross-validation.
# Returns list(train, test, re): the two confusion tables plus a named vector
# with the LOOCV accuracy reported by caret and the test-set accuracy.
classifier6logit = function(train_p, train_r, test_p, test_r, dr = "pca"){
  if(dr == "pca"){
    reduced = pca_helper(train_p, train_r, test_p, test_r)
    scores_train = reduced$pca_train
    scores_test = reduced$pca_test
  }else{
    reduced = pls_helper(train_p, train_r, test_p, test_r)
    scores_train = reduced$pls_train
    scores_test = reduced$pls_test
  }
  train_data = data.frame(response = train_r, scores_train)
  loocv = trainControl(method = "LOOCV")
  logit_fit = train(response ~ ., data = train_data, method = "glm",
                    family = "binomial", trControl = loocv)
  pred_train = predict(logit_fit)
  pred_test = predict(logit_fit, data.frame(scores_test))
  accuracy = c(LOOCV = logit_fit$results$Accuracy, Test = mean(pred_test == test_r))
  list(
    train = table(Train_Predict = pred_train, Train_Actual = train_r),
    test = table(Test_Predict = pred_test, Test_Actual = test_r),
    re = accuracy
  )
}
# Quadratic discriminant analysis on reduced features.
#   dr: "pca" uses pca_helper; any other value uses pls_helper.
# The model is fitted through caret::train (method "qda") with leave-one-out
# cross-validation.
# Returns list(train, test, re): the two confusion tables plus a named vector
# with the LOOCV accuracy reported by caret and the test-set accuracy.
classifier6qda = function(train_p, train_r, test_p, test_r, dr= "pca"){
  if(dr == "pca"){
    reduced = pca_helper(train_p, train_r, test_p, test_r)
    scores_train = reduced$pca_train
    scores_test = reduced$pca_test
  }else{
    reduced = pls_helper(train_p, train_r, test_p, test_r)
    scores_train = reduced$pls_train
    scores_test = reduced$pls_test
  }
  train_data = data.frame(response = train_r, scores_train)
  loocv = trainControl(method = "LOOCV")
  qda_fit = train(response ~ ., data = train_data, method = "qda", trControl = loocv)
  pred_train = predict(qda_fit)
  pred_test = predict(qda_fit, data.frame(scores_test))
  accuracy = c(LOOCV = qda_fit$results$Accuracy, Test = mean(pred_test == test_r))
  list(
    train = table(Train_Predict = pred_train, Train_Actual = train_r),
    test = table(Test_Predict = pred_test, Test_Actual = test_r),
    re = accuracy
  )
}
In [10]:
# Suppress warnings from here on, then run each reduction/classifier pairing
# and display its training and test tables.
options(warn = -1)

pca_logit <- classifier6logit(train_p = train_kmeans, train_r = golub_train_r,
                              test_p = test_kmeans, test_r = golub_test_r)
print("pca, logit")
pca_logit[["train"]]
pca_logit[["test"]]

pca_qda <- classifier6qda(train_p = train_kmeans, train_r = golub_train_r,
                          test_p = test_kmeans, test_r = golub_test_r)
print("pca, qda")
pca_qda[["train"]]
pca_qda[["test"]]

pls_logit <- classifier6logit(train_p = train_kmeans, train_r = golub_train_r,
                              test_p = test_kmeans, test_r = golub_test_r, dr = "pls")
print("pls, logit")
pls_logit[["train"]]
pls_logit[["test"]]

pls_qda <- classifier6qda(train_p = train_kmeans, train_r = golub_train_r,
                          test_p = test_kmeans, test_r = golub_test_r, dr = "pls")
print("pls, qda")
pls_qda[["train"]]
pls_qda[["test"]]
In [11]:
# Correlation distance: for every row of `predictor`, one minus its
# correlation (cor() default method) with the vector `test`.
Distance = function(predictor, test){
  row_cor = apply(predictor, 1, function(ref_row) cor(ref_row, test))
  1 - row_cor
}
# k-nearest-neighbour vote for one sample.
#   test:     feature vector of the sample to classify.
#   pk:       number of neighbours.
#   learning: reference samples in rows.
#   response: label for each row of `learning`.
# Takes the pk rows with the smallest Distance() to `test` and returns "AML"
# when AML neighbours strictly outnumber ALL neighbours, otherwise "ALL".
paper9_nn = function(test, pk, learning, response){
  dist_to_refs = Distance(learning, test)
  neighbours = order(dist_to_refs)[1:pk]
  neighbour_labels = response[neighbours]
  n_aml = sum(neighbour_labels == "AML")
  n_all = sum(neighbour_labels == "ALL")
  ifelse(n_aml > n_all, "AML", "ALL")
}
# Leave-one-out cross-validation used to tune k.
#   pk:       number of neighbours to evaluate.
#   learning: samples in rows.
#   response: label for each row of `learning`.
# Each row is classified by paper9_nn using all the other rows as reference.
# Returns the number of misclassified rows (lower is better).
mycv= function(pk,learning,response){
  error = 0
  for(i in seq_len(nrow(learning))){
    # drop = FALSE keeps the reference set a matrix even if one row remains.
    cl = paper9_nn(learning[i,], pk, learning[-i, , drop = FALSE], response[-i])
    # Count mistakes, not hits: the result is minimised by the caller, so
    # counting correct calls would select the worst k.
    error = error+(cl != response[i])
  }
  error
}
# k-NN classifier with k tuned over the odd values 1, 3, ..., 21.
# mycv is evaluated for every candidate k on the training data and the k
# with the smallest mycv value is used to label both data sets via paper9_nn.
# Returns list(train = <confusion table>, test = <confusion table>).
classifier9nn = function(train_p, train_r, test_p, test_r){
  k_grid = seq(1, 21, 2)
  cv_score = sapply(k_grid, mycv, learning = train_p, response = train_r)
  k_best = k_grid[which.min(cv_score)]
  fitted_train = apply(train_p, 1, paper9_nn, k_best, train_p, train_r)
  fitted_test = apply(test_p, 1, paper9_nn, k_best, train_p, train_r)
  list(
    train = table(Train_Predict = fitted_train, Train_Actual = train_r),
    test = table(Test_Predict = fitted_test, Test_Actual = test_r)
  )
}
# Run the tuned k-NN classifier and display both tables.
paper9nn <- classifier9nn(train_p = train_kmeans, train_r = golub_train_r,
                          test_p = test_kmeans, test_r = golub_test_r)
paper9nn[["train"]]
paper9nn[["test"]]
In [12]:
# Decision tree fitted on train_kmeans / golub_train_r and evaluated on test_kmeans / golub_test_r
# Single classification tree (tree::tree) fitted on the training data.
# Class predictions for both data sets are tabulated against the actual labels.
# Returns list(train = <confusion table>, test = <confusion table>).
classifier9dt = function(train_p, train_r, test_p, test_r){
  fit_frame = data.frame(response = factor(train_r), train_p)
  fitted_tree = tree(response ~ ., data = fit_frame)
  class_for = function(features) predict(fitted_tree, data.frame(features), type = "class")
  fitted_train = class_for(train_p)
  fitted_test = class_for(test_p)
  list(
    train = table(Train_Predict = fitted_train, Train_Actual = train_r),
    test = table(Test_Predict = fitted_test, Test_Actual = test_r)
  )
}
# Run the decision-tree classifier and display both tables.
paper9dt <- classifier9dt(train_p = train_kmeans, train_r = golub_train_r,
                          test_p = test_kmeans, test_r = golub_test_r)
paper9dt[["train"]]
paper9dt[["test"]]
In [13]:
# One bagging round: draw a bootstrap sample of the rows of `train` (which
# must contain a `response` column), fit a tree on it, and return the class
# predictions for `test`.
my_baghelper = function(train, test){
  boot_rows = sample(nrow(train), replace = TRUE)
  boot_tree = tree(response ~ ., data = train[boot_rows, ])
  predict(boot_tree, test, type = "class")
}
# Bagged trees with B bootstrap rounds (my_baghelper).
# Each sample is labelled "AML" when AML votes strictly outnumber ALL votes
# across the rounds, otherwise "ALL". Separate sets of B rounds are drawn for
# the training and the test predictions.
# Returns list(train = <confusion table>, test = <confusion table>).
classifier9bg = function(train_p, train_r, test_p, test_r, B = 50){
  fit_frame = data.frame(response = factor(train_r), train_p)
  majority = function(votes) ifelse(sum(votes == "AML") > sum(votes == "ALL"), "AML", "ALL")
  # Training rounds are drawn before test rounds (keeps the RNG sequence).
  votes_train = replicate(B, my_baghelper(fit_frame, data.frame(train_p)))
  fitted_train = apply(votes_train, 1, majority)
  votes_test = replicate(B, my_baghelper(fit_frame, data.frame(test_p)))
  fitted_test = apply(votes_test, 1, majority)
  list(
    train = table(Train_predict = fitted_train, Train_Actual = train_r),
    test = table(Test_predict = fitted_test, Test_Actual = test_r)
  )
}
# Run the bagged-tree classifier and display both tables.
paper9bg <- classifier9bg(train_p = train_kmeans, train_r = golub_train_r,
                          test_p = test_kmeans, test_r = golub_test_r)
paper9bg[["train"]]
paper9bg[["test"]]
In [14]:
# Random convex blend of two equally shaped data sets.
# One weight per row of x1 is drawn uniformly from [0, d]; the result is
# weight * x1 + (1 - weight) * x2.
CPD = function(x1, x2, d = 0.75){
  n_rows = nrow(x1)
  mix_weight = runif(n_rows, 0, d)
  mix_weight*x1 + (1 - mix_weight)*x2
}
# One bagging round with CPD-blended samples.
# Two bootstrap index sets are drawn from the rows of `train`. The feature
# columns (everything except the first column) of the two draws are blended
# with CPD(), and the blended rows keep the response of the first draw.
# A tree is fitted on that data and its class predictions for `test` returned.
my_cpdhelper = function(train, test){
  first_draw = sample(nrow(train), replace = TRUE)
  second_draw = sample(nrow(train), replace = TRUE)
  blended = CPD(train[first_draw, -1], train[second_draw, -1])
  synthetic = data.frame(blended, response = train$response[first_draw])
  cpd_tree = tree(response ~ ., data = synthetic)
  predict(cpd_tree, test, type = "class")
}
# Bagged trees on CPD-blended bootstrap samples, with B rounds (my_cpdhelper).
# Each sample is labelled "AML" when AML votes strictly outnumber ALL votes
# across the rounds, otherwise "ALL". Separate sets of B rounds are drawn for
# the training and the test predictions.
# Returns list(train = <confusion table>, test = <confusion table>).
classifier9bgcpd = function(train_p, train_r, test_p, test_r, B = 50){
  fit_frame = data.frame(response = factor(train_r), train_p)
  majority = function(votes) ifelse(sum(votes == "AML") > sum(votes == "ALL"), "AML", "ALL")
  # Training rounds are drawn before test rounds (keeps the RNG sequence).
  votes_train = replicate(B, my_cpdhelper(fit_frame, data.frame(train_p)))
  fitted_train = apply(votes_train, 1, majority)
  votes_test = replicate(B, my_cpdhelper(fit_frame, data.frame(test_p)))
  fitted_test = apply(votes_test, 1, majority)
  list(
    train = table(Train_predict = fitted_train, Train_Actual = train_r),
    test = table(Test_predict = fitted_test, Test_Actual = test_r)
  )
}
# Run the CPD-bagging classifier and display both tables.
# This must call classifier9bgcpd; calling classifier9bg here would report
# plain bagging results under the CPD name.
paper9bgcpd = classifier9bgcpd(train_kmeans, golub_train_r, test_kmeans, golub_test_r)
paper9bgcpd$train
paper9bgcpd$test
In [15]:
# Linear discriminant analysis via MASS::lda fitted on the training data.
# The $class element of predict() is tabulated for both data sets.
# B is accepted but not used by this function.
# Returns list(train = <confusion table>, test = <confusion table>).
classifier9flda = function(train_p, train_r, test_p, test_r, B = 50){
  fit_frame = data.frame(response = factor(train_r), train_p)
  lda_fit = MASS::lda(response ~ ., data = fit_frame)
  class_for = function(features) predict(lda_fit, data.frame(features))$class
  fitted_train = class_for(train_p)
  fitted_test = class_for(test_p)
  list(
    train = table(Train_predict = fitted_train, Train_Actual = train_r),
    test = table(Test_predict = fitted_test, Test_Actual = test_r)
  )
}
# Run the LDA classifier and display both tables.
# This must call classifier9flda; calling classifier9bg here would report
# bagging results under the LDA name.
paper9flda = classifier9flda(train_kmeans, golub_train_r, test_kmeans, golub_test_r)
paper9flda$train
paper9flda$test
In [16]:
# Diagonal linear discriminant analysis via dlda() fitted on the training data.
# The $class element of predict() is tabulated for both data sets.
# B is accepted but not used by this function.
# Returns list(train = <confusion table>, test = <confusion table>).
classifier9dlda = function(train_p, train_r, test_p, test_r, B = 50){
  fit_frame = data.frame(response = factor(train_r), train_p)
  dlda_fit = dlda(response ~ ., data = fit_frame)
  class_for = function(features) predict(dlda_fit, data.frame(features))$class
  fitted_train = class_for(train_p)
  fitted_test = class_for(test_p)
  list(
    train = table(Train_predict = fitted_train, Train_Actual = train_r),
    test = table(Test_predict = fitted_test, Test_Actual = test_r)
  )
}
# Run the DLDA classifier and display both tables.
# This must call classifier9dlda; calling classifier9bg here would report
# bagging results under the DLDA name.
paper9dlda = classifier9dlda(train_kmeans, golub_train_r, test_kmeans, golub_test_r)
paper9dlda$train
paper9dlda$test
In [17]:
# Diagonal quadratic discriminant analysis via sparsediscrim::dqda fitted on
# the training data. The $class element of predict() is tabulated for both
# data sets. B is accepted but not used by this function.
# Returns list(train = <confusion table>, test = <confusion table>).
classifier9dqda = function(train_p, train_r, test_p, test_r, B = 50){
  cbine_data = data.frame(response = factor(train_r), train_p)
  # dqda(), not dlda(): fitting dlda here would just repeat classifier9dlda.
  dqda_md = dqda(response~., data = cbine_data)
  dqda_tr = predict(dqda_md, data.frame(train_p))$class
  dqda_te = predict(dqda_md, data.frame(test_p))$class
  train_table = table(Train_predict = dqda_tr, Train_Actual = train_r)
  test_table = table(Test_predict = dqda_te, Test_Actual = test_r)
  return(list(train = train_table, test = test_table))
}
# Run the DQDA classifier and display both tables.
# This must call classifier9dqda; calling classifier9bg here would report
# bagging results under the DQDA name.
paper9dqda = classifier9dqda(train_kmeans, golub_train_r, test_kmeans, golub_test_r)
paper9dqda$train
paper9dqda$test
In [18]:
# Bayesian-network classifier with a fixed structure: one arc from the
# "class" node to every feature node. Parameters are fitted with
# bnlearn::bn.fit and the class node is predicted with method "bayes-lw".
# B is accepted but not used by this function.
# Returns list(train = <confusion table>, test = <confusion table>).
classifier29 = function(train_p, train_r, test_p, test_r, B = 50){
  feature_names = colnames(data.frame(train_p))
  # Make the class column a factor explicitly so it does not depend on
  # data.frame()'s string-handling default.
  train_data = data.frame(train_p, class = factor(train_r))
  test_data = data.frame(test_p)
  eg = empty.graph(c("class", feature_names))
  # One "class" entry per feature; a hard-coded count of 50 only works when
  # the data has exactly 50 feature columns.
  arcs(eg) = matrix(c(rep("class", length(feature_names)),
                      feature_names),
                    ncol = 2, byrow = FALSE,
                    dimnames = list(c(), c("from", "to")))
  fitted = bn.fit(eg, train_data)
  predict_tr = predict(fitted, node = "class", method="bayes-lw", train_data)
  predict_te = predict(fitted, node = "class", method="bayes-lw", test_data)
  train_table = table(Train_predict = predict_tr, Train_Actual = train_r)
  test_table = table(Test_predict = predict_te, Test_Actual = test_r)
  return(list(train = train_table, test = test_table))
}
# Run the Bayesian-network classifier and display both tables.
paper29 <- classifier29(train_p = train_kmeans, train_r = golub_train_r,
                        test_p = test_kmeans, test_r = golub_test_r)
paper29[["train"]]
paper29[["test"]]