Bagging with CPD


In [1]:
suppressMessages(library(tree))
load("../transformed data/golub3571.rda")
load("../transformed data/paper9.rda")
# Settings as specified in the paper
p = 40 # number of genes for FLDA
B = 50 # Aggregation predictors
N = 200 # repeat classification N times
d = c(0.05, 0.1,0.25, 0.5, 0.75, 1) # CPD parameter
set.seed(2017)

In [2]:
cbine_data = data.frame(response = factor(total3571_response), scale_golub_merge)
d = 0.75
# implement CPD
CPD = function(d, x1, x2){
    a = runif(nrow(x1), 0, d)
    a*x1+(1-a)*x2
}
# helper function for each bagging with CPD
my_cpdhelper = function(train, test){
    id1 = sample(nrow(train), replace = T)
    id2 = sample(nrow(train), replace = T)
    temp = CPD(d, train[id1, -1], train[id2,-1])
    temp_md = tree(response~., data = data.frame(temp, response = train$response[id1]))
    predict(temp_md, test, type = "class")
}
#initialize the error vector
cpd_error = numeric(N)
# repeat N times
for(i in 1:N){
    cpd_index = mysplit(nrow(cbine_data))
    cpd_train = cbine_data[-cpd_index,]
    cpd_test = cbine_data[cpd_index,]
    
    # gene selection
    temp_bw = order(BW(cpd_train[, -1], cpd_train$response), decreasing = T)[1:p]
    cpd_train_t = data.frame(response = cpd_train$response, cpd_train[,temp_bw+1])
    cpd_test_t= data.frame(response = cpd_test$response, cpd_test[,temp_bw+1])
   
    t1 = replicate(B, my_cpdhelper(cpd_train_t, cpd_test_t))
    pred = apply(t1, 1, function(x) ifelse(sum(x == "AML")>sum(x =="ALL"), "AML", "ALL"))
    cpd_error[i] = sum(pred != cpd_test_t$response)
}
resultCPD = c(Median = median(cpd_error), Upper_quartile = quantile(cpd_error, 0.75))

In [3]:
resultCPD


Median
2
Upper_quartile.75%
3