In [1]:
# Restore every object stored in the prepared .rda file into the workspace.
# NOTE(review): scale_golub_merge and total3571_response, used further down,
# are presumably among the objects restored here - confirm.
load(file = "../transformed data/golub3571.rda")
# Fix the random number generator state so the random draws below are repeatable.
set.seed(seed = 201703)
In [2]:
# Experiment settings, taken from the paper.
p <- 40   # number of genes used for FLDA
B <- 50   # number of predictors that are aggregated
N <- 200  # number of times the classification is repeated
d <- c(0.05, 0.1, 0.25, 0.5, 0.75, 1)  # values of the CPD parameter
In [3]:
# Train/test split as specified in the paper.
# Given the number of observations n, returns floor(n / 3) distinct indices
# drawn at random (without replacement) from 1..n.
mysplit <- function(n) {
  n_drawn <- floor(n / 3)
  sample.int(n, size = n_drawn)
}
# BW statistic as stated in the paper: for every column (gene) of `predictor`,
# the ratio of the between-group to the within-group sum of squares.
#
# Arguments:
#   predictor  numeric matrix (or an object coercible with as.matrix) with one
#              row per observation and one column per gene.
#   response   class label of each observation, one per row of `predictor`
#              (character or factor). Any set of labels is accepted; the
#              two-class "ALL"/"AML" case is the special case used below.
#
# Returns:
#   numeric vector with one BW ratio per column of `predictor`; column names,
#   if present, are carried over. A column whose within-group sum of squares
#   is zero yields Inf (or NaN when the between-group sum is zero as well).
BW <- function(predictor, response) {
  predictor <- as.matrix(predictor)
  response <- as.character(response)

  if (length(response) != nrow(predictor)) {
    stop("`response` must have one entry per row of `predictor`")
  }
  if (anyNA(response)) {
    stop("`response` must not contain missing class labels")
  }

  overall <- colMeans(predictor)
  between <- numeric(ncol(predictor))
  within <- numeric(ncol(predictor))

  for (cls in unique(response)) {
    # drop = FALSE keeps a matrix even when the class has a single observation.
    cls_rows <- predictor[response == cls, , drop = FALSE]
    cls_mean <- colMeans(cls_rows)

    # Between-group part: class size times squared distance of the class mean
    # from the overall mean.
    between <- between + nrow(cls_rows) * (cls_mean - overall)^2
    # Within-group part: squared deviations of each observation from its own
    # class mean (sweep subtracts cls_mean column by column).
    within <- within + colSums(sweep(cls_rows, 2, cls_mean)^2)
  }

  between / within
}
# Draw one random split and run the feature selection once, so that the same
# split and the same selected genes can be reused for comparison in further
# study.
id <- mysplit(nrow(scale_golub_merge))  # row indices used as the test set
train_p <- scale_golub_merge[-id, ]
train_r <- total3571_response[-id]
test_p <- scale_golub_merge[id, ]
test_r <- total3571_response[id]
# Rank the columns by BW computed on the training rows only and keep the
# 50 highest-ranked ones. head() avoids NA indices if fewer than 50 columns
# exist; TRUE is used instead of T because T is an ordinary variable that can
# be reassigned.
temp_bw <- head(order(BW(train_p, train_r), decreasing = TRUE), 50)
train_BW_predictor <- train_p[, temp_bw]
test_BW_predictor <- test_p[, temp_bw]
# Store the reduced train/test data together with the two helper functions.
save(train_BW_predictor, train_r, test_BW_predictor, test_r, mysplit, BW, file = "../transformed data/paper9.rda")