Cases with more than 10% of couples (sens,spec) such that the expected number of trials concerning the disease after reclassification is bellow 0 (ie the estimated amount of false positives is higher than the number of observed positives).
We will analyse each case and modify distributions of sensitivity and specificity to avoid that problem, following the directions of Fox et al. Int J Epidemiol 2005
In [1]:
data <- read.table("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/database_RCTs_regions_27diseases.txt")
Lgbd <- lapply(as.character(data$GBD27),function(x){as.numeric(unlist(strsplit(x,"&")))})
In [2]:
Mgbd <- read.table("../Data/27_gbd_groups.txt")
sms <- list.files("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/Metrics_over_repl/")
dis <- as.numeric(substr(sms,25,nchar(sms)-4))
Mgbd$x[!1:27%in%dis]
In [3]:
PERF <- read.csv('../Tables/Performances_per_27disease_data.csv')
PERF[!1:27%in%dis,]
Ntst <- sum(PERF[1,grep("Dis",names(PERF))])
Ntst
In [4]:
d <- 23
ss_sp <- read.table(paste(c("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/",as.character(d),"/Sens_spec.txt"),collapse=""))
options(repr.plot.width=9, repr.plot.height=3)
par(mfrow=c(1,4))
for(i in 1:ncol(ss_sp)) hist(ss_sp[,i],xlim=c(0,1),main=colnames(ss_sp)[i],xlab=NULL)
In [5]:
is_dis <- sapply(Lgbd,function(x){d%in%x})
table(is_dis)
In [6]:
sens <- ss_sp$sens_dis
spec <- ss_sp$spec_dis
N <- nrow(data)
a_dis <- sum(is_dis)
b_dis <- N-a_dis
As <- (a_dis-(1-spec)*N)/(sens - (1-spec))
Bs <- N-As
T1 <- sens*As
T0 <- spec*Bs
F1 <- (1-spec)*Bs
F0 <- (1-sens)*As
PPV_dis <- T1/(T1+F1)
NPV_dis <- T0/(T0+F0)
In [7]:
table(As<0)
In [8]:
table(sens - (1-spec) <0)
In [9]:
table(a_dis-(1-spec)*N<0)
In [10]:
1-a_dis/N
We need to avoid cases in which specificity is lower than 1-a_dis/N.
What if we consider that we observed the number of false positives in the test set with a certain error
In [11]:
ts <- binom.test(PERF$TN_Dis[d],PERF$FP_Dis[d] + PERF$TN_Dis[d],conf.level = 0.5)
binom.test(PERF$TN_Dis[d],PERF$FP_Dis[d] + PERF$TN_Dis[d],conf.level = 0.5)
In [12]:
(1-ts$conf.int)*(PERF$FP_Dis[d] + PERF$TN_Dis[d])
In [13]:
New_FP <- ceiling((1-ts$conf.int)*(PERF$FP_Dis[d] + PERF$TN_Dis[d]))[2]
New_TN <- PERF$TN_Dis[d] + (PERF$FP_Dis[d] - New_FP)
In [14]:
New_FP
New_TN
In [15]:
set.seed(123)
spec_new <- rbeta(10000,New_TN+1,New_FP+1)
In [16]:
table(a_dis-(1-spec_new)*N<0)
In [17]:
hist(spec,col=rgb(0.8,0.8,0.8),xlim=c(0.975,1))
hist(spec_new, col=rgb(0.6,0.6,0.6),xlim=c(0.975,1))
In [18]:
#Adding new specificity
ss_sp$spec_dis <- spec_new
dir.create(paste("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates_add/",as.character(d),sep=""))
write.table(ss_sp,
paste(c("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates_add/",
as.character(d),"/Sens_spec.txt"),collapse=""))
In [19]:
#Simulating replicates
#which trials concern the disease
is_dis <- sapply(Lgbd,function(x){d%in%x})
#which trials concern another disease
is_oth <- sapply(Lgbd,function(x){sum(setdiff(1:27,d)%in%x)>0})
sens <- ss_sp$sens_dis
spec <- ss_sp$spec_dis
a_dis <- sum(is_dis)
b_dis <- N-a_dis
As <- (a_dis-(1-spec)*N)/(sens - (1-spec))
Bs <- N-As
T1 <- sens*As
T0 <- spec*Bs
F1 <- (1-spec)*Bs
F0 <- (1-sens)*As
PPV_dis <- T1/(T1+F1)
NPV_dis <- T0/(T0+F0)
sens <- ss_sp$sens_oth
spec <- ss_sp$spec_oth
a_oth <- sum(is_oth)
b_oth <- N-a_oth
As <- (a_oth-(1-spec)*N)/(sens - (1-spec))
Bs <- N-As
T1 <- sens*As
T0 <- spec*Bs
F1 <- (1-spec)*Bs
F0 <- (1-sens)*As
PPV_oth <- T1/(T1+F1)
NPV_oth <- T0/(T0+F0)
false_it <- PPV_dis<0 | PPV_dis>1 |
NPV_dis<0 | NPV_dis>1 |
PPV_oth<0 | PPV_oth>1 |
NPV_oth<0 | NPV_oth>1
print(paste(c(d,"has",sum(false_it),"suppressed false iterations"
),collapse=" "))
PPV_dis <- PPV_dis[!false_it]
NPV_dis <- NPV_dis[!false_it]
PPV_oth <- PPV_oth[!false_it]
NPV_oth <- NPV_oth[!false_it]
In [20]:
t0 <- proc.time()
L <- list()
#Simulation: reclassifying each trial
for(k in 1:length(PPV_dis)){
tp_dis <- runif(a_dis)
tn_dis <- runif(b_dis)
recl_dis <- is_dis
recl_dis[recl_dis==TRUE][tp_dis>PPV_dis[k]] <- FALSE
recl_dis[recl_dis==FALSE][tn_dis>NPV_dis[k]] <- TRUE
rt <- as.numeric(recl_dis)
#Oth_dis
tp_oth <- runif(a_oth)
tn_oth <- runif(b_oth)
recl_oth <- is_oth
recl_oth[recl_oth==TRUE][tp_oth>PPV_oth[k]] <- FALSE
recl_oth[recl_oth==FALSE][tn_oth>NPV_oth[k]] <- TRUE
write.table(data.frame(recl_dis=as.numeric(recl_dis),recl_oth=as.numeric(recl_oth)),
paste(c("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates_add/",
as.character(d),"/Reclassification_",
"_",k,".txt"),collapse=""),row.names=FALSE)
}
t1 <- proc.time()
(t1-t0)/60
In [21]:
d <- 9
PERF[9,]
In [22]:
ss_sp <- read.table(paste(c("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/",as.character(d),"/Sens_spec.txt"),collapse=""))
options(repr.plot.width=9, repr.plot.height=3)
par(mfrow=c(1,4))
for(i in 1:ncol(ss_sp)) hist(ss_sp[,i],xlim=c(0,1),main=colnames(ss_sp)[i],xlab=NULL)
In [23]:
is_dis <- sapply(Lgbd,function(x){d%in%x})
table(is_dis)
In [24]:
sens <- ss_sp$sens_dis
spec <- ss_sp$spec_dis
N <- nrow(data)
a_dis <- sum(is_dis)
b_dis <- N-a_dis
As <- (a_dis-(1-spec)*N)/(sens - (1-spec))
Bs <- N-As
T1 <- sens*As
T0 <- spec*Bs
F1 <- (1-spec)*Bs
F0 <- (1-sens)*As
PPV_dis <- T1/(T1+F1)
NPV_dis <- T0/(T0+F0)
In [25]:
table(As<0)
In [26]:
table(sens - (1-spec) <0)
In [27]:
table(a_dis-(1-spec)*N<0)
In [28]:
1-a_dis/N
We need to avoid cases in which specificity is lower than 1-a_dis/N.
What if we consider that we observed the number of false positives in the test set with a certain error
In [29]:
ts <- binom.test(PERF$TN_Dis[d],PERF$FP_Dis[d] + PERF$TN_Dis[d],conf.level = 0.5)
binom.test(PERF$TN_Dis[d],PERF$FP_Dis[d] + PERF$TN_Dis[d],conf.level = 0.5)
In [30]:
(1-ts$conf.int)*(PERF$FP_Dis[d] + PERF$TN_Dis[d])
In [31]:
New_FP <- ceiling((1-ts$conf.int)*(PERF$FP_Dis[d] + PERF$TN_Dis[d]))[2]
New_TN <- PERF$TN_Dis[d] + (PERF$FP_Dis[d] - New_FP)
In [32]:
New_FP
New_TN
In [33]:
set.seed(123)
spec_new <- rbeta(10000,New_TN+1,New_FP+1)
table(a_dis-(1-spec_new)*N<0)
In [34]:
hist(spec,col=rgb(0.8,0.8,0.8),xlim=c(0.994,1))
hist(spec_new, col=rgb(0.6,0.6,0.6),xlim=c(0.994,1))
In [35]:
#Adding new specificity
ss_sp$spec_dis <- spec_new
dir.create(paste("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates_add/",as.character(d),sep=""))
write.table(ss_sp,
paste(c("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates_add/",
as.character(d),"/Sens_spec.txt"),collapse=""))
In [36]:
#Simulating replicates
#which trials concern the disease
is_dis <- sapply(Lgbd,function(x){d%in%x})
#which trials concern another disease
is_oth <- sapply(Lgbd,function(x){sum(setdiff(1:27,d)%in%x)>0})
sens <- ss_sp$sens_dis
spec <- ss_sp$spec_dis
a_dis <- sum(is_dis)
b_dis <- N-a_dis
As <- (a_dis-(1-spec)*N)/(sens - (1-spec))
Bs <- N-As
T1 <- sens*As
T0 <- spec*Bs
F1 <- (1-spec)*Bs
F0 <- (1-sens)*As
PPV_dis <- T1/(T1+F1)
NPV_dis <- T0/(T0+F0)
sens <- ss_sp$sens_oth
spec <- ss_sp$spec_oth
a_oth <- sum(is_oth)
b_oth <- N-a_oth
As <- (a_oth-(1-spec)*N)/(sens - (1-spec))
Bs <- N-As
T1 <- sens*As
T0 <- spec*Bs
F1 <- (1-spec)*Bs
F0 <- (1-sens)*As
PPV_oth <- T1/(T1+F1)
NPV_oth <- T0/(T0+F0)
false_it <- PPV_dis<0 | PPV_dis>1 |
NPV_dis<0 | NPV_dis>1 |
PPV_oth<0 | PPV_oth>1 |
NPV_oth<0 | NPV_oth>1
print(paste(c(d,"has",sum(false_it),"suppressed false iterations"
),collapse=" "))
PPV_dis <- PPV_dis[!false_it]
NPV_dis <- NPV_dis[!false_it]
PPV_oth <- PPV_oth[!false_it]
NPV_oth <- NPV_oth[!false_it]
In [37]:
t0 <- proc.time()
L <- list()
#Simulation: reclassifying each trial
for(k in 1:length(PPV_dis)){
tp_dis <- runif(a_dis)
tn_dis <- runif(b_dis)
recl_dis <- is_dis
recl_dis[recl_dis==TRUE][tp_dis>PPV_dis[k]] <- FALSE
recl_dis[recl_dis==FALSE][tn_dis>NPV_dis[k]] <- TRUE
rt <- as.numeric(recl_dis)
#Oth_dis
tp_oth <- runif(a_oth)
tn_oth <- runif(b_oth)
recl_oth <- is_oth
recl_oth[recl_oth==TRUE][tp_oth>PPV_oth[k]] <- FALSE
recl_oth[recl_oth==FALSE][tn_oth>NPV_oth[k]] <- TRUE
write.table(data.frame(recl_dis=as.numeric(recl_dis),recl_oth=as.numeric(recl_oth)),
paste(c("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates_add/",
as.character(d),"/Reclassification_",
"_",k,".txt"),collapse=""),row.names=FALSE)
}
t1 <- proc.time()
(t1-t0)/60
In [38]:
d <- 21
PERF[d,]
In [39]:
ss_sp <- read.table(paste(c("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/",as.character(d),"/Sens_spec.txt"),collapse=""))
options(repr.plot.width=9, repr.plot.height=3)
par(mfrow=c(1,4))
for(i in 1:ncol(ss_sp)) hist(ss_sp[,i],xlim=c(0,1),main=colnames(ss_sp)[i],xlab=NULL)
In [40]:
is_dis <- sapply(Lgbd,function(x){d%in%x})
table(is_dis)
In [41]:
sens <- ss_sp$sens_dis
spec <- ss_sp$spec_dis
N <- nrow(data)
a_dis <- sum(is_dis)
b_dis <- N-a_dis
As <- (a_dis-(1-spec)*N)/(sens - (1-spec))
Bs <- N-As
T1 <- sens*As
T0 <- spec*Bs
F1 <- (1-spec)*Bs
F0 <- (1-sens)*As
PPV_dis <- T1/(T1+F1)
NPV_dis <- T0/(T0+F0)
In [42]:
table(As<0)
In [43]:
table(sens - (1-spec) <0)
In [44]:
table(a_dis-(1-spec)*N<0)
In [45]:
1-a_dis/N
We need to avoid cases in which specificity is lower than 1-a_dis/N.
What if we consider that we observed the number of false positives in the test set with a certain error
In [46]:
ts <- binom.test(PERF$TN_Dis[d],PERF$FP_Dis[d] + PERF$TN_Dis[d],conf.level = 0.5)
binom.test(PERF$TN_Dis[d],PERF$FP_Dis[d] + PERF$TN_Dis[d],conf.level = 0.5)
In [47]:
New_FP <- ceiling((1-ts$conf.int)*(PERF$FP_Dis[d] + PERF$TN_Dis[d]))[2]
New_TN <- PERF$TN_Dis[d] + (PERF$FP_Dis[d] - New_FP)
In [48]:
New_FP
New_TN
In [49]:
set.seed(123)
spec_new <- rbeta(10000,New_TN+1,New_FP+1)
table(a_dis-(1-spec_new)*N<0)
After modifying specificity we do not achieve less than 10% of false iterations, we then do not conduct simulations for this disease
In [50]:
d <- 11
PERF[d,]
In [51]:
ss_sp <- read.table(paste(c("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/",as.character(d),"/Sens_spec.txt"),collapse=""))
options(repr.plot.width=9, repr.plot.height=3)
par(mfrow=c(1,4))
for(i in 1:ncol(ss_sp)) hist(ss_sp[,i],xlim=c(0,1),main=colnames(ss_sp)[i],xlab=NULL)
In [52]:
is_dis <- sapply(Lgbd,function(x){d%in%x})
table(is_dis)
In [53]:
sens <- ss_sp$sens_dis
spec <- ss_sp$spec_dis
N <- nrow(data)
a_dis <- sum(is_dis)
b_dis <- N-a_dis
As <- (a_dis-(1-spec)*N)/(sens - (1-spec))
Bs <- N-As
T1 <- sens*As
T0 <- spec*Bs
F1 <- (1-spec)*Bs
F0 <- (1-sens)*As
PPV_dis <- T1/(T1+F1)
NPV_dis <- T0/(T0+F0)
In [54]:
table(As<0)
In [55]:
table(sens - (1-spec) <0)
In [56]:
table(a_dis-(1-spec)*N<0)
In [57]:
1-a_dis/N
We need to avoid cases in which specificity is lower than 1-a_dis/N.
What if we consider that we observed the number of false positives in the test set with a certain error
In [58]:
ts <- binom.test(PERF$TN_Dis[d],PERF$FP_Dis[d] + PERF$TN_Dis[d],conf.level = 0.5)
binom.test(PERF$TN_Dis[d],PERF$FP_Dis[d] + PERF$TN_Dis[d],conf.level = 0.5)
In [59]:
New_FP <- ceiling((1-ts$conf.int)*(PERF$FP_Dis[d] + PERF$TN_Dis[d]))[2]
New_TN <- PERF$TN_Dis[d] + (PERF$FP_Dis[d] - New_FP)
In [60]:
New_FP
New_TN
The number of FP is not modified with this method, leading to the same proportion of false iterations. We do not conduct simulations for this disease
In [62]:
d <- 27
PERF[d,]
In [63]:
ss_sp <- read.table(paste(c("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/",as.character(d),"/Sens_spec.txt"),collapse=""))
options(repr.plot.width=9, repr.plot.height=3)
par(mfrow=c(1,4))
for(i in 1:ncol(ss_sp)) hist(ss_sp[,i],xlim=c(0,1),main=colnames(ss_sp)[i],xlab=NULL)
In [64]:
is_dis <- sapply(Lgbd,function(x){d%in%x})
table(is_dis)
No observations in the test set, and only 3 observations in the dataset. Impossible to fix. We do not conduct simulations for this disease