Starting from the replicates of the mapping, we derive, for each replicate, the difference between research effort and disease burden within each disease.


In [1]:
library(data.table)

In [2]:
# Load the burden table and keep it sorted by region, then by disease, so that
# later per-disease subsets come out in a stable region order.
GBD <- local({
    raw_gbd <- read.table("../Data/DALY_YLL_deaths_per_region_and_27_diseases_2005.txt")
    row_order <- order(raw_gbd$Region, raw_gbd$Disease)
    raw_gbd[row_order, ]
})

In [3]:
# Names of the 27 GBD disease groups (column `x` is indexed by disease id below).
Mgbd <- read.table("../Data/27_gbd_groups.txt")

# Replicate-metric files, one per disease id ("Metrics_over_replicates_<id>.txt").
# Only files matching that exact naming scheme are kept: any other file in the
# directory would otherwise be turned into an NA id and make the per-disease
# loop try to read a non-existent "..._NA.txt" file.
sms <- list.files("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/Metrics_over_repl/",
                  pattern = "^Metrics_over_replicates_[0-9]+\\.txt$")

# Disease ids extracted from the file names (0 is used for "all diseases").
# The id is captured by the regex rather than by fixed character positions.
dis <- as.numeric(sub("^Metrics_over_replicates_([0-9]+)\\.txt$", "\\1", sms))

In [4]:
# Read the replicate metrics of disease 1 only to discover the region labels.
k <- 1
DF <- fread(paste0("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/Metrics_over_repl/Metrics_over_replicates_",
                   as.character(k), ".txt"))

# Individual regions, sorted, without the two aggregate labels.
regs <- sort(unique(DF$Region))
regs <- regs[!(regs %in% c("All", "Non-HI"))]

In [5]:
# Accumulator for the result table: the per-disease loop appends three rows
# (low / med / up quantiles) per disease to it.
data_f <- data.frame()

In [6]:
# Total shortfall (in percentage points) of a research share below the burden
# share, summed over the rows where burden exceeds research ("perfect alignment").
shortfall_fill <- function(research_share, burden_share) {
    lagging <- burden_share > research_share
    sum(abs(research_share[lagging] - burden_share[lagging]))
}

# Same idea with the "no gap" target: only rows where burden is more than twice
# the research share count, and the target is half of the burden share.
shortfall_nogap <- function(research_share, burden_share) {
    lagging <- burden_share > 2 * research_share
    sum(abs(research_share[lagging] - burden_share[lagging] / 2))
}

# Rows of GBD for one disease and one burden metric (aggregate "All" region
# excluded), with the regional burden expressed as a percentage of the total
# over all regions and of the total over non-high-income regions.
burden_shares <- function(disease, metric) {
    gbd_sub <- GBD[GBD$Disease == disease & GBD$metr == metric & GBD$Region != "All", ]
    gbd_sub$burden_prop_all <- 100 * gbd_sub$burden / sum(gbd_sub$burden)
    gbd_sub$burden_prop_NHI <- 100 * gbd_sub$burden / sum(gbd_sub$burden[gbd_sub$Region != "High-income"])
    gbd_sub
}

for (k in dis[dis != 0]) {

    DF <- fread(paste0("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/Metrics_over_repl/Metrics_over_replicates_",
                       as.character(k), ".txt"))

    n_regs <- length(regs)
    dis_rows <- DF$Dis == "dis"
    all_rows <- dis_rows & DF$Region == "All"
    nhi_rows <- dis_rows & DF$Region == "Non-HI"

    # One row per (replicate, region); the "All" and "Non-HI" totals of each
    # replicate are repeated next to every region of that replicate.
    DFr <- DF[DF$Region %in% regs & DF$Dis == "dis", ]
    DFr$RCTs_all <- rep(DF$RCTs[all_rows], each = n_regs)
    DFr$RCTs_NHI <- rep(DF$RCTs[nhi_rows], each = n_regs)
    DFr$Patients_all <- rep(DF$Patients[all_rows], each = n_regs)
    DFr$Patients_NHI <- rep(DF$Patients[nhi_rows], each = n_regs)

    nb_sims <- nrow(DFr) / n_regs

    # Regional shares of research, in percent of each total.
    DFr$RCTs_prop_all <- 100 * DFr$RCTs / DFr$RCTs_all
    DFr$RCTs_prop_NHI <- 100 * DFr$RCTs / DFr$RCTs_NHI
    DFr$Patients_prop_all <- 100 * DFr$Patients / DFr$Patients_all
    DFr$Patients_prop_NHI <- 100 * DFr$Patients / DFr$Patients_NHI

    # Regional shares of burden for each metric, repeated for every replicate.
    disease_name <- as.character(Mgbd$x[k])

    gbd_mt <- burden_shares(disease_name, "daly")
    DFr$burden_daly_prop_all <- rep(gbd_mt$burden_prop_all, times = nb_sims)
    DFr$burden_daly_prop_NHI <- rep(gbd_mt$burden_prop_NHI, times = nb_sims)

    gbd_mt <- burden_shares(disease_name, "death")
    DFr$burden_death_prop_all <- rep(gbd_mt$burden_prop_all, times = nb_sims)
    DFr$burden_death_prop_NHI <- rep(gbd_mt$burden_prop_NHI, times = nb_sims)

    gbd_mt <- burden_shares(disease_name, "yld")
    DFr$burden_yld_prop_all <- rep(gbd_mt$burden_prop_all, times = nb_sims)
    DFr$burden_yld_prop_NHI <- rep(gbd_mt$burden_prop_NHI, times = nb_sims)

    gbd_mt <- burden_shares(disease_name, "yll")
    DFr$burden_yll_prop_all <- rep(gbd_mt$burden_prop_all, times = nb_sims)
    DFr$burden_yll_prop_NHI <- rep(gbd_mt$burden_prop_NHI, times = nb_sims)

    # High-income rows must not contribute to the Non-HI comparisons: every
    # column whose name contains "NHI" is set to 0 on those rows.
    nhi_cols <- grep("NHI", names(DFr), value = TRUE)
    DFr <- DFr[DFr$Region == "High-income", (nhi_cols) := 0]
    DFr$sim <- rep(1:nb_sims, each = n_regs)

    # One row per replicate; the 32 unnamed results become columns V1..V32.
    # Within each metric the order is: RCTs/all, Patients/all, RCTs/NHI, Patients/NHI.
    df <- DFr[, .(
        # Percentage to add (reallocate) for perfect alignment
        # DALYs
        shortfall_fill(RCTs_prop_all, burden_daly_prop_all),
        shortfall_fill(Patients_prop_all, burden_daly_prop_all),
        shortfall_fill(RCTs_prop_NHI, burden_daly_prop_NHI),
        shortfall_fill(Patients_prop_NHI, burden_daly_prop_NHI),
        # Deaths
        shortfall_fill(RCTs_prop_all, burden_death_prop_all),
        shortfall_fill(Patients_prop_all, burden_death_prop_all),
        shortfall_fill(RCTs_prop_NHI, burden_death_prop_NHI),
        shortfall_fill(Patients_prop_NHI, burden_death_prop_NHI),
        # YLD
        shortfall_fill(RCTs_prop_all, burden_yld_prop_all),
        shortfall_fill(Patients_prop_all, burden_yld_prop_all),
        shortfall_fill(RCTs_prop_NHI, burden_yld_prop_NHI),
        shortfall_fill(Patients_prop_NHI, burden_yld_prop_NHI),
        # YLL
        shortfall_fill(RCTs_prop_all, burden_yll_prop_all),
        shortfall_fill(Patients_prop_all, burden_yll_prop_all),
        shortfall_fill(RCTs_prop_NHI, burden_yll_prop_NHI),
        shortfall_fill(Patients_prop_NHI, burden_yll_prop_NHI),

        # Percentage to add (reallocate) for "no gap"
        # DALYs
        shortfall_nogap(RCTs_prop_all, burden_daly_prop_all),
        shortfall_nogap(Patients_prop_all, burden_daly_prop_all),
        shortfall_nogap(RCTs_prop_NHI, burden_daly_prop_NHI),
        shortfall_nogap(Patients_prop_NHI, burden_daly_prop_NHI),
        # Deaths
        shortfall_nogap(RCTs_prop_all, burden_death_prop_all),
        shortfall_nogap(Patients_prop_all, burden_death_prop_all),
        shortfall_nogap(RCTs_prop_NHI, burden_death_prop_NHI),
        shortfall_nogap(Patients_prop_NHI, burden_death_prop_NHI),
        # YLD
        shortfall_nogap(RCTs_prop_all, burden_yld_prop_all),
        shortfall_nogap(Patients_prop_all, burden_yld_prop_all),
        shortfall_nogap(RCTs_prop_NHI, burden_yld_prop_NHI),
        shortfall_nogap(Patients_prop_NHI, burden_yld_prop_NHI),
        # YLL
        shortfall_nogap(RCTs_prop_all, burden_yll_prop_all),
        shortfall_nogap(Patients_prop_all, burden_yll_prop_all),
        shortfall_nogap(RCTs_prop_NHI, burden_yll_prop_NHI),
        shortfall_nogap(Patients_prop_NHI, burden_yll_prop_NHI)
    ), by = "sim"]

    # Rq: NAs appear when the total number of RCTs or patients (overall or across
    # non-HI regions) is 0, so no share can be computed; they are dropped here.
    dui <- df[, lapply(.SD, quantile, probs = c(0.025, 0.5, 0.975), na.rm = TRUE),
              .SDcols = paste0("V", 1:32)]

    dui <- cbind(as.character(Mgbd$x[k]), c("low", "med", "up"), dui)

    # Column labels follow the V1..V32 order: 16 "fill" columns then 16 "nogap".
    gap_labels <- paste(rep(c("RCTs_vs", "Patients_vs"), times = 8),
                        rep(c("daly", "death", "yld", "yll"), each = 4),
                        rep(rep(c("all", "NHI"), each = 2), times = 4), sep = "_")
    names(dui) <- c("Disease", "UI",
                    paste(gap_labels, "fill", sep = "_"),
                    paste(gap_labels, "nogap", sep = "_"))

    data_f <- rbind(data_f, dui)

}

In [7]:
# All diseases: same computation as for the individual diseases, applied to the
# replicate file with id 0 and to the GBD rows whose Disease is "all".

# Total shortfall (in percentage points) of a research share below the burden
# share, summed over the rows where burden exceeds research ("perfect alignment").
shortfall_fill <- function(research_share, burden_share) {
    lagging <- burden_share > research_share
    sum(abs(research_share[lagging] - burden_share[lagging]))
}

# "No gap" variant: only rows where burden is more than twice the research share
# count, and the target is half of the burden share. Using one helper for all 16
# columns removes the misplaced bracket of the former RCTs/NHI/YLL expression,
# where "/2" ended up inside the index instead of halving the burden share.
shortfall_nogap <- function(research_share, burden_share) {
    lagging <- burden_share > 2 * research_share
    sum(abs(research_share[lagging] - burden_share[lagging] / 2))
}

# Rows of GBD for one disease label and one burden metric (aggregate "All"
# region excluded), with the regional burden expressed as a percentage of the
# total over all regions and of the total over non-high-income regions.
burden_shares <- function(disease, metric) {
    gbd_sub <- GBD[GBD$Disease == disease & GBD$metr == metric & GBD$Region != "All", ]
    gbd_sub$burden_prop_all <- 100 * gbd_sub$burden / sum(gbd_sub$burden)
    gbd_sub$burden_prop_NHI <- 100 * gbd_sub$burden / sum(gbd_sub$burden[gbd_sub$Region != "High-income"])
    gbd_sub
}

k <- 0

DF <- fread(paste0("/media/igna/Elements/HotelDieu/Cochrane/MappingRCTs_vs_Burden/Replicates/Metrics_over_repl/Metrics_over_replicates_",
                   as.character(k), ".txt"))

n_regs <- length(regs)

# One row per (replicate, region); the "All" and "Non-HI" totals of each
# replicate are repeated next to every region of that replicate.
DFr <- DF[DF$Region %in% regs, ]
DFr$RCTs_all <- rep(DF$RCTs[DF$Region == "All"], each = n_regs)
DFr$RCTs_NHI <- rep(DF$RCTs[DF$Region == "Non-HI"], each = n_regs)
DFr$Patients_all <- rep(DF$Patients[DF$Region == "All"], each = n_regs)
DFr$Patients_NHI <- rep(DF$Patients[DF$Region == "Non-HI"], each = n_regs)

nb_sims <- nrow(DFr) / n_regs

# Regional shares of research, in percent of each total.
DFr$RCTs_prop_all <- 100 * DFr$RCTs / DFr$RCTs_all
DFr$RCTs_prop_NHI <- 100 * DFr$RCTs / DFr$RCTs_NHI
DFr$Patients_prop_all <- 100 * DFr$Patients / DFr$Patients_all
DFr$Patients_prop_NHI <- 100 * DFr$Patients / DFr$Patients_NHI

# Regional shares of burden for each metric, repeated for every replicate.
gbd_mt <- burden_shares("all", "daly")
DFr$burden_daly_prop_all <- rep(gbd_mt$burden_prop_all, times = nb_sims)
DFr$burden_daly_prop_NHI <- rep(gbd_mt$burden_prop_NHI, times = nb_sims)

gbd_mt <- burden_shares("all", "death")
DFr$burden_death_prop_all <- rep(gbd_mt$burden_prop_all, times = nb_sims)
DFr$burden_death_prop_NHI <- rep(gbd_mt$burden_prop_NHI, times = nb_sims)

gbd_mt <- burden_shares("all", "yld")
DFr$burden_yld_prop_all <- rep(gbd_mt$burden_prop_all, times = nb_sims)
DFr$burden_yld_prop_NHI <- rep(gbd_mt$burden_prop_NHI, times = nb_sims)

gbd_mt <- burden_shares("all", "yll")
DFr$burden_yll_prop_all <- rep(gbd_mt$burden_prop_all, times = nb_sims)
DFr$burden_yll_prop_NHI <- rep(gbd_mt$burden_prop_NHI, times = nb_sims)

# Suppressing HI values for the Non-HI ratios, as the per-disease loop does:
# every column whose name contains "NHI" is set to 0 on High-income rows so that
# they cannot contribute to the Non-HI sums.
nhi_cols <- grep("NHI", names(DFr), value = TRUE)
DFr <- DFr[DFr$Region == "High-income", (nhi_cols) := 0]
DFr$sim <- rep(1:nb_sims, each = n_regs)

# One row per replicate; the 32 unnamed results become columns V1..V32.
# Within each metric the order is: RCTs/all, Patients/all, RCTs/NHI, Patients/NHI.
df <- DFr[, .(
    # Percentage to add (reallocate) for perfect alignment
    # DALYs
    shortfall_fill(RCTs_prop_all, burden_daly_prop_all),
    shortfall_fill(Patients_prop_all, burden_daly_prop_all),
    shortfall_fill(RCTs_prop_NHI, burden_daly_prop_NHI),
    shortfall_fill(Patients_prop_NHI, burden_daly_prop_NHI),
    # Deaths
    shortfall_fill(RCTs_prop_all, burden_death_prop_all),
    shortfall_fill(Patients_prop_all, burden_death_prop_all),
    shortfall_fill(RCTs_prop_NHI, burden_death_prop_NHI),
    shortfall_fill(Patients_prop_NHI, burden_death_prop_NHI),
    # YLD
    shortfall_fill(RCTs_prop_all, burden_yld_prop_all),
    shortfall_fill(Patients_prop_all, burden_yld_prop_all),
    shortfall_fill(RCTs_prop_NHI, burden_yld_prop_NHI),
    shortfall_fill(Patients_prop_NHI, burden_yld_prop_NHI),
    # YLL
    shortfall_fill(RCTs_prop_all, burden_yll_prop_all),
    shortfall_fill(Patients_prop_all, burden_yll_prop_all),
    shortfall_fill(RCTs_prop_NHI, burden_yll_prop_NHI),
    shortfall_fill(Patients_prop_NHI, burden_yll_prop_NHI),

    # Percentage to add (reallocate) for "no gap"
    # DALYs
    shortfall_nogap(RCTs_prop_all, burden_daly_prop_all),
    shortfall_nogap(Patients_prop_all, burden_daly_prop_all),
    shortfall_nogap(RCTs_prop_NHI, burden_daly_prop_NHI),
    shortfall_nogap(Patients_prop_NHI, burden_daly_prop_NHI),
    # Deaths
    shortfall_nogap(RCTs_prop_all, burden_death_prop_all),
    shortfall_nogap(Patients_prop_all, burden_death_prop_all),
    shortfall_nogap(RCTs_prop_NHI, burden_death_prop_NHI),
    shortfall_nogap(Patients_prop_NHI, burden_death_prop_NHI),
    # YLD
    shortfall_nogap(RCTs_prop_all, burden_yld_prop_all),
    shortfall_nogap(Patients_prop_all, burden_yld_prop_all),
    shortfall_nogap(RCTs_prop_NHI, burden_yld_prop_NHI),
    shortfall_nogap(Patients_prop_NHI, burden_yld_prop_NHI),
    # YLL
    shortfall_nogap(RCTs_prop_all, burden_yll_prop_all),
    shortfall_nogap(Patients_prop_all, burden_yll_prop_all),
    shortfall_nogap(RCTs_prop_NHI, burden_yll_prop_NHI),
    shortfall_nogap(Patients_prop_NHI, burden_yll_prop_NHI)
), by = "sim"]

# Rq: NAs appear when the total number of RCTs or patients (overall or across
# non-HI regions) is 0, so no share can be computed; they are dropped here.
dui <- df[, lapply(.SD, quantile, probs = c(0.025, 0.5, 0.975), na.rm = TRUE),
          .SDcols = paste0("V", 1:32)]

dui <- cbind("All", c("low", "med", "up"), dui)

# Column labels follow the V1..V32 order: 16 "fill" columns then 16 "nogap".
gap_labels <- paste(rep(c("RCTs_vs", "Patients_vs"), times = 8),
                    rep(c("daly", "death", "yld", "yll"), each = 4),
                    rep(rep(c("all", "NHI"), each = 2), times = 4), sep = "_")
names(dui) <- c("Disease", "UI",
                paste(gap_labels, "fill", sep = "_"),
                paste(gap_labels, "nogap", sep = "_"))

# The all-diseases rows go on top of the per-disease rows.
data_f <- rbind(dui, data_f)

In [8]:
# Save the combined table of quantiles (all-diseases rows first, then the
# per-disease rows) with write.table's default formatting.
write.table(x = data_f,
            file = "../Data/Alignment_ratios_within_diseases_across_all_NHI_patients_metrs_burdens.txt")

In [ ]: