In [1]:
library(ggplot2)
library(gdata)


gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.

gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.

Attaching package: ‘gdata’

The following object is masked from ‘package:stats’:

    nobs

The following object is masked from ‘package:utils’:

    object.size

The following object is masked from ‘package:base’:

    startsWith


In [2]:
D <- read.table("../Data/All_data.txt")
D <- D[order(D$Region,D$Disease),]

In [3]:
names(D)


  1. 'Region'
  2. 'Disease'
  3. 'burden_daly'
  4. 'burden_yll'
  5. 'burden_yld'
  6. 'burden_death'
  7. 'Prop_loc_burden_daly'
  8. 'Prop_loc_burden_yll'
  9. 'Prop_loc_burden_yld'
  10. 'Prop_loc_burden_death'
  11. 'Prop_glob_burden_daly'
  12. 'Prop_glob_burden_yll'
  13. 'Prop_glob_burden_yld'
  14. 'Prop_glob_burden_death'
  15. 'Prop_NHI_burden_daly'
  16. 'Prop_NHI_burden_yll'
  17. 'Prop_NHI_burden_yld'
  18. 'Prop_NHI_burden_death'
  19. 'Nb_RCTs_low'
  20. 'Nb_RCTs_med'
  21. 'Nb_RCTs_up'
  22. 'Nb_Patients_low'
  23. 'Nb_Patients_med'
  24. 'Nb_Patients_up'
  25. 'Prop_loc_RCTs_low'
  26. 'Prop_loc_RCTs_med'
  27. 'Prop_loc_RCTs_up'
  28. 'Prop_loc_Patients_low'
  29. 'Prop_loc_Patients_med'
  30. 'Prop_loc_Patients_up'
  31. 'Prop_glob_RCTs_low'
  32. 'Prop_glob_RCTs_med'
  33. 'Prop_glob_RCTs_up'
  34. 'Prop_glob_Patients_low'
  35. 'Prop_glob_Patients_med'
  36. 'Prop_glob_Patients_up'
  37. 'Prop_NHI_RCTs_low'
  38. 'Prop_NHI_RCTs_med'
  39. 'Prop_NHI_RCTs_up'
  40. 'Prop_NHI_Patients_low'
  41. 'Prop_NHI_Patients_med'
  42. 'Prop_NHI_Patients_up'

In [4]:
metr_burden <- "daly"
metr_res <- "Patients"

In [5]:
#We compare RCTs to DALYs
dpl <- D[D$Region!="Non-HI",
          c(which(names(D)%in%c("Region","Disease")),
            intersect(grep(metr_burden,names(D)),grep("^burden",names(D))),
            intersect(grep(metr_res,names(D)),grep("^Nb",names(D)))),]

In [6]:
head(dpl)


RegionDiseaseburden_dalyNb_Patients_lowNb_Patients_medNb_Patients_up
1All All 2220063510.8007628545370.475 30082791 31513723.075
2All Cardiovascular and circulatory diseases287404109.09231 4161522.95 4775668 5435620.325
3All Chronic respiratory diseases112485355.22285 1338206.35 1560048.5 1813807.675
4All Cirrhosis of the liver30462721.1164 139002.125 352394 599427.35
5All Congenital anomalies43254504.439 12581.675 119546 380584.875
6All Diabetes, urinary diseases and male infertility75821480.094146 3616561.375 4123023.5 4694087.55

In [7]:
#Order diseases: increasing burden
dis <- dpl$Disease[dpl$Region=="All"][order(dpl$burden[dpl$Region=="All"])]

In [8]:
dis


  1. Leprosy
  2. Sudden infant death syndrome
  3. Gynecological diseases
  4. Hepatitis
  5. Sexually transmitted diseases excluding HIV
  6. Oral disorders
  7. Hemoglobinopathies and hemolytic anemias
  8. Maternal disorders
  9. Neglected tropical diseases excluding malaria
  10. Cirrhosis of the liver
  11. Sense organ diseases
  12. Digestive diseases (except cirrhosis)
  13. Skin and subcutaneous diseases
  14. Congenital anomalies
  15. Tuberculosis
  16. Neurological disorders
  17. Diabetes, urinary diseases and male infertility
  18. Nutritional deficiencies
  19. HIV/AIDS
  20. Malaria
  21. Chronic respiratory diseases
  22. Musculoskeletal disorders
  23. Mental and behavioral disorders
  24. Neoplasms
  25. Neonatal disorders
  26. Cardiovascular and circulatory diseases
  27. Diarrhea, lower respiratory infections, meningitis, and other common infectious diseases
  28. All

In [9]:
dis <- dis[dis!="All"]

In [10]:
#Number of RCTs per region
regs <- dpl$Region[dpl$Disease=="All"][order(dpl[dpl$Disease=="All",grep("med",names(dpl))],
                                                decreasing=TRUE)]

In [11]:
regs


  1. All
  2. High-income
  3. Sub-Saharian Africa
  4. Southeast Asia, East Asia and Oceania
  5. South Asia
  6. Central Europe, Eastern Europe, and Central Asia
  7. Latin America and Caribbean
  8. North Africa and Middle East

In [12]:
regs <- regs[regs!="All"]

In [13]:
#Region labels
reg_labs <- c("High-income countries",
              "Sub-Saharian\nAfrica",
              "Southeast Asia,\nEast Asia and Oceania",
              "South Asia",
              "Eastern Europe\nand Central Asia",              
              "Latin America\nand Caribbean", 
              "North Africa and\nMiddle East"
              )

In [14]:
dpl <- dpl[dpl$Region!="All" & dpl$Disease!="All",]

In [15]:
#Normalizing regions: max RCts = max GBD
Norm_fact <- max(dpl[,grep("up",names(dpl))],na.rm=TRUE)/max(dpl$burden)
dpl$gpl <- (dpl$burden/max(dpl$burden))*max(dpl[,grep("up",names(dpl))],na.rm=TRUE)

In [16]:
#Bar size = wdt*2
wdt <- 0.45
#Distance between regions (end to end)
d_reg <- 400*0.5*1e3
#Distance between center of region and start of bars (for disease labels)
esp_dis_nb <- 200*0.5*1e3
#Inner circle
IC <- 8

In [17]:
#Rectangles for a given region and disease
#Rg = central position of region
#d = name of the disease
#rg = name of the region
displt <- 
function(d,Rg,rg){
res_pl <- data.frame(  xmin = which(d==dis)-wdt,
                       xmax = which(d==dis)+wdt,
                       ymin = Rg+esp_dis_nb,
                       ymax = Rg+esp_dis_nb+dpl[dpl$Dis==d & dpl$Region==rg,grep("med",names(dpl))],
                       metr="Research",
                       reg=rg,
                       ycent=Rg,
                       dis_nb=which(d==dis),
                       disease=d)
burd_pl <- data.frame( xmin = which(d==dis)-wdt,
                       xmax = which(d==dis)+wdt,
                       ymin = Rg-esp_dis_nb,
                       ymax = Rg-esp_dis_nb-dpl$gpl[dpl$Dis==d & dpl$Region==rg],
                       metr="Burden",
                       reg=rg,
                       ycent=Rg,
                       dis_nb=which(d==dis),
                       disease=d)
rbind(res_pl,burd_pl)
}

In [18]:
displt_err <- function(d,Rg,rg){
    data.frame(x = which(d==dis),
               ymin = Rg+esp_dis_nb+dpl[dpl$Dis==d & dpl$Region==rg,grep("low",names(dpl))],
               ymax = Rg+esp_dis_nb+dpl[dpl$Dis==d & dpl$Region==rg,grep("up",names(dpl))],
               metr="Research",
               reg=rg,
               dis_nb=which(d==dis),
               disease=d)
}

In [19]:
#Rectangles pour toutes les maladies, une région donnée
regplt <- function(Rg,rg) do.call('rbind',lapply(dis,function(x){displt(x,Rg,rg)}))
regplt_err <- function(Rg,rg) do.call('rbind',lapply(dis,function(x){displt_err(x,Rg,rg)}))

In [20]:
#Emplacement des régions
RG <- 0
for(i in 2:length(regs)){
RG <- c(RG,
        RG[i-1]-(2*esp_dis_nb+
                 max(dpl$gpl[dpl$Region==regs[i-1]])+
                 d_reg+max(dpl[dpl$Region==regs[i],grep("up",names(dpl))],na.rm=TRUE)))
}

In [21]:
#DataFrame Plot
DPLOT <- do.call('rbind',lapply(1:length(regs),function(i){regplt(RG[i],regs[i])}))
#Error_bars dataframe
DPLOT_err <- do.call('rbind',lapply(1:length(regs),function(i){regplt_err(RG[i],regs[i])}))

In [22]:
#Inner circle
DPLOT$xmin <- DPLOT$xmin + IC
DPLOT$xmax <- DPLOT$xmax + IC
DPLOT$xcent <- DPLOT$dis_nb + IC
DPLOT_err$x <- DPLOT_err$x + IC
DPLOT_err$xcent <- DPLOT_err$dis_nb + IC

In [23]:
totalLength <- max(DPLOT_err$ymax,na.rm=TRUE)-min(DPLOT$ymax,na.rm=TRUE)+d_reg

In [24]:
#Polar coordinates
alphaStart <- 2*pi*((max(DPLOT_err$ymax[DPLOT_err$reg==regs[1]]+d_reg/2,na.rm=TRUE))/
                    totalLength)

In [25]:
#REGION LABELS
  readableAngle<-function(x){
    angle<-x*(360/totalLength)
  }
    familyLabelsDF<-data.frame(xmin=RG,label=reg_labs)
    familyLabelsDF$angle <- readableAngle(familyLabelsDF$xmin)

In [26]:
#Disease labels: size
DPLOT$size_dis_lab = 2.3*(40+DPLOT$dis_nb)/(40+max(DPLOT$dis_nb))

Research and burden tick marks


In [27]:
max(dpl[,grep("up",names(dpl))],na.rm=TRUE)/1e3


4939.83309173478

In [28]:
#Research
rcttks <- c(0,100,500,1000,2000,3000,4000,5000)*1e3
maj_rcts <- function(nb){
    x <- nb
    k <- 0
    while(x>=100){x <- x%/%10
                  k <- k+1}
    (x+1)*10^k
}

In [29]:
#Faire que les ticks aillent jusqu'au max des RCTs arrondi au sup
RCTtcks <- do.call('rbind',lapply(regs,function(x){
data.frame(
    breaks = unique(DPLOT$ymin[DPLOT$metr=="Research" & DPLOT$reg==x]) + 
             c(rcttks[2:findInterval(max(dpl[dpl$Region==x,grep("up",names(dpl))],na.rm=TRUE),rcttks)],
               maj_rcts(max(dpl[dpl$Region==x,grep("up",names(dpl))],na.rm=TRUE))),
    labels=c(rcttks[2:findInterval(max(dpl[dpl$Region==x,grep("up",names(dpl))],na.rm=TRUE),rcttks)],
             maj_rcts(max(dpl[dpl$Region==x,grep("up",names(dpl))],na.rm=TRUE)))/1e3,
    region=x)
}))
RCTtcks$col <- "1RCT"

In [30]:
#Pour GBD
gbdtks <- c(0,1e7,2e7,3e7,5e7,7.5e7,1e8,1.5e8,2e8)/1e6
maj_gbd <- function(x) ifelse(trunc(x)==x,x,trunc(x) + 1)

In [31]:
GBDtcks <- do.call('rbind',lapply(regs,function(x){
data.frame(
    breaks = unique(DPLOT$ymin[DPLOT$metr=="Burden" & DPLOT$reg==x]) -
        c(gbdtks[2:findInterval(max(dpl[dpl$Region==x,grep("^burden",names(dpl))],na.rm=TRUE)/1e6,gbdtks)],
          maj_gbd(max(dpl[dpl$Region==x,grep("^burden",names(dpl))],na.rm=TRUE)/1e6))*
        1e6*Norm_fact,
    labels=c(gbdtks[2:findInterval(max(dpl[dpl$Region==x,grep("^burden",names(dpl))],na.rm=TRUE)/1e6,gbdtks)],
             maj_gbd(max(dpl[dpl$Region==x,grep("^burden",names(dpl))],na.rm=TRUE)/1e6)),
    region=x)}))
GBDtcks$col <- "2GBD"

In [32]:
#High-income countries, burden from 44 to 45
GBDtcks[GBDtcks$labels==44 & GBDtcks$region=="High-income",1] <- 
GBDtcks[GBDtcks$labels==44 & GBDtcks$region=="High-income",1] - (45-44)*1e6*Norm_fact
GBDtcks[GBDtcks$labels==44 & GBDtcks$region=="High-income",2] <- 45
#Sub-Saharia Africa, burden from 126 to 125
GBDtcks[GBDtcks$labels==126 & GBDtcks$region=="Sub-Saharian Africa",1] <- 
GBDtcks[GBDtcks$labels==126 & GBDtcks$region=="Sub-Saharian Africa",1] - (125-126)*1e6*Norm_fact
GBDtcks[GBDtcks$labels==126 & GBDtcks$region=="Sub-Saharian Africa",2] <- 125
#Sub-Saharian Africa, suppressing research for 1000
RCTtcks <- RCTtcks[!(RCTtcks$labels==1000 & RCTtcks$region=="Sub-Saharian Africa"),]
#Southeast Asia, burden from 82 to 80
GBDtcks[GBDtcks$labels==82 & GBDtcks$region=="Southeast Asia, East Asia and Oceania",1] <- 
GBDtcks[GBDtcks$labels==82 & GBDtcks$region=="Southeast Asia, East Asia and Oceania",1] - (80-82)*1e6*Norm_fact
GBDtcks[GBDtcks$labels==82 & GBDtcks$region=="Southeast Asia, East Asia and Oceania",2] <- 80
#South Asia, burden from 131 to 130
GBDtcks[GBDtcks$labels==131 & GBDtcks$region=="South Asia",1] <- 
GBDtcks[GBDtcks$labels==131 & GBDtcks$region=="South Asia",1] - (130-131)*1e6*Norm_fact
GBDtcks[GBDtcks$labels==131 & GBDtcks$region=="South Asia",2] <- 130
#Eastern Europe, burden from 54 to 55
GBDtcks[GBDtcks$labels==54 & GBDtcks$region=="Central Europe, Eastern Europe, and Central Asia",1] <- 
GBDtcks[GBDtcks$labels==54 & GBDtcks$region=="Central Europe, Eastern Europe, and Central Asia",1] - (55-54)*1e6*Norm_fact
GBDtcks[GBDtcks$labels==54 & GBDtcks$region=="Central Europe, Eastern Europe, and Central Asia",2] <- 55
#Latin America, burden from 16 to 15
GBDtcks[GBDtcks$labels==16 & GBDtcks$region=="Latin America and Caribbean",1] <- 
GBDtcks[GBDtcks$labels==16 & GBDtcks$region=="Latin America and Caribbean",1] - (15-16)*1e6*Norm_fact
GBDtcks[GBDtcks$labels==16 & GBDtcks$region=="Latin America and Caribbean",2] <- 15

In [33]:
RCTtcks$labels <- as.character(RCTtcks$label)
GBDtcks$labels <- as.character(GBDtcks$label)
tcks <- rbind(RCTtcks,GBDtcks)
tcks$col <- as.factor(tcks$col)

GGPLOT Object


In [34]:
p <- ggplot(DPLOT) +
        geom_rect(aes(xmin=xmin,xmax=xmax,ymin=ymin,ymax=ymax,fill=metr)) +
        geom_errorbar(aes(x=x,ymax=ymax,ymin=ymin),size=0.1,width=0.5,data=DPLOT_err) + 
        #Disease numbers
        geom_text(aes( x=xcent,
                        y=ycent,
                        label=dis_nb,
                        hjust=0.5),
                   size=DPLOT$size_dis_lab,
                   col="#42442E") + 
        theme_minimal() + 
        theme(  axis.title.y=element_blank(),
                axis.text.y=element_blank(),
                axis.ticks.y=element_blank(),
                axis.title.x=element_blank(),
                axis.ticks.x=element_blank()
                ) + 
        theme(legend.position = "none") + 
        scale_x_continuous(breaks = NULL,limits = c(0,max(DPLOT$xmax,na.rm=TRUE)+3)) +
        #Region labels
        geom_text(
                  aes(  x=length(dis)+IC+3,
                        y=xmin,
                        label=label,
                        angle=angle,
                        hjust=0.5,vjust=0),
                  data=familyLabelsDF,
                  size=4.3) + 
        #Colors burden and research
        scale_fill_manual(values = c("Burden"="orange","Research"="blue"))

In [35]:
#Tickmarks
p <- p+ scale_y_continuous(minor_breaks = tcks$breaks, breaks=tcks$breaks,
                      labels=rep("",nrow(tcks)),
                    limits=c(min(DPLOT$ymax,na.rm=TRUE)-d_reg/2,max(DPLOT_err$ymax,na.rm=TRUE)+d_reg/2)) + 
        theme(panel.grid.minor=element_line(color="#D3D3D3",size=0.1)) + 
        geom_text(
            aes(x=length(dis)+IC+1.5,
            y=breaks,
            label=labels,
            hjust=0.5),
            data=tcks,
            size=2,
            col=as.numeric(tcks$col))

In [36]:
ggsave(filename = "../Figures/polar_props_Patients_DALYs.pdf",
      plot = p + coord_polar(theta="y",start=alphaStart,direction=-1),
      width=12,height=12)


Warning message:
“Removed 21 rows containing missing values (geom_rect).”Warning message:
“Removed 21 rows containing missing values (geom_errorbar).”

In [ ]:


In [ ]: