In [ ]:
# From AML studio it looks like:
# library("AzureML")
# ws <- workspace()
# dat <- download.datasets(ws, "paho-who-cases-reported-2016-03-02.csv")

# Otherwise:

dat <- read.csv("paho-who-cases-reported-2016-03-02.csv")

In [ ]:
head(dat, n = 50)

In [ ]:
# install.packages('dplyr', repos = "http://cloud.r-project.org")

In [ ]:
library(dplyr)
summary(dat)
colnames(dat)

library(dplyr)

dat_susp <- filter(dat, Measure.Names == 'Suspected') #Suspected.cases > 0)
head(dat_susp)

dat_conf <- filter(dat, Measure.Names != 'Suspected')
head(dat_conf)

In [ ]:
# Interactive graphics
# replace the lib argument with your local user library
# install.packages(c("DT", "htmlwidgets"),
#                 repos = "http://cloud.r-project.org")

In [ ]:
# World map plotting packages
install.packages(c("rworldmap", "RColorBrewer"),
                repos = "http://cloud.r-project.org")

In [21]:
# Convert categorical to numerical - remove ','
susp.cases.num <- gsub(",", "", dat_susp$Measure.Values, fixed = T)
dat_susp$Measure.Values <- as.numeric(susp.cases.num)
head(dat_susp)


Out[21]:
Country...territoryMeasure.NamesMonth.of.DateReport.Epi.WeekYear.of.DateCountry...territory.1DateMeasure.ValuesLaboratory.confirmed.casesSuspected.casesISO3
1ArubaSuspectedJanuary12016ArubaJanuary, 2016000ABW
2ArubaSuspectedJanuary22016ArubaJanuary, 2016000ABW
3ArubaSuspectedJanuary32016ArubaJanuary, 2016000ABW
4ArubaSuspectedJanuary42016ArubaJanuary, 2016000ABW
5BarbadosSuspectedJanuary12016BarbadosJanuary, 2016030BRB
6BarbadosSuspectedJanuary22016BarbadosJanuary, 2016000BRB

In [20]:
library(rworldmap)
head(countrySynonyms)


Out[20]:
IDISO3name1name2name3name4name5name6name7name8
11afgAfghanistanIslamic State of AfghanistanNA
22alaAland IslandsNA
33albAlbaniaRepublic of AlbaniaNA
44dzaAlgeriaPeople's Democratic Republic of AlgeriaNA
55asmAmerican SamoaTerritory of American SamoaNA
66andAndorraPrincipality of AndorraNA

In [19]:
tmp <- aggregate(dat_susp$Measure.Values, by=list(Country...territory = dat_susp$Country...territory), FUN=sum)
head(tmp)


Out[19]:
Country...territoryx
1Aruba0
2Barbados277
3Brazil1490
4Colombia41094
5Costa Rica0
6Dominican Republic395

In [17]:
library(rworldmap)
library(RColorBrewer)


# Some renaming of Countries needs to happen because of non-standard names in dataset
dat_susp[which(dat_susp$Country...territory == 'Bolivia'), 
         "Country...territory"] <- 'Bolivia, Plurinational State of'
dat_susp[which(dat_susp$Country...territory == 'Bonaire'), "Country...territory"] <- 'Bonaire, Sint Eustatius and Saba'
dat_susp[which(dat_susp$Country...territory == 'Curacao'), "Country...territory"] <- 'Curaçao'
dat_susp[which(dat_susp$Country...territory == 'Saint Martin'), "Country...territory"] <- 'Saint Martin (French part)'
dat_susp[which(dat_susp$Country...territory == 'Sint Maarten'), "Country...territory"] <- 'Sint Maarten (Dutch part)'
dat_susp[which(dat_susp$Country...territory == 'United States Virgin Islands'), "Country...territory"] <- 'Virgin Islands, U.S.'
dat_susp[which(dat_susp$Country...territory == 
               'Venezuela'), "Country...territory"] <- 'Venezuela, Bolivarian Republic of'


dat_susp$ISO3 <- 'none'

head(dat_susp)


for (i in 1:nrow(dat_susp)) {
    name <- dat_susp[i, 'Country...territory']
    x <- countrySynonyms[which(countrySynonyms$name1 == name), 'ISO3']
    if (length(x) > 0) {
        dat_susp[i, 'ISO3'] <- toupper(x)
    } else {
        dat_susp[i, 'ISO3'] <- 'none'
    }
}

head(dat_susp)


Warning message:
In `[<-.factor`(`*tmp*`, iseq, value = c("Bolivia, Plurinational State of", : invalid factor level, NA generatedWarning message:
In `[<-.factor`(`*tmp*`, iseq, value = c("Bonaire, Sint Eustatius and Saba", : invalid factor level, NA generatedWarning message:
In `[<-.factor`(`*tmp*`, iseq, value = c("Curaçao", "Curaçao", : invalid factor level, NA generatedWarning message:
In `[<-.factor`(`*tmp*`, iseq, value = c("Saint Martin (French part)", : invalid factor level, NA generatedWarning message:
In `[<-.factor`(`*tmp*`, iseq, value = c("Sint Maarten (Dutch part)", : invalid factor level, NA generatedWarning message:
In `[<-.factor`(`*tmp*`, iseq, value = c("Virgin Islands, U.S.", : invalid factor level, NA generatedWarning message:
In `[<-.factor`(`*tmp*`, iseq, value = c("Venezuela, Bolivarian Republic of", : invalid factor level, NA generated
Out[17]:
Country...territoryMeasure.NamesMonth.of.DateReport.Epi.WeekYear.of.DateCountry...territory.1DateMeasure.ValuesLaboratory.confirmed.casesSuspected.casesISO3
1ArubaSuspectedJanuary12016ArubaJanuary, 2016000none
2ArubaSuspectedJanuary22016ArubaJanuary, 2016000none
3ArubaSuspectedJanuary32016ArubaJanuary, 2016000none
4ArubaSuspectedJanuary42016ArubaJanuary, 2016000none
5BarbadosSuspectedJanuary12016BarbadosJanuary, 2016030none
6BarbadosSuspectedJanuary22016BarbadosJanuary, 2016000none
Out[17]:
Country...territoryMeasure.NamesMonth.of.DateReport.Epi.WeekYear.of.DateCountry...territory.1DateMeasure.ValuesLaboratory.confirmed.casesSuspected.casesISO3
1ArubaSuspectedJanuary12016ArubaJanuary, 2016000ABW
2ArubaSuspectedJanuary22016ArubaJanuary, 2016000ABW
3ArubaSuspectedJanuary32016ArubaJanuary, 2016000ABW
4ArubaSuspectedJanuary42016ArubaJanuary, 2016000ABW
5BarbadosSuspectedJanuary12016BarbadosJanuary, 2016030BRB
6BarbadosSuspectedJanuary22016BarbadosJanuary, 2016000BRB

In [18]:
sPDF <- joinCountryData2Map(dF = dat_susp, joinCode = "ISO3", nameJoinColumn = "ISO3")

# using your green colours

numCats <- 10 #set number of categories to use
palette = colorRampPalette(brewer.pal(n=9, name='Greens'))(numCats)
mapCountryData(sPDF, 
               mapTitle = 'Zika Virus Suspected Cases 2015-2016 by Country',
               nameColumnToPlot="Measure.Values",
               catMethod="fixedWidth", 
               numCats=numCats, 
               colourPalette=palette)


431 codes from your data successfully matched countries in the map
177 codes from your data failed to match with a country code in the map
221 codes from the map weren't represented in your data

In [ ]:
print('hello Berlin!')

In [ ]:
# install.packages('plotly', repos = "http://cloud.r-project.org")

In [ ]:
# Work in progress...

# # Scatter Plot
# library(plotly)

# set.seed(123)

# x <- rnorm(1000)
# y <- rchisq(1000, df = 1, ncp = 0)
# group <- sample(LETTERS[1:5], size = 1000, replace = T)
# size <- sample(1:5, size = 1000, replace = T)

# ds <- data.frame(x, y, group, size)

# p <- plot_ly(ds, x = x, y = y, mode = "markers", group = group, size = size) %>%
#   layout(title = "Scatter Plot")
# embed_notebook(p)

In [ ]:
# library(plotly)

# set.seed(100)
# d <- diamonds[sample(nrow(diamonds), 1000), ]
# p <- plot_ly(d, x = carat, y = price, text = paste("Clarity: ", clarity),
#         mode = "markers", color = carat, size = carat)

# embed_notebook(p)

In [ ]: