In [36]:
# Evaluate the shared helper script directly from GitHub (needs network access).
# NOTE(review): helpers used later in this notebook (plot_size, loadlibrary,
# plot.bar, plot.series) are presumably defined by this script - confirm.
source("https://raw.githubusercontent.com/eogasawara/mylibrary/master/myGraphics.R")
# presumably sets the notebook's default plot width/height - TODO confirm units
plot_size(4,3)
In [37]:
# Attach packages through loadlibrary(), a helper that is not defined in this
# file (presumably provided by the sourced myGraphics.R script - confirm).
loadlibrary("TSPred")
loadlibrary("STMotif")
In [38]:
# Scalars: assign a value, auto-print it, and check its type.
x <- 2
x
is.numeric(x)

# Vectors: two parallel vectors holding six observations each.
weight <- c(
  60, 72, 57,
  90, 95, 72
)
height <- c(
  1.75, 1.80, 1.65,
  1.90, 1.74, 1.91
)

# Arithmetic is element-wise, so this yields one BMI value per observation.
bmi <- weight / height^2
print(bmi)

# Report mean and standard deviation with two decimals.
print(sprintf("%.2f +/- %.2f", mean(bmi), sd(bmi)))
In [39]:
# Scatter plot of weight (y axis) against height (x axis).
plot(x = height, y = weight)
In [40]:
# Redraw the scatter plot, then overlay the curve weight = 22.5 * height^2,
# i.e. the weight that corresponds to a constant BMI of 22.5.
plot(x = height, y = weight)
hh <- c(1.65, 1.70, 1.75, 1.80, 1.85, 1.90)
lines(x = hh, y = 22.5 * hh^2)
In [41]:
# One-sample t-test: is the mean of bmi different from 22.5?
t.test(x = bmi, mu = 22.5)
In [43]:
# Same scatter plot, drawn with plotting symbol 2 (triangles).
plot(x = height, y = weight, pch = 2)

# List the formal arguments accepted by the default plot method.
args(plot.default)

# Uncomment to open the help page:
# ?graphics::plot
In [44]:
# Named numeric vector containing one missing value.
x <- c(A = 1, B = NA, C = 3)

# NA propagates through mean() unless it is removed explicitly.
mean(x)
mean(x, na.rm = TRUE)

# Elements can be read and replaced by name.
names(x)
x["B"] <- 2

# The single element x["B"] is recycled across the whole vector.
x["B"] * x
Matrices can be filled from vectors or data frames.
Names can be assigned to rows and columns.
Operations such as transpose, scalar product, matrix product (`%*%`), and determinant are available.
Additional documentation can be found at https://www.statmethods.net/advstats/matrix.html.
In [45]:
# Turn a plain vector into a 3x3 matrix by assigning its dimensions
# (values are laid out column by column).
m <- 1:9
dim(m) <- c(3, 3)
m

# Same values, filled row by row, with named rows.
mb <- matrix(1:9, nrow = 3, byrow = TRUE)
rownames(mb) <- LETTERS[1:3]
mb

# Transpose.
t(m)

# NOTE: `*` is element-wise multiplication with recycling of x, not the
# matrix product; use %*% for the latter.
m * x

# Determinant.
det(m)
In [46]:
# Numeric codes 0..3 turned into a labelled factor in a single call.
pain <- c(0, 3, 2, 2, 1)
fpain <- factor(
  pain,
  levels = 0:3,
  labels = c("none", "mild", "medium", "severe")
)
fpain

# Underlying integer codes (1-based positions in the level set).
as.numeric(fpain)

# The level labels themselves.
levels(fpain)
In [47]:
# Two numeric vectors of equal length (11 values each).
x <- c(
  5260, 5470, 5640, 6180, 6390,
  6515, 6805, 7515, 7515, 8230, 8770
)
y <- c(
  3910, 4220, 3885, 5160, 5645,
  4680, 5265, 5975, 6790, 6900, 7335
)

# A list bundles them under the names A and B.
lst <- list(A = x, B = y)
lst

# Extract a single component by name.
lst[["A"]]
In [48]:
# Build a data frame from the two list components.
d <- data.frame(A = lst[["A"]], B = lst[["B"]])
d

# Keep the rows whose A value lies outside the closed interval [6000, 7000].
df <- d[!(d$A >= 6000 & d$A <= 7000), ]
df
In [49]:
# Column minima: lapply() returns a list, sapply() simplifies to a vector.
lapply(X = d, FUN = min, na.rm = TRUE)
sapply(X = d, FUN = min, na.rm = TRUE)

# apply() walks a margin: 1 = one result per row, 2 = one result per column.
apply(X = d, MARGIN = 1, FUN = min)
apply(X = d, MARGIN = 2, FUN = min)
In [50]:
# sort() returns the sorted values themselves.
sort(d[["B"]])

# order() returns the permutation of row indices that sorts column B ...
o <- order(d[["B"]])
o

# ... which can then be used to reorder the whole data frame.
ds <- d[o, ]
ds
In [51]:
# Download the UCI Wine data set (needs network access).
# The raw wine.data file has no header row, so header must be FALSE; with
# header = TRUE the first observation was silently consumed as column names.
# Columns therefore get the default names V1, V2, ...
wine <- read.table(
  "http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data",
  header = FALSE,
  sep = ","
)
head(wine)

# Round-trip through R's binary format: save, drop the object, load it back.
save(wine, file = "wine.RData")
rm(wine)
load("wine.RData")

# Export as plain text without row names or quoting.
# NOTE(review): write.table() separates fields with a space by default, so the
# file is not comma-separated despite its .csv extension; pass sep = "," if a
# true CSV is wanted - confirm what downstream readers expect.
write.table(wine, file = "wine.csv", row.names = FALSE, quote = FALSE)
In [52]:
# Build the quarterly flights example table.
#
# Returns a data frame with one row per quarter (2016-2018) and the columns
# Year, Months, Flights, Delays, plus two derived columns:
#   OnTime - Flights minus Delays
#   Perc   - delays as a rounded percentage of flights
create_dataset <- function() {
  raw_text <- "Year Months Flights Delays
2016 Jan-Mar 11 6
2016 Apr-Jun 12 5
2016 Jul-Sep 13 3
2016 Oct-Dec 12 5
2017 Jan-Mar 10 4
2017 Apr-Jun 9 3
2017 Jul-Sep 11 4
2017 Oct-Dec 25 15
2018 Jan-Mar 14 3
2018 Apr-Jun 12 5
2018 Jul-Sep 13 3
2018 Oct-Dec 15 4"

  # sep = "" splits the fields on any run of whitespace.
  flights <- read.table(text = raw_text, header = TRUE, sep = "")

  flights[["OnTime"]] <- flights[["Flights"]] - flights[["Delays"]]
  flights[["Perc"]] <- round(100 * flights[["Delays"]] / flights[["Flights"]])

  flights
}

data <- create_dataset()
head(data)
In [53]:
loadlibrary("dplyr")

# Mean and standard deviation of delays per quarter label.
# sd is computed first on purpose: the second expression overwrites `value`
# with its mean, so the order of the two summaries must not be swapped.
data_sd <- create_dataset() %>%
  select(variable = Months, value = Delays) %>%
  group_by(variable) %>%
  summarise(sd = sd(value), value = mean(value))

# Fix the factor levels so quarters appear in calendar order, not alphabetically.
data_sd[["variable"]] <- factor(
  data_sd[["variable"]],
  levels = c("Jan-Mar", "Apr-Jun", "Jul-Sep", "Oct-Dec")
)
head(data_sd)
The `ggplot2` library provides advanced graphics.
The `myGraphics.ipynb` notebook has some examples of creating nice graphics with `ggplot2`. Additional information can be found at https://nbviewer.jupyter.org/github/eogasawara/mylibrary/blob/master/myGraphics.ipynb.
In [54]:
loadlibrary("RColorBrewer")

# 11-colour "Spectral" palette; individual entries are picked by index below.
col_set <- brewer.pal(11, "Spectral")

# plot.bar() is a helper not defined in this file (presumably from the sourced
# myGraphics.R - confirm); the result is used here as a ggplot object.
grf <- plot.bar(data_sd, colors = col_set[2], alpha = 0.5)

# Add error bars spanning one standard deviation on each side of the mean.
grf <- grf +
  geom_errorbar(
    aes(x = variable, ymin = value - sd, ymax = value + sd),
    width = 0.2,
    colour = col_set[2],
    alpha = 0.9,
    size = 1.1
  )
plot(grf)
In [55]:
loadlibrary("reshape")

# Start again from the wide table ...
data <- create_dataset()
head(data)

# ... and reshape it to long format: the first two selected columns
# (Year, Months) stay as identifiers, the rest are stacked.
data <- melt(
  data[, c("Year", "Months", "Flights", "Delays", "OnTime", "Perc")],
  id.vars = c(1, 2)
)
head(data)
In [56]:
# Build a "Year-Months" label per row and turn it into a factor whose levels
# are the first twelve labels, in their order of appearance.
data[["x"]] <- sprintf("%d-%s", data[["Year"]], data[["Months"]])
data[["x"]] <- factor(data[["x"]], levels = data[["x"]][seq_len(12)])

# plot.series() is a helper not defined in this file (presumably from the
# sourced myGraphics.R - confirm); only the Flights and Delays series are drawn.
grf <- plot.series(
  data %>% filter(variable %in% c("Flights", "Delays")),
  colors = col_set[c(4, 2)]
)

# Tilt the x-axis labels by 45 degrees.
grf <- grf +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot(grf)
In [57]:
# One value per city.
stores <- data.frame(
  city = c("Rio de Janeiro", "Sao Paulo", "Paris", "New York", "Tokyo"),
  value = c(10, 12, 20, 25, 18)
)
head(stores)

# Lookup table mapping each city to its country.
divisions <- data.frame(
  city = c("Rio de Janeiro", "Sao Paulo", "Paris", "New York", "Tokyo"),
  country = c("Brazil", "Brazil", "France", "US", "Japan")
)
head(divisions)

# Join the two tables on the shared city column.
data <- merge(stores, divisions, by = "city")
head(data)

# Per country: number of rows and total value.
result <- data %>%
  group_by(country) %>%
  summarise(count = n(), amount = sum(value))
head(result)
In [58]:
# Print one line per row of `result`: the country and its amount, with the
# amount scaled by 0.8 whenever the row's count is greater than one.
# seq_len() is used instead of 1:nrow(result) so that an empty `result`
# produces zero iterations rather than iterating over c(1, 0) and failing.
for (i in seq_len(nrow(result))) {
  value <- result$amount[i]
  if (result$count[i] > 1) {
    value <- 0.8 * value
  }
  print(sprintf("%6s - %.1f", result$country[i], value))
}
In [ ]: