In [ ]:
library(forecast)
library(tseries)
In [ ]:
loadData <- function(dataFolder) {
    files <- list.files(dataFolder)
    data <- list()
    for(file in files) {
        df <- read.csv(paste0(dataFolder, "/", file), stringsAsFactors = FALSE)
        minYear <- min(df$Year)
        complaintType <- substr(file, 1, nchar(file) - 4)
        tsObject <- ts(df$Complaints, start = c(minYear, 1), frequency = 12)
        data[[complaintType]] <- tsObject
    }
    data
}
data <- loadData("../../data/topNComplaints")
# change dates if data changes
train_stop <- c(2015, 6)
test_start <- c(2015, 7)
In [ ]:
series <- data[["Dog menace "]]
series
In [ ]:
# data before April 2012 is too sparse to be useful
train_start <- c(2012, 4)
series <- window(series, start=train_start, end=c(2016, 9))
series
In [ ]:
tsdisplay(series)
In [ ]:
plot(series, col="red", lty=2)
lines(tsclean(series), lty=1)
legend("topright", col=c("red", "black"), lty=c(2,1), legend=c("Original", "Cleaned"))
In [ ]:
series.cleaned <- tsclean(series)
There are no significant outliers, and the data is clean.
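As a quick sanity check (a sketch, not part of the original analysis), the cell below counts how many observations tsclean actually altered; a count of zero or near zero supports the statement above.
In [ ]:
# sanity-check sketch: number of points changed by tsclean
sum(series != series.cleaned)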
In [ ]:
#Comparing the seasonality of entire dataset and the training data
plot(stl(series, s.window="periodic"))
# The full series does look like it has a seasonal component - let's take a closer look at that.
plot(stl(window(series, end = c(2015, 6)), s.window="periodic"))
# The training window (up to June 2015) is decomposed in the same way, for comparison.
In [ ]:
plot(stl(series, s.window=6)) # change s.window to something that makes sense
In [ ]:
# let's take a look at which months this series peaks in
# looks like there are two spikes, one in March and the other in September
# the data does contain another spike in between the two - which isn't apparent in the
# seasonal component
seasonal <- stl(series, s.window=6)$time.series[, 1] # same s.window as above
plot(seasonal, col="grey")
month1 <- 9 # September
month2 <- 3 # March
for(i in 2012:2016) {
    abline(v=(month1-1)/12 + i, lty=2)
    abline(v=(month2-1)/12 + i, lty=3)
}
In [ ]:
# let's superimpose this on the original data, to see if this effect
# is significant
plot(series, col="grey")
month1 <- 9
month2 <- 3
for(i in 2012:2016) {
    abline(v=(month1-1)/12 + i, lty=2)
    abline(v=(month2-1)/12 + i, lty=3)
}
In [ ]:
# this seasonal pattern looks like it fits the data well - the seasonal component does seem to strengthen as time progresses
# let's set s.window = "periodic"
stl.fit <- stl(series, s.window="periodic")
series.adj <- seasadj(stl.fit)
tsdisplay(series.adj)
In [ ]:
stl.cleaned.fit <- stl(series.cleaned, s.window=6)
series.cleaned.adj <- seasadj(stl.cleaned.fit)
tsdisplay(series.cleaned.adj)
In [ ]:
Acf(series.adj)
In [ ]:
# the above series is a classic example of a series that requires a diff of order 1,
# so let's try that out and take a look at the Acf to see if it is overdifferenced
tsdisplay(diff(series.adj, lag=1, differences = 1))
In [ ]:
# looks like the series has a strong, negative ACF at lag 12
# indicating a possible seasonality?
# let's also look at d=2
tsdisplay(diff(series.adj, lag = 1, differences = 2))
# this is clearly overdifferenced, so d <= 1
In [ ]:
# take a look at the standard deviation for each level of differencing
sd.0 <- sd(series.adj)
sd.1 <- sd(diff(series.adj, differences = 1))
sd.2 <- sd(diff(series.adj, differences = 2))
print(paste0("SD with d = 0: ", sd.0, ", SD with d = 1: ", sd.1, ", SD with d = 2: ", sd.2))
# in terms of sd, d=1 is a better fit
In [ ]:
series.diff <- diff(series.adj, lag=1, differences = 1)
In [ ]:
plot(series.diff, col="grey")
# a 2x4 MA
lines(ma(ma(series.diff, order=2), order=4))
abline(mean(series.diff), 0, col="blue", lty=2)
In [ ]:
ndiffs(series.adj)
Next, we need to estimate p and q. To do this, we take a look at the PACF of the data. Note that this analysis is done on the differenced data; if we decide to fit a model with d = 0, then we need to perform the same analysis on the undifferenced data as well.
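As an optional cross-check on the manually chosen orders (a sketch, not part of the original selection), auto.arima from the already-loaded forecast package can suggest an order for the seasonally adjusted series; the PACF-based reasoning below stands on its own.
In [ ]:
# optional sanity check: automatic order selection on the seasonally adjusted series
auto.arima(series.adj)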
In [ ]:
# let's consider d=0 first
# looks like an AR(1), MA(4)
Pacf(series.adj)
In [ ]:
# let's try with d=1
# looks like an MA(12) process, possibly with an AR(3) component
Pacf(series.diff)
In [ ]:
modelArima <- function(series, order, h, testData = NULL, lambda = NULL) {
    fit <- Arima(series, order = order, lambda = lambda)
    print(summary(fit))
    predictions <- forecast(fit, h = h, lambda = lambda)
    # compute the y-axis range (includes the test data if supplied)
    min.yvalue <- min(series, testData)
    max.yvalue <- max(series, testData)
    plot(predictions, ylim = c(min.yvalue, max.yvalue))
    if(!is.null(testData)) {
        lines(testData, col = "red", lty = 2)
        print(accuracy(predictions, testData))
    }
    # check if the residuals look like white noise
    Acf(residuals(fit), main = "Residuals")
    # portmanteau test; fitdf = p + q, the number of fitted ARMA coefficients
    print(Box.test(residuals(fit), lag = 24, fitdf = sum(order[c(1, 3)]), type = "Ljung"))
}
In [ ]:
# split the series into a test and a train set
series.train <- window(series.adj, end=train_stop)
series.test <- window(series.adj, start=test_start)
In [ ]:
modelArima(series.train, c(1, 0, 4), length(series.test), series.test)
In [ ]:
modelArima(series.train, c(3, 1, 12), length(series.test), series.test)
In [ ]:
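# Augmented Dickey-Fuller tests (from the tseries package) as an additional stationarity
# check on the original, seasonally adjusted, and differenced series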
adf.test(series)
In [ ]:
adf.test(series.adj)
In [ ]:
adf.test(series.diff)
In [ ]:
# let's try a BoxCox transform
est.lambda <- BoxCox.lambda(series.diff)
est.lambda
In [ ]:
# this is the ADF test on the Box-Cox transformed (differenced) series
adf.test(BoxCox(series.diff, lambda = est.lambda))
In [ ]:
# print the series out once
tsdisplay(BoxCox(series.diff, lambda = est.lambda))
In [ ]:
# let's retry the models with lambda
modelArima(series.train, c(1, 0, 4), length(series.test), series.test, lambda = BoxCox.lambda(series.adj))
In [ ]:
modelArima(series.train, c(3, 1, 12), length(series.test), series.test, est.lambda)
In [ ]:
# series = original data
# series.cleaned = outliers removed
# series.adj = original data, seasonally adjusted
# series.cleaned.adj = cleaned data, seasonally adjusted
# series.train = original seasonally adjusted data's train split
# series.test = original seasonally adjusted data's test split
# train_data_adj_cleaned / test_data_adj_cleaned = cleaned, seasonally adjusted data's train/test splits (created below)
# stl.fit = original data's stl
# stl.cleaned.fit = cleaned data's stl
# tsdisplay(series.adj)
train_start <- c(2012, 4)
train_end <- c(2015, 6)
test_start <- c(2015, 7)
test_end <- c(2016, 6)
seasonal <- stl.fit$time.series[, "seasonal"]
seasonal_cleaned <- stl.cleaned.fit$time.series[, "seasonal"]
In [ ]:
## Function for finding the per-period sum or average of a ts object (used here on the seasonal components)
period_stat <- function(ts_data_in, type = 1, start_value, years){
    # type 1: sum over the years for each period (e.g. each month)
    # type 2: mean over the years for each period
    freq <- frequency(ts_data_in)
    start_val <- start(ts_data_in)
    # pad the front with NAs so that the vector starts at period 1 (e.g. January)
    ts_data_in <- c(rep(NA, start_val[2] - 1), ts_data_in)
    # number of (possibly partial) years, computed on the padded vector so no tail is dropped
    max_limit <- ceiling(length(ts_data_in)/freq)
    sum_vec <- numeric(0)
    freq_sum <- numeric(0)
    for(i in 1:max_limit){
        vec <- ts_data_in[(((i-1)*freq)+1):(((i-1)*freq)+freq)]
        freq_vector <- as.numeric(!is.na(vec)) # which periods are observed in this year
        vec[is.na(vec)] <- 0
        if(i == 1){
            sum_vec <- vec
            freq_sum <- freq_vector
        }else{
            sum_vec <- sum_vec + vec
            freq_sum <- freq_sum + freq_vector
        }
    }
    if(type == 1){
        final_ts <- sum_vec
    }else if(type == 2){
        final_ts <- sum_vec/freq_sum
    }else{
        stop("Invalid type")
    }
    return(ts(rep(final_ts, years), frequency = freq, start = start_value))
}
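A quick illustration of what period_stat returns (a sketch on a toy monthly series, purely for exposition; the toy object is not used anywhere else): with type = 2 it computes the per-month mean across the available years and repeats that 12-value pattern for the requested number of years.
In [ ]:
# illustrative only: a toy monthly series starting in April, per-month means repeated for 2 years
toy <- ts(1:20, start = c(2012, 4), frequency = 12)
period_stat(toy, type = 2, start_value = c(2012, 1), years = 2)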
In [ ]:
#Shift the series by a constant so that all values are positive (multiplicative ETS models require strictly positive data)
min_ts_value <- min(series.adj)
min_ts_cleaned_value <- min(series.cleaned.adj)
bias_value <- (-1*min_ts_value) + 1
bias_value_cleaned <- (-1*min_ts_cleaned_value) + 1
#min(series)
#min(series.cleaned)
#min(series.adj)
#min(series.cleaned.adj)
ES_series <- series.adj + bias_value
ES_series_cleaned <- series.cleaned.adj + bias_value_cleaned
#plot(ES_series)
train_data_adj <- window(ES_series,start = train_start, end=train_end)
test_data_adj <- window(ES_series, start= test_start, end = test_end)
train_data_adj_cleaned <- window(ES_series_cleaned,start = train_start, end = train_end)
test_data_adj_cleaned <- window(ES_series_cleaned, start = test_start, end = test_end)
train_data <- window(series, start = train_start, end = train_end)
test_data <- window(series, start = test_start, end = test_end)
train_data_cleaned <- window(series.cleaned, start = train_start, end = train_end)
test_data_cleaned <- window(series.cleaned, start = test_start, end = test_end)
In [ ]:
#Getting the mean value from the seasonal components for the data set and not for the training set alone.
#Need to adjust based on the input from Suchana.
seasonal_mean <- period_stat(seasonal,2,c(2012,1),years = 7)
seasonal_cleaned_mean <- period_stat(seasonal_cleaned,2,c(2012,1),years = 7)
In [ ]:
#Preprocessing the data: replacing zeros with a small positive value (multiplicative ETS models cannot handle zeros)
train_data_adj[train_data_adj==0]=0.01
train_data_adj_cleaned[train_data_adj_cleaned==0]=0.01
In [ ]:
all_types <- c("ANN","AAN","AAA","ANA","MNN","MAN","MNA","MAA","MMN","MNM","MMM","MAM")
forecast_values <- 12
# e.g. AAA -> additive error, additive trend and additive seasonality
#      ANN -> additive error, no trend or seasonality
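For reference, a single fit of one of these specifications is shown below (illustrative only; the example_fit name is introduced just for this demonstration, and the systematic comparison over all_types happens in fit_function in the next cell).
In [ ]:
# illustrative single fit: additive error, additive trend, additive seasonality on the adjusted training data
example_fit <- ets(train_data_adj, model = "AAA")
example_fit$method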
In [ ]:
fit_function <- function(train_data, test_data)
{
    all_fit <- list()
    test_models <- list()
    print("Fitting various models: ")
    for (bool in c(TRUE, FALSE)){
        for (model_type in all_types){
            # a damped model needs a trend component, so skip damped variants of "N"-trend models
            if(bool && substr(model_type, 2, 2) == "N"){
                next
            }
            test_model <- ets(train_data, model = model_type, damped = bool)
            model_label <- paste0("ETS Model: ", model_type, ", Damped: ", bool)
            # store the test-set MAPE (column 5 of accuracy()) ...
            all_fit[[model_label]][1] <-
                accuracy(forecast.ets(test_model, h = forecast_values)$mean, test_data)[5]
            # ... and the Ljung-Box p-value of the residuals (as a percentage); this can be used
            # later to exclude models whose residuals are autocorrelated at the 10% significance level
            all_fit[[model_label]][2] <-
                100*(Box.test(test_model$residuals, lag = 20, type = "Ljung-Box")$p.value)
            test_models[[model_label]] <- test_model
            print(test_model$method)
            print(accuracy(forecast.ets(test_model, h = forecast_values)$mean, test_data)[5])
            print("")
        }
    }
    return(list(all_fit, test_models))
}
In [ ]:
# Fitting the models for all four datasets: original, cleaned, seasonally adjusted, and cleaned + seasonally adjusted
models_adj <- fit_function(train_data_adj,test_data_adj) #Seasonally adjusted data
models_adj_cleaned <- fit_function(train_data_adj_cleaned,test_data_adj_cleaned) #Seasonally adjusted, cleaned (outliers removed) data
models <- fit_function(train_data,test_data) #Original data
models_cleaned <- fit_function(train_data_cleaned, test_data_cleaned) #Original, cleaned data
In [ ]:
all_fit_adj <- models_adj[[1]]
test_models_adj <- models_adj[[2]]
all_fit_adj_cleaned<- models_adj_cleaned[[1]]
test_models_adj_cleaned <- models_adj_cleaned[[2]]
all_fit <- models[[1]]
test_models <- models[[2]]
all_fit_cleaned <- models_cleaned[[1]]
test_models_cleaned <- models_cleaned[[2]]
In [ ]:
#Finding the best fit
proper_models <- all_fit_adj
best_mape <- min(sapply(proper_models,function(x)x[1]))
best_model <- names(which.min(sapply(proper_models,function(x)x[1])))
print(paste0("Best Model:",best_model))
print(paste0("Best Mape: ",best_mape))
#Finding top n fits
Top_n <- 3
if(length(proper_models)<3){Top_n <- length(proper_models)}
top_mape_val <- proper_models[order(sapply(proper_models, function(x)x[1]))][1:Top_n]
top_models_adj <- names(top_mape_val)
top_mape_val
seasonal_mean
In [ ]:
#Finding the best fit
proper_models <- all_fit_adj_cleaned
best_mape <- min(sapply(proper_models,function(x)x[1]))
best_model <- names(which.min(sapply(proper_models,function(x)x[1])))
print(paste0("Best Model:",best_model))
print(paste0("Best Mape: ",best_mape))
#Finding top n fits
#top_models <- c()
Top_n <- 3
if(length(proper_models)<3){Top_n <- length(proper_models)}
top_mape_val <- proper_models[order(sapply(proper_models, function(x)x[1]))][1:Top_n]
top_models_adj_cleaned <- names(top_mape_val)
top_mape_val
seasonal_cleaned_mean
In [ ]:
#Finding the best fit
proper_models <- all_fit
best_mape <- min(sapply(proper_models,function(x)x[1]))
best_model <- names(which.min(sapply(proper_models,function(x)x[1])))
print(paste0("Best Model:",best_model))
print(paste0("Best Mape: ",best_mape))
#Finding top n fits
Top_n <- 3
if(length(proper_models)<3){Top_n <- length(proper_models)}
top_mape_val <- proper_models[order(sapply(proper_models, function(x)x[1]))][1:Top_n]
top_models <- names(top_mape_val)
top_mape_val
In [ ]:
#Finding the best fit
proper_models <- all_fit_cleaned
best_mape <- min(sapply(proper_models,function(x)x[1]))
best_model <- names(which.min(sapply(proper_models,function(x)x[1])))
print(paste0("Best Model:",best_model))
print(paste0("Best Mape: ",best_mape))
#Finding top n fits
Top_n <- 3
if(length(proper_models)<3){Top_n <- length(proper_models)}
top_mape_val <- proper_models[order(sapply(proper_models, function(x)x[1]))][1:Top_n]
top_models_cleaned <- names(top_mape_val)
top_mape_val
In [ ]:
plot(ES_series,col = "black")
lines(test_data_adj, col = "blue")
lines(forecast.ets(test_models_adj[[top_models_adj[1]]],h=12)$mean, col = "red") #Top model
lines(forecast.ets(test_models_adj[[top_models_adj[2]]],h=12)$mean, col = "green") #Top second model
lines(forecast.ets(test_models_adj[[top_models_adj[3]]],h=12)$mean, col = "yellow") #Top third model
legend("topleft", lty=1,col = c("blue","red","green","yellow"),
c("Test data", "Best model", "Second best", "Third best"))
#Observation: unusual peak at December 2015 - need to check whether it is an anomaly
In [ ]:
plot(ES_series_cleaned,col = "black")
lines(test_data_adj_cleaned, col = "blue")
lines(forecast.ets(test_models_adj_cleaned[[top_models_adj_cleaned[1]]],h=12)$mean, col = "red") #Top model
lines(forecast.ets(test_models_adj_cleaned[[top_models_adj_cleaned[2]]],h=12)$mean, col = "green") #Top second model
lines(forecast.ets(test_models_adj_cleaned[[top_models_adj_cleaned[3]]],h=12)$mean, col = "yellow") #Top third model
legend("topleft", lty=1,col = c("blue","red","green","yellow"),
c("Test data", "Best model", "Second best", "Third best"))
#Observation: unusual peak at December 2015 - need to check whether it is an anomaly
In [ ]:
#all_fit
#test_models[[all_fit[1]]]
plot(series,col = "black")
lines(test_data, col = "blue")
top_models[1]
accuracy(test_models[[top_models[1]]])
top_models[2]
accuracy(test_models[[top_models[2]]])
top_models[3]
accuracy(test_models[[top_models[3]]])
lines(forecast.ets(test_models[[top_models[1]]],h=12)$mean, col = "red") #Top model
lines(forecast.ets(test_models[[top_models[2]]],h=12)$mean, col = "green") #Top second model
lines(forecast.ets(test_models[[top_models[3]]],h=12)$mean, col = "yellow") #Top third model
legend("topleft", lty=1,col = c("blue","red","green","yellow"),
c("Test data", "Best model", "Second best", "Third best"))
#Observation: unusual peak at December 2015 - need to check whether it is an anomaly
In [ ]:
#plot(forecast.ets(test_models_cleaned[[top_models_cleaned[1]]],h=12))
accuracy(test_models_cleaned[[top_models_cleaned[1]]])
accuracy(test_models_cleaned[[top_models_cleaned[2]]])
accuracy(test_models_cleaned[[top_models_cleaned[3]]])
plot(series.cleaned,col = "black")
lines(test_data_cleaned, col = "blue")
lines(forecast.ets(test_models_cleaned[[top_models_cleaned[1]]],h=12)$mean, col = "red") #Top model
lines(forecast.ets(test_models_cleaned[[top_models_cleaned[2]]],h=12)$mean, col = "green") #Top second model
lines(forecast.ets(test_models_cleaned[[top_models_cleaned[3]]],h=12)$mean, col = "yellow") #Top third model
legend("topleft",lty=c(1,1,1,1,2),col = c("blue","red","green","yellow","brown"),
c("Test data(cleaned)", "Best model", "Second best", "Third best"))
#Observation: Unusual peak at December'15. To check if it is an anomaly
In [ ]:
print("Case 1: Seasonally adjusted data")
#Subtracting the bias value that was added earlier to handle the negative values
ES_series_bias <- ES_series - bias_value
test_series_bias <- test_data_adj - bias_value
forecast1_bias <- forecast.ets(test_models_adj[[top_models_adj[1]]],h=12)$mean - bias_value
forecast2_bias <- forecast.ets(test_models_adj[[top_models_adj[2]]],h=12)$mean - bias_value
forecast3_bias <- forecast.ets(test_models_adj[[top_models_adj[3]]],h=12)$mean - bias_value
#Adding back the seasonal value from stl decomposition
ES_value_adj <- ES_series_bias + seasonal
test_series_adj <- test_series_bias + seasonal
#Adding back the mean seasonal component to the forecasted data
forecast1_adj <- forecast1_bias + seasonal_mean
forecast2_adj <- forecast2_bias + seasonal_mean
forecast3_adj <- forecast3_bias + seasonal_mean
#Calculating the accuracy on the training data
accuracy(test_models_adj[[top_models_adj[1]]])
accuracy(test_models_adj[[top_models_adj[2]]])
accuracy(test_models_adj[[top_models_adj[3]]])
In [ ]:
#Checking the MAPE values against the test data on the original scale (seasonality added back)
print(paste0("Top model: ", top_models_adj[1]))
accuracy(forecast1_adj,test_series_adj)
print(paste0("Second model: ", top_models_adj[2]))
accuracy(forecast2_adj,test_series_adj)
print(paste0("Third model: ", top_models_adj[3]))
accuracy(forecast3_adj,test_series_adj)
#accuracy(test_data, forecast.ets(test_models[[top_models[3]]],h=12)$mean )
In [ ]:
print("Case 2: Seasonally adjusted & cleaned data")
#Subtracting the bias value that was added earlier to handle the negative values
ES_series_bias_cleaned <- ES_series_cleaned - bias_value_cleaned
test_series_bias_cleaned <- test_data_adj_cleaned - bias_value_cleaned
forecast1_bias <- forecast.ets(test_models_adj_cleaned[[top_models_adj_cleaned[1]]],h=12)$mean - bias_value_cleaned
forecast2_bias <- forecast.ets(test_models_adj_cleaned[[top_models_adj_cleaned[2]]],h=12)$mean - bias_value_cleaned
forecast3_bias <- forecast.ets(test_models_adj_cleaned[[top_models_adj_cleaned[3]]],h=12)$mean - bias_value_cleaned
#Adding back the seasonal value from stl decomposition
ES_value_adj_cleaned <- ES_series_bias_cleaned + seasonal_cleaned
test_series_adj_cleaned <- test_series_bias_cleaned + seasonal_cleaned
#Adding back the mean seasonal component to the forecasted data
forecast1_adj_cleaned <- forecast1_bias + seasonal_cleaned_mean
forecast2_adj_cleaned <- forecast2_bias + seasonal_cleaned_mean
forecast3_adj_cleaned <- forecast3_bias + seasonal_cleaned_mean
#Calculating the accuracy on the training data
accuracy(test_models_adj_cleaned[[top_models_adj_cleaned[1]]])
accuracy(test_models_adj_cleaned[[top_models_adj_cleaned[2]]])
accuracy(test_models_adj_cleaned[[top_models_adj_cleaned[3]]])
In [ ]:
#Checking the MAPE values against the test data on the original scale (seasonality added back)
print(paste0("Top model: ", top_models_adj_cleaned[1]))
accuracy(forecast1_adj_cleaned,test_series_adj_cleaned)
print(paste0("Second model: ", top_models_adj_cleaned[2]))
accuracy(forecast2_adj_cleaned,test_series_adj_cleaned)
print(paste0("Third model: ", top_models_adj_cleaned[3]))
accuracy(forecast3_adj_cleaned,test_series_adj_cleaned)
top_models
#accuracy(forecast.ets(test_models[[top_models[1]]],h=12)$mean, test_data)
#accuracy(test_data, forecast.ets(test_models[[top_models[3]]],h=12)$mean)
In [ ]:
#Ljung-Box test - checks whether the model residuals are autocorrelated (i.e. whether they resemble white noise)
# A small helper; model_list is the list of fitted models in which to look up model_name
residual_analysis <- function(model_name, model_list){
    print(model_name)
    print(Box.test(model_list[[model_name]]$residuals, lag = 20, type = "Ljung-Box"))
    Acf(model_list[[model_name]]$residuals, main = model_name)
}
In [ ]:
#Case 1: Seasonally adjusted models
#Residual analysis for the top three models
residual_analysis(top_models_adj[1], test_models_adj) #Top model
residual_analysis(top_models_adj[2], test_models_adj) #Second best model
residual_analysis(top_models_adj[3], test_models_adj) #Third best model
In [ ]:
#Case 2 - Seasonally adjusted, cleaned models
#Residual analysis for the top three models
residual_analysis(top_models_adj_cleaned[1], test_models_adj_cleaned) #Top model
residual_analysis(top_models_adj_cleaned[2], test_models_adj_cleaned) #Second best model
residual_analysis(top_models_adj_cleaned[3], test_models_adj_cleaned) #Third best model
In [ ]:
#Case 3 - Models on original data
#Residual analysis for the top three models
residual_analysis(top_models[1], test_models) #Top model
residual_analysis(top_models[2], test_models) #Second best model
residual_analysis(top_models[3], test_models) #Third best model
In [ ]:
#Case 4 - Models on cleaned data
#Residual analysis for the top three models
residual_analysis(top_models_cleaned[1], test_models_cleaned) #Top model
residual_analysis(top_models_cleaned[2], test_models_cleaned) #Second best model
residual_analysis(top_models_cleaned[3], test_models_cleaned) #Third best model
In [ ]:
plot(ES_value_adj,col = "black", ylab = "No of complaints",
main = "Model with seasonal adjustment")
lines(test_series_adj, col = "blue") #Original test data
accuracy(forecast1_adj,test_series_adj)
accuracy(forecast2_adj,test_series_adj)
accuracy(forecast3_adj,test_series_adj)
lines(test_series_bias + seasonal_mean, col = "brown", lty =2) #Deseasonalised test data with the average seasonal component added back
lines(forecast1_adj, col = "red") #Top model
lines(forecast2_adj, col = "green") #Top second model
lines(forecast3_adj, col = "yellow") #Top third model
legend("topleft",lty=c(1,1,1,1,2),col = c("blue","red","green","yellow","brown"),
c("Test data", "Best model", "Second best", "Third best","test data with seasonal mean"))
In [ ]:
plot(ES_value_adj_cleaned,col = "black", ylab = "No of complaints",
main = "Model with seasonal adjustment and cleaning")
lines(test_series_adj_cleaned, col = "blue") #Original test data
accuracy(forecast1_adj_cleaned,test_series_adj_cleaned)
accuracy(forecast2_adj_cleaned,test_series_adj_cleaned)
accuracy(forecast3_adj_cleaned,test_series_adj_cleaned)
lines(test_series_bias_cleaned + seasonal_cleaned_mean, col = "brown", lty =2) #Deseasonalised test data with the average seasonal component added back
lines(forecast1_adj_cleaned, col = "red") #Top model
lines(forecast2_adj_cleaned, col = "green") #Top second model
lines(forecast3_adj_cleaned, col = "yellow") #Top third model
legend("topleft",lty=c(1,1,1,1,2),col = c("blue","red","green","yellow","brown"),
c("Test data", "Best model", "Second best", "Third best","test data with seasonal mean"))
In [ ]:
plot(series,col = "black", ylab = "No of complaints",
main = "Model with original data")
lines(test_data, col = "blue") #Original test data
accuracy(forecast.ets(test_models[[top_models[1]]],h=12)$mean,test_data)
accuracy(forecast.ets(test_models[[top_models[2]]],h=12)$mean,test_data)
accuracy(forecast.ets(test_models[[top_models[3]]],h=12)$mean,test_data)
lines(forecast.ets(test_models[[top_models[1]]],h=12)$mean, col = "red") #Top model
lines(forecast.ets(test_models[[top_models[2]]],h=12)$mean, col = "green") #Top second model
lines(forecast.ets(test_models[[top_models[3]]],h=12)$mean, col = "yellow") #Top third model
legend("topleft", lty=1,col = c("blue","red","green","yellow"),
c("Test data", "Best model", "Second best", "Third best"))
#Observation: unusual peak at December 2015 - need to check whether it is an anomaly
In [ ]:
plot(series.cleaned,col = "black", main = "Model with cleaned data")
lines(test_data_cleaned, col = "blue")
accuracy(forecast.ets(test_models_cleaned[[top_models_cleaned[1]]],h=12)$mean,test_data_cleaned)
accuracy(forecast.ets(test_models_cleaned[[top_models_cleaned[2]]],h=12)$mean,test_data_cleaned)
accuracy(forecast.ets(test_models_cleaned[[top_models_cleaned[3]]],h=12)$mean,test_data_cleaned)
lines(forecast.ets(test_models_cleaned[[top_models_cleaned[1]]],h=12)$mean, col = "red") #Top model
lines(forecast.ets(test_models_cleaned[[top_models_cleaned[2]]],h=12)$mean, col = "green") #Top second model
lines(forecast.ets(test_models_cleaned[[top_models_cleaned[3]]],h=12)$mean, col = "yellow") #Top third model
legend("topleft",lty=c(1,1,1,1,2),col = c("blue","red","green","yellow","brown"),
c("Test data(cleaned)", "Best model", "Second best", "Third best","Actual test data"))
#Observation: Unusual peak at December'15. To check if it is an anomaly
In [ ]: