In [ ]:
library(forecast)
library(tseries)
In [ ]:
loadData <- function(dataFolder) {
  files <- list.files(dataFolder)
  data <- list()
  for (file in files) {
    df <- read.csv(paste0(dataFolder, "/", file), stringsAsFactors = FALSE)
    minYear <- min(df$Year)
    complaintType <- substr(file, 1, nchar(file) - 4)  # strip the ".csv" extension
    tsObject <- ts(df$Complaints, start = c(minYear, 1), frequency = 12)
    data[[complaintType]] <- tsObject
  }
  data  # named list of monthly time series, keyed by complaint type
}
data <- loadData("../../data/topNComplaints")
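Before picking a series, it helps to confirm which complaint types were loaded, since the key used below ("Mosquito menace " with a trailing space) must match a list name exactly. This is an illustrative check, not part of the original analysis.
In [ ]:
# illustrative: complaint types available in the loaded list
names(data)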
In [ ]:
series <- data[["Mosquito menace "]]
series
In [ ]:
tsdisplay(series)
In [ ]:
# data before April 2012 are too sparse to use, so restrict the series to Apr 2012 - Jun 2016
train_start <- c(2012, 4)
series <- window(series, start= train_start, end=c(2016, 6))
tsdisplay(series)
In [ ]:
plot(series, col="red", lty=2)
lines(tsclean(series), lty=1)
legend("topright", col=c("red", "black"), lty=c(2,1), legend=c("Original", "Cleaned"))
Let's create the cleaned series. For the initial analysis we will use both series, one cleaned and the other left as is. For fitting time series models, we will stick to the cleaned series.
In [ ]:
series.cleaned <- tsclean(series)
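As an illustrative aside (not part of the original analysis), we can also list the time points at which tsclean() changed a value, to see which observations it treated as outliers; the variable name below is a throwaway.
In [ ]:
# illustrative: time points where tsclean() replaced the original value
changed <- which(as.numeric(series) != as.numeric(series.cleaned))
time(series)[changed]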
In [ ]:
plot(stl(series, s.window="periodic"))
# the two spikes in the seasonal component are probably pronounced because of the outliers,
# so the seasonal component is better estimated from the cleaned data
# compare this plot with the decomposition of the training period alone
plot(stl(window(series,end = c(2015,6)) , s.window="periodic"))
In [ ]:
# let's fiddle with the s.window parameter
plot(stl(series, s.window=6))
In [ ]:
# now take a look at the cleaned series
plot(stl(series.cleaned, s.window=6))
# this is much more regular, especially the seasonal component.
In [ ]:
# let's take a look at the month in which this series peaks
seasonal <- stl(series.cleaned, s.window=6)$time.series[, 1] # change s.window
plot(seasonal, col="grey")
month <- 11 # change this to month you want
for(i in 2012:2016) {
abline(v=(month-1)/12 + i, lty=2)
}
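A hedged alternative to eyeballing the dashed lines above: compute the average seasonal effect for each calendar month and report the peak directly (variable names below are illustrative).
In [ ]:
# illustrative: average seasonal effect by calendar month, and the peak month
monthly_effect <- tapply(as.numeric(seasonal), cycle(seasonal), mean)
month.name[which.max(monthly_effect)]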
In [ ]:
# this decomposition seems to fit the data well, since the seasonal component is allowed to change as time progresses
# so let's keep s.window = 6
stl.fit <- stl(series, s.window=6)
series.adj <- seasadj(stl.fit)
tsdisplay(series.adj)
In [ ]:
stl.cleaned.fit <- stl(series.cleaned, s.window=6)
series.cleaned.adj <- seasadj(stl.cleaned.fit)
tsdisplay(series.cleaned.adj)
In [ ]:
Acf(series.adj)
In [ ]:
# the above series is a classic example of a series that requires a diff of order 1,
# so let's try that out and take a look at the Acf to see if it is overdifferenced
tsdisplay(diff(series.adj, lag=1, differences = 1))
In [ ]:
# looks like the series has a strong, positive ACF at lag 12
# it's possible that this series still has a seasonal component
# let's also look at d=2
tsdisplay(diff(series.adj, lag = 1, differences = 2))
In [ ]:
# take a look at standard-deviation
sd.0 <- sd(series.adj)
sd.1 <- sd(diff(series.adj, differences = 1))
sd.2 <- sd(diff(series.adj, differences = 2))
print(paste0("SD with d = 0: ", sd.0, ", SD with d = 1: ", sd.1, ", SD with d = 2: ", sd.2))
# in terms of sd, d=1 is a better fit
In [ ]:
ndiffs(series.adj)
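As a cross-check on the amount of differencing (a sketch, not something the later analysis depends on), the ADF and KPSS tests from the tseries package loaded above can be applied to the seasonally adjusted series; the ADF test takes non-stationarity as its null hypothesis, while KPSS takes stationarity.
In [ ]:
# illustrative cross-check on stationarity / the need for differencing
print(adf.test(series.adj))
print(kpss.test(series.adj))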
In [ ]:
series.diff <- diff(series.adj, lag=1, differences = 1)
In [ ]:
plot(series.diff, col="grey")
# a 2x4 MA
lines(ma(ma(series.diff, order=2), order=4))
abline(mean(series.diff), 0, col="blue", lty=2)
Next, we need to estimate p and q. To do this, we take a look at the PACF of the data. Note that this analysis is done on the differenced data. If we decide to fit a model with d=0, then we need to perform this analysis for the un-differenced data as well
In [ ]:
# let's try d=0 first
# looks like an AR(1), MA(12) process
Pacf(series.adj)
In [ ]:
# let's try with d=1
# looks like AR(11), MA(4) process
Pacf(series.diff)
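As a further sanity check (a sketch, not the model selection the notebook relies on), auto.arima() can search the (p, d, q) orders on the seasonally adjusted series, to be compared with the orders read off the ACF/PACF plots above; auto.fit is an illustrative name.
In [ ]:
# illustrative: automatic (non-seasonal) order selection for comparison
auto.fit <- auto.arima(series.adj, seasonal = FALSE)
summary(auto.fit)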
In [ ]:
modelArima <- function(series, order, h, testData = NULL) {
  fit <- Arima(series, order = order)
  print(summary(fit))
  predictions <- forecast(fit, h)
  # compute the y-axis range (min/max simply ignore a NULL testData)
  min.yvalue <- min(series, testData)
  max.yvalue <- max(series, testData)
  plot(predictions, ylim = c(min.yvalue, max.yvalue))
  if (!is.null(testData)) {
    lines(testData, col = "red", lty = 2)
    print(accuracy(predictions, testData))
  }
  # check whether the residuals look like white noise
  Acf(residuals(fit), main = "Residuals")
  # portmanteau (Ljung-Box) test; fitdf should roughly match p + q of the fitted model
  print(Box.test(residuals(fit), lag = 24, fitdf = 4, type = "Ljung"))
}
In [ ]:
# split the series into a test and a train set
series.train <- window(series.adj, end=c(2015, 6))
series.test <- window(series.adj, start=c(2015, 7))
In [ ]:
# with p=1, d=0, q=12
modelArima(series.train, c(1, 0, 12), length(series.test), series.test)
In [ ]:
# with p=11, d=1, q=5
modelArima(series.train, c(11, 1, 5), length(series.test), series.test)
In [ ]:
# series = original data
# series.cleaned = outliers removed
# series.adj = original data, seasonally adjusted
# series.cleaned.adj = cleaned data, seasonally adjusted
# series.train = original seasonally adjusted data's train split
# series.test = original seasonally adjusted data's test split
# series.cleaned.train = cleaned seasonally adjusted data's train split
# series.cleaned.test = cleaned seasonally adjusted data's test split
# stl.fit = original data's stl
# stl.cleaned.fit = cleaned data's stl
# tsdisplay(series.adj)
train_start = c(2012,4)
train_end = c(2015,6)
test_start = c(2015, 7)
test_end = c(2016, 6)
seasonal = stl.fit$time.series[, "seasonal"]
seasonal_cleaned = stl.cleaned.fit$time.series[, "seasonal"]
In [ ]:
## Function for finding the per-month sum or average of the seasonal component
period_stat <- function(ts_data_in, type = 1, start_value, years){
  # type 1: per-period sum
  # type 2: per-period mean
  freq <- frequency(ts_data_in)
  len <- length(ts_data_in)
  freq_vector <- numeric(0)
  freq_sum <- numeric(0)
  vec <- numeric(0)
  sum_vec <- numeric(0)
  start_val <- start(ts_data_in)
  # pad with NAs so that each chunk of length `freq` is aligned to period 1 (January)
  ts_data_in <- c(rep(NA, start_val[2] - 1), ts_data_in)
  max_limit <- ceiling(len/freq)
  for(i in 1:max_limit){
    vec <- ts_data_in[(((i-1)*freq)+1):(((i-1)*freq)+freq)]
    freq_vector <- as.numeric(!is.na(vec))  # 1 where an observation exists for that period
    vec[is.na(vec)] <- 0
    if(i == 1){
      sum_vec <- vec
      freq_sum <- freq_vector
    }else{
      sum_vec <- sum_vec + vec
      freq_sum <- freq_sum + freq_vector
    }
  }
  final_ts <- numeric(0)
  if(type == 1){
    final_ts <- sum_vec
  }else if(type == 2) {
    final_ts <- (sum_vec/freq_sum)  # mean = sum / number of observed years for that period
  } else {
    stop("Invalid type")
  }
  # repeat the per-period statistic for `years` years and return it as a ts object
  return(ts(rep(final_ts, years), frequency = freq, start = start_value))
}
In [ ]:
#Shift the seasonally adjusted series so all values are positive (the minimum becomes 1),
#since multiplicative ETS models require strictly positive data
min_ts_value <- min(series.adj)
min_ts_cleaned_value <- min(series.cleaned.adj)
bias_value <- (-1*min_ts_value) + 1
bias_value_cleaned <- (-1*min_ts_cleaned_value) + 1
#min(series)
#min(series.cleaned)
#min(series.adj)
#min(series.cleaned.adj)
ES_series <- series.adj + bias_value
ES_series_cleaned <- series.cleaned.adj + bias_value_cleaned
#plot(ES_series)
train_data_adj <- window(ES_series,start = train_start, end=train_end)
test_data_adj <- window(ES_series, start= test_start, end = test_end)
train_data_adj_cleaned <- window(ES_series_cleaned,start = train_start, end = train_end)
test_data_adj_cleaned <- window(ES_series_cleaned, start = test_start, end = test_end)
train_data <- window(series, start = train_start, end = train_end)
test_data <- window(series, start = test_start, end = test_end)
train_data_cleaned <- window(series.cleaned, start = train_start, end = train_end)
test_data_cleaned <- window(series.cleaned, start = test_start, end = test_end)
In [ ]:
#Get the mean of the seasonal component over the full data set, not the training set alone.
#May need to be adjusted based on input from Suchana.
seasonal_mean <- period_stat(seasonal,2,c(2012,1),years = 7)
seasonal_cleaned_mean <- period_stat(seasonal_cleaned,2,c(2012,1),years = 7)
In [ ]:
#Preprocessing: replace zeros in the training data (multiplicative ETS models need positive values)
train_data_adj[train_data_adj==0]=0.01
train_data_adj_cleaned[train_data_adj_cleaned==0]=0.01
In [ ]:
all_types = c("ANN","AAN","AAA","ANA","MNN","MAN","MNA","MAA","MMN","MNM","MMM","MAM")
forecast_values = 12
# For eg: AAA -> additive error, additive trend and additive seasonality
# ANN -> additive error, no trend and no seasonality
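As a hedged aside (not part of the grid search below), ets() can also choose the error/trend/seasonality form automatically with model = "ZZZ", which gives a quick sanity check on the manual list of model types; auto_ets is an illustrative name and the call is shown on one training set only.
In [ ]:
# illustrative: let ets() pick the model form automatically on one training set
auto_ets <- ets(train_data_adj, model = "ZZZ")
summary(auto_ets)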
In [ ]:
fit_function <- function(train_data, test_data)
{
  all_fit <- list()
  test_models <- list()
  print("Fitting various models: ")
  for (bool in c(TRUE, FALSE)){
    for (model_type in all_types){
      # a damped fit only makes sense when the model has a trend component
      if(bool & substr(model_type, 2, 2) == "N"){
        next
      }
      test_model = ets(train_data, model = model_type, damped = bool)
      # store the test-set MAPE and the Ljung-Box p-value (in %) for each model;
      # models with autocorrelated residuals (at the 10% level) can then be excluded later
      all_fit[[paste0("ETS Model: ", model_type, ", Damped: ", bool)]][1] <-
        accuracy(f = forecast.ets(test_model, h = forecast_values)$mean, x = test_data)[5]
      all_fit[[paste0("ETS Model: ", model_type, ", Damped: ", bool)]][2] <-
        100*(Box.test(test_model$residuals, lag = 20, type = "Ljung-Box")$p.value)
      test_models[[paste0("ETS Model: ", model_type, ", Damped: ", bool)]] <- test_model
      print(test_model$method)
      print(accuracy(f = forecast.ets(test_model, h = forecast_values)$mean, x = test_data)[5])
      print("")
    }
  }
  return(list(all_fit, test_models))
}
In [ ]:
# Fitting the models for all types of data - Original, cleaned, seasonally adjusted, cleaned - seasonally adjusted
models_adj <- fit_function(train_data_adj,test_data_adj) #Seasonally adjusted data
models_adj_cleaned <- fit_function(train_data_adj_cleaned,test_data_adj_cleaned) #Seasonally adjusted, cleaned(with outliers being removed) data
models <- fit_function(train_data,test_data) #Original data
models_cleaned <- fit_function(train_data_cleaned, test_data_cleaned) #Original, cleaned data
In [ ]:
all_fit_adj <- models_adj[[1]]
test_models_adj <- models_adj[[2]]
all_fit_adj_cleaned<- models_adj_cleaned[[1]]
test_models_adj_cleaned <- models_adj_cleaned[[2]]
all_fit <- models[[1]]
test_models <- models[[2]]
all_fit_cleaned <- models_cleaned[[1]]
test_models_cleaned <- models_cleaned[[2]]
In [ ]:
#Finding the best fit
proper_models <- all_fit_adj
best_mape <- min(sapply(proper_models,function(x)x[1]))
best_model <- names(which.min(sapply(proper_models,function(x)x[1])))
print(paste0("Best Model:",best_model))
print(paste0("Best Mape: ",best_mape))
#Finding top n fits
#top_models <- c()
Top_n <- 3
if(length(proper_models)<3){Top_n <- length(proper_models)}
top_mape_val <- proper_models[order(sapply(proper_models, function(x)x[1]))][1:Top_n]
top_models_adj <- names(top_mape_val)
top_mape_val
seasonal_mean
In [ ]:
#Finding the best fit
proper_models <- all_fit_adj_cleaned
best_mape <- min(sapply(proper_models,function(x)x[1]))
best_model <- names(which.min(sapply(proper_models,function(x)x[1])))
print(paste0("Best Model:",best_model))
print(paste0("Best Mape: ",best_mape))
#Finding top n fits
#top_models <- c()
Top_n <- 3
if(length(proper_models)<3){Top_n <- length(proper_models)}
top_mape_val <- proper_models[order(sapply(proper_models, function(x)x[1]))][1:Top_n]
top_models_adj_cleaned <- names(top_mape_val)
top_mape_val
seasonal_cleaned_mean
In [ ]:
#Finding the best fit
proper_models <- all_fit
best_mape <- min(sapply(proper_models,function(x)x[1]))
best_model <- names(which.min(sapply(proper_models,function(x)x[1])))
print(paste0("Best Model:",best_model))
print(paste0("Best Mape: ",best_mape))
#Finding top n fits
#top_models <- c()
Top_n <- 3
if(length(proper_models)<3){Top_n <- length(proper_models)}
top_mape_val <- proper_models[order(sapply(proper_models, function(x)x[1]))][1:Top_n]
top_models<- names(top_mape_val)
top_mape_val
In [ ]:
#Finding the best fit
proper_models <- all_fit_cleaned
if(length(proper_models)==0){
  print("None of the models passes the Ljung-Box test; taking the 3 models with the lowest p-values")
  p_values <- sapply(all_fit_cleaned, function(x)x[2])
  proper_models <- all_fit_cleaned[order(p_values)][1:3]
}
best_mape <- min(sapply(proper_models,function(x)x[1]))
best_model <- names(which.min(sapply(proper_models,function(x)x[1])))
print(paste0("Best Model: ", best_model))
print(paste0("Best MAPE: ", best_mape))
#Finding top n fits
#top_models <- c()
Top_n <- 3
if(length(proper_models)<3){Top_n <- length(proper_models)}
top_mape_val <- proper_models[order(sapply(proper_models, function(x)x[1]))][1:Top_n]
top_models_cleaned <- names(top_mape_val)
top_mape_val
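The four "best fit" blocks above repeat the same logic, so a small helper along the following lines could replace them. This is a refactoring sketch with illustrative names, not code the rest of the notebook depends on.
In [ ]:
# illustrative refactoring sketch of the repeated "best fit" blocks
report_best_fits <- function(fit_list, top_n = 3) {
  mapes <- sapply(fit_list, function(x) x[1])  # MAPE is stored in slot 1
  print(paste0("Best Model: ", names(which.min(mapes))))
  print(paste0("Best MAPE: ", min(mapes)))
  top_n <- min(top_n, length(fit_list))
  fit_list[order(mapes)][1:top_n]  # top-n entries, lowest MAPE first
}
# e.g. top_mape_val <- report_best_fits(all_fit_adj)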
In [ ]:
plot(ES_series,col = "black")
lines(test_data_adj, col = "blue")
lines(forecast.ets(test_models_adj[[top_models_adj[1]]],h=12)$mean, col = "red") #Top model
lines(forecast.ets(test_models_adj[[top_models_adj[2]]],h=12)$mean, col = "green") #Top second model
lines(forecast.ets(test_models_adj[[top_models_adj[3]]],h=12)$mean, col = "yellow") #Top third model
legend("topleft", lty=1,col = c("blue","red","green","yellow"),
c("Test data", "Best model", "Second best", "Third best"))
#Observation: Unusual peak at December'15. To check if it is an anomaly
In [ ]:
plot(ES_series_cleaned,col = "black")
lines(test_data_adj_cleaned, col = "blue")
lines(forecast.ets(test_models_adj_cleaned[[top_models_adj_cleaned[1]]],h=12)$mean, col = "red") #Top model
lines(forecast.ets(test_models_adj_cleaned[[top_models_adj_cleaned[2]]],h=12)$mean, col = "green") #Top second model
lines(forecast.ets(test_models_adj_cleaned[[top_models_adj_cleaned[3]]],h=12)$mean, col = "yellow") #Top third model
legend("topleft", lty=1,col = c("blue","red","green","yellow"),
c("Test data", "Best model", "Second best", "Third best"))
#Observation: Unusual peak at December'15. To check if it is an anomaly
In [ ]:
#all_fit
#test_models[[all_fit[1]]]
plot(series,col = "black")
lines(test_data, col = "blue")
accuracy(test_models[[top_models[1]]])
accuracy(test_models[[top_models[2]]])
accuracy(test_models[[top_models[3]]])
lines(forecast.ets(test_models[[top_models[1]]],h=12)$mean, col = "red") #Top model
lines(forecast.ets(test_models[[top_models[2]]],h=12)$mean, col = "green") #Top second model
lines(forecast.ets(test_models[[top_models[3]]],h=12)$mean, col = "yellow") #Top third model
legend("topleft", lty=1,col = c("blue","red","green","yellow"),
c("Test data", "Best model", "Second best", "Third best"))
#Observation: Unusual peak at December'15. To check if it is an anomaly
In [ ]:
#plot(forecast.ets(test_models_cleaned[[top_models_cleaned[1]]],h=12))
accuracy(test_models_cleaned[[top_models_cleaned[1]]])
accuracy(test_models_cleaned[[top_models_cleaned[2]]])
accuracy(test_models_cleaned[[top_models_cleaned[3]]])
plot(series.cleaned,col = "black")
lines(test_data_cleaned, col = "blue")
lines(forecast.ets(test_models_cleaned[[top_models_cleaned[1]]],h=12)$mean, col = "red") #Top model
lines(forecast.ets(test_models_cleaned[[top_models_cleaned[2]]],h=12)$mean, col = "green") #Top second model
lines(forecast.ets(test_models_cleaned[[top_models_cleaned[3]]],h=12)$mean, col = "yellow") #Top third model
legend("topleft",lty=c(1,1,1,1),col = c("blue","red","green","yellow","brown"),
c("Test data(cleaned)", "Best model", "Second best", "Third best"))
#Observation: Unusual peak at December'15. To check if it is an anomaly
In [ ]:
print("Case 1: Seasonally adjusted data")
#Subtracting the bias value that was added earlier to avoid negative values
ES_series_bias <- ES_series - bias_value
test_series_bias <- test_data_adj - bias_value
forecast1_bias <- forecast.ets(test_models_adj[[top_models_adj[1]]],h=12)$mean - bias_value
forecast2_bias <- forecast.ets(test_models_adj[[top_models_adj[2]]],h=12)$mean - bias_value
forecast3_bias <- forecast.ets(test_models_adj[[top_models_adj[3]]],h=12)$mean - bias_value
#Adding back the seasonal value from stl decomposition
ES_value_adj <- ES_series_bias + seasonal
test_series_adj <- test_series_bias + seasonal
#Adding back the mean seasonal component to the forecasted data
forecast1_adj <- forecast1_bias + seasonal_mean
forecast2_adj <- forecast2_bias + seasonal_mean
forecast3_adj <- forecast3_bias + seasonal_mean
#Calculating the accuracy of the training data
accuracy(test_models_adj[[top_models_adj[1]]])
accuracy(test_models_adj[[top_models_adj[2]]])
accuracy(test_models_adj[[top_models_adj[3]]])
In [ ]:
#Checking the MAPE values with original data
print(paste0("Top model: ", top_models_adj[1]))
accuracy(forecast1_adj,test_series_adj)
print(paste0("Top model: ", top_models_adj[2]))
accuracy(forecast2_adj,test_series_adj)
print(paste0("Top model: ", top_models_adj[3]))
accuracy(forecast3_adj,test_series_adj)
#accuracy(test_data, forecast.ets(test_models[[top_models[3]]],h=12)$mean )
In [ ]:
print("Case 2: Seasonally adjusted & cleaned data")
#Subtracting the bias value that was added earlier to avoid negative values
ES_series_bias_cleaned <- ES_series_cleaned - bias_value_cleaned
test_series_bias_cleaned <- test_data_adj_cleaned - bias_value_cleaned
forecast1_bias <- forecast.ets(test_models_adj_cleaned[[top_models_adj_cleaned[1]]],h=12)$mean - bias_value_cleaned
forecast2_bias <- forecast.ets(test_models_adj_cleaned[[top_models_adj_cleaned[2]]],h=12)$mean - bias_value_cleaned
forecast3_bias <- forecast.ets(test_models_adj_cleaned[[top_models_adj_cleaned[3]]],h=12)$mean - bias_value_cleaned
#Adding back the seasonal value from stl decomposition
ES_value_adj_cleaned <- ES_series_bias_cleaned + seasonal_cleaned
test_series_adj_cleaned <- test_series_bias_cleaned + seasonal_cleaned
#Adding back the mean seasonal component to the forecasted data
forecast1_adj_cleaned <- forecast1_bias + seasonal_cleaned_mean
forecast2_adj_cleaned <- forecast2_bias + seasonal_cleaned_mean
forecast3_adj_cleaned <- forecast3_bias + seasonal_cleaned_mean
#Calculating the accuracy of the training data
accuracy(test_models_adj_cleaned[[top_models_adj_cleaned[1]]])
accuracy(test_models_adj_cleaned[[top_models_adj_cleaned[2]]])
accuracy(test_models_adj_cleaned[[top_models_adj_cleaned[3]]])
In [ ]:
#Checking the MAPE values with original data
print(paste0("Top model: ", top_models_adj_cleaned[1]))
accuracy(forecast1_adj_cleaned,test_series_adj_cleaned)
print(paste0("Top model: ", top_models_adj_cleaned[2]))
accuracy(forecast2_adj_cleaned,test_series_adj_cleaned)
print(paste0("Top model: ", top_models_adj_cleaned[3]))
accuracy(forecast3_adj_cleaned,test_series_adj_cleaned)
top_models
#accuracy(forecast.ets(test_models[[top_models[1]]],h=12)$mean, test_data)
#accuracy(test_data, forecast.ets(test_models[[top_models[3]]],h=12)$mean)
In [ ]:
#Ljung-Box test - checks whether the model residuals look like white noise (no autocorrelation)
# A small helper; model_list defaults to the models fitted on the original data
residual_analysis <- function(model_name, model_list = test_models){
  print(model_name)
  print(Box.test(model_list[[model_name]]$residuals, lag = 20, type = "Ljung-Box"))
  Acf(model_list[[model_name]]$residuals, main = model_name)
}
In [ ]:
#Case 1: Seasonally adjusted models
#Residual Analysis for top three models
residual_analysis(top_models_adj[1], test_models_adj) #Top model
residual_analysis(top_models_adj[2], test_models_adj) #Second best model
residual_analysis(top_models_adj[3], test_models_adj) #Third best model
In [ ]:
#Case 2 - Seasonally adjusted cleaned models
#Residual Analysis for top three models
residual_analysis(top_models_adj_cleaned[1], test_models_adj_cleaned) #Top model
residual_analysis(top_models_adj_cleaned[2], test_models_adj_cleaned) #Second best model
residual_analysis(top_models_adj_cleaned[3], test_models_adj_cleaned) #Third best model
In [ ]:
#Case 3 - Models on original data
#Residual Analysis for top three models
residual_analysis(top_models[1], test_models) #Top model
residual_analysis(top_models[2], test_models) #Second best model
residual_analysis(top_models[3], test_models) #Third best model
In [ ]:
#Case 4 - Models on cleaned data
#Residual Analysis for top three models
residual_analysis(top_models_cleaned[1], test_models_cleaned) #Top model
residual_analysis(top_models_cleaned[2], test_models_cleaned) #Second best model
residual_analysis(top_models_cleaned[3], test_models_cleaned) #Third best model
In [ ]:
plot(ES_value_adj,col = "black", ylab = "No of complaints", ylim = c(-1000,3500),
main = "Model with seasonal adjustment")
lines(test_series_adj, col = "blue") #Original test data
accuracy(forecast1_adj,test_series_adj)
accuracy(forecast2_adj,test_series_adj)
accuracy(forecast3_adj,test_series_adj)
lines(test_series_bias + seasonal_mean, col = "brown", lty = 2) #Deseasonalised test data with the average seasonal component added back
lines(forecast1_adj, col = "red") #Top model
lines(forecast2_adj, col = "green") #Top second model
lines(forecast3_adj, col = "yellow") #Top third model
legend("topleft",lty=c(1,1,1,1,2),col = c("blue","red","green","yellow","brown"),
c("Test data", "Best model", "Second best", "Third best","test data with seasonal mean"))
In [ ]:
plot(ES_value_adj_cleaned,col = "black", ylab = "No of complaints",
main = "Model with seasonal adjustment and cleaning")
lines(test_series_adj_cleaned, col = "blue") #Original test data
accuracy(forecast1_adj_cleaned,test_series_adj_cleaned)
accuracy(forecast2_adj_cleaned,test_series_adj_cleaned)
accuracy(forecast3_adj_cleaned,test_series_adj_cleaned)
lines(test_series_bias_cleaned + seasonal_cleaned_mean, col = "brown", lty = 2) #Deseasonalised test data with the average seasonal component added back
lines(forecast1_adj_cleaned, col = "red") #Top model
lines(forecast2_adj_cleaned, col = "green") #Top second model
lines(forecast3_adj_cleaned, col = "yellow") #Top third model
legend("topleft",lty=c(1,1,1,1,2),col = c("blue","red","green","yellow","brown"),
c("Test data", "Best model", "Second best", "Third best","test data with seasonal mean"))
In [ ]:
plot(series,col = "black", ylab = "No of complaints",
main = "Model with original data")
lines(test_data, col = "blue") #Originayl test data
accuracy(forecast.ets(test_models[[top_models[1]]],h=12)$mean,test_data)
accuracy(forecast.ets(test_models[[top_models[2]]],h=12)$mean,test_data)
accuracy(forecast.ets(test_models[[top_models[3]]],h=12)$mean,test_data)
lines(forecast.ets(test_models[[top_models[1]]],h=12)$mean, col = "red") #Top model
lines(forecast.ets(test_models[[top_models[2]]],h=12)$mean, col = "green") #Top second model
lines(forecast.ets(test_models[[top_models[3]]],h=12)$mean, col = "yellow") #Top third model
legend("topleft", lty=1,col = c("blue","red","green","yellow"),
c("Test data", "Best model", "Second best", "Third best"))
#Observation: Unusual peak at December'15. To check if it is an anomaly
legend("topleft",lty=c(1,1,1,1,2),col = c("blue","red","green","yellow","brown"),
c("Test data", "Best model", "Second best", "Third best","test data with seasonal mean"))
In [ ]:
#plot(forecast.ets(test_models_cleaned[[top_models_cleaned[1]]],h=12))
plot(series.cleaned,col = "black", main = "Model with cleaned data")
lines(test_data_cleaned, col = "blue")
#lines(test_data, col = "brown", lty = 2)
accuracy(forecast.ets(test_models_cleaned[[top_models_cleaned[1]]],h=12)$mean,test_data_cleaned)
accuracy(forecast.ets(test_models_cleaned[[top_models_cleaned[2]]],h=12)$mean,test_data_cleaned)
accuracy(forecast.ets(test_models_cleaned[[top_models_cleaned[3]]],h=12)$mean,test_data_cleaned)
lines(forecast.ets(test_models_cleaned[[top_models_cleaned[1]]],h=12)$mean, col = "red") #Top model
lines(forecast.ets(test_models_cleaned[[top_models_cleaned[2]]],h=12)$mean, col = "green") #Top second model
lines(forecast.ets(test_models_cleaned[[top_models_cleaned[3]]],h=12)$mean, col = "yellow") #Top third model
legend("topleft",lty=c(1,1,1,1,2),col = c("blue","red","green","yellow","brown"),
c("Test data(cleaned)", "Best model", "Second best", "Third best","Actual test data"))
#Observation: Unusual peak at December'15. To check if it is an anomaly