In [ ]:
library(hash)
library(xts)
library(lubridate)
library(forecast)
library(fpp)
library(rucm)
In [ ]:
# Where the per-complaint-type CSV series are read from, and where the
# UCM experiment output is written to.
DATA_FOLDER <- "/home/samarth/workspaces/datakind-workspace/analytics/time-series/data/topNComplaints"
RESULTS_FOLDER <- "/home/samarth/workspaces/datakind-workspace/analytics/time-series/results/topNComplaints/ucm"

# All periods are c(year, month) pairs, matching the monthly (frequency 12)
# series built from the CSV files.

# Full span of data used in the experiments.
dataStart <- c(2012, 1)
dataEnd <- c(2015, 12)

# Training window: starts with the data and runs through December 2014.
trainStart <- dataStart
trainEnd <- c(2014, 12)

# Hold-out window: calendar year 2015, ending with the data.
testStart <- c(2015, 1)
testEnd <- dataEnd
In [ ]:
# Load every monthly complaint series stored as a CSV file in `dataFolder`.
#
# Each CSV is read with read.csv() and must provide the columns `Year` and
# `Complaints`; the `Complaints` column becomes a monthly `ts` object.
#
# Args:
#   dataFolder:  directory containing one CSV file per complaint type.
#   windowStart: c(year, month) start of the span to keep; defaults to the
#                global `dataStart`.
#   windowEnd:   c(year, month) end of the span to keep; defaults to the
#                global `dataEnd`.
#
# Returns:
#   A named list of `ts` objects, keyed by the file name without extension.
loadData <- function(dataFolder, windowStart = dataStart, windowEnd = dataEnd) {
  # Restrict to CSV files so stray files in the folder (notes, editor
  # backups, ...) are not parsed as data.
  files <- list.files(dataFolder, pattern = "\\.csv$", ignore.case = TRUE)
  data <- list()
  for (file in files) {
    df <- read.csv(file.path(dataFolder, file), stringsAsFactors = FALSE)
    complaintType <- tools::file_path_sans_ext(file)
    # NOTE(review): the series is assumed to hold consecutive months beginning
    # in January of the earliest year present - confirm against the CSV layout.
    tsObject <- ts(df$Complaints, start = c(min(df$Year), 1), frequency = 12)
    data[[complaintType]] <- window(tsObject, start = windowStart, end = windowEnd)
  }
  data
}
# Read all complaint series once and keep their names for later iteration.
data <- loadData(DATA_FOLDER)
complaintTypes <- names(data)

# Print the first series as a quick sanity check of the load.
data[[complaintTypes[[1]]]]
In [ ]:
# Trial run: fit a single UCM on one complaint type and inspect the forecast.
monthly <- data[[complaintTypes[6]]]
trainData <- window(monthly, start = trainStart, end = trainEnd)
testData <- window(monthly, start = testStart, end = testEnd)

# Level + seasonal component (season length 6), no slope.
fit <- ucm(formula = trainData ~ 0, data = trainData,
           level = TRUE, slope = FALSE, season = TRUE, season.length = 6)

# Forecast twelve steps beyond the end of the training window.
pred <- predict(fit$model, n.ahead = 12)

# Overlay the forecast (dashed) on the full observed series.
plot(monthly)
lines(pred, lty = 2, col = 1)

# accuracy() expects the forecasts first and the observed values second.
# With the arguments swapped, the sign of ME flips and the percentage errors
# (MPE, MAPE) are computed relative to the predictions instead of the actuals.
accuracy(pred, testData)
In [ ]:
# Store experiment results in this data frame.
# Columns are declared with typed zero-length vectors: `c()` evaluates to NULL,
# which data.frame() drops, so the declared schema would otherwise be lost.
results <- data.frame(
  Complaint.Type = character(),
  season.length = integer(),
  cycle.period = integer(),
  MAPE = numeric(),
  RMSE = numeric(),
  stringsAsFactors = FALSE
)
In [ ]:
# Fit one UCM configuration on `trainData` and score its 12-step-ahead
# forecast against `testData`.
#
# Args:
#   trainData: training series passed to ucm().
#   testData:  held-out series the forecast is compared with.
#   g:         one row of the parameter grid, providing `season.length` and
#              `cycle.period`.
#
# Returns:
#   The result of accuracy() for the forecast versus `testData`.
getAccuracy <- function(trainData, testData, g) {
  # NOTE(review): cycle = FALSE while cycle.period is varied over the grid;
  # presumably the cycle.period dimension has no effect on the fit - confirm
  # whether cycle = TRUE was intended.
  fit <- ucm(formula = trainData ~ 0, data = trainData,
             level = TRUE, slope = TRUE, season = TRUE, cycle = FALSE,
             season.length = g$season.length, cycle.period = g$cycle.period)
  pred <- predict(fit$model, n.ahead = 12)
  # Forecasts go first, observed values second; the reverse order flips the
  # sign of ME and computes MAPE relative to the predictions, not the actuals.
  accuracy(pred, testData)
}

# Grid search: for every complaint type, try each (season.length,
# cycle.period) pair and record the test-set MAPE and RMSE.
for (complaintType in complaintTypes) {
  monthly <- data[[complaintType]]
  trainData <- window(monthly, start = trainStart, end = trainEnd)
  testData <- window(monthly, start = testStart, end = testEnd)

  grid <- expand.grid(season.length = seq(1, 3), cycle.period = seq(1, 3),
                      MAPE = NA, RMSE = NA)

  for (i in seq_len(nrow(grid))) {
    acc <- tryCatch(
      getAccuracy(trainData, testData, grid[i, ]),
      error = function(e) e
    )
    if (inherits(acc, "error")) {
      # Keep the search going, but report why this combination has NA scores.
      message("UCM fit failed for '", complaintType,
              "' (season.length=", grid$season.length[i],
              ", cycle.period=", grid$cycle.period[i], "): ",
              conditionMessage(acc))
      next
    }
    # Select metrics by name rather than by column position.
    grid$MAPE[i] <- acc[1, "MAPE"]
    grid$RMSE[i] <- acc[1, "RMSE"]
  }

  grid$Complaint.Type <- complaintType
  results <- rbind(results, grid)
}
In [ ]:
# Show the last six rows of the accumulated results.
tail(results, n = 6L)
In [ ]:
# Persist the experiment results as CSV. The output folder is created first
# (no-op when it already exists) so the write does not fail on a fresh setup.
dir.create(RESULTS_FOLDER, recursive = TRUE, showWarnings = FALSE)
write.csv(results, file = file.path(RESULTS_FOLDER, "results.csv"), row.names = FALSE)
In [ ]: