In [ ]:
library(hash)
library(xts)
library(lubridate)
library(forecast)
library(fpp)
library(rucm)

In [ ]:
# Input and output locations for the top-N complaints UCM experiment.
DATA_FOLDER <- "/home/samarth/workspaces/datakind-workspace/analytics/time-series/data/topNComplaints"
RESULTS_FOLDER <- "/home/samarth/workspaces/datakind-workspace/analytics/time-series/results/topNComplaints/ucm"

# Every window bound is a c(year, month) pair.
# Full span of data kept for each series.
dataStart <- c(2012, 1)
dataEnd <- c(2015, 12)

# Training window: opens where the data opens and runs through Dec 2014.
trainStart <- dataStart
trainEnd <- c(2014, 12)

# Hold-out window: calendar year 2015, closing where the data closes.
testStart <- c(2015, 1)
testEnd <- dataEnd

In [ ]:
#' Load every complaint-type CSV in a folder as a monthly time series.
#'
#' Each `*.csv` file must provide `Year` and `Complaints` columns. The file
#' name without its extension is used as the complaint type.
#'
#' @param dataFolder Directory holding one CSV file per complaint type.
#' @param start,end `c(year, month)` bounds handed to `window()`. They default
#'   to the script-level `dataStart` and `dataEnd`, so existing one-argument
#'   calls behave as before.
#' @return A named list of `ts` objects (frequency 12), one per CSV file,
#'   keyed by complaint type.
loadData <- function(dataFolder, start = dataStart, end = dataEnd) {
    # Restrict to CSV files so stray files or sub-directories are not parsed.
    files <- list.files(dataFolder, pattern = "\\.csv$", ignore.case = TRUE)
    data <- list()
    for (file in files) {
        df <- read.csv(file.path(dataFolder, file), stringsAsFactors = FALSE)
        minYear <- min(df$Year)
        # Drop the extension (whatever its length) to get the complaint type.
        complaintType <- tools::file_path_sans_ext(file)
        # NOTE(review): the series is assumed to begin in January of the
        # earliest year, with rows in chronological order - confirm against
        # the CSV layout.
        tsObject <- ts(df$Complaints, start = c(minYear, 1), frequency = 12)
        data[[complaintType]] <- window(tsObject, start = start, end = end)
    }
    data
}
# Read all complaint series once; the list names double as the complaint types.
data <- loadData(dataFolder = DATA_FOLDER)
complaintTypes <- names(x = data)

# Display the first series as a quick sanity check of the load.
data[[complaintTypes[1]]]

In [ ]:
# Try it out: fit one UCM to a single complaint type, overlay the 12-month
# forecast on the full series and score it against the hold-out year.
monthly <- data[[complaintTypes[6]]]
trainData <- window(monthly, start = trainStart, end = trainEnd)
testData <- window(monthly, start = testStart, end = testEnd)

# Level + seasonal model without a slope component.
# NOTE(review): season.length = 6 on a frequency-12 series - confirm that a
# half-year season is what is intended here.
fit <- ucm(formula = trainData ~ 0, data = trainData,
           level = TRUE, slope = FALSE, season = TRUE, season.length = 6)
pred <- predict(fit$model, n.ahead = 12)

plot(monthly)
lines(pred, lty = 2, col = 1)

# forecast::accuracy() expects the forecasts first and the actuals second.
# With the arguments the other way round, the percentage errors are divided
# by the predictions instead of the observed values and ME/MPE change sign.
accuracy(pred, testData)

In [ ]:
# Accumulator for the experiment results. It starts with no rows and no
# columns; the columns (season.length, cycle.period, MAPE, RMSE,
# Complaint.Type) are taken from the first grid appended with rbind().
results <- data.frame()

In [ ]:
# Fit a UCM for one grid row and score its 12-step forecast on the hold-out
# data.
#   trainData, testData: ts objects for the training and hold-out windows.
#   g: one-row data frame with `season.length` and `cycle.period`.
# Returns the matrix produced by forecast::accuracy().
getAccuracy <- function(trainData, testData, g) {
    # NOTE(review): cycle.period appears to have no effect while cycle = FALSE,
    # which would make the three cycle.period rows per season.length
    # duplicates - confirm whether cycle = TRUE was intended.
    fit <- ucm(formula = trainData ~ 0, data = trainData,
               level = TRUE, slope = TRUE, season = TRUE, cycle = FALSE,
               season.length = g$season.length, cycle.period = g$cycle.period)
    pred <- predict(fit$model, n.ahead = 12)
    # Forecasts go first, actuals second; the reverse order skews MAPE.
    accuracy(pred, testData)
}

# Grid search over season.length x cycle.period for every complaint type.
# Per-type grids are collected in a list and bound once at the end instead of
# growing `results` inside the loop.
perTypeGrids <- list()
for (complaintType in complaintTypes) {
    monthly <- data[[complaintType]]
    trainData <- window(monthly, start = trainStart, end = trainEnd)
    testData <- window(monthly, start = testStart, end = testEnd)
    grid <- expand.grid(season.length = seq(1, 3), cycle.period = seq(1, 3),
                        MAPE = NA, RMSE = NA)
    for (i in seq_len(nrow(grid))) {
        g <- grid[i, ]
        acc <- tryCatch(
            getAccuracy(trainData, testData, g),
            error = function(e) {
                # Keep going (metrics stay NA) but say which fit failed and why.
                message("UCM fit failed for '", complaintType,
                        "' (season.length=", g$season.length,
                        ", cycle.period=", g$cycle.period, "): ",
                        conditionMessage(e))
                NULL
            }
        )
        if (is.null(acc)) next
        # Select metrics by column name rather than by position.
        grid$MAPE[i] <- acc[1, "MAPE"]
        grid$RMSE[i] <- acc[1, "RMSE"]
    }
    grid$Complaint.Type <- complaintType
    perTypeGrids[[length(perTypeGrids) + 1]] <- grid
}
results <- rbind(results, do.call(rbind, perTypeGrids))

In [ ]:
# Show the last six rows of the accumulated results as a sanity check.
tail(results, n = 6L)

In [ ]:
# Persist the grid-search results. The output folder is created first so a
# long run is not lost to a missing directory.
if (!dir.exists(RESULTS_FOLDER)) {
    dir.create(RESULTS_FOLDER, recursive = TRUE)
}
write.csv(results, file = file.path(RESULTS_FOLDER, "results.csv"), row.names = FALSE)

In [ ]: