In [1]:
# Benchmark: for each test trip, predict the larger of
# (a) the time that has elapsed so far in the trip and
# (b) the mean trip time in the training set,
# and use that value as the test trip duration.
library(readr)
library(rjson)
# Load the test trips; the POLYLINE column is read below as a JSON string
# holding the positions recorded so far for each trip.
test <- read_csv("/home/tony/ML/taxi/taxi2_time/test.csv")

# Mean trip time in the training set (hard-coded here, not computed;
# presumably in seconds -- TODO confirm against the training data).
mean_train_time <- 660

# Parse one test row's POLYLINE JSON into a data frame with one row per
# recorded position.
positions <- function(row) {
  as.data.frame(do.call(rbind, fromJSON(row$POLYLINE)))
}

submission <- test["TRIP_ID"]

# Elapsed time is estimated as 15 time units per recorded position, floored
# at the training mean. The whole column is built in one pass: seq_len()
# yields an empty sequence for an empty test set (1:nrow() would yield
# c(1, 0) and fail), and vapply() avoids growing the column element by
# element inside a loop.
# NOTE(review): if positions are sampled every 15 s, n positions span
# 15 * (n - 1) s rather than 15 * n; the original formula is kept as is.
submission$TRAVEL_TIME <- vapply(
  seq_len(nrow(test)),
  function(i) max(15 * nrow(positions(test[i, ])), mean_train_time),
  numeric(1)
)

write_csv(submission, "max_time_elapsed_mean_time_benchmark.csv")
In [2]:
# Install readr only when it is missing, so re-running this cell does not
# hit the network or reinstall a package that may already be loaded.
if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}
In [ ]: