Population dynamics time-series forecasting for BBS


In [1]:
library(forecast)
library(dplyr)
library(ggplot2)
source("forecast-bbs-core.R")


Loading required package: zoo

Attaching package: ‘zoo’

The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric

Loading required package: timeDate
This is forecast 6.1 


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

Please wait while retriever updates its scripts, ...
The retriever scripts are up-to-date!

  New to ecoretriever? Examples at
    https://github.com/ropensci/ecoretriever/
    Use citation(package='ecoretriever') for the package citation
    
Use suppressPackageStartupMessages() to suppress these messages in the future

Attaching package: ‘ecoretriever’

The following object is masked from ‘package:DBI’:

    fetch

Forecast population time-series

These data structures are then cached, so if the files exist this step can be skipped.


In [ ]:
# Add zeros to population time-series data
pop_dyn <- contig_ts_long %>%
    tbl_df() %>%
    select(site_id, year, species_id, abundance) %>%
    group_by(site_id) %>%
    do(get_popdyn_data_w_zeros(., min(.$year), max(.$year))) %>%
    arrange(site_id, year, species_id)
head(pop_dyn)

In [ ]:
# Forecast
popdyn_ts_forecasts <- get_ts_forecasts(group_by(pop_dyn, site_id, species_id), timecol = "year", responsecol = "abundance", lag = 5)
save(popdyn_ts_forecasts, file = "popdyn_ts_forecasts.RData")
popdyn_ts_forecasts_clean <- cleanup_multi_ts_forecasts(popdyn_ts_forecasts, c('site_id', 'species_id'))
save(popdyn_ts_forecasts_clean, file = "popdyn_ts_forecasts_clean.RData")

Run analyses on forecasts


In [2]:
load("popdyn_ts_forecasts.RData")

In [3]:
head(popdyn_ts_forecasts, 1)


Out[3]:
site_idspecies_idtimeperiodcast_naivecast_avgcast_arimatest_set
1200214401990, 1991, 1992, 1993, 1994Naive method, 0.5714286, -15.94783, 33.89566, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 2, -1, -1, 0, 0, 0, 0, Arima(x = x, order = c(0, 1, 0), lambda = lambda), x, 0, 0, 14, 1, 1, 1, 0, 0, 0, -7.420876e-23, -7.420876e-23, 7.420876e-23, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 7.420876e-23, 34.22899, 34.53472, 0, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 80, 95, 0, 0, 0, 0, 0, -0.9687619, -1.370036, -1.677945, -1.937524, -2.166218, -1.481594, -2.09529, -2.566195, -2.963187, -3.312944, 0.9687619, 1.370036, 1.677945, 1.937524, 2.166218, 1.481594, 2.09529, 2.566195, 2.963187, 3.312944, 0, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, structure(c(0, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0), .Tsp = c(1, , 15, 1), class = "ts"), NA, 0, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, NA, 0, 0, 0, 0, 1, -1, 0, 2, -1, -1, 0, 0, 0, 0Mean, 80, 95, 0, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, .[[responsecol]][1:(length(.[[responsecol]]) - lag)], 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, -0.5579511, -0.5579511, -0.5579511, -0.5579511, -0.5579511, -1.04827, -1.04827, -1.04827, -1.04827, -1.04827, 1.091284, 1.091284, 1.091284, 1.091284, 1.091284, 1.581603, 1.581603, 1.581603, 1.581603, 1.581603, 0.2666667, 0.1532712, 0.5936168, meanf(x = .[[responsecol]][1:(length(.[[responsecol]]) - lag)], , h = lag), 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, -0.2666667, -0.2666667, -0.2666667, -0.2666667, -0.2666667, 0.7333333, -0.2666667, -0.2666667, 1.733333, 0.7333333, -0.2666667, -0.2666667, -0.2666667, -0.2666667, -0.2666667ARIMA(0,0,0) with non-zero mean, 0.2666667, 0.3288889, 0.02192615, TRUE, -12.94381, 29.88763, 0, 0, 0, 0, 1, 0, 0, -0.2666667, -0.2666667, -0.2666667, -0.2666667, -0.2666667, 0.7333333, -0.2666667, -0.2666667, 1.733333, 0.7333333, -0.2666667, -0.2666667, -0.2666667, -0.2666667, -0.2666667, auto.arima(x = structure(list(x = structure(c(0, 0, 0, 0, 0, , 1, 0, 0, 2, 1, 0, 0, 0, 0, 0), .Tsp = c(1, 15, 1), class = "ts")), .Names = "x", row.names = c(NA, , -15L), class = "data.frame"), seasonal = FALSE), .[[responsecol]][1:(length(.[[responsecol]]) - lag)], 0, 0, 15, 1, -0.2666667, 0, 0, 1, 0, 1, 31.30373, 30.88763, 0, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 80, 95, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, -0.4682882, -0.4682882, -0.4682882, -0.4682882, -0.4682882, -0.8573498, -0.8573498, -0.8573498, -0.8573498, -0.8573498, 1.001622, 1.001622, 1.001622, 1.001622, 1.001622, 1.390683, 1.390683, 1.390683, 1.390683, 1.390683, 0, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, structure(c(0, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0), .Tsp = c(1, , 15, 1), class = "ts"), 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, 0.2666667, -0.2666667, -0.2666667, -0.2666667, -0.2666667, -0.2666667, 0.7333333, -0.2666667, -0.2666667, 1.733333, 0.7333333, -0.2666667, -0.2666667, -0.2666667, -0.2666667, -0.26666670, 0, 0, 0, 0

In [4]:
load("popdyn_ts_forecasts_clean.RData")

In [5]:
head(popdyn_ts_forecasts_clean)


Out[5]:
modeltimeperiodobspt_fcastlo80hi80lo95hi95site_idspecies_id
1naive199000-0.96876190.9687619-1.4815941.48159420021440
2naive199100-1.3700361.370036-2.095292.0952920021440
3naive199200-1.6779451.677945-2.5661952.56619520021440
4naive199300-1.9375241.937524-2.9631872.96318720021440
5naive199400-2.1662182.166218-3.3129443.31294420021440
6avg199000.2666667-0.55795111.091284-1.048271.58160320021440

In [27]:
popdyn_ts_forecasts_clean_min <- filter(popdyn_ts_forecasts_clean, obs > 2, pt_fcast > 2)
ggplot(popdyn_ts_forecasts_clean_min, aes(x = log(pt_fcast), y = log(obs), fill = ..density..)) +
    stat_binhex(na.rm = TRUE, bins=25) +
    facet_wrap(~ model) +
    theme(aspect.ratio = 1)



In [28]:
forecast_mins <-
  popdyn_ts_forecasts_clean_min %>%
  group_by(site_id, model) %>%
  summarize(mins = min(timeperiod))
popdyn_ts_forecasts_clean_lags <-
  popdyn_ts_forecasts_clean_min %>% inner_join(forecast_mins, by = c("site_id", "model")) %>%
  mutate(lag = timeperiod - mins + 1)
head(popdyn_ts_forecasts_clean_lags)
nrow(popdyn_ts_forecasts_clean_lags)


Warning message:
: Grouping rowwise data frame strips rowwise nature
Out[28]:
modeltimeperiodobspt_fcastlo80hi80lo95hi95site_idspecies_idminslag
1naive1991330.43689695.563103-0.9199286.9199282002201019902
2arima199132.632671.3910113.8743280.73371684.5316222002201019902
3naive199035-1.23140611.23141-4.53011314.530112002273019901
4naive199245-5.79311215.79311-11.5066421.506642002273019903
5naive1993105-7.46281217.46281-14.0602324.060232002273019904
6naive199485-8.93384818.93385-16.3099826.309982002273019905
Out[28]:
511878

In [30]:
ggplot(popdyn_ts_forecasts_clean_lags, aes(x = log(pt_fcast), y = log(obs), fill = ..density..)) +
    geom_abline() +
    stat_binhex(na.rm = TRUE, bins=25) +
    facet_grid(lag ~ model) +
    theme(aspect.ratio = 1)



In [16]:
model_accuracies <- group_by(popdyn_ts_forecasts_clean, site_id, species_id, model) %>%
                    do(get_error_measures(.$obs, .$pt_fcast))
head(model_accuracies)


Warning message:
: Grouping rowwise data frame strips rowwise nature
Out[16]:
site_idspecies_idmodelMERMSEMAEMPEMAPE
120021440arima-0.26666670.26666670.2666667-InfInf
220021440avg-0.26666670.26666670.2666667-InfInf
320021440naive000NaNNaN
420021720arima0.40.89442720.4100100
520021720avg0.40.89442720.4100100
620021720naive0.40.89442720.4100100

In [17]:
save(model_accuracies, file = "popdyn_model_accuracies.RData")

In [18]:
#model_accuracies$model = with(model_accuracies, factor(model, levels = c("arima", "avg", "naive")))
ggplot(model_accuracies, aes(model, MAPE)) +
     geom_violin(aes(fill = model)) + ylim = c(0,1000) +
     labs(x = "Model", y = "Percentage Error") +
     scale_x_discrete(labels=c("Time-\nseries", "Average", "Naive")) +
     theme_grey(base_size = 24) +
     guides(fill=FALSE)


Error in c(0, 1000) + labs(x = "Model", y = "Percentage Error"): non-numeric argument to binary operator