
Commit f600344

Truncating plots moved to Plot() for added flexibility.
1 parent fc88b71 commit f600344

10 files changed: +283 additions, −31 deletions
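
In practical terms: truncation of plot data is now applied per plot call instead of once, destructively, inside Simulator (see the R/simulator.R change below). A minimal usage sketch, assuming history is a History instance returned by Simulator$new(...)$run(); both new flags default to TRUE, which preserves the previous behavior:

# Plot all recorded data, with no truncation at all:
plot(history, type = "cumulative", regret = TRUE,
     trunc_over_agents = FALSE,  # keep agents that ran longer than the shortest agent
     trunc_per_agent   = FALSE)  # keep time steps not reached by every simulation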

R/functions_generic.R

Lines changed: 20 additions & 4 deletions
@@ -31,6 +31,14 @@ plot.History <- function(x, ...) {
     legend <- eval(args$legend)
   else
     legend <- TRUE
+  if ("trunc_per_agent" %in% names(args))
+    trunc_per_agent <- eval(args$trunc_per_agent)
+  else
+    trunc_per_agent <- TRUE
+  if ("trunc_over_agents" %in% names(args))
+    trunc_over_agents <- eval(args$trunc_over_agents)
+  else
+    trunc_over_agents <- TRUE
   if ("regret" %in% names(args))
     regret <- eval(args$regret)
   else

@@ -163,7 +171,9 @@ plot.History <- function(x, ...) {
       xlab = xlab,
       ylab = ylab,
       limit_agents = limit_agents,
-      limit_context = limit_context
+      limit_context = limit_context,
+      trunc_over_agents = trunc_over_agents,
+      trunc_per_agent = trunc_per_agent
     )
   } else if (type == "average") {
     Plot$new()$average(

@@ -193,7 +203,9 @@ plot.History <- function(x, ...) {
       ylab = ylab,
       cum_average = cum_average,
       limit_agents = limit_agents,
-      limit_context = limit_context
+      limit_context = limit_context,
+      trunc_over_agents = trunc_over_agents,
+      trunc_per_agent = trunc_per_agent
     )
   } else if (type == "optimal") {
     Plot$new()$optimal(

@@ -220,7 +232,9 @@ plot.History <- function(x, ...) {
       xlab = xlab,
       ylab = ylab,
       limit_agents = limit_agents,
-      limit_context = limit_context
+      limit_context = limit_context,
+      trunc_over_agents = trunc_over_agents,
+      trunc_per_agent = trunc_per_agent
     )
   } else if (type == "arms") {
     Plot$new()$arms(

@@ -240,7 +254,9 @@ plot.History <- function(x, ...) {
       xlab = xlab,
       ylab = ylab,
       limit_agents = limit_agents,
-      limit_context = limit_context
+      limit_context = limit_context,
+      trunc_over_agents = trunc_over_agents,
+      trunc_per_agent = trunc_per_agent

     )
   }
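
For reference, the fallback pattern used above: plot.History() captures its dots, then substitutes a default whenever a flag was not supplied. A standalone sketch of that pattern, where args stands in for the generic's captured argument list (hypothetical value):

args <- list(trunc_per_agent = FALSE)  # as if called via plot(h, trunc_per_agent = FALSE)
if ("trunc_per_agent" %in% names(args))
  trunc_per_agent <- eval(args$trunc_per_agent)
else
  trunc_per_agent <- TRUE
trunc_per_agent
#> [1] FALSE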

R/history.R

Lines changed: 12 additions & 7 deletions
@@ -343,6 +343,10 @@ History <- R6::R6Class(

       private$.cum_stats <- private$.data[, list(

+        sims = length(reward),
+        sqrt_sims = sqrt(length(reward)),
+
         regret_var = var(regret),
         regret_sd = sd(regret),
         regret = mean(regret),

@@ -373,14 +377,15 @@ History <- R6::R6Class(
       private$.cum_stats[, cum_regret_rate := cum_regret / t]

       qn <- qnorm(0.975)
-      sqrt_sim <- sqrt(self$get_simulation_count())

-      private$.cum_stats[, cum_regret_ci := cum_regret_sd / sqrt_sim * qn]
-      private$.cum_stats[, cum_reward_ci := cum_reward_sd / sqrt_sim * qn]
-      private$.cum_stats[, cum_regret_rate_ci := cum_regret_rate_sd / sqrt_sim * qn]
-      private$.cum_stats[, cum_reward_rate_ci := cum_reward_rate_sd / sqrt_sim * qn]
-      private$.cum_stats[, regret_ci := regret_sd / sqrt_sim * qn]
-      private$.cum_stats[, reward_ci := reward_sd / sqrt_sim * qn]
+      private$.cum_stats[, cum_regret_ci := cum_regret_sd / sqrt_sims * qn]
+      private$.cum_stats[, cum_reward_ci := cum_reward_sd / sqrt_sims * qn]
+      private$.cum_stats[, cum_regret_rate_ci := cum_regret_rate_sd / sqrt_sims * qn]
+      private$.cum_stats[, cum_reward_rate_ci := cum_reward_rate_sd / sqrt_sims * qn]
+      private$.cum_stats[, regret_ci := regret_sd / sqrt_sims * qn]
+      private$.cum_stats[, reward_ci := reward_sd / sqrt_sims * qn]
+
+      private$.cum_stats[, sqrt_sims := NULL]

       private$.data[, cum_reward_rate := cum_reward / t]
       private$.data[, cum_regret_rate := cum_regret / t]
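
The substance of this change: the 95% interval half-width is qnorm(0.975) * sd / sqrt(n), and n is now the per-(agent, t) simulation count sims computed inside the same grouped aggregation, rather than one global self$get_simulation_count(). Now that Simulator no longer truncates, time steps reached by fewer simulations correctly receive wider intervals. A self-contained sketch of the computation on toy data (hypothetical values):

library(data.table)
dt <- data.table(agent  = "a",
                 t      = rep(1:3, times = c(5, 5, 3)),  # t = 3 reached by only 3 sims
                 regret = runif(13))
stats <- dt[, .(regret    = mean(regret),
                regret_ci = qnorm(0.975) * sd(regret) / sqrt(.N)),  # .N = sims per group
            by = .(agent, t)]
stats  # the t = 3 half-width divides by sqrt(3) instead of sqrt(5)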

R/plot.R

Lines changed: 56 additions & 16 deletions
@@ -31,7 +31,9 @@ Plot <- R6::R6Class(
       legend_position = "topleft",
       legend_title = NULL,
       limit_agents = NULL,
-      limit_context = NULL) {
+      limit_context = NULL,
+      trunc_over_agents = TRUE,
+      trunc_per_agent = TRUE) {

       self$history <- history

@@ -85,7 +87,9 @@ Plot <- R6::R6Class(
         traces_max = traces_max,
         traces_alpha = traces_alpha,
         smooth = smooth,
-        rate = rate
+        rate = rate,
+        trunc_over_agents = trunc_over_agents,
+        trunc_per_agent = trunc_per_agent
       )

       invisible(recordPlot())

@@ -116,7 +120,9 @@ Plot <- R6::R6Class(
       legend_position = "topleft",
       legend_title = NULL,
       limit_agents = NULL,
-      limit_context = NULL) {
+      limit_context = NULL,
+      trunc_over_agents = TRUE,
+      trunc_per_agent = TRUE) {

       self$history <- history

@@ -149,7 +155,9 @@ Plot <- R6::R6Class(
         traces = traces,
         traces_max = traces_max,
         traces_alpha = traces_alpha,
-        smooth = smooth
+        smooth = smooth,
+        trunc_over_agents = trunc_over_agents,
+        trunc_per_agent = trunc_per_agent
       )

       invisible(recordPlot())

@@ -183,7 +191,9 @@ Plot <- R6::R6Class(
       legend_position = "topleft",
       legend_title = NULL,
       limit_agents = NULL,
-      limit_context = NULL) {
+      limit_context = NULL,
+      trunc_over_agents = TRUE,
+      trunc_per_agent = TRUE) {
       self$history <- history

       if (regret) {

@@ -225,7 +235,9 @@ Plot <- R6::R6Class(
         traces_max = traces_max,
         traces_alpha = traces_alpha,
         smooth = smooth,
-        rate = rate
+        rate = rate,
+        trunc_over_agents = trunc_over_agents,
+        trunc_per_agent = trunc_per_agent
       )

       invisible(recordPlot())

@@ -248,7 +260,9 @@ Plot <- R6::R6Class(
       legend_title = NULL,
       limit_context = NULL,
       smooth = FALSE,
-      limit_agents = NULL) {
+      limit_agents = NULL,
+      trunc_over_agents = TRUE,
+      trunc_per_agent = TRUE) {

       self$history <- history

@@ -314,7 +328,10 @@ Plot <- R6::R6Class(

       eg <- expand.grid(t = dt[sim == 1]$t, choice = seq(1.0, max_arm, 1))
       data <- merge(data, eg, all = TRUE)
-      data[is.na(data)] <- 0.0
+      # turn NA into 0
+      for (j in seq_len(ncol(data)))
+        set(data, which(is.na(data[[j]])), j, 0)
+
       data$dataum <- ave(data$arm_count, data$t, FUN = cumsum)
       data$zero <- 0.0
       min_ylim <- 0
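
The loop replacing data[is.na(data)] <- 0.0 above is the idiomatic data.table way to zero out NAs: set() updates one column at a time by reference, whereas the matrix-style assignment can copy the whole table or fail on a data.table, depending on version. A minimal sketch:

library(data.table)
d <- data.table(a = c(1, NA, 3), b = c(NA, 2, NA))
for (j in seq_len(ncol(d)))
  set(d, which(is.na(d[[j]])), j, 0)  # i = rows with NA, j = column index
d  # all NAs are now 0, no copies made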
@@ -445,7 +462,9 @@ Plot <- R6::R6Class(
       traces_alpha = 0.3,
       cum_average = FALSE,
       smooth = FALSE,
-      rate = FALSE) {
+      rate = FALSE,
+      trunc_over_agents = TRUE,
+      trunc_per_agent = TRUE) {

       cum_flip <- FALSE
       if((line_data_name=="reward" || line_data_name=="regret") && isTRUE(cum_average)) {

@@ -472,7 +491,7 @@ Plot <- R6::R6Class(
       disp_data_name <- gsub("none", disp, disp_data_name)
       data <-
         self$history$get_cumulative_data(
-          limit_cols = c("agent", "t", line_data_name, disp_data_name),
+          limit_cols = c("agent", "t", "sims", line_data_name, disp_data_name),
           limit_agents = limit_agents,
           interval = interval
         )

@@ -481,12 +500,28 @@ Plot <- R6::R6Class(
         disp <- NULL
         data <-
           self$history$get_cumulative_data(
-            limit_cols = c("agent", "t", line_data_name),
+            limit_cols = c("agent", "t", "sims", line_data_name),
             limit_agents = limit_agents,
             interval = interval
           )
       }

+      agent_levels <- levels(droplevels(data$agent))
+      n_agents <- length(agent_levels)
+
+      # turn NA into 0
+      for (j in seq_len(ncol(data)))
+        data.table::set(data, which(is.na(data[[j]])), j, 0)
+
+      if (isTRUE(trunc_per_agent)) {
+        data <- data[data$sims == max(data$sims)]
+      }
+
+      if (isTRUE(trunc_over_agents)) {
+        min_t_sim <- min(data[, max(t), by = c("agent")]$V1)
+        data <- data[t <= min_t_sim]
+      }
+
       if (!is.null(xlim)) {
         min_xlim <- xlim[1]
         max_xlim <- xlim[2]
@@ -495,9 +530,6 @@ Plot <- R6::R6Class(
         max_xlim <- data[, max(t)]
       }

-      agent_levels <- levels(droplevels(data$agent))
-      n_agents <- length(agent_levels)
-
       data.table::setorder(data, agent, t)

       if(cum_flip==TRUE) {
@@ -709,10 +741,10 @@ Plot <- R6::R6Class(
 #' Plot
 #'
-#' Generates plots from \code{History} data.
+#' Generates plots from \code{\link{History}} data.
 #'
 #' Usually not instantiated directly but invoked by calling the generic \code{plot(h)}, where \code{h}
-#' is an \code{History} class instance.
+#' is a \code{\link{History}} class instance.
 #'
 #' @name Plot
 #' @aliases average optimal arms do_plot gg_color_hue check_history_data
@@ -834,6 +866,14 @@ Plot <- R6::R6Class(
 #' \item{\code{ylab}}{
 #'    \code{(character, NULL)} a title for the y axis
 #' }
+#' \item{\code{trunc_over_agents}}{
+#'    \code{(logical, TRUE)} Truncate the chart to the agent with the fewest time steps t.
+#' }
+#' \item{\code{trunc_per_agent}}{
+#'    \code{(logical, TRUE)} Truncate every agent's plot to the number of time steps that have been fully
+#'    simulated. That is, time steps for which the number of simulations equals the number defined in
+#'    \code{\link{Simulator}}'s \code{simulations} parameter.
+#' }
 #' }
 #'
 #'

R/simulator.R

Lines changed: 0 additions & 3 deletions
@@ -251,9 +251,6 @@ Simulator <- R6::R6Class(
       self$internal_history$set_meta_data("sim_total_duration", formatted_duration)
       message(paste0("Completed simulation in ",formatted_duration))

-      # TODO: this should be optional, and maybe done at plotside?
-      self$internal_history$truncate()
-
       start_time_stats <- Sys.time()
       message("Computing statistics.")
       # update statistics TODO: not always necessary, add option arg to class?

contextual.Rproj

Lines changed: 1 addition & 1 deletion
@@ -19,6 +19,6 @@ BuildType: Package
 PackageUseDevtools: Yes
 PackageInstallArgs: --no-multiarch --with-keep.source
 PackageCheckArgs: --as-cran
-PackageRoxygenize: rd,collate,namespace,vignette
+PackageRoxygenize: vignette

 QuitChildProcessesOnExit: Yes
New file: 48 additions, 0 deletions
@@ -0,0 +1,48 @@
+#' @export
+OnlineOfflineContinuumBandit <- R6::R6Class(
+  inherit = Bandit,
+  class = FALSE,
+  private = list(
+    S = NULL
+  ),
+  public = list(
+    class_name = "OnlineOfflineContinuumBandit",
+    delta = NULL,
+    c1 = NULL,
+    c2 = NULL,
+    arm_function = NULL,
+    choice = NULL,
+    initialize = function(delta, horizon) {
+      self$c1 <- runif(1, 0.25, 0.75)
+      self$c2 <- runif(1, 0.25, 0.75)
+      self$arm_function <- function(x, c1 = 0.25, c2 = 0.75) {
+        -(x - c1) ^ 2 + c2 + rnorm(length(x), 0, 0.01)
+      }
+      self$delta <- delta
+      self$choice <- runif(horizon, min = 0, max = 1)
+      private$S <- data.frame(self$choice, self$arm_function(self$choice, self$c1, self$c2))
+      self$k <- 1
+    },
+    post_initialization = function() {
+      private$S <- private$S[sample(nrow(private$S)), ]
+      colnames(private$S) <- c('choice', 'reward')
+      #print(private$S)
+    },
+    get_context = function(index) {
+      context <- list()
+      context$k <- self$k
+      context
+    },
+    get_reward = function(index, context, action) {
+      reward_at_index <- as.double(private$S$reward[[index]])
+      if (abs(private$S$choice[[index]] - action$choice) < self$delta) {
+        reward <- list(
+          reward = reward_at_index,
+          optimal_reward = self$c2
+        )
+      } else {
+        NULL
+      }
+    }
+  )
+)
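
The get_reward() above is a continuous-armed variant of replay-style offline evaluation: a logged (choice, reward) pair is consumed only when the logged choice lies within delta of the action the policy proposes; otherwise the method returns NULL and the step yields no data point. This is exactly why per-step simulation counts can differ, and why the sims-aware confidence intervals introduced above matter. A sketch of just the acceptance test (hypothetical values):

delta         <- 0.1
logged_choice <- 0.42   # a logged action from the offline data in private$S
policy_choice <- 0.45   # action$choice proposed by the evaluated policy
abs(logged_choice - policy_choice) < delta  # TRUE: the logged reward is used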
New file: 49 additions, 0 deletions
@@ -0,0 +1,49 @@
+library(contextual)
+library(here)
+setwd(here("demo", "replication_kruijswijk_2019"))
+
+source("./bandit_continuum_offon.R")
+source("./policy_tbl.R")
+source("./policy_unifcont.R")
+source("./policy_efirst_regression.R")
+
+set.seed(100)
+
+horizon <- 10000
+simulations <- 10
+
+continuous_arms <- function(x, c1 = 0.25, c2 = 0.75) {
+  -(x - c1) ^ 2 + c2 + rnorm(length(x), 0, 0.01)
+}
+
+choice <- runif(horizon, min = 0, max = 1)
+reward <- continuous_arms(choice)
+offline_data <- data.frame(choice, reward)
+
+int_time <- 50
+amplitude <- 0.05
+learn_rate <- 1
+omega <- 1 # 2*pi/int_time
+x0_start <- runif(1) # 2.0
+
+bandit <- OnlineOfflineContinuumBandit$new(delta = 0.1, horizon = horizon)
+
+agents <- list(Agent$new(UniformRandomContinuousPolicy$new(), bandit),
+               Agent$new(ThompsonBayesianLinearPolicy$new(), bandit))
+               #Agent$new(LifPolicy$new(int_time, amplitude, learn_rate, omega, x0_start), bandit),
+               #Agent$new(EFirstRegressionPolicy$new(epsilon = 100), bandit))
+
+history <- Simulator$new(agents = agents,
+                         horizon = horizon,
+                         simulations = simulations,
+                         do_parallel = TRUE)$run()
+
+plot(history, type = "cumulative", regret = TRUE, rate = FALSE, disp = 'ci',
+     trunc_over_agents = FALSE, trunc_per_agent = FALSE)
