
Commit 16e98b1

Documentation update
1 parent cf8f741


41 files changed (+3132, -2864 lines)

R/policy_cont_lif.R

Lines changed: 4 additions & 4 deletions
@@ -20,18 +20,18 @@ LifPolicy <- R6::R6Class(
       self$x0_start <- x0_start
     },
     set_parameters = function(context_params) {
-      self$theta_to_arms <- list('x0' = x0_start, 'Y' = rep(NA, inttime))
+      self$theta <- list('x0' = x0_start, 'Y' = rep(NA, inttime))
     },
     get_action = function(t, context) {
-      action$choice <- self$theta$x0[[1]] + amplitude*cos(omega * t)
+      action$choice <- self$theta$x0 + amplitude*cos(omega * t)
       action
     },
     set_reward = function(t, context, action, reward) {
       reward <- reward$reward
       y <- amplitude*cos(omega * t)*reward
-      self$theta$Y[[1]] <- c(y, self$theta$Y[[1]])[seq_along(self$theta$Y[[1]])]
+      self$theta$Y <- c(y, self$theta$Y)[seq_along(self$theta$Y)]
      if (t > inttime)
-        self$theta$x0[[1]] <- self$theta$x0[[1]] + learnrate * sum( self$theta$Y[[1]] ) / inttime
+        self$theta$x0 <- self$theta$x0 + learnrate * sum( self$theta$Y ) / inttime
       self$theta
     }
   )
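
The set_reward() update above keeps a fixed-length window of the most recent amplitude-weighted rewards: the new value is prepended and indexing with seq_along() drops the oldest entry. A minimal sketch of that idiom in isolation (the names window and y_new are illustrative, not part of the package):

# Fixed-length buffer via prepend-then-truncate, as in self$theta$Y above
window <- rep(NA, 5)                              # stands in for 'Y' with inttime = 5
for (y_new in 1:7) {
  window <- c(y_new, window)[seq_along(window)]   # newest value first, oldest dropped
}
print(window)                                     # 7 6 5 4 3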

demo/demo_offline_doubly_robust.R

Lines changed: 10 additions & 24 deletions
@@ -2,9 +2,7 @@ library(contextual)
 library(data.table)
 
 # Import myocardial infection dataset
-
-url  <- "http://d1ie9wlkzugsxr.cloudfront.net/data_propensity/myocardial_propensity.csv"
-data <- fread(url)
+data <- fread("http://d1ie9wlkzugsxr.cloudfront.net/data_propensity/myocardial_propensity.csv")
 
 simulations <- 3000
 horizon     <- nrow(data)
@@ -16,9 +14,7 @@ data$trt <- data$trt + 1
 data$alive <- abs(data$death - 1)
 
 # Run regression per arm, predict outcomes, and save results, a column per arm
-
 f <- alive ~ age + risk + severity
-
 model_f <- function(arm) glm(f, data=data[trt==arm],
                              family=binomial(link="logit"),
                              y=FALSE, model=FALSE)
@@ -31,31 +27,21 @@ r_data <- do.call(cbind, r_data)
 colnames(r_data) <- paste0("r", (1:max(arms)))
 
 # Bind data and model predictions
-
-data <- cbind(data,r_data)
+data <- cbind(data,r_data)
 
 # calculate propensity weights
+m <- glm(I(trt-1) ~ age + risk + severity,
+         data=data, family=binomial(link="logit"))
+data$p <- predict(m, type = "response")
 
-m <- glm(I(trt-1) ~ age + risk + severity, data=data, family=binomial(link="logit"))
-data$p <- predict(m, type = "response")
-
-# run bandit - when leaving out P1, Doubly Robust Bandit uses marginal prob per arm for propensities:
-# table(private$z)/length(private$z)
-
-f <- alive ~ trt | age + risk + severity | r1 + r2 | p
+# formula notation of dataset:
+# (without p, doublyrobustbandit uses marginal prob per arm for propensities)
+f <- alive ~ trt | age + risk + severity | r1 + r2 | p
 
-bandit <- OfflineDoublyRobustBandit$new(formula = f, data = data)
-
-# Define agents.
+bandit <- OfflineDoublyRobustBandit$new(formula = f, data = data)
 agents <- list(Agent$new(LinUCBDisjointOptimizedPolicy$new(0.2), bandit, "LinUCB"))
-
-# Initialize the simulation.
-
-simulation <- Simulator$new(agents = agents, simulations = simulations, horizon = horizon)
-
-# Run the simulation.
+simulation <- Simulator$new(agents, horizon, simulations)
 sim <- simulation$run()
-
 # plot the results
 plot(sim, type = "cumulative", regret = FALSE, rate = TRUE, legend_position = "bottomright")
 plot(sim, type = "arms", limit_agents = "LinUCB")
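
The multi-part formula used in this demo separates, left to right, the reward column, the treatment/arm column, the contextual features, the per-arm outcome-model predictions (r1, r2), and the propensity column p; per the comment in the hunk above, OfflineDoublyRobustBandit falls back to marginal per-arm probabilities when p is omitted. A hedged sketch of the two variants, assuming the column names from this script:

# With estimated propensities (column p from the glm above):
f_with_p    <- alive ~ trt | age + risk + severity | r1 + r2 | p
# Without p, the bandit reportedly uses marginal per-arm probabilities:
f_without_p <- alive ~ trt | age + risk + severity | r1 + r2

bandit <- OfflineDoublyRobustBandit$new(formula = f_with_p, data = data)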

demo/replication_kruijswijk_2019/lif_offline.R

Lines changed: 2 additions & 1 deletion
@@ -46,4 +46,5 @@ history <- Simulator$new(agents = agents,
                          simulations = simulations,
                          do_parallel = TRUE)$run()
 
-plot(history, type = "cumulative", regret = TRUE, rate = FALSE, disp = 'ci', trunc_over_agents = FALSE, trunc_per_agent = FALSE)
+plot(history, type = "cumulative", regret = TRUE,
+     rate = FALSE, disp = 'ci', trunc_over_agents = FALSE, trunc_per_agent = FALSE)

demo/replication_kruijswijk_2019/policy_efirst_regression.R

Lines changed: 7 additions & 3 deletions
@@ -7,18 +7,21 @@ EFirstRegressionPolicy <- R6::R6Class(
     b = NULL,
     A = NULL,
     epsilon = NULL,
-    initialize = function(b = matrix(c(0, 0.025, -0.025), nrow=1, ncol=3, byrow = TRUE),
-                          A = matrix(diag(c(2,2,5)), nrow=3, ncol=3, byrow = TRUE),
+    initialize = function(b = matrix(c(0, 0.025, -0.025), nrow=1, ncol=3, byrow = TRUE),
+                          A = matrix(diag(c(2,2,5)), nrow=3, ncol=3, byrow = TRUE),
                           epsilon) {
       super$initialize()
       self$b <- b
       self$A <- A
       self$epsilon <- epsilon
+      print("initialize")
     },
     set_parameters = function(context_params) {
       self$theta <- list('b' = self$b, 'A' = self$A, 'epsilon' = self$epsilon)
+      print("set_parameters")
     },
     get_action = function(t, context) {
+      print("get_action")
       if(t <= epsilon){
         action$choice <- runif(1)
       } else {
@@ -29,6 +32,7 @@ EFirstRegressionPolicy <- R6::R6Class(
       action
     },
     set_reward = function(t, context, action, reward) {
+      print("set_reward")
       y <- reward$reward
       x <- action$choice
       x <- matrix(c(1,x,x^2), nrow = 1, ncol = 3, byrow = TRUE)
@@ -37,4 +41,4 @@ EFirstRegressionPolicy <- R6::R6Class(
       self$theta
     }
   )
-)
+)

demo/replication_kruijswijk_2019/policy_tbl.R

Lines changed: 3 additions & 3 deletions
@@ -7,8 +7,8 @@ ThompsonBayesianLinearPolicy <- R6::R6Class(
     J = NULL,
     P = NULL,
     err = NULL,
-    initialize = function(J = matrix(c(0, 0.025, -0.025), nrow=1, ncol=3, byrow = TRUE),
-                          P = matrix(diag(c(2,2,5)), nrow=3, ncol=3, byrow = TRUE),
+    initialize = function(J = matrix(c(0, 0.025, -0.025), nrow=1, ncol=3, byrow = TRUE),
+                          P = matrix(diag(c(2,2,5)), nrow=3, ncol=3, byrow = TRUE),
                           err=1) {
       super$initialize()
       self$J <- J
@@ -34,4 +34,4 @@ ThompsonBayesianLinearPolicy <- R6::R6Class(
       self$theta
     }
   )
-)
+)

demo/replication_van_emden_2018/section_3_2_2.R

Lines changed: 7 additions & 10 deletions
@@ -18,16 +18,15 @@ eg_agent <- Agent$new(eg_policy, bandit)
 # Assign both agents to a list.
 
 ##################################################################################################
+#                        +-----+----+--------> arms: three ads
+#                        |     |    |
+click_probs <- matrix(c(0.5, 0.7, 0.1,   # -> context 1: older (p=.5)
+                        0.7, 0.1, 0.3),  # -> context 2: young (p=.5)
 
-#                                 +-----+----+--------> arms: three ads
-#                                 |     |    |
-click_probabilities <- matrix( c( 0.5, 0.7, 0.1,   # --> context 1: older (p=.5)
-                                  0.7, 0.1, 0.3 ), # --> context 2: young (p=.5)
-
-                               nrow = 2, ncol = 3, byrow = TRUE)
+                      nrow = 2, ncol = 3, byrow = TRUE)
 
 # Initialize a SyntheticBandit with contextual weights
-context_bandit <- ContextualBernoulliBandit$new(weights = click_probabilities)
+context_bandit <- ContextualBernoulliBandit$new(weights = click_probs)
 # Initialize LinUCBDisjointPolicy
 lucb_policy <- LinUCBDisjointPolicy$new(0.6)
 # Initialize three Agents, binding each policy to a bandit.
@@ -36,12 +35,10 @@ eg_agent <- Agent$new(eg_policy, context_bandit)
 lucb_agent <- Agent$new(lucb_policy, context_bandit)
 # Assign all agents to a list.
 agents <- list(ef_agent, eg_agent, lucb_agent)
-# Initialize a Simulator with the agent list, horizon, and number of simulations.
+# Initialize a Simulator with the agent list, horizon, and nr of simulations
 simulator <- Simulator$new(agents, horizon, simulations)
 # Now run the simulator.
 history <- simulator$run()
-# And plot the cumulative reward rate again.
-plot(history, type = "cumulative", regret = FALSE, rate = TRUE)
 
 par(mfrow = c(1, 2), mar = c(2,4,1,1) , cex=1.4)
 # Finally, plot the average reward per time step t
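
In the click_probs matrix above, rows correspond to the two contexts and columns to the three ads, so each cell is the click probability for that context/arm pair. A small hedged sketch (plain R, outside the package) of drawing one Bernoulli reward from that table by hand:

click_probs <- matrix(c(0.5, 0.7, 0.1,   # context 1: older
                        0.7, 0.1, 0.3),  # context 2: young
                      nrow = 2, ncol = 3, byrow = TRUE)

context <- 2; arm <- 1                               # hypothetical draw: young user sees ad 1
reward  <- rbinom(1, 1, click_probs[context, arm])   # click with probability 0.7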

demo/replication_van_emden_2018/section_5_4.R

Lines changed: 3 additions & 3 deletions
@@ -27,8 +27,8 @@ plot(history, type = "cumulative", legend_border = FALSE, no_par = TRUE )
 plot(history, type = "arms", limit_agents = c("LinUCB"), no_par = TRUE)
 plot(history, type = "arms", limit_agents = c("EGreedy"), no_par = TRUE)
 
-plot(history, type = "arms", limit_agents = c("LinUCB"), limit_context = 1, no_par = TRUE)
-plot(history, type = "arms", limit_agents = c("LinUCB"), limit_context = 2, no_par = TRUE )
-plot(history, type = "arms", limit_agents = c("LinUCB"), limit_context = 3, no_par = TRUE )
+plot(history, type = "arms", limit_agents = c("LinUCB"), limit_context = c("X.1"), no_par = TRUE)
+plot(history, type = "arms", limit_agents = c("LinUCB"), limit_context = c("X.2"), no_par = TRUE )
+plot(history, type = "arms", limit_agents = c("LinUCB"), limit_context = c("X.3"), no_par = TRUE )
 
 par(mfrow = c(1, 1))

docs/articles/arxiv_2018/Sweave.sty

Lines changed: 0 additions & 53 deletions
This file was deleted.
