
Commit 16e98b1

Documentation update
1 parent cf8f741


41 files changed (+3132, -2864 lines)

R/policy_cont_lif.R

Lines changed: 4 additions & 4 deletions
@@ -20,18 +20,18 @@ LifPolicy <- R6::R6Class(
       self$x0_start <- x0_start
     },
     set_parameters = function(context_params) {
-      self$theta_to_arms <- list('x0' = x0_start, 'Y' = rep(NA, inttime))
+      self$theta <- list('x0' = x0_start, 'Y' = rep(NA, inttime))
     },
     get_action = function(t, context) {
-      action$choice <- self$theta$x0[[1]] + amplitude*cos(omega * t)
+      action$choice <- self$theta$x0 + amplitude*cos(omega * t)
       action
     },
     set_reward = function(t, context, action, reward) {
       reward <- reward$reward
       y <- amplitude*cos(omega * t)*reward
-      self$theta$Y[[1]] <- c(y, self$theta$Y[[1]])[seq_along(self$theta$Y[[1]])]
+      self$theta$Y <- c(y, self$theta$Y)[seq_along(self$theta$Y)]
      if (t > inttime)
-        self$theta$x0[[1]] <- self$theta$x0[[1]] + learnrate * sum( self$theta$Y[[1]] ) / inttime
+        self$theta$x0 <- self$theta$x0 + learnrate * sum( self$theta$Y ) / inttime
       self$theta
     }
   )
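
The set_reward() update above keeps a fixed-length window of the most recent amplitude-weighted rewards: the new value is prepended and indexing with seq_along() drops the oldest entry. A minimal sketch of that idiom in isolation (the names window and y_new are illustrative, not part of the package):

# Fixed-length buffer via prepend-then-truncate, as in self$theta$Y above
window <- rep(NA, 5)                              # stands in for 'Y' with inttime = 5
for (y_new in 1:7) {
  window <- c(y_new, window)[seq_along(window)]   # newest value first, oldest dropped
}
print(window)                                     # 7 6 5 4 3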

demo/demo_offline_doubly_robust.R

Lines changed: 10 additions & 24 deletions
@@ -2,9 +2,7 @@ library(contextual)
 library(data.table)
 
 # Import myocardial infection dataset
-
-url  <- "http://d1ie9wlkzugsxr.cloudfront.net/data_propensity/myocardial_propensity.csv"
-data <- fread(url)
+data <- fread("http://d1ie9wlkzugsxr.cloudfront.net/data_propensity/myocardial_propensity.csv")
 
 simulations <- 3000
 horizon     <- nrow(data)
@@ -16,9 +14,7 @@ data$trt <- data$trt + 1
 data$alive <- abs(data$death - 1)
 
 # Run regression per arm, predict outcomes, and save results, a column per arm
-
 f <- alive ~ age + risk + severity
-
 model_f <- function(arm) glm(f, data=data[trt==arm],
                              family=binomial(link="logit"),
                              y=FALSE, model=FALSE)
@@ -31,31 +27,21 @@ r_data <- do.call(cbind, r_data)
 colnames(r_data) <- paste0("r", (1:max(arms)))
 
 # Bind data and model predictions
-
-data <- cbind(data,r_data)
+data <- cbind(data,r_data)
 
 # calculate propensity weights
+m <- glm(I(trt-1) ~ age + risk + severity,
+         data=data, family=binomial(link="logit"))
+data$p <- predict(m, type = "response")
 
-m <- glm(I(trt-1) ~ age + risk + severity, data=data, family=binomial(link="logit"))
-data$p <- predict(m, type = "response")
-
-# run bandit - when leaving out P1, Doubly Robust Bandit uses marginal prob per arm for propensities:
-# table(private$z)/length(private$z)
-
-f <- alive ~ trt | age + risk + severity | r1 + r2 | p
+# formula notation of dataset:
+# (without p, doublyrobustbandit uses marginal prob per arm for propensities)
+f <- alive ~ trt | age + risk + severity | r1 + r2 | p
 
-bandit <- OfflineDoublyRobustBandit$new(formula = f, data = data)
-
-# Define agents.
+bandit <- OfflineDoublyRobustBandit$new(formula = f, data = data)
 agents <- list(Agent$new(LinUCBDisjointOptimizedPolicy$new(0.2), bandit, "LinUCB"))
-
-# Initialize the simulation.
-
-simulation <- Simulator$new(agents = agents, simulations = simulations, horizon = horizon)
-
-# Run the simulation.
+simulation <- Simulator$new(agents, horizon, simulations)
 sim <- simulation$run()
-
 # plot the results
 plot(sim, type = "cumulative", regret = FALSE, rate = TRUE, legend_position = "bottomright")
 plot(sim, type = "arms", limit_agents = "LinUCB")
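
The multi-part formula used in this demo separates, left to right, the reward column, the treatment/arm column, the contextual features, the per-arm outcome-model predictions (r1, r2), and the propensity column p; per the comment in the hunk above, OfflineDoublyRobustBandit falls back to marginal per-arm probabilities when p is omitted. A hedged sketch of the two variants, assuming the column names from this script:

# With estimated propensities (column p from the glm above):
f_with_p    <- alive ~ trt | age + risk + severity | r1 + r2 | p
# Without p, the bandit reportedly uses marginal per-arm probabilities:
f_without_p <- alive ~ trt | age + risk + severity | r1 + r2

bandit <- OfflineDoublyRobustBandit$new(formula = f_with_p, data = data)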

demo/replication_kruijswijk_2019/lif_offline.R

Lines changed: 2 additions & 1 deletion
@@ -46,4 +46,5 @@ history <- Simulator$new(agents = agents,
                          simulations = simulations,
                          do_parallel = TRUE)$run()
 
-plot(history, type = "cumulative", regret = TRUE, rate = FALSE, disp = 'ci', trunc_over_agents = FALSE, trunc_per_agent = FALSE)
+plot(history, type = "cumulative", regret = TRUE,
+     rate = FALSE, disp = 'ci', trunc_over_agents = FALSE, trunc_per_agent = FALSE)

demo/replication_kruijswijk_2019/policy_efirst_regression.R

Lines changed: 7 additions & 3 deletions
@@ -7,18 +7,21 @@ EFirstRegressionPolicy <- R6::R6Class(
     b = NULL,
     A = NULL,
     epsilon = NULL,
-    initialize = function(b = matrix(c(0, 0.025, -0.025), nrow=1, ncol=3, byrow = TRUE),
-                          A = matrix(diag(c(2,2,5)), nrow=3, ncol=3, byrow = TRUE),
+    initialize = function(b = matrix(c(0, 0.025, -0.025), nrow=1, ncol=3, byrow = TRUE),
+                          A = matrix(diag(c(2,2,5)), nrow=3, ncol=3, byrow = TRUE),
                           epsilon) {
       super$initialize()
       self$b <- b
       self$A <- A
       self$epsilon <- epsilon
+      print("initialize")
     },
     set_parameters = function(context_params) {
       self$theta <- list('b' = self$b, 'A' = self$A, 'epsilon' = self$epsilon)
+      print("set_parameters")
     },
     get_action = function(t, context) {
+      print("get_action")
       if(t <= epsilon){
         action$choice <- runif(1)
       } else {
@@ -29,6 +32,7 @@ EFirstRegressionPolicy <- R6::R6Class(
       action
     },
     set_reward = function(t, context, action, reward) {
+      print("set_reward")
       y <- reward$reward
       x <- action$choice
       x <- matrix(c(1,x,x^2), nrow = 1, ncol = 3, byrow = TRUE)
@@ -37,4 +41,4 @@ EFirstRegressionPolicy <- R6::R6Class(
       self$theta
     }
   )
-)
+)

demo/replication_kruijswijk_2019/policy_tbl.R

Lines changed: 3 additions & 3 deletions
@@ -7,8 +7,8 @@ ThompsonBayesianLinearPolicy <- R6::R6Class(
     J = NULL,
     P = NULL,
     err = NULL,
-    initialize = function(J = matrix(c(0, 0.025, -0.025), nrow=1, ncol=3, byrow = TRUE),
-                          P = matrix(diag(c(2,2,5)), nrow=3, ncol=3, byrow = TRUE),
+    initialize = function(J = matrix(c(0, 0.025, -0.025), nrow=1, ncol=3, byrow = TRUE),
+                          P = matrix(diag(c(2,2,5)), nrow=3, ncol=3, byrow = TRUE),
                           err=1) {
       super$initialize()
       self$J <- J
@@ -34,4 +34,4 @@ ThompsonBayesianLinearPolicy <- R6::R6Class(
       self$theta
     }
   )
-)
+)

demo/replication_van_emden_2018/section_3_2_2.R

Lines changed: 7 additions & 10 deletions
@@ -18,16 +18,15 @@ eg_agent <- Agent$new(eg_policy, bandit)
 # Assign both agents to a list.
 
 ##################################################################################################
+#                        +-----+----+--------> arms: three ads
+#                        |     |    |
+click_probs <- matrix(c(0.5, 0.7, 0.1,   # -> context 1: older (p=.5)
+                        0.7, 0.1, 0.3),  # -> context 2: young (p=.5)
 
-#                                 +-----+----+--------> arms: three ads
-#                                 |     |    |
-click_probabilities <- matrix( c( 0.5, 0.7, 0.1,   # --> context 1: older (p=.5)
-                                  0.7, 0.1, 0.3 ), # --> context 2: young (p=.5)
-
-                               nrow = 2, ncol = 3, byrow = TRUE)
+                      nrow = 2, ncol = 3, byrow = TRUE)
 
 # Initialize a SyntheticBandit with contextual weights
-context_bandit <- ContextualBernoulliBandit$new(weights = click_probabilities)
+context_bandit <- ContextualBernoulliBandit$new(weights = click_probs)
 # Initialize LinUCBDisjointPolicy
 lucb_policy <- LinUCBDisjointPolicy$new(0.6)
 # Initialize three Agents, binding each policy to a bandit.
@@ -36,12 +35,10 @@ eg_agent <- Agent$new(eg_policy, context_bandit)
 lucb_agent <- Agent$new(lucb_policy, context_bandit)
 # Assign all agents to a list.
 agents <- list(ef_agent, eg_agent, lucb_agent)
-# Initialize a Simulator with the agent list, horizon, and number of simulations.
+# Initialize a Simulator with the agent list, horizon, and nr of simulations
 simulator <- Simulator$new(agents, horizon, simulations)
 # Now run the simulator.
 history <- simulator$run()
-# And plot the cumulative reward rate again.
-plot(history, type = "cumulative", regret = FALSE, rate = TRUE)
 
 par(mfrow = c(1, 2), mar = c(2,4,1,1) , cex=1.4)
 # Finally, plot the average reward per time step t
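
In the click_probs matrix above, rows correspond to the two contexts and columns to the three ads, so each cell is the click probability for that context/arm pair. A small hedged sketch (plain R, outside the package) of drawing one Bernoulli reward from that table by hand:

click_probs <- matrix(c(0.5, 0.7, 0.1,   # context 1: older
                        0.7, 0.1, 0.3),  # context 2: young
                      nrow = 2, ncol = 3, byrow = TRUE)

context <- 2; arm <- 1                               # hypothetical draw: young user sees ad 1
reward  <- rbinom(1, 1, click_probs[context, arm])   # click with probability 0.7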

demo/replication_van_emden_2018/section_5_4.R

Lines changed: 3 additions & 3 deletions
@@ -27,8 +27,8 @@ plot(history, type = "cumulative", legend_border = FALSE, no_par = TRUE )
 plot(history, type = "arms", limit_agents = c("LinUCB"), no_par = TRUE)
 plot(history, type = "arms", limit_agents = c("EGreedy"), no_par = TRUE)
 
-plot(history, type = "arms", limit_agents = c("LinUCB"), limit_context = 1, no_par = TRUE)
-plot(history, type = "arms", limit_agents = c("LinUCB"), limit_context = 2, no_par = TRUE )
-plot(history, type = "arms", limit_agents = c("LinUCB"), limit_context = 3, no_par = TRUE )
+plot(history, type = "arms", limit_agents = c("LinUCB"), limit_context = c("X.1"), no_par = TRUE)
+plot(history, type = "arms", limit_agents = c("LinUCB"), limit_context = c("X.2"), no_par = TRUE )
+plot(history, type = "arms", limit_agents = c("LinUCB"), limit_context = c("X.3"), no_par = TRUE )
 
 par(mfrow = c(1, 1))

docs/articles/arxiv_2018/Sweave.sty

Lines changed: 0 additions & 53 deletions
This file was deleted.
