Feature/design fixing specials #100


Merged
33 commits merged on Jun 3, 2025
Commits (33)
4bd68a4
New vignette on prediction model class (ml_model)
kkholst Jan 21, 2025
6cad405
Merge branch 'dev' into docs/vignette-prediction-model
kkholst Feb 4, 2025
b6ee439
predictor_nb naive bayes
kkholst Feb 4, 2025
26f64c4
predictor_nb naive bayes
kkholst Feb 4, 2025
82255ae
roxygen
kkholst Feb 4, 2025
495ed57
pbc example
kkholst Feb 4, 2025
a2e336f
merge dev
benesom May 12, 2025
2fbfeed
wip
benesom May 12, 2025
067acd0
introduction
benesom May 13, 2025
e70d13c
some notes
benesom May 13, 2025
626c3a1
Merge branch 'dev' of gh-private:kkholst/targeted into docs/vignette-…
benesom May 14, 2025
a9014d6
tests
benesom May 23, 2025
02eeda6
tests + roxygen
benesom May 23, 2025
2dc15e4
roxygen
benesom May 23, 2025
3b0d962
merge dev
benesom May 23, 2025
3a97784
implement summary method + simple tests
benesom May 25, 2025
e09449c
minor
benesom May 25, 2025
db98d2c
removing formals public field from learner r6 class
benesom May 25, 2025
1dbc72d
Merge branch 'dev' of gh-private:kkholst/targeted into docs/vignette-…
benesom May 25, 2025
9685093
Merge branch 'feature/learner-summary-method-benesom' into docs/vigne…
benesom May 25, 2025
8b4c106
Merge branch 'feature/renaming-predictor-grf-benesom' into docs/vigne…
benesom May 25, 2025
b77403d
wip
benesom May 26, 2025
05b9341
wip
benesom May 26, 2025
a4a3ccf
merge dev
benesom May 26, 2025
fe76c6c
wip
benesom May 26, 2025
cc401b4
Merge branch 'dev' into docs/vignette-prediction-model
kkholst Jun 1, 2025
67cf230
print working for learner with atomic vector result
kkholst Jun 1, 2025
e4367c6
print design working with zero-dim (i.e. summary.design res)
kkholst Jun 1, 2025
9a0a371
with new argument design.matrix. When FALSE only specials will be ex…
kkholst Jun 3, 2025
3baf920
default remove specials from stored formula. Specials are now correct…
kkholst Jun 3, 2025
b54db7b
earth is already handling offset in formula, so shouldn't be added to…
kkholst Jun 3, 2025
b64b28b
unit tests
kkholst Jun 3, 2025
89bef1d
response() should calc. design-matrix. Avoid unnecessary design-matri…
kkholst Jun 3, 2025
121 changes: 86 additions & 35 deletions R/design.R
@@ -44,45 +44,66 @@ model.extract2 <- function(frame, component) {
#' @param specials.call (call) specials optionally defined as a call-type
#' @param xlev a named list of character vectors giving the full set of levels
#' to be assumed for each factor
#' @param design.matrix (logical) if FALSE then only the response and specials
#' are returned. Otherwise, the design matrix `x` is also part of the returned
#' object.
#' @return An object of class 'design'
#' @author Klaus Kähler Holst
#' @export
design <- function(formula, data, ..., # nolint
intercept = FALSE,
response = FALSE,
rm_envir = FALSE,
specials = c("weights", "offset"),
specials = NULL,
specials.call = NULL,
xlev = NULL) {
tt <- terms(formula, data = data, specials = specials)
xlev = NULL,
design.matrix = TRUE) {
dots <- substitute(list(...))
if ("subset" %in% names(dots)) stop(
"subset is not an allowed specials argument for targeted::design"
)
mf <- model.frame(tt,
data = data, ...,
xlev = xlev,
drop.unused.levels = FALSE
)
mf <- model.frame(tt, data=data, ...)
tt <- terms(formula, data = data, specials = specials)

if (!design.matrix) { # only extract specials, response
des <- attr(tt, "factors")
sterm.list <- c()
for (s in specials) {
sterm <- rownames(des)[attr(tt, "specials")[[s]]]
sterm.list <- c(sterm.list, sterm)
}
fs <- update(formula, ~1)
if (length(sterm.list) > 0) {
upd <- paste(" ~ . - ", paste(sterm.list, collapse = " - "))
fs <- reformulate(paste(sterm.list, collapse = " + "))
fs <- update(formula, fs)
formula <- update(formula, upd)
}
mf <- model.frame(fs, data=data, ...)
} else { # also extract design matrix
mf <- model.frame(tt,
data = data, ...,
xlev = xlev,
drop.unused.levels = FALSE
)
if (is.null(xlev)) {
xlev <- .getXlevels(tt, mf)
}
xlev0 <- xlev
}

y <- model.response(mf, type = "any")
# delete response to generate design matrix when making predictions
if (!response) tt <- delete.response(tt)
has_intercept <- attr(tt, "intercept") == 1L
specials <- union(
specials,
names(dots)[-1] # removing "" at first position when calling dots, which
) # is a call object
if (is.null(xlev)) {
xlev <- .getXlevels(tt, mf)
}
xlev0 <- xlev

term.labels <- attr(tt, "term.labels") # predictors
specials.list <- c()
if (length(specials) > 0) {
des <- attr(tt, "factors")

sterm.list <- c()

for (s in specials) {
w <- eval(substitute(model.extract2(mf, s), list(s = s)))
specials.list <- c(specials.list, list(w))
@@ -91,20 +112,32 @@ design <- function(formula, data, ..., # nolint
}
names(specials.list) <- specials
if (length(sterm.list) > 0) {
upd <- paste(" ~ . - ", paste(sterm.list, collapse = " - "))
reformulate
tmp.terms <- update(tt, upd) |> terms()
xlev0 <- .getXlevels(tmp.terms, mf)
mf <- model.frame(tmp.terms,
data = data, ...,
xlev = xlev0,
drop.unused.levels = FALSE
)
if ((nrow(attr(tt, "factors")) - attr(tt, "response")) ==
length(sterm.list)) {
# only specials on the rhs, remove everything
formula <- update(formula, ~1)
} else {
# remove specials from formula
formula <- drop.terms(tt,
unlist(attr(tt, "specials")) -
attr(tt, "response"),
keep.response = TRUE)
}
if (design.matrix) {
xlev0[sterm.list] <- NULL
mf <- model.frame(formula,
data = data, ...,
xlev = xlev0,
drop.unused.levels = FALSE
)
# predictors without the specials
term.labels <- setdiff(term.labels,
unlist(sterm.list))
term.labels <- setdiff(term.labels,
unlist(sterm.list))

}
}
}

if (!is.null(specials.call)) {
specials.list2 <- eval(specials.call, data)
for (n in names(specials.list2)) {
@@ -114,22 +147,31 @@ design <- function(formula, data, ..., # nolint
}
}

x <- model.matrix(mf, data = data, xlev = xlev0)
has_intercept <- attr(tt, "intercept") == 1L
if (!intercept && has_intercept) {
has_intercept <- FALSE
x <- x[, -1, drop = FALSE]
if (design.matrix) {
x <- model.matrix(mf, data = data, xlev = xlev0)
if (!intercept && has_intercept) {
has_intercept <- FALSE
x <- x[, -1, drop = FALSE]
}
} else {
term.labels <- NULL
x <- NULL
}

# delete response to generate design matrix when making predictions
if (!response) tt <- delete.response(tt)

if (rm_envir) attr(tt, ".Environment") <- NULL
if (is.null(specials.call)) specials.call <- dots

res <- c(
list(
formula = formula, # formula without specials
terms = tt,
term.labels = term.labels,
xlevels = xlev,
x = x, y = y,
design.matrix = design.matrix,
intercept = has_intercept,
data = data[0, ], ## Empty data.frame to capture structure of data
specials = specials,
@@ -146,6 +188,7 @@ update.design <- function(object, data = NULL, ...) {
return(
design(object$terms,
data = data,
design.matrix = object$design.matrix,
xlev = object$xlevels,
intercept = object$intercept,
specials = object$specials,
@@ -172,7 +215,7 @@ terms.design <- function(x, specials, ...) {

#' @export
summary.design <- function(object, ...) {
object$x <- object$x[0, ]
object$x <- object$x[0, , drop=FALSE]
object$y <- NULL
for (i in object$specials) object[[i]] <- NULL
return(object)
@@ -182,7 +225,11 @@ summary.design <- function(object, ...) {
print.design <- function(x, n=2, ...) {
cat_ruler(" design object ", 10)
cat(sprintf("\nresponse (length: %s)", length(x$y)))
lava::Print(x$y, n = n, ...)
if (length(x$y) > 0) {
lava::Print(x$y, n = n, ...)
} else {
cat("\n")
}
specials <- c()
for (nam in x$specials) {
if (!is.null(x[[nam]])) {
@@ -197,7 +244,11 @@ print.design <- function(x, n=2, ...) {
cat("\n")
}
cat(sprintf("\ndesign matrix (dim: %s)\n", paste0(dim(x$x), collapse = ", ")))
lava::Print(x$x, n = n, ...)
if (NROW(x$x) > 0) {
lava::Print(x$x, n = n, ...)
} else {
print(x$x)
}
return(invisible(x))
}

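For context, a minimal sketch of how the reworked design() might be called (not part of the diff; the toy data frame d is hypothetical). With the new design.matrix = FALSE argument only the response and specials are extracted, and no model matrix is constructed:

library(targeted)
d <- data.frame(y = rnorm(10), x1 = rnorm(10), w = runif(10))  # toy data (hypothetical)

# Full design object: response y, the weights special, and design matrix x
full <- design(y ~ x1 + weights(w), data = d, specials = "weights")
dim(full$x)      # 10 x 1; only x1, since intercept = FALSE by default

# Response and specials only: x is NULL, no model matrix is built
slim <- design(y ~ x1 + weights(w), data = d, specials = "weights",
               design.matrix = FALSE)
is.null(slim$x)  # TRUE
slim$weights     # the extracted weights vector
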
45 changes: 36 additions & 9 deletions R/learner.R
@@ -80,6 +80,9 @@ learner <- R6::R6Class("learner", # nolint
#' @param estimate.args optional arguments to estimate function
#' @param specials optional specials terms (weights, offset,
#' id, subset, ...) passed on to [targeted::design]
#' @param formula.keep.specials if FALSE (default) then special terms defined
#' by `specials` are removed from the formula before it is passed to the
#' estimate function
#' @param intercept (logical) include intercept in design matrix
initialize = function(formula = NULL,
estimate,
@@ -88,6 +91,7 @@
estimate.args = NULL,
info = NULL,
specials = c(),
formula.keep.specials = FALSE,
intercept = FALSE
) {
estimate <- add_dots(estimate)
@@ -115,11 +119,26 @@
} else {
if (fit_formula) { # Formula in arguments of estimation procedure
private$fitfun <- function(data, ...) {
args <- private$update_args(private$estimate.args, ...)
des <- do.call(
targeted::design,
c(list(formula = private$.formula,
data = data,
design.matrix = FALSE),
private$des.args
)
)
args <- private$update_args(private$estimate.args, ...)
form <- private$.formula
if (!private$formula.keep.specials) form <- des$formula
args <- c(
args, list(formula = private$.formula, data = data)
args, list(formula = form, data = data)
)
return(do.call(private$init.estimate, args))
if (length(des$specials) > 0) {
args <- c(args, des[des$specials])
}
return(structure(do.call(private$init.estimate, args),
design = summary(des)
))
}
} else {
# Formula automatically processed into design matrix & response
@@ -140,7 +159,7 @@
}
}
private$predfun <- function(object, data, ...) {
if (fit_formula || no_formula) {
if (no_formula) {
predict_args_call <- private$update_args(predict.args, ...)
args <- c(list(object, newdata = data), predict_args_call)
} else {
@@ -151,15 +170,19 @@
}
predict_args_call <- predict.args
predict_args_call[names(args)] <- args

newdata <- data
if (!fit_formula) {
newdata <- model.matrix(des)
}
args <- c(list(object,
newdata = model.matrix(des)
newdata = newdata
), predict_args_call)
}
return(do.call(private$init.predict, args))
}
}
private$.formula <- formula
private$formula.keep.specials <- formula.keep.specials
self$info <- info
private$init <- list(
estimate.args = estimate.args,
@@ -247,15 +270,14 @@
return(obj)
},


#' @description
#' Extract response from data
#' @param eval when FALSE return the untransformed outcome
#' (i.e., return 'a' if formula defined as I(a==1) ~ ...)
#' @param ... additional arguments to [targeted::design]
response = function(data, eval = TRUE, ...) {
if (eval) {
return(self$design(data = data, ...)$y)
return(self$design(data = data, ..., design.matrix = FALSE)$y)
}
if (is.null(private$.formula)) return(NULL)
newf <- update(private$.formula, ~1)
@@ -303,6 +325,10 @@
# @field .formula Model formula object // uses dot as a pre-fix to allow
# using formula as an active binding
.formula = NULL,
# @field formula.keep.specials if FALSE (default) then special terms defined
# by `specials` are removed from the formula before it is passed to the
# estimate function
formula.keep.specials = NULL,
# @field init Information on the initialized model
init = NULL,
# When x$clone(deep=TRUE) is called, the deep_clone gets invoked once for
@@ -388,7 +414,8 @@ learner_print <- function(self, private) {
if (!is.null(private$fitted)) {
cat_ruler("\u2500", 18)
fit <- self$fit
if (!is.null(fit$call)) fit$call <- substitute()
attr(fit, "design") <- NULL
if (!is.atomic(fit) && !is.null(fit$call)) fit$call <- substitute()
cat(capture.output(print(fit)), sep ="\n")
}

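A hypothetical sketch of the new formula.keep.specials behaviour (the toy data and estimate function are illustrative, not part of the diff). With the default formula.keep.specials = FALSE, the weights(w) term is stripped from the formula and the extracted vector is forwarded to the estimate function as a named weights argument:

lr <- learner$new(
  formula = y ~ x1 + weights(w),
  estimate = function(formula, data, weights = NULL, ...) {
    environment(formula) <- environment()  # let lm() find the local `weights`
    lm(formula, data = data, weights = weights)  # formula arrives as y ~ x1
  },
  specials = "weights"
)
fit <- lr$estimate(d)  # d as in the design() sketch above
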
1 change: 0 additions & 1 deletion R/learner_mars.R
@@ -36,7 +36,6 @@ learner_mars <- function(formula,
),
list(...)
)
args$specials <- union(args$specials, c("offset"))

args$estimate <- function(formula, data, ...) earth::earth(formula, data, ...)
args$predict <- function(object, newdata, ...) {
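The learner_mars() change drops the automatic "offset" special: per commit b54db7b, earth::earth() already handles offset() terms in the formula itself, so the term is no longer extracted and passed separately. A hypothetical call (data frame d2 is illustrative):

d2 <- data.frame(y = rnorm(20), x1 = rnorm(20), ex = runif(20) + 1)  # toy data
lr <- learner_mars(y ~ x1 + offset(log(ex)))
fit <- lr$estimate(d2)  # offset(log(ex)) stays in the formula seen by earth()
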
7 changes: 3 additions & 4 deletions inst/tinytest/test_design.R
@@ -75,7 +75,7 @@ test_design_ellipsis()
test_design_specials <- function() {
# offset is correctly identified as a special variable and not added as a
# covariate
dd <- design(y ~ offset(x1), ddata)
dd <- design(y ~ offset(x1), ddata, specials="offset")

expect_equal(ncol(dd$x), 0)
offset_expect <- ddata$x1
@@ -89,7 +89,7 @@
# an offset variable is not changed
ddata1 <- ddata
ddata1$offset <- 1
dd <- design(y ~ offset + x1, ddata1)
dd <- design(y ~ offset + x1, ddata1, specials="offset")
expect_equivalent(
as.matrix(ddata1[, c("offset", "x1")]),
dd$x
@@ -114,8 +114,7 @@
expect_equal(ddata$x1, unname(dd$offset))

# test default weight special
weights <- identity
dd <- design(y ~ weights(x1), ddata)
dd <- design(y ~ weights(x1), ddata, specials="weights")
expect_equal(unname(dd$weights), ddata$x1)
}
test_design_specials()