21
21
# ' @param size,fatten,linewidth For bar plots, `size`, `fatten`, and `linewidth`
22
22
# ' are passed to [ggplot2::geom_pointrange()] to control the appearance of the
23
23
# ' `yrep` points and intervals. For rootograms `size` is passed to
24
- # ' [ggplot2::geom_line()].
24
+ # ' [ggplot2::geom_line()] and [ggplot2::geom_pointrange()] .
25
25
# ' @param freq For bar plots only, if `TRUE` (the default) the y-axis will
26
26
# ' display counts. Setting `freq=FALSE` will put proportions on the y-axis.
27
+ # ' @param bound_distinct For `ppc_rootogram(style = "discrete)`,
28
+ # ' if `TRUE` then the observed counts will be plotted with different shapes
29
+ # ' depending on whether they are within the bounds of the `y` quantiles.
27
30
# '
28
31
# ' @template return-ggplot-or-data
29
32
# '
44
47
# ' }
45
48
# ' \item{`ppc_rootogram()`}{
46
49
# ' Rootograms allow for diagnosing problems in count data models such as
47
- # ' overdispersion or excess zeros. They consist of a histogram of `y` with the
48
- # ' expected counts based on `yrep` overlaid as a line along with uncertainty
49
- # ' intervals. The y-axis represents the square roots of the counts to
50
- # ' approximately adjust for scale differences and thus ease comparison between
51
- # ' observed and expected counts. Using the `style` argument, the histogram
52
- # ' style can be adjusted to focus on different aspects of the data:
50
+ # ' overdispersion or excess zeros. In `standing`, `hanging`, and `suspended`
51
+ # ' styles, they consist of a histogram of `y` with the expected counts based on
52
+ # ' `yrep` overlaid as a line along with uncertainty intervals.
53
+ # '
54
+ # ' Meanwhile, in `discrete` style, median counts based on `yrep` are laid
55
+ # ' as a point range with uncertainty intervals along with dots
56
+ # ' representing the `y`.
57
+ # '
58
+ # ' The y-axis represents the square roots of the counts to approximately
59
+ # ' adjust for scale differences and thus ease comparison between observed
60
+ # ' and expected counts. Using the `style` argument, the rootogram can be
61
+ # ' adjusted to focus on different aspects of the data:
53
62
# ' * _Standing_: basic histogram of observed counts with curve
54
63
# ' showing expected counts.
55
- # ' * _Hanging_: observed counts counts hanging from the curve
64
+ # ' * _Hanging_: observed counts hanging from the curve
56
65
# ' representing expected counts.
57
66
# ' * _Suspended_: histogram of the differences between expected and
58
67
# ' observed counts.
68
+ # ' * _Discrete_: a dot-and-whisker plot of the median counts and
69
+ # ' dots representing observed counts.
70
+ # '
71
+ # ' As it emphasizes the discrete nature of the count data,
72
+ # ' using `discrete` style is suggested.
59
73
# '
60
74
# ' **All of the rootograms are plotted on the square root scale**. See Kleiber
61
75
# ' and Zeileis (2016) for advice on interpreting rootograms and selecting
@@ -198,22 +212,22 @@ ppc_bars_grouped <-
198
212
fatten = 2.5 ,
199
213
linewidth = 1 ,
200
214
freq = TRUE ) {
201
- check_ignored_arguments(... )
202
- call <- match.call(expand.dots = FALSE )
203
- g <- eval(ungroup_call(" ppc_bars" , call ), parent.frame())
204
- if (fixed_y(facet_args )) {
205
- g <- g + expand_limits(y = 1.05 * max(g $ data [[" h" ]], na.rm = TRUE ))
215
+ check_ignored_arguments(... )
216
+ call <- match.call(expand.dots = FALSE )
217
+ g <- eval(ungroup_call(" ppc_bars" , call ), parent.frame())
218
+ if (fixed_y(facet_args )) {
219
+ g <- g + expand_limits(y = 1.05 * max(g $ data [[" h" ]], na.rm = TRUE ))
220
+ }
221
+ g +
222
+ bars_group_facets(facet_args ) +
223
+ force_axes_in_facets()
206
224
}
207
- g +
208
- bars_group_facets(facet_args ) +
209
- force_axes_in_facets()
210
- }
211
225
212
226
213
227
# ' @rdname PPC-discrete
214
228
# ' @export
215
229
# ' @param style For `ppc_rootogram`, a string specifying the rootogram
216
- # ' style. The options are `"standing"`, `"hanging"`, and
230
+ # ' style. The options are `"discrete"`, `" standing"`, `"hanging"`, and
217
231
# ' `"suspended"`. See the **Plot Descriptions** section, below, for
218
232
# ' details on the different styles.
219
233
# '
@@ -234,61 +248,34 @@ ppc_bars_grouped <-
234
248
# '
235
249
# ' ppc_rootogram(y, yrep, style = "hanging", prob = 0.8)
236
250
# ' ppc_rootogram(y, yrep, style = "suspended")
251
+ # ' ppc_rootogram(y, yrep, style = "discrete")
237
252
# '
238
253
ppc_rootogram <- function (y ,
239
254
yrep ,
240
- style = c(" standing" , " hanging" , " suspended" ),
255
+ style = c(" standing" , " hanging" , " suspended" , " discrete " ),
241
256
... ,
242
257
prob = 0.9 ,
243
- size = 1 ) {
258
+ size = 1 ,
259
+ bound_distinct = TRUE ) {
244
260
check_ignored_arguments(... )
245
261
style <- match.arg(style )
246
- y <- validate_y(y )
247
- yrep <- validate_predictions(yrep , length(y ))
248
- if (! all_counts(y )) {
249
- abort(" ppc_rootogram expects counts as inputs to 'y'." )
250
- }
251
- if (! all_counts(yrep )) {
252
- abort(" ppc_rootogram expects counts as inputs to 'yrep'." )
253
- }
254
262
255
- alpha <- (1 - prob ) / 2
256
- probs <- c(alpha , 1 - alpha )
257
- ymax <- max(y , yrep )
258
- xpos <- 0L : ymax
259
-
260
- # prepare a table for yrep
261
- tyrep <- as.list(rep(NA , nrow(yrep )))
262
- for (i in seq_along(tyrep )) {
263
- tyrep [[i ]] <- table(yrep [i ,])
264
- matches <- match(xpos , rownames(tyrep [[i ]]))
265
- tyrep [[i ]] <- as.numeric(tyrep [[i ]][matches ])
266
- }
267
- tyrep <- do.call(rbind , tyrep )
268
- tyrep [is.na(tyrep )] <- 0
269
- tyexp <- sqrt(colMeans(tyrep ))
270
- tyquantile <- sqrt(t(apply(tyrep , 2 , quantile , probs = probs )))
271
- colnames(tyquantile ) <- c(" tylower" , " tyupper" )
272
-
273
- # prepare a table for y
274
- ty <- table(y )
275
- ty <- sqrt(as.numeric(ty [match(xpos , rownames(ty ))]))
276
- if (style == " suspended" ) {
277
- ty <- tyexp - ty
278
- }
279
- ty [is.na(ty )] <- 0
280
- ypos <- ty / 2
281
- if (style == " hanging" ) {
282
- ypos <- tyexp - ypos
283
- }
263
+ data <- .ppc_rootogram_data(
264
+ y = y ,
265
+ yrep = yrep ,
266
+ style = style ,
267
+ prob = prob ,
268
+ bound_distinct = bound_distinct
269
+ )
284
270
285
- data <- data.frame (xpos , ypos , ty , tyexp , tyquantile )
286
- graph <- ggplot(data ) +
287
- aes(
288
- ymin = .data $ tylower ,
289
- ymax = .data $ tyupper ,
290
- height = .data $ ty
291
- ) +
271
+ # Building geoms for y and y_rep
272
+ geom_y <- if (style == " discrete" ) {
273
+ geom_point(
274
+ aes(y = .data $ obs , shape = .data $ obs_shape ),
275
+ size = size * 1.5 ,
276
+ color = get_color(" d" ),
277
+ fill = get_color(" d" ))
278
+ } else {
292
279
geom_tile(
293
280
aes(
294
281
x = .data $ xpos ,
@@ -298,34 +285,69 @@ ppc_rootogram <- function(y,
298
285
color = get_color(" lh" ),
299
286
linewidth = 0.25 ,
300
287
width = 1
301
- ) +
302
- bayesplot_theme_get()
303
-
304
- if (style != " standing" ) {
305
- graph <- graph + hline_0(size = 0.4 )
288
+ )
306
289
}
307
290
308
- graph <- graph +
291
+ geom_yrep <- if (style == " discrete" ) {
292
+ geom_pointrange(
293
+ aes(y = .data $ pred_median , ymin = .data $ lower , ymax = .data $ upper , color = " y_rep" ),
294
+ fill = get_color(" lh" ),
295
+ linewidth = size ,
296
+ size = size ,
297
+ fatten = 2 ,
298
+ alpha = 1
299
+ )
300
+ } else {
309
301
geom_smooth(
310
- aes(
311
- x = .data $ xpos ,
312
- y = .data $ tyexp ,
313
- color = " Expected"
314
- ),
302
+ aes(x = .data $ xpos , y = .data $ tyexp , color = " Expected" ),
315
303
fill = get_color(" d" ),
316
304
linewidth = size ,
317
305
stat = " identity"
318
- ) +
319
- scale_fill_manual(" " , values = get_color(" l" )) +
320
- scale_color_manual(" " , values = get_color(" dh" )) +
321
- labs(x = expression(italic(y )),
322
- y = expression(sqrt(Count )))
323
-
324
- if (style == " standing" ) {
325
- graph <- graph + dont_expand_y_axis()
306
+ )
326
307
}
327
308
328
- graph + reduce_legend_spacing(0.25 )
309
+ # Creating the graph
310
+ graph <- ggplot(data )
311
+
312
+ if (style == " discrete" ) {
313
+ graph <- graph +
314
+ geom_yrep +
315
+ geom_y +
316
+ aes(x = .data $ xpos ) +
317
+ scale_y_sqrt() +
318
+ scale_fill_manual(" " , values = get_color(" d" ), guide = " none" ) +
319
+ scale_color_manual(" " , values = get_color(" lh" ), labels = yrep_label()) +
320
+ labs(x = expression(italic(y )), y = " Count" ) +
321
+ bayesplot_theme_get() +
322
+ reduce_legend_spacing(0.25 ) +
323
+ scale_shape_manual(values = c(" In" = 22 , " Out" = 23 , " y" = 22 ), guide = " legend" , labels = c(" y" = expression(italic(y ))))
324
+ if (bound_distinct ) {
325
+ graph <- graph + guides(shape = guide_legend(expression(italic(y )~ within ~ bounds )))
326
+ } else {
327
+ graph <- graph + guides(shape = guide_legend(" " ))
328
+ }
329
+ } else {
330
+ graph <- graph +
331
+ geom_y +
332
+ geom_yrep +
333
+ aes(
334
+ ymin = .data $ tylower ,
335
+ ymax = .data $ tyupper ,
336
+ height = .data $ ty
337
+ ) +
338
+ scale_fill_manual(" " , values = get_color(" l" )) +
339
+ scale_color_manual(" " , values = get_color(" dh" )) +
340
+ labs(x = expression(italic(y )), y = expression(sqrt(Count ))) +
341
+ bayesplot_theme_get() +
342
+ reduce_legend_spacing(0.25 )
343
+ if (style == " standing" ) {
344
+ graph <- graph + dont_expand_y_axis()
345
+ } else {
346
+ graph <- graph + hline_0(size = 0.4 )
347
+ }
348
+ }
349
+
350
+ return (graph )
329
351
}
330
352
331
353
@@ -395,7 +417,7 @@ ppc_bars_data <-
395
417
data <-
396
418
reshape2 :: melt(tmp_data , id.vars = " group" ) %> %
397
419
count(.data $ group , .data $ value , .data $ variable ) %> %
398
- tidyr :: complete(.data $ group , .data $ value , .data $ variable , fill = list (n = 0 )) %> %
420
+ tidyr :: complete(.data $ group , .data $ value , .data $ variable , fill = list (n = 0 )) %> %
399
421
group_by(.data $ variable , .data $ group ) %> %
400
422
mutate(proportion = .data $ n / sum(.data $ n )) %> %
401
423
ungroup() %> %
@@ -440,3 +462,90 @@ bars_group_facets <- function(facet_args, scales_default = "fixed") {
440
462
fixed_y <- function (facet_args ) {
441
463
! isTRUE(facet_args [[" scales" ]] %in% c(" free" , " free_y" ))
442
464
}
465
+
466
+ # ' Internal function for `ppc_rootogram()`
467
+ # ' @param y,yrep User's `y` and `yrep` arguments.
468
+ # ' @param style,prob,bound_distinct User's `style`, `prob`, and
469
+ # ' (if applicable) `bound_distinct` arguments.
470
+ # ' @noRd
471
+ .ppc_rootogram_data <- function (y ,
472
+ yrep ,
473
+ style = c(" standing" , " hanging" , " suspended" , " discrete" ),
474
+ prob = 0.9 ,
475
+ bound_distinct ) {
476
+
477
+ y <- validate_y(y )
478
+ yrep <- validate_predictions(yrep , length(y ))
479
+ if (! all_counts(y )) {
480
+ abort(" ppc_rootogram expects counts as inputs to 'y'." )
481
+ }
482
+ if (! all_counts(yrep )) {
483
+ abort(" ppc_rootogram expects counts as inputs to 'yrep'." )
484
+ }
485
+
486
+ alpha <- (1 - prob ) / 2
487
+ probs <- c(alpha , 1 - alpha )
488
+ ymax <- max(y , yrep )
489
+ xpos <- 0L : ymax
490
+
491
+ # prepare a table for yrep
492
+ tyrep <- as.list(rep(NA , nrow(yrep )))
493
+ for (i in seq_along(tyrep )) {
494
+ tyrep [[i ]] <- table(yrep [i ,])
495
+ matches <- match(xpos , rownames(tyrep [[i ]]))
496
+ tyrep [[i ]] <- as.numeric(tyrep [[i ]][matches ])
497
+ }
498
+ tyrep <- do.call(rbind , tyrep )
499
+ tyrep [is.na(tyrep )] <- 0
500
+
501
+ # discrete style
502
+ if (style == " discrete" ){
503
+ pred_median <- apply(tyrep , 2 , median )
504
+ pred_quantile <- t(apply(tyrep , 2 , quantile , probs = probs ))
505
+ colnames(pred_quantile ) <- c(" lower" , " upper" )
506
+
507
+ # prepare a table for y
508
+ ty <- table(y )
509
+ y_count <- as.numeric(ty [match(xpos , rownames(ty ))])
510
+ y_count [is.na(y_count )] <- 0
511
+
512
+ if (bound_distinct ) {
513
+ # If the observed count is within the bounds of the predicted quantiles,
514
+ # use a different shape for the point
515
+ obs_shape <- obs_shape <- ifelse(y_count > = pred_quantile [, " lower" ] & y_count < = pred_quantile [, " upper" ], " In" , " Out" )
516
+ } else {
517
+ obs_shape <- rep(" y" , length(y_count )) # all points are the same shape for observed
518
+ }
519
+
520
+ data <- data.frame (
521
+ xpos = xpos ,
522
+ obs = y_count ,
523
+ pred_median = pred_median ,
524
+ lower = pred_quantile [, " lower" ],
525
+ upper = pred_quantile [, " upper" ],
526
+ obs_shape = obs_shape
527
+ )
528
+ }
529
+ # standing, hanging, suspended styles
530
+ else {
531
+ tyexp <- sqrt(colMeans(tyrep ))
532
+ tyquantile <- sqrt(t(apply(tyrep , 2 , quantile , probs = probs )))
533
+ colnames(tyquantile ) <- c(" tylower" , " tyupper" )
534
+
535
+ # prepare a table for y
536
+ ty <- table(y )
537
+ ty <- sqrt(as.numeric(ty [match(xpos , rownames(ty ))]))
538
+ if (style == " suspended" ) {
539
+ ty <- tyexp - ty
540
+ }
541
+ ty [is.na(ty )] <- 0
542
+ ypos <- ty / 2
543
+ if (style == " hanging" ) {
544
+ ypos <- tyexp - ypos
545
+ }
546
+
547
+ data <- data.frame (xpos , ypos , ty , tyexp , tyquantile )
548
+ }
549
+
550
+ return (data )
551
+ }
0 commit comments