Skip to content

Commit 2391f6b

Browse files
Merge pull request #166 from birdflow-science/no-abundance
2 parents 2034e5c + a55d72c commit 2391f6b

File tree

6 files changed

+101
-47
lines changed

6 files changed

+101
-47
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: BirdFlowR
22
Title: Predict and Visualize Bird Movement
3-
Version: 0.1.0.9048
3+
Version: 0.1.0.9049
44
Authors@R:
55
c(person("Ethan", "Plunkett", email = "[email protected]", role = c("aut", "cre"),
66
comment = c(ORCID = "0000-0003-4405-2251")),

NEWS.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,29 @@
1+
# BirdFlowR 0.1.0.9049
2+
2024-01-11
3+
4+
#### Drop "_clip"
5+
6+
Drop "_clip" from the output file name when clipping with `preprocess_species()`.
7+
See issue #165, but note I haven't added the extra metadata item yet.
8+
9+
#### Zero abundance for some timesteps
10+
11+
`validate_BirdFlow()` and thus also `preprocess_species()` now throw errors
12+
if any distribution doesn't sum to 1 or if the dynamic mask excludes all cells
13+
for a timestep. This addresses a problem discovered when attempting to fit
14+
models with species: "antnig" (Antillean Nighthawk) where for some timesteps
15+
the abundance grids from status and trends are all zero. Prior to this change
16+
it was possible to preprocess Antillean Nighthawk but then during fitting with
17+
BirdFlowPy confusing errors were thrown.
18+
19+
The change can be observed with:
20+
```{r}
21+
preprocess_species("antnig", res = 150, hdf5 = FALSE,
22+
skip_quality_checks = TRUE)
23+
```
24+
25+
26+
127
# BirdFlowR 0.1.0.9048
228
2023-12-22
329

R/preprocess_species.R

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -357,8 +357,6 @@ preprocess_species <- function(species = NULL,
357357
if (any_output) {
358358
out_base <-
359359
file.path(out_dir, paste0(download_species, "_", st_year, "_", res, "km"))
360-
if (!is.null(clip))
361-
out_base <- paste0(out_base, "_clip")
362360

363361
paths <- list()
364362
if (hdf5) {

R/validate_BirdFlow.R

Lines changed: 59 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,12 @@ validate_BirdFlow <- function(x, error = TRUE, allow_incomplete = FALSE) {
6262
if (error) {
6363
if (allow_incomplete) {
6464
if (any(p$type == "error"))
65-
message <- paste0("Problems found by validate_BirdFlow:",
65+
message <- paste0("Problems found by validate_BirdFlow:\n\t",
6666
paste(p$problem[p$type == "error"],
67-
collapse = "; "))
67+
collapse = ";\n\t"))
6868
} else { # Don't allow incomplete:
6969
if (nrow(p) > 0)
70-
message <- paste("Problems found by validate_BirdFlow:\n\t",
70+
message <- paste0("Problems found by validate_BirdFlow:\n\t",
7171
paste(p$problem, collapse = "; \n\t"))
7272
}
7373
}
@@ -136,6 +136,48 @@ validate_BirdFlow <- function(x, error = TRUE, allow_incomplete = FALSE) {
136136
}
137137

138138

139+
140+
141+
# check dates
142+
if (!"dates" %in% names(x) || !is.data.frame(x$dates)) {
143+
p <- add_prob("x$dates is missing, NA or not a dataframe", "error", p)
144+
report_problems()
145+
} else { # dates exists and is data.frame
146+
147+
if (x$metadata$ebird_version_year < 2022) {
148+
# 2021 ebirdst models have use older dates format
149+
required_cols <- c("interval", "date", "doy", "start", "midpoint", "end")
150+
} else {
151+
#2022_ ebirdst models use newer dates format
152+
required_cols <- names(make_dates())
153+
}
154+
if (!all(required_cols %in% names(x$dates))) {
155+
p <- add_prob(paste0("x$dates is missing columns:",
156+
paste(setdiff(required_cols, names(x$dates)))),
157+
"error", p)
158+
report_problems()
159+
} # end if dates missing columns
160+
rm(required_cols)
161+
162+
163+
if ("distr" %in% names(x)) {
164+
if (is.null(dim(x$distr)) ||
165+
!length(dim(x$distr)) == 2 ||
166+
!is.numeric(x$distr)) {
167+
p <- add_prob("distr has wrong format", "error", p)
168+
report_problems()
169+
}
170+
171+
if (nrow(x$dates) != ncol(x$distr)) {
172+
p <- add_prob(paste0("x$dates and x$distr do not represent the same ",
173+
"number of timesteps."), "error", p)
174+
report_problems()
175+
}
176+
}
177+
} # end dates is data.frame
178+
179+
180+
139181
# check consistancy of has_ (transitions, marginals, distr)
140182
components <- c("transitions", "marginals", "distr")
141183
for (i in seq_along(components)) {
@@ -144,13 +186,26 @@ validate_BirdFlow <- function(x, error = TRUE, allow_incomplete = FALSE) {
144186
p <- add_prob(paste0("has_", components[i], " is not TRUE or FALSE"),
145187
"error", p)
146188
}
189+
147190
if (has_distr(x)) {
148191
if (!is.matrix(x$distr))
149192
p <- add_prob("distr is not a matrix", "error", p)
193+
sums_to_one <- get_distr(x) |>
194+
apply(2, sum) |>
195+
sapply(function(x) isTRUE(all.equal(x, 1)))
196+
if(!all(sums_to_one)){
197+
p <- add_prob("not all distributions sum to one",
198+
"error", p)
199+
}
150200
}
151201
if (has_dynamic_mask(x)) {
152-
if (!is.matrix(x$geom$dynamic_mask))
202+
if (!is.matrix(x$geom$dynamic_mask)){
153203
p <- add_prob("dynamic mask is not a matrix", "error", p)
204+
} else {
205+
if(!all(apply(get_dynamic_mask(x), 2, sum) > 0)){
206+
p <- add_prob("dynamic mask eliminates all cells for some timesteps", "error", p)
207+
}
208+
}
154209
}
155210

156211
if (has_marginals(x)) {
@@ -203,44 +258,6 @@ validate_BirdFlow <- function(x, error = TRUE, allow_incomplete = FALSE) {
203258

204259

205260

206-
# check dates
207-
if (!"dates" %in% names(x) || !is.data.frame(x$dates)) {
208-
p <- add_prob("x$dates is missing, NA or not a dataframe", "error", p)
209-
report_problems()
210-
} else { # dates exists and is data.frame
211-
212-
if (x$metadata$ebird_version_year < 2022) {
213-
# 2021 ebirdst models have use older dates format
214-
required_cols <- c("interval", "date", "doy", "start", "midpoint", "end")
215-
} else {
216-
#2022_ ebirdst models use newer dates format
217-
required_cols <- names(make_dates())
218-
}
219-
if (!all(required_cols %in% names(x$dates))) {
220-
p <- add_prob(paste0("x$dates is missing columns:",
221-
paste(setdiff(required_cols, names(x$dates)))),
222-
"error", p)
223-
report_problems()
224-
} # end if dates missing columns
225-
rm(required_cols)
226-
227-
228-
if ("distr" %in% names(x)) {
229-
if (is.null(dim(x$distr)) ||
230-
!length(dim(x$distr)) == 2 ||
231-
!is.numeric(x$distr)) {
232-
p <- add_prob("distr has wrong format", "error", p)
233-
report_problems()
234-
}
235-
236-
if (nrow(x$dates) != ncol(x$distr)) {
237-
p <- add_prob(paste0("x$dates and x$distr do not represent the same ",
238-
"number of timesteps."), "error", p)
239-
}
240-
}
241-
} # end dates is data.frame
242-
243-
244261
# consistency on n_active
245262
if (is.na(n_active(x))) {
246263
if (has_transitions(x) || has_marginals(x) || has_distr(x) ||

tests/testthat/test-preprocess_species.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,8 @@ test_that("preprocess_species() works with clip", {
184184

185185
# Test that expect file was created
186186
created_files <- list.files(dir)
187-
expect_in(created_files, c("example_data_2021_30km_clip.hdf5", # ebird 2021
188-
"yebsap-example_2022_30km_clip.hdf5")) # 2022
187+
expect_in(created_files, c("example_data_2021_30km.hdf5", # ebird 2021
188+
"yebsap-example_2022_30km.hdf5")) # 2022
189189

190190
skip_if_wrong_ebirdst_for_snapshot()
191191

tests/testthat/test-validate_BirdFlow.R

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,5 +64,18 @@ test_that("validate_BirdFlow throws expected errors", {
6464
expect_error(validate_BirdFlow(bad),
6565
"Not all marginals have a sum of one")
6666

67+
# Distributions don't sum to 1
68+
bad <- bf
69+
bad$distr[ , 1] <- 0
70+
expect_error(validate_BirdFlow(bad),
71+
"not all distributions sum to one")
72+
73+
74+
# Empty dynamic mask (for any timestep)
75+
bad <- bf
76+
bad$geom$dynamic_mask[ , 1] <- 0
77+
expect_error(validate_BirdFlow(bad),
78+
"dynamic mask eliminates all cells for some timesteps")
79+
6780

6881
})

0 commit comments

Comments
 (0)