Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correctly render benchmark vignette in CRAN vs non-CRAN context #168

Merged
merged 14 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions data-raw/benchmark.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ for (version in c("CRAN", "main", "branch")) {
missing <- jsonlite::fromJSON("SCDB.lock")$packages$ref %>%
purrr::discard(rlang::is_installed)
if (length(missing) > 0) pak::pkg_install(missing, lib = lib_path)

# Explicitly install the packages
pak::pkg_install(source, lib = lib_path, dependencies = FALSE)
}


Expand Down Expand Up @@ -66,8 +69,12 @@ if (identical(Sys.getenv("CI"), "true") && identical(Sys.getenv("BACKEND"), ""))
version == "branch" ~ glue::glue("ssi-dk-SCDB-{sha}")
)

.libPaths(c(here::here("installations", lib_dir), lib_paths_default))
library("SCDB")
library("SCDB", lib.loc = here::here("installations", lib_dir))

# Add proper version labels to the benchmarks
if (version == "CRAN") {
version <- paste0("SCDB v", packageVersion("SCDB"))
}

# Open connection to the database
conns <- get_test_conns()
Expand Down Expand Up @@ -137,8 +144,11 @@ if (identical(Sys.getenv("CI"), "true") && identical(Sys.getenv("BACKEND"), ""))
"n" = n
)

dir.create("data", showWarnings = FALSE)
saveRDS(update_snapshot_benchmark, glue::glue("data/benchmark-update_snapshot_{names(conns)[[1]]}_{version}.rds"))
dir.create("inst/extdata", showWarnings = FALSE, recursive = TRUE)
saveRDS(
update_snapshot_benchmark,
glue::glue("inst/extdata/benchmark-update_snapshot_{names(conns)[[1]]}_{version}.rds")
)
})

# Benchmark 2, update_snapshot() with increasing data size
Expand All @@ -164,10 +174,10 @@ if (identical(Sys.getenv("CI"), "true") && identical(Sys.getenv("BACKEND"), ""))
"n" = n
)

dir.create("data", showWarnings = FALSE)
dir.create("inst/extdata", showWarnings = FALSE, recursive = TRUE)
saveRDS(
update_snapshot_benchmark,
glue::glue("data/benchmark-update_snapshot_complexity_{n}_{names(conns)[[1]]}_{version}.rds")
glue::glue("inst/extdata/benchmark-update_snapshot_complexity_{n}_{names(conns)[[1]]}_{version}.rds")
)
}

Expand Down
Binary file modified inst/extdata/benchmarks.rds
Binary file not shown.
125 changes: 84 additions & 41 deletions vignettes/benchmarks.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
comment = "#>"
)

# Set a flag to determine if this is being run on CRAN
on_cran <- !identical(Sys.getenv("NOT_CRAN"), "true")

# NOTE:
# To re-run the benchmarks, run the "benchmark" workflow on GitHub
```
Expand Down Expand Up @@ -41,8 +44,19 @@
The performance of this benchmark function is timed with the `{microbenchmark}` package using 10 replicates.
All benchmarks are run on the same machine.

The results of the benchmark are shown graphically below (mean and standard deviation), where we compare the current
development version of `SCDB` with the current CRAN version.
```{r benchmark context, results = "asis", echo = FALSE}
# Emit the sentence introducing the benchmark figures as raw markdown.
# `results = "asis"` writes the cat() output into the document verbatim;
# `echo = FALSE` hides this code while still keeping that output
# (`include = FALSE` would evaluate the chunk but discard the text).
# The wording depends on the `on_cran` flag: a single-version description
# when it is TRUE, a development-vs-CRAN comparison otherwise.
# cat() joins its arguments with a single space, so the pieces below form
# one sentence.
if (on_cran) {
  cat(
    "The results of the benchmark are shown graphically below",
    "(mean and standard deviation), where we measure the",
    "performance of `SCDB`."
  )
} else {
  cat(
    "The results of the benchmark are shown graphically below",
    "(mean and standard deviation), where we compare the",
    "current development version of `SCDB` with the current CRAN version."
  )
}
```

```{r benchmark_preprocessing, echo = FALSE, eval = requireNamespace("here")}
benchmark_location <- c(
Expand All @@ -52,12 +66,13 @@
purrr::discard(~ identical(., "")) %>%
purrr::pluck(1)

benchmarks <- readRDS(benchmark_location)
benchmarks <- readRDS(benchmark_location) %>%
dplyr::mutate("version" = as.character(.data$version))

# Determine if the SHA is on main
sha <- benchmarks %>%
dplyr::distinct(version) %>%
dplyr::filter(!(version %in% c("CRAN", "main", "branch"))) %>%
dplyr::distinct(.data$version) %>%
dplyr::filter(!startsWith(.data$version, "SCDB"), .data$version != "main") %>%
dplyr::pull("version")

# Check local git history
Expand All @@ -71,14 +86,17 @@
return(identical(Sys.getenv("CI"), "true"))
})

# If the SHA has been merged, use as the "main" version and remove the other,
# older, main version
if (on_main) {
# If we are on CRAN, use the newest benchmark (version = sha)
# This benchmark is then labelled with the newest version number of SCDB (the one we just deployed to CRAN)

Check warning on line 90 in vignettes/benchmarks.Rmd

View workflow job for this annotation

GitHub Actions / ⚙️ Dispatch / lint / 🖋️ Lint

file=vignettes/benchmarks.Rmd,line=90,col=86,[nolint_line_length_linter] Code blocks should not be more than 85 characters. This line is 107 characters.
if (on_cran) {
benchmarks <- benchmarks %>%
dplyr::filter(.data$version == !!sha) %>%
dplyr::mutate("version" = paste0("SCDB v", packageVersion("SCDB")))
} else if (on_main) {
# If the SHA has been merged, use as the "main" version and remove the other, older, main version

Check warning on line 96 in vignettes/benchmarks.Rmd

View workflow job for this annotation

GitHub Actions / ⚙️ Dispatch / lint / 🖋️ Lint

file=vignettes/benchmarks.Rmd,line=96,col=86,[nolint_line_length_linter] Code blocks should not be more than 85 characters. This line is 99 characters.
benchmarks <- benchmarks %>%
dplyr::filter(.data$version != "main") %>%
dplyr::mutate(
"version" = dplyr::if_else(.data$version == "CRAN", "CRAN", "development")
)
dplyr::mutate("version" = dplyr::if_else(.data$version == sha, "development", .data$version))

Check warning on line 99 in vignettes/benchmarks.Rmd

View workflow job for this annotation

GitHub Actions / ⚙️ Dispatch / lint / 🖋️ Lint

file=vignettes/benchmarks.Rmd,line=99,col=86,[nolint_line_length_linter] Code blocks should not be more than 85 characters. This line is 97 characters.
}

# Mean and standard deviation (see ggplot2::mean_se())
Expand All @@ -96,44 +114,43 @@
!stringr::str_ends(.data$benchmark_function, stringr::fixed("complexity"))
)

# Add note slow backends
slow_backends <- benchmark_1 %>%
dplyr::distinct(.data$database, .data$n) %>%
dplyr::filter(.data$n < max(.data$n)) %>%
dplyr::pull("database")

benchmark_1 <- benchmark_1 %>%
dplyr::mutate(
"database" = paste0(database, ifelse(database %in% slow_backends, "*", ""))
)

# Insert newline into database name to improve rendering of figures
labeller <- ggplot2::as_labeller(
function(l) stringr::str_replace_all(l, stringr::fixed(" v"), "\nv")
)


# Apply "dodging" to sub-groups to show graphically
dodge <- ggplot2::position_dodge(width = 0.6)

g <- ggplot2::ggplot(
benchmark_1,
ggplot2::aes(x = version, y = time / 1e9)
ggplot2::aes(x = version, y = time / 1e9, color = database)
) +
ggplot2::stat_summary(
fun.data = mean_sd,
geom = "pointrange",
size = 0.5,
linewidth = 1
size = 0.5, linewidth = 1,
RasmusSkytte marked this conversation as resolved.
Show resolved Hide resolved
position = dodge
) +
ggplot2::scale_x_discrete(guide = ggplot2::guide_axis(n.dodge = 2)) +
ggplot2::labs(x = "Codebase version", y = "Time (s)") +
ggplot2::theme(legend.position = "bottom")


if (on_cran) {
# Reduce font size for CRAN version
g <- g + ggplot2::theme(text = ggplot2::element_text(size = 8))

# Make the legend two rows
g <- g + ggplot2::guides(color = ggplot2::guide_legend(title = "", nrow = 2, byrow = TRUE))

Check warning on line 145 in vignettes/benchmarks.Rmd

View workflow job for this annotation

GitHub Actions / ⚙️ Dispatch / lint / 🖋️ Lint

file=vignettes/benchmarks.Rmd,line=145,col=86,[nolint_line_length_linter] Code blocks should not be more than 85 characters. This line is 93 characters.

} else {
# Add facets to non-CRAN rendering
g <- g +
ggplot2::facet_grid(

Check warning on line 150 in vignettes/benchmarks.Rmd

View workflow job for this annotation

GitHub Actions / ⚙️ Dispatch / lint / 🖋️ Lint

file=vignettes/benchmarks.Rmd,line=150,col=2,[indentation_linter] Indentation should be 4 spaces but is 2 spaces.
rows = ggplot2::vars(benchmark_function),
cols = ggplot2::vars(database),
labeller = labeller
) +
ggplot2::labs(x = "Codebase version", y = "Time (s)")

if (length(slow_backends) > 1) {
g <- g + ggplot2::labs(
caption = "* IMPORTANT: Benchmark data halved for this backend!"
)
}

Expand Down Expand Up @@ -171,26 +188,52 @@
)
)

ggplot2::ggplot(

# Apply "dodging" to sub-groups to show graphically
dodge <- ggplot2::position_dodge(width = 0.6)

# Set aesthetics for CRAN and non-CRAN versions
if (on_cran) {
aes <- ggplot2::aes(x = n * nrow(iris) / 1e3, y = time / 1e9, color = database)
} else {
aes <- ggplot2::aes(x = n * nrow(iris) / 1e3, y = time / 1e9, color = version)
}

g <- ggplot2::ggplot(
benchmark_2,
ggplot2::aes(x = n * nrow(iris) / 1e3, y = time / 1e9, color = version)
aes
) +
ggplot2::stat_summary(
fun.data = mean_sd,
geom = "pointrange",
size = 0.5,
linewidth = 1
size = 0.5, linewidth = 1,
position = dodge
) +
ggplot2::geom_smooth(method = "lm", formula = y ~ x, se = FALSE, linetype = 3) +
ggplot2::facet_grid(
rows = ggplot2::vars(benchmark_function),
cols = ggplot2::vars(database),
labeller = labeller
) +
ggplot2::labs(
x = "Data size (1,000 rows)",
y = "Time (s)",
color = "Codebase version"
) +
ggplot2::theme(panel.spacing = grid::unit(1, "lines"), legend.position = "bottom")


if (on_cran) {
# Reduce font size for CRAN version
g <- g + ggplot2::theme(text = ggplot2::element_text(size = 8))

# Make the legend two rows
g <- g + ggplot2::guides(color = ggplot2::guide_legend(title = "", nrow = 2, byrow = TRUE))

Check warning on line 226 in vignettes/benchmarks.Rmd

View workflow job for this annotation

GitHub Actions / ⚙️ Dispatch / lint / 🖋️ Lint

file=vignettes/benchmarks.Rmd,line=226,col=86,[nolint_line_length_linter] Code blocks should not be more than 85 characters. This line is 93 characters.

} else {
# Add facets to non-CRAN rendering
g <- g +
ggplot2::facet_grid(

Check warning on line 231 in vignettes/benchmarks.Rmd

View workflow job for this annotation

GitHub Actions / ⚙️ Dispatch / lint / 🖋️ Lint

file=vignettes/benchmarks.Rmd,line=231,col=2,[indentation_linter] Indentation should be 4 spaces but is 2 spaces.
rows = ggplot2::vars(benchmark_function),
cols = ggplot2::vars(database),
labeller = labeller
)
}

g
```
Loading