Skip to content

Commit

Permalink
feat: add layout_dedupe_ph_labels() to handle duplicate placeholder l…
Browse files Browse the repository at this point in the history
…abels

Building on the code by @Majid-Eismann, I added `layout_dedupe_ph_labels()`
to handle duplicate placeholder labels. By default, it will only detect duplicate
labels, but apply no changes. With `action = "rename"`, it auto-renames duplicate
labels and `action = "delete"` deletes duplicates, only keeping their first
occurence. If requested, output is printed to the console, informing the user
about the changes applied to the placeholder labels.

fix davidgohel#589
  • Loading branch information
markheckmann authored Aug 29, 2024
1 parent 4c83218 commit 70bf887
Show file tree
Hide file tree
Showing 11 changed files with 266 additions and 3 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: officer
Title: Manipulation of Microsoft Word and PowerPoint Documents
Version: 0.6.7.005
Version: 0.6.7.006
Authors@R: c(
person("David", "Gohel", , "[email protected]", role = c("aut", "cre")),
person("Stefan", "Moog", , "[email protected]", role = "aut"),
Expand Down Expand Up @@ -48,7 +48,8 @@ Imports:
utils,
uuid,
xml2 (>= 1.1.0),
zip (>= 2.1.0)
zip (>= 2.1.0),
cli
Suggests:
devEMF,
doconv (>= 0.3.0),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ export(headers_replace_img_at_bkm)
export(headers_replace_text_at_bkm)
export(hyperlink_ftext)
export(image_to_base64)
export(layout_dedupe_ph_labels)
export(layout_properties)
export(layout_summary)
export(media_extract)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ that can not contain ' ' and trigger an error if it contains a ' '.

## Features

- add `layout_dedupe_ph_labels()` to handle duplicate placholder labels (#589).
By default, it will only detect duplicate labels, but apply no changes. With
`action = "rename"`, it auto-renames duplicate labels and `action = "delete"`
deletes duplicates, only keeping their first occurence.
- new convenience functions `body_replace_gg_at_bkm()` and `body_replace_plot_at_bkm()`
to replace text content enclosed in a bookmark with a ggplot or a base plot.
- add `unit` (in, cm, mm) argument in function `page_size()`.
Expand Down
2 changes: 1 addition & 1 deletion R/ph_location.R
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ fortify_location.location_label <- function( x, doc, ...){

if( nrow(props) > 1) {
stop("Placeholder ", shQuote(x$ph_label),
" in the slide layout is duplicated. It needs to be unique.")
" in the slide layout is duplicated. It needs to be unique. Hint: layout_dedupe_ph_labels() helps handling duplicates.")
}

props <- props[, c("offx", "offy", "cx", "cy", "ph_label", "ph", "type", "rotation", "fld_id", "fld_type")]
Expand Down
140 changes: 140 additions & 0 deletions R/ppt_ph_dedupe_layout.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#' Detect and handle duplicate placeholder labels
#'
#' PowerPoint does not enforce unique placeholder labels in a layout.
#' Selecting a placeholder via its label using [ph_location_label] will throw
#' an error, if the label is not unique. [layout_dedupe_ph_labels] helps to detect,
#' rename, or delete duplicate placholder labels.
#'
#' @param x An `rpptx` object.
#' @param action Action to perform on duplicate placeholder labels. One of:
#' * `detect` (default) = show info on dupes only, make no changes
#' * `rename` = create unique labels. Labels are renamed by appending a sequential number
#' separated by dot to duplicate labels. For example, `c("title", "title")` becomes `c("title.1", "title.2")`.
#' * `delete` = only keep one of the placeholders with a duplicate label
#' @param print_info Print action information (e.g. renamed placeholders) to console?
#' Default is `FALSE`. Always `TRUE` for action `detect`.
#' @return A `rpptx` object (with modified placeholder labels).
#' @export
#' @examples
#' x <- read_pptx()
#' layout_dedupe_ph_labels(x)
#'
#' file <- system.file("doc_examples", "ph_dupes.pptx", package = "officer")
#' x <- read_pptx(file)
#' layout_dedupe_ph_labels(x)
#' layout_dedupe_ph_labels(x, "rename", print_info = TRUE)
#'
layout_dedupe_ph_labels <- function(x, action = "detect", print_info = FALSE) {
if (!inherits(x, "rpptx")) {
stop("'x' must be an 'rpptx' object", call. = FALSE)
}
action <- match.arg(action, c("detect", "rename", "delete"))
layout_names <- x$slideLayouts$get_metadata()$filename
xfrm_list <- lapply(layout_names, .dedupe_phs_in_layout, x = x, action = action)
x <- reload_slidelayouts(x) # reinit slideLayouts to get processed ph labels [e.g. when calling x$slideLayouts$get_xfrm_data()]
if (print_info | action == "detect") {
.print_dedupe_info(x = x, xfrm_list = xfrm_list, action = action)
}
invisible(x)
}


# handle placeholder labels in a single layout
#
# layout_file: layout filename (e.g. "slideLayout1.xml").
# x: An `rpptx` object
#
# returns: Dataframe with placeholder info. Only needed for .print_dedupe_info()
.dedupe_phs_in_layout <- function(layout_file, x, action = "rename") {
ph_label <- NULL
if (!grepl("\\.xml$", layout_file, ignore.case = TRUE)) {
stop("'layout_file' must be an .xml file", call. = FALSE)
}
action <- match.arg(action, c("detect", "rename", "delete"))
layout <- x$slideLayouts$collection_get(layout_file)
xfrm <- layout$xfrm()
xfrm <- subset(xfrm, duplicated(ph_label) | duplicated(ph_label, fromLast = TRUE))
if (nrow(xfrm) == 0) {
return()
}
xfrm <- transform(xfrm, ph_label_new = make_strings_unique(ph_label), delete_flag = duplicated(ph_label)) # prepare once for all action types
if (action == "detect") {
return(xfrm) # no further action required
} else if (action == "rename") {
xfrm$delete_flag <- FALSE
} else if (action == "delete") {
xfrm$ph_label_new <- xfrm$ph_label
}

# rename label or delete ph shape
layout_xml <- layout$get()
for (i in 1L:nrow(xfrm)) {
shape <- xml2::xml_find_first(layout_xml, sprintf("p:cSld/p:spTree/*[p:nvSpPr/p:cNvPr[@id='%s']]", xfrm$id[i]))
if (xfrm$delete_flag[i]) {
xml2::xml_remove(shape)
} else {
xml2::xml_find_first(shape, ".//p:cNvPr") |> xml2::xml_set_attr("name", xfrm$ph_label_new[i])
}
}
layout$save() # persist changes in slideout xml file
xfrm
}


# reload slideLayouts (if layout XML in package_dir has changed)
reload_slidelayouts <- function(x) {
x$slideLayouts$initialize(x$package_dir,
master_metadata = x$masterLayouts$get_metadata(),
master_xfrm = x$masterLayouts$xfrm()
)
x
}


# Create unique string by appending a sepatator and a number
# make_strings_unique(c("A", "B", "B", "C", "A"))
make_strings_unique <- function(x, sep = ".") {
ii <- stats::ave(x, x, FUN = seq_along)
paste0(x, sep, ii)
}


# helper mostly for testing
has_ph_dupes <- function(x) {
if (!inherits(x, "rpptx")) {
stop("'x' must be an 'rpptx' object", call. = FALSE)
}
xfrm <- x$slideLayouts$get_xfrm_data()
dupes <- stats::aggregate(ph_label ~ master_name + name, data = xfrm, FUN = function(x) sum(duplicated(x)) > 0)
any(dupes$ph_label)
}


# print info on what was done (if print_info = TRUE)
.print_dedupe_info <- function(x, xfrm_list, action) {
.df_1 <- do.call(rbind, xfrm_list)
if (is.null(.df_1)) {
cat("No duplicate placeholder labels detected.")
return(invisible(NULL))
}
.df_2 <- x$slideLayouts$get_xfrm_data()
.df_2 <- .df_2[, c("master_file", "master_name"), drop = FALSE] |> unique()
df <- merge(.df_1, .df_2, sort = FALSE)
rownames(df) <- NULL
df <- df[, c("master_name", "name", "ph_label", "ph_label_new", "delete_flag"), drop = FALSE]
colnames(df)[2] <- "layout_name"
if (action == "detect") {
cat("Placeholders with duplicate labels:\n")
cat(cli::col_grey("* 'ph_label_new' = new placeholder label for action = 'rename'\n"))
cat(cli::col_grey("* 'delete_flag' = deleted placeholders for action = 'delete'\n"))
} else if (action == "rename") {
df$delete_flag <- NULL
cat("Renamed duplicate placeholder labels:\n")
cat(cli::col_grey("* 'ph_label_new' = new placeholder label\n"))
} else if (action == "delete") {
df <- df[df$delete_flag, , drop = FALSE]
df$ph_label_new <- NULL
cat("Removed placeholders with duplicate labels:\n")
}
print(df)
}
1 change: 1 addition & 0 deletions R/pptx_informations.R
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ plot_layout_properties <- function (x, layout = NULL, master = NULL, labels = TR
#' \code{ph_location*} calls. The parameters are printed in their corresponding shapes.
#'
#' Note that if there are duplicated \code{ph_label}, you should not use \code{ph_location_label}.
#' Hint: You can dedupe labels using \code{\link{layout_dedupe_ph_labels}}.
#'
#' @param path path to the pptx file to use as base document or NULL to use the officer default
#' @param output_file filename to store the annotated powerpoint file or NULL to suppress generation
Expand Down
Binary file added inst/doc_examples/ph_dupes.pptx
Binary file not shown.
1 change: 1 addition & 0 deletions man/annotate_base.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

41 changes: 41 additions & 0 deletions man/layout_dedupe_ph_labels.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file added tests/testthat/docs_dir/test-pptx-dedupe-ph.pptx
Binary file not shown.
74 changes: 74 additions & 0 deletions tests/testthat/test-pptx-dedupe-ph-labels.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@



test_that("incorrect input formats are detected", {
expect_error(layout_dedupe_ph_labels("file.xxx"), regexp = "'x' must be an 'rpptx' object")
expect_error(has_ph_dupes("file.xxx"), regexp = "'x' must be an 'rpptx' object")
expect_error(.dedupe_phs_in_layout("file.xxx"), regexp = "'layout_file' must be an .xml file")
})



test_that("handling ph dupes function works when there are none", {
x <- read_pptx() # sample PPTX has no dupes
expect_false(has_ph_dupes(x))
. <- capture.output(expect_no_error({
layout_dedupe_ph_labels(x, print_info = TRUE)
layout_dedupe_ph_labels(x, action = "rename", print_info = TRUE)
layout_dedupe_ph_labels(x, action = "delete", print_info = TRUE)
}))
})


test_that("handling ph dupes works", {
file_in <- test_path("docs_dir/test-pptx-dedupe-ph.pptx")

# referencing a duplicate placeholder via its label should throw an error.
# if this should change for some reason, the test fails as we would need to
# check if deduplication is still relevant
x <- read_pptx(file_in)
x <- add_slide(x, layout = "2x2-dupes", master = "Master1")
expect_no_error(ph_with(x, "abc", ph_location_label(ph_label = "Title 1")))
expect_error(ph_with(x, "abc", ph_location_label(ph_label = "Content")))

# action = detect
x_det <- read_pptx(file_in)
expect_true(has_ph_dupes(x_det))
out <- capture.output({
x_det <- layout_dedupe_ph_labels(x_det)
})
expect_true(any(grepl("Content 7.1", out)))
expect_true(has_ph_dupes(x_det))

# action = rename
x_rename <- read_pptx(file_in)
before <- x_rename$slideLayouts$get_xfrm_data()$ph_label
out <- capture.output({
x_rename <- layout_dedupe_ph_labels(x_rename, action = "rename", print_info = TRUE)
})
expect_true(any(grepl("Content 7", out)))
expect_true(any(grepl("Content 7.1", out)))
after <- x_rename$slideLayouts$get_xfrm_data()$ph_label
expect_false(has_ph_dupes(x_rename))
expect_true(any(before != after))
expect_equal(length(before), length(after))

# action = delete
x_delete <- read_pptx(file_in)
before <- x_delete$slideLayouts$get_xfrm_data()$ph_label
out <- capture.output({
x_delete <- layout_dedupe_ph_labels(x_delete, action = "delete", print_info = TRUE)
})
expect_true(any(grepl("Content 7", out)))
after <- x_delete$slideLayouts$get_xfrm_data()$ph_label
expect_false(has_ph_dupes(x_delete))
expect_gt(length(before), length(after))

# annotate base should work with deduped phs
file <- tempfile(fileext = ".pptx")
output_file <- tempfile(fileext = ".pptx")
print(x_rename, target = file)
expect_no_error(annotate_base(file, output_file = output_file))
print(x_delete, target = file)
expect_no_error(annotate_base(file, output_file = output_file))
})

0 comments on commit 70bf887

Please sign in to comment.