From 6f519858e2af7cdf7d92895590226c33cc3eb941 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 10 Aug 2023 19:09:32 +0000 Subject: [PATCH 1/7] file path detection in paste_linter --- NEWS.md | 1 + R/paste_linter.R | 109 ++++++++++++++++++++++++++++- man/paste_linter.Rd | 9 ++- tests/testthat/test-paste_linter.R | 92 ++++++++++++++++++++++++ 4 files changed, 209 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 002664ce7..6a7f114d2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -24,6 +24,7 @@ + `yoda_test_linter()` * `sprintf_linter()` is pipe-aware, so that `x %>% sprintf(fmt = "%s")` no longer lints (#1943, @MichaelChirico). * `line_length_linter()` helpfully includes the line length in the lint message (#2057, @MichaelChirico). +* `paste_linter()` gains detection for file paths that are better constructed with `file.path()`, e.g. `paste0(dir, "/", file)` would be better as `file.path(dir, file)` (part of #884, @MichaelChirico). ### New linters diff --git a/R/paste_linter.R b/R/paste_linter.R index ed4f35424..3c2ea44a0 100644 --- a/R/paste_linter.R +++ b/R/paste_linter.R @@ -21,6 +21,8 @@ #' `paste()` with `sep = ""` is not linted. #' @param allow_to_string Logical, default `FALSE`. If `TRUE`, usage of #' `paste()` and `paste0()` with `collapse = ", "` is not linted. +#' @param allow_file_path Logical, default `FALSE`. If `TRUE`, usage of +#' `paste()` and `paste0()` to construct file paths is not linted. #' #' @examples #' # will produce lints @@ -44,6 +46,11 @@ #' linters = paste_linter() #' ) #' +#' lint( +#' text = 'paste0(dir, "/", file)', +#' linters = paste_linter() +#' ) +#' #' # okay #' lint( #' text = 'paste0("a", "b")', @@ -75,9 +82,14 @@ #' linters = paste_linter() #' ) #' +#' lint( +#' text = 'paste0(year, "/", month, "/", day)', +#' linters = paste_linter(allow_file_path = TRUE) +#' ) +#' #' @seealso [linters] for a complete list of linters available in lintr. #' @export -paste_linter <- function(allow_empty_sep = FALSE, allow_to_string = FALSE) { +paste_linter <- function(allow_empty_sep = FALSE, allow_to_string = FALSE, allow_file_path = FALSE) { sep_xpath <- " //SYMBOL_FUNCTION_CALL[text() = 'paste'] /parent::expr @@ -111,6 +123,76 @@ paste_linter <- function(allow_empty_sep = FALSE, allow_to_string = FALSE) { /parent::expr " + slash_str <- sprintf("STR_CONST[%s]", xp_text_in_table(c("'/'", '"/"'))) + str_not_start_with_slash <- + "STR_CONST[not(substring(text(), 2, 1) = '/')]" + str_not_end_with_slash <- + "STR_CONST[not(substring(text(), string-length(text()) - 1, 1) = '/')]" + non_str <- "SYMBOL or expr" + + # Type I: paste(..., sep = "/") + paste_file_path_xpath <- glue(" + //SYMBOL_FUNCTION_CALL[text() = 'paste'] + /parent::expr + /parent::expr[ + SYMBOL_SUB[text() = 'sep']/following-sibling::expr[1][{slash_str}] + and not(SYMBOL_SUB[text() = 'collapse']) + ] + ") + + # Type II: paste0(x, "/", y, "/", z) + # more specifically, paste0 need not be replaced by file.path when + # (1) there are consecutive SYMBOL|expr (like paste0(x, y)) + # (2) a STR_CONST precedes a SYMBOL|expr, but doesn't end with / + # (3) a STR_CONST follows a SYMBOL|expr, but doesn't start with / + # (4) there are consecutive STR_CONSTs but neither + # the first ends with / nor the second starts with / + # (5) there is only one argument + # (6) there is any NUM_CONST + # (7) collapse is used + # (8) / is the first character in an initial STR_CONST or the + # last character in a terminal STR_CONST, where file.path + # replacement would somewhat awkwardly look like + # file.path("", ...) and/or file.path(..., "") + # (9) a STR_CONST beginning or ending with >1 slashes (like "//x") + # use count() to exclude paste0(x) + # don't use starts-with() -- (1) there's no ends-with() in XPath 1.0 and + # (2) we need to cater for both ' and " quotations which ^ complexity + # NB: XPath's substring() signature is (text, initial_index, n_characters) + # an expression matching any of the not(...) conditions is _not_ a path + paste0_file_path_xpath <- glue(" + //SYMBOL_FUNCTION_CALL[text() = 'paste0'] + /parent::expr + /parent::expr[ + count(expr) > 2 + and not( + expr/NUM_CONST + or SYMBOL_SUB[text() = 'collapse'] + or expr[2][{slash_str}] + or expr[last()][{slash_str}] + or expr/STR_CONST[ + substring(text(), 2, 2) = '//' + or substring(text(), string-length(text()) - 2, 2) = '//' + ] + or expr[({non_str}) and following-sibling::expr[1][{non_str}]] + or expr[ + {str_not_end_with_slash} + and following-sibling::expr[1][ + {non_str} + or {str_not_start_with_slash} + ] + ] + or expr[ + {str_not_start_with_slash} + and preceding-sibling::expr[1][{non_str}] + ] + ) + ] + ") + + empty_paste_note <- + 'Note well that paste() converts empty inputs to "", whereas file.path() leaves it empty.' + Linter(function(source_expression) { if (!is_lint_level(source_expression, "expression")) { return(list()) @@ -170,6 +252,31 @@ paste_linter <- function(allow_empty_sep = FALSE, allow_to_string = FALSE) { type = "warning" ) + if (!allow_file_path) { + paste_file_path_expr <- xml_find_all(xml, paste_file_path_xpath) + optional_lints <- c(optional_lints, xml_nodes_to_lints( + paste_file_path_expr, + source_expression = source_expression, + lint_message = paste( + 'Construct file paths with file.path(...) instead of paste(..., sep = "/").', + 'If you are using paste(sep = "/") to construct a date, consider using format() or lubridate helpers instead.', + empty_paste_note + ), + type = "warning" + )) + + paste0_file_path_expr <- xml_find_all(xml, paste0_file_path_xpath) + optional_lints <- c(optional_lints, xml_nodes_to_lints( + paste0_file_path_expr, + source_expression = source_expression, + lint_message = paste( + 'Construct file paths with file.path(...) instead of paste0(x, "/", y, "/", z).', + empty_paste_note + ), + type = "warning" + )) + } + c(optional_lints, paste0_sep_lints, paste_strrep_lints) }) } diff --git a/man/paste_linter.Rd b/man/paste_linter.Rd index e83352f76..0206be2ff 100644 --- a/man/paste_linter.Rd +++ b/man/paste_linter.Rd @@ -4,7 +4,11 @@ \alias{paste_linter} \title{Raise lints for several common poor usages of \code{paste()}} \usage{ -paste_linter(allow_empty_sep = FALSE, allow_to_string = FALSE) +paste_linter( + allow_empty_sep = FALSE, + allow_to_string = FALSE, + allow_file_path = FALSE +) } \arguments{ \item{allow_empty_sep}{Logical, default \code{FALSE}. If \code{TRUE}, usage of @@ -12,6 +16,9 @@ paste_linter(allow_empty_sep = FALSE, allow_to_string = FALSE) \item{allow_to_string}{Logical, default \code{FALSE}. If \code{TRUE}, usage of \code{paste()} and \code{paste0()} with \code{collapse = ", "} is not linted.} + +\item{allow_file_path}{Logical, default \code{FALSE}. If \code{TRUE}, usage of +\code{paste()} and \code{paste0()} to construct file paths is not linted.} } \description{ The following issues are linted by default by this linter diff --git a/tests/testthat/test-paste_linter.R b/tests/testthat/test-paste_linter.R index a65bbb898..73e677f18 100644 --- a/tests/testthat/test-paste_linter.R +++ b/tests/testthat/test-paste_linter.R @@ -113,3 +113,95 @@ test_that("paste_linter blocks simple disallowed usages", { expect_lint("paste0(rep('*', 20L), collapse='')", lint_msg, linter) expect_lint("paste(rep('#', width), collapse='')", lint_msg, linter) }) + +test_that("paste_linter skips allowed usages for file paths", { + linter <- paste_linter() + + expect_lint("paste('a', 'b', 'c')", NULL, linter) + expect_lint("paste('a', 'b', 'c', sep = ',')", NULL, linter) + expect_lint("paste('a', 'b', collapse = '/')", NULL, linter) + expect_lint("cat(paste('a', 'b'), sep = '/')", NULL, linter) + expect_lint("sep <- '/'; paste('a', sep)", NULL, linter) + expect_lint("paste(sep = ',', '/', 'a')", NULL, linter) + + # paste(..., sep='/', collapse=collapse) is not a trivial swap to file.path + expect_lint("paste(x, y, sep = '/', collapse = ':')", NULL, linter) + + expect_lint("file.path('a', 'b', 'c')", NULL, linter) + + # testing the sep starts with / is not enough + expect_lint("paste('a', 'b', sep = '//')", NULL, linter) +}) + +test_that("paste_linter blocks simple disallowed usages for file paths", { + linter <- paste_linter() + lint_msg <- rex::rex("Construct file paths with file.path(...) instead of") + + expect_lint("paste(sep = '/', 'a', 'b')", lint_msg, linter) + expect_lint("paste('a', 'b', sep = '/')", lint_msg, linter) +}) + +test_that("paste_linter ignores non-path cases with paste0", { + linter <- paste_linter() + + expect_lint("paste0(x, y)", NULL, linter) + expect_lint("paste0('abc', 'def')", NULL, linter) + expect_lint("paste0('/abc', 'def/')", NULL, linter) + expect_lint("paste0(x, 'def/')", NULL, linter) + expect_lint("paste0('/abc', y)", NULL, linter) + expect_lint("paste0(foo(x), y)", NULL, linter) + expect_lint("paste0(foo(x), 'def')", NULL, linter) + + # these might be a different lint (as.character instead, e.g.) but not here + expect_lint("paste0(x)", NULL, linter) + expect_lint("paste0('a')", NULL, linter) + expect_lint("paste0('a', 1)", NULL, linter) + + # paste0(..., collapse=collapse) not directly mapped to file.path + expect_lint("paste0(x, collapse = '/')", NULL, linter) +}) + +test_that("paste_linter detects paths built with '/' and paste0", { + linter <- paste_linter() + lint_msg <- rex::rex("Construct file paths with file.path(...) instead of") + + expect_lint("paste0(x, '/', y)", lint_msg, linter) + expect_lint("paste0(x, '/', y, '/', z)", lint_msg, linter) + expect_lint("paste0(x, '/abc/', 'def/', y)", lint_msg, linter) + expect_lint("paste0(foo(x), '/abc/', 'def/', bar(y))", lint_msg, linter) +}) + +test_that("paste_linter skips initial/terminal '/' and repeated '/' for paths", { + linter <- paste_linter() + + expect_lint("paste0('/', x)", NULL, linter) + expect_lint("paste0(x, '/')", NULL, linter) + expect_lint("paste0(x, '//hey/', y)", NULL, linter) + expect_lint("paste0(x, '/hey//', y)", NULL, linter) +}) + +test_that("paste_linter doesn't skip all initial/terminal '/' for paths", { + linter <- paste_linter() + lint_msg <- rex::rex("Construct file paths with file.path(...) instead of") + + expect_lint('paste0("/abc/", "def")', lint_msg, linter) + expect_lint('paste0("abc/", "def/")', lint_msg, linter) +}) + +test_that("multiple path lints are generated correctly", { + expect_lint( + trim_some("{ + paste(x, y, sep = '/') + paste0(x, '/', y) + }"), + list( + rex::rex('paste(..., sep = "/")'), + rex::rex('paste0(x, "/", y, "/", z)') + ), + paste_linter() + ) +}) + +test_that("allow_file_path argument works", { + expect_lint("paste(x, y, sep = '/')", NULL, paste_linter(allow_file_path = TRUE)) +}) From cb04b09da91c5003428e29d0ed5cc2c74af3dbdd Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 10 Aug 2023 19:11:07 +0000 Subject: [PATCH 2/7] roxy --- man/paste_linter.Rd | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/man/paste_linter.Rd b/man/paste_linter.Rd index 0206be2ff..0c34ed5a4 100644 --- a/man/paste_linter.Rd +++ b/man/paste_linter.Rd @@ -62,6 +62,11 @@ lint( linters = paste_linter() ) +lint( + text = 'paste0(dir, "/", file)', + linters = paste_linter() +) + # okay lint( text = 'paste0("a", "b")', @@ -93,6 +98,11 @@ lint( linters = paste_linter() ) +lint( + text = 'paste0(year, "/", month, "/", day)', + linters = paste_linter(allow_file_path = TRUE) +) + } \seealso{ \link{linters} for a complete list of linters available in lintr. From 3f292dc574cbf270dbfaeec7d14b206b4ee7ffff Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 10 Aug 2023 12:27:30 -0700 Subject: [PATCH 3/7] line length --- R/paste_linter.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/paste_linter.R b/R/paste_linter.R index 3c2ea44a0..fe5d8d536 100644 --- a/R/paste_linter.R +++ b/R/paste_linter.R @@ -259,7 +259,8 @@ paste_linter <- function(allow_empty_sep = FALSE, allow_to_string = FALSE, allow source_expression = source_expression, lint_message = paste( 'Construct file paths with file.path(...) instead of paste(..., sep = "/").', - 'If you are using paste(sep = "/") to construct a date, consider using format() or lubridate helpers instead.', + 'If you are using paste(sep = "/") to construct a date,', + "consider using format() or lubridate helpers instead.", empty_paste_note ), type = "warning" From 86e5a7b59e3d36f4a884d6f55eac5a82a9a9c5cd Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 10 Aug 2023 21:26:59 +0000 Subject: [PATCH 4/7] -well --- R/paste_linter.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/paste_linter.R b/R/paste_linter.R index 3c2ea44a0..8f9809e73 100644 --- a/R/paste_linter.R +++ b/R/paste_linter.R @@ -191,7 +191,7 @@ paste_linter <- function(allow_empty_sep = FALSE, allow_to_string = FALSE, allow ") empty_paste_note <- - 'Note well that paste() converts empty inputs to "", whereas file.path() leaves it empty.' + 'Note that paste() converts empty inputs to "", whereas file.path() leaves it empty.' Linter(function(source_expression) { if (!is_lint_level(source_expression, "expression")) { From 17d092b789aa84dfde62cc38a9f3af4eb72f9c56 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 10 Aug 2023 21:43:37 +0000 Subject: [PATCH 5/7] xp_strip_comments --- R/xp_utils.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/R/xp_utils.R b/R/xp_utils.R index 29d767137..e5bec73e9 100644 --- a/R/xp_utils.R +++ b/R/xp_utils.R @@ -65,3 +65,17 @@ xp_find_location <- function(xml, xpath) { as.integer(xml_find_num(xml, xpath)) } } + +#' Strip XPath 2.0-style comments from an XPath +#' +#' xml2 uses XPath 1.0, which has no support for comments. But comments are +#' useful in a codebase with as many XPaths as we maintain, so we fudge our +#' way to XPath 2.0-ish support by writing this simple function to remove comments. +#' +#' @noRd +xpath_comment_re <- rex::rex( + "(:", + zero_or_more(not(":)")), + ":)" +) +xp_strip_comments <- function(xpath) rex::re_substitutes(xpath, xpath_comment_re, "", global = TRUE) From 300053620751d0f1ab0f7bf9274f30e661ba4ca1 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 10 Aug 2023 21:58:12 +0000 Subject: [PATCH 6/7] subsume comment into xpath --- R/paste_linter.R | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/R/paste_linter.R b/R/paste_linter.R index 8f9809e73..5e004ad42 100644 --- a/R/paste_linter.R +++ b/R/paste_linter.R @@ -141,40 +141,32 @@ paste_linter <- function(allow_empty_sep = FALSE, allow_to_string = FALSE, allow ") # Type II: paste0(x, "/", y, "/", z) - # more specifically, paste0 need not be replaced by file.path when - # (1) there are consecutive SYMBOL|expr (like paste0(x, y)) - # (2) a STR_CONST precedes a SYMBOL|expr, but doesn't end with / - # (3) a STR_CONST follows a SYMBOL|expr, but doesn't start with / - # (4) there are consecutive STR_CONSTs but neither - # the first ends with / nor the second starts with / - # (5) there is only one argument - # (6) there is any NUM_CONST - # (7) collapse is used - # (8) / is the first character in an initial STR_CONST or the - # last character in a terminal STR_CONST, where file.path - # replacement would somewhat awkwardly look like - # file.path("", ...) and/or file.path(..., "") - # (9) a STR_CONST beginning or ending with >1 slashes (like "//x") - # use count() to exclude paste0(x) - # don't use starts-with() -- (1) there's no ends-with() in XPath 1.0 and - # (2) we need to cater for both ' and " quotations which ^ complexity - # NB: XPath's substring() signature is (text, initial_index, n_characters) - # an expression matching any of the not(...) conditions is _not_ a path - paste0_file_path_xpath <- glue(" + paste0_file_path_xpath <- xp_strip_comments(glue(" //SYMBOL_FUNCTION_CALL[text() = 'paste0'] /parent::expr /parent::expr[ + (: exclude paste0(x) :) count(expr) > 2 + (: An expression matching _any_ of these conditions is _not_ a file path :) and not( + (: Any numeric input :) expr/NUM_CONST + (: A call using collapse= :) or SYMBOL_SUB[text() = 'collapse'] + (: First input is '/', meaning file.path() would need to start with '' :) or expr[2][{slash_str}] + (: Last input is '/', meaning file.path() would need to end with '' :) or expr[last()][{slash_str}] + (: String starting or ending with multiple / :) + (: TODO(#2075): run this logic on the actual R string :) or expr/STR_CONST[ + (: NB: this is (text, initial_index, n_characters) :) substring(text(), 2, 2) = '//' or substring(text(), string-length(text()) - 2, 2) = '//' ] + (: Consecutive non-strings like paste0(x, y) :) or expr[({non_str}) and following-sibling::expr[1][{non_str}]] + (: A string not ending with /, followed by non-string or string not starting with / :) or expr[ {str_not_end_with_slash} and following-sibling::expr[1][ @@ -182,13 +174,15 @@ paste_linter <- function(allow_empty_sep = FALSE, allow_to_string = FALSE, allow or {str_not_start_with_slash} ] ] + (: A string not starting with /, preceded by a non-string :) + (: NB: consecutive strings is covered by the previous condition :) or expr[ {str_not_start_with_slash} and preceding-sibling::expr[1][{non_str}] ] ) ] - ") + ")) empty_paste_note <- 'Note that paste() converts empty inputs to "", whereas file.path() leaves it empty.' From a1bc413c965eee49a0234647ace323f0a53b6b19 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 10 Aug 2023 22:58:44 +0000 Subject: [PATCH 7/7] found a lint in methods.R --- R/methods.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/methods.R b/R/methods.R index b65c9f8c9..58bc9543b 100644 --- a/R/methods.R +++ b/R/methods.R @@ -45,8 +45,7 @@ markdown <- function(x, info, ...) { as.character(x$line_number), ":", as.character(x$column_number), ":", "]", "(", - paste( - sep = "/", + file.path( "https://github.com", info$user, info$repo,