From c96d02408c2fb314211c12b9b538ac00fa2c2994 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 19 Nov 2023 12:05:58 -0800 Subject: [PATCH] New object_overwrite_linter (#2307) --- DESCRIPTION | 1 + NAMESPACE | 1 + R/object_overwrite_linter.R | 114 +++++++++++++++ inst/lintr/linters.csv | 1 + man/best_practices_linters.Rd | 1 + man/configurable_linters.Rd | 1 + man/executing_linters.Rd | 1 + man/linters.Rd | 11 +- man/object_overwrite_linter.Rd | 70 +++++++++ man/readability_linters.Rd | 1 + man/robustness_linters.Rd | 1 + tests/testthat/test-object_overwrite_linter.R | 134 ++++++++++++++++++ 12 files changed, 332 insertions(+), 5 deletions(-) create mode 100644 R/object_overwrite_linter.R create mode 100644 man/object_overwrite_linter.Rd create mode 100644 tests/testthat/test-object_overwrite_linter.R diff --git a/DESCRIPTION b/DESCRIPTION index 7c4b578f4..626f65a3e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -143,6 +143,7 @@ Collate: 'nzchar_linter.R' 'object_length_linter.R' 'object_name_linter.R' + 'object_overwrite_linter.R' 'object_usage_linter.R' 'one_call_pipe_linter.R' 'outer_negation_linter.R' diff --git a/NAMESPACE b/NAMESPACE index 245d0849c..b314774e4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -105,6 +105,7 @@ export(numeric_leading_zero_linter) export(nzchar_linter) export(object_length_linter) export(object_name_linter) +export(object_overwrite_linter) export(object_usage_linter) export(one_call_pipe_linter) export(open_curly_linter) diff --git a/R/object_overwrite_linter.R b/R/object_overwrite_linter.R new file mode 100644 index 000000000..c727af898 --- /dev/null +++ b/R/object_overwrite_linter.R @@ -0,0 +1,114 @@ +#' Block assigning any variables whose name clashes with a `base` R function +#' +#' Re-using existing names creates a risk of subtle error best avoided. +#' Avoiding this practice also encourages using better, more descriptive names. +#' +#' @param packages Character vector of packages to search for names that should +#' be avoided. Defaults to the most common default packages: base, stats, +#' utils, tools, methods, graphics, and grDevices. +#' @param allow_names Character vector of object names to ignore, i.e., which +#' are allowed to collide with exports from `packages`. +#' +#' @examples +#' # will produce lints +#' code <- "function(x) {\n data <- x\n data\n}" +#' writeLines(code) +#' lint( +#' text = code, +#' linters = object_overwrite_linter() +#' ) +#' +#' code <- "function(x) {\n lint <- 'fun'\n lint\n}" +#' writeLines(code) +#' lint( +#' text = code, +#' linters = object_overwrite_linter(packages = "lintr") +#' ) +#' +#' # okay +#' code <- "function(x) {\n data('mtcars')\n}" +#' writeLines(code) +#' lint( +#' text = code, +#' linters = object_overwrite_linter() +#' ) +#' +#' code <- "function(x) {\n data <- x\n data\n}" +#' writeLines(code) +#' lint( +#' text = code, +#' linters = object_overwrite_linter(packages = "base") +#' ) +#' +#' # names in function signatures are ignored +#' lint( +#' text = "function(data) data <- subset(data, x > 0)", +#' linters = object_overwrite_linter() +#' ) +#' +#' @evalRd rd_tags("object_overwrite_linter") +#' @seealso +#' - [linters] for a complete list of linters available in lintr. +#' - +#' @export +object_overwrite_linter <- function( + packages = c("base", "stats", "utils", "tools", "methods", "graphics", "grDevices"), + allow_names = character()) { + for (package in packages) { + if (!requireNamespace(package, quietly = TRUE)) { + stop("Package '", package, "' is not available.") + } + } + pkg_exports <- lapply( + packages, + # .__C__ etc.: drop 150+ "virtual" names since they are very unlikely to appear anyway + function(pkg) setdiff(grep("^[.]__[A-Z]__", getNamespaceExports(pkg), value = TRUE, invert = TRUE), allow_names) + ) + pkg_exports <- data.frame( + package = rep(packages, lengths(pkg_exports)), + name = unlist(pkg_exports), + stringsAsFactors = FALSE + ) + + # test that the symbol doesn't match an argument name in the function + # NB: data.table := has parse token LEFT_ASSIGN as well + xpath <- glue(" + //SYMBOL[ + not(text() = ancestor::expr/preceding-sibling::SYMBOL_FORMALS/text()) + and ({ xp_text_in_table(pkg_exports$name) }) + ]/ + parent::expr[ + count(*) = 1 + and ( + following-sibling::LEFT_ASSIGN[text() != ':='] + or following-sibling::EQ_ASSIGN + or preceding-sibling::RIGHT_ASSIGN + ) + and ancestor::*[ + (self::expr or self::expr_or_assign_or_help or self::equal_assign) + and (preceding-sibling::FUNCTION or preceding-sibling::OP-LAMBDA) + ] + ] + ") + + Linter(function(source_expression) { + if (!is_lint_level(source_expression, "expression")) { + return(list()) + } + + xml <- source_expression$xml_parsed_content + + bad_expr <- xml_find_all(xml, xpath) + bad_symbol <- xml_text(xml_find_first(bad_expr, "SYMBOL")) + source_pkg <- pkg_exports$package[match(bad_symbol, pkg_exports$name)] + lint_message <- + sprintf("'%s' is an exported object from package '%s'. Avoid re-using such symbols.", bad_symbol, source_pkg) + + xml_nodes_to_lints( + bad_expr, + source_expression = source_expression, + lint_message = lint_message, + type = "warning" + ) + }) +} diff --git a/inst/lintr/linters.csv b/inst/lintr/linters.csv index 03c1968d8..aee3578f0 100644 --- a/inst/lintr/linters.csv +++ b/inst/lintr/linters.csv @@ -62,6 +62,7 @@ numeric_leading_zero_linter,style consistency readability nzchar_linter,efficiency best_practices consistency object_length_linter,style readability default configurable executing object_name_linter,style consistency default configurable executing +object_overwrite_linter,best_practices robustness readability configurable executing object_usage_linter,style readability correctness default executing configurable one_call_pipe_linter,style readability open_curly_linter,defunct diff --git a/man/best_practices_linters.Rd b/man/best_practices_linters.Rd index 67cd444ff..7e372d031 100644 --- a/man/best_practices_linters.Rd +++ b/man/best_practices_linters.Rd @@ -47,6 +47,7 @@ The following linters are tagged with 'best_practices': \item{\code{\link{literal_coercion_linter}}} \item{\code{\link{nonportable_path_linter}}} \item{\code{\link{nzchar_linter}}} +\item{\code{\link{object_overwrite_linter}}} \item{\code{\link{outer_negation_linter}}} \item{\code{\link{paste_linter}}} \item{\code{\link{pipe_return_linter}}} diff --git a/man/configurable_linters.Rd b/man/configurable_linters.Rd index 19899dd62..0f5adb229 100644 --- a/man/configurable_linters.Rd +++ b/man/configurable_linters.Rd @@ -33,6 +33,7 @@ The following linters are tagged with 'configurable': \item{\code{\link{nonportable_path_linter}}} \item{\code{\link{object_length_linter}}} \item{\code{\link{object_name_linter}}} +\item{\code{\link{object_overwrite_linter}}} \item{\code{\link{object_usage_linter}}} \item{\code{\link{paste_linter}}} \item{\code{\link{pipe_consistency_linter}}} diff --git a/man/executing_linters.Rd b/man/executing_linters.Rd index 04d6569e1..c6648a4f7 100644 --- a/man/executing_linters.Rd +++ b/man/executing_linters.Rd @@ -18,6 +18,7 @@ The following linters are tagged with 'executing': \item{\code{\link{namespace_linter}}} \item{\code{\link{object_length_linter}}} \item{\code{\link{object_name_linter}}} +\item{\code{\link{object_overwrite_linter}}} \item{\code{\link{object_usage_linter}}} \item{\code{\link{unused_import_linter}}} } diff --git a/man/linters.Rd b/man/linters.Rd index 66fee2b8f..f62f20f2a 100644 --- a/man/linters.Rd +++ b/man/linters.Rd @@ -17,20 +17,20 @@ see also \code{\link[=available_tags]{available_tags()}}. \section{Tags}{ The following tags exist: \itemize{ -\item{\link[=best_practices_linters]{best_practices} (60 linters)} +\item{\link[=best_practices_linters]{best_practices} (61 linters)} \item{\link[=common_mistakes_linters]{common_mistakes} (10 linters)} -\item{\link[=configurable_linters]{configurable} (34 linters)} +\item{\link[=configurable_linters]{configurable} (35 linters)} \item{\link[=consistency_linters]{consistency} (27 linters)} \item{\link[=correctness_linters]{correctness} (7 linters)} \item{\link[=default_linters]{default} (25 linters)} \item{\link[=deprecated_linters]{deprecated} (4 linters)} \item{\link[=efficiency_linters]{efficiency} (29 linters)} -\item{\link[=executing_linters]{executing} (5 linters)} +\item{\link[=executing_linters]{executing} (6 linters)} \item{\link[=package_development_linters]{package_development} (14 linters)} \item{\link[=pkg_testthat_linters]{pkg_testthat} (12 linters)} -\item{\link[=readability_linters]{readability} (60 linters)} +\item{\link[=readability_linters]{readability} (61 linters)} \item{\link[=regex_linters]{regex} (4 linters)} -\item{\link[=robustness_linters]{robustness} (16 linters)} +\item{\link[=robustness_linters]{robustness} (17 linters)} \item{\link[=style_linters]{style} (39 linters)} } } @@ -97,6 +97,7 @@ The following linters exist: \item{\code{\link{nzchar_linter}} (tags: best_practices, consistency, efficiency)} \item{\code{\link{object_length_linter}} (tags: configurable, default, executing, readability, style)} \item{\code{\link{object_name_linter}} (tags: configurable, consistency, default, executing, style)} +\item{\code{\link{object_overwrite_linter}} (tags: best_practices, configurable, executing, readability, robustness)} \item{\code{\link{object_usage_linter}} (tags: configurable, correctness, default, executing, readability, style)} \item{\code{\link{one_call_pipe_linter}} (tags: readability, style)} \item{\code{\link{outer_negation_linter}} (tags: best_practices, efficiency, readability)} diff --git a/man/object_overwrite_linter.Rd b/man/object_overwrite_linter.Rd new file mode 100644 index 000000000..86ccba5a9 --- /dev/null +++ b/man/object_overwrite_linter.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/object_overwrite_linter.R +\name{object_overwrite_linter} +\alias{object_overwrite_linter} +\title{Block assigning any variables whose name clashes with a \code{base} R function} +\usage{ +object_overwrite_linter( + packages = c("base", "stats", "utils", "tools", "methods", "graphics", "grDevices"), + allow_names = character() +) +} +\arguments{ +\item{packages}{Character vector of packages to search for names that should +be avoided. Defaults to the most common default packages: base, stats, +utils, tools, methods, graphics, and grDevices.} + +\item{allow_names}{Character vector of object names to ignore, i.e., which +are allowed to collide with exports from \code{packages}.} +} +\description{ +Re-using existing names creates a risk of subtle error best avoided. +Avoiding this practice also encourages using better, more descriptive names. +} +\examples{ +# will produce lints +code <- "function(x) {\n data <- x\n data\n}" +writeLines(code) +lint( + text = code, + linters = object_overwrite_linter() +) + +code <- "function(x) {\n lint <- 'fun'\n lint\n}" +writeLines(code) +lint( + text = code, + linters = object_overwrite_linter(packages = "lintr") +) + +# okay +code <- "function(x) {\n data('mtcars')\n}" +writeLines(code) +lint( + text = code, + linters = object_overwrite_linter() +) + +code <- "function(x) {\n data <- x\n data\n}" +writeLines(code) +lint( + text = code, + linters = object_overwrite_linter(packages = "base") +) + +# names in function signatures are ignored +lint( + text = "function(data) data <- subset(data, x > 0)", + linters = object_overwrite_linter() +) + +} +\seealso{ +\itemize{ +\item \link{linters} for a complete list of linters available in lintr. +\item \url{https://style.tidyverse.org/syntax.html#object-names} +} +} +\section{Tags}{ +\link[=best_practices_linters]{best_practices}, \link[=configurable_linters]{configurable}, \link[=executing_linters]{executing}, \link[=readability_linters]{readability}, \link[=robustness_linters]{robustness} +} diff --git a/man/readability_linters.Rd b/man/readability_linters.Rd index 0ce6547e9..101346b55 100644 --- a/man/readability_linters.Rd +++ b/man/readability_linters.Rd @@ -44,6 +44,7 @@ The following linters are tagged with 'readability': \item{\code{\link{nested_ifelse_linter}}} \item{\code{\link{numeric_leading_zero_linter}}} \item{\code{\link{object_length_linter}}} +\item{\code{\link{object_overwrite_linter}}} \item{\code{\link{object_usage_linter}}} \item{\code{\link{one_call_pipe_linter}}} \item{\code{\link{outer_negation_linter}}} diff --git a/man/robustness_linters.Rd b/man/robustness_linters.Rd index 18f12ca61..76882c4b1 100644 --- a/man/robustness_linters.Rd +++ b/man/robustness_linters.Rd @@ -20,6 +20,7 @@ The following linters are tagged with 'robustness': \item{\code{\link{missing_package_linter}}} \item{\code{\link{namespace_linter}}} \item{\code{\link{nonportable_path_linter}}} +\item{\code{\link{object_overwrite_linter}}} \item{\code{\link{routine_registration_linter}}} \item{\code{\link{sample_int_linter}}} \item{\code{\link{seq_linter}}} diff --git a/tests/testthat/test-object_overwrite_linter.R b/tests/testthat/test-object_overwrite_linter.R new file mode 100644 index 000000000..e434be321 --- /dev/null +++ b/tests/testthat/test-object_overwrite_linter.R @@ -0,0 +1,134 @@ +test_that("object_overwrite_linter skips allowed usages", { + linter <- object_overwrite_linter() + + expect_lint("function() DT <- data.frame(a = 1)", NULL, linter) + + # don't block names subassigned e.g. as columns or list elements + expect_lint("function() x$sd <- sd(rnorm(100))", NULL, linter) + + # These virtual names are ignored to slightly reduce the search space + expect_lint("function() .__C__logical <- TRUE", NULL, linter) +}) + +test_that("object_overwrite_linter blocks simple disallowed usages", { + linter <- object_overwrite_linter() + + expect_lint( + "foo <- function() data <- mtcars", + rex::rex("'data' is an exported object from package 'utils'."), + linter + ) + + expect_lint( + trim_some(" + foo <- function() { + sigma <- sd(rnorm(100)) + } + "), + rex::rex("'sigma' is an exported object from package 'stats'."), + linter + ) + + # not just the top level of the function + expect_lint( + trim_some(" + foo <- function() { + if (TRUE) { + sigma <- sd(rnorm(100)) + } else { + all <- FALSE + } + } + "), + list( + rex::rex("'sigma' is an exported object from package 'stats'."), + rex::rex("'all' is an exported object from package 'base'.") + ), + linter + ) +}) + +test_that("object_overwrite_linter skips any name assigned at the top level", { + linter <- object_overwrite_linter() + + expect_lint("data <- mtcars", NULL, linter) + expect_lint("sigma <- sd(rnorm(100))", NULL, linter) +}) + +test_that("object_overwrite_linter skips argument names", { + linter <- object_overwrite_linter() + + expect_lint("foo <- function(data) data <- data + 1", NULL, linter) + + expect_lint( + trim_some(" + bar <- function(a, b, c, sigma) { + sigma <- a * b * c ^ sigma + } + "), + NULL, + linter + ) +}) + +test_that("object_overwrite_linter skips data.table assignments with :=", { + expect_lint("foo <- function() x[, title := 4]", NULL, object_overwrite_linter()) +}) + +test_that("object_overwrite_linter optionally accepts package names", { + expect_lint("function() data <- 1", NULL, object_overwrite_linter(packages = "base")) + + expect_lint( + "function() lint <- TRUE", + rex::rex("'lint' is an exported object from package 'lintr'."), + object_overwrite_linter(packages = "lintr") + ) +}) + +test_that("non-<- assignments are detected", { + linter <- object_overwrite_linter() + lint_msg <- rex::rex("'sum' is an exported object from package 'base'.") + + expect_lint("function(x) sum(x) -> sum", lint_msg, linter) + expect_lint("function(x) sum <<- sum(x)", lint_msg, linter) + expect_lint("function(x) sum(x) ->> sum", lint_msg, linter) + expect_lint("function(x) sum = sum(x)", lint_msg, linter) +}) + +test_that("shorthand lambda is detected", { + skip_if_not_r_version("4.1.0") + + expect_lint("\\() data <- 1", "'data' is an exported object", object_overwrite_linter()) +}) + +test_that("allow_names= works to ignore certain symbols", { + expect_lint("function() data <- 1", NULL, object_overwrite_linter(allow_names = "data")) +}) + +test_that("lints vectorize", { + lines <- trim_some("{ + foo <- function() { + data <- 1 + var <- 2 + } + bar <- function(data) { + data <- data + 3 + sum <- 4 + } + }") + expect_lint( + lines, + list( + list(rex::rex("'data' is an exported object from package 'utils'."), line_number = 3L), + list(rex::rex("'var' is an exported object from package 'stats'."), line_number = 4L), + list(rex::rex("'sum' is an exported object from package 'base'."), line_number = 8L) + ), + object_overwrite_linter() + ) + + expect_lint( + lines, + list(rex::rex("'var' is an exported object from package 'stats'."), line_number = 4L), + object_overwrite_linter(packages = c("stats", "base"), allow_names = "sum") + ) +})