From 9c68874921eb7b29af6ddc98dc7d501636590d6f Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 10 Nov 2023 23:41:36 +0000 Subject: [PATCH 1/2] New sample_int_linter --- DESCRIPTION | 1 + NAMESPACE | 1 + NEWS.md | 4 ++ R/sample_int_linter.R | 60 ++++++++++++++++ inst/lintr/linters.csv | 1 + man/efficiency_linters.Rd | 1 + man/linters.Rd | 7 +- man/readability_linters.Rd | 1 + man/robustness_linters.Rd | 1 + man/sample_int_linter.Rd | 18 +++++ tests/testthat/test-sample_int_linter.R | 95 +++++++++++++++++++++++++ 11 files changed, 187 insertions(+), 3 deletions(-) create mode 100644 R/sample_int_linter.R create mode 100644 man/sample_int_linter.Rd create mode 100644 tests/testthat/test-sample_int_linter.R diff --git a/DESCRIPTION b/DESCRIPTION index 5fbd73edd..c2b5664c2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -155,6 +155,7 @@ Collate: 'regex_subset_linter.R' 'repeat_linter.R' 'routine_registration_linter.R' + 'sample_int_linter.R' 'scalar_in_linter.R' 'semicolon_linter.R' 'seq_linter.R' diff --git a/NAMESPACE b/NAMESPACE index c2cc030ca..96fc778bf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -118,6 +118,7 @@ export(redundant_ifelse_linter) export(regex_subset_linter) export(repeat_linter) export(routine_registration_linter) +export(sample_int_linter) export(sarif_output) export(scalar_in_linter) export(semicolon_linter) diff --git a/NEWS.md b/NEWS.md index fc83535fb..a2ae65bab 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,10 @@ * More helpful errors for invalid configs (#2253, @MichaelChirico). +### New linters + +* `sample_int_linter()` for encouraging `sample.int(n, ...)` over equivalents like `sample(1:n, ...)` (part of #884, @MichaelChirico). + ### Lint accuracy fixes: removing false positives * `unreachable_code_linter()` ignores reachable code in inline functions like `function(x) if (x > 2) stop() else x` (#2259, @MEO265). diff --git a/R/sample_int_linter.R b/R/sample_int_linter.R new file mode 100644 index 000000000..a9da00f6f --- /dev/null +++ b/R/sample_int_linter.R @@ -0,0 +1,60 @@ +#' Require usage of sample.int(n, m, ...) over sample(1:n, m, ...) +#' +#' [sample.int()] is preferable to `sample()` for the case of sampling numbers +#' between 1 and `n`. `sample` calls `sample.int()` "under the hood". +#' +#' @evalRd rd_tags("sample_int_linter") +#' @seealso [linters] for a complete list of linters available in lintr. +#' @export +sample_int_linter <- function() { + # looking for anything like sample(1: that doesn't come after a $ extraction + # exclude TRUE/FALSE for sample(replace = TRUE, ...) usage. better + # would be match.arg() but this also works. + xpath <- glue(" + //SYMBOL_FUNCTION_CALL[text() = 'sample'] + /parent::expr[not(OP-DOLLAR or OP-AT)] + /following-sibling::expr[1][ + ( + expr[1]/NUM_CONST[text() = '1' or text() = '1L'] + and OP-COLON + ) + or expr/SYMBOL_FUNCTION_CALL[text() = 'seq_len'] + or ( + expr/SYMBOL_FUNCTION_CALL[text() = 'seq'] + and ( + count(expr) = 2 + or ( + expr[2]/NUM_CONST[text() = '1' or text() = '1L'] + and not(SYMBOL_SUB[ + text() = 'by' + and not(following-sibling::expr[1]/NUM_CONST[text() = '1' or text() = '1L']) + ]) + ) + ) + ) + or NUM_CONST[not(text() = 'TRUE' or text() = 'FALSE')] + ] + /parent::expr + ") + + Linter(function(source_expression) { + if (!is_lint_level(source_expression, "expression")) { + return(list()) + } + + xml <- source_expression$xml_parsed_content + + bad_expr <- xml_find_all(xml, xpath) + first_call <- xp_call_name(bad_expr, depth = 2L) + original <- sprintf("%s(n)", first_call) + original[!is.na(xml_find_first(bad_expr, "expr[2]/OP-COLON"))] <- "1:n" + original[!is.na(xml_find_first(bad_expr, "expr[2]/NUM_CONST"))] <- "n" + + xml_nodes_to_lints( + bad_expr, + source_expression = source_expression, + lint_message = glue("sample.int(n, m, ...) is preferable to sample({original}, m, ...)."), + type = "warning" + ) + }) +} diff --git a/inst/lintr/linters.csv b/inst/lintr/linters.csv index 4df2b4ecd..99a053a08 100644 --- a/inst/lintr/linters.csv +++ b/inst/lintr/linters.csv @@ -75,6 +75,7 @@ redundant_ifelse_linter,best_practices efficiency consistency configurable regex_subset_linter,best_practices efficiency repeat_linter,style readability routine_registration_linter,best_practices efficiency robustness +sample_int_linter,efficiency readability robustness scalar_in_linter,readability consistency best_practices efficiency semicolon_linter,style readability default configurable semicolon_terminator_linter,style readability deprecated configurable diff --git a/man/efficiency_linters.Rd b/man/efficiency_linters.Rd index 7122f90e8..146c0be54 100644 --- a/man/efficiency_linters.Rd +++ b/man/efficiency_linters.Rd @@ -28,6 +28,7 @@ The following linters are tagged with 'efficiency': \item{\code{\link{redundant_ifelse_linter}}} \item{\code{\link{regex_subset_linter}}} \item{\code{\link{routine_registration_linter}}} +\item{\code{\link{sample_int_linter}}} \item{\code{\link{scalar_in_linter}}} \item{\code{\link{seq_linter}}} \item{\code{\link{sort_linter}}} diff --git a/man/linters.Rd b/man/linters.Rd index d0fb9943b..6e5e929f2 100644 --- a/man/linters.Rd +++ b/man/linters.Rd @@ -24,12 +24,12 @@ The following tags exist: \item{\link[=correctness_linters]{correctness} (7 linters)} \item{\link[=default_linters]{default} (25 linters)} \item{\link[=deprecated_linters]{deprecated} (8 linters)} -\item{\link[=efficiency_linters]{efficiency} (25 linters)} +\item{\link[=efficiency_linters]{efficiency} (26 linters)} \item{\link[=executing_linters]{executing} (5 linters)} \item{\link[=package_development_linters]{package_development} (14 linters)} \item{\link[=pkg_testthat_linters]{pkg_testthat} (12 linters)} -\item{\link[=readability_linters]{readability} (54 linters)} -\item{\link[=robustness_linters]{robustness} (14 linters)} +\item{\link[=readability_linters]{readability} (55 linters)} +\item{\link[=robustness_linters]{robustness} (15 linters)} \item{\link[=style_linters]{style} (38 linters)} } } @@ -107,6 +107,7 @@ The following linters exist: \item{\code{\link{regex_subset_linter}} (tags: best_practices, efficiency)} \item{\code{\link{repeat_linter}} (tags: readability, style)} \item{\code{\link{routine_registration_linter}} (tags: best_practices, efficiency, robustness)} +\item{\code{\link{sample_int_linter}} (tags: efficiency, readability, robustness)} \item{\code{\link{scalar_in_linter}} (tags: best_practices, consistency, efficiency, readability)} \item{\code{\link{semicolon_linter}} (tags: configurable, default, readability, style)} \item{\code{\link{seq_linter}} (tags: best_practices, consistency, default, efficiency, robustness)} diff --git a/man/readability_linters.Rd b/man/readability_linters.Rd index ac207c223..cbe578d2d 100644 --- a/man/readability_linters.Rd +++ b/man/readability_linters.Rd @@ -52,6 +52,7 @@ The following linters are tagged with 'readability': \item{\code{\link{quotes_linter}}} \item{\code{\link{redundant_equals_linter}}} \item{\code{\link{repeat_linter}}} +\item{\code{\link{sample_int_linter}}} \item{\code{\link{scalar_in_linter}}} \item{\code{\link{semicolon_linter}}} \item{\code{\link{sort_linter}}} diff --git a/man/robustness_linters.Rd b/man/robustness_linters.Rd index 9581e5d7f..728114b33 100644 --- a/man/robustness_linters.Rd +++ b/man/robustness_linters.Rd @@ -21,6 +21,7 @@ The following linters are tagged with 'robustness': \item{\code{\link{namespace_linter}}} \item{\code{\link{nonportable_path_linter}}} \item{\code{\link{routine_registration_linter}}} +\item{\code{\link{sample_int_linter}}} \item{\code{\link{seq_linter}}} \item{\code{\link{strings_as_factors_linter}}} \item{\code{\link{T_and_F_symbol_linter}}} diff --git a/man/sample_int_linter.Rd b/man/sample_int_linter.Rd new file mode 100644 index 000000000..ed4c7a7fc --- /dev/null +++ b/man/sample_int_linter.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sample_int_linter.R +\name{sample_int_linter} +\alias{sample_int_linter} +\title{Require usage of sample.int(n, m, ...) over sample(1:n, m, ...)} +\usage{ +sample_int_linter() +} +\description{ +\code{\link[=sample.int]{sample.int()}} is preferable to \code{sample()} for the case of sampling numbers +between 1 and \code{n}. \code{sample} calls \code{sample.int()} "under the hood". +} +\seealso{ +\link{linters} for a complete list of linters available in lintr. +} +\section{Tags}{ +\link[=efficiency_linters]{efficiency}, \link[=readability_linters]{readability}, \link[=robustness_linters]{robustness} +} diff --git a/tests/testthat/test-sample_int_linter.R b/tests/testthat/test-sample_int_linter.R new file mode 100644 index 000000000..7e7c4c3ba --- /dev/null +++ b/tests/testthat/test-sample_int_linter.R @@ -0,0 +1,95 @@ +test_that("sample_int_linter skips allowed usages", { + linter <- sample_int_linter() + + expect_lint("sample(n, m)", NULL, linter) + expect_lint("sample(n, m, TRUE)", NULL, linter) + expect_lint("sample(n, m, prob = 1:n/n)", NULL, linter) + expect_lint("sample(foo(x), m, TRUE)", NULL, linter) + expect_lint("sample(n, replace = TRUE)", NULL, linter) + + expect_lint("sample(10:1, m)", NULL, linter) +}) + +test_that("sample_int_linter blocks simple disallowed usages", { + linter <- sample_int_linter() + lint_msg <- rex::rex("sample.int(n, m, ...) is preferable to sample(1:n, m, ...).") + + expect_lint("sample(1:10, 2)", lint_msg, linter) + # also matches literal integer + expect_lint("sample(1L:10L, 2)", lint_msg, linter) + expect_lint("sample(1:n, 2)", lint_msg, linter) + expect_lint("sample(1:k, replace = TRUE)", lint_msg, linter) + expect_lint("sample(1:foo(x), prob = bar(x))", lint_msg, linter) +}) + +test_that("sample_int_linter blocks sample(seq_len(n), ...) as well", { + expect_lint( + "sample(seq_len(10), 2)", + rex::rex("sample.int(n, m, ...) is preferable to sample(seq_len(n), m, ...)."), + sample_int_linter() + ) +}) + +test_that("sample_int_linter blocks sample(seq(n)) and sample(seq(1, ...))", { + linter <- sample_int_linter() + lint_msg <- rex::rex("sample.int(n, m, ...) is preferable to sample(seq(n), m, ...).") + + expect_lint("sample(seq(n), 5)", lint_msg, linter) + expect_lint("sample(seq(1, 10), 5)", lint_msg, linter) + expect_lint("sample(seq(1, 10, by = 1), 5)", lint_msg, linter) + expect_lint("sample(seq(1L, 10, by = 1L), 5)", lint_msg, linter) + + # lint doesn't apply when by= is used (except when set to literal 1) + expect_lint("sample(seq(1, 10, by = 2), 5)", NULL, linter) + expect_lint("sample(seq(1, 10, by = n), 5)", NULL, linter) +}) + +test_that("sample_int_linter catches literal integer/numeric in the first arg", { + linter <- sample_int_linter() + lint_msg <- rex::rex("sample.int(n, m, ...) is preferable to sample(n, m, ...).") + + expect_lint("sample(10L, 4)", lint_msg, linter) + expect_lint("sample(10, 5)", lint_msg, linter) +}) + +test_that("sample_int_linter skips TRUE or FALSE in the first argument", { + linter <- sample_int_linter() + + expect_lint("sample(replace = TRUE, letters)", NULL, linter) + expect_lint("sample(replace = FALSE, letters)", NULL, linter) +}) + +test_that("sample_int_linter skips x$sample() usage", { + linter <- sample_int_linter() + lint_msg <- rex::rex("sample.int(n, m, ...) is preferable to sample(n, m, ...).") + + expect_lint("foo$sample(1L)", NULL, linter) + expect_lint("foo$sample(1:10)", NULL, linter) + expect_lint("foo$sample(seq_len(10L))", NULL, linter) + # ditto for '@' slot extraction + expect_lint("foo@sample(1L)", NULL, linter) + + # however, base::sample qualification is still caught + expect_lint("base::sample(10L)", lint_msg, linter) + + # but also, not everything "below" a $ extraction is skipped + expect_lint("foo$bar(sample(10L))", lint_msg, linter) +}) + +test_that("multiple lints are generated correctly", { + expect_lint( + trim_some("{ + sample(1:10, 2) + sample(10, 2) + sample(seq_len(10), 2) + sample(seq(10), 2) + }"), + list( + rex::rex("sample(1:n"), + rex::rex("sample(n"), + rex::rex("sample(seq_len(n)"), + rex::rex("sample(seq(n)") + ), + sample_int_linter() + ) +}) From d6993ca03828fe779690c01787c41da792e2b36c Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 12 Nov 2023 17:35:12 +0000 Subject: [PATCH 2/2] add metadata regression test --- tests/testthat/test-sample_int_linter.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-sample_int_linter.R b/tests/testthat/test-sample_int_linter.R index 7e7c4c3ba..bf217cbee 100644 --- a/tests/testthat/test-sample_int_linter.R +++ b/tests/testthat/test-sample_int_linter.R @@ -85,10 +85,10 @@ test_that("multiple lints are generated correctly", { sample(seq(10), 2) }"), list( - rex::rex("sample(1:n"), - rex::rex("sample(n"), - rex::rex("sample(seq_len(n)"), - rex::rex("sample(seq(n)") + list(rex::rex("sample(1:n"), line_number = 2L, column_number = 3L), + list(rex::rex("sample(n"), line_number = 3L, column_number = 3L), + list(rex::rex("sample(seq_len(n)"), line_number = 4L, column_number = 3L), + list(rex::rex("sample(seq(n)"), line_number = 5L, column_number = 3L) ), sample_int_linter() )