Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New sample_int_linter #2274

Merged
merged 5 commits into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ Collate:
'regex_subset_linter.R'
'repeat_linter.R'
'routine_registration_linter.R'
'sample_int_linter.R'
'scalar_in_linter.R'
'semicolon_linter.R'
'seq_linter.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ export(redundant_ifelse_linter)
export(regex_subset_linter)
export(repeat_linter)
export(routine_registration_linter)
export(sample_int_linter)
export(sarif_output)
export(scalar_in_linter)
export(semicolon_linter)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

* More helpful errors for invalid configs (#2253, @MichaelChirico).

### New linters

* `sample_int_linter()` for encouraging `sample.int(n, ...)` over equivalents like `sample(1:n, ...)` (part of #884, @MichaelChirico).

### Lint accuracy fixes: removing false positives

* `unreachable_code_linter()` ignores reachable code in inline functions like `function(x) if (x > 2) stop() else x` (#2259, @MEO265).
Expand Down
60 changes: 60 additions & 0 deletions R/sample_int_linter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#' Require usage of sample.int(n, m, ...) over sample(1:n, m, ...)
#'
#' [sample.int()] is preferable to `sample()` for the case of sampling numbers
#' between 1 and `n`. `sample` calls `sample.int()` "under the hood".
#'
#' @evalRd rd_tags("sample_int_linter")
#' @seealso [linters] for a complete list of linters available in lintr.
#' @export
sample_int_linter <- function() {
# looking for anything like sample(1: that doesn't come after a $ extraction
# exclude TRUE/FALSE for sample(replace = TRUE, ...) usage. better
# would be match.arg() but this also works.
xpath <- glue("
//SYMBOL_FUNCTION_CALL[text() = 'sample']
/parent::expr[not(OP-DOLLAR or OP-AT)]
/following-sibling::expr[1][
(
expr[1]/NUM_CONST[text() = '1' or text() = '1L']
and OP-COLON
)
or expr/SYMBOL_FUNCTION_CALL[text() = 'seq_len']
or (
expr/SYMBOL_FUNCTION_CALL[text() = 'seq']
and (
count(expr) = 2
or (
expr[2]/NUM_CONST[text() = '1' or text() = '1L']
and not(SYMBOL_SUB[
text() = 'by'
and not(following-sibling::expr[1]/NUM_CONST[text() = '1' or text() = '1L'])
])
)
)
)
or NUM_CONST[not(text() = 'TRUE' or text() = 'FALSE')]
]
/parent::expr
")

Linter(function(source_expression) {
if (!is_lint_level(source_expression, "expression")) {
return(list())
}

xml <- source_expression$xml_parsed_content

bad_expr <- xml_find_all(xml, xpath)
first_call <- xp_call_name(bad_expr, depth = 2L)
original <- sprintf("%s(n)", first_call)
original[!is.na(xml_find_first(bad_expr, "expr[2]/OP-COLON"))] <- "1:n"
original[!is.na(xml_find_first(bad_expr, "expr[2]/NUM_CONST"))] <- "n"

xml_nodes_to_lints(
bad_expr,
source_expression = source_expression,
lint_message = glue("sample.int(n, m, ...) is preferable to sample({original}, m, ...)."),
type = "warning"
)
})
}
1 change: 1 addition & 0 deletions inst/lintr/linters.csv
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ redundant_ifelse_linter,best_practices efficiency consistency configurable
regex_subset_linter,best_practices efficiency
repeat_linter,style readability
routine_registration_linter,best_practices efficiency robustness
sample_int_linter,efficiency readability robustness
scalar_in_linter,readability consistency best_practices efficiency
semicolon_linter,style readability default configurable
semicolon_terminator_linter,style readability deprecated configurable
Expand Down
1 change: 1 addition & 0 deletions man/efficiency_linters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions man/linters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/readability_linters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/robustness_linters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions man/sample_int_linter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

95 changes: 95 additions & 0 deletions tests/testthat/test-sample_int_linter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
test_that("sample_int_linter skips allowed usages", {
linter <- sample_int_linter()

expect_lint("sample(n, m)", NULL, linter)
expect_lint("sample(n, m, TRUE)", NULL, linter)
expect_lint("sample(n, m, prob = 1:n/n)", NULL, linter)
expect_lint("sample(foo(x), m, TRUE)", NULL, linter)
expect_lint("sample(n, replace = TRUE)", NULL, linter)

expect_lint("sample(10:1, m)", NULL, linter)
})

test_that("sample_int_linter blocks simple disallowed usages", {
linter <- sample_int_linter()
lint_msg <- rex::rex("sample.int(n, m, ...) is preferable to sample(1:n, m, ...).")

expect_lint("sample(1:10, 2)", lint_msg, linter)
# also matches literal integer
expect_lint("sample(1L:10L, 2)", lint_msg, linter)
expect_lint("sample(1:n, 2)", lint_msg, linter)
expect_lint("sample(1:k, replace = TRUE)", lint_msg, linter)
expect_lint("sample(1:foo(x), prob = bar(x))", lint_msg, linter)
})

test_that("sample_int_linter blocks sample(seq_len(n), ...) as well", {
expect_lint(
"sample(seq_len(10), 2)",
rex::rex("sample.int(n, m, ...) is preferable to sample(seq_len(n), m, ...)."),
sample_int_linter()
)
})

test_that("sample_int_linter blocks sample(seq(n)) and sample(seq(1, ...))", {
linter <- sample_int_linter()
lint_msg <- rex::rex("sample.int(n, m, ...) is preferable to sample(seq(n), m, ...).")

expect_lint("sample(seq(n), 5)", lint_msg, linter)
expect_lint("sample(seq(1, 10), 5)", lint_msg, linter)
expect_lint("sample(seq(1, 10, by = 1), 5)", lint_msg, linter)
expect_lint("sample(seq(1L, 10, by = 1L), 5)", lint_msg, linter)

# lint doesn't apply when by= is used (except when set to literal 1)
expect_lint("sample(seq(1, 10, by = 2), 5)", NULL, linter)
expect_lint("sample(seq(1, 10, by = n), 5)", NULL, linter)
})

test_that("sample_int_linter catches literal integer/numeric in the first arg", {
linter <- sample_int_linter()
lint_msg <- rex::rex("sample.int(n, m, ...) is preferable to sample(n, m, ...).")

expect_lint("sample(10L, 4)", lint_msg, linter)
expect_lint("sample(10, 5)", lint_msg, linter)
})

test_that("sample_int_linter skips TRUE or FALSE in the first argument", {
linter <- sample_int_linter()

expect_lint("sample(replace = TRUE, letters)", NULL, linter)
expect_lint("sample(replace = FALSE, letters)", NULL, linter)
})

test_that("sample_int_linter skips x$sample() usage", {
linter <- sample_int_linter()
lint_msg <- rex::rex("sample.int(n, m, ...) is preferable to sample(n, m, ...).")

expect_lint("foo$sample(1L)", NULL, linter)
expect_lint("foo$sample(1:10)", NULL, linter)
expect_lint("foo$sample(seq_len(10L))", NULL, linter)
# ditto for '@' slot extraction
expect_lint("foo@sample(1L)", NULL, linter)

# however, base::sample qualification is still caught
expect_lint("base::sample(10L)", lint_msg, linter)

# but also, not everything "below" a $ extraction is skipped
expect_lint("foo$bar(sample(10L))", lint_msg, linter)
})

test_that("multiple lints are generated correctly", {
expect_lint(
trim_some("{
sample(1:10, 2)
sample(10, 2)
sample(seq_len(10), 2)
sample(seq(10), 2)
}"),
list(
rex::rex("sample(1:n"),
rex::rex("sample(n"),
rex::rex("sample(seq_len(n)"),
rex::rex("sample(seq(n)")
),
sample_int_linter()
)
})
Loading