From 7b1c6489f56eb7132d49743ca65dd0bfe45e2422 Mon Sep 17 00:00:00 2001 From: Andrew Bruce Date: Fri, 6 Dec 2024 22:17:23 -0800 Subject: [PATCH] * first pass at cms_distributions reimplementation --- DESCRIPTION | 4 +- NAMESPACE | 4 +- R/cms.R | 2 +- R/generated-globals.R | 12 ++- R/provider-package.R | 3 +- R/reassignments.R | 2 +- R/utils.R | 122 ++++++++++++++++------------ R/zzz.R | 100 +++++++++++++++++++++++ data-raw/dev/distro.R | 142 +++++++++++++++++++++++++++++++++ man/cms_distributions.Rd | 12 +++ pkgdown/_pkgdown.yml | 4 +- tests/testthat/_snaps/utils.md | 2 +- tests/testthat/test-utils.R | 4 +- 13 files changed, 346 insertions(+), 67 deletions(-) create mode 100644 R/zzz.R create mode 100644 data-raw/dev/distro.R create mode 100644 man/cms_distributions.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 6e9a6e81..23d0d312 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,6 +17,8 @@ Depends: R (>= 4.1.0) Imports: cli, + collapse, + arrow, dplyr, furrr, httr2, @@ -31,9 +33,9 @@ Imports: stringr, tidyr, vctrs, + data.table, zeallot Suggests: - data.table, fipio, fontawesome, fuimus, diff --git a/NAMESPACE b/NAMESPACE index 9bc4af5f..bb9f1deb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ export(betos) export(change) export(chg) export(clinicians) +export(cms_distributions) export(compare_hcpcs) export(df2chr) export(display_long) @@ -54,7 +55,8 @@ export(utilization) export(utilization_) export(years_df) export(years_vec) +import(rlang) +importFrom(collapse,"%==%") importFrom(lifecycle,deprecated) -importFrom(rlang,"%||%") importFrom(stringi,"%s+%") importFrom(zeallot,"%<-%") diff --git a/R/cms.R b/R/cms.R index 07d747bc..e9472973 100644 --- a/R/cms.R +++ b/R/cms.R @@ -98,7 +98,7 @@ format_param <- function(param, arg, type = "filter") { rlang::check_required(param) rlang::check_required(arg) - rlang::arg_match(type, c("filter", "sql")) + rlang::arg_match0(type, c("filter", "sql")) if (type %in% 'filter') {out <- paste0("filter[", param, "]=", arg)} if (type %in% 'sql') {out <- paste0("[WHERE ", param, " = ", "%22", arg, "%22", "]")} diff --git a/R/generated-globals.R b/R/generated-globals.R index a001ff3f..9dbfe975 100644 --- a/R/generated-globals.R +++ b/R/generated-globals.R @@ -5,10 +5,6 @@ utils::globalVariables(c( ".name_1", # ".pdi_1", - # - # - # - ":=", # "a2la", # @@ -148,14 +144,19 @@ utils::globalVariables(c( "Description", # # + # "distribution", # + # "distribution_accessURL", # + # "distribution_format", # + # "distribution_modified", # + # "distribution_title", # # @@ -249,6 +250,7 @@ utils::globalVariables(c( # # # + # "modified", # # @@ -394,6 +396,7 @@ utils::globalVariables(c( # # # + # "title", # "title._value", @@ -453,6 +456,7 @@ utils::globalVariables(c( # # # + # "year", # "zip", diff --git a/R/provider-package.R b/R/provider-package.R index 8575873e..fe4fe508 100644 --- a/R/provider-package.R +++ b/R/provider-package.R @@ -5,6 +5,7 @@ #' @importFrom lifecycle deprecated #' @importFrom zeallot %<-% #' @importFrom stringi %s+% -#' @importFrom rlang %||% +#' @importFrom collapse %==% +#' @import rlang ## usethis namespace: end NULL diff --git a/R/reassignments.R b/R/reassignments.R index 7ebfbe9a..5c605e96 100644 --- a/R/reassignments.R +++ b/R/reassignments.R @@ -79,7 +79,7 @@ reassignments <- function(npi = NULL, enid_org <- enid_org %nn% check_enid(enid_org) if (!is.null(entry)) { - entry <- rlang::arg_match(entry, c("E", "R")) + entry <- rlang::arg_match0(entry, c("E", "R")) entry <- dplyr::case_match(entry, "E" ~ "Physician Assistant", "R" ~ "Reassignment") } diff --git a/R/utils.R b/R/utils.R index c88f9198..4c1347f1 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,39 +1,42 @@ -#' Infix operator for `if (!is.null(x)) y else x` statements +#' Infix if (!is.null(x)) y else x +#' #' @param x,y description -#' @return description +#' +#' @returns y if x is not NULL, otherwise x +#' #' @examples #' ccn <- 123456 -#' ccn <- ccn %nn% as.character(ccn) -#' ccn +#' +#' ccn %nn% as.character(ccn) +#' +#' NULL %nn% as.character(ccn) +#' #' @autoglobal +#' #' @noRd -`%nn%` <- function(x, y) if (!is.null(x)) y else x #nocov - -#' Infix operator for `not in` statements -#' @return description -#' @autoglobal -#' @noRd -`%nin%` <- function(x, table) match(x, table, nomatch = 0L) == 0L #nocov +`%nn%` <- \(x, y) if (!is.null(x)) y else x #nocov #' Format US ZIP codes -#' @param zip Nine-digit US ZIP code -#' @return ZIP code, hyphenated for ZIP+4 or 5-digit ZIP. +#' +#' @param x Nine-digit US ZIP code +#' +#' @returns ZIP code, hyphenated for ZIP+4 or 5-digit ZIP. +#' #' @examples #' format_zipcode(123456789) #' format_zipcode(12345) +#' #' @autoglobal +#' #' @noRd -format_zipcode <- function(zip) { +format_zipcode <- function(x) { - zip <- as.character(zip) + stopifnot(is.character(x)) + + if (grepl("^[0-9]{9}$", x)) + paste0(substr(x, 1, 5), "-", substr(x, 6, 9)) + else x - if (stringr::str_detect(zip, "^[[:digit:]]{9}$") == TRUE) { - zip <- paste0(stringr::str_sub(zip, 1, 5), "-", - stringr::str_sub(zip, 6, 9)) - return(zip) - } else { - return(zip) - } } #' Remove periods from credentials @@ -92,37 +95,43 @@ tf_2_yn <- function(x) { x, TRUE ~ "Y", FALSE ~ "N", - .default = NULL + .default = NA_character_ ) } #' @param abb state abbreviation -#' @return state full name +#' +#' @returns state full name +#' #' @autoglobal +#' #' @noRd abb2full <- function(abb, arg = rlang::caller_arg(abb), call = rlang::caller_env()) { - results <- dplyr::tibble(x = c(state.abb[1:8], - 'DC', - state.abb[9:50], - 'AS', 'GU', 'MP', 'PR', 'VI', 'UK'), - y = c(state.name[1:8], - 'District of Columbia', - state.name[9:50], - 'American Samoa', - 'Guam', - 'Northern Mariana Islands', - 'Puerto Rico', - 'Virgin Islands', - 'Unknown')) |> + results <- dplyr::tibble( + x = c(state.abb[1:8], + 'DC', + state.abb[9:50], + 'AS', 'GU', 'MP', 'PR', 'VI', 'UK'), + y = c(state.name[1:8], + 'District of Columbia', + state.name[9:50], + 'American Samoa', + 'Guam', + 'Northern Mariana Islands', + 'Puerto Rico', + 'Virgin Islands', + 'Unknown')) |> dplyr::filter(x == abb) |> dplyr::pull(y) if (vctrs::vec_is_empty(results)) { - cli::cli_abort(c("{.val {abb}} is not a valid state abbreviation."), # nolint - call = call) + cli::cli_abort( + c("{.arg {arg}} is not a valid state abbreviation."), # nolint + arg = arg, + call = call) } return(results) } @@ -135,7 +144,10 @@ abb2full <- function(abb, #' @keywords internal display_long <- function(df, cols = dplyr::everything()) { - df |> dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |> + df |> + dplyr::mutate( + dplyr::across( + dplyr::everything(), as.character)) |> tidyr::pivot_longer({{ cols }}) } @@ -212,11 +224,13 @@ tidyup <- function(df, #' @noRd combine <- function(df, nm, cols, sep = " ") { - return(tidyr::unite(df, col = {{ nm }}, - dplyr::any_of(cols), - remove = TRUE, - na.rm = TRUE, - sep = sep)) + tidyr::unite( + df, + col = {{ nm }}, + dplyr::any_of(cols), + remove = TRUE, + na.rm = TRUE, + sep = sep) } #' Remove empty rows and columns @@ -233,13 +247,15 @@ narm <- function(df) { #' @noRd format_cli <- function(df) { - x <- purrr::map2(df$x, - df$y, - stringr::str_c, - sep = " = ", - collapse = "") - - cli::cli_alert_danger("No results for {.val {x}}", - wrap = TRUE) + x <- purrr::map2( + df$x, + df$y, + stringr::str_c, + sep = " = ", + collapse = "") + cli::cli_alert_danger( + "No results for {.val {x}}", + wrap = TRUE + ) } diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 00000000..4bb47107 --- /dev/null +++ b/R/zzz.R @@ -0,0 +1,100 @@ +.onLoad <- function(libname, pkgname) { + .__distros <<- cms_distributions() +} + +.onUnload <- function(libpath) { + remove(".__distros", envir = .GlobalEnv) +} + +#' Retrieve the latest CMS distributions +#' +#' @autoglobal +#' +#' @keywords internal +#' +#' @export +cms_distributions <- \() { + + datasets <- c( + hospitals = "^Hospital Enrollments", + providers = "^Medicare Fee-For-Service Public Provider Enrollment", + crosswalk = "^Medicare Provider and Supplier Taxonomy Crosswalk", + orderrefer = "^Order and Referring", + pending = "^Pending Initial Logging and Tracking", + quality_payment = "^Quality Payment Program Experience", + rbcs = "^Restructured BETOS Classification System", + reassignment = "^Revalidation", # Clinic Group Practice Reassignment", + optout = "^Opt Out Affidavits", + utilization = "^Medicare Physician & Other Practitioners", + outpatient = "^Medicare Outpatient Hospitals", + laboratories = "^Provider of Services File - Clinical Laboratories", + beneficiaries = "^Medicare Monthly Enrollment", + prescribers = "^Medicare Part D Prescribers" + ) + + resp <- httr2::request("https://data.cms.gov/data.json") |> + httr2::req_perform() + + resp <- as.Date( + regmatches(resp[["headers"]][["Last-Modified"]], + regexpr("[0-9]{2} [A-Za-z]{3} [0-9]{4}", + resp[["headers"]][["Last-Modified"]], + perl = TRUE)), + format = "%d %b %Y") + + arrow_cms <- arrow::read_json_arrow( + file = "https://data.cms.gov/data.json", + col_select = c("dataset"), + as_data_frame = TRUE) + + arrow_cms <- arrow_cms[["dataset"]][[1]][ + c("title", "modified", "distribution")] |> + collapse::fsubset( + grepl( + paste0( + unname(datasets), + collapse = "|"), + title, + perl = TRUE + ) + ) |> + tidyr::unnest( + cols = distribution, + names_sep = "_", + keep_empty = TRUE) |> + collapse::fsubset( + distribution_format %==% "API") |> + collapse::fcompute( + year = as.integer( + regmatches( + distribution_title, + regexpr( + "\\d{4}(?=-\\d{2}-\\d{2})", + distribution_title, + perl = TRUE))), + modified = as.Date.character(distribution_modified), + distribution = regmatches( + distribution_accessURL, + regexpr( + "(?<=dataset/).*?(?=/data)", + distribution_accessURL, + perl = TRUE)), + keep = "title") |> + collapse::fsubset( + data.table::rleid( + title, + year, + modified) != collapse::flag( + data.table::rleid( + title, + year, + modified + ), + -1) + ) + + list( + last_modified = resp, + distributions = arrow_cms + ) +} diff --git a/data-raw/dev/distro.R b/data-raw/dev/distro.R new file mode 100644 index 00000000..be5f83d2 --- /dev/null +++ b/data-raw/dev/distro.R @@ -0,0 +1,142 @@ +`%==%` <- collapse::`%==%` + +datasets <- c( + hospitals = "^Hospital Enrollments", + providers = "^Medicare Fee-For-Service Public Provider Enrollment", + crosswalk = "^Medicare Provider and Supplier Taxonomy Crosswalk", + orderrefer = "^Order and Referring", + pending = "^Pending Initial Logging and Tracking", + quality_payment = "^Quality Payment Program Experience", + rbcs = "^Restructured BETOS Classification System", + reassignment = "^Revalidation Clinic Group Practice Reassignment", + optout = "^Opt Out Affidavits", + utilization = "^Medicare Physician & Other Practitioners", + outpatient = "^Medicare Outpatient Hospitals", + laboratories = "^Provider of Services File - Clinical Laboratories", + beneficiaries = "^Medicare Monthly Enrollment", + prescribers = "^Medicare Part D Prescribers" +) + +cms_distro <- \() { + resp <- httr2::request("https://data.cms.gov/data.json") |> + httr2::req_perform() + + resp <- arknpi::as_date( + regmatches(resp[["headers"]][["Last-Modified"]], + regexpr("[0-9]{2} [A-Za-z]{3} [0-9]{4}", + resp[["headers"]][["Last-Modified"]], + perl = TRUE)), + fmt = "%d %b %Y") + + arrow_cms <- arrow::read_json_arrow( + file = "https://data.cms.gov/data.json", + col_select = c("dataset")) + + arrow_cms <- arrow_cms[["dataset"]][[1]][ + c("title", "modified", "distribution")] |> + collapse::fsubset( + codex::sf_detect( + title, + codex::sf_smush( + unname(datasets), "|") + ) + ) |> + tidyr::unnest( + cols = distribution, + names_sep = "_", + keep_empty = TRUE) |> + collapse::fsubset( + distribution_format %==% "API") |> + collapse::fcompute( + year = as.integer( + regmatches( + distribution_title, + regexpr( + "\\d{4}(?=-\\d{2}-\\d{2})", + distribution_title, + perl = TRUE))), + modified = as.Date.character(distribution_modified), + distribution = regmatches( + distribution_accessURL, + regexpr( + "(?<=dataset/).*?(?=/data)", + distribution_accessURL, + perl = TRUE)), + keep = "title") |> + collapse::fsubset( + data.table::rleid( + title, + year, + modified) != collapse::flag( + data.table::rleid( + title, + year, + modified + ), + -1) + ) + + list( + last_modified = resp, + distributions = arrow_cms + ) +} + +c( + "Managing Clinician Aggregation Group Performance", + "Quarterly Prescription Drug Plan Formulary, Pharmacy Network, and Pricing Information", + "Medicare Clinical Laboratory Fee Schedule Private Payer Rates and Volumes", + "Hospital Price Transparency Enforcement Activities and Outcomes", + "Federally Qualified Health Center Enrollments", + "Rural Health Clinic Enrollments" +) + + +body <- RcppSimdJson::fparse( + resp$body +)[["dataset"]][ + c("title", + "modified", + "distribution")] + + + +unnest_dt <- function(tbl, col) { + + tbl <- data.table::as.data.table(tbl) + + col <- rlang::ensyms(col) + + col_names <- rlang::syms(setdiff(colnames(tbl), as.character(col))) + + tbl <- rlang::eval( + rlang::expr( + tbl[, + as.character(unlist(!!!col)), + by = list(!!!col_names)]) + ) + + colnames(tbl) <- c(as.character(col_names), as.character(col)) + + tbl +} + + +unnest_dt2 <- function(tbl, ...) { + + tbl <- data.table::as.data.table(tbl) + + col <- rlang::ensyms(...) + + clnms <- rlang::syms(setdiff(colnames(tbl), as.character(col))) + + tbl <- data.table::as.data.table(tbl) + + tbl <- rlang::eval( + rlang::expr(tbl[, lapply(.SD, unlist), by = list(!!!clnms), .SDcols = as.character(col)]) + ) + + colnames(tbl) <- c(as.character(clnms), as.character(col)) + + tbl +} diff --git a/man/cms_distributions.Rd b/man/cms_distributions.Rd new file mode 100644 index 00000000..65b67d2b --- /dev/null +++ b/man/cms_distributions.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/zzz.R +\name{cms_distributions} +\alias{cms_distributions} +\title{Retrieve the latest CMS distributions} +\usage{ +cms_distributions() +} +\description{ +Retrieve the latest CMS distributions +} +\keyword{internal} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 3ebc2987..be3f4500 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -5,8 +5,8 @@ template: light-switch: true bootswatch: simplex bslib: - base_font: {google: "IBM Plex Sans"} - heading_font: {google: "Kanit"} + base_font: {google: "Roboto Condensed"} + heading_font: {google: "Roboto"} code_font: {google: "JetBrains Mono"} home: diff --git a/tests/testthat/_snaps/utils.md b/tests/testthat/_snaps/utils.md index e8f6eea5..839ca42e 100644 --- a/tests/testthat/_snaps/utils.md +++ b/tests/testthat/_snaps/utils.md @@ -4,7 +4,7 @@ abb2full("YN") Condition Error: - ! "YN" is not a valid state abbreviation. + ! `"YN"` is not a valid state abbreviation. # format_param() works diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 48043f3d..23a15a66 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1,6 +1,6 @@ test_that("format_zipcode() works", { - expect_equal(format_zipcode(123456789), "12345-6789") - expect_equal(format_zipcode(12345), "12345") + expect_equal(format_zipcode("123456789"), "12345-6789") + expect_equal(format_zipcode("12345"), "12345") }) test_that("clean_credentials() works", {