From 7b1c6489f56eb7132d49743ca65dd0bfe45e2422 Mon Sep 17 00:00:00 2001
From: Andrew Bruce <andrewbruce.himni@gmail.com>
Date: Fri, 6 Dec 2024 22:17:23 -0800
Subject: [PATCH] * first pass at cms_distributions reimplementation

---
 DESCRIPTION                    |   4 +-
 NAMESPACE                      |   4 +-
 R/cms.R                        |   2 +-
 R/generated-globals.R          |  12 ++-
 R/provider-package.R           |   3 +-
 R/reassignments.R              |   2 +-
 R/utils.R                      | 122 ++++++++++++++++------------
 R/zzz.R                        | 100 +++++++++++++++++++++++
 data-raw/dev/distro.R          | 142 +++++++++++++++++++++++++++++++++
 man/cms_distributions.Rd       |  12 +++
 pkgdown/_pkgdown.yml           |   4 +-
 tests/testthat/_snaps/utils.md |   2 +-
 tests/testthat/test-utils.R    |   4 +-
 13 files changed, 346 insertions(+), 67 deletions(-)
 create mode 100644 R/zzz.R
 create mode 100644 data-raw/dev/distro.R
 create mode 100644 man/cms_distributions.Rd
diff --git a/DESCRIPTION b/DESCRIPTION
index 6e9a6e81..23d0d312 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -17,6 +17,8 @@ Depends:
     R (>= 4.1.0)
 Imports: 
     cli,
+    collapse,
+    arrow,
     dplyr,
     furrr,
     httr2,
@@ -31,9 +33,9 @@ Imports:
     stringr,
     tidyr,
     vctrs,
+    data.table,
     zeallot
 Suggests: 
-    data.table,
     fipio,
     fontawesome,
     fuimus,
diff --git a/NAMESPACE b/NAMESPACE
index 9bc4af5f..bb9f1deb 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -8,6 +8,7 @@ export(betos)
 export(change)
 export(chg)
 export(clinicians)
+export(cms_distributions)
 export(compare_hcpcs)
 export(df2chr)
 export(display_long)
@@ -54,7 +55,8 @@ export(utilization)
 export(utilization_)
 export(years_df)
 export(years_vec)
+import(rlang)
+importFrom(collapse,"%==%")
 importFrom(lifecycle,deprecated)
-importFrom(rlang,"%||%")
 importFrom(stringi,"%s+%")
 importFrom(zeallot,"%<-%")
diff --git a/R/cms.R b/R/cms.R
index 07d747bc..e9472973 100644
--- a/R/cms.R
+++ b/R/cms.R
@@ -98,7 +98,7 @@ format_param <- function(param, arg, type = "filter") {
 
   rlang::check_required(param)
   rlang::check_required(arg)
-  rlang::arg_match(type, c("filter", "sql"))
+  rlang::arg_match0(type, c("filter", "sql"))
 
   if (type %in% 'filter') {out <- paste0("filter[", param, "]=", arg)}
   if (type %in% 'sql')    {out <- paste0("[WHERE ", param, " = ", "%22", arg, "%22", "]")}
diff --git a/R/generated-globals.R b/R/generated-globals.R
index a001ff3f..9dbfe975 100644
--- a/R/generated-globals.R
+++ b/R/generated-globals.R
@@ -5,10 +5,6 @@ utils::globalVariables(c(
   ".name_1",
   # <open_payments>
   ".pdi_1",
-  # <add_counties>
-  # <ror>
-  # <change_year>
-  ":=",
   # <laboratories>
   "a2la",
   # <laboratories>
@@ -148,14 +144,19 @@ utils::globalVariables(c(
   "Description",
   # <cms_update>
   # <open_ids>
+  # <cms_distributions>
   "distribution",
   # <cms_update>
+  # <cms_distributions>
   "distribution_accessURL",
   # <cms_update>
+  # <cms_distributions>
   "distribution_format",
   # <cms_update>
+  # <cms_distributions>
   "distribution_modified",
   # <cms_update>
+  # <cms_distributions>
   "distribution_title",
   # <outpatient>
   # <prescribers>
@@ -249,6 +250,7 @@ utils::globalVariables(c(
   # <cms_update>
   # <cms_dataset_search>
   # <open_ids>
+  # <cms_distributions>
   "modified",
   # <open_payments>
   # <cols_qelig>
@@ -394,6 +396,7 @@ utils::globalVariables(c(
   # <cms_dataset_search>
   # <medline>
   # <open_ids>
+  # <cms_distributions>
   "title",
   # <medline>
   "title._value",
@@ -453,6 +456,7 @@ utils::globalVariables(c(
   # <compare_hcpcs>
   # <open_ids>
   # <cols_qelig>
+  # <cms_distributions>
   "year",
   # <add_counties>
   "zip",
diff --git a/R/provider-package.R b/R/provider-package.R
index 8575873e..fe4fe508 100644
--- a/R/provider-package.R
+++ b/R/provider-package.R
@@ -5,6 +5,7 @@
 #' @importFrom lifecycle deprecated
 #' @importFrom zeallot %<-%
 #' @importFrom stringi %s+%
-#' @importFrom rlang %||%
+#' @importFrom collapse %==%
+#' @import rlang
 ## usethis namespace: end
 NULL
diff --git a/R/reassignments.R b/R/reassignments.R
index 7ebfbe9a..5c605e96 100644
--- a/R/reassignments.R
+++ b/R/reassignments.R
@@ -79,7 +79,7 @@ reassignments <- function(npi          = NULL,
   enid_org <- enid_org %nn% check_enid(enid_org)
 
   if (!is.null(entry)) {
-    entry <- rlang::arg_match(entry, c("E", "R"))
+    entry <- rlang::arg_match0(entry, c("E", "R"))
     entry <- dplyr::case_match(entry, "E" ~ "Physician Assistant",
                                       "R" ~ "Reassignment")
     }
diff --git a/R/utils.R b/R/utils.R
index c88f9198..4c1347f1 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -1,39 +1,42 @@
-#' Infix operator for `if (!is.null(x)) y else x` statements
+#' Infix if (!is.null(x)) y else x
+#'
 #' @param x,y description
-#' @return description
+#'
+#' @returns y if x is not NULL, otherwise x
+#'
 #' @examples
 #' ccn <- 123456
-#' ccn <- ccn %nn% as.character(ccn)
-#' ccn
+#'
+#' ccn %nn% as.character(ccn)
+#'
+#' NULL %nn% as.character(ccn)
+#'
 #' @autoglobal
+#'
 #' @noRd
-`%nn%` <- function(x, y) if (!is.null(x)) y else x #nocov
-
-#' Infix operator for `not in` statements
-#' @return description
-#' @autoglobal
-#' @noRd
-`%nin%` <- function(x, table) match(x, table, nomatch = 0L) == 0L #nocov
+`%nn%` <- \(x, y) if (!is.null(x)) y else x #nocov
 
 #' Format US ZIP codes
-#' @param zip Nine-digit US ZIP code
-#' @return ZIP code, hyphenated for ZIP+4 or 5-digit ZIP.
+#'
+#' @param x Nine-digit US ZIP code
+#'
+#' @returns ZIP code, hyphenated for ZIP+4 or 5-digit ZIP.
+#'
 #' @examples
 #' format_zipcode(123456789)
 #' format_zipcode(12345)
+#'
 #' @autoglobal
+#'
 #' @noRd
-format_zipcode <- function(zip) {
+format_zipcode <- function(x) {
 
-  zip <- as.character(zip)
+  stopifnot(is.character(x))
+
+  if (grepl("^[0-9]{9}$", x))
+    paste0(substr(x, 1, 5), "-", substr(x, 6, 9))
+  else x
 
-  if (stringr::str_detect(zip, "^[[:digit:]]{9}$") == TRUE) {
-    zip <- paste0(stringr::str_sub(zip, 1, 5), "-",
-                  stringr::str_sub(zip, 6, 9))
-    return(zip)
-    } else {
-      return(zip)
-  }
 }
 
 #' Remove periods from credentials
@@ -92,37 +95,43 @@ tf_2_yn <- function(x) {
     x,
     TRUE ~ "Y",
     FALSE ~ "N",
-    .default = NULL
+    .default = NA_character_
   )
 }
 
 #' @param abb state abbreviation
-#' @return state full name
+#'
+#' @returns state full name
+#'
 #' @autoglobal
+#'
 #' @noRd
 abb2full <- function(abb,
                      arg = rlang::caller_arg(abb),
                      call = rlang::caller_env()) {
 
-  results <- dplyr::tibble(x = c(state.abb[1:8],
-                                 'DC',
-                                 state.abb[9:50],
-                                 'AS', 'GU', 'MP', 'PR', 'VI', 'UK'),
-                           y = c(state.name[1:8],
-                                 'District of Columbia',
-                                 state.name[9:50],
-                                 'American Samoa',
-                                 'Guam',
-                                 'Northern Mariana Islands',
-                                 'Puerto Rico',
-                                 'Virgin Islands',
-                                 'Unknown')) |>
+  results <- dplyr::tibble(
+    x = c(state.abb[1:8],
+          'DC',
+          state.abb[9:50],
+          'AS', 'GU', 'MP', 'PR', 'VI', 'UK'),
+    y = c(state.name[1:8],
+          'District of Columbia',
+          state.name[9:50],
+          'American Samoa',
+          'Guam',
+          'Northern Mariana Islands',
+          'Puerto Rico',
+          'Virgin Islands',
+          'Unknown')) |>
     dplyr::filter(x == abb) |>
     dplyr::pull(y)
 
   if (vctrs::vec_is_empty(results)) {
-    cli::cli_abort(c("{.val {abb}} is not a valid state abbreviation."), # nolint
-                   call = call)
+    cli::cli_abort(
+      c("{.arg {arg}} is not a valid state abbreviation."), # nolint
+      arg = arg,
+      call = call)
   }
   return(results)
 }
@@ -135,7 +144,10 @@ abb2full <- function(abb,
 #' @keywords internal
 display_long <- function(df, cols = dplyr::everything()) {
 
-  df |> dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) |>
+  df |>
+    dplyr::mutate(
+      dplyr::across(
+        dplyr::everything(), as.character)) |>
         tidyr::pivot_longer({{ cols }})
 }
 
@@ -212,11 +224,13 @@ tidyup <- function(df,
 #' @noRd
 combine <- function(df, nm, cols, sep = " ") {
 
-  return(tidyr::unite(df, col = {{ nm }},
-                      dplyr::any_of(cols),
-                      remove = TRUE,
-                      na.rm = TRUE,
-                      sep = sep))
+  tidyr::unite(
+    df,
+    col = {{ nm }},
+    dplyr::any_of(cols),
+    remove = TRUE,
+    na.rm = TRUE,
+    sep = sep)
 }
 
 #' Remove empty rows and columns
@@ -233,13 +247,15 @@ narm <- function(df) {
 #' @noRd
 format_cli <- function(df) {
 
-  x <- purrr::map2(df$x,
-                   df$y,
-                   stringr::str_c,
-                   sep = " = ",
-                   collapse = "")
-
-  cli::cli_alert_danger("No results for {.val {x}}",
-                        wrap = TRUE)
+  x <- purrr::map2(
+    df$x,
+    df$y,
+    stringr::str_c,
+    sep = " = ",
+    collapse = "")
 
+  cli::cli_alert_danger(
+    "No results for {.val {x}}",
+    wrap = TRUE
+    )
 }
diff --git a/R/zzz.R b/R/zzz.R
new file mode 100644
index 00000000..4bb47107
--- /dev/null
+++ b/R/zzz.R
@@ -0,0 +1,100 @@
+.onLoad <- function(libname, pkgname) {
+  .__distros <<- cms_distributions()
+}
+
+.onUnload <- function(libpath) {
+  remove(".__distros", envir = .GlobalEnv)
+}
+
+#' Retrieve the latest CMS distributions
+#'
+#' @autoglobal
+#'
+#' @keywords internal
+#'
+#' @export
+cms_distributions <- \() {
+
+  datasets <- c(
+    hospitals       = "^Hospital Enrollments",
+    providers       = "^Medicare Fee-For-Service  Public Provider Enrollment",
+    crosswalk       = "^Medicare Provider and Supplier Taxonomy Crosswalk",
+    orderrefer      = "^Order and Referring",
+    pending         = "^Pending Initial Logging and Tracking",
+    quality_payment = "^Quality Payment Program Experience",
+    rbcs            = "^Restructured BETOS Classification System",
+    reassignment    = "^Revalidation", # Clinic Group Practice Reassignment",
+    optout          = "^Opt Out Affidavits",
+    utilization     = "^Medicare Physician & Other Practitioners",
+    outpatient      = "^Medicare Outpatient Hospitals",
+    laboratories    = "^Provider of Services File - Clinical Laboratories",
+    beneficiaries   = "^Medicare Monthly Enrollment",
+    prescribers     = "^Medicare Part D Prescribers"
+  )
+
+  resp <- httr2::request("https://data.cms.gov/data.json") |>
+    httr2::req_perform()
+
+  resp <- as.Date(
+    regmatches(resp[["headers"]][["Last-Modified"]],
+               regexpr("[0-9]{2} [A-Za-z]{3} [0-9]{4}",
+                       resp[["headers"]][["Last-Modified"]],
+                       perl = TRUE)),
+    format = "%d %b %Y")
+
+  arrow_cms <- arrow::read_json_arrow(
+    file = "https://data.cms.gov/data.json",
+    col_select = c("dataset"),
+    as_data_frame = TRUE)
+
+  arrow_cms <- arrow_cms[["dataset"]][[1]][
+    c("title", "modified", "distribution")] |>
+    collapse::fsubset(
+      grepl(
+        paste0(
+          unname(datasets),
+          collapse = "|"),
+        title,
+        perl = TRUE
+      )
+    ) |>
+    tidyr::unnest(
+      cols = distribution,
+      names_sep = "_",
+      keep_empty = TRUE) |>
+    collapse::fsubset(
+      distribution_format %==% "API") |>
+    collapse::fcompute(
+      year = as.integer(
+        regmatches(
+          distribution_title,
+          regexpr(
+            "\\d{4}(?=-\\d{2}-\\d{2})",
+            distribution_title,
+            perl = TRUE))),
+      modified = as.Date.character(distribution_modified),
+      distribution = regmatches(
+        distribution_accessURL,
+        regexpr(
+          "(?<=dataset/).*?(?=/data)",
+          distribution_accessURL,
+          perl = TRUE)),
+      keep = "title") |>
+    collapse::fsubset(
+      data.table::rleid(
+        title,
+        year,
+        modified) != collapse::flag(
+          data.table::rleid(
+            title,
+            year,
+            modified
+          ),
+          -1)
+    )
+
+  list(
+    last_modified = resp,
+    distributions = arrow_cms
+  )
+}
diff --git a/data-raw/dev/distro.R b/data-raw/dev/distro.R
new file mode 100644
index 00000000..be5f83d2
--- /dev/null
+++ b/data-raw/dev/distro.R
@@ -0,0 +1,142 @@
+`%==%` <- collapse::`%==%`
+
+datasets <- c(
+  hospitals = "^Hospital Enrollments",
+  providers = "^Medicare Fee-For-Service  Public Provider Enrollment",
+  crosswalk = "^Medicare Provider and Supplier Taxonomy Crosswalk",
+  orderrefer = "^Order and Referring",
+  pending = "^Pending Initial Logging and Tracking",
+  quality_payment = "^Quality Payment Program Experience",
+  rbcs = "^Restructured BETOS Classification System",
+  reassignment = "^Revalidation Clinic Group Practice Reassignment",
+  optout = "^Opt Out Affidavits",
+  utilization = "^Medicare Physician & Other Practitioners",
+  outpatient = "^Medicare Outpatient Hospitals",
+  laboratories = "^Provider of Services File - Clinical Laboratories",
+  beneficiaries = "^Medicare Monthly Enrollment",
+  prescribers = "^Medicare Part D Prescribers"
+)
+
+cms_distro <- \() {
+  resp <- httr2::request("https://data.cms.gov/data.json") |>
+    httr2::req_perform()
+
+  resp <- arknpi::as_date(
+    regmatches(resp[["headers"]][["Last-Modified"]],
+    regexpr("[0-9]{2} [A-Za-z]{3} [0-9]{4}",
+            resp[["headers"]][["Last-Modified"]],
+            perl = TRUE)),
+    fmt = "%d %b %Y")
+
+  arrow_cms <- arrow::read_json_arrow(
+    file = "https://data.cms.gov/data.json",
+    col_select = c("dataset"))
+
+  arrow_cms <- arrow_cms[["dataset"]][[1]][
+    c("title", "modified", "distribution")] |>
+    collapse::fsubset(
+      codex::sf_detect(
+        title,
+        codex::sf_smush(
+          unname(datasets), "|")
+        )
+      ) |>
+    tidyr::unnest(
+      cols = distribution,
+      names_sep = "_",
+      keep_empty = TRUE) |>
+    collapse::fsubset(
+      distribution_format %==% "API") |>
+    collapse::fcompute(
+      year = as.integer(
+        regmatches(
+          distribution_title,
+          regexpr(
+            "\\d{4}(?=-\\d{2}-\\d{2})",
+            distribution_title,
+            perl = TRUE))),
+      modified = as.Date.character(distribution_modified),
+      distribution = regmatches(
+        distribution_accessURL,
+        regexpr(
+          "(?<=dataset/).*?(?=/data)",
+          distribution_accessURL,
+          perl = TRUE)),
+      keep = "title") |>
+    collapse::fsubset(
+      data.table::rleid(
+        title,
+        year,
+        modified) != collapse::flag(
+        data.table::rleid(
+          title,
+          year,
+          modified
+          ),
+        -1)
+      )
+
+  list(
+    last_modified = resp,
+    distributions = arrow_cms
+    )
+}
+
+c(
+  "Managing Clinician Aggregation Group Performance",
+  "Quarterly Prescription Drug Plan Formulary, Pharmacy Network, and Pricing Information",
+  "Medicare Clinical Laboratory Fee Schedule Private Payer Rates and Volumes",
+  "Hospital Price Transparency Enforcement Activities and Outcomes",
+  "Federally Qualified Health Center Enrollments",
+  "Rural Health Clinic Enrollments"
+)
+
+
+body <- RcppSimdJson::fparse(
+  resp$body
+)[["dataset"]][
+  c("title",
+    "modified",
+    "distribution")]
+
+
+
+unnest_dt <- function(tbl, col) {
+
+  tbl <- data.table::as.data.table(tbl)
+
+  col <- rlang::ensyms(col)
+
+  col_names <- rlang::syms(setdiff(colnames(tbl), as.character(col)))
+
+  tbl <- rlang::eval(
+    rlang::expr(
+      tbl[,
+          as.character(unlist(!!!col)),
+          by = list(!!!col_names)])
+  )
+
+  colnames(tbl) <- c(as.character(col_names), as.character(col))
+
+  tbl
+}
+
+
+unnest_dt2 <- function(tbl, ...) {
+
+  tbl <- data.table::as.data.table(tbl)
+
+  col <- rlang::ensyms(...)
+
+  clnms <- rlang::syms(setdiff(colnames(tbl), as.character(col)))
+
+  tbl <- data.table::as.data.table(tbl)
+
+  tbl <- rlang::eval(
+    rlang::expr(tbl[, lapply(.SD, unlist), by = list(!!!clnms), .SDcols = as.character(col)])
+  )
+
+  colnames(tbl) <- c(as.character(clnms), as.character(col))
+
+  tbl
+}
diff --git a/man/cms_distributions.Rd b/man/cms_distributions.Rd
new file mode 100644
index 00000000..65b67d2b
--- /dev/null
+++ b/man/cms_distributions.Rd
@@ -0,0 +1,12 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/zzz.R
+\name{cms_distributions}
+\alias{cms_distributions}
+\title{Retrieve the latest CMS distributions}
+\usage{
+cms_distributions()
+}
+\description{
+Retrieve the latest CMS distributions
+}
+\keyword{internal}
diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml
index 3ebc2987..be3f4500 100644
--- a/pkgdown/_pkgdown.yml
+++ b/pkgdown/_pkgdown.yml
@@ -5,8 +5,8 @@ template:
   light-switch: true
   bootswatch: simplex
   bslib:
-    base_font: {google: "IBM Plex Sans"}
-    heading_font: {google: "Kanit"}
+    base_font: {google: "Roboto Condensed"}
+    heading_font: {google: "Roboto"}
     code_font: {google: "JetBrains Mono"}
 
 home:
diff --git a/tests/testthat/_snaps/utils.md b/tests/testthat/_snaps/utils.md
index e8f6eea5..839ca42e 100644
--- a/tests/testthat/_snaps/utils.md
+++ b/tests/testthat/_snaps/utils.md
@@ -4,7 +4,7 @@
       abb2full("YN")
     Condition
       Error:
-      ! "YN" is not a valid state abbreviation.
+      ! `"YN"` is not a valid state abbreviation.
 
 # format_param() works
 
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
index 48043f3d..23a15a66 100644
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@@ -1,6 +1,6 @@
 test_that("format_zipcode() works", {
-  expect_equal(format_zipcode(123456789), "12345-6789")
-  expect_equal(format_zipcode(12345), "12345")
+  expect_equal(format_zipcode("123456789"), "12345-6789")
+  expect_equal(format_zipcode("12345"), "12345")
   })
 
 test_that("clean_credentials() works", {