From c3727a4db6e834f6d77a475f0581e17b9c17ff09 Mon Sep 17 00:00:00 2001 From: Andrew Bruce Date: Mon, 4 Dec 2023 07:54:22 -0500 Subject: [PATCH] metadata refactoring --- R/globals.R | 2 + R/metadata.R | 209 +++++++++++++++-------- R/prescribers.R | 3 +- man/betos.Rd | 11 ++ vignettes/articles/linking-providers.Rmd | 18 +- 5 files changed, 172 insertions(+), 71 deletions(-) diff --git a/R/globals.R b/R/globals.R index 2ff56edc..519cb1eb 100644 --- a/R/globals.R +++ b/R/globals.R @@ -126,6 +126,8 @@ utils::globalVariables(c( "landingPage", # "modified", # "references", # + "accrualPeriodicity", # + "temporal", # "distribution_title", # "distribution_modified", # "distribution_accessURL", # diff --git a/R/metadata.R b/R/metadata.R index 4c37def9..d15689e9 100644 --- a/R/metadata.R +++ b/R/metadata.R @@ -1,12 +1,11 @@ #' @param title name of the api -#' @return A [tibble()] containing the updated ids. +#' @return A list of metadata describing each API's dataset #' @examplesIf interactive() #' metadata.store("Facility Affiliation Data") #' metadata.store("National Downloadable File") #' @autoglobal #' @noRd metadata.store <- function(title) { - #------------------------------------------------ url.store <- paste0('https://data.cms.gov/', 'provider-data/api/1/metastore/', @@ -50,23 +49,23 @@ metadata.store <- function(title) { simplifyVector = TRUE) schema <- dplyr::tibble( - title = response$title, - description = response$description, - uuid = response$identifier, - identifier = response$keyword$identifier, - distribution = response$distribution$identifier, - landing_page = response$landingPage, - publisher = response$publisher$data$name, - contact = response$contactPoint$hasEmail, - date_issued = response$issued, - date_modified = response$modified, + title = response$title, + description = response$description, + uuid = response$identifier, + identifier = response$keyword$identifier, + distribution = response$distribution$identifier, + landing_page = response$landingPage, + publisher = response$publisher$data$name, + contact = response$contactPoint$hasEmail, + date_issued = response$issued, + date_modified = response$modified, datetime_modified = response$`%modified`, - date_released = response$released) + date_released = response$released) #------------------------------------------------ url.query <- glue::glue('https://data.cms.gov/', 'provider-data/api/1/datastore/query/', - '{schema$distribution}', + '{schema$distribution[1]}', '?limit=1&offset=0&count=true&results=true', '&schema=true&keys=true&format=json&rowIds=true') @@ -81,56 +80,23 @@ metadata.store <- function(title) { names = list(names(response$results))) #------------------------------------------------ - return(list(store = store, schema = schema, query = query)) -} - -#' @param uuid distribution id of the api -#' @return A numeric vector containing the total rows in the dataset. -#' @examplesIf interactive() -#' metadata.rows('2457ea29-fc82-48b0-86ec-3b0755de7515') # providers() -#' metadata.rows('a85fa452-dee9-4c8f-8156-665238b8492f') # hospitals() -#' @autoglobal -#' @noRd -metadata.rows <- function(uuid) { - - url <- glue::glue('https://data.cms.gov/', - 'data-api/v1/dataset/', - '{uuid}/data-viewer/stats') - - response <- httr2::request(url) |> - httr2::req_perform() |> - httr2::resp_body_json(check_type = FALSE, - simplifyVector = TRUE) - - return(as.integer(response$data$total_rows)) -} - -#' @param uuid distribution id of the api -#' @return A list containing the total columns and rows in the dataset, -#' as well as the column names. -#' @examplesIf interactive() -#' metadata.viewer('2457ea29-fc82-48b0-86ec-3b0755de7515') # providers() -#' metadata.viewer('a85fa452-dee9-4c8f-8156-665238b8492f') # hospitals() -#' @autoglobal -#' @noRd -metadata.viewer <- function(uuid) { - - url <- glue::glue('https://data.cms.gov/', - 'data-api/v1/dataset/', - '{uuid}/data-viewer?offset=0&size=1') - - response <- httr2::request(url) |> - httr2::req_perform() |> - httr2::resp_body_json(check_type = FALSE, - simplifyVector = TRUE) - - rows <- response$meta$total_rows - cols <- response$meta$headers - - list( - dimensions = paste0(length(cols), ' columns x ', format(rows, big.mark = ","), ' rows'), - fields = cols + results <- list( + title = store$title, + description = schema$description[[1]], + publisher = store$name, + uuid = store$identifier, + distribution = schema$distribution[[1]], + date_issued = store$issued, + date_modified = store$modified, + date_released = store$released, + period = store$period, + timelength_days = store$timelength_days, + dimensions = paste0(query$columns, ' columns x ', format(query$rows, big.mark = ","), ' rows'), + fields = query$names[[1]], + landing_page = store$landingPage, + data_dictionary = "https://data.cms.gov/provider-data/sites/default/files/data_dictionaries/physician/DOC_Data_Dictionary.pdf" ) + return(results) } #' @param title name of the api @@ -155,7 +121,9 @@ metadata.json <- function(title, first = TRUE) { distribution, landingPage, modified, - references) |> + references, + accrualPeriodicity, + temporal) |> dplyr::filter(title == {{ title }}) |> tidyr::unnest(references) @@ -176,14 +144,15 @@ metadata.json <- function(title, first = TRUE) { results <- dplyr::left_join(resp, dst, by = dplyr::join_by(title)) |> dplyr::select(-title) |> - dplyr::select(title = distribution_title, + dplyr::select(title = distribution_title, description, - dictionary = describedBy, - methodology = references, + dictionary = describedBy, + methodology = references, landing_page = landingPage, distribution, - modified = distribution_modified) |> - dplyr::mutate(modified = lubridate::ymd(modified)) |> + modified = distribution_modified, + accrualPeriodicity) |> + dplyr::mutate(modified = lubridate::ymd(modified)) |> provider::make_interval(start = modified) |> tidyr::separate_wider_delim(title, delim = " : ", names = c("title", NA)) @@ -191,5 +160,107 @@ metadata.json <- function(title, first = TRUE) { if (first) results <- dplyr::slice_head(results) + url <- glue::glue('https://data.cms.gov/', + 'data-api/v1/dataset/', + '{results$distribution}', + '/data-viewer?offset=0&size=1') + + response <- httr2::request(url) |> + httr2::req_perform() |> + httr2::resp_body_json(check_type = FALSE, + simplifyVector = TRUE) + + rows <- response$meta$total_rows + cols <- response$meta$headers + + iso_8601 <- function(x) { + dplyr::case_match( + x, + "R/P10Y" ~ "Decennial", + "R/P4Y" ~ "Quadrennial", + "R/P1Y" ~ "Annual", + c("R/P2M", "R/P0.5M") ~ "Bimonthly", + "R/P3.5D" ~ "Semiweekly", + "R/P1D" ~ "Daily", + c("R/P2W", "R/P0.5W") ~ "Biweekly", + "R/P6M" ~ "Semiannual", + "R/P2Y" ~ "Biennial", + "R/P3Y" ~ "Triennial", + "R/P0.33W" ~ "Three Times a Week", + "R/P0.33M" ~ "Three Times a Month", + "R/PT1S" ~ "Continuously Updated", + "R/P1M" ~ "Monthly", + "R/P3M" ~ "Quarterly", + "R/P0.5M" ~ "Semimonthly", + "R/P4M" ~ "Three Times a Year", + "R/P1W" ~ "Weekly", + "R/PT1H" ~ "Hourly") + } + + results <- list( + title = results$title, + description = results$description, + publisher = " ", + distribution = results$distribution, + update_schedule = iso_8601(results$accrualPeriodicity), + date_modified = results$modified, + period = results$period, + timelength_days = results$timelength_days, + dimensions = paste0(length(cols), ' columns x ', format(rows, big.mark = ","), ' rows'), + fields = cols, + landing_page = results$landing_page, + data_dictionary = results$dictionary, + methodology = results$methodology) + return(results) } + +#' @param uuid distribution id of the api +#' @return A numeric vector containing the total rows in the dataset. +#' @examplesIf interactive() +#' metadata.rows('2457ea29-fc82-48b0-86ec-3b0755de7515') # providers() +#' metadata.rows('a85fa452-dee9-4c8f-8156-665238b8492f') # hospitals() +#' @autoglobal +#' @noRd +metadata.rows <- function(uuid) { + + url <- glue::glue('https://data.cms.gov/', + 'data-api/v1/dataset/', + '{uuid}/data-viewer/stats') + + response <- httr2::request(url) |> + httr2::req_perform() |> + httr2::resp_body_json(check_type = FALSE, + simplifyVector = TRUE) + + return(as.integer(response$data$total_rows)) +} + +#' @param uuid distribution id of the api +#' @return A list containing the total columns and rows in the dataset, +#' as well as the column names. +#' @examplesIf interactive() +#' metadata.viewer('2457ea29-fc82-48b0-86ec-3b0755de7515') # providers() +#' metadata.viewer('a85fa452-dee9-4c8f-8156-665238b8492f') # hospitals() +#' @autoglobal +#' @noRd + +metadata.viewer <- function(uuid) { + + url <- glue::glue('https://data.cms.gov/', + 'data-api/v1/dataset/', + '{uuid}/data-viewer?offset=0&size=1') + + response <- httr2::request(url) |> + httr2::req_perform() |> + httr2::resp_body_json(check_type = FALSE, + simplifyVector = TRUE) + + rows <- response$meta$total_rows + cols <- response$meta$headers + + list( + dimensions = paste0(length(cols), ' columns x ', format(rows, big.mark = ","), ' rows'), + fields = cols + ) +} diff --git a/R/prescribers.R b/R/prescribers.R index a48a99f0..255b7639 100644 --- a/R/prescribers.R +++ b/R/prescribers.R @@ -491,7 +491,8 @@ prescribers_ <- function(year = rx_years(), fct_src <- function(x) { factor(x, levels = c("S", "T"), - labels = c("Medicare Specialty Code", "Taxonomy Code Classification")) + labels = c("Medicare Specialty Code", + "Taxonomy Code Classification")) } #' @param df data frame diff --git a/man/betos.Rd b/man/betos.Rd index 4e14953a..5a4601fb 100644 --- a/man/betos.Rd +++ b/man/betos.Rd @@ -95,3 +95,14 @@ assigned to a 6-character RBCS taxonomy code. Annually } +\examples{ +\dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +betos(hcpcs = "0001U") +betos(category = "Test") +betos(subcategory = "General Laboratory") +betos(family = "Immunoassay") +betos(procedure = "M") +betos(family = "No RBCS Family") +betos(rbcs = "TL001N") +\dontshow{\}) # examplesIf} +} diff --git a/vignettes/articles/linking-providers.Rmd b/vignettes/articles/linking-providers.Rmd index 3e923bbb..807ed2ba 100644 --- a/vignettes/articles/linking-providers.Rmd +++ b/vignettes/articles/linking-providers.Rmd @@ -24,7 +24,6 @@ options(scipen = 999) ```{r message=FALSE, warning=FALSE} library(provider) -library(tibble) library(vctrs) library(dplyr) library(purrr) @@ -36,6 +35,23 @@ library(gt) ## Individual Provider +```{r} +library(chainr) + +mark <- chain( + providers = providers(pac = 7810891009), + reassignment = reassignments(pac = 7810891009), + clinicians = clinicians(pac = 7810891009), + nppes = nppes(npi = 1043245657), + referrals = order_refer(npi = 1043245657), + affiliations = affiliations(pac = 7810891009), + hospitals = affiliations(pac = 7810891009) |> pull(facility_ccn) |> map_dfr(~hospitals(facility_ccn = .x)), + utilization = utilization_(npi = 1043245657, type = "Provider")) + +mark +``` + + ```{r} vctrs::vec_rbind( display_long(providers(pac = 7810891009)) |> tibble::add_column(source = "`providers()`"),