Merge branch 'main' into pacta.multi.loanbook

RMI-PACTA · Jan 9, 2025 · aa19028 · aa19028
2 parents a737e18 + 72e74d8
commit aa19028
Show file tree

Hide file tree

Showing 9 changed files with 68 additions and 66 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -19,3 +19,4 @@
 ^revdep$
 ^CRAN-RELEASE$
 ^pkgdown$
+^CRAN-SUBMISSION$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: r2dii.match
 Title: Tools to Match Corporate Lending Portfolios with Climate Data
-Version: 0.2.1.9001
+Version: 0.3.0.9000
 Authors@R: 
     c(person(given = "Alex",
              family = "Axthelm",

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,11 @@
 # r2dii.match (development version)
 
+# r2dii.match 0.3.0
+
+## Breaking changes
+
+* `match_name()` gains a new argument in its signature, `sector_classification`, which is placed before the `...` argument. Where users have used `...` to pass additional unnamed arguments by position, they will need to update their code to pass these arguments by name so that they are not confused as an input to `sector_classfication`. The new argument `sector_classification` is optional and defaults to `r2dii.data::sector_classifications`. Explicitly passing a `sector_classification` to `match_name()`, enables users to use their own sector classification systems to match loan books to the abcd, assuming the `sector_classification` is mapped appropriately to PACTA sectors and has the same format as `r2dii.data::sector_classifications`. Own sector classifications can no longer be passed to `match_name()` via options.
+
 # r2dii.match 0.2.1
 
 * r2dii.match is now [stable](https://lifecycle.r-lib.org/articles/stages.html).

diff --git a/R/add_sector_and_borderline.R b/R/add_sector_and_borderline.R
@@ -31,7 +31,7 @@
 #' out %>%
 #'   select(new_columns, everything())
 #' @noRd
-add_sector_and_borderline <- function(data) {
+add_sector_and_borderline <- function(data, sector_classification = default_sector_classification()) {
   crucial <- c(
     "sector_classification_system", "sector_classification_direct_loantaker"
   )
@@ -40,40 +40,31 @@ add_sector_and_borderline <- function(data) {
     check_crucial_names(crucial) %>%
     # Coerce crucial columns to character for more robust join()
     purrr::modify_at(crucial, as.character) %>%
-    check_classification(column = "sector_classification_system") %>%
-    check_classification(column = "sector_classification_direct_loantaker")
+    check_classification(column = "sector_classification_system", classification = sector_classification) %>%
+    check_classification(column = "sector_classification_direct_loantaker", classification = sector_classification)
 
   out <- left_join(
-    checked, get_classifications(),
+    checked, sector_classification,
     by = set_names(c("code_system", "code"), crucial)
   )
 
   restore_typeof(data, out, crucial)
 }
 
-get_classifications <- function() {
-  custom_sector_classifications() %||% default_sector_classification()
-}
-
 default_sector_classification <- function() {
   r2dii.data::sector_classifications
 }
 
-custom_sector_classifications <- function() {
-  getOption("r2dii.match.sector_classifications")
-}
-
 check_classification <- function(data,
                                  column,
                                  # FIXME: Remove needless argument?
                                  classification = NULL) {
-  classification <- classification %||% get_classifications()
   # To call columns from both data and classification with the same colname
   reference <- rename_as_loanbook(classification)
 
   all_unknown <- !any(data[[column]] %in% reference[[column]])
   known <- unique(reference[[column]])
-  if (all_unknown && is.null(custom_sector_classifications())) {
+  if (all_unknown) {
     abort_all_sec_classif_unknown(column, known)
   }
 

diff --git a/R/match_name.R b/R/match_name.R
@@ -7,12 +7,6 @@
 #' are assigned. The similarity between aliases in each of the loanbook and abcd
 #'  is scored using [stringdist::stringsim()].
 #'
-#' @section Package options:
-#' `r2dii.match.sector_classifications`: Allows you to use your own
-#' `sector_classififications` instead of the default. This feature is
-#' experimental and may be dropped and/or become a new argument to
-#' `match_name()`.
-#'
 #' @template alias-assign
 #' @template ignores-but-preserves-existing-groups
 #'
@@ -35,6 +29,9 @@
 #'   character string, it assumes identical join columns between `loanbook` and
 #'   `abcd`. If a named character vector, it uses the name as the join column of `loanbook` and
 #'   the value as the join column of `abcd`.
+#' @param sector_classification A data frame containing sector classifications
+#'   in the same format as `r2dii.data::sector_classifications`. The default
+#'   value is `r2dii.data::sector_classifications`.
 #' @param ... Arguments passed on to [stringdist::stringsim()].
 #'
 #' @family main functions
@@ -77,17 +74,9 @@
 #'
 #' match_name(loanbook, abcd, min_score = 0.9)
 #'
-#' # Use your own `sector_classifications`
-#' your_classifications <- tibble(
-#'   sector = "power",
-#'   borderline = FALSE,
-#'   code = "D35.11",
-#'   code_system = "XYZ"
-#' )
-#'
 #' # match on LEI
 #' loanbook <- tibble(
-#'   sector_classification_system = "XYZ",
+#'   sector_classification_system = "NACE",
 #'   sector_classification_direct_loantaker = "D35.11",
 #'   id_ultimate_parent = "UP15",
 #'   name_ultimate_parent = "Won't fuzzy match",
@@ -102,9 +91,15 @@
 #'   lei = "LEI123"
 #' )
 #'
-#' match_name(loanbook, abcd, join_by = c(lei_direct_loantaker = "lei"))
+#' match_name(loanbook, abcd, join_id = c(lei_direct_loantaker = "lei"))
 #'
-#' restore <- options(r2dii.match.sector_classifications = your_classifications)
+#' # Use your own `sector_classifications`
+#' your_classifications <- tibble(
+#'   sector = "power",
+#'   borderline = FALSE,
+#'   code = "D35.11",
+#'   code_system = "XYZ"
+#' )
 #'
 #' loanbook <- tibble(
 #'   sector_classification_system = "XYZ",
@@ -120,7 +115,7 @@
 #'   sector = "power"
 #' )
 #'
-#' match_name(loanbook, abcd)
+#' match_name(loanbook, abcd, sector_classification = your_classifications)
 #'
 #' # Cleanup
 #' options(restore)
@@ -133,6 +128,7 @@ match_name <- function(loanbook,
                        p = 0.1,
                        overwrite = NULL,
                        join_id = NULL,
+                       sector_classification = default_sector_classification(),
                        ...) {
   restore <- options(datatable.allow.cartesian = TRUE)
   on.exit(options(restore), add = TRUE)
@@ -152,7 +148,7 @@ match_name <- function(loanbook,
 
     prep_abcd <- dplyr::distinct(prep_abcd)
 
-    prep_lbk <- may_add_sector_and_borderline(loanbook)
+    prep_lbk <- may_add_sector_and_borderline(loanbook, sector_classification = sector_classification)
     prep_lbk <- distinct(prep_lbk)
 
     join_matched <- dplyr::inner_join(
@@ -188,6 +184,7 @@ match_name <- function(loanbook,
       method = method,
       p = p,
       overwrite = overwrite,
+      sector_classification = sector_classification,
       ...
     )
   } else {
@@ -217,6 +214,7 @@ match_name_impl <- function(loanbook,
                             method = "jw",
                             p = 0.1,
                             overwrite = NULL,
+                            sector_classification = default_sector_classification(),
                             ...) {
 
   old_groups <- dplyr::groups(loanbook)
@@ -226,7 +224,7 @@ match_name_impl <- function(loanbook,
   if (!allow_reserved_columns()) abort_reserved_column(loanbook)
   loanbook_rowid <- tibble::rowid_to_column(loanbook)
 
-  prep_lbk <- restructure_loanbook(loanbook_rowid, overwrite = overwrite)
+  prep_lbk <- restructure_loanbook(loanbook_rowid, overwrite = overwrite, sector_classification = sector_classification)
   prep_abcd <- restructure_abcd(abcd)
 
   if (by_sector) {

diff --git a/R/restructure_abcd.R b/R/restructure_abcd.R
@@ -51,7 +51,7 @@ restructure_abcd <- function(data) {
 #'
 #' restructure_loanbook(lbk, overwrite = overwrite_demo)
 #' @noRd
-restructure_loanbook <- function(data, overwrite = NULL) {
+restructure_loanbook <- function(data, overwrite = NULL, sector_classification = default_sector_classification()) {
   check_prep_loanbook_overwrite(overwrite)
   check_prepare_loanbook_data(data)
 
@@ -63,7 +63,7 @@ restructure_loanbook <- function(data, overwrite = NULL) {
   name_level <- extract_level_names(data, prefix = "name_")
   important_columns <- c("rowid", id_level, name_level)
 
-  out <- may_add_sector_and_borderline(data)
+  out <- may_add_sector_and_borderline(data, sector_classification = sector_classification)
   out <- select(
     out, all_of(c("rowid", important_columns, "sector", "borderline"))
   )
@@ -77,15 +77,15 @@ restructure_loanbook <- function(data, overwrite = NULL) {
   out
 }
 
-may_add_sector_and_borderline <- function(data) {
+may_add_sector_and_borderline <- function(data, sector_classification = default_sector_classification()) {
   if (lacks_borderline(data) || lacks_sector(data)) {
     abort("Must have both `sector` and `borderline`")
   }
 
   if (has_sector(data) & has_borderline(data)) {
     data2 <- data
   } else {
-    data2 <- add_sector_and_borderline(data)
+    data2 <- add_sector_and_borderline(data, sector_classification = sector_classification)
   }
 
   data2

diff --git a/cran-comments.md b/cran-comments.md
@@ -1,3 +1,14 @@
 ## R CMD check results
 
-0 errors | 0 warnings | 0 notes
+❯ checking for future file timestamps ... NOTE
+  unable to verify current time
+
+0 errors ✔ | 0 warnings ✔ | 1 note ✖
+
+## revdepcheck results
+
+── CHECK ────────────────────────────────────────────── 2 packages ──
+✔ r2dii.plot 0.4.0                       ── E: 0     | W: 0     | N: 0    
+✔ r2dii.analysis 0.4.0                   ── E: 0     | W: 0     | N: 0    
+OK: 2                                                              
+BROKEN: 0
diff --git a/man/match_name.Rd b/man/match_name.Rd
diff --git a/tests/testthat/test-match_name.R b/tests/testthat/test-match_name.R
@@ -700,19 +700,19 @@ test_that("errors if any id_loan is duplicated (#349)", {
   expect_error(class = "duplicated_id_loan", match_name(lbk, abcd))
 })
 
-test_that("allows custom `sector_classifications` via options() (#354)", {
-  loanbook <- fake_lbk(sector_classification_system = "XYZ")
+test_that("allows custom `sector_classification`", {
+  loanbook <- fake_lbk(
+    sector_classification_system = "XYZ",
+    sector_classification_direct_loantaker = "D35.11"
+  )
   abcd <- fake_abcd()
   custom_classification <- tibble::tribble(
     ~sector,       ~borderline,  ~code, ~code_system,
     "power",             FALSE, "D35.11",        "XYZ",
   )
 
-  # Allow users to inject their own `sector_classifications`
-  old <- options(r2dii.match.sector_classifications = custom_classification)
-  out <- match_name(loanbook, abcd)
+  out <- match_name(loanbook, abcd, sector_classification = custom_classification)
   expect_equal(nrow(out), 1L)
-  options(old)
 })
 
 test_that("with `join_id`, joins as expected (#135)", {