Skip to content

Commit

Permalink
Merge branch 'main' into pacta.multi.loanbook
Browse files Browse the repository at this point in the history
  • Loading branch information
cjyetman authored Jan 9, 2025
2 parents a737e18 + 72e74d8 commit aa19028
Show file tree
Hide file tree
Showing 9 changed files with 68 additions and 66 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@
^revdep$
^CRAN-RELEASE$
^pkgdown$
^CRAN-SUBMISSION$
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: r2dii.match
Title: Tools to Match Corporate Lending Portfolios with Climate Data
Version: 0.2.1.9001
Version: 0.3.0.9000
Authors@R:
c(person(given = "Alex",
family = "Axthelm",
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# r2dii.match (development version)

# r2dii.match 0.3.0

## Breaking changes

* `match_name()` gains a new argument in its signature, `sector_classification`, which is placed before the `...` argument. Where users have used `...` to pass additional unnamed arguments by position, they will need to update their code to pass these arguments by name so that they are not confused as an input to `sector_classfication`. The new argument `sector_classification` is optional and defaults to `r2dii.data::sector_classifications`. Explicitly passing a `sector_classification` to `match_name()`, enables users to use their own sector classification systems to match loan books to the abcd, assuming the `sector_classification` is mapped appropriately to PACTA sectors and has the same format as `r2dii.data::sector_classifications`. Own sector classifications can no longer be passed to `match_name()` via options.

# r2dii.match 0.2.1

* r2dii.match is now [stable](https://lifecycle.r-lib.org/articles/stages.html).
Expand Down
19 changes: 5 additions & 14 deletions R/add_sector_and_borderline.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
#' out %>%
#' select(new_columns, everything())
#' @noRd
add_sector_and_borderline <- function(data) {
add_sector_and_borderline <- function(data, sector_classification = default_sector_classification()) {
crucial <- c(
"sector_classification_system", "sector_classification_direct_loantaker"
)
Expand All @@ -40,40 +40,31 @@ add_sector_and_borderline <- function(data) {
check_crucial_names(crucial) %>%
# Coerce crucial columns to character for more robust join()
purrr::modify_at(crucial, as.character) %>%
check_classification(column = "sector_classification_system") %>%
check_classification(column = "sector_classification_direct_loantaker")
check_classification(column = "sector_classification_system", classification = sector_classification) %>%
check_classification(column = "sector_classification_direct_loantaker", classification = sector_classification)

out <- left_join(
checked, get_classifications(),
checked, sector_classification,
by = set_names(c("code_system", "code"), crucial)
)

restore_typeof(data, out, crucial)
}

get_classifications <- function() {
custom_sector_classifications() %||% default_sector_classification()
}

default_sector_classification <- function() {
r2dii.data::sector_classifications
}

custom_sector_classifications <- function() {
getOption("r2dii.match.sector_classifications")
}

check_classification <- function(data,
column,
# FIXME: Remove needless argument?
classification = NULL) {
classification <- classification %||% get_classifications()
# To call columns from both data and classification with the same colname
reference <- rename_as_loanbook(classification)

all_unknown <- !any(data[[column]] %in% reference[[column]])
known <- unique(reference[[column]])
if (all_unknown && is.null(custom_sector_classifications())) {
if (all_unknown) {
abort_all_sec_classif_unknown(column, known)
}

Expand Down
38 changes: 18 additions & 20 deletions R/match_name.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,6 @@
#' are assigned. The similarity between aliases in each of the loanbook and abcd
#' is scored using [stringdist::stringsim()].
#'
#' @section Package options:
#' `r2dii.match.sector_classifications`: Allows you to use your own
#' `sector_classififications` instead of the default. This feature is
#' experimental and may be dropped and/or become a new argument to
#' `match_name()`.
#'
#' @template alias-assign
#' @template ignores-but-preserves-existing-groups
#'
Expand All @@ -35,6 +29,9 @@
#' character string, it assumes identical join columns between `loanbook` and
#' `abcd`. If a named character vector, it uses the name as the join column of `loanbook` and
#' the value as the join column of `abcd`.
#' @param sector_classification A data frame containing sector classifications
#' in the same format as `r2dii.data::sector_classifications`. The default
#' value is `r2dii.data::sector_classifications`.
#' @param ... Arguments passed on to [stringdist::stringsim()].
#'
#' @family main functions
Expand Down Expand Up @@ -77,17 +74,9 @@
#'
#' match_name(loanbook, abcd, min_score = 0.9)
#'
#' # Use your own `sector_classifications`
#' your_classifications <- tibble(
#' sector = "power",
#' borderline = FALSE,
#' code = "D35.11",
#' code_system = "XYZ"
#' )
#'
#' # match on LEI
#' loanbook <- tibble(
#' sector_classification_system = "XYZ",
#' sector_classification_system = "NACE",
#' sector_classification_direct_loantaker = "D35.11",
#' id_ultimate_parent = "UP15",
#' name_ultimate_parent = "Won't fuzzy match",
Expand All @@ -102,9 +91,15 @@
#' lei = "LEI123"
#' )
#'
#' match_name(loanbook, abcd, join_by = c(lei_direct_loantaker = "lei"))
#' match_name(loanbook, abcd, join_id = c(lei_direct_loantaker = "lei"))
#'
#' restore <- options(r2dii.match.sector_classifications = your_classifications)
#' # Use your own `sector_classifications`
#' your_classifications <- tibble(
#' sector = "power",
#' borderline = FALSE,
#' code = "D35.11",
#' code_system = "XYZ"
#' )
#'
#' loanbook <- tibble(
#' sector_classification_system = "XYZ",
Expand All @@ -120,7 +115,7 @@
#' sector = "power"
#' )
#'
#' match_name(loanbook, abcd)
#' match_name(loanbook, abcd, sector_classification = your_classifications)
#'
#' # Cleanup
#' options(restore)
Expand All @@ -133,6 +128,7 @@ match_name <- function(loanbook,
p = 0.1,
overwrite = NULL,
join_id = NULL,
sector_classification = default_sector_classification(),
...) {
restore <- options(datatable.allow.cartesian = TRUE)
on.exit(options(restore), add = TRUE)
Expand All @@ -152,7 +148,7 @@ match_name <- function(loanbook,

prep_abcd <- dplyr::distinct(prep_abcd)

prep_lbk <- may_add_sector_and_borderline(loanbook)
prep_lbk <- may_add_sector_and_borderline(loanbook, sector_classification = sector_classification)
prep_lbk <- distinct(prep_lbk)

join_matched <- dplyr::inner_join(
Expand Down Expand Up @@ -188,6 +184,7 @@ match_name <- function(loanbook,
method = method,
p = p,
overwrite = overwrite,
sector_classification = sector_classification,
...
)
} else {
Expand Down Expand Up @@ -217,6 +214,7 @@ match_name_impl <- function(loanbook,
method = "jw",
p = 0.1,
overwrite = NULL,
sector_classification = default_sector_classification(),
...) {

old_groups <- dplyr::groups(loanbook)
Expand All @@ -226,7 +224,7 @@ match_name_impl <- function(loanbook,
if (!allow_reserved_columns()) abort_reserved_column(loanbook)
loanbook_rowid <- tibble::rowid_to_column(loanbook)

prep_lbk <- restructure_loanbook(loanbook_rowid, overwrite = overwrite)
prep_lbk <- restructure_loanbook(loanbook_rowid, overwrite = overwrite, sector_classification = sector_classification)
prep_abcd <- restructure_abcd(abcd)

if (by_sector) {
Expand Down
8 changes: 4 additions & 4 deletions R/restructure_abcd.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ restructure_abcd <- function(data) {
#'
#' restructure_loanbook(lbk, overwrite = overwrite_demo)
#' @noRd
restructure_loanbook <- function(data, overwrite = NULL) {
restructure_loanbook <- function(data, overwrite = NULL, sector_classification = default_sector_classification()) {
check_prep_loanbook_overwrite(overwrite)
check_prepare_loanbook_data(data)

Expand All @@ -63,7 +63,7 @@ restructure_loanbook <- function(data, overwrite = NULL) {
name_level <- extract_level_names(data, prefix = "name_")
important_columns <- c("rowid", id_level, name_level)

out <- may_add_sector_and_borderline(data)
out <- may_add_sector_and_borderline(data, sector_classification = sector_classification)
out <- select(
out, all_of(c("rowid", important_columns, "sector", "borderline"))
)
Expand All @@ -77,15 +77,15 @@ restructure_loanbook <- function(data, overwrite = NULL) {
out
}

may_add_sector_and_borderline <- function(data) {
may_add_sector_and_borderline <- function(data, sector_classification = default_sector_classification()) {
if (lacks_borderline(data) || lacks_sector(data)) {
abort("Must have both `sector` and `borderline`")
}

if (has_sector(data) & has_borderline(data)) {
data2 <- data
} else {
data2 <- add_sector_and_borderline(data)
data2 <- add_sector_and_borderline(data, sector_classification = sector_classification)
}

data2
Expand Down
13 changes: 12 additions & 1 deletion cran-comments.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
## R CMD check results

0 errors | 0 warnings | 0 notes
❯ checking for future file timestamps ... NOTE
unable to verify current time

0 errors ✔ | 0 warnings ✔ | 1 note ✖

## revdepcheck results

── CHECK ────────────────────────────────────────────── 2 packages ──
✔ r2dii.plot 0.4.0 ── E: 0 | W: 0 | N: 0
✔ r2dii.analysis 0.4.0 ── E: 0 | W: 0 | N: 0
OK: 2
BROKEN: 0
35 changes: 15 additions & 20 deletions man/match_name.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions tests/testthat/test-match_name.R
Original file line number Diff line number Diff line change
Expand Up @@ -700,19 +700,19 @@ test_that("errors if any id_loan is duplicated (#349)", {
expect_error(class = "duplicated_id_loan", match_name(lbk, abcd))
})

test_that("allows custom `sector_classifications` via options() (#354)", {
loanbook <- fake_lbk(sector_classification_system = "XYZ")
test_that("allows custom `sector_classification`", {
loanbook <- fake_lbk(
sector_classification_system = "XYZ",
sector_classification_direct_loantaker = "D35.11"
)
abcd <- fake_abcd()
custom_classification <- tibble::tribble(
~sector, ~borderline, ~code, ~code_system,
"power", FALSE, "D35.11", "XYZ",
)

# Allow users to inject their own `sector_classifications`
old <- options(r2dii.match.sector_classifications = custom_classification)
out <- match_name(loanbook, abcd)
out <- match_name(loanbook, abcd, sector_classification = custom_classification)
expect_equal(nrow(out), 1L)
options(old)
})

test_that("with `join_id`, joins as expected (#135)", {
Expand Down

0 comments on commit aa19028

Please sign in to comment.