Skip to content

Commit

Permalink
first take on #101
Browse files Browse the repository at this point in the history
  • Loading branch information
fmichonneau committed Sep 15, 2017
1 parent 0c21495 commit b8009f2
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 55 deletions.
44 changes: 27 additions & 17 deletions R/match_names.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
## internal function that match the arguments provided to the correct
## row number in the data frame representing the Open Tree Taxonomy
## for a series of matched names.
check_args_match_names <- function(response, row_number, taxon_name, ott_id) {
orig_order <- attr(response, "original_order")
check_args_match_names <- function(response, req_number, row_number, taxon_name, ott_id) {
orig_order <- attr(response, "original_order")[[req_number]]
if (is.null(orig_order)) {
stop(sQuote(substitute(response)), " was not created using ",
sQuote("tnrs_match_names"))
Expand Down Expand Up @@ -106,11 +106,13 @@ match_row_number <- function(response, row_number, taxon_name, ott_id) {
##' @rdname match_names
inspect.match_names <- function(response, row_number, taxon_name, ott_id, ...) {

i <- check_args_match_names(response, row_number, taxon_name, ott_id)
j <- match_row_number(response, row_number, taxon_name, ott_id)
k <- ceiling(j/max_tnrs_req)
i <- check_args_match_names(response, req_number = k,
get_row(row_number), taxon_name, ott_id)

if (attr(response, "has_original_match")[j]) {
res <- attr(response, "original_response")
if (attr(response, "has_original_match")[[k]][get_row(j)]) {
res <- attr(response, "original_response")[[k]]
summary_match <- build_summary_match(res, res_id = i, match_id = NULL,
initial_creation = FALSE)
} else {
Expand All @@ -119,6 +121,10 @@ inspect.match_names <- function(response, row_number, taxon_name, ott_id, ...) {
summary_match
}

get_row <- function(x) {
ifelse(x %% max_tnrs_req == 0, max_tnrs_req, x %% max_tnrs_req)
}

##' @export
##' @rdname match_names
inspect <- function(response, ...) UseMethod("inspect")
Expand All @@ -137,12 +143,15 @@ update.match_names <- function(object, row_number, taxon_name, ott_id,
new_row_number, new_ott_id, ...) {

response <- object
i <- check_args_match_names(response, row_number, taxon_name, ott_id)
j <- match_row_number(response, row_number, taxon_name, ott_id)
k <- ceiling(j/max_tnrs_req)
i <- check_args_match_names(response, req_number = k,
row_number = get_row(row_number),
taxon_name = taxon_name, ott_id = ott_id)

res <- attr(response, "original_response")
res <- attr(response, "original_response")[[k]]

if (!attr(response, "has_original_match")[j]) {
if (!attr(response, "has_original_match")[[k]][get_row(j)]) {
warning("There is no match for this name, ",
"so there is nothing to replace it with.")
return(response)
Expand Down Expand Up @@ -179,10 +188,10 @@ update.match_names <- function(object, row_number, taxon_name, ott_id,
}
if (length(j) > 1) stop("You must supply a single element for each argument")

summ_match <- summary_row_factory(res, res_id = i, match_id = j)
summ_match <- summary_row_factory(res, res_id = i, match_id = get_row(j))

response[rnb, ] <- summ_match
attr(response, "match_id")[rnb] <- j
attr(response, "match_id")[[k]][rnb] <- get_row(j)
response
}

Expand All @@ -199,16 +208,16 @@ get_list_element <- function(response, i, list_name) {

match_names_method_factory <- function(list_name) {

function(tax, row_number, taxon_name, ott_id, ...) {
function(tax, req_number, row_number, taxon_name, ott_id, ...) {

response <- tax
res <- attr(response, "original_response")
res <- attr(response, "original_response")[[req_number]]

no_args <- all(c(missing(row_number), missing(taxon_name),
missing(ott_id)))

if (no_args) {
res_i <- attr(response, "original_order")[attr(response, "has_original_match")]
res_i <- attr(response, "original_order")[[req_number]][attr(response, "has_original_match")[[req_number]]]
ret <- lapply(res_i, function(i) {
get_list_element(res, i, list_name)
})
Expand All @@ -219,15 +228,15 @@ match_names_method_factory <- function(list_name) {
## to extract the correct element
ret <- mapply(function(x, i) {
ret[[x]][i]
}, seq_along(ret), attr(response, "match_id")[attr(response, "has_original_match")])
}, seq_along(ret), attr(response, "match_id")[[req_number]][attr(response, "has_original_match")[[req_number]]])
if (all(sapply(ret, length) == 1)) {
ret <- unlist(ret, use.names = TRUE)
}
} else {
i <- check_args_match_names(response, row_number, taxon_name, ott_id)
j <- match_row_number(response, row_number, taxon_name, ott_id)
if (attr(response, "has_original_match")[j]) {
ret <- get_list_element(res, i, list_name)[attr(response, "match_id")[j]]
if (attr(response, "has_original_match")[[req_number]][j]) {
ret <- get_list_element(res, i, list_name)[attr(response, "match_id")[[req_number]][j]]
} else {
ret <- list(ott_id = NA_character_,
name = response[["search_string"]][j],
Expand All @@ -249,7 +258,8 @@ match_names_method_factory <- function(list_name) {
match_names_taxon_method_factory <- function(.f) {
function(tax, row_number, taxon_name, ott_id, ...) {
extract_tax_list <- match_names_method_factory("taxon")
tax_info <- extract_tax_list(tax, row_number = row_number,
tax_info <- extract_tax_list(tax, req_number = req_number,
row_number = row_number,
taxon_name = taxon_name,
ott_id = ott_id)
res <- lapply(tax_info, function(x) .f(x))
Expand Down
101 changes: 63 additions & 38 deletions R/tnrs.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
max_tnrs_req <- 2


##' Match taxonomic names to the Open Tree Taxonomy.
##'
Expand Down Expand Up @@ -78,47 +80,70 @@ tnrs_match_names <- function(names = NULL, context_name = NULL,
names <- unique(names)
}

res <- .tnrs_match_names(names = names, context_name = context_name,
do_approximate_matching = do_approximate_matching,
ids = ids, include_suppressed = include_suppressed,
...)

check_tnrs(res)
match_ids <- lowest_ott_id(res)
if (!identical(length(res[["results"]]), length(match_ids)))
stop("The number of matches and the number of 'results' elements should",
" be the same.")

summary_match <- mapply(
function(rid, mid) {
build_summary_match(res, res_id = rid, match_id = mid, initial_creation = TRUE)
}, seq_along(res[["results"]]), match_ids, SIMPLIFY = FALSE)
## add taxon names with no maches
summary_match <- add_unmatched_names(summary_match, res)
summary_match <- do.call("rbind", summary_match)

summary_match$search_string <- gsub("\\\\", "", summary_match$search_string)

## reorder to match original query
match_ids <- c(match_ids, rep(NA_integer_, sum(is.na(summary_match$ott_id))))
ordr <- match(tolower(names), summary_match$search_string)
stopifnot(identical(length(match_ids), length(ordr)))

summary_match <- summary_match[ordr, ]
match_ids <- match_ids[ordr]

summary_match[["approximate_match"]] <-
convert_to_logical(summary_match[["approximate_match"]])
summary_match[["is_synonym"]] <-
convert_to_logical(summary_match[["is_synonym"]])
summary_match[["flags"]] <- convert_to_logical(summary_match[["flags"]])

attr(summary_match, "original_order") <- as.numeric(rownames(summary_match))
names <- split_by_n(names, max_tnrs_req)
names(names) <- paste0("req", seq_along(names))

res <- lapply(names, function(n) {
.r <- .tnrs_match_names(names = n, context_name = context_name,
do_approximate_matching = do_approximate_matching,
ids = ids, include_suppressed = include_suppressed,
...)
check_tnrs(.r)
.r
})

match_ids <- lapply(res, function(.r) {
mid <- lowest_ott_id(.r)
if (!identical(length(.r[["results"]]), length(mid)))
stop("The number of matches and the number of 'results' elements should",
" be the same.")
mid
})

summary_match_list <- lapply(seq_along(res), function(i) {
.smry <- mapply(
function(rid, mid) {
build_summary_match(res[[i]], res_id = rid, match_id = mid, initial_creation = TRUE)
}, seq_along(res[[i]][["results"]]), match_ids[[i]], SIMPLIFY = FALSE)

## add taxon names with no maches
.smry <- add_unmatched_names(.smry, res)
.smry <- do.call("rbind", .smry)

.smry$search_string <- gsub("\\\\", "", .smry$search_string)

## reorder to match original query
match_ids[[i]] <- c(match_ids[[i]], rep(NA_integer_, sum(is.na(.smry$ott_id))))
ordr <- match(tolower(names[[i]]), .smry$search_string)
stopifnot(identical(length(match_ids[[i]]), length(ordr)))

.smry <- .smry[ordr, ]
match_ids[[i]] <- match_ids[[i]][ordr]

.smry[["approximate_match"]] <-
convert_to_logical(.smry[["approximate_match"]])
.smry[["is_synonym"]] <-
convert_to_logical(.smry[["is_synonym"]])
.smry[["flags"]] <- convert_to_logical(.smry[["flags"]])
.smry
})

summary_match <- do.call("rbind", summary_match_list)

orig_order <- lapply(summary_match_list, function(smry)
as.numeric(rownames(smry)))
names(orig_order) <- names(res)

has_original_match <- lapply(summary_match_list, function(smry) {
!is.na(smry[["number_matches"]])
})

attr(summary_match, "original_order") <- orig_order
rownames(summary_match) <- NULL
attr(summary_match, "original_response") <- res
attr(summary_match, "match_id") <- match_ids
attr(summary_match, "has_original_match") <-
!is.na(summary_match[["number_matches"]])
attr(summary_match, "has_original_match") <- has_original_match

class(summary_match) <- c("match_names", "data.frame")
summary_match
}
Expand Down

0 comments on commit b8009f2

Please sign in to comment.