Skip to content

Commit

Permalink
merge fixes fro @HDash
Browse files Browse the repository at this point in the history
  • Loading branch information
bschilder committed Jan 8, 2025
2 parents 158a703 + f36236c commit 80613ea
Show file tree
Hide file tree
Showing 28 changed files with 283 additions and 147 deletions.
14 changes: 10 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
Package: KGExplorer
Type: Package
Title: Biomedical Knowledge Network Construction and Analysis
Version: 0.99.03
Version: 0.99.06
Authors@R:
c(
person(given = "Brian",
family = "Schilder",
role = c("aut","cre"),
email = "[email protected]",
comment = c(ORCID = "0000-0001-5949-2191"))
comment = c(ORCID = "0000-0001-5949-2191")),
person(given = "Hiranyamaya",
family = "Dash",
role = c("ctb"),
email = "[email protected]",
comment = c(ORCID = "0009-0005-5514-505X"))
)
Description: Query, construct, and analyse large-scale biomedical knowledge graphs and ontologies.
URL: https://github.com/neurogenomics/KGExplorer
Expand Down Expand Up @@ -75,9 +80,10 @@ Suggests:
tidyr,
DiagrammeR,
forcats,
arrow
arrow,
curl
Remotes:
github::charlieccarey/monarchr,
github::monarch-initiative/monarchr,
github::phenoscape/rphenoscape,
github::vjcitn/biocBiocypher,
github::RajLabMSSM/echogithub,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export(get_gencc)
export(get_gene_lengths)
export(get_genes_disease)
export(get_graph_colnames)
export(get_hpo)
export(get_monarch)
export(get_monarch_files)
export(get_monarch_kg)
Expand Down
39 changes: 39 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,42 @@
# KGExplorer 0.99.06

## Bug fixes
* `get_opentargets`: Limit threads and files to import in example.
* Merge fixes by @HDash https://github.com/neurogenomics/KGExplorer/pull/2
* `dt_to_kg`: fix reference to graph `g`.

# KGExplorer 0.99.05

## New features
* `get_hpo`
- Port function from `HPOExplorer` package to prevent circular dependency.

## Bug fixes
* `DESCRIPTION`
- Update remote for `monarchr`.
* Tests
- Add `skip_if_offline` to tests that (may) require internet access.
* `ontology_to`
- `igraph::as_adj` (deprecated) -> `igraph::as_adjacency_matrix`.

# KGExplorer 0.99.04

## Bug fixes
* `test-get_ontology_levels`
- Check for range rather than fixed values.
* `filter_ontology`
- Move `terms` processing block to after check for character, as appropriate.
* `get_ontology_dict`
- Add error handling for missing `alternative_terms` when
`include_alternative_terms=TRUE`.
* `plot_ontology_heatmap`
- Fix default value for argument `annot`-- cast one@elementMetadata to
data.frame first.
* `prune_ancestors`
- Add value for argument `id_col` in example.
* `set_cores`
- Reduce workers during `R CMD CHECK` if required.

# KGExplorer 0.99.03

## New features
Expand Down
9 changes: 5 additions & 4 deletions R/dt_to_kg.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@ dt_to_kg <- function(d,
message("subject_label examples:")
methods::show(unique(utils::head(d_sub$subhect_label,100))[seq(5)])
if(as_tidygraph){
d_sub <- dt_to_graph(d_sub,
add_hover=add_hover)
g <- dt_to_graph(d_sub,
add_hover=add_hover)
igraph::vertex_attr(g,"name") <- igraph::vertex_attr(g,"id")
return(g)
}
igraph::vertex_attr(g,"name") <- igraph::vertex_attr(g,"id")
d_sub
return(d_sub)
}
31 changes: 16 additions & 15 deletions R/filter_ontology.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ filter_ontology <- function(ont,
include_self = TRUE,
use_simona=FALSE,
...){
#### Check remove_terms ####
#### Check remove_terms ####
terms <- terms[!terms %in% remove_terms]
#### Use simona ####
if(isTRUE(use_simona)){
Expand All @@ -30,14 +30,14 @@ filter_ontology <- function(ont,
to = "id") |> stats::na.omit()
if(length(keep_descendants)>0){
messager("Keeping descendants of",length(keep_descendants),"term(s).")
ont <- simona::dag_filter(ont,
ont <- simona::dag_filter(ont,
root=as.character(keep_descendants),
...)
messager(formatC(ont@n_terms,big.mark = ","),
"terms remain after filtering.")
} else {
messager("keep_descendants: No descendants found.")
}
}
}
#### remove_descendants ####
if(!is.null(remove_descendants)){
Expand All @@ -50,8 +50,8 @@ filter_ontology <- function(ont,
include_self = include_self,
term = remove_descendants)
keep_terms <- ont@terms[!ont@terms %in% remove_descendants]
ont <- simona::dag_filter(ont,
terms=keep_terms,
ont <- simona::dag_filter(ont,
terms=keep_terms,
...)
messager(formatC(ont@n_terms,big.mark = ","),
"terms remain after filtering.")
Expand All @@ -61,29 +61,30 @@ filter_ontology <- function(ont,
}
#### Use custom filtering methods ####
if(!is.null(terms)){
terms <- map_ontology_terms(ont = ont,
terms = terms,
to = "id") |> stats::na.omit()
## Characters

## Characters
if(is.character(terms)){
terms <- map_ontology_terms(ont = ont,
terms = terms,
to = "id") |> stats::na.omit()
terms <- terms[simona::dag_has_terms(dag=ont, terms = unique(terms))]
if(length(terms)==0) {
stopper("None of the supplied terms found in the ontology.")
}
}
ont <- ont[,terms]

} else if (is.numeric(terms)){
messager("Randomly sampling",terms,"term(s).")
if(terms>length(ont@terms)){
messager(
"Number of terms requested exceeds number of terms in the ontology.",
"Returning original ontology object without filtering.")
return(ont)
}
return(ont)
}
if(terms==0) stopper("Terms must be >0 if numeric.")
term_ids <- sample(ont@terms,terms, replace = FALSE)
ont <- ont[,term_ids]
}
}
}
return(ont)
}
}
43 changes: 43 additions & 0 deletions R/get_hpo.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#' @describeIn get_ get_
#' Get Human Phenotype Ontology (HPO)
#'
#' Updated version of Human Phenotype Ontology (HPO).
#' Created from the OBO files distributed by the HPO project's
#' \href{https://github.com/obophenotype/human-phenotype-ontology}{GitHub}.
#' Adapted from \link[HPOExplorer]{get_hpo}.
#'
#' By comparison, the \code{hpo} data from \pkg{ontologyIndex} is from 2016.
#' Note that the maximum ontology level depth in the 2016 version was 14,
#' whereas in the 2023 version the maximum ontology level depth is 16
#' (due to an expansion of the HPO).
#' @inheritParams get_ontology
#' @inheritDotParams get_ontology
#' @returns \link[simona]{ontology_DAG} object.
#'
#' @export
#' @examples
#' hpo <- get_hpo()
get_hpo <- function(lvl = 2,
force_new = FALSE,
terms=NULL,
## rols imports the international version for some reason
method="github",
save_dir=cache_dir(package = "KGExplorer"),
...){

file <- file.path(save_dir,"hp.rds")
if(!file.exists(file) || isTRUE(force_new)){
ont <- get_ontology(name = "hp",
lvl = lvl,
force_new = force_new,
terms = terms,
method = method,
save_dir = save_dir,
...)
saveRDS(ont,file)
} else {
ont <- readRDS(file)
}
ont <- filter_ontology(ont = ont, terms = terms)
return(ont)
}
16 changes: 8 additions & 8 deletions R/get_ontology_dict.R
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
#' @describeIn get_ get_
#'
#'
#' @param as_datatable Return as a data.table instead of a named vector.
#' @param include_alternative_terms Include alternative terms in the dictionary.
#' @export
#' @examples
#' ont <- get_ontology("hp", terms=10)
#' dict <- get_ontology_dict(ont)
get_ontology_dict <- function(ont,
get_ontology_dict <- function(ont,
from="short_id",
to=c("name","label","term"),
include_self=FALSE,
include_alternative_terms=TRUE,
include_alternative_terms=FALSE,
as_datatable=FALSE){
to <- intersect(to,colnames(ont@elementMetadata))[1]

if(from=="id") from <- "short_id"
if(to=="id") to <- "short_id"

## Check from col exists
if(!from %in% colnames(ont@elementMetadata)){
stopper("Column",from,"not found in ontology metadata.")
Expand All @@ -25,7 +25,7 @@ get_ontology_dict <- function(ont,
if(!to %in% colnames(ont@elementMetadata)){
stopper("Column",to,"not found in ontology metadata.")
}

if(isTRUE(as_datatable)){
#### As data.table ####
dict <- data.table::as.data.table(
Expand All @@ -47,7 +47,7 @@ get_ontology_dict <- function(ont,
)[,from:=get(to)][,to:=get(to)][,c("from","to")])
}
dict <- unique(dict)
data.table::setkeyv(dict, c("from"))
data.table::setkeyv(dict, c("from"))
} else {
#### As named vector ####
dict <- stats::setNames(ont@elementMetadata[[to]],
Expand All @@ -58,6 +58,6 @@ get_ontology_dict <- function(ont,
ont@elementMetadata[[to]])
)
}
}
}
return(dict)
}
11 changes: 9 additions & 2 deletions R/get_opentargets.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@
#' @param subdir Open Targets subdirectory.
#' @param subdir2 Open Targets sub-subdirectory.
#' @param ftp The final Open Targets FTP URL.
#' @param max_files Limit the number of files to import
#' @inheritParams get_
#' @inheritParams set_cores
#' @import rvest
#' @export
#' @examples
#' d <- get_opentargets()
#' d <- get_opentargets(max_files=2, save_dir=tempdir())
get_opentargets <- function(release="latest",
data_type=c("associationByDatasourceDirect",
"associationByDatasourceIndirect",
Expand All @@ -37,6 +39,8 @@ get_opentargets <- function(release="latest",
subdir2,
data_type[1],"/"),
save_dir=cache_dir(),
max_files=NULL,
workers=1,
force_new=FALSE){

## Variant and gene level data merged for all genome-wide summary statistics:
Expand All @@ -59,8 +63,11 @@ get_opentargets <- function(release="latest",
)[[1]]|>
subset(endsWith(Name,".parquet")|endsWith(Name,".json"))
tbl$Date <- stringr::str_split(tbl$`Last modified`," ",simplify = TRUE)[,1]
if(!is.null(max_files)){
tbl <- tbl[seq(max_files),]
}
if(nrow(tbl)==0) stopper("No data files found at", ftp)
BPPARAM <- set_cores()
BPPARAM <- set_cores(workers = workers)
d <- BiocParallel::bplapply(stats::setNames(tbl$Name,
tbl$Name),
BPPARAM = BPPARAM,
Expand Down
20 changes: 10 additions & 10 deletions R/map_upheno_data_i.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ map_upheno_data_i <- function(pheno_map_method,
n_genes_db1 <- object <- gene_label <- db <- . <-
n_genes_db2 <- subject_taxon_label1 <- subject_taxon_label2 <-
phenotype_genotype_score <- equivalence_score <- NULL;

pheno_map_method <- pheno_map_method[1]
gene_map_method <- gene_map_method[1]
messager(paste0("map_upheno_data: pheno_map_method=",
Expand All @@ -26,10 +26,10 @@ map_upheno_data_i <- function(pheno_map_method,
names(pheno_map) <-gsub("^object","id2",names(pheno_map))
pheno_map[,db1:=gsub("*:.*","",basename(id1))]
} else if(pheno_map_method=="monarch"){
hpo <- HPOExplorer::get_hpo()

hpo <- get_hpo()
out <- monarchr::monarch_search(query = NULL,
category = "biolink:PhenotypicFeature",
category = "biolink:PhenotypicFeature",
limit = 500)
pheno_map <- get_monarch(queries = "phenotype_to_phenotype") |>
data.table::setnames(c("label_x","label_y"),c("label1","label2"))
Expand All @@ -47,7 +47,7 @@ map_upheno_data_i <- function(pheno_map_method,
}
}
}

## Gene-phenotype associations across 8 species
{
genes <- get_monarch(maps = list(c("phenotype","gene")),
Expand All @@ -65,7 +65,7 @@ map_upheno_data_i <- function(pheno_map_method,
## Create an db-species map for each Ontology
species_map <- genes_map[,.SD[1], keyby="db"][,.(db,subject_taxon_label)]
}

#### Map non-human genes onto human orthologs ####
{
genes_homol <- map_genes_monarch(dat=genes,
Expand All @@ -75,7 +75,7 @@ map_upheno_data_i <- function(pheno_map_method,
data.table::uniqueN(genes$subject_taxon_label),
"species remain after cross-species gene mapping.")
}

#### Map non-human phenotypes onto human phenotypes ####
#### Merge nonhuman ontology genes with human HPO genes ####
{
Expand All @@ -94,7 +94,7 @@ map_upheno_data_i <- function(pheno_map_method,
all.y = keep_nogenes,
suffixes = c(1,2),
allow.cartesian = TRUE
)
)
pheno_map_genes[,db2:=id2_db]
## Fill in missing species for those without gene data
pheno_map_genes[
Expand All @@ -113,7 +113,7 @@ map_upheno_data_i <- function(pheno_map_method,
## Remove
# remove(genes_human,genes_nonhuman,pheno_map)
}

#### Count the number of overlapping genes
{
if(isFALSE(keep_nogenes)){
Expand Down Expand Up @@ -154,4 +154,4 @@ map_upheno_data_i <- function(pheno_map_method,
## less than or equal to the number of total HPO genes.
# pheno_map_genes_match[n_genes>n_genes_hpo,]
return(pheno_map_genes_match)
}
}
Loading

0 comments on commit 80613ea

Please sign in to comment.