Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CRAN release prep #99

Merged
merged 9 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ README.Rmd
^pkgdown$
^doc$
^Meta$
^cran-comments\.md$
14 changes: 8 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@ Authors@R: c(
person("S. Marshall", "Ledford", role = "ctb"),
person("Tamás", "Stirling", role = "ctb")
)
Description: Use this package to calculate estimated relative volatility
index values for organic compounds based on functional group
contributions. Calculation uses the SIMPOL.1 method (Prankow and Asher,
2008) or modified SIMPOL.1 method as in Meredith et al. (2023).
Description: Calculate estimated relative volatility index values for
organic compounds based on functional group contributions. Calculation
uses the SIMPOL.1 method (Pankow and Asher, 2008)
<doi:10.5194/acp-8-2773-2008> or modified SIMPOL.1 method as in
Meredith et al. (2023) <doi:10.3389/fmicb.2023.1267234>.
License: MIT + file LICENSE
URL: https://meredith-lab.github.io/volcalc/
BugReports: https://github.com/Meredith-Lab/volcalc/issues
Expand All @@ -27,8 +28,8 @@ Imports:
httr2,
KEGGREST,
magrittr,
rlang,
purrr,
rlang,
stringr,
tibble,
tidyr,
Expand All @@ -39,9 +40,10 @@ Suggests:
rmarkdown,
testthat (>= 3.0.0),
withr
VignetteBuilder:
knitr
biocViews:
Config/testthat/edition: 3
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
VignetteBuilder: knitr
7 changes: 5 additions & 2 deletions R/calc_vol.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#'
#' @export
#' @examples
#' \dontrun{
#' mol_paths <- mol_example()
#' calc_vol(mol_paths)
#'
Expand All @@ -48,7 +49,7 @@
#'
#' # Return intermediate calculations
#' calc_vol(mol_paths, return_calc_steps = TRUE)
#'
#' }
calc_vol <-
function(input,
from = c("mol_path", "smiles"),
Expand Down Expand Up @@ -123,7 +124,9 @@ calc_vol <-

#return:
vol_df %>%
dplyr::select(dplyr::all_of(c({{ from }}, "formula", "name", "rvi", "category", cols_fx, cols_calc)))
dplyr::select(dplyr::all_of(c(
{{ from }}, "formula", "name", "rvi", "category", cols_fx, cols_calc)
))

}

29 changes: 20 additions & 9 deletions R/get_fx_groups.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
#' @export
get_fx_groups <- function(compound_sdf) {

# For now at least, this code only works with SDFset objects that contain single molecules.
# For now at least, this code only works with SDFset objects that contain
# single molecules.
# TODO: make this function work with SDFset objects with multiple molecules?
if (length(compound_sdf) != 1) {
stop("SDFset objects must contain a single molecule only")
Expand All @@ -46,8 +47,11 @@ get_fx_groups <- function(compound_sdf) {
rowname <- n <- NULL

#convert counts to integer
groups <- groups %>% dplyr::mutate(dplyr::across(dplyr::everything(), as.integer))
rings <- data.frame(t(ChemmineR::rings(compound_sdf, type = "count", arom = TRUE, inner = TRUE)))
groups <-
groups %>%
dplyr::mutate(dplyr::across(dplyr::everything(), as.integer))
rings <-
data.frame(t(ChemmineR::rings(compound_sdf, type = "count", arom = TRUE, inner = TRUE)))
atoms <- atomcount2tibble(ChemmineR::atomcount(compound_sdf))
carbon_bond_data <- data.frame(ChemmineR::conMA(compound_sdf)[[1]]) %>%
dplyr::select(dplyr::contains("C_")) %>%
Expand Down Expand Up @@ -86,7 +90,9 @@ get_fx_groups <- function(compound_sdf) {

amide_primary_pattern <- "[CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H2]"
amide_secondary_pattern <- "[CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H1][#6;!$(C=[O,N,S])]"
amide_tertiary_pattern <- "[CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])]"
amide_tertiary_pattern <-
"[CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])]"

# amide_total_pattern <- "[CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]"

carbonylperoxynitrate_pattern <- "*C(=O)OO[N+1](=O)[O-1]"
Expand All @@ -95,12 +101,17 @@ get_fx_groups <- function(compound_sdf) {
carbonylperoxyacid_pattern <- "[CX3;$([R0][#6]),$([H1R0])](=[OX1])[OX2][$([OX2H]),$([OX1-])]"
nitroester_pattern <- "C(=O)(OC)C~[NX3](-,=[OX1])-,=[OX1]"
# This captures OH groups on a ring that also has a nitro group (para, ortho, or meta). Need to correct aromatic hydroxyl count later.
nitrophenol_pattern <- "[OX2H][$(c1ccccc1[$([NX3](=O)=O),$([NX3+](=O)[O-])]),$(c1cccc(c1)[$([NX3](=O)=O),$([NX3+](=O)[O-])]),$(c1ccc(cc1)[$([NX3](=O)=O),$([NX3+](=O)[O-])])]"
phosphoric_acid_pattern <- "[$(P(=[OX1])([$([OX2H]),$([OX1-]),$([OX2]P)])([$([OX2H]),$([OX1-]),$([OX2]P)])[$([OX2H]),$([OX1-]),$([OX2]P)]),$([P+]([OX1-])([$([OX2H]),$([OX1-]),$([OX2]P)])([$([OX2H]),$([OX1-]),$([OX2]P)])[$([OX2H]),$([OX1-]),$([OX2]P)])]"
phosphoric_ester_pattern <- "[$(P(=[OX1])([OX2][#6])([$([OX2H]),$([OX1-]),$([OX2][#6])])[$([OX2H]),$([OX1-]),$([OX2][#6]),$([OX2]P)]),$([P+]([OX1-])([OX2][#6])([$([OX2H]),$([OX1-]),$([OX2][#6])])[$([OX2H]),$([OX1-]),$([OX2][#6]),$([OX2]P)])]"
sulfate_pattern <- "[$([#16X4](=[OX1])(=[OX1])([OX2H,OX1H0-])[OX2][#6]),$([#16X4+2]([OX1-])([OX1-])([OX2H,OX1H0-])[OX2][#6])]"
nitrophenol_pattern <-
"[OX2H][$(c1ccccc1[$([NX3](=O)=O),$([NX3+](=O)[O-])]),$(c1cccc(c1)[$([NX3](=O)=O),$([NX3+](=O)[O-])]),$(c1ccc(cc1)[$([NX3](=O)=O),$([NX3+](=O)[O-])])]"
phosphoric_acid_pattern <-
"[$(P(=[OX1])([$([OX2H]),$([OX1-]),$([OX2]P)])([$([OX2H]),$([OX1-]),$([OX2]P)])[$([OX2H]),$([OX1-]),$([OX2]P)]),$([P+]([OX1-])([$([OX2H]),$([OX1-]),$([OX2]P)])([$([OX2H]),$([OX1-]),$([OX2]P)])[$([OX2H]),$([OX1-]),$([OX2]P)])]"
phosphoric_ester_pattern <-
"[$(P(=[OX1])([OX2][#6])([$([OX2H]),$([OX1-]),$([OX2][#6])])[$([OX2H]),$([OX1-]),$([OX2][#6]),$([OX2]P)]),$([P+]([OX1-])([OX2][#6])([$([OX2H]),$([OX1-]),$([OX2][#6])])[$([OX2H]),$([OX1-]),$([OX2][#6]),$([OX2]P)])]"
sulfate_pattern <-
"[$([#16X4](=[OX1])(=[OX1])([OX2H,OX1H0-])[OX2][#6]),$([#16X4+2]([OX1-])([OX1-])([OX2H,OX1H0-])[OX2][#6])]"
#sulfonate groups; sulfonate ions, and conjugate acid, sulfonic acids
sulfonate_pattern <- "[#16X4](=[OX1])(=[OX1])([#6])[*$([O-1]),*$([OH1]),*$([OX2H0])]"
sulfonate_pattern <-
"[#16X4](=[OX1])(=[OX1])([#6])[*$([O-1]),*$([OH1]),*$([OX2H0])]"
thiol_pattern <- "[#16X2H]"
carbothioester_pattern <- "S([#6])[CX3](=O)[#6]"

Expand Down
20 changes: 13 additions & 7 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ knitr::opts_chunk$set(

<!-- badges: start -->

[![R-CMD-check](https://github.com/Meredith-Lab/volcalc/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/Meredith-Lab/volcalc/actions/workflows/R-CMD-check.yaml) [![latest-DOI](https://zenodo.org/badge/425022983.svg)](https://zenodo.org/badge/latestdoi/425022983) [![manuscript-DOI](https://img.shields.io/badge/DOI-10.3389/fmicb.2023.1267234-32a859.svg)](https://doi.org/10.3389/fmicb.2023.1267234) [![Project Status: Active -- The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![Codecov test coverage](https://codecov.io/gh/Meredith-Lab/volcalc/branch/master/graph/badge.svg)](https://app.codecov.io/gh/Meredith-Lab/volcalc?branch=master) [![volcalc status badge](https://cct-datascience.r-universe.dev/badges/volcalc)](https://cct-datascience.r-universe.dev/volcalc)
[![R-CMD-check](https://github.com/Meredith-Lab/volcalc/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/Meredith-Lab/volcalc/actions/workflows/R-CMD-check.yaml) [![latest-DOI](https://zenodo.org/badge/425022983.svg)](https://zenodo.org/badge/latestdoi/425022983) [![manuscript-DOI](https://img.shields.io/badge/DOI-10.3389/fmicb.2023.1267234-32a859.svg)](https://doi.org/10.3389/fmicb.2023.1267234) [![Project Status: Active -- The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![Codecov test coverage](https://codecov.io/gh/Meredith-Lab/volcalc/branch/master/graph/badge.svg)](https://app.codecov.io/gh/Meredith-Lab/volcalc?branch=master) [![volcalc status badge](https://cct-datascience.r-universe.dev/badges/volcalc)](https://cct-datascience.r-universe.dev/volcalc) [![CRAN status](https://www.r-pkg.org/badges/version/volcalc)](https://CRAN.R-project.org/package=volcalc)

<!-- badges: end -->

Expand All @@ -26,12 +26,18 @@ knitr::opts_chunk$set(
The `volcalc` package allows you to automate calculating estimates of volatility for chemical compounds.

`volcalc` supports "group contribution" methods for estimating volatility that rely on molecular properties such as molecular weight, numbers of certain atoms, and counts of certain functional groups.
Currently, the only methods implemented are SIMPOL.1 (Pankow & Asher 2008) and a modified version used in Meredith et al. (2023).
Currently, the only methods implemented are SIMPOL.1 ([Pankow & Asher 2008](https://doi.org/10.5194/acp-8-2773-2008)) and a modified version used in [Meredith et al. (2023)](https://doi.org/10.3389/fmicb.2023.1267234).

`volcalc` works with either .mol files or [SMILES](https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system) strings as input, and supports downloading .mol files directly from [KEGG](https://www.kegg.jp/).

## Installation

Install from CRAN with

``` r
install.packages("volcalc")
```

You can install the development version of `volcalc` from GitHub with

``` r
Expand Down Expand Up @@ -113,9 +119,9 @@ citation("volcalc")

### References

Pankow, J.F., Asher, W.E., 2008.
SIMPOL.1: a simple group contribution method for predicting vapor pressures and enthalpies of vaporization of multifunctional organic compounds.
Atmos.
Chem.
Phys.
Pankow, J.F., Asher, W.E., 2008. SIMPOL.1: a simple group contribution
method for predicting vapor pressures and enthalpies of vaporization of
multifunctional organic compounds. Atmos. Chem. Phys.
<https://doi.org/10.5194/acp-8-2773-2008>

Meredith, L.K., Ledford, S.M., Riemer, K., Geffre, P., Graves, K., Honeker, L.K., LeBauer, D., Tfaily, M.M., Krechmer, J., 2023. Automating methods for estimating metabolite volatility. Frontiers in Microbiology 14. <https://doi.org/10.3389/fmicb.2023.1267234>
18 changes: 15 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.re
coverage](https://codecov.io/gh/Meredith-Lab/volcalc/branch/master/graph/badge.svg)](https://app.codecov.io/gh/Meredith-Lab/volcalc?branch=master)
[![volcalc status
badge](https://cct-datascience.r-universe.dev/badges/volcalc)](https://cct-datascience.r-universe.dev/volcalc)
[![CRAN
status](https://www.r-pkg.org/badges/version/volcalc)](https://CRAN.R-project.org/package=volcalc)

<!-- badges: end -->

Expand All @@ -27,7 +29,9 @@ volatility for chemical compounds.
volatility that rely on molecular properties such as molecular weight,
numbers of certain atoms, and counts of certain functional groups.
Currently, the only methods implemented are SIMPOL.1 ([Pankow & Asher
2008](https://doi.org/10.5194/acp-8-2773-2008)) and a modified version used in [Meredith et al. (2023)](https://doi.org/10.3389/fmicb.2023.1267234).
2008](https://doi.org/10.5194/acp-8-2773-2008)) and a modified version
used in [Meredith et
al. (2023)](https://doi.org/10.3389/fmicb.2023.1267234).

`volcalc` works with either .mol files or
[SMILES](https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system)
Expand All @@ -36,6 +40,12 @@ strings as input, and supports downloading .mol files directly from

## Installation

Install from CRAN with

``` r
install.packages("volcalc")
```

You can install the development version of `volcalc` from GitHub with

``` r
Expand Down Expand Up @@ -171,5 +181,7 @@ method for predicting vapor pressures and enthalpies of vaporization of
multifunctional organic compounds. Atmos. Chem. Phys.
<https://doi.org/10.5194/acp-8-2773-2008>

Meredith, L.K., Ledford, S.M., Riemer, K., Geffre, P., Graves, K., Honeker, L.K., LeBauer, D., Tfaily, M.M., Krechmer, J., 2023. Automating methods for estimating metabolite volatility. Frontiers in Microbiology 14. <https://doi.org/10.3389/fmicb.2023.1267234>

Meredith, L.K., Ledford, S.M., Riemer, K., Geffre, P., Graves, K.,
Honeker, L.K., LeBauer, D., Tfaily, M.M., Krechmer, J., 2023. Automating
methods for estimating metabolite volatility. Frontiers in Microbiology
14. <https://doi.org/10.3389/fmicb.2023.1267234>
13 changes: 13 additions & 0 deletions cran-comments.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
## R CMD check results

0 errors | 0 warnings | 1 note

* This is a new release.

The note on check is:

> Package has a FOSS license but eventually depends on the following
> package which may restrict use:
> ChemmineOB

ChemmineOB is an R package with the [Artistic-2.0 license](https://github.com/girke-lab/ChemmineOB/blob/master/LICENSE), which does appear to be FOSS. Other R packages
3 changes: 2 additions & 1 deletion man/calc_vol.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/volcalc-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 22 additions & 10 deletions tests/testthat/test-calc_vol.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
test_that("volatility estimate is correct for example compound for entire workflow", {
test_that("volatility estimate is correct", {
ex_vol_df <- calc_vol("data/C16181.mol")
expect_equal(round(ex_vol_df$rvi, 6), 6.975349)
})

test_that("returns correct columns depending on return arguments", {
just_vol <- calc_vol("data/C16181.mol")
with_fx <- calc_vol("data/C16181.mol", return_fx_groups = TRUE)
with_fx_steps <- calc_vol("data/C16181.mol", return_fx_groups = TRUE, return_calc_steps = TRUE)
expect_setequal(colnames(just_vol), c("mol_path", "formula", "name", "rvi", "category"))
with_fx_steps <-
calc_vol("data/C16181.mol",
return_fx_groups = TRUE,
return_calc_steps = TRUE)
expect_setequal(colnames(just_vol),
c("mol_path", "formula", "name", "rvi", "category"))
# just some examples here
expect_contains(colnames(with_fx), c(colnames(just_vol), "carbons", "carbothioesters", "fluorines"))
expect_contains(colnames(with_fx_steps), c(colnames(with_fx), "molecular_weight", "log_alpha", "log10_P"))
expect_contains(colnames(with_fx),
c(colnames(just_vol), "carbons", "carbothioesters", "fluorines"))
expect_contains(colnames(with_fx_steps),
c(colnames(with_fx), "molecular_weight", "log_alpha", "log10_P"))
})

test_that("calc_vol() works with multiple inputs", {
Expand All @@ -21,11 +27,15 @@ test_that("calc_vol() works with multiple inputs", {
})

test_that("smiles and .mol give same results", {
paths <- c("data/C16181.mol", "data/map00361/C00011.mol", "data/map00361/C00042.mol")
smiles <- c("C1(C(C(C(C(C1Cl)Cl)Cl)Cl)Cl)O", "O=C=O", "C(CC(=O)O)C(=O)O")
paths <-
c("data/C16181.mol",
"data/map00361/C00011.mol",
"data/map00361/C00042.mol")
smiles <-
c("C1(C(C(C(C(C1Cl)Cl)Cl)Cl)Cl)O", "O=C=O", "C(CC(=O)O)C(=O)O")
expect_equal(
calc_vol(smiles, from = "smiles") %>% dplyr::select(-name, -smiles),
calc_vol(paths) %>% dplyr::select(-name, -mol_path)
calc_vol(smiles, from = "smiles") %>% dplyr::select(-name,-smiles),
calc_vol(paths) %>% dplyr::select(-name,-mol_path)
)
})

Expand All @@ -36,7 +46,9 @@ test_that("errors with invalid SMILES", {
test_that("meredith and original method give different results", {
#thiol and sulfonate groups, respectively
# paths <- c(test_path("data/C00409.mol"), test_path("data/C03349.mol"))
smiles <- c("Methanethiol" = "SC", "Methyl methanesulfonate" = "COS(=O)(=O)C")
smiles <-
c("Methanethiol" = "SC",
"Methyl methanesulfonate" = "COS(=O)(=O)C")
meredith <- calc_vol(smiles, from = "smiles", method = "meredith")
simpol <- calc_vol(smiles, from = "smiles", method = "simpol1")
expect_true(all(meredith$rvi < simpol$rvi))
Expand Down
3 changes: 2 additions & 1 deletion tests/testthat/test-get_fx_groups.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ test_that("SMARTS strings are correct", {
expected <- test_compounds %>% dplyr::select(smiles, dplyr::all_of(common_cols))
actual <- test_fx_groups %>% dplyr::select(smiles, dplyr::all_of(common_cols))

# compare but ignore NAs in expected, by just overwriting them with values in actual using rows_patch()
# compare but ignore NAs in expected, by just overwriting them with values in
# actual using rows_patch()
expect_equal(
actual,
dplyr::rows_patch(expected, actual)
Expand Down
Loading