From 340e0f50403615863e2ce77c5d233ed672fe2507 Mon Sep 17 00:00:00 2001 From: Yi Zhou <17245097+y1zhou@users.noreply.github.com> Date: Sat, 4 May 2024 23:30:36 +0800 Subject: [PATCH] feat(intxn): search for disulfides (#5) * feat(covalent): search for disulfides in the interface * feat(intxn): ignore neighboring residues during search * refactor: re-export interactions module functions --- src/interactions/complex.rs | 87 +++++++++++++++++++++++++++++-------- src/interactions/mod.rs | 12 ++--- src/interactions/structs.rs | 2 + src/interactions/vdw.rs | 55 ++++++++++++++++++++--- src/main.rs | 3 +- src/utils.rs | 18 +++++++- 6 files changed, 146 insertions(+), 31 deletions(-) diff --git a/src/interactions/complex.rs b/src/interactions/complex.rs index 0896e1e..4056ea9 100644 --- a/src/interactions/complex.rs +++ b/src/interactions/complex.rs @@ -1,12 +1,12 @@ use super::{ find_hydrogen_bond, find_hydrophobic_contact, find_ionic_bond, find_vdw_contact, - find_weak_hydrogen_bond, InteractingEntity, Interaction, ResultEntry, + find_weak_hydrogen_bond, Interaction, ResultEntry, }; -use crate::utils::parse_groups; +use crate::utils::{hierarchy_to_entity, parse_groups}; use pdbtbx::*; use rayon::prelude::*; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; /// The workhorse struct for identifying interactions in the model pub struct InteractionComplex { @@ -20,6 +20,9 @@ pub struct InteractionComplex { pub vdw_comp_factor: f64, /// Distance cutoff when searching for neighboring atoms pub interacting_threshold: f64, + + /// Maps residue names to unique indices + res2idx: HashMap>, } impl InteractionComplex { @@ -27,12 +30,42 @@ impl InteractionComplex { let all_chains: HashSet = model.par_chains().map(|c| c.id().to_string()).collect(); let (ligand, receptor) = parse_groups(&all_chains, groups); + let res2idx = build_residue_index(&model); + Self { model, ligand, receptor, vdw_comp_factor, interacting_threshold, + res2idx, + } + } + + fn is_neighboring_res_pair( + &self, + x: &AtomConformerResidueChainModel, + y: &AtomConformerResidueChainModel, + ) -> bool { + if x.chain().id() != y.chain().id() { + return false; + } + let (x_resi, x_insertion) = x.residue().id(); + let x_altloc = match x_insertion { + Some(insertion) => insertion.to_string(), + None => "".to_string(), + }; + let x_idx = self.res2idx[x.chain().id()][&(x_resi, x_altloc)]; + let (y_resi, y_insertion) = y.residue().id(); + let y_altloc = match y_insertion { + Some(insertion) => insertion.to_string(), + None => "".to_string(), + }; + let y_idx = self.res2idx[y.chain().id()][&(y_resi, y_altloc)]; + + match x_idx { + 0 => y_idx == x_idx + 1, + _ => (y_idx == x_idx - 1) | (y_idx == x_idx + 1), } } } @@ -74,6 +107,15 @@ impl Interactions for InteractionComplex { .filter(|y| { self.receptor.contains(y.chain().id()) & (y.atom().element().unwrap() != &Element::H) + // Skip lower resi if both entities are on the same chain + & !((x.chain().id() == y.chain().id()) + & (x.residue().serial_number() > y.residue().serial_number())) + // Skip neighboring residues + & !self.is_neighboring_res_pair(&x, y) + // Skip atoms in the same residue + & !((x.chain().id() == y.chain().id()) + & (x.residue().serial_number() == y.residue().serial_number()) + & (x.residue().insertion_code() == y.residue().insertion_code())) }) .map(|y| (x.clone(), y.clone())) .collect(); @@ -157,18 +199,29 @@ impl Interactions for InteractionComplex { } } -/// Helper function to convert an [`pdbtbx::AtomConformerResidueChainModel`] to a human-readable format -fn hierarchy_to_entity(hierarchy: &AtomConformerResidueChainModel<'_>) -> InteractingEntity { - InteractingEntity { - chain: hierarchy.chain().id().to_string(), - resn: hierarchy.residue().name().unwrap().to_string(), - resi: hierarchy.residue().serial_number(), - altloc: hierarchy - .conformer() - .alternative_location() - .unwrap_or("") - .to_string(), - atomn: hierarchy.atom().name().to_string(), - atomi: hierarchy.atom().serial_number(), - } +/// Find the absolute index of a residue in each chain. +/// +/// Returns two mappings, one from residue to index, and one from index to residue. +fn build_residue_index(model: &PDB) -> HashMap> { + let mut res2idx: HashMap> = HashMap::new(); + + model.chains().for_each(|chain| { + let chain_id = chain.id().to_string(); + res2idx.insert( + chain_id.clone(), + chain + .residues() + .enumerate() + .map(|(i, residue)| { + let (resi, insertion) = residue.id(); + let altloc = match insertion { + Some(insertion) => insertion.to_string(), + None => "".to_string(), + }; + ((resi, altloc), i) + }) + .collect::>(), + ); + }); + res2idx } diff --git a/src/interactions/mod.rs b/src/interactions/mod.rs index d890480..9ba893d 100644 --- a/src/interactions/mod.rs +++ b/src/interactions/mod.rs @@ -5,8 +5,10 @@ pub mod ionic; pub mod structs; pub mod vdw; -use hbond::{find_hydrogen_bond, find_weak_hydrogen_bond}; -use hydrophobic::find_hydrophobic_contact; -use ionic::find_ionic_bond; -use structs::*; -use vdw::find_vdw_contact; +// Re-exports +pub use complex::*; +pub use hbond::{find_hydrogen_bond, find_weak_hydrogen_bond}; +pub use hydrophobic::find_hydrophobic_contact; +pub use ionic::find_ionic_bond; +pub use structs::*; +pub use vdw::find_vdw_contact; diff --git a/src/interactions/structs.rs b/src/interactions/structs.rs index 5951015..0df73b8 100644 --- a/src/interactions/structs.rs +++ b/src/interactions/structs.rs @@ -8,6 +8,8 @@ pub enum Interaction { StericClash, /// Covalent bonded CovalentBond, + // CB-S-S-CB bond + Disulfide, /// Within van der Waals radii VanDerWaalsContact, diff --git a/src/interactions/vdw.rs b/src/interactions/vdw.rs index f79a91e..dfa6c2a 100644 --- a/src/interactions/vdw.rs +++ b/src/interactions/vdw.rs @@ -2,14 +2,18 @@ use super::structs::Interaction; use pdbtbx::*; -/// Search for steric clashes and Van de Waals contacts. +/// Search for steric clashes, disulfides, and Van de Waals contacts. /// /// ## Details -/// If the distance between two atoms is less than the sum of their covalent radii, -/// then they are considered to be in a steric clash. +/// If the distance between two atoms is less than the sum of their covalent radii minus `vdw_comp_factor`, +/// they are considered to be in a steric clash. +/// If the distance is between the covalent radii sum and that plus the `vdw_comp_factor`, +/// they are considered to be covalently bonded. +/// A special case would be disulfides, where we also consider the Cb-S-S-Cb dihedral angle +/// and determine if it is around 90° ([10.1039/c8sc01423j](https://doi.org/10.1039/c8sc01423j)). /// If the distance between two atoms is less than the sum of their Van de Waals radii /// plus the `vdw_comp_factor`, -/// then they are considered to be in a Van de Waals contact. +/// they are considered to be in a Van de Waals contact. /// /// TODO: need bonding information for correct covalent bond identification pub fn find_vdw_contact( @@ -26,9 +30,48 @@ pub fn find_vdw_contact( let dist = entity1.atom().distance(entity2.atom()); match dist { - d if d < sum_cov_radii => Some(Interaction::StericClash), - // d if d < sum_vdw_radii => Some(Interaction::CovalentBond), + d if d < sum_cov_radii - vdw_comp_factor => Some(Interaction::StericClash), + d if d < sum_cov_radii + vdw_comp_factor => match is_disulfide(entity1, entity2) { + true => Some(Interaction::Disulfide), + false => Some(Interaction::CovalentBond), + }, d if d < sum_vdw_radii + vdw_comp_factor => Some(Interaction::VanDerWaalsContact), _ => None, } } + +fn is_disulfide( + entity1: &AtomConformerResidueChainModel, + entity2: &AtomConformerResidueChainModel, +) -> bool { + if (entity1.residue().name().unwrap() == "CYS") + & (entity2.residue().name().unwrap() == "CYS") + & (entity1.atom().name() == "SG") + & (entity2.atom().name() == "SG") + { + let cb1 = entity1 + .residue() + .atoms() + .find(|atom| atom.name() == "CB") + .unwrap(); + let s1 = entity1 + .residue() + .atoms() + .find(|atom| atom.name() == "SG") + .unwrap(); + let cb2 = entity2 + .residue() + .atoms() + .find(|atom| atom.name() == "CB") + .unwrap(); + let s2 = entity2 + .residue() + .atoms() + .find(|atom| atom.name() == "SG") + .unwrap(); + let cssb_dihedral = cb1.dihedral(s1, s2, cb2).abs(); + (60.0..=120.0).contains(&cssb_dihedral) + } else { + false + } +} diff --git a/src/main.rs b/src/main.rs index a38aba7..c4e1fab 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,8 +7,7 @@ mod residues; mod utils; use crate::chains::ChainExt; -use crate::interactions::complex::{InteractionComplex, Interactions}; -use crate::interactions::structs::Interaction; +use crate::interactions::{Interaction, InteractionComplex, Interactions}; use crate::utils::load_model; use clap::Parser; diff --git a/src/utils.rs b/src/utils.rs index fc88488..8311b87 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,6 +1,6 @@ use std::collections::HashSet; -use crate::residues::ResidueExt; +use crate::{interactions::structs::InteractingEntity, residues::ResidueExt}; use pdbtbx::*; /// Open an atomic data file with [`pdbtbx::open`] and remove non-protein residues. @@ -57,6 +57,22 @@ pub fn parse_groups( (ligand, receptor) } +/// Helper function to convert an [`pdbtbx::AtomConformerResidueChainModel`] to a human-readable format +pub fn hierarchy_to_entity(hierarchy: &AtomConformerResidueChainModel<'_>) -> InteractingEntity { + InteractingEntity { + chain: hierarchy.chain().id().to_string(), + resn: hierarchy.residue().name().unwrap().to_string(), + resi: hierarchy.residue().serial_number(), + altloc: hierarchy + .conformer() + .alternative_location() + .unwrap_or("") + .to_string(), + atomn: hierarchy.atom().name().to_string(), + atomi: hierarchy.atom().serial_number(), + } +} + #[cfg(test)] mod tests { use super::*;