-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feat/sasa (#9) sub-commands contacts, sasa, and seq
* feat(sasa): add method to calculate the per-atom SASA * fix(sasa): ignore waters when calculating SASA * refactor(cli): BREAKING CHANGE: move contacts and sasa to separate sub-commands * style(cli): sort imports * refactor(cli): hardcode output path by contacts * feat(cli): separate contacts and seq sub-commands * docs: version bump to v0.3.0
- Loading branch information
Showing
10 changed files
with
411 additions
and
199 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,3 +15,6 @@ Cargo.lock | |
|
||
# macOS files | ||
.DS_Store | ||
|
||
# Test generated files | ||
*.csv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,9 @@ | ||
[package] | ||
name = "arpeggia" | ||
version = "0.2.0" | ||
version = "0.3.0" | ||
description = "A port of the Arpeggio library for Rust" | ||
edition = "2021" | ||
authors = ["Yi Zhou <[email protected]>"] | ||
|
||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||
|
||
|
@@ -13,5 +14,6 @@ pdbtbx = { version = "0.11.0", features = ["rayon", "rstar"], git = "https://git | |
polars = { version = "0.39.2", features = ["lazy"] } | ||
rayon = "1.10.0" | ||
rstar = "0.12.0" | ||
rust-sasa = "0.2.2" | ||
tracing = "0.1.40" | ||
tracing-subscriber = "0.3.18" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
use crate::interactions::{InteractionComplex, Interactions, ResultEntry}; | ||
use crate::utils::{load_model, write_df_to_csv}; | ||
use clap::Parser; | ||
use pdbtbx::*; | ||
use polars::prelude::*; | ||
use std::path::{Path, PathBuf}; | ||
use tracing::{debug, error, info, trace, warn}; | ||
|
||
#[derive(Parser, Debug, Clone)] | ||
#[command(version, about)] | ||
pub(crate) struct Args { | ||
/// Path to the PDB or mmCIF file to be analyzed | ||
#[arg(short, long)] | ||
input: PathBuf, | ||
|
||
/// Output directory | ||
#[arg(short, long)] | ||
output: PathBuf, | ||
|
||
/// Group chains for interactions: | ||
/// e.g. A,B/C,D | ||
/// where chains A and B are the "ligand" and C and D are the "receptor" | ||
#[arg(short, long)] | ||
groups: String, | ||
|
||
/// Compensation factor for VdW radii dependent interaction types | ||
#[arg(short = 'c', long = "vdw-comp", default_value_t = 0.1)] | ||
vdw_comp: f64, | ||
|
||
/// Distance cutoff when searching for neighboring atoms | ||
#[arg(short, long, default_value_t = 4.5)] | ||
dist_cutoff: f64, | ||
|
||
/// Number of threads to use for parallel processing | ||
#[arg(short = 'j', long = "num-threads", default_value_t = 0)] | ||
num_threads: usize, | ||
} | ||
|
||
pub(crate) fn run(args: &Args) { | ||
trace!("{args:?}"); | ||
|
||
// Create Rayon thread pool | ||
rayon::ThreadPoolBuilder::new() | ||
.num_threads(args.num_threads) | ||
.build_global() | ||
.unwrap(); | ||
debug!("Using {} thread(s)", rayon::current_num_threads()); | ||
|
||
// Make sure `input` exists | ||
let input_path = Path::new(&args.input).canonicalize().unwrap(); | ||
let input_file: String = input_path.to_str().unwrap().parse().unwrap(); | ||
|
||
// Load file as complex structure | ||
let (pdb, pdb_warnings) = load_model(&input_file); | ||
if !pdb_warnings.is_empty() { | ||
pdb_warnings.iter().for_each(|e| match e.level() { | ||
pdbtbx::ErrorLevel::BreakingError => error!("{e}"), | ||
pdbtbx::ErrorLevel::InvalidatingError => error!("{e}"), | ||
_ => warn!("{e}"), | ||
}); | ||
} | ||
|
||
let (df_atomic, df_ring, i_complex) = | ||
get_contacts(pdb, args.groups.as_str(), args.vdw_comp, args.dist_cutoff); | ||
|
||
// Information on the sequence of the chains in the model | ||
let num_chains = i_complex.ligand.len() + i_complex.receptor.len(); | ||
info!( | ||
"Loaded {} {}", | ||
num_chains, | ||
match num_chains { | ||
1 => "chain", | ||
_ => "chains", | ||
} | ||
); | ||
|
||
debug!( | ||
"Parsed ligand chains {lig:?}; receptor chains {receptor:?}", | ||
lig = i_complex.ligand, | ||
receptor = i_complex.receptor | ||
); | ||
|
||
// Prepare output directory | ||
let output_path = Path::new(&args.output).canonicalize().unwrap(); | ||
let _ = std::fs::create_dir_all(output_path.clone()); | ||
let output_file = output_path.join("contacts.csv"); | ||
|
||
let output_file_str = output_file.to_str().unwrap(); | ||
debug!("Results will be saved to {output_file_str}"); | ||
|
||
// Save results and log the identified interactions | ||
info!( | ||
"Found {} atom-atom contacts\n{}", | ||
df_atomic.shape().0, | ||
df_atomic | ||
); | ||
let df_clash = df_atomic | ||
.clone() | ||
.lazy() | ||
.filter(col("interaction").eq(lit("StericClash"))) | ||
.collect() | ||
.unwrap(); | ||
if df_clash.height() > 0 { | ||
warn!("Found {} steric clashes\n{}", df_clash.shape().0, df_clash); | ||
} | ||
info!("Found {} ring contacts\n{}", df_ring.shape().0, df_ring); | ||
|
||
// Concate dataframes for saving to CSV | ||
let mut df_contacts = concat( | ||
[df_atomic.clone().lazy(), df_ring.clone().lazy()], | ||
UnionArgs::default(), | ||
) | ||
.unwrap() | ||
.collect() | ||
.unwrap(); | ||
|
||
// Save res to CSV files | ||
write_df_to_csv(&mut df_contacts, output_file); | ||
} | ||
|
||
pub fn get_contacts( | ||
pdb: PDB, | ||
groups: &str, | ||
vdw_comp: f64, | ||
dist_cutoff: f64, | ||
) -> (DataFrame, DataFrame, InteractionComplex) { | ||
let i_complex = InteractionComplex::new(pdb, groups, vdw_comp, dist_cutoff); | ||
|
||
// Find interactions | ||
let atomic_contacts = i_complex.get_atomic_contacts(); | ||
let df_atomic = results_to_df(&atomic_contacts); | ||
|
||
let mut ring_contacts: Vec<ResultEntry> = Vec::new(); | ||
ring_contacts.extend(i_complex.get_ring_atom_contacts()); | ||
ring_contacts.extend(i_complex.get_ring_ring_contacts()); | ||
let df_ring = results_to_df(&ring_contacts) | ||
// .drop_many(&["from_atomn", "from_atomi", "to_atomn", "to_atomi"]) | ||
.sort( | ||
[ | ||
"from_chain", | ||
"from_resi", | ||
"from_altloc", | ||
"to_chain", | ||
"to_resi", | ||
"to_altloc", | ||
], | ||
Default::default(), | ||
) | ||
.unwrap(); | ||
|
||
(df_atomic, df_ring, i_complex) | ||
} | ||
|
||
fn results_to_df(res: &[ResultEntry]) -> DataFrame { | ||
df!( | ||
"interaction" => res.iter().map(|x| x.interaction.to_string()).collect::<Vec<String>>(), | ||
"distance" => res.iter().map(|x| x.distance).collect::<Vec<f64>>(), | ||
"from_chain" => res.iter().map(|x| x.ligand.chain.to_owned()).collect::<Vec<String>>(), | ||
"from_resn" => res.iter().map(|x| x.ligand.resn.to_owned()).collect::<Vec<String>>(), | ||
"from_resi" => res.iter().map(|x| x.ligand.resi as i64).collect::<Vec<i64>>(), | ||
"from_altloc" => res.iter().map(|x| x.ligand.altloc.to_owned()).collect::<Vec<String>>(), | ||
"from_atomn" => res.iter().map(|x| x.ligand.atomn.to_owned()).collect::<Vec<String>>(), | ||
"from_atomi" => res.iter().map(|x| x.ligand.atomi as i64).collect::<Vec<i64>>(), | ||
"to_chain" => res.iter().map(|x| x.receptor.chain.to_owned()).collect::<Vec<String>>(), | ||
"to_resn" => res.iter().map(|x| x.receptor.resn.to_owned()).collect::<Vec<String>>(), | ||
"to_resi" => res.iter().map(|x| x.receptor.resi as i64).collect::<Vec<i64>>(), | ||
"to_altloc" => res.iter().map(|x| x.receptor.altloc.to_owned()).collect::<Vec<String>>(), | ||
"to_atomn" => res.iter().map(|x| x.receptor.atomn.to_owned()).collect::<Vec<String>>(), | ||
"to_atomi" => res.iter().map(|x| x.receptor.atomi as i64).collect::<Vec<i64>>(), | ||
) | ||
.unwrap() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
pub mod contacts; | ||
pub mod pdb2seq; | ||
pub mod sasa; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
use crate::chains::ChainExt; | ||
use clap::Parser; | ||
use std::path::PathBuf; | ||
|
||
use crate::utils::load_model; | ||
use std::path::Path; | ||
|
||
#[derive(Parser, Debug, Clone)] | ||
#[command(version, about)] | ||
pub(crate) struct Args { | ||
/// Path to the PDB or mmCIF file to be analyzed | ||
input: Vec<PathBuf>, | ||
} | ||
|
||
pub(crate) fn run(args: &Args) { | ||
for f in &args.input { | ||
// Make sure `input` exists | ||
let input_path = Path::new(f).canonicalize().unwrap(); | ||
let input_file: String = input_path.to_str().unwrap().parse().unwrap(); | ||
|
||
// Load file and print sequences | ||
let (pdb, _) = load_model(&input_file); | ||
println!("File: {}", input_file); | ||
for chain in pdb.chains() { | ||
println!("{}: {}", chain.id(), chain.pdb_seq().join("")); | ||
} | ||
println!(); | ||
} | ||
} |
Oops, something went wrong.