// Patch summary. This is a unified git diff touching five Rust files. The diff
// text below is whitespace-collapsed and is reproduced unchanged; only this
// leading commentary has been added (text before the first `diff --git`
// header is not part of any hunk).
//
// src/atac/commands.rs
//   - Adds a `call_peaks: bool` field to `ProcessOpts`, declared with
//     `#[arg(long)]` (the `--call-peaks` option named in the messages below).
//
// src/atac/process.rs
//   - `check_progs` gains an `opts: &ProcessOpts` parameter. When
//     `opts.call_peaks` is set, it reads `rp.macs` and checks the recorded
//     version against ">=2.2.9, <3.0.0", returning the error on a mismatch.
//   - `macs_call_peaks` now reads `map.bed` instead of `map_sorted.bed` and
//     passes `callpeak` as the first argument to the macs executable.
//   - It then logs the command line, runs it, and bails when the process does
//     not report success.
//   - Finally it writes `time_info.macs_time` and `cmd_info.macs_cmd` back into
//     `simpleaf_process_log.json` under `opts.output`.
//
// src/main.rs
//   - Passes `&process_opts` to `atac::process::check_progs`.
//
// src/utils/prog_utils.rs
//   - `check_version_constraints_from_output` now emits a warning and keeps
//     only the first three dot-separated parts of a version string that has
//     more than three, before calling `Version::parse`. The error message
//     reports the truncated string.
//   - The macs2 constraint in `get_required_progs_from_paths` changes from
//     ">=2.2.9.1, <3.0.0" to ">=2.2.9, <3.0.0".
//
// src/utils/remote.rs (new file)
//   - Adds `is_remote_url`, which returns true when the input starts with
//     "www.", "http://" or "https://".
//
// NOTE(review): generic parameter lists look stripped from this copy of the
//   patch (`check_progs>(`, `collect::>()`, `is_remote_url>(`), so it probably
//   will not apply or compile as written — confirm against the original commit.
// NOTE(review): several added lines panic instead of returning an error:
//   `.expect(...)` on `rp.macs`, `.expect("could not execute [atac::macs]")`,
//   and `.unwrap()` on `serde_json::to_string_pretty`. The
//   `Version::parse(&ver).unwrap()` call also still panics on a string that is
//   not valid semver after truncation. Consider propagating these with `?`.
// NOTE(review): the context comment that mentions an instance of `User` sits
//   above a `Value` binding; it looks like a leftover — confirm.
diff --git a/src/atac/commands.rs b/src/atac/commands.rs index 695e36c..fc89510 100644 --- a/src/atac/commands.rs +++ b/src/atac/commands.rs @@ -244,6 +244,10 @@ pub struct ProcessOpts { #[arg(short, long, default_value_t = 16, display_order = 5)] pub threads: u32, + /// do peak calling after generating the bed file + #[arg(long)] + pub call_peaks: bool, + /// Use the provided file as the unfiltered permit list (i.e. whitelist). /// This argument only needs to be provided if you are providing the permit list explicitly, /// overriding the default permit list for the provided chemistry. diff --git a/src/atac/process.rs b/src/atac/process.rs index 4ac7cca..ae534e8 100644 --- a/src/atac/process.rs +++ b/src/atac/process.rs @@ -127,7 +127,10 @@ fn add_read_args(map_cmd: &mut std::process::Command, opts: &ProcessOpts) -> any Ok(()) } -pub(crate) fn check_progs>(af_home_path: P) -> anyhow::Result<()> { +pub(crate) fn check_progs>( + af_home_path: P, + opts: &ProcessOpts, +) -> anyhow::Result<()> { let af_home_path = af_home_path.as_ref(); // Read the JSON contents of the file as an instance of `User`. 
let v: Value = prog_utils::inspect_af_home(af_home_path)?; @@ -161,6 +164,21 @@ pub(crate) fn check_progs>(af_home_path: P) -> anyhow::Result<()> Err(e) => return Err(e), } + if opts.call_peaks { + let macs_prog_info = rp + .macs + .as_ref() + .expect("macs2 program should be properly set if using the `--call-peaks` option"); + match prog_utils::check_version_constraints( + "macs2", + ">=2.2.9, <3.0.0", + &macs_prog_info.version, + ) { + Ok(macs_ver) => info!("found macs2 version {:#}, proceeding", macs_ver), + Err(e) => return Err(e), + } + } + Ok(()) } @@ -380,11 +398,12 @@ fn macs_call_peaks(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<() .expect("macs program info should be properly set."); let gpl_dir = opts.output.join("af_process"); - let bed_input = gpl_dir.join("map_sorted.bed"); + let bed_input = gpl_dir.join("map.bed"); let peaks_output = gpl_dir.join("peaks.narrowPeak"); let mut macs_cmd = std::process::Command::new(format!("{}", &macs_prog_info.exe_path.display())); macs_cmd + .arg("callpeak") .arg("-f") .arg("BEDPE") .arg("--nomodel") @@ -401,6 +420,48 @@ fn macs_call_peaks(af_home_path: &Path, opts: &ProcessOpts) -> anyhow::Result<() .arg("-n") .arg(peaks_output); + let macs_cmd_string = prog_utils::get_cmd_line_string(&macs_cmd); + info!("macs2 command : {}", macs_cmd_string); + + let macs_start = Instant::now(); + let macs_proc_out = prog_utils::execute_command(&mut macs_cmd, CommandVerbosityLevel::Quiet) + .expect("could not execute [atac::macs]"); + let macs_duration = macs_start.elapsed(); + + if !macs_proc_out.status.success() { + bail!( + "atac::macs failed with exit status {:?}", + macs_proc_out.status + ); + } else { + info!("macs completed successfully in {:#?}", macs_duration); + } + + let af_process_info_file = opts.output.join("simpleaf_process_log.json"); + let json_file = std::fs::File::open(af_process_info_file.clone()) + .with_context(|| format!("couldn't open file {}", af_process_info_file.display()))?; + let 
json_reader = BufReader::new(json_file); + let mut af_process_info: serde_json::Value = serde_json::from_reader(json_reader) + .with_context(|| { + format!( + "couldn't parse JSON content from {}", + af_process_info_file.display() + ) + })?; + + af_process_info["time_info"]["macs_time"] = json!(macs_duration.as_secs_f64()); + af_process_info["cmd_info"]["macs_cmd"] = json!(macs_cmd_string); + + // write the relevant info about + // our run to file. + std::fs::write( + &af_process_info_file, + serde_json::to_string_pretty(&af_process_info).unwrap(), + ) + .with_context(|| format!("could not write {}", af_process_info_file.display()))?; + + info!("successfully called peaks using macs2."); + Ok(()) } diff --git a/src/main.rs b/src/main.rs index 48c80ae..64efefe 100644 --- a/src/main.rs +++ b/src/main.rs @@ -107,7 +107,7 @@ fn main() -> anyhow::Result<()> { // processing for ATAC-seq data Commands::Atac(AtacCommand::Process(process_opts)) => { // validate versions - atac::process::check_progs(&af_home_path)?; + atac::process::check_progs(&af_home_path, &process_opts)?; // first we map the reads atac::process::map_reads(af_home_path.as_path(), &process_opts)?; // then we generate the permit list and sort the file diff --git a/src/utils/prog_utils.rs b/src/utils/prog_utils.rs index 6fe3d56..4458196 100644 --- a/src/utils/prog_utils.rs +++ b/src/utils/prog_utils.rs @@ -242,7 +242,18 @@ pub fn check_version_constraints_from_output>( Ok(vs) => { let x = vs.split_whitespace(); if let Some(version) = x.last() { - let parsed_version = Version::parse(version).unwrap(); + let ver = if version.split(".").count() > 3 { + warn!("version info {} is not a valid semver (more than 3 dotted version parts; looking only at the major, minor & patch versions).", version); + version + .split(".") + .take(3) + .collect::>() + .join(".") + .to_string() + } else { + version.to_string() + }; + let parsed_version = Version::parse(&ver).unwrap(); let req = 
VersionReq::parse(req_string.as_ref()).unwrap(); if req.matches(&parsed_version) { return Ok(parsed_version); @@ -250,7 +261,7 @@ pub fn check_version_constraints_from_output>( return Err(anyhow!( "Parsed version of {} ({:?}) does not satisfy constraints {}. Please install a compatible version.", prog_name, - version, + ver, req )); } @@ -395,7 +406,7 @@ pub fn get_required_progs_from_paths( if let Some(macs) = opt_macs { let st = macs.display().to_string(); let sr = run_fun!($st --version); - let v = check_version_constraints_from_output("macs2", ">=2.2.9.1, <3.0.0", sr)?; + let v = check_version_constraints_from_output("macs2", ">=2.2.9, <3.0.0", sr)?; rp.macs = Some(ProgInfo { exe_path: macs, version: format!("{}", v), diff --git a/src/utils/remote.rs b/src/utils/remote.rs new file mode 100644 index 0000000..55d37db --- /dev/null +++ b/src/utils/remote.rs @@ -0,0 +1,8 @@ +/// Checks the provided URL to determine if it is a remote or local URL. +/// The current implementation is a heuristic, and may not cover all cases. +/// Decide if we want to pull in a crate like [url](https://crates.io/crates/url) +/// instead to do more comprehensive testing. +pub(crate) fn is_remote_url>(p: T) -> bool { + let pr = p.as_ref(); + pr.starts_with("www.") || pr.starts_with("http://") || pr.starts_with("https://") +}