Skip to content

Commit

Permalink
refactor: untangle data flow structs (#412)
Browse files Browse the repository at this point in the history
* Extract BarcodeFates type and clean up unnecessary mutability.
* Remove a ton of argument unpacking.
* Remove more unnecessary types that know too much.
* Remove the unused MainEncloneOutput type.
* Move creation of ref id index into the sole place it's used.
* Remove pre-computed is_bcr/is_tcr in favor of a method.
  • Loading branch information
macklin-10x authored Apr 2, 2024
1 parent 158f7c9 commit 3759c79
Show file tree
Hide file tree
Showing 23 changed files with 127 additions and 176 deletions.
5 changes: 3 additions & 2 deletions enclone/src/graph_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@

use enclone_core::barcode_fate::BarcodeFate;
use enclone_core::defs::{EncloneControl, TigData};
use enclone_core::enclone_structs::BarcodeFates;
use graph_simple::GraphSimple;
use io_utils::fwriteln;
use petgraph::prelude::*;
use rayon::prelude::*;
use std::cmp::{max, min};
use std::collections::HashMap;

use std::io::Write;
use string_utils::strme;
use vector_utils::{bin_member, bin_position, erase_if, lower_bound, next_diff12_3, reverse_sort};
Expand All @@ -34,7 +35,7 @@ pub fn graph_filter(
ctl: &EncloneControl,
tig_bc: &mut Vec<Vec<TigData>>,
graph: bool,
fate: &mut [HashMap<String, BarcodeFate>],
fate: &mut [BarcodeFates],
) {
let mut ndels = 0;
let mut seqs = Vec::<(&[u8], bool, &str, usize)>::new();
Expand Down
5 changes: 3 additions & 2 deletions enclone/src/info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// This file provides the single function build_info.

use enclone_core::barcode_fate::BarcodeFate;
use enclone_core::enclone_structs::BarcodeFates;
use vdj_ann::refx;

use self::refx::RefData;
Expand All @@ -12,7 +13,7 @@ use debruijn::{dna_string::DnaString, Mer};
use enclone_core::defs::{CloneInfo, EncloneControl, ExactClonotype};
use enclone_core::print_tools::emit_codon_color_escape;
use rayon::prelude::*;
use std::collections::HashMap;

use std::convert::TryInto;
use std::fmt::Write;
use string_utils::strme;
Expand All @@ -22,7 +23,7 @@ pub fn build_info(
refdata: &RefData,
ctl: &EncloneControl,
exact_clonotypes: &mut [ExactClonotype],
fate: &mut [HashMap<String, BarcodeFate>],
fate: &mut [BarcodeFates],
) -> Vec<CloneInfo> {
// Build info about clonotypes. We create a data structure info.
// An entry in info is a clonotype having appropriate properties.
Expand Down
2 changes: 0 additions & 2 deletions enclone/src/join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ use std::io::Write;
use vector_utils::{bin_member, erase_if, next_diff1_2};

pub fn join_exacts(
is_bcr: bool,
to_bc: &HashMap<(usize, usize), Vec<String>>,
refdata: &RefData,
ctl: &EncloneControl,
Expand Down Expand Up @@ -104,7 +103,6 @@ pub fn join_exacts(
// what this means.

join_core(
is_bcr,
i,
j,
ctl,
Expand Down
2 changes: 0 additions & 2 deletions enclone/src/join_core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ use std::collections::HashMap;
use vdj_ann::refx::RefData;

pub fn join_core<'a>(
is_bcr: bool,
i: usize,
j: usize,
ctl: &EncloneControl,
Expand All @@ -34,7 +33,6 @@ pub fn join_core<'a>(
continue;
}
if join_one(
is_bcr,
k1,
k2,
ctl,
Expand Down
4 changes: 2 additions & 2 deletions enclone/src/misc1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
use enclone_core::{
barcode_fate::BarcodeFate,
defs::{CloneInfo, EncloneControl, ExactClonotype, TigData},
enclone_structs::BarcodeFates,
};
use equiv::EquivRel;
use itertools::Itertools;
#[cfg(not(target_os = "windows"))]
use pager::Pager;

use std::collections::HashMap;
use std::time::Instant;
use string_utils::stringme;
use vector_utils::{
Expand Down Expand Up @@ -198,7 +198,7 @@ pub fn lookup_heavy_chain_reuse(
pub fn cross_filter(
ctl: &EncloneControl,
tig_bc: &mut Vec<Vec<TigData>>,
fate: &mut [HashMap<String, BarcodeFate>],
fate: &mut [BarcodeFates],
) {
// Get the list of dataset origins. Here we allow the same origin name to have been used
// for more than one donor, as we haven't explicitly prohibited that.
Expand Down
4 changes: 2 additions & 2 deletions enclone/src/misc2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ use amino::nucleotide_to_aminoacid_sequence;
use debruijn::dna_string::DnaString;
use enclone_core::barcode_fate::BarcodeFate;
use enclone_core::defs::{EncloneControl, ExactClonotype, Junction, TigData, TigData0, TigData1};
use enclone_core::enclone_structs::BarcodeFates;
use io_utils::{fwriteln, open_for_write_new};
use rayon::prelude::*;
use std::cmp::{max, min};
use std::collections::HashMap;

use std::fmt::Write as _;
use std::io::Write;
Expand Down Expand Up @@ -273,7 +273,7 @@ pub fn find_exact_subclonotypes(
ctl: &EncloneControl,
tig_bc: &[Vec<TigData>],
refdata: &RefData,
fate: &mut [HashMap<String, BarcodeFate>],
fate: &mut [BarcodeFates],
) -> Vec<ExactClonotype> {
let mut exact_clonotypes = Vec::<ExactClonotype>::new();
let mut r = 0;
Expand Down
13 changes: 10 additions & 3 deletions enclone_args/src/read_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use self::transcript::is_productive_contig;
use debruijn::dna_string::DnaString;
use enclone_core::barcode_fate::BarcodeFate;
use enclone_core::defs::{EncloneControl, OriginInfo, TigData};
use enclone_core::enclone_structs::BarcodeFates;
use io_utils::{open_maybe_compressed, path_exists};
use martian_filetypes::json_file::{Json, LazyJsonReader};
use martian_filetypes::LazyRead;
Expand Down Expand Up @@ -697,20 +698,26 @@ pub struct Annotations {
pub gex_cells: Vec<Vec<String>>,
pub gex_cells_specified: Vec<bool>,
pub tig_bc: Vec<Vec<TigData>>,
pub fate: Vec<HashMap<String, BarcodeFate>>,
pub fate: Vec<BarcodeFates>,
}

pub fn parse_json_annotations_files(
ctl: &EncloneControl,
refdata: &RefData,
to_ref_index: &HashMap<usize, usize>,
) -> Result<Annotations, String> {
// Note: only tracking truncated seq and quals initially
let ann = if !ctl.gen_opt.cellranger {
"all_contig_annotations.json"
} else {
"contig_annotations.json"
};
let to_ref_index = refdata
.id
.iter()
.take(refdata.refs.len())
.enumerate()
.map(|(i, &id)| (id as usize, i))
.collect();
let results = ctl
.origin_info
.dataset_path
Expand All @@ -728,7 +735,7 @@ pub fn parse_json_annotations_files(
li,
&json,
refdata,
to_ref_index,
&to_ref_index,
ctl.gen_opt.reannotate,
ctl,
)
Expand Down
26 changes: 26 additions & 0 deletions enclone_core/src/defs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,32 @@ pub struct GeneralOpt {
pub session_narrative: String,
}

impl GeneralOpt {
    /// Reports whether the run is treated as BCR.
    ///
    /// Returns `false` only when the `tcr` flag is set; in every other case,
    /// including when neither flag is set, the result is `true`.
    ///
    /// # Panics
    ///
    /// Panics if `tcr` and `bcr` are both set.
    pub fn is_bcr(&self) -> bool {
        if !self.tcr {
            // Historical behavior, kept on purpose: the original computation
            // treated "not TCR" as BCR without ever consulting `self.bcr`.
            // A stricter variant would additionally check:
            // assert!(self.bcr);
            true
        } else {
            // The two flags are mutually exclusive.
            assert!(!self.bcr);
            false
        }
    }

    /// Reports whether the run is treated as TCR.
    ///
    /// Returns `false` only when the `bcr` flag is set; in every other case,
    /// including when neither flag is set, the result is `true`.
    ///
    /// # Panics
    ///
    /// Panics if `bcr` and `tcr` are both set.
    pub fn is_tcr(&self) -> bool {
        if !self.bcr {
            // Historical behavior, kept on purpose: the original computation
            // treated "not BCR" as TCR without ever consulting `self.tcr`.
            // A stricter variant would additionally check:
            // assert!(self.tcr);
            true
        } else {
            // The two flags are mutually exclusive.
            assert!(!self.tcr);
            false
        }
    }
}

#[derive(Clone, PartialEq)]
pub struct GeneScanOpts {
pub test: LinearCondition,
Expand Down
32 changes: 1 addition & 31 deletions enclone_core/src/enclone_structs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,44 +10,16 @@ use qd::Double;
use std::{collections::HashMap, time::Instant};
use vdj_ann::refx;

#[derive(Clone, Debug, Default)]
pub struct MainEncloneOutput {
pub pics: Vec<String>, // clonotype tables
pub last_widths: Vec<u32>,
pub svgs: Vec<String>, // SVG objects
pub summary: String, // summary
pub metrics: Vec<String>,
pub dataset_names: Vec<String>,
pub parseable_stdouth: bool,
pub noprint: bool,
pub noprintx: bool,
pub html: bool,
pub ngroup: bool,
pub pretty: bool,
}

#[derive(Default)]
pub struct EncloneState {
pub inter: EncloneIntermediates,
pub outs: MainEncloneOutput,
}

#[derive(Default)]
pub struct EncloneSetup {
pub ctl: EncloneControl,
pub ann: String,
pub gex_info: GexInfo,
pub tall: Option<Instant>,
pub refdata: RefData,
pub is_bcr: bool,
pub to_ref_index: HashMap<usize, usize>,
}

#[derive(Default)]
pub struct EncloneIntermediates {
pub setup: EncloneSetup,
pub ex: EncloneExacts,
}
pub type BarcodeFates = HashMap<String, BarcodeFate>;

#[derive(Default, Clone)]
pub struct EncloneExacts {
Expand All @@ -60,8 +32,6 @@ pub struct EncloneExacts {
pub join_info: Vec<JoinInfo>,
pub drefs: Vec<DonorReferenceItem>,
pub sr: Vec<Vec<Double>>,
pub fate: Vec<HashMap<String, BarcodeFate>>, // GETS MODIFIED SUBSEQUENTLY
pub is_bcr: bool,
pub allele_data: AlleleData,
}

Expand Down
2 changes: 1 addition & 1 deletion enclone_core/src/join_one.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ pub fn partial_bernoulli_sum(n: usize, k: usize) -> f64 {
// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓

pub fn join_one<'a>(
is_bcr: bool,
k1: usize,
k2: usize,
ctl: &EncloneControl,
Expand All @@ -76,6 +75,7 @@ pub fn join_one<'a>(
refdata: &RefData,
dref: &[DonorReferenceItem],
) -> bool {
let is_bcr = ctl.gen_opt.is_bcr();
// Do not merge onesies or foursies with anything. Deferred until later.
// Note that perhaps some foursies should be declared doublets and deleted.
// Note onesies merging above is turned off so this appears to be moot.
Expand Down
3 changes: 0 additions & 3 deletions enclone_print/src/define_mat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ pub fn setup_define_mat(orbit: &[i32], info: &[CloneInfo]) -> (Vec<Od>, Vec<usiz
// algorithm, and is the number of columns (chains) in the clonotype table.

pub fn define_mat(
is_bcr: bool,
to_bc: &HashMap<(usize, usize), Vec<String>>,
sr: &[Vec<Double>],
ctl: &EncloneControl,
Expand Down Expand Up @@ -171,7 +170,6 @@ pub fn define_mat(
if info[l1].lens == info[l2].lens {
let mut pot = Vec::<PotentialJoin<'_>>::new();
if join_one(
is_bcr,
l1,
l2,
ctl,
Expand Down Expand Up @@ -247,7 +245,6 @@ pub fn define_mat(
}
let mut pot = Vec::<PotentialJoin<'_>>::new();
if join_one(
is_bcr,
i1,
i2,
ctl,
Expand Down
Loading

0 comments on commit 3759c79

Please sign in to comment.