diff --git a/enclone/src/allele.rs b/enclone/src/allele.rs index 0a1792ea4..8c8e5ad45 100644 --- a/enclone/src/allele.rs +++ b/enclone/src/allele.rs @@ -115,7 +115,7 @@ pub fn find_alleles( let mut alls = Vec::, Vec, usize, usize, String)>>::new(); let mut i = 0; while i < allx.len() { - // let j = next_diff1_6(&allx, i as i32) as usize; + // let j = next_diff1_6(&allx, i); let mut j = i + 1; while j < allx.len() { if allx[j].0 != allx[i].0 { @@ -188,7 +188,7 @@ pub fn find_alleles( trace.sort_unstable(); let mut i = 0; while i < trace.len() { - let j = next_diff1_2(&trace, i as i32) as usize; + let j = next_diff1_2(&trace, i); for k in i..j { if !to_delete[trace[k].1] { for l in i..j { @@ -223,7 +223,7 @@ pub fn find_alleles( let mut freqs = Vec::<(usize, Vec, Vec>, u8)>::new(); let mut i = 0; while i < bases.len() { - let j = next_diff1_3(&bases, i as i32) as usize; + let j = next_diff1_3(&bases, i); let mut x = Vec::::new(); let mut y = Vec::>::new(); for base in &bases[i..j] { @@ -297,7 +297,7 @@ pub fn find_alleles( let mut i = 0; let mut have_ref = false; while i < types.len() { - let j = next_diff1_2(&types, i as i32) as usize; + let j = next_diff1_2(&types, i); // Determine if the contigs equal reference at the positions in ps. diff --git a/enclone/src/graph_filter.rs b/enclone/src/graph_filter.rs index 340794963..a3730ade4 100644 --- a/enclone/src/graph_filter.rs +++ b/enclone/src/graph_filter.rs @@ -97,7 +97,7 @@ pub fn graph_filter( let mut edges1 = Vec::<(usize, usize, (usize, usize))>::new(); let mut i = 0; while i < edges0.len() { - let j = next_diff12_3(&edges0, i as i32) as usize; + let j = next_diff12_3(&edges0, i); let mut weight = 0; for e in &edges0[i..j] { weight += e.2; diff --git a/enclone/src/join.rs b/enclone/src/join.rs index a4fcae3c4..cee17b4c6 100644 --- a/enclone/src/join.rs +++ b/enclone/src/join.rs @@ -55,7 +55,7 @@ pub fn join_exacts( bcx.sort(); let mut i = 0; while i < bcx.len() { - let j = next_diff1_2(&bcx, i as i32) as usize; + let j = next_diff1_2(&bcx, i); for k in i + 1..j { eq.join(bcx[i].1 as i32, bcx[k].1 as i32); } diff --git a/enclone/src/join2.rs b/enclone/src/join2.rs index a96801596..7810f78d1 100644 --- a/enclone/src/join2.rs +++ b/enclone/src/join2.rs @@ -76,7 +76,7 @@ pub fn finish_join( ox.sort_unstable(); let mut i = 0; while i < ox.len() { - let j = next_diff1_2(&ox, i as i32) as usize; + let j = next_diff1_2(&ox, i); for k in i..j - 1 { eq.join(ox[k].1, ox[k + 1].1); } diff --git a/enclone/src/misc1.rs b/enclone/src/misc1.rs index 12a81b3bf..56af8966e 100644 --- a/enclone/src/misc1.rs +++ b/enclone/src/misc1.rs @@ -137,7 +137,7 @@ pub fn lookup_heavy_chain_reuse( xcdr3.sort(); let mut i = 0; while i < xcdr3.len() { - let j = next_diff1_3(&xcdr3, i as i32) as usize; + let j = next_diff1_3(&xcdr3, i); let mut ids = Vec::::new(); for cdr in &xcdr3[i..j] { ids.push(cdr.2); @@ -266,7 +266,7 @@ pub fn cross_filter( let mut blacklist = Vec::<&[u8]>::new(); let mut i = 0; while i < vjx.len() { - let j = next_diff1_3(&vjx, i as i32) as usize; + let j = next_diff1_3(&vjx, i); if j - i == 1 { let dataset_index = vjx[i].1; let n = vjx[i].2; diff --git a/enclone/src/misc2.rs b/enclone/src/misc2.rs index d477f1c5d..1111f3e5d 100644 --- a/enclone/src/misc2.rs +++ b/enclone/src/misc2.rs @@ -52,7 +52,7 @@ pub fn filter_gelbead_contamination( bch[l].sort_unstable(); let mut m = 0; while m < bch[l].len() { - let n = next_diff12_4(&bch[l], m as i32) as usize; + let n = next_diff12_4(&bch[l], m); let mut count = 0; for u1 in m..n { for u2 in m..n { @@ -121,7 +121,7 @@ pub fn create_exact_subclonotype_core( let mut callsx = Vec::<(usize, u8)>::new(); // (qual,base) let mut i = 0; while i < calls.len() { - let j = next_diff1_2(&calls, i as i32) as usize; + let j = next_diff1_2(&calls, i); let mut q = 0; for c in &calls[i..j] { q += c.1 as usize; @@ -156,7 +156,7 @@ pub fn create_exact_subclonotype_core( let mut callsx = Vec::<(usize, u8)>::new(); // (qual,base) let mut i = 0; while i < calls.len() { - let j = next_diff1_2(&calls, i as i32) as usize; + let j = next_diff1_2(&calls, i); let mut q = 0; for c in &calls[i..j] { q += c.1 as usize; @@ -373,7 +373,7 @@ pub fn find_exact_subclonotypes( bc.sort_unstable(); let mut i = 0; while i < bc.len() { - let j = next_diff1_2(&bc, i as i32) as usize; + let j = next_diff1_2(&bc, i); if j - i >= 2 { for bck in &bc[i..j] { let t = bck.1; @@ -535,7 +535,7 @@ pub fn search_for_shm_indels(ctl: &EncloneControl, tig_bc: &[Vec]) { unique_sort(&mut cs); let mut i = 0; while i < cs.len() { - let j = next_diff1_3(&cs, i as i32) as usize; + let j = next_diff1_3(&cs, i); if j - i > 1 { println!("{}", cs[i].2); } @@ -566,7 +566,7 @@ pub fn check_for_barcode_reuse( let mut reuse = Vec::<(usize, usize)>::new(); let mut i = 0; while i < all.len() { - let j = next_diff1_3(&all, i as i32) as usize; + let j = next_diff1_3(&all, i); for k1 in i..j { for k2 in k1 + 1..j { // We require identity on one cdr3_aa. That seems to be about the right amount diff --git a/enclone/src/secret.rs b/enclone/src/secret.rs index ecfb05c3e..3651bc6a9 100644 --- a/enclone/src/secret.rs +++ b/enclone/src/secret.rs @@ -209,11 +209,11 @@ pub fn fetch_secmem(ctl: &mut EncloneControl) -> Result<(), String> { data.sort(); let mut i = 0; while i < data.len() { - let j = next_diff1_3(&data, i as i32) as usize; + let j = next_diff1_3(&data, i); let (mut sec, mut mem) = (0, 0); let mut k = i; while k < j { - // let l = next_diff12_3(&data, k as i32) as usize; // crashed loader + // let l = next_diff12_3(&data, k); // crashed loader let mut l = k; while l < j { if data[l].1 != data[k].1 { diff --git a/enclone_print/src/define_mat.rs b/enclone_print/src/define_mat.rs index 4e75d2992..7a302c91e 100644 --- a/enclone_print/src/define_mat.rs +++ b/enclone_print/src/define_mat.rs @@ -41,7 +41,7 @@ fn joiner( } let mut i = 0; while i < seq_chains.len() { - let j = next_diff1_3(seq_chains, i as i32) as usize; + let j = next_diff1_3(seq_chains, i); for k in i + 1..j { let (x1, x2) = (&seq_chains[i], &seq_chains[k]); let z1 = bin_position(chains, &(x1.1, x1.2)); @@ -66,7 +66,7 @@ pub fn setup_define_mat(orbit: &[i32], info: &[CloneInfo]) -> (Vec, Vec::new(); let mut j = 0; while j < od.len() { - let k = next_diff12_3(&od, j as i32) as usize; + let k = next_diff12_3(&od, j); exacts.push(od[j].1); j = k; } @@ -229,7 +229,7 @@ pub fn define_mat( let mut rxir = Vec::<(usize, usize, usize)>::new(); // (heavy orbit, light orbit, info index) let mut i = 0; while i < rxi.len() { - let j = next_diff12_3(&rxi, i as i32) as usize; + let j = next_diff12_3(&rxi, i); rxir.extend(&rxi[i..j.min(i + MAX_USE)]); i = j; } diff --git a/enclone_print/src/loupe.rs b/enclone_print/src/loupe.rs index 2abb10fe8..896e3dc7a 100644 --- a/enclone_print/src/loupe.rs +++ b/enclone_print/src/loupe.rs @@ -36,7 +36,7 @@ pub fn make_donor_refs( } j += 1; } - // let j = next_diff12_5(alt_refs, i as i32) as usize; + // let j = next_diff12_5(alt_refs, i); for (k, x) in alt_refs.iter().enumerate().take(j).skip(i) { let donor_id = x.0; let ref_id = x.1; diff --git a/enclone_print/src/print_clonotypes.rs b/enclone_print/src/print_clonotypes.rs index c9d488215..f894d4cb1 100644 --- a/enclone_print/src/print_clonotypes.rs +++ b/enclone_print/src/print_clonotypes.rs @@ -229,7 +229,7 @@ pub fn print_clonotypes( let mut mults = Vec::::new(); let mut j = 0; while j < od.len() { - let k = next_diff12_3(&od, j as i32) as usize; + let k = next_diff12_3(&od, j); let mut mult = 0_usize; for l in j..k { let x: &CloneInfo = &info[od[l].2 as usize]; diff --git a/enclone_print/src/print_utils2.rs b/enclone_print/src/print_utils2.rs index 06b841185..ca39c0ad3 100644 --- a/enclone_print/src/print_utils2.rs +++ b/enclone_print/src/print_utils2.rs @@ -172,7 +172,7 @@ pub fn row_fill( bch[l].sort(); let mut m = 0; while m < bch[l].len() { - let n = next_diff12_4(&bch[l], m as i32) as usize; + let n = next_diff12_4(&bch[l], m); for u1 in m..n { for u2 in m..n { if bch[l][u1].2 >= 10 * bch[l][u2].2 { diff --git a/enclone_print/src/proc_cvar_auto.rs b/enclone_print/src/proc_cvar_auto.rs index 1c5419a2f..4255dfc42 100644 --- a/enclone_print/src/proc_cvar_auto.rs +++ b/enclone_print/src/proc_cvar_auto.rs @@ -1466,7 +1466,7 @@ pub fn proc_cvar_auto( bch[l].sort(); let mut m = 0; while m < bch[l].len() { - let n = next_diff12_4(&bch[l], m as i32) as usize; + let n = next_diff12_4(&bch[l], m); for u1 in m..n { for u2 in m..n { if bch[l][u1].2 >= 10 * bch[l][u2].2 { diff --git a/enclone_stuff/src/analyze_dref.rs b/enclone_stuff/src/analyze_dref.rs index af0a25d57..6d3073207 100644 --- a/enclone_stuff/src/analyze_dref.rs +++ b/enclone_stuff/src/analyze_dref.rs @@ -75,7 +75,7 @@ pub fn analyze_donor_ref( refs.sort(); let mut i = 0; while i < refs.len() { - let j = next_diff1_3(&refs, i as i32) as usize; + let j = next_diff1_3(&refs, i); let gene = &refs[i].0; let mut alleles = Vec::<(&[u8], &str)>::new(); // (sequence, name) let mut have_alt = false; @@ -128,7 +128,7 @@ pub fn analyze_donor_ref( let mut allelesg = Vec::<(Vec<&str>, &[u8])>::new(); let mut r = 0; while r < alleles.len() { - let s = next_diff1_2(&alleles, r as i32) as usize; + let s = next_diff1_2(&alleles, r); let mut names = Vec::<&str>::new(); for at in &alleles[r..s] { names.push(at.1); diff --git a/enclone_stuff/src/doublets.rs b/enclone_stuff/src/doublets.rs index 83a585882..4c5df915a 100644 --- a/enclone_stuff/src/doublets.rs +++ b/enclone_stuff/src/doublets.rs @@ -113,7 +113,7 @@ pub fn delete_doublets( let mut j = 0; while j < content.len() { - let k = next_diff1_2(&content, j as i32) as usize; + let k = next_diff1_2(&content, j); for l1 in j..k { for l2 in l1 + 1..k { shares.push((content[l1].1, content[l2].1)); diff --git a/enclone_stuff/src/filter_umi.rs b/enclone_stuff/src/filter_umi.rs index 7c6819259..47acf2750 100644 --- a/enclone_stuff/src/filter_umi.rs +++ b/enclone_stuff/src/filter_umi.rs @@ -271,7 +271,7 @@ pub fn filter_umi( reverse_sort(&mut z); let mut j = 0; while j < z.len() { - let k = next_diff1_5(&z, j as i32) as usize; + let k = next_diff1_5(&z, j); for l in j..k { if z[j].1 >= MIN_UMI_RATIO * z[l].4 { to_delete[z[l].2][z[l].3] = true; diff --git a/enclone_stuff/src/some_filters.rs b/enclone_stuff/src/some_filters.rs index 051f505f6..49eb10cda 100644 --- a/enclone_stuff/src/some_filters.rs +++ b/enclone_stuff/src/some_filters.rs @@ -97,7 +97,7 @@ pub fn some_filters( types.sort(); let mut i = 0; while i < types.len() { - let j = next_diff1_2(&types, i as i32) as usize; + let j = next_diff1_2(&types, i); let mut mult = 0; for t in &types[i..j] { mult += t.1; diff --git a/enclone_stuff/src/start.rs b/enclone_stuff/src/start.rs index 4aed8210a..4749f278a 100644 --- a/enclone_stuff/src/start.rs +++ b/enclone_stuff/src/start.rs @@ -631,7 +631,7 @@ pub fn main_enclone_start(setup: EncloneSetup) -> Result::new(); 2]; bch[l].sort(); let mut m = 0; while m < bch[l].len() { - let n = next_diff12_4(&bch[l], m as i32) as usize; + let n = next_diff12_4(&bch[l], m); for u1 in m..n { for u2 in m..n { if bch[l][u1].2 >= 10 * bch[l][u2].2 { diff --git a/hyperbase/src/lib.rs b/hyperbase/src/lib.rs index 5c268a871..81ee4265a 100644 --- a/hyperbase/src/lib.rs +++ b/hyperbase/src/lib.rs @@ -191,8 +191,7 @@ impl HyperBasevector { let mut i: i64 = 0; let mut dups = 0; while i < kmers_plus.len() as i64 { - // warning: note truncation to i32 - let j = next_diff1_3(&kmers_plus, i as i32) as i64; + let j = next_diff1_3(&kmers_plus, i as usize) as i64; if j - i > 1 { if dups == 0 { println!("\ntest_unique failed"); diff --git a/vdj_ann/src/annotate.rs b/vdj_ann/src/annotate.rs index 192510362..9cf43cf8a 100644 --- a/vdj_ann/src/annotate.rs +++ b/vdj_ann/src/annotate.rs @@ -26,7 +26,7 @@ use std::{ use string_utils::{stringme, strme, TextUtils}; use vdj_types::{VdjChain, VdjRegion}; use vector_utils::{ - bin_member, erase_if, lower_bound1_3, next_diff12_4, next_diff1_2, next_diff1_3, reverse_sort, + bin_member, erase_if, lower_bound1_3, next_diff1_2, next_diff1_3, next_diff_any, reverse_sort, unique_sort, VecUtils, }; @@ -222,7 +222,7 @@ fn print_alignx(log: &mut Vec, a: &PreAnnotation, refdata: &RefData) { fn report_semis( verbose: bool, title: &str, - semi: &[(i32, i32, i32, i32, Vec)], + semi: &[SemiPerfectMatch], b_seq: &[u8], refs: &[DnaString], log: &mut Vec, @@ -233,19 +233,19 @@ fn report_semis( fwrite!( log, "t = {}, offset = {}, tig start = {}, ref start = {}, len = {}, mis = {}", - s.0, - s.1, - s.2, - s.1 + s.2, - s.3, - s.4.len(), + s.ref_id, + s.offset, + s.tig_start, + s.offset + s.tig_start, + s.len, + s.mismatches.len(), ); - let t = s.0 as usize; - let off = s.1; - let tig_start = s.2; + let t = s.ref_id as usize; + let off = s.offset; + let tig_start = s.tig_start; let ref_start = off + tig_start; - let len = s.3; - let mis = &s.4; + let len = s.len; + let mis = &s.mismatches; let mut new_mis = Vec::::new(); for j in 0..len { if b_seq[(tig_start + j) as usize] != refs[t].get((ref_start + j) as usize) { @@ -263,6 +263,35 @@ fn report_semis( } } +// TODO: combine this and the PreAnnotation type above? +#[derive(PartialEq, Eq, PartialOrd, Ord)] +struct PerfectMatch { + pub ref_id: i32, + /// ref_start - tig_start + pub offset: i32, + pub tig_start: i32, + // len + pub len: i32, +} + +#[derive(PartialEq, Eq, PartialOrd, Ord)] +struct SemiPerfectMatch { + pub ref_id: i32, + /// off = pos on ref - pos on b + pub offset: i32, + /// pos on b + pub tig_start: i32, + pub len: i32, + /// positions on b of mismatches + pub mismatches: Vec, +} + +#[derive(PartialEq, Eq, PartialOrd, Ord)] +struct Offset { + pub ref_id: i32, + pub offset: i32, +} + pub fn annotate_seq_core( b: &DnaString, refdata: &RefData, @@ -295,7 +324,7 @@ pub fn annotate_seq_core( // // perf = {(ref_id, ref_start - tig_start, tig_start, len)} - let mut perf = Vec::<(i32, i32, i32, i32)>::new(); + let mut perf = Vec::::new(); if b.len() < K { return; } @@ -347,7 +376,12 @@ pub fn annotate_seq_core( } } if ok { - perf.push((t as i32, p as i32 - l as i32, l as i32, len as i32)); + perf.push(PerfectMatch { + ref_id: t as i32, + offset: p as i32 - l as i32, + tig_start: l as i32, + len: len as i32, + }); } } } @@ -357,11 +391,11 @@ pub fn annotate_seq_core( fwriteln!( log, "t = {}, offset = {}, tig start = {}, ref start = {}, len = {}", - s.0, - s.1, - s.2, - s.1 + s.2, - s.3, + s.ref_id, + s.offset, + s.tig_start, + s.offset + s.tig_start, + s.len, ); } } @@ -370,20 +404,27 @@ pub fn annotate_seq_core( // already found and are not equal to one of them. But only do this if we already have at // least 150 bases aligned. - let mut offsets = Vec::<(i32, i32)>::new(); + let mut offsets = Vec::::new(); for p in &perf { - offsets.push((p.0, p.1)); + offsets.push(Offset { + ref_id: p.ref_id, + offset: p.offset, + }); } unique_sort(&mut offsets); const MM_START: i32 = 150; const MM: i32 = 10; - for (t, off) in offsets { + for Offset { + ref_id: t, + offset: off, + } in offsets + { let mut tig_starts = Vec::::new(); let mut total = 0; for pi in &perf { - if pi.0 == t && pi.1 == off { - tig_starts.push(pi.2); - total += pi.3; + if pi.ref_id == t && pi.offset == off { + tig_starts.push(pi.tig_start); + total += pi.len; } } if total < MM_START { @@ -420,7 +461,12 @@ pub fn annotate_seq_core( } } if !known { - perf.push((t, p - l, l, len)); + perf.push(PerfectMatch { + ref_id: t, + offset: p - l, + tig_start: l, + len, + }); } } l = lx; @@ -438,29 +484,29 @@ pub fn annotate_seq_core( fwriteln!( log, "t = {}, offset = {}, tig start = {}, ref start = {}, len = {}", - s.0, - s.1, - s.2, - s.1 + s.2, - s.3, + s.ref_id, + s.offset, + s.tig_start, + s.offset + s.tig_start, + s.len, ); } } // Merge perfect matches. We track the positions on b of mismatches. - // semi = {(t, off, pos on b, len, positions on b of mismatches)} - // where off = pos on ref - pos on b - let mut semi = Vec::<(i32, i32, i32, i32, Vec)>::new(); + let mut semi = Vec::::new(); let mut i = 0; while i < perf.len() { - let j = next_diff12_4(&perf, i as i32); - let (t, off) = (perf[i].0, perf[i].1); - let mut join = vec![false; j as usize - i]; - let mut mis = vec![Vec::::new(); j as usize - i]; - for k in i..j as usize - 1 { - let (l1, len1) = (perf[k].2, perf[k].3); - let (l2, len2) = (perf[k + 1].2, perf[k + 1].3); + let j = next_diff_any(&perf, i, |a, b| { + a.ref_id == b.ref_id && a.offset == b.offset + }); + let (t, off) = (perf[i].ref_id, perf[i].offset); + let mut join = vec![false; j - i]; + let mut mis = vec![Vec::::new(); j - i]; + for k in i..j - 1 { + let (l1, len1) = (perf[k].tig_start, perf[k].len); + let (l2, len2) = (perf[k + 1].tig_start, perf[k + 1].len); for z in l1 + len1..l2 { if b_seq[z as usize] != refs[t as usize].get((z + off) as usize) { mis[k - i].push(z); @@ -476,12 +522,12 @@ pub fn annotate_seq_core( } } let mut k1 = i; - while k1 < j as usize { + while k1 < j { // let mut k2 = k1 + 1; let mut k2 = k1; let mut m = Vec::::new(); // m.append( &mut mis[k1-i].clone() ); - while k2 < j as usize { + while k2 < j { // if !join[k2-i-1] { break; } if !join[k2 - i] { break; @@ -489,21 +535,27 @@ pub fn annotate_seq_core( m.append(&mut mis[k2 - i].clone()); k2 += 1; } - semi.push((t, off, perf[k1].2, perf[k2].2 + perf[k2].3 - perf[k1].2, m)); + semi.push(SemiPerfectMatch { + ref_id: t, + offset: off, + tig_start: perf[k1].tig_start, + len: perf[k2].tig_start + perf[k2].len - perf[k1].tig_start, + mismatches: m, + }); k1 = k2 + 1; } - i = j as usize; + i = j; } report_semis(verbose, "INITIAL SEMI ALIGNMENTS", &semi, &b_seq, refs, log); // Extend backwards and then forwards. for s in &mut semi { - let t = s.0; - let off = s.1; - let mut l = s.2; - let mut len = s.3; - let mut mis = s.4.clone(); + let t = s.ref_id; + let off = s.offset; + let mut l = s.tig_start; + let mut len = s.len; + let mut mis = s.mismatches.clone(); while l > MIN_PERF_EXT as i32 && l + off > MIN_PERF_EXT as i32 { let mut ok = true; for j in 0..MIN_PERF_EXT { @@ -550,12 +602,12 @@ pub fn annotate_seq_core( len += 1; } } - s.2 = l; - s.3 = len; - s.4 = mis; + s.tig_start = l; + s.len = len; + s.mismatches = mis; } for s in &mut semi { - s.4.sort_unstable(); + s.mismatches.sort_unstable(); } // Add some 40-mers with the same offset having <= 6 mismatches. @@ -574,17 +626,17 @@ pub fn annotate_seq_core( let mut i = 0; while i < semi.len() { let mut j = i + 1; - let t = semi[i].0; - let off = semi[i].1; + let t = semi[i].ref_id; + let off = semi[i].offset; while j < semi.len() { - if semi[j].0 != t || semi[j].1 != off { + if semi[j].ref_id != t || semi[j].offset != off { break; } j += 1; } const L: i32 = 40; const MAX_DIFFS: usize = 6; - let p1 = off + semi[i].2; + let p1 = off + semi[i].tig_start; // let p2 = off + semi[j-1].2 + semi[j-1].3; if -off >= 0 && p1 - off <= b_seq.len() as i32 { for p in 0..p1 - L { @@ -605,7 +657,13 @@ pub fn annotate_seq_core( x.push(l + m); } } - semi.push((t, off, p - off, L, x)); + semi.push(SemiPerfectMatch { + ref_id: t, + offset: off, + tig_start: p - off, + len: L, + mismatches: x, + }); break; } } @@ -621,11 +679,11 @@ pub fn annotate_seq_core( if allow_weak { let max_mis = 5; for s in &mut semi { - let t = s.0; - let off = s.1; - let l = s.2; - let mut len = s.3; - let mut mis = s.4.clone(); + let t = s.ref_id; + let off = s.offset; + let l = s.tig_start; + let mut len = s.len; + let mut mis = s.mismatches.clone(); let mut mis_count = 0; while l + len < b_seq.len() as i32 && l + len + off < refs[t as usize].len() as i32 { if b_seq[(l + len) as usize] != refs[t as usize].get((l + off + len) as usize) { @@ -635,16 +693,16 @@ pub fn annotate_seq_core( len += 1; } if mis_count <= max_mis && l + len + off == refs[t as usize].len() as i32 { - s.3 = len; - s.4 = mis; + s.len = len; + s.mismatches = mis; } } for s in &mut semi { - let t = s.0; - let off = s.1; - let mut l = s.2; - let mut len = s.3; - let mut mis = s.4.clone(); + let t = s.ref_id; + let off = s.offset; + let mut l = s.tig_start; + let mut len = s.len; + let mut mis = s.mismatches.clone(); let mut mis_count = 0; while l > 0 && l + off > 0 { if b_seq[(l - 1_i32) as usize] != refs[t as usize].get((l + off - 1_i32) as usize) { @@ -655,13 +713,13 @@ pub fn annotate_seq_core( len += 1; } if mis_count <= max_mis && l + off == 0 { - s.2 = l; - s.3 = len; - s.4 = mis; + s.tig_start = l; + s.len = len; + s.mismatches = mis; } } for s in &mut semi { - s.4.sort_unstable(); + s.mismatches.sort_unstable(); } } report_semis(verbose, "SEMI ALIGNMENTS", &semi, &b_seq, refs, log); @@ -672,24 +730,24 @@ pub fn annotate_seq_core( let mut to_delete = vec![false; semi.len()]; for i1 in 0..semi.len() { - let t1 = semi[i1].0; + let t1 = semi[i1].ref_id; if t1 < 0 { continue; } - let off1 = semi[i1].1; - let (l1, len1) = (semi[i1].2, semi[i1].3); - let mis1 = semi[i1].4.clone(); + let off1 = semi[i1].offset; + let (l1, len1) = (semi[i1].tig_start, semi[i1].len); + let mis1 = semi[i1].mismatches.clone(); for i2 in 0..semi.len() { - let t2 = semi[i2].0; - let off2 = semi[i2].1; + let t2 = semi[i2].ref_id; + let off2 = semi[i2].offset; if t2 != t1 || off2 != off1 { continue; } - let (l2, len2) = (semi[i2].2, semi[i2].3); + let (l2, len2) = (semi[i2].tig_start, semi[i2].len); if l1 + len1 >= l2 { continue; } - let mis2 = semi[i2].4.clone(); + let mis2 = semi[i2].mismatches.clone(); let mut mis3 = Vec::::new(); for l in l1 + len1..l2 { if b_seq[l as usize] != refs[t1 as usize].get((l + off1) as usize) { @@ -700,11 +758,11 @@ pub fn annotate_seq_core( if nmis as f64 / ((l2 + len2) - l1) as f64 > MAX_RATE { continue; } - semi[i1].3 = (l2 + len2) - l1; - semi[i1].4.append(&mut mis3); - semi[i1].4.append(&mut mis2.clone()); - unique_sort(&mut semi[i1].4); - semi[i2].0 = -1_i32; + semi[i1].len = (l2 + len2) - l1; + semi[i1].mismatches.append(&mut mis3); + semi[i1].mismatches.append(&mut mis2.clone()); + unique_sort(&mut semi[i1].mismatches); + semi[i2].ref_id = -1_i32; to_delete[i2] = true; } } @@ -726,7 +784,7 @@ pub fn annotate_seq_core( while i < semi.len() { let mut j = i + 1; while j < semi.len() { - if semi[j].0 != semi[i].0 || semi[j].1 != semi[i].1 { + if semi[j].ref_id != semi[i].ref_id || semi[j].offset != semi[i].offset { break; } j += 1; @@ -736,20 +794,20 @@ pub fn annotate_seq_core( if to_delete[k1] || to_delete[k2] { continue; } - let start1 = semi[k1].2; - let start2 = semi[k2].2; - let len1 = semi[k1].3; - let len2 = semi[k2].3; + let start1 = semi[k1].tig_start; + let start2 = semi[k2].tig_start; + let len1 = semi[k1].len; + let len2 = semi[k2].len; let stop1 = start1 + len1; let stop2 = start2 + len2; let start = min(start1, start2); let stop = max(stop1, stop2); if stop - start <= len1 + len2 { - semi[k1].2 = start; - semi[k1].3 = stop - start; - let mut m2 = semi[k2].4.clone(); - semi[k1].4.append(&mut m2); - unique_sort(&mut semi[k1].4); + semi[k1].tig_start = start; + semi[k1].len = stop - start; + let mut m2 = semi[k2].mismatches.clone(); + semi[k1].mismatches.append(&mut m2); + unique_sort(&mut semi[k1].mismatches); to_delete[k2] = true; } } @@ -774,7 +832,7 @@ pub fn annotate_seq_core( while i < semi.len() { let mut j = i + 1; while j < semi.len() { - if semi[j].0 != semi[i].0 { + if semi[j].ref_id != semi[i].ref_id { break; } j += 1; @@ -785,18 +843,18 @@ pub fn annotate_seq_core( ok = true; } else if j - i == 2 { ok = true; - if semi[i].2 < semi[i + 1].2 { + if semi[i].tig_start < semi[i + 1].tig_start { k = i + 1; } } if ok { - let offset = semi[k].1; - let ref_start = semi[k].1 + semi[k].2; - let tig_start = semi[k].2; - let t = semi[k].0 as usize; + let offset = semi[k].offset; + let ref_start = semi[k].offset + semi[k].tig_start; + let tig_start = semi[k].tig_start; + let t = semi[k].ref_id as usize; if !rheaders[t].contains("segment") && refdata.is_v(t) { let r = &refs[t]; - let len = semi[k].3; + let len = semi[k].len; if ref_start + len < r.len() as i32 && (ref_start + len) as f64 / r.len() as f64 >= 0.60 && len + tig_start < b_seq.len() as i32 @@ -805,10 +863,10 @@ pub fn annotate_seq_core( let stop = min(r.len() as i32, b_seq.len() as i32 + offset); for m in start..stop { if b_seq[(m - offset) as usize] != r.get(m as usize) { - semi[k].4.push(m - offset); + semi[k].mismatches.push(m - offset); } } - semi[k].3 += stop - start; + semi[k].len += stop - start; } } } @@ -818,7 +876,7 @@ pub fn annotate_seq_core( // Make sure that mismatches are unique sorted. for s in &mut semi { - unique_sort(&mut s.4); + unique_sort(&mut s.mismatches); } report_semis( verbose, @@ -836,15 +894,16 @@ pub fn annotate_seq_core( while i < semi.len() { let mut j = i + 1; while j < semi.len() { - if semi[j].0 != semi[i].0 || semi[j].1 != semi[i].1 { + if semi[j].ref_id != semi[i].ref_id || semi[j].offset != semi[i].offset { break; } j += 1; } for k1 in i..j { for k2 in i..j { - if semi[k1].1 + semi[k1].2 + semi[k1].3 == semi[k2].1 + semi[k2].2 + semi[k2].3 - && semi[k1].3 > semi[k2].3 + if semi[k1].offset + semi[k1].tig_start + semi[k1].len + == semi[k2].offset + semi[k2].tig_start + semi[k2].len + && semi[k1].len > semi[k2].len { to_delete[k2] = true; } @@ -868,11 +927,11 @@ pub fn annotate_seq_core( let mut annx = Vec::::new(); for x in &semi { annx.push(PreAnnotation { - tig_start: x.2, - match_len: x.3, - ref_id: x.0, - ref_start: x.2 + x.1, - mismatches: x.4.clone(), + tig_start: x.tig_start, + match_len: x.len, + ref_id: x.ref_id, + ref_start: x.tig_start + x.offset, + mismatches: x.mismatches.clone(), }); } unique_sort(&mut annx); @@ -989,14 +1048,14 @@ pub fn annotate_seq_core( ts.sort_unstable(); let mut i1 = 0; while i1 < ts.len() { - let j1 = next_diff1_2(&ts, i1 as i32) as usize; + let j1 = next_diff1_2(&ts, i1); let mut tlen1 = 0; for k in i1..j1 { tlen1 += annx[ts[k].1].match_len; } let mut i2 = 0; while i2 < ts.len() { - let j2 = next_diff1_2(&ts, i2 as i32) as usize; + let j2 = next_diff1_2(&ts, i2); let mut tlen2 = 0; for k in i2..j2 { tlen2 += annx[ts[k].1].match_len; @@ -1441,7 +1500,7 @@ pub fn annotate_seq_core( let mut i = 0; while i < combo.len() { - let j = next_diff1_3(&combo, i as i32) as usize; + let j = next_diff1_3(&combo, i); let mut cov = Vec::<(usize, usize)>::new(); let mut mis = 0; let mut mis_nutr = 0; @@ -2032,13 +2091,10 @@ pub fn annotate_seq_core( best_matches = max(best_matches, result.1); } if results[0].1 == best_matches { - let t = results[0].2; - let r = results[0].0 + results[0].1; - let m = results[0].4; annx.push(PreAnnotation { - tig_start: m as i32, - match_len: r as i32, - ref_id: t as i32, + tig_start: results[0].4 as i32, + match_len: (results[0].0 + results[0].1) as i32, + ref_id: results[0].2 as i32, ref_start: 0, mismatches: results[0].5.clone(), }); @@ -2290,7 +2346,7 @@ pub fn annotate_seq_core( let max_indel = 27; let min_len_gain = 100; while j < vs.len() { - let k = next_diff1_2(&vs, j as i32) as usize; + let k = next_diff1_2(&vs, j); if k - j == 1 { score.push((annx[j].match_len, k - j, annx[j].mismatches.len(), vs[j].1)); } else if k - j == 2 { diff --git a/vdj_ann_ref/src/bin/build_vdj_ref.rs b/vdj_ann_ref/src/bin/build_vdj_ref.rs index da57eb84b..79267311e 100644 --- a/vdj_ann_ref/src/bin/build_vdj_ref.rs +++ b/vdj_ann_ref/src/bin/build_vdj_ref.rs @@ -1264,7 +1264,7 @@ fn main() { let mut i = 0; let mut dnas = Vec::<(Vec, usize, usize)>::new(); while i < exons.len() { - let j = next_diff12_8(&exons, i as i32) as usize; + let j = next_diff12_8(&exons, i); let mut x = Vec::::new(); for d in &dna[i..j] { x.push(d.clone()); @@ -1320,7 +1320,7 @@ fn main() { let mut i = 0; let mut record = 0; while i < exons.len() { - let j = next_diff12_8(&exons, i as i32) as usize; + let j = next_diff12_8(&exons, i); let mut fws = Vec::::new(); for exon in &exons[i..j] { fws.push(exon.6); diff --git a/vector_utils/src/lib.rs b/vector_utils/src/lib.rs index 9f4848b6f..66b6f1a1b 100644 --- a/vector_utils/src/lib.rs +++ b/vector_utils/src/lib.rs @@ -333,140 +333,74 @@ pub fn count_instances(x: &[impl Borrow], d: &T) -> i32 { // NEXT DIFFERENCE // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ -// Find first element that's different in a sorted vector, or different in -// first position. - -pub fn next_diff(x: &[T], i: usize) -> usize { +/// Find first element that's different in a sorted vector, or different in +/// first position. +pub fn next_diff_any(x: &[T], i: usize, eq: impl Fn(&T, &T) -> bool) -> usize { let mut j = i + 1; loop { - if j == x.len() || x[j] != x[i] { + if j == x.len() || !eq(&x[j], &x[i]) { return j; } j += 1; } } -pub fn next_diff1_2(x: &[(T, U)], i: i32) -> i32 { - let mut j: i32 = i + 1; - loop { - if j == x.len() as i32 || x[j as usize].0 != x[i as usize].0 { - return j; - } - j += 1; - } +pub fn next_diff(x: &[T], i: usize) -> usize { + next_diff_any(x, i, |a, b| a == b) } -pub fn next_diff1_3(x: &[(T, U, V)], i: i32) -> i32 { - let mut j: i32 = i + 1; - loop { - if j == x.len() as i32 || x[j as usize].0 != x[i as usize].0 { - return j; - } - j += 1; - } +pub fn next_diff1_2(x: &[(T, U)], i: usize) -> usize { + next_diff_any(x, i, |a, b| a.0 == b.0) } -pub fn next_diff1_4(x: &[(T, U, V, W)], i: i32) -> i32 { - let mut j: i32 = i + 1; - loop { - if j == x.len() as i32 || x[j as usize].0 != x[i as usize].0 { - return j; - } - j += 1; - } +pub fn next_diff1_3(x: &[(T, U, V)], i: usize) -> usize { + next_diff_any(x, i, |a, b| a.0 == b.0) } -pub fn next_diff12_3(x: &[(T, U, V)], i: i32) -> i32 { - let mut j: i32 = i + 1; - loop { - if j == x.len() as i32 - || x[j as usize].0 != x[i as usize].0 - || x[j as usize].1 != x[i as usize].1 - { - return j; - } - j += 1; - } +pub fn next_diff1_4(x: &[(T, U, V, W)], i: usize) -> usize { + next_diff_any(x, i, |a, b| a.0 == b.0) } -pub fn next_diff12_4(x: &[(T, U, V, W)], i: i32) -> i32 { - let mut j: i32 = i + 1; - loop { - if j == x.len() as i32 - || x[j as usize].0 != x[i as usize].0 - || x[j as usize].1 != x[i as usize].1 - { - return j; - } - j += 1; - } +pub fn next_diff12_3(x: &[(T, U, V)], i: usize) -> usize { + next_diff_any(x, i, |a, b| a.0 == b.0 && a.1 == b.1) +} + +pub fn next_diff12_4(x: &[(T, U, V, W)], i: usize) -> usize { + next_diff_any(x, i, |a, b| a.0 == b.0 && a.1 == b.1) } #[allow(clippy::type_complexity)] pub fn next_diff12_8( x: &[(S, T, U, V, W, X, Y, Z)], - i: i32, -) -> i32 { - let mut j: i32 = i + 1; - loop { - if j == x.len() as i32 - || x[j as usize].0 != x[i as usize].0 - || x[j as usize].1 != x[i as usize].1 - { - return j; - } - j += 1; - } + i: usize, +) -> usize { + next_diff_any(x, i, |a, b| a.0 == b.0 && a.1 == b.1) } -pub fn next_diff1_5(x: &[(T, U, V, W, X)], i: i32) -> i32 { - let mut j: i32 = i + 1; - loop { - if j == x.len() as i32 || x[j as usize].0 != x[i as usize].0 { - return j; - } - j += 1; - } +pub fn next_diff1_5(x: &[(T, U, V, W, X)], i: usize) -> usize { + next_diff_any(x, i, |a, b| a.0 == b.0) } pub fn next_diff1_6( x: &[(T, U, V, W, X, Y)], - i: i32, -) -> i32 { - let mut j: i32 = i + 1; - loop { - if j == x.len() as i32 || x[j as usize].0 != x[i as usize].0 { - return j; - } - j += 1; - } + i: usize, +) -> usize { + next_diff_any(x, i, |a, b| a.0 == b.0) } pub fn next_diff1_7( x: &[(T, U, V, W, X, Y, Z)], - i: i32, -) -> i32 { - let mut j: i32 = i + 1; - loop { - if j == x.len() as i32 || x[j as usize].0 != x[i as usize].0 { - return j; - } - j += 1; - } + i: usize, +) -> usize { + next_diff_any(x, i, |a, b| a.0 == b.0) } #[allow(clippy::type_complexity)] pub fn next_diff1_8( x: &[(S, T, U, V, W, X, Y, Z)], - i: i32, -) -> i32 { - let mut j: i32 = i + 1; - loop { - if j == x.len() as i32 || x[j as usize].0 != x[i as usize].0 { - return j; - } - j += 1; - } + i: usize, +) -> usize { + next_diff_any(x, i, |a, b| a.0 == b.0) } // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓