Skip to content

Commit

Permalink
before branch
Browse files Browse the repository at this point in the history
  • Loading branch information
chhylp123 committed Sep 16, 2021
1 parent ab80851 commit ed814ab
Show file tree
Hide file tree
Showing 2 changed files with 180 additions and 137 deletions.
219 changes: 108 additions & 111 deletions anchor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,130 +265,127 @@ void calculate_ug_chaining(Candidates_list* candidates, overlap_region_alloc* ov
}
}

/**
void ha_get_inter_candidates(ha_abufl_t *ab, int64_t uid, ma_utg_v *ua, overlap_region_alloc *ovlp,
Candidates_list *cl, double bw_thres, int max_n_chain, kvec_t_u8_warp* k_flag, kvec_t_u64_warp* chain_idx,
void *ha_flt_tab, ha_pt_t *ha_idx, overlap_region* f_cigar, kvec_t_u64_warp* dbg_ct, st_mt_t *sp,
double chain_match_rate, uint32_t uk, uint32_t uw, uint32_t is_hpc, uint32_t h_occ)
{

uint32_t i, rlen;
uint64_t k, l;
ma_utg_t *u = &(ua->a[uid]);
void ha_get_inter_candidates(ha_abufl_t *ab, uint64_t id, char* r, uint64_t rlen, uint64_t rw, uint64_t rk, uint64_t is_hpc,
overlap_region_alloc *ol, Candidates_list *cl, double bw_thres, int max_n_chain, int keep_whole_chain,
kvec_t_u8_warp* k_flag, kvec_t_u64_warp* chain_idx, void *ha_flt_tab, ha_pt_t *ha_idx,
overlap_region* f_cigar, kvec_t_u64_warp* dbg_ct, st_mt_t *sp)
{
uint64_t i, k, l;
// prepare
clear_Candidates_list(cl);
clear_overlap_region_alloc(ovlp);
clear_overlap_region_alloc(ol);
ab->mz.n = 0, ab->n_a = 0;

mz2_ha_sketch(u->s, u->len, uw, uk, uid, is_hpc, &ab->mz, ha_flt_tab, asm_opt.mz_sample_dist, k_flag, dbg_ct,
NULL, -1, asm_opt.dp_min_len, -1, sp, asm_opt.mz_rewin, 1);
// get the list of anchors
mz2_ha_sketch(r, rlen, rw, rk, 0, is_hpc, &ab->mz, ha_flt_tab, asm_opt.mz_sample_dist, k_flag, dbg_ct,
NULL, -1, asm_opt.dp_min_len, -1, sp, asm_opt.mz_rewin, 1);

// minimizer of queried read
if (ab->mz.m > ab->old_mz_m) {
ab->old_mz_m = ab->mz.m;
REALLOC(ab->seed, ab->old_mz_m);
}
for (i = 0, ab->n_a = 0; i < ab->mz.n; ++i) {
int n;
ab->seed[i].a = ha_ptl_get(ha_idx, ab->mz.a[i].x, &n);
ab->seed[i].n = n;
ab->seed[i].cnt = ha_ft_cnt(ha_flt_tab, ab->mz.a[i].x);
ab->n_a += n;
}
if (ab->n_a > ab->m_a) {
ab->m_a = ab->n_a;
kroundup64(ab->m_a);
REALLOC(ab->a, ab->m_a);
}
for (i = 0, k = 0; i < ab->mz.n; ++i) {
int j;
///z is one of the minimizer
ha_mzl_t *z = &ab->mz.a[i];
seedl_t *s = &ab->seed[i];
for (j = 0; j < s->n; ++j) {
const ha_idxposl_t *y = &s->a[j];
anchor1_t *an = &ab->a[k++];
uint8_t rev = z->rev == y->rev? 0 : 1;
an->other_off = y->pos;
an->self_off = rev? u->len - 1 - (z->pos + 1 - z->span) : z->pos;
an->cnt = s->cnt;
an->srt = (uint64_t)y->rid<<33 | (uint64_t)rev<<32 | an->other_off;
}
}
ab->old_mz_m = ab->mz.m;
REALLOC(ab->seed, ab->old_mz_m);
}
for (i = 0, ab->n_a = 0; i < ab->mz.n; ++i) {
int n;
ab->seed[i].a = ha_ptl_get(ha_idx, ab->mz.a[i].x, &n);
ab->seed[i].n = n;
ab->seed[i].cnt = ha_ft_cnt(ha_flt_tab, ab->mz.a[i].x);
ab->n_a += n;
}
if (ab->n_a > ab->m_a) {
ab->m_a = ab->n_a;
kroundup64(ab->m_a);
REALLOC(ab->a, ab->m_a);
}
for (i = 0, k = 0; i < ab->mz.n; ++i) {
int j;
///z is one of the minimizer
ha_mzl_t *z = &ab->mz.a[i];
seedl_t *s = &ab->seed[i];
for (j = 0; j < s->n; ++j) {
const ha_idxposl_t *y = &s->a[j];
anchor1_t *an = &ab->a[k++];
uint8_t rev = z->rev == y->rev? 0 : 1;
an->other_off = y->pos;
an->self_off = rev? rlen - 1 - (z->pos + 1 - z->span) : z->pos;
an->cnt = s->n;
an->srt = (uint64_t)y->rid<<33 | (uint64_t)rev<<32 | an->other_off;
}
}

// sort anchors
radix_sort_ha_an1(ab->a, ab->a + ab->n_a);
for (k = 1, l = 0; k <= ab->n_a; ++k) {
if (k == ab->n_a || ab->a[k].srt != ab->a[l].srt) {
if (k - l > 1)
radix_sort_ha_an2(ab->a + l, ab->a + k);
l = k;
}
}
// sort anchors
radix_sort_ha_an1(ab->a, ab->a + ab->n_a);
for (k = 1, l = 0; k <= ab->n_a; ++k) {
if (k == ab->n_a || ab->a[k].srt != ab->a[l].srt) {
if (k - l > 1)
radix_sort_ha_an2(ab->a + l, ab->a + k);
l = k;
}
}


// copy over to _cl_
if (ab->m_a >= (uint64_t)cl->size) {
cl->size = ab->m_a;
REALLOC(cl->list, cl->size);
}
for (k = 0; k < ab->n_a; ++k) {
k_mer_hit *p = &cl->list[k];
p->readID = ab->a[k].srt >> 33;
p->strand = ab->a[k].srt >> 32 & 1;
p->offset = ab->a[k].other_off;
p->self_offset = ab->a[k].self_off;
p->cnt = (ab->a[k].cnt == 1? 1 : 16);
}
cl->length = ab->n_a;
// calculate_overlap_region_by_chaining(cl, ovlp, chain_idx, rid, ucr->length, &R_INF, bw_thres, keep_whole_chain, f_cigar);
// calculate_ug_chaining(cl, overlap_list, chain_idx, rid, ua, bw_thres, keep_whole_chain, f_cigar, ab->mz.n, chain_match_rate);
// copy over to _cl_
if (ab->m_a >= (uint64_t)cl->size) {
cl->size = ab->m_a;
REALLOC(cl->list, cl->size);
}
for (k = 0; k < ab->n_a; ++k) {
k_mer_hit *p = &cl->list[k];
p->readID = ab->a[k].srt >> 33;
p->strand = ab->a[k].srt >> 32 & 1;
p->offset = ab->a[k].other_off;
p->self_offset = ab->a[k].self_off;
p->cnt = (ab->a[k].cnt == 1? 1 : 16);
}
cl->length = ab->n_a;

calculate_overlap_region_by_chaining(cl, ol, chain_idx, id, rlen, &R_INF, bw_thres, keep_whole_chain, f_cigar);

#if 0
if (overlap_list->length > 0) {
fprintf(stderr, "B\t%ld\t%ld\t%d\n", (long)rid, (long)overlap_list->length, rlen);
for (int i = 0; i < (int)overlap_list->length; ++i) {
overlap_region *r = &overlap_list->list[i];
fprintf(stderr, "C\t%d\t%d\t%d\t%c\t%d\t%ld\t%d\t%d\t%c\t%d\t%d\n", (int)r->x_id, (int)r->x_pos_s, (int)r->x_pos_e, "+-"[r->x_pos_strand],
(int)r->y_id, (long)Get_READ_LENGTH(R_INF, r->y_id), (int)r->y_pos_s, (int)r->y_pos_e, "+-"[r->y_pos_strand], (int)r->shared_seed, ha_ov_type(r, rlen));
}
}
#endif
if ((int)ovlp->length > max_n_chain) {
int32_t w, n[4], s[4];
n[0] = n[1] = n[2] = n[3] = 0, s[0] = s[1] = s[2] = s[3] = 0;
ks_introsort_or_ss(ovlp->length, ovlp->list);
for (i = 0; i < (uint32_t)ovlp->length; ++i) {
const overlap_region *r = &ovlp->list[i];
w = ha_ov_type(r, rlen);
++n[w];
if ((int)n[w] == max_n_chain) s[w] = r->shared_seed;
}
if (s[0] > 0 || s[1] > 0 || s[2] > 0 || s[3] > 0) {
// n[0] = n[1] = n[2] = n[3] = 0;
for (i = 0, k = 0; i < (uint32_t)ovlp->length; ++i) {
overlap_region *r = &ovlp->list[i];
w = ha_ov_type(r, rlen);
// ++n[w];
// if (((int)n[w] <= max_n_chain) || (r->shared_seed >= s[w] && s[w] >= (asm_opt.k_mer_length<<1))) {
if (r->shared_seed >= s[w]) {
if ((uint32_t)k != i) {
overlap_region t;
t = ovlp->list[k];
ovlp->list[k] = ovlp->list[i];
ovlp->list[i] = t;
}
++k;
}
}
ovlp->length = k;
}
}
///ks_introsort_or_xs(overlap_list->length, overlap_list->list);
}
**/
#if 0
if (ol->length > 0) {
fprintf(stderr, "B\t%ld\t%ld\t%d\n", (long)rid, (long)ol->length, rlen);
for (int i = 0; i < (int)ol->length; ++i) {
overlap_region *r = &ol->list[i];
fprintf(stderr, "C\t%d\t%d\t%d\t%c\t%d\t%ld\t%d\t%d\t%c\t%d\t%d\n", (int)r->x_id, (int)r->x_pos_s, (int)r->x_pos_e, "+-"[r->x_pos_strand],
(int)r->y_id, (long)Get_READ_LENGTH(R_INF, r->y_id), (int)r->y_pos_s, (int)r->y_pos_e, "+-"[r->y_pos_strand], (int)r->shared_seed, ha_ov_type(r, rlen));
}
}
#endif

if ((int)ol->length > max_n_chain) {
int32_t w, n[4], s[4];
n[0] = n[1] = n[2] = n[3] = 0, s[0] = s[1] = s[2] = s[3] = 0;
ks_introsort_or_ss(ol->length, ol->list);
for (i = 0; i < (uint32_t)ol->length; ++i) {
const overlap_region *r = &ol->list[i];
w = ha_ov_type(r, rlen);
++n[w];
if ((int)n[w] == max_n_chain) s[w] = r->shared_seed;
}
if (s[0] > 0 || s[1] > 0 || s[2] > 0 || s[3] > 0) {
// n[0] = n[1] = n[2] = n[3] = 0;
for (i = 0, k = 0; i < (uint32_t)ol->length; ++i) {
overlap_region *r = &ol->list[i];
w = ha_ov_type(r, rlen);
// ++n[w];
// if (((int)n[w] <= max_n_chain) || (r->shared_seed >= s[w] && s[w] >= (asm_opt.k_mer_length<<1))) {
if (r->shared_seed >= s[w]) {
if ((uint32_t)k != i) {
overlap_region t;
t = ol->list[k];
ol->list[k] = ol->list[i];
ol->list[i] = t;
}
++k;
}
}
ol->length = k;
}
}

///ks_introsort_or_xs(overlap_list->length, overlap_list->list);
}

void ha_get_ug_candidates(ha_abuf_t *ab, int64_t rid, ma_utg_t *u, ma_utg_v *ua, overlap_region_alloc *overlap_list, Candidates_list *cl, double bw_thres, int max_n_chain, int keep_whole_chain, kvec_t_u8_warp* k_flag,
kvec_t_u64_warp* chain_idx, void *ha_flt_tab, ha_pt_t *ha_idx, overlap_region* f_cigar, kvec_t_u64_warp* dbg_ct, double chain_match_rate)
Expand Down
Loading

0 comments on commit ed814ab

Please sign in to comment.