Skip to content

Commit

Permalink
ont overlapping
Browse files Browse the repository at this point in the history
  • Loading branch information
chhylp123 committed Dec 21, 2024
1 parent 3067771 commit 2c77b3c
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 4 deletions.
30 changes: 28 additions & 2 deletions Assembly.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1050,6 +1050,21 @@ void ha_ec(int64_t round, int num_pround, int des_idx, uint64_t *tot_b, uint64_t
}


int ha_ec_dbg(void)
{
int hom_cov, het_cov;

ha_idx = ha_pt_gen(&asm_opt, 0, 0, 0, &R_INF, &hom_cov, &het_cov); // build the index
asm_opt.hom_cov = hom_cov; asm_opt.het_cov = het_cov;
ha_opt_update_cov(&asm_opt, hom_cov);

cal_ec_r_dbg(asm_opt.thread_num, R_INF.total_reads);

ha_pt_destroy(ha_idx); ha_idx = NULL;

return 0;
}

void ha_overlap_and_correct(int round)
{
int i, hom_cov, het_cov, r_out = 0;
Expand Down Expand Up @@ -2003,8 +2018,7 @@ int ha_assemble_ovec(void)
ha_flt_tab = ha_idx = NULL;

// construct hash table for high occurrence k-mers
if (!(asm_opt.flag & HA_F_NO_KMER_FLT) && ha_flt_tab == NULL)
{
if (!(asm_opt.flag & HA_F_NO_KMER_FLT) && ha_flt_tab == NULL) {
ha_flt_tab = ha_ft_gen(&asm_opt, &R_INF, &hom_cov, 0, 0);
ha_opt_update_cov(&asm_opt, hom_cov);
}
Expand All @@ -2026,6 +2040,18 @@ int ha_assemble_ovec(void)
return 0;
}

int ha_assemble_ovec_cc(void)
{
ha_idx = NULL;

ha_opt_reset_to_round(&asm_opt, 0); // this update asm_opt.roundID and a few other fields
ha_ec_dbg();

// Output_PAF0(R_INF.paf, "0");
destory_All_reads(&R_INF);
return 0;
}

int ha_assemble(void)
{
// debug_mc_g_t(MC_NAME);
Expand Down
1 change: 1 addition & 0 deletions Assembly.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,6 @@ ha_ovec_buf_t *ha_ovec_buf_init(void *km, int is_final, int save_ov, int is_ug);
void ha_ovec_destroy(ha_ovec_buf_t *b);
int64_t ha_ovec_mem(const ha_ovec_buf_t *b, int64_t *mem_a);
int ha_assemble_ovec(void);
int ha_ec_dbg(void);

#endif
2 changes: 1 addition & 1 deletion CommandLines.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include <pthread.h>
#include <stdint.h>

#define HA_VERSION "0.24.0-r702"
#define HA_VERSION "0.24.0-r703"

#define VERBOSE 0

Expand Down
138 changes: 138 additions & 0 deletions ecovlp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,23 @@ typedef struct {
uint8_t *cr;
} ec_ovec_buf_t;

typedef struct {
uint32_t n_thread, n_a, chunk_size, cn;
FILE *fp;
} cal_ec_r_dbg_t;

typedef struct {
ma_hit_t *a;
size_t n, m;
asg16_v ec;
} r_dbg_step_res_t;

typedef struct { // data structure for each step in kt_pipeline()
ec_ovec_buf_t *buf;
r_dbg_step_res_t *res;
uint32_t si, ei;
} cal_ec_r_dbg_step_t;

ec_ovec_buf_t* gen_ec_ovec_buf_t(uint32_t n);
void destroy_ec_ovec_buf_t(ec_ovec_buf_t *p);

Expand Down Expand Up @@ -3352,6 +3369,61 @@ static void worker_hap_ec(void *data, long i, int tid)
**/
}

static void worker_hap_ec_dbg_paf(void *data, long i, int tid)
{
ec_ovec_buf_t0 *b = &(((cal_ec_r_dbg_step_t*)data)->buf->a[tid]);
r_dbg_step_res_t *rr = &(((cal_ec_r_dbg_step_t*)data)->res[tid]);
uint32_t high_occ = asm_opt.hom_cov * (2.0 - HA_KMER_GOOD_RATIO);
uint32_t low_occ = asm_opt.hom_cov * HA_KMER_GOOD_RATIO;
overlap_region *aux_o = NULL; i += ((cal_ec_r_dbg_step_t*)data)->si;

// debug_retrive_bqual(D, &b->v8t, i, 256); return;

recover_UC_Read(&b->self_read, &R_INF, i);

h_ec_lchain(b->ab, i, b->self_read.seq, b->self_read.length, asm_opt.mz_win, asm_opt.k_mer_length, &R_INF, &b->olist, &b->clist, ((asm_opt.is_ont)?(0.05):(0.02)), asm_opt.max_n_chain, 1, NULL, NULL, &(b->sp), &high_occ, &low_occ, 1, 1, 3, 0.7, 2, 32, COV_W);///ONT high error

aux_o = fetch_aux_ovlp(&b->olist);///must be here

// stderr_phase_ovlp(&b->olist);
gen_hc_r_alin_ea(&b->olist, &b->clist, &R_INF, &b->self_read, &b->ovlp_read, &b->exz, aux_o, asm_opt.max_ov_diff_ec, (asm_opt.is_ont)?(WINDOW_OHC):(WINDOW_HC), i, E_KHIT, 1, &b->v16, &b->v64, &(R_INF.paf[i]), 0);

uint32_t k, m, tl; overlap_region *z; bit_extz_t ez; ma_hit_t *t;
for (k = 0; k < b->olist.length; k++) {
z = &(b->olist.list[k]);
if(!(z->w_list.n)) continue;

tl = Get_READ_LENGTH((R_INF), z->y_id);
for (m = 0; m < z->w_list.n; m++) {
if(is_ualn_win(z->w_list.a[m])) continue;
set_bit_extz_t(ez, (*z), m);
kv_pushp(ma_hit_t, *rr, &t);

t->qns = z->x_id; t->qns = t->qns << 32;
t->tn = z->y_id;

t->qns = t->qns | (uint64_t)(z->w_list.a[m].x_start);
t->qe = z->w_list.a[m].x_end + 1;

t->ts = z->w_list.a[m].y_start;
t->te = z->w_list.a[m].y_end + 1;

t->rev = z->y_pos_strand;

t->bl = rr->ec.n;
kv_resize(uint16_t, rr->ec, rr->ec.n + ez.cigar.n);
memcpy(rr->ec.a + rr->ec.n, ez.cigar.a, ez.cigar.n * sizeof((*(rr->ec.a))));
rr->ec.n += ez.cigar.n;
t->cc = rr->ec.n - t->bl;

if(t->rev) {
t->ts = tl - z->w_list.a[m].y_end - 1;
t->te = tl - z->w_list.a[m].y_start;
}
}
}
}

uint32_t adjust_exact_match(asg16_v *in, int64_t xs0, int64_t xe0, int64_t ys0, int64_t ye0, uint64_t *rxs, uint64_t *rxe, uint64_t *rys, uint64_t *rye, uint32_t rev)
{
*rxs = *rxe = *rys = *rye = 0;
Expand Down Expand Up @@ -6106,6 +6178,72 @@ void cal_ec_r(uint64_t n_thre, uint64_t round, uint64_t n_round, uint64_t n_a, u
// }
}

void print_ov_dbg_paf(FILE *fp, char *ref_str, char *ref_id, int32_t ref_id_n, char *qry_str, char *qry_id, int32_t qry_id_n, uint64_t rs, uint64_t re, uint64_t rl, uint64_t qs, uint64_t qe, uint64_t ql, uint64_t rev, bit_extz_t *ez, char *ezh)
{
uint64_t ci = 0; uint16_t c; uint32_t cl;
fprintf(fp, "%.*s\t%lu\t%lu\t%lu\t", qry_id_n, qry_id, ql, qs, qe);
fprintf(fp, "%c\t", "+-"[rev]);
fprintf(fp, "%.*s\t%lu\t%lu\t%lu\t", ref_id_n, ref_id, rl, rs, re);
fprintf(fp, "255\tcg:Z:");
while (ci < ez->cigar.n) {
ci = pop_trace(&(ez->cigar), ci, &c, &cl);
fprintf(fp, "%u%c", cl, ezh[c]);
}
fprintf(fp, "\n");
}

static void *worker_ov_dbg_pipeline(void *data, int step, void *in) // callback for kt_pipeline()
{
cal_ec_r_dbg_t *p = (cal_ec_r_dbg_t*)data; char cm[4]; cm[0] = 'M'; cm[1] = 'M'; cm[2] = 'I'; cm[3] = 'D';
// cal_ec_r_dbg_step_t
if (step == 0) { // step 1: read a block of sequences
cal_ec_r_dbg_step_t *s; CALLOC(s, 1);
s->si = p->cn; p->cn += p->chunk_size;
if(p->cn > p->n_a) p->cn = p->n_a; s->ei = p->cn;
if(s->si >= s->ei) free(s);
else return s;
} else if (step == 1) { // step 2: alignment
cal_ec_r_dbg_step_t *s = (cal_ec_r_dbg_step_t*)in;
s->buf = gen_ec_ovec_buf_t(p->n_thread); CALLOC(s->res, p->n_thread);
kt_for(p->n_thread, worker_hap_ec_dbg_paf, s, (s->ei - s->si));
destroy_ec_ovec_buf_t(s->buf);
return s;
} else if (step == 2) { // step 3: dump
cal_ec_r_dbg_step_t *s = (cal_ec_r_dbg_step_t*)in; uint64_t k, z; bit_extz_t ez; memset(&ez, 0, sizeof(ez));
// UC_Read qu; UC_Read tu; init_UC_Read(&qu); init_UC_Read(&tu);
for (k = 0; k < p->n_thread; k++) {
for (z = 0; z < s->res[k].n; z++) {
ez.cigar.a = s->res[k].ec.a + s->res[k].a[z].bl; ez.cigar.n = ez.cigar.m = s->res[k].a[z].cc;

// UC_Read_resize(qu, (s->res[k].a[z].qe - ((uint32_t)s->res[k].a[z].qns)));
// recover_UC_Read_sub_region(qu.seq, ((uint32_t)s->res[k].a[z].qns), (s->res[k].a[z].qe - ((uint32_t)s->res[k].a[z].qns)), 0, &R_INF, (s->res[k].a[z].qns>>32));

// UC_Read_resize(tu, (s->res[k].a[z].te - s->res[k].a[z].ts));
// recover_UC_Read_sub_region(tu.seq, s->res[k].a[z].ts, s->res[k].a[z].te - s->res[k].a[z].ts, 0, &R_INF, s->res[k].a[z].tn);

print_ov_dbg_paf(p->fp, NULL/**qu.seq**/, Get_NAME(R_INF, (s->res[k].a[z].qns>>32)), Get_NAME_LENGTH(R_INF, (s->res[k].a[z].qns>>32)),
NULL/**tu.seq**/, Get_NAME(R_INF, (s->res[k].a[z].tn)), Get_NAME_LENGTH(R_INF, (s->res[k].a[z].tn)), (uint32_t)s->res[k].a[z].qns, s->res[k].a[z].qe, Get_READ_LENGTH(R_INF, (s->res[k].a[z].qns>>32)), s->res[k].a[z].ts, s->res[k].a[z].te, Get_READ_LENGTH(R_INF, (s->res[k].a[z].tn)), s->res[k].a[z].rev, &ez, cm);
}
free(s->res[k].a); free(s->res[k].ec.a);
}
free(s->res); free(s); ///destory_UC_Read(&qu); destory_UC_Read(&tu);
}
return 0;
}

void cal_ec_r_dbg(uint64_t n_thre, uint64_t n_a)
{
char *paf = NULL; MALLOC(paf, (strlen(asm_opt.output_file_name)+64));
sprintf(paf, "%s.ovlp.paf", asm_opt.output_file_name);

cal_ec_r_dbg_t sl; memset(&sl, 0, sizeof(sl));
sl.n_thread = n_thre; sl.n_a = n_a; sl.chunk_size = 2000; sl.cn = 0; sl.fp = fopen(paf, "w");

kt_pipeline(3, worker_ov_dbg_pipeline, &sl, 3);

fclose(sl.fp); free(paf);
}

void destroy_cc_v(cc_v *z)
{
uint64_t k;
Expand Down
1 change: 1 addition & 0 deletions ecovlp.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,6 @@ void cal_ov_r(uint64_t n_thre, uint64_t n_a, uint64_t new_idx);
void handle_chemical_r(uint64_t n_thre, uint64_t n_a);
void handle_chemical_arc(uint64_t n_thre, uint64_t n_a);
uint8_t* gen_chemical_arc_rf(uint64_t n_thre, uint64_t n_a);
void cal_ec_r_dbg(uint64_t n_thre, uint64_t n_a);

#endif
2 changes: 1 addition & 1 deletion main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ int main(int argc, char *argv[])
// ed_band_cal_global_128bit((char*)"ACTTTTTT", 8, (char*)"AATTTT", 6, 3));
// exit(1);
if(asm_opt.sec_in) ret = ha_assemble_pair();
else if(asm_opt.dbg_ovec_cal) ret = ha_assemble_ovec();
else if(asm_opt.dbg_ovec_cal) ret = ha_ec_dbg();
else ret = ha_assemble();

destory_opt(&asm_opt);
Expand Down

0 comments on commit 2c77b3c

Please sign in to comment.