Skip to content

Commit

Permalink
r837: removed BAM support
Browse files Browse the repository at this point in the history
This is not necessary.
  • Loading branch information
lh3 committed Sep 16, 2014
1 parent 7244d66 commit 1bdd791
Show file tree
Hide file tree
Showing 12 changed files with 24 additions and 268 deletions.
19 changes: 3 additions & 16 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ CC= gcc
#CC= clang --analyze
CFLAGS= -g -Wall -Wno-unused-function -O2
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
HTSLIB_PATH=
AR= ar
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC)
LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o
Expand All @@ -18,27 +17,15 @@ SUBDIRS= .
.SUFFIXES:.c .o .cc

.c.o:
ifneq ($(HTSLIB_PATH),)
$(CC) -c $(CFLAGS) $(DFLAGS) -DUSE_HTSLIB $(INCLUDES) -I$(HTSLIB_PATH) $< -o $@
else
$(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@
endif
$(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@

all:$(PROG)

bwa:libbwa.a $(AOBJS) main.o
ifneq ($(HTSLIB_PATH),)
$(CC) $(CFLAGS) $(DFLAGS) $(AOBJS) main.o -o $@ $(HTSLIB_PATH)/libhts.a -L. -lbwa $(LIBS)
else
$(CC) $(CFLAGS) $(DFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS)
endif
$(CC) $(CFLAGS) $(DFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS)

bwamem-lite:libbwa.a example.o
ifneq ($(HTSLIB_PATH),)
$(CC) $(CFLAGS) $(DFLAGS) example.o -o $@ $(HTSLIB_PATH)/libhts.a -L. -lbwa $(LIBS)
else
$(CC) $(CFLAGS) $(DFLAGS) example.o -o $@ -L. -lbwa $(LIBS)
endif
$(CC) $(CFLAGS) $(DFLAGS) example.o -o $@ -L. -lbwa $(LIBS)

libbwa.a:$(LOBJS)
$(AR) -csru $@ $(LOBJS)
Expand Down
2 changes: 0 additions & 2 deletions bamlite.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#ifndef USE_HTSLIB
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
Expand Down Expand Up @@ -209,4 +208,3 @@ int bamlite_gzclose(gzFile file) {
return ret;
}
#endif /* USE_VERBOSE_ZLIB_WRAPPERS */
#endif
2 changes: 0 additions & 2 deletions bamlite.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#ifndef BAMLITE_H_
#define BAMLITE_H_
#ifndef USE_HTSLIB

#include <stdint.h>
#include <zlib.h>
Expand Down Expand Up @@ -113,4 +112,3 @@ extern "C" {
#endif

#endif
#endif
41 changes: 0 additions & 41 deletions bwa.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@
#include "ksw.h"
#include "utils.h"
#include "kstring.h"
#ifdef USE_HTSLIB
#include <htslib/sam.h>
#endif

#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
Expand Down Expand Up @@ -271,27 +268,12 @@ void bwa_print_sam_hdr(const bntseq_t *bns, const char *rg_line)
{
int i;
extern char *bwa_pg;
err_printf("@HD\tVN:1.4\tSO:unknown\n");

This comment has been minimized.

Copy link
@mp15

mp15 Sep 18, 2014

Contributor

This isn't BAM support :(

for (i = 0; i < bns->n_seqs; ++i)
err_printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[i].name, bns->anns[i].len);
if (rg_line) err_printf("%s\n", rg_line);
err_printf("%s\n", bwa_pg);
}

#ifdef USE_HTSLIB
void bwa_format_sam_hdr(const bntseq_t *bns, const char *rg_line, kstring_t *str)
{
int i;
extern char *bwa_pg;
str->l = 0; str->s = 0;
ksprintf(str, "@HD\tVN:1.4\tSO:unknown\n");
for (i = 0; i < bns->n_seqs; ++i)
ksprintf(str, "@SQ\tSN:%s\tLN:%d\n", bns->anns[i].name, bns->anns[i].len);
if (rg_line) ksprintf(str, "%s\n", rg_line);
ksprintf(str, "%s\n", bwa_pg);
}
#endif

static char *bwa_escape(char *s)
{
char *p, *q;
Expand Down Expand Up @@ -337,26 +319,3 @@ char *bwa_set_rg(const char *s)
return 0;
}

#ifdef USE_HTSLIB
bams_t *bams_init() {
return calloc(1, sizeof(bams_t));
}

void bams_add(bams_t *bams, bam1_t *b) {
if (bams->m == bams->l) {
bams->m = bams->m << 2;
bams->bams = realloc(bams->bams, sizeof(bam1_t) * bams->m);
}
bams->bams[bams->l] = b;
bams->l++;
}

void bams_destroy(bams_t *bams) {
int i;
for (i = 0; i < bams->l; i++) {
bam_destroy1(bams->bams[i]);
}
free(bams->bams);
free(bams);
}
#endif
32 changes: 0 additions & 32 deletions bwa.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@
#include <stdint.h>
#include "bntseq.h"
#include "bwt.h"
#ifdef USE_HTSLIB
#include <htslib/sam.h>
#endif

#define BWA_IDX_BWT 0x1
#define BWA_IDX_BNS 0x2
Expand All @@ -19,31 +16,11 @@ typedef struct {
uint8_t *pac; // the actual 2-bit encoded reference sequences with 'N' converted to a random base
} bwaidx_t;

#ifdef USE_HTSLIB
typedef struct {
int l, m;
bam1_t **bams;
} bams_t;
#endif

typedef struct {
int l_seq;
#ifdef USE_HTSLIB
char *name, *comment, *seq, *qual;
bams_t *bams;
#else
char *name, *comment, *seq, *qual, *sam;
#endif
} bseq1_t;

// This is here to faciliate passing around HTSLIB's bam_hdr_t structure when we are not compiling with HTSLIB
#ifndef USE_HTSLIB
typedef struct {
void *ptr;
} bam_hdr_t; // DO NOT USE
#endif


extern int bwa_verbose;
extern char bwa_rg_id[256];

Expand All @@ -64,17 +41,8 @@ extern "C" {
void bwa_idx_destroy(bwaidx_t *idx);

void bwa_print_sam_hdr(const bntseq_t *bns, const char *rg_line);
#ifdef USE_HTSLIB
void bwa_format_sam_hdr(const bntseq_t *bns, const char *rg_line, kstring_t *str);
#endif
char *bwa_set_rg(const char *s);

#ifdef USE_HTSLIB
bams_t *bams_init();
void bams_add(bams_t *bams, bam1_t *b);
void bams_destroy(bams_t *bams);
#endif

#ifdef __cplusplus
}
#endif
Expand Down
45 changes: 8 additions & 37 deletions bwamem.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@
#include "ksort.h"
#include "utils.h"

#ifdef USE_HTSLIB
#include <htslib/sam.h>
#endif

#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
Expand Down Expand Up @@ -787,7 +783,7 @@ static inline int get_rlen(int n_cigar, const uint32_t *cigar)
return l;
}

void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_, bam_hdr_t *h)
void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_)
{
int i, l_name;
mem_aln_t ptmp = list[which], *p = &ptmp, mtmp, *m = 0; // make a copy of the alignment to convert
Expand Down Expand Up @@ -912,15 +908,6 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq
if (str->s[i] == '\t') str->s[i] = ' ';
}
kputc('\n', str);

#ifdef USE_HTSLIB
bam1_t *b = bam_init1();
if (sam_parse1(str, h, b) < 0) {
// TODO: error!
}
bams_add(s->bams, b);
str->l = 0; str->s = 0;
#endif
}

/************************
Expand Down Expand Up @@ -954,7 +941,7 @@ int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a)
}

// TODO (future plan): group hits into a uint64_t[] array. This will be cleaner and more flexible
void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m, bam_hdr_t *h)
void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m)
{
extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, mem_alnreg_v *a, int l_query, const char *query);
kstring_t str;
Expand Down Expand Up @@ -986,16 +973,14 @@ void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac,
mem_aln_t t;
t = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, 0);
t.flag |= extra_flag;
mem_aln2sam(opt, bns, &str, s, 1, &t, 0, m, h);
mem_aln2sam(opt, bns, &str, s, 1, &t, 0, m);
} else {
for (k = 0; k < aa.n; ++k)
mem_aln2sam(opt, bns, &str, s, aa.n, aa.a, k, m, h);
mem_aln2sam(opt, bns, &str, s, aa.n, aa.a, k, m);
for (k = 0; k < aa.n; ++k) free(aa.a[k].cigar);
free(aa.a);
}
#ifndef USE_HTSLIB
s->sam = str.s;
#endif
if (XA) {
for (k = 0; k < a->n; ++k) free(XA[k]);
free(XA);
Expand Down Expand Up @@ -1123,7 +1108,6 @@ typedef struct {
bseq1_t *seqs;
mem_alnreg_v *regs;
int64_t n_processed;
bam_hdr_t *h;
} worker_t;

static void worker1(void *data, int i, int tid)
Expand All @@ -1142,33 +1126,26 @@ static void worker1(void *data, int i, int tid)

static void worker2(void *data, int i, int tid)
{
extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2], bam_hdr_t *h);
extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2]);
extern void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a);
worker_t *w = (worker_t*)data;
if (!(w->opt->flag&MEM_F_PE)) {
#ifdef USE_HTSLIB
w->seqs[i].bams = bams_init();
#endif
if (bwa_verbose >= 4) printf("=====> Finalizing read '%s' <=====\n", w->seqs[i].name);
if (w->opt->flag & MEM_F_ALN_REG) {
mem_reg2ovlp(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i]);
} else {
mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a, w->n_processed + i);
mem_reg2sam(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0, w->h);
mem_reg2sam(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0);
}
free(w->regs[i].a);
} else {
if (bwa_verbose >= 4) printf("=====> Finalizing read pair '%s' <=====\n", w->seqs[i<<1|0].name);
#ifdef USE_HTSLIB
w->seqs[i<<1].bams = bams_init();
w->seqs[1+(i<<1)].bams = bams_init();
#endif
mem_sam_pe(w->opt, w->bns, w->pac, w->pes, (w->n_processed>>1) + i, &w->seqs[i<<1], &w->regs[i<<1], w->h);
mem_sam_pe(w->opt, w->bns, w->pac, w->pes, (w->n_processed>>1) + i, &w->seqs[i<<1], &w->regs[i<<1]);
free(w->regs[i<<1|0].a); free(w->regs[i<<1|1].a);
}
}

void mem_process_seqs2(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0, bam_hdr_t *h)
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0)
{
extern void kt_for(int n_threads, void (*func)(void*,int,int), void *data, int n);
worker_t w;
Expand All @@ -1185,7 +1162,6 @@ void mem_process_seqs2(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *b
w.aux = malloc(opt->n_threads * sizeof(smem_aux_t));
for (i = 0; i < opt->n_threads; ++i)
w.aux[i] = smem_aux_init();
w.h = h;
kt_for(opt->n_threads, worker1, &w, (opt->flag&MEM_F_PE)? n>>1 : n); // find mapping positions
for (i = 0; i < opt->n_threads; ++i)
smem_aux_destroy(w.aux[i]);
Expand All @@ -1199,8 +1175,3 @@ void mem_process_seqs2(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *b
if (bwa_verbose >= 3)
fprintf(stderr, "[M::%s] Processed %d reads in %.3f CPU sec, %.3f real sec\n", __func__, n, cputime() - ctime, realtime() - rtime);
}

void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0)
{
mem_process_seqs2(opt, bwt, bns, pac, n_processed, n, seqs, pes0, 0);
}
6 changes: 1 addition & 5 deletions bwamem.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,6 @@ typedef struct {
int max_matesw; // perform maximally max_matesw rounds of mate-SW for each end
int max_hits; // if there are max_hits or fewer, output them all
int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset
#ifdef USE_HTSLIB
int bam_output;
#endif
} mem_opt_t;

typedef struct {
Expand Down Expand Up @@ -130,10 +127,8 @@ extern "C" {
* @param seqs query sequences; $seqs[i].seq/sam to be modified after the call
* @param pes0 insert-size info; if NULL, infer from data; if not NULL, it should be an array with 4 elements,
* corresponding to each FF, FR, RF and RR orientation. See mem_pestat() for more info.
* @param h the BAM header, NULL if not using HTSLIB
*/
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0);
void mem_process_seqs2(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0, bam_hdr_t *h);

/**
* Find the aligned regions for one query sequence
Expand Down Expand Up @@ -165,6 +160,7 @@ extern "C" {
* @return CIGAR, strand, mapping quality and forward-strand position
*/
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq, const mem_alnreg_t *ar);
mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq, const mem_alnreg_t *ar, const char *name);

/**
* Infer the insert size distribution from interleaved alignment regions
Expand Down
12 changes: 0 additions & 12 deletions bwamem_extra.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,7 @@ mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *
return ar;
}

#ifdef USE_HTSLIB
void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, bam_hdr_t *h)
#else
void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a)
#endif
{
int i;
kstring_t str = {0,0,0};
Expand All @@ -119,15 +115,7 @@ void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac,
ksprintf(&str, "%.3f", (double)p->truesc / opt->a / (qe - qb > re - rb? qe - qb : re - rb));
kputc('\n', &str);
}
#ifdef USE_HTSLIB
bam1_t *b = bam_init1();
if (sam_parse1(&str, h, b) < 0) {
// TODO: error!
}
bams_add(s->bams, b);
#else
s->sam = str.s;
#endif
}

// Okay, returning strings is bad, but this has happened a lot elsewhere. If I have time, I need serious code cleanup.
Expand Down
Loading

1 comment on commit 1bdd791

@nh13
Copy link
Contributor

@nh13 nh13 commented on 1bdd791 Jun 5, 2015

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why was this removed, with no input from users or good justification or discussion?

Please sign in to comment.