From c58203d03b2c81de3d84c68985c59ce032a5abac Mon Sep 17 00:00:00 2001 From: Petr Danecek Date: Thu, 8 Dec 2022 09:15:44 +0000 Subject: [PATCH] Make bcf_hdr_seqnames() work with gapped chromosome ids The bcf_hdr_remove() call can create gaps in tid blocks, which cause the assertion in bcf_hdr_seqnames() to fail. This problem was encountered in #1533; this commit is only a partial fix for it. --- htslib/vcf.h | 6 +++++- vcf.c | 36 ++++++++++++++++++++++++++++++------ 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/htslib/vcf.h b/htslib/vcf.h index 04784d168..8ab78c079 100644 --- a/htslib/vcf.h +++ b/htslib/vcf.h @@ -636,7 +636,11 @@ set to one of BCF_ERR* codes and must be checked before calling bcf_write(). HTSLIB_EXPORT bcf_hdr_t *bcf_hdr_subset(const bcf_hdr_t *h0, int n, char *const* samples, int *imap); - /** Creates a list of sequence names. It is up to the caller to free the list (but not the sequence names) */ + /** + * Creates a list of sequence names. It is up to the caller to free the list (but not the sequence names). + * NB: sequence name indexes returned by bcf_hdr_seqnames() may not correspond to bcf1_t.rid, use + * bcf_hdr_id2name() or bcf_seqname() instead. 
+ */ HTSLIB_EXPORT const char **bcf_hdr_seqnames(const bcf_hdr_t *h, int *nseqs); diff --git a/vcf.c b/vcf.c index e91b24775..fc96aa24a 100644 --- a/vcf.c +++ b/vcf.c @@ -2214,20 +2214,44 @@ char *bcf_hdr_fmt_text(const bcf_hdr_t *hdr, int is_bcf, int *len) const char **bcf_hdr_seqnames(const bcf_hdr_t *h, int *n) { vdict_t *d = (vdict_t*)h->dict[BCF_DT_CTG]; - int tid, m = kh_size(d); + int i, tid, m = kh_size(d); const char **names = (const char**) calloc(m,sizeof(const char*)); + if ( !names ) + { + hts_log_error("Failed to allocate memory"); + *n = 0; + return NULL; + } khint_t k; for (k=kh_begin(d); k= m ) + { + // This can happen after a contig has been removed from BCF header via bcf_hdr_remove() + if ( hts_resize(const char*, tid + 1, &m, &names, HTS_RESIZE_CLEAR)<0 ) + { + hts_log_error("Failed to allocate memory"); + *n = 0; + free(names); + return NULL; + } + m = tid + 1; + } names[tid] = kh_key(d,k); } - // sanity check: there should be no gaps - for (tid=0; tid