From 1f5cab59c728a6d16edbbcd548ae41b26d9bf9c8 Mon Sep 17 00:00:00 2001 From: Rob Davies Date: Fri, 6 Jan 2023 16:58:05 +0000 Subject: [PATCH] Don't error when making an iterator on a tid not in the index Instead return one that instantly finishes. Fixes an edge case (issue #1534) where an index did not include an entry for a chromosome that was mentioned in the file header but had no data records. Normally these would be present but empty, but it was possible to use the IDX= key in a VCF file to make an index where the chromosome simply did not appear. In this case, rather than an error, we want to return the equivalent of HTS_IDX_NONE so the iterator produces no data. Another scenario where this is useful is if you build an index, and then try to use it immediately without first saving and reading it back in again. Such an index will have NULL entries in bidx[] for any chromosomes with no data. Again we want to return an HTS_IDX_NONE iterator if one of those chromosomes is queried. (This issue didn't usually occur because most programs are loading in an existing index, and idx_read_core() makes bidx[] entries for everything even if there's nothing in the index for the chromosome.) Note that this changes vcf_loop() in test_view.c so that it now treats bcf_itr_querys() failures as an error. The new behaviour matches sam_loop() and is needed to detect the problem being fixed here. All the other tests still work after this change no nothing was relying on the old behaviour of ignoring the errors. --- hts.c | 6 ++---- test/modhdr.expected.vcf | 4 ++++ test/modhdr.vcf.gz | Bin 0 -> 156 bytes test/modhdr.vcf.gz.csi | Bin 0 -> 86 bytes test/test.pl | 5 +++++ test/test_view.c | 3 ++- 6 files changed, 13 insertions(+), 5 deletions(-) create mode 100644 test/modhdr.expected.vcf create mode 100644 test/modhdr.vcf.gz create mode 100644 test/modhdr.vcf.gz.csi diff --git a/hts.c b/hts.c index c79d92d99..66b8643e8 100644 --- a/hts.c +++ b/hts.c @@ -3077,16 +3077,14 @@ hts_itr_t *hts_itr_query(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t free(iter); iter = NULL; } + } else if (tid >= idx->n || (bidx = idx->bidx[tid]) == NULL) { + iter->finished = 1; } else { if (beg < 0) beg = 0; if (end < beg) { free(iter); return NULL; } - if (tid >= idx->n || (bidx = idx->bidx[tid]) == NULL) { - free(iter); - return NULL; - } k = kh_get(bin, bidx, META_BIN(idx)); if (k != kh_end(bidx)) diff --git a/test/modhdr.expected.vcf b/test/modhdr.expected.vcf new file mode 100644 index 000000000..bad663c7e --- /dev/null +++ b/test/modhdr.expected.vcf @@ -0,0 +1,4 @@ +##fileformat=VCFv4.3 +##FILTER= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO diff --git a/test/modhdr.vcf.gz b/test/modhdr.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..f97e06ab3dee21b1d51002f83957a02a8a90f6f4 GIT binary patch literal 156 zcmb2|=3rp}f&Xj_PR>jW^$e^Go?OiaA8dXei&=W*>lMuo!xDwute6j4kJ$7S4u0o; zv}4WTiaF8GSDd^eYU|W^LwN+jWmJG#w-%_3=CnO{Q@q|Pmodm=!mw15S6weJthUe$~XIcO? O$fN0$W?%*z1R? "$$opts{bin}/htsfile -c $$opts{path}/formatmissing.vcf"); test_cmd($opts, %args, out => "vcf_meta_meta.vcf", cmd => "$$opts{bin}/htsfile -c $$opts{path}/vcf_meta_meta.vcf"); + + # VCF file with contig IDX=1, simulating an edited BCF file + # See htslib issue 1534 + test_cmd($opts, %args, out => "modhdr.expected.vcf", + cmd => "$$opts{path}/test_view $$opts{path}/modhdr.vcf.gz chr22:1-2"); } sub write_multiblock_bgzf { diff --git a/test/test_view.c b/test/test_view.c index 416ffe98d..f33c1cdf0 100644 --- a/test/test_view.c +++ b/test/test_view.c @@ -224,7 +224,8 @@ int vcf_loop(int argc, char **argv, int optind, struct opts *opts, htsFile *in, hts_itr_t *iter; if ((iter = bcf_itr_querys(idx, h, argv[i])) == 0) { fprintf(stderr, "[E::%s] fail to parse region '%s'\n", __func__, argv[i]); - continue; + exit_code = 1; + break; } while ((r = bcf_itr_next(in, iter, b)) >= 0) { if (!opts->benchmark && bcf_write1(out, h, b) < 0) {