From 1152d8d508bb3eec43f85434db6c5d886764bebe Mon Sep 17 00:00:00 2001 From: Tim Millar Date: Thu, 8 Oct 2020 02:34:50 +1300 Subject: [PATCH] Add fill argument to Genotype.array (#173) --- cyvcf2/cyvcf2.pyx | 9 +++++---- cyvcf2/tests/test_reader.py | 6 ++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/cyvcf2/cyvcf2.pyx b/cyvcf2/cyvcf2.pyx index 8af5383..9669705 100644 --- a/cyvcf2/cyvcf2.pyx +++ b/cyvcf2/cyvcf2.pyx @@ -1024,13 +1024,14 @@ cdef class Genotypes(object): result.append((v >> 1) - 1) return result - def array(Genotypes self): + def array(Genotypes self, int fill=-2): """ array returns an int16 numpy array of shape n_samples, (ploidy + 1). The last column indicates phased (1 is phased, 0 is unphased). The other columns indicate the alleles, e.g. [0, 1, 1] is 0|1. - Unknown alleles are represented by -1 and mixed-ploidy arrays - are padded with -2 to indicate non-alleles. + Unknown alleles are represented by -1. + If a mixture of ploidy levels is present then the array is padded + with the `fill` value (default = -2) to indicate non-alleles. 
""" cdef np.ndarray[np.int16_t, ndim=2] to_return = np.zeros((self.n_samples, self.ploidy + 1), dtype=np.int16) @@ -1044,7 +1045,7 @@ cdef class Genotypes(object): for allele in range(self.ploidy): raw = self._raw[ind * self.ploidy + allele] if raw == bcf_int32_vector_end: - to_return[ind, allele] = -2 + to_return[ind, allele] = fill else: to_return[ind, allele] = (raw >> 1) - 1 to_return[ind, self.ploidy] = (self._raw[ind * self.ploidy + 1] & 1) == 1 diff --git a/cyvcf2/tests/test_reader.py b/cyvcf2/tests/test_reader.py index c46cc39..00b0d23 100644 --- a/cyvcf2/tests/test_reader.py +++ b/cyvcf2/tests/test_reader.py @@ -841,6 +841,12 @@ def test_access_genotype_array(): np.array([[-1, -2, 1], [0, 2, 1]], dtype=np.int16) ) + # test fill value + np.testing.assert_array_equal( + v.genotype.array(fill=-9), + np.array([[-1, -9, 1], [0, 2, 1]], dtype=np.int16) + ) + def test_alt_homozygous_gt(): vcf = VCF(os.path.join(HERE, "test-multiallelic-homozygous-alt.vcf.gz")) assert vcf is not None