Skip to content

Commit

Permalink
Merge pull request #180 from lldelisle/variantFastaNonRefChr
Browse files Browse the repository at this point in the history
Variant fasta non ref chr
  • Loading branch information
mdshw5 authored Oct 28, 2021
2 parents d08e813 + 7b7da95 commit e5ecb62
Show file tree
Hide file tree
Showing 8 changed files with 1,656,446 additions and 17 deletions.
7 changes: 5 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,11 @@ install:
before_install:
- sudo apt-get install -y python3.6 python3-pip
- env python3.6 -m pip install biopython requests
- env python3.6 tests/data/download_gene_fasta.py
script: nosetests --with-coverage --cover-package=pyfaidx
- ls tests/data/chr22*
- env --debug python3.6 tests/data/download_gene_fasta.py
- ls tests/data/chr22*
script:
- nosetests --with-coverage --cover-package=pyfaidx
deploy:
provider: pypi
user: mdshw5
Expand Down
32 changes: 24 additions & 8 deletions pyfaidx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ class IndexNotFoundError(IOError):
"""Raised if read_fai cannot open the index file."""


class VcfIndexNotFoundError(IOError):
"""Raised if vcf cannot find a tbi file."""


class FastaNotFoundError(IOError):
"""Raised if the fasta file cannot be opened."""

Expand Down Expand Up @@ -1130,6 +1134,8 @@ def __init__(self,
self.vcf = vcf.Reader(filename=vcf_file)
else:
raise IOError("File {0} does not exist.".format(vcf_file))
if not os.path.exists(vcf_file + '.tbi'):
raise VcfIndexNotFoundError("File {0} has not tabix index.".format(vcf_file))
if sample is not None:
self.sample = sample
else:
Expand Down Expand Up @@ -1163,14 +1169,24 @@ def get_seq(self, name, start, end):
else:
seq_mut = list(seq.seq)
del seq.seq
var = self.vcf.fetch(name, start - 1, end)
for record in var:
if record.is_snp: # skip indels
sample = record.genotype(self.sample)
if sample.gt_type in self.gt_type and eval(self.filter):
alt = record.ALT[0]
i = (record.POS - 1) - (start - 1)
seq_mut[i:i + len(alt)] = str(alt)
try:
var = self.vcf.fetch(name, start - 1, end)
for record in var:
if record.is_snp: # skip indels
sample = record.genotype(self.sample)
if sample.gt_type in self.gt_type and eval(self.filter):
alt = record.ALT[0]
i = (record.POS - 1) - (start - 1)
seq_mut[i:i + len(alt)] = str(alt)
except ValueError as e: # Can be raised if name is not part of tabix for vcf
if self.vcf._tabix is not None and name not in self.vcf._tabix.contigs:
# The chromosome name is not part of the vcf
# The sequence returned is the same as the reference
pass
else:
# This is something else
raise e

# slice the list in case we added an MNP in last position
if self.faidx.as_raw:
return ''.join(seq_mut[:end - start + 1])
Expand Down
828,192 changes: 828,192 additions & 0 deletions tests/data/chr22.fasta

Large diffs are not rendered by default.

Loading

0 comments on commit e5ecb62

Please sign in to comment.