Skip to content

Commit

Permalink
Check allele codes before trying to write PGEN
Browse files: browse the repository at this point in the history
  • Loading branch information
aryarm authored Dec 27, 2024
1 parent 72a5f88 commit f04ec2b
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 0 deletions.
3 changes: 3 additions & 0 deletions haptools/data/genotypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1590,6 +1590,7 @@ def write(self):
end = start + chunks
if end > len(self.variants):
end = len(self.variants)
self.log.debug(f"Writing variant #{start} through variant #{end}")
size = end - start
try:
missing = np.ascontiguousarray(
Expand All @@ -1606,6 +1607,8 @@ def write(self):
"You don't have enough memory to write these genotypes! Try"
" specifying a value to the chunk_size parameter, instead"
) from e
if not np.all(allele_cts <= max_allele_ct):
raise ValueError("Some variants have more alleles than expected")
# convert any missing genotypes to -9
subset_data[missing] = -9
# finally, append the genotypes to the PGEN file
Expand Down
1 change: 1 addition & 0 deletions haptools/sim_genotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def output_vcf(
vcf.read()
else:
vcf.read(region=f"{region['chr']}:{region['start']}-{region['end']}")
vcf.check_missing()

log.debug(f"Read in variants from {variant_file}")

Expand Down

0 comments on commit f04ec2b

Please sign in to comment.