From c44714058b8977f24f9932dc5bdb4b5fd0b808fe Mon Sep 17 00:00:00 2001 From: silastittes Date: Thu, 9 Jan 2025 11:43:12 -0800 Subject: [PATCH] adding assembly_name attribute --- stdpopsim/annotations.py | 4 ++++ stdpopsim/catalog/AraTha/annotations.py | 3 +++ stdpopsim/catalog/DroMel/annotations.py | 2 ++ stdpopsim/catalog/HomSap/annotations.py | 2 ++ stdpopsim/catalog/HomSap/genetic_maps.py | 5 +++++ stdpopsim/catalog/PhoSin/annotations.py | 2 ++ stdpopsim/genetic_maps.py | 2 ++ 7 files changed, 20 insertions(+) diff --git a/stdpopsim/annotations.py b/stdpopsim/annotations.py index 54dd15de8..71dfcc7c0 100644 --- a/stdpopsim/annotations.py +++ b/stdpopsim/annotations.py @@ -27,6 +27,9 @@ class Annotation: :vartype citations: list of :class:`.Citation` :ivar file_pattern: The pattern used to map individual chromosome id strings to files + :ivar assembly_name: The name of the genome assembly the annotation + is based on. + :vartype assembly_name: str """ id = attr.ib() @@ -40,6 +43,7 @@ class Annotation: file_pattern = attr.ib() annotation_source = attr.ib() annotation_type = attr.ib() + assembly_name = attr.ib() def __attrs_post_init__(self): self._cache = stdpopsim.CachedData( diff --git a/stdpopsim/catalog/AraTha/annotations.py b/stdpopsim/catalog/AraTha/annotations.py index 612fa3275..8e8e4e590 100644 --- a/stdpopsim/catalog/AraTha/annotations.py +++ b/stdpopsim/catalog/AraTha/annotations.py @@ -27,6 +27,7 @@ file_pattern="araport_exons_{id}.txt", annotation_source="araport11", annotation_type="exon", + assembly_name="TAIR10", ) _species.add_annotations(_an) @@ -56,5 +57,7 @@ file_pattern="araport_CDS_{id}.txt", annotation_source="araport11", annotation_type="CDS", + assembly_name="TAIR10", + ) _species.add_annotations(_an2) diff --git a/stdpopsim/catalog/DroMel/annotations.py b/stdpopsim/catalog/DroMel/annotations.py index 9f7c3abaa..b378bf7ac 100644 --- a/stdpopsim/catalog/DroMel/annotations.py +++ 
b/stdpopsim/catalog/DroMel/annotations.py @@ -28,6 +28,7 @@ file_pattern="flybase_exons_{id}.txt", annotation_source="FlyBase", annotation_type="exon", + assembly_name="BDGP6.32.51", ) _species.add_annotations(_an) @@ -57,5 +58,6 @@ file_pattern="flybase_CDS_{id}.txt", annotation_source="FlyBase", annotation_type="CDS", + assembly_name="BDGP6.32.51", ) _species.add_annotations(_an2) diff --git a/stdpopsim/catalog/HomSap/annotations.py b/stdpopsim/catalog/HomSap/annotations.py index ace4a53cd..bd494dacf 100644 --- a/stdpopsim/catalog/HomSap/annotations.py +++ b/stdpopsim/catalog/HomSap/annotations.py @@ -27,6 +27,7 @@ file_pattern="ensembl_havana_exons_{id}.txt", annotation_source="ensembl_havana", annotation_type="exon", + assembly_name="GRCh38.p13", ) _species.add_annotations(_an) @@ -56,5 +57,6 @@ file_pattern="ensembl_havana_CDS_{id}.txt", annotation_source="ensembl_havana", annotation_type="CDS", + assembly_name="GRCh38.p13", ) _species.add_annotations(_an2) diff --git a/stdpopsim/catalog/HomSap/genetic_maps.py b/stdpopsim/catalog/HomSap/genetic_maps.py index f21368919..300eb6686 100644 --- a/stdpopsim/catalog/HomSap/genetic_maps.py +++ b/stdpopsim/catalog/HomSap/genetic_maps.py @@ -31,6 +31,7 @@ sha256="80f22d9e6cb0e497074ed1bc277e765fa9d8e22f21b2f66c3b10286520f6b68f", file_pattern="genetic_map_GRCh37_chr{id}.txt", citations=[_hapmap2007.because(stdpopsim.CiteReason.GEN_MAP)], + assembly_name="GRCh37", ) _species.add_genetic_map(_gm) @@ -60,6 +61,7 @@ sha256="497512ed1c0f8a40e9aa13696049a9f8c3cb062e898921cfd7d85ce9d14c4baa", file_pattern="genetic_map_Hg38_chr{id}.txt", citations=[_hapmap2007.because(stdpopsim.CiteReason.GEN_MAP)], + assembly_name="GRCh38.p13", ) _species.add_genetic_map(_gm) @@ -90,6 +92,7 @@ reasons={stdpopsim.CiteReason.GEN_MAP}, ) ], + assembly_name="GRCh36", ) _species.add_genetic_map(_gm) @@ -131,6 +134,7 @@ reasons={stdpopsim.CiteReason.GEN_MAP}, ) ], + assembly_name="GRCh38.p13", ) _species.add_genetic_map(_gm) @@ -223,5 +227,6 @@ 
reasons={stdpopsim.CiteReason.GEN_MAP}, ) ], + assembly_name="GRCh38.p13", ) _species.add_genetic_map(_gm) diff --git a/stdpopsim/catalog/PhoSin/annotations.py b/stdpopsim/catalog/PhoSin/annotations.py index 55f2c1a9a..07dadf31a 100644 --- a/stdpopsim/catalog/PhoSin/annotations.py +++ b/stdpopsim/catalog/PhoSin/annotations.py @@ -27,6 +27,7 @@ file_pattern="Phocoena_sinus.mPhoSin1.pri.110_exon_{id}.txt", annotation_source="ensembl", annotation_type="exon", + assembly_name="mPhoSin1.pri", ) _species.add_annotations(_an) @@ -56,5 +57,6 @@ file_pattern="Phocoena_sinus.mPhoSin1.pri.110_CDS_{id}.txt", annotation_source="ensembl", annotation_type="CDS", + assembly_name="mPhoSin1.pri", ) _species.add_annotations(_an2) diff --git a/stdpopsim/genetic_maps.py b/stdpopsim/genetic_maps.py index c6837d5d4..e6669d669 100644 --- a/stdpopsim/genetic_maps.py +++ b/stdpopsim/genetic_maps.py @@ -35,6 +35,7 @@ def __init__( description=None, long_description=None, citations=None, + assembly_name=None, ): self.id = id self.species = species @@ -45,6 +46,7 @@ def __init__( self.file_pattern = file_pattern self.description = description self.citations = citations + self.assembly_name = assembly_name self._cache = stdpopsim.CachedData( namespace=f"genetic_maps/{self.species.id}/{id}",