From 6754bc19c514d2c6ef3ceb14e3b783cbc488962b Mon Sep 17 00:00:00 2001 From: Huber Date: Mon, 18 Sep 2023 14:43:47 -0400 Subject: [PATCH] Adding GorGor species and demography Update species name --- stdpopsim/catalog/GorGor/__init__.py | 5 + .../catalog/GorGor/demographic_models.py | 260 ++++++++++++++++++ stdpopsim/catalog/GorGor/genome_data.py | 32 +++ stdpopsim/catalog/GorGor/species.py | 166 +++++++++++ stdpopsim/catalog/ensembl_info.py | 2 +- tests/test_GorGor.py | 105 +++++++ 6 files changed, 569 insertions(+), 1 deletion(-) create mode 100644 stdpopsim/catalog/GorGor/__init__.py create mode 100644 stdpopsim/catalog/GorGor/demographic_models.py create mode 100644 stdpopsim/catalog/GorGor/genome_data.py create mode 100644 stdpopsim/catalog/GorGor/species.py create mode 100644 tests/test_GorGor.py diff --git a/stdpopsim/catalog/GorGor/__init__.py b/stdpopsim/catalog/GorGor/__init__.py new file mode 100644 index 000000000..92f2125a8 --- /dev/null +++ b/stdpopsim/catalog/GorGor/__init__.py @@ -0,0 +1,5 @@ +""" +Catalog definitions for GorGor (Ensembl ID='gorilla_gorilla') +""" +from . import species # noqa: F401 +from . import demographic_models # noqa: F401 diff --git a/stdpopsim/catalog/GorGor/demographic_models.py b/stdpopsim/catalog/GorGor/demographic_models.py new file mode 100644 index 000000000..7921667c2 --- /dev/null +++ b/stdpopsim/catalog/GorGor/demographic_models.py @@ -0,0 +1,260 @@ +import msprime +import stdpopsim + +_species = stdpopsim.get_species("GorGor") + +########################################################### +# +# Demographic models +# +########################################################### + +_pawar2023 = stdpopsim.Citation( + author="Pawar et al. 2023", + year=2023, + doi="https://doi.org/10.1038/s41559-023-02145-2", + reasons={stdpopsim.CiteReason.DEM_MODEL}, +) + + +def _gorilla_ghost(): + id = "GorillaGhost_5P23" + description = "Ghost admixture in eastern gorillas" + long_description = """ + Demographic model of ghost admixture into eastern gorillas + from Pawar et al. (2023) Fig. 2A and Supp. Table 1. + This model simulates five populations: + mountain gorillas, eastern lowland gorillas, western lowland gorillas, + cross river gorillas, and a extinct ghost lineage. The ghost admixture + event is modelled as a 2.47% pulse from the ghost lineage to a + population ancestral to eastern gorillas. Migration events among + eastern and western lowland gorillas are modelled as single generation + pulses. Population size changes are also modelled. + """ + + populations = [ + stdpopsim.Population( + id="cross_river", description="Contemporary Cross River Gorillas" + ), + stdpopsim.Population( + id="western_lowland", description="Contemporary Western Lowland Gorillas" + ), + stdpopsim.Population( + id="eastern_lowland", description="Contemporary Eastern Lowland Gorillas" + ), + stdpopsim.Population( + id="mountain", description="Contemporary Mountain Gorillas" + ), + stdpopsim.Population( + id="ghost", description="Extinct ghost lineage", sampling_time=None + ), + ] + + citations = [_pawar2023] + generation_time = 19 + mutation_rate = 1.235e-8 + + N_cross_river = 14558 + N_western_lowland = 64749 + N_eastern_lowland = 20894 + N_mountain = 2158 + + # The effective population size of the extinct ghost population is + # difficult to estmiate due to the low admixture proportion into Eastern + # Gorillas. Here, it is somewhat arbitrarly set to 25,000 individuals. + # See also: + # https://github.com/h-pawar/gor_ghost_introg/blob/main/4.Demog_ABC_model_comparison/4.1.model_comparison_simulations/scripts/mc.ghoste.12apr22.R + N_ghost = 25000 + + # ********************* + + N_bottleneck_ELG = 243 + N_anc_ELG = 21982 + N_bottleneck_MG = 115 + N_anc_MG = 3009 + N_anc_EG = 5325 + N_change_WLG = 48288 + N_anc_WG = 98135 + N_gor_species_split = 14364 + N_anc_ghost_gor = 35381 + + m_ghost_EG = 0.02477 + m_WLG_EG = 0.002768 + m_EG_WLG = 0.00827 + + T_EG_subspecies_split = 15.048 * 1000 / generation_time + T_ghost_introg_end = 38.281 * 1000 / generation_time + T_ne_change_WLG = 40.896 * 1000 / generation_time + T_WG_subspecies_split = 454.313 * 1000 / generation_time + T_gor_species_split = 965.481 * 1000 / generation_time + T_ghost_gor_split = 3421.360 * 1000 / generation_time + + T_ne_recovery_ELG = 30 / generation_time + T_ne_bottleneck_ELG = 7.220 * 1000 / generation_time + T_ne_recovery_MG = 9.120 * 1000 / generation_time + T_ne_bottleneck_MG = 9.310 * 1000 / generation_time + T_extant_admix_end = 34.0 * 1000 / generation_time + T_extant_admix_start = 34.0019 * 1000 / generation_time + T_ghost_introg_start = 38.300 * 1000 / generation_time + + population_configurations = [ + msprime.PopulationConfiguration( + initial_size=N_cross_river, metadata=populations[0].asdict() + ), + msprime.PopulationConfiguration( + initial_size=N_western_lowland, metadata=populations[1].asdict() + ), + msprime.PopulationConfiguration( + initial_size=N_eastern_lowland, metadata=populations[2].asdict() + ), + msprime.PopulationConfiguration( + initial_size=N_mountain, metadata=populations[3].asdict() + ), + msprime.PopulationConfiguration( + initial_size=N_ghost, metadata=populations[4].asdict() + ), + ] + + migration_matrix = [ + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + ] + + demographic_events = [ + # ELG bottleneck + msprime.PopulationParametersChange( + time=T_ne_recovery_ELG, + initial_size=N_bottleneck_ELG, + growth_rate=0.0, + population_id=2, + ), + msprime.PopulationParametersChange( + time=T_ne_bottleneck_ELG, + initial_size=N_anc_ELG, + growth_rate=0.0, + population_id=2, + ), + # MG bottleneck + msprime.PopulationParametersChange( + time=T_ne_recovery_MG, + initial_size=N_bottleneck_MG, + growth_rate=0.0, + population_id=3, + ), + msprime.PopulationParametersChange( + time=T_ne_bottleneck_MG, + initial_size=N_anc_MG, + growth_rate=0.0, + population_id=3, + ), + # EG subspecies split + msprime.MassMigration( + time=T_EG_subspecies_split, + source=2, + destination=3, + proportion=1.0, + ), + msprime.PopulationParametersChange( + time=T_EG_subspecies_split, + initial_size=N_anc_EG, + growth_rate=0.0, + population_id=3, + ), + # Admixture between EG and WLG + msprime.MigrationRateChange( + time=T_extant_admix_end, + rate=m_EG_WLG, + matrix_index=(1, 3), + ), + msprime.MigrationRateChange( + time=T_extant_admix_end, + rate=m_WLG_EG, + matrix_index=(3, 1), + ), + msprime.MigrationRateChange( + time=T_extant_admix_start, + rate=0, + matrix_index=(1, 3), + ), + msprime.MigrationRateChange( + time=T_extant_admix_start, + rate=0, + matrix_index=(3, 1), + ), + # Ghost admixture into EG + msprime.MigrationRateChange( + time=T_ghost_introg_end, + rate=m_ghost_EG, + matrix_index=(3, 4), + ), + msprime.MigrationRateChange( + time=T_ghost_introg_start, + rate=0, + matrix_index=(3, 4), + ), + # Ancestral size change WLG + msprime.PopulationParametersChange( + time=T_ne_change_WLG, + initial_size=N_change_WLG, + growth_rate=0.0, + population_id=1, + ), + # WG subspecies split + msprime.MassMigration( + time=T_WG_subspecies_split, + source=0, + destination=1, + proportion=1.0, + ), + msprime.PopulationParametersChange( + time=T_WG_subspecies_split, + initial_size=N_anc_WG, + growth_rate=0.0, + population_id=1, + ), + # Gorilla species split + msprime.MassMigration( + time=T_gor_species_split, + source=1, + destination=3, + proportion=1.0, + ), + msprime.PopulationParametersChange( + time=T_gor_species_split, + initial_size=N_gor_species_split, + growth_rate=0.0, + population_id=3, + ), + # Ghost Gorilla split + msprime.MassMigration( + time=T_ghost_gor_split, + source=3, + destination=4, + proportion=1.0, + ), + msprime.PopulationParametersChange( + time=T_ghost_gor_split, + initial_size=N_anc_ghost_gor, + growth_rate=0.0, + population_id=4, + ), + ] + + return stdpopsim.DemographicModel( + id=id, + description=description, + long_description=long_description, + populations=populations, + citations=citations, + generation_time=generation_time, + mutation_rate=mutation_rate, + population_configurations=population_configurations, + migration_matrix=migration_matrix, + demographic_events=demographic_events, + ) + + +_species.add_demographic_model(_gorilla_ghost()) diff --git a/stdpopsim/catalog/GorGor/genome_data.py b/stdpopsim/catalog/GorGor/genome_data.py new file mode 100644 index 000000000..7cac95805 --- /dev/null +++ b/stdpopsim/catalog/GorGor/genome_data.py @@ -0,0 +1,32 @@ +# File autogenerated from Ensembl REST API. Do not edit. +data = { + "assembly_accession": "GCA_000151905.3", + "assembly_name": "gorGor4", + "chromosomes": { + "1": {"length": 228908639, "synonyms": []}, + "2A": {"length": 107640188, "synonyms": []}, + "2B": {"length": 135448346, "synonyms": []}, + "3": {"length": 201391403, "synonyms": []}, + "4": {"length": 203093366, "synonyms": []}, + "5": {"length": 165317712, "synonyms": []}, + "6": {"length": 173266796, "synonyms": []}, + "7": {"length": 159110946, "synonyms": []}, + "8": {"length": 146757320, "synonyms": []}, + "9": {"length": 121655618, "synonyms": []}, + "10": {"length": 148642438, "synonyms": []}, + "11": {"length": 135235416, "synonyms": []}, + "12": {"length": 133456746, "synonyms": []}, + "13": {"length": 97154659, "synonyms": []}, + "14": {"length": 90977943, "synonyms": []}, + "15": {"length": 80890724, "synonyms": []}, + "16": {"length": 81384781, "synonyms": []}, + "17": {"length": 95888799, "synonyms": []}, + "18": {"length": 78004504, "synonyms": []}, + "19": {"length": 57935654, "synonyms": []}, + "20": {"length": 63231202, "synonyms": []}, + "21": {"length": 37891338, "synonyms": []}, + "22": {"length": 34650175, "synonyms": []}, + "X": {"length": 156331669, "synonyms": []}, + "MT": {"length": 16412, "synonyms": []}, + }, +} diff --git a/stdpopsim/catalog/GorGor/species.py b/stdpopsim/catalog/GorGor/species.py new file mode 100644 index 000000000..ea203de45 --- /dev/null +++ b/stdpopsim/catalog/GorGor/species.py @@ -0,0 +1,166 @@ +import stdpopsim +from . import genome_data + +_recombination_rate = dict() +_mutation_rate = dict() +_ploidy = dict() + +# genome-wide recombination rate +# associated with all recombining chromosomes +_Ne = 25200 +_mean_recombination_rate = 1.193e-08 +_recombination_rate_data = { + "1": _mean_recombination_rate, + "2A": _mean_recombination_rate, + "2B": _mean_recombination_rate, + "3": _mean_recombination_rate, + "4": _mean_recombination_rate, + "5": _mean_recombination_rate, + "6": _mean_recombination_rate, + "7": _mean_recombination_rate, + "8": _mean_recombination_rate, + "9": _mean_recombination_rate, + "10": _mean_recombination_rate, + "11": _mean_recombination_rate, + "12": _mean_recombination_rate, + "13": _mean_recombination_rate, + "14": _mean_recombination_rate, + "15": _mean_recombination_rate, + "16": _mean_recombination_rate, + "17": _mean_recombination_rate, + "18": _mean_recombination_rate, + "19": _mean_recombination_rate, + "20": _mean_recombination_rate, + "21": _mean_recombination_rate, + "22": _mean_recombination_rate, + "X": _mean_recombination_rate, + "MT": 0, +} + +# genome-wide average mutation rate +# associated with all chromosomes +_mean_mutation_rate = 1.235e-8 +_mutation_rate = { + "1": _mean_mutation_rate, + "2A": _mean_mutation_rate, + "2B": _mean_mutation_rate, + "3": _mean_mutation_rate, + "4": _mean_mutation_rate, + "5": _mean_mutation_rate, + "6": _mean_mutation_rate, + "7": _mean_mutation_rate, + "8": _mean_mutation_rate, + "9": _mean_mutation_rate, + "10": _mean_mutation_rate, + "11": _mean_mutation_rate, + "12": _mean_mutation_rate, + "13": _mean_mutation_rate, + "14": _mean_mutation_rate, + "15": _mean_mutation_rate, + "16": _mean_mutation_rate, + "17": _mean_mutation_rate, + "18": _mean_mutation_rate, + "19": _mean_mutation_rate, + "20": _mean_mutation_rate, + "21": _mean_mutation_rate, + "22": _mean_mutation_rate, + "X": _mean_mutation_rate, + "MT": 0, +} + +# species ploidy and chromosome-specific ploidy +_species_ploidy = 2 +_ploidy = { + "1": _species_ploidy, + "2A": _species_ploidy, + "2B": _species_ploidy, + "3": _species_ploidy, + "4": _species_ploidy, + "5": _species_ploidy, + "6": _species_ploidy, + "7": _species_ploidy, + "8": _species_ploidy, + "9": _species_ploidy, + "10": _species_ploidy, + "11": _species_ploidy, + "12": _species_ploidy, + "13": _species_ploidy, + "14": _species_ploidy, + "15": _species_ploidy, + "16": _species_ploidy, + "17": _species_ploidy, + "18": _species_ploidy, + "19": _species_ploidy, + "20": _species_ploidy, + "21": _species_ploidy, + "22": _species_ploidy, + "X": _species_ploidy, + "MT": 1, +} + +_chromosomes = [] +for name, data in genome_data.data["chromosomes"].items(): + _chromosomes.append( + stdpopsim.Chromosome( + id=name, + length=data["length"], + synonyms=data["synonyms"], + mutation_rate=_mutation_rate[name], + recombination_rate=_recombination_rate_data[name], + ploidy=_ploidy[name], + ) + ) + +_genome = stdpopsim.Genome( + chromosomes=_chromosomes, + assembly_name=genome_data.data["assembly_name"], + assembly_accession=genome_data.data["assembly_accession"], + citations=[ + stdpopsim.Citation( + author="Besenbacher et al.", + year=2019, + doi="https://doi.org/10.1038/s41559-018-0778-x", + reasons={stdpopsim.CiteReason.MUT_RATE}, + ), + stdpopsim.Citation( + author="Stevison et al.", + year=2015, + doi="https://doi.org/10.1093/molbev/msv331", + reasons={stdpopsim.CiteReason.REC_RATE}, + ), + stdpopsim.Citation( + doi="https://doi.org/10.1126/science.aae0344", + year=2016, + author="Gordon et al.", + reasons={stdpopsim.CiteReason.ASSEMBLY}, + ), + ], +) +stdpopsim.utils.append_common_synonyms(_genome) + +_species = stdpopsim.Species( + id="GorGor", + ensembl_id="gorilla_gorilla", + name="Gorilla gorilla", + common_name="Gorilla", + genome=_genome, + generation_time=19.0, + population_size=_Ne, + ploidy=_species_ploidy, + citations=[ + stdpopsim.Citation( + doi="https://doi.org/10.1038/s41559-018-0778-x", + year=2019, + author="Besenbacher et al.", + reasons={stdpopsim.CiteReason.GEN_TIME}, + ), + stdpopsim.Citation( + doi="https://doi.org/10.1534/genetics.166.3.1375", + year=2004, + author="Yu et al.", + reasons={stdpopsim.CiteReason.POP_SIZE}, + ), + ], +) + +stdpopsim.register_species(_species) diff --git a/stdpopsim/catalog/ensembl_info.py b/stdpopsim/catalog/ensembl_info.py index 06d064d58..9453a2521 100644 --- a/stdpopsim/catalog/ensembl_info.py +++ b/stdpopsim/catalog/ensembl_info.py @@ -1,2 +1,2 @@ # File autogenerated from Ensembl REST API. Do not edit. -release = 103 +release = 110 diff --git a/tests/test_GorGor.py b/tests/test_GorGor.py new file mode 100644 index 000000000..30f968a11 --- /dev/null +++ b/tests/test_GorGor.py @@ -0,0 +1,105 @@ +import pytest + +import stdpopsim +from tests import test_species + + +class TestSpeciesData(test_species.SpeciesTestBase): + + species = stdpopsim.get_species("GorGor") + + def test_ensembl_id(self): + assert self.species.ensembl_id == "gorilla_gorilla" + + def test_name(self): + assert self.species.name == "Gorilla gorilla gorilla" + + def test_common_name(self): + assert self.species.common_name == "Gorilla" + + # QC Tests. These tests are performed by another contributor + # independently referring to the citations provided in the + # species definition, filling in the appropriate values + # and deleting the pytest "skip" annotations. + @pytest.mark.skip("Population size QC not done yet") + def test_qc_population_size(self): + assert self.species.population_size == -1 + + @pytest.mark.skip("Generation time QC not done yet") + def test_qc_generation_time(self): + assert self.species.generation_time == -1 + + +class TestGenomeData(test_species.GenomeTestBase): + + genome = stdpopsim.get_species("GorGor").genome + + @pytest.mark.skip("Recombination rate QC not done yet") + @pytest.mark.parametrize( + ["name", "rate"], + { + "1": -1, + "2A": -1, + "2B": -1, + "3": -1, + "4": -1, + "5": -1, + "6": -1, + "7": -1, + "8": -1, + "9": -1, + "10": -1, + "11": -1, + "12": -1, + "13": -1, + "14": -1, + "15": -1, + "16": -1, + "17": -1, + "18": -1, + "19": -1, + "20": -1, + "21": -1, + "22": -1, + "X": -1, + "MT": -1, + }.items(), + ) + def test_recombination_rate(self, name, rate): + assert rate == pytest.approx( + self.genome.get_chromosome(name).recombination_rate + ) + + @pytest.mark.skip("Mutation rate QC not done yet") + @pytest.mark.parametrize( + ["name", "rate"], + { + "1": -1, + "2A": -1, + "2B": -1, + "3": -1, + "4": -1, + "5": -1, + "6": -1, + "7": -1, + "8": -1, + "9": -1, + "10": -1, + "11": -1, + "12": -1, + "13": -1, + "14": -1, + "15": -1, + "16": -1, + "17": -1, + "18": -1, + "19": -1, + "20": -1, + "21": -1, + "22": -1, + "X": -1, + "MT": -1, + }.items(), + ) + def test_mutation_rate(self, name, rate): + assert rate == pytest.approx(self.genome.get_chromosome(name).mutation_rate)