diff --git a/CHANGELOG.md b/CHANGELOG.md index 826f83c..894f095 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ ## [Unreleased] +## [0.0.5] - 2024-11-11 +- Added nomenclatural code parameter +- Added result methods for cultivars +- Disabled et_al_cutoff formatting by default +- Removed preserve_in_authorship parameter from authorship() because GNparser no longer normalizes `in` to `ex` + ## [0.0.4] - 2024-10-15 - Added preserve_in_authorship parameter to authorship() to optionally override normalization of `in` to `ex` diff --git a/README.md b/README.md index e75f405..0a928e6 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,21 @@ True ['', 'Gay'] ``` +Parse a scientific name under a specified nomenclatural code: +```python +>>> result = gnparser('Malus domestica \'Fuji\'', code='cultivar') +>>> result.is_cultivar() # => Boolean +True +>>> result.genus() # => String +'Malus' +>>> result.species() # => String +'domestica' +>>> result.cultivar() # => String +'‘Fuji’' +>>> result.nomenclatural_code() # => String +'ICNCP' +``` + --- ### Parse multiple scientific names Parse multiple scientific names by separating them with `\r\n`: diff --git a/pygnparser/package_metadata.py b/pygnparser/package_metadata.py index 749d5bf..7e160bd 100644 --- a/pygnparser/package_metadata.py +++ b/pygnparser/package_metadata.py @@ -1,4 +1,4 @@ -__version__ = "0.0.4" +__version__ = "0.0.5" __title__ = "pygnparser" __author__ = "Geoff Ower" __license__ = "MIT" diff --git a/pygnparser/pygnparser/gnparser.py b/pygnparser/pygnparser/gnparser.py index 299ffea..888db47 100644 --- a/pygnparser/pygnparser/gnparser.py +++ b/pygnparser/pygnparser/gnparser.py @@ -1,11 +1,12 @@ from pygnparser.utils.pygnparser_utils import gnp_post, gnp_url -def gnparser(names, with_details='on', cultivars='off', diaereses='off'): +def gnparser(names, code='', with_details='on', cultivars='off', diaereses='off'): """ Parse scientific names :param names: [str] Human name(s) separated by \\r\\n + :param code: [str] Specify the nomenclatural code (bacterial, botanical, cultivar, or zoological) :param with_details: [str] Turn detailed information about the name on or off :param cultivars: [str] Turn cultivar handling on or off :param diaereses: [str] Turn diaereses handling on or off @@ -18,6 +19,7 @@ def gnparser(names, with_details='on', cultivars='off', diaereses='off'): url = gnp_url() args = { "names": names, + "code": code, "with_details": with_details, "cultivars": cultivars, "diaereses": diaereses, diff --git a/pygnparser/utils/result.py b/pygnparser/utils/result.py index 7e2f969..caf2d98 100644 --- a/pygnparser/utils/result.py +++ b/pygnparser/utils/result.py @@ -23,6 +23,10 @@ def parsed(self): return self._key('parsed') + def nomenclatural_code(self): + return self._key('nomenclaturalCode') + + def canonical(self): return self._key('canonical') @@ -70,6 +74,10 @@ def is_hybrid(self): return 'hybrid' in self + def is_cultivar(self): + return 'cultivar' in self + + def hybrid(self): return self._key('hybrid') @@ -82,8 +90,8 @@ def page(self): page = '' return page - - def _format_authorship(self, authorship_details, et_al_cutoff=4, preserve_in_authorship=False): + + def _format_authorship(self, authorship_details, et_al_cutoff=None): authorship_list = authorship_details['authors'] match len(authorship_list): case 0: @@ -93,23 +101,24 @@ def _format_authorship(self, authorship_details, et_al_cutoff=4, preserve_in_aut case 2: authorship = f'{authorship_list[0]} & {authorship_list[1]}' case _: - if len(authorship_list) >= et_al_cutoff: - authorship = ', '.join(authorship_list[:1]) + ' et al.' - else: + if et_al_cutoff is None or len(authorship_list) < et_al_cutoff: authorship = ', '.join(authorship_list[:-1]) + f' & {authorship_list[-1]}' + else: + authorship = ', '.join(authorship_list[:1]) + ' et al.' + if 'year' in authorship_details: year = self._key('year', dict=authorship_details['year']) authorship += f', {year}' if 'exAuthors' in authorship_details: ex_authorship = self._format_authorship(authorship_details['exAuthors'], et_al_cutoff) - if preserve_in_authorship and ' in ' in self.authorship_verbatim(): - authorship += f' in {ex_authorship}' - else: - authorship += f' ex {ex_authorship}' + authorship += f' ex {ex_authorship}' + if 'inAuthors' in authorship_details: + in_authorship = self._format_authorship(authorship_details['inAuthors'], et_al_cutoff) + authorship += f' in {in_authorship}' return authorship - def authorship(self, et_al_cutoff=4, authorship_details=None, preserve_in_authorship=False): + def authorship(self, et_al_cutoff=None, authorship_details=None): if authorship_details is None: if self.hybrid() == 'HYBRID_FORMULA': warnings.warn('Warning: authorship() returns empty for hybrid formulas. Use hybrid_formula_authorship() instead.', UserWarning) @@ -118,9 +127,9 @@ def authorship(self, et_al_cutoff=4, authorship_details=None, preserve_in_author authorship = '' if authorship_details != '': if 'originalAuth' in authorship_details: - authorship = self._format_authorship(authorship_details['originalAuth'], et_al_cutoff, preserve_in_authorship) + authorship = self._format_authorship(authorship_details['originalAuth'], et_al_cutoff) if 'combinationAuth' in authorship_details: - combination_authorship = self._format_authorship(authorship_details['combinationAuth'], et_al_cutoff, preserve_in_authorship) + combination_authorship = self._format_authorship(authorship_details['combinationAuth'], et_al_cutoff) authorship = f'({authorship}) {combination_authorship}' # handles zoological authorship @@ -129,21 +138,21 @@ def authorship(self, et_al_cutoff=4, authorship_details=None, preserve_in_author return authorship - def original_authorship(self, et_al_cutoff=4, preserve_in_authorship=False): + def original_authorship(self, et_al_cutoff=None): authorship_details = self.authorship_details() authorship = '' if authorship_details != '': if 'originalAuth' in authorship_details: - authorship = self._format_authorship(authorship_details['originalAuth'], et_al_cutoff, preserve_in_authorship) + authorship = self._format_authorship(authorship_details['originalAuth'], et_al_cutoff) return authorship - def combination_authorship(self, et_al_cutoff=4, preserve_in_authorship=False): + def combination_authorship(self, et_al_cutoff=None): authorship_details = self.authorship_details() authorship = '' if authorship_details != '': if 'combinationAuth' in authorship_details: - authorship = self._format_authorship(authorship_details['combinationAuth'], et_al_cutoff, preserve_in_authorship) + authorship = self._format_authorship(authorship_details['combinationAuth'], et_al_cutoff) return authorship @@ -231,13 +240,17 @@ def species(self): return self._key('species', dict=self.details()[self._details_rank()]) + def cultivar(self): + return self._key('cultivar', dict=self.details()[self._details_rank()]) + + def hybrid_formula_species(self): ranks = self.hybrid_formula_ranks() return [self.details()['hybridFormula'][0][ranks[0]]['species'], self.details()['hybridFormula'][1][ranks[1]]['species']] - def hybrid_formula_authorship(self, et_al_cutoff=4): + def hybrid_formula_authorship(self, et_al_cutoff=None): ranks = self.hybrid_formula_ranks() authorship = ['', ''] for i in range(2): diff --git a/setup.py b/setup.py index 2bfc3f7..11b5e95 100644 --- a/setup.py +++ b/setup.py @@ -12,14 +12,14 @@ setup( name="pyGNparser", - version="0.0.4", + version="0.0.5", description="Python client for GNparser", long_description=long_description, long_description_content_type="text/markdown", author="Geoff Ower", author_email="gdower@illinois.edu", url="http://github.com/gnames/pyGNparser", - download_url="https://github.com/gnames/pyGNparser/archive/refs/tags/v0.0.4.tar.gz", + download_url="https://github.com/gnames/pyGNparser/archive/refs/tags/v0.0.5.tar.gz", license="MIT", packages=find_packages(exclude=["test-*"]), install_requires=[ diff --git a/test/test-parse.py b/test/test-parse.py index d0263de..d912a8c 100644 --- a/test/test-parse.py +++ b/test/test-parse.py @@ -5,6 +5,41 @@ import warnings +@vcr.use_cassette("test/vcr_cassettes/test_default_code.yaml") +def test_default_code(): + res = gnparser('Puma concolor') + assert res.parsed() is True + assert res.nomenclatural_code() == '' + + +@vcr.use_cassette("test/vcr_cassettes/test_bacterial_code.yaml") +def test_bacterial_code(): + res = gnparser('Escherichia coli', code='bacterial') + assert res.parsed() is True + assert res.nomenclatural_code() == 'ICNP' + + +@vcr.use_cassette("test/vcr_cassettes/test_botanical_code.yaml") +def test_botanical_code(): + res = gnparser('Asimina triloba (L.) Dunal', code='botanical') + assert res.parsed() is True + assert res.nomenclatural_code() == 'ICN' + + +@vcr.use_cassette("test/vcr_cassettes/test_cultivar_code.yaml") +def test_cultivar_code(): + res = gnparser('Malus domestica \'Fuji\'', code='cultivar') + assert res.parsed() is True + assert res.nomenclatural_code() == 'ICNCP' + + +@vcr.use_cassette("test/vcr_cassettes/test_zoological_code.yaml") +def test_zoological_code(): + res = gnparser('Panthera leo (Linnaeus, 1758)', code='zoological') + assert res.parsed() is True + assert res.nomenclatural_code() == 'ICZN' + + @vcr.use_cassette("test/vcr_cassettes/test_parse_Aus_cus_Smith.yaml") def test_version(): res = gnparser('Aus bus cus (Smith, 1999)') @@ -251,28 +286,20 @@ def test_parse_1_author_no_brackets(): @vcr.use_cassette("test/vcr_cassettes/test_parse_in_original.yaml") def test_parse_in_original(): res = gnparser('Aus bus cus Smith in Richards, 1999') - assert res.normalized() == 'Aus bus cus Smith ex Richards 1999' - assert res.normalized(preserve_in_authorship=True) == 'Aus bus cus Smith in Richards 1999' - assert res.authorship() == 'Smith ex Richards, 1999' - assert res.authorship(preserve_in_authorship=True) == 'Smith in Richards, 1999' - assert res.authorship_normalized() == 'Smith ex Richards 1999' - assert res.authorship_normalized(preserve_in_authorship=True) == 'Smith in Richards 1999' - assert res.original_authorship() == 'Smith ex Richards, 1999' - assert res.original_authorship(preserve_in_authorship=True) == 'Smith in Richards, 1999' + assert res.normalized() == 'Aus bus cus Smith in Richards 1999' + assert res.authorship() == 'Smith in Richards, 1999' + assert res.authorship_normalized() == 'Smith in Richards 1999' + assert res.original_authorship() == 'Smith in Richards, 1999' assert res.combination_authorship() == '' @vcr.use_cassette("test/vcr_cassettes/test_parse_in_original_comb.yaml") def test_parse_in_original_comb(): res = gnparser('Aus bus cus (Smith in Richards, 1999) Ryan in Anderson, Smith, & Jones, 2000') - assert res.normalized(preserve_in_authorship=True) == 'Aus bus cus (Smith in Richards 1999) Ryan in Anderson, Smith & Jones 2000' - assert res.normalized() == 'Aus bus cus (Smith ex Richards 1999) Ryan ex Anderson, Smith & Jones 2000' - assert res.authorship(preserve_in_authorship=True) == '(Smith in Richards, 1999) Ryan in Anderson, Smith & Jones, 2000' - assert res.authorship() == '(Smith ex Richards, 1999) Ryan ex Anderson, Smith & Jones, 2000' - assert res.original_authorship(preserve_in_authorship=True) == 'Smith in Richards, 1999' - assert res.original_authorship() == 'Smith ex Richards, 1999' - assert res.combination_authorship(preserve_in_authorship=True) == 'Ryan in Anderson, Smith & Jones, 2000' - assert res.combination_authorship() == 'Ryan ex Anderson, Smith & Jones, 2000' + assert res.normalized() == 'Aus bus cus (Smith in Richards 1999) Ryan in Anderson, Smith & Jones 2000' + assert res.authorship() == '(Smith in Richards, 1999) Ryan in Anderson, Smith & Jones, 2000' + assert res.original_authorship() == 'Smith in Richards, 1999' + assert res.combination_authorship() == 'Ryan in Anderson, Smith & Jones, 2000' @vcr.use_cassette("test/vcr_cassettes/test_parse_Ablepharus_pannonicus.yaml") @@ -281,11 +308,11 @@ def test_parse_Ablepharus_pannonicus(): assert res.genus() == 'Ablepharus' assert res.species() == 'pannonicus' assert res.infraspecies() == '' - assert res.authorship() == 'Fitzinger ex Eversmann, 1823' - assert res.original_authorship() == 'Fitzinger ex Eversmann, 1823' + assert res.authorship() == 'Fitzinger in Eversmann, 1823' + assert res.original_authorship() == 'Fitzinger in Eversmann, 1823' assert res.combination_authorship() == '' assert res.page() == '145' - assert res.quality_warnings() == [{'quality': 4, 'warning': 'Unparsed tail'}, {'quality': 2, 'warning': 'Ex authors are not required (ICZN only)'}, {'quality': 2, 'warning': 'Year with page info'}] + assert res.quality_warnings() == [{'quality': 4, 'warning': 'Unparsed tail'}, {'quality': 2, 'warning': 'Year with page info'}, {'quality': 2, 'warning': '`in` authors are not required'}] assert res.tail().strip() == '(Nom. Nud., In Error)' assert res.is_hybrid() == False @@ -296,8 +323,8 @@ def test_parse_Aspidoscelis_neavesi(): assert res.genus() == 'Aspidoscelis' assert res.species() == 'neavesi' assert res.infraspecies() == '' - assert res.authorship() == 'Cole et al., 2014' - assert res.original_authorship() == 'Cole et al., 2014' + assert res.authorship() == 'Cole, Taylor, Baumann & Baumann, 2014' + assert res.original_authorship() == 'Cole, Taylor, Baumann & Baumann, 2014' assert res.combination_authorship() == '' assert res.page() == '' assert res.tail().strip() == '(Part)' @@ -346,17 +373,40 @@ def test_parse_Ablepharus_chernovi_ressli(): @vcr.use_cassette("test/vcr_cassettes/test_parse_et_al_default.yaml") def test_parse_et_al_default(): res = gnparser('Aus bus cus (Smith, Anderson, Jones, & Peters in Richards, Shultz, Anderson & Smith, 1999) Ryan in Anderson, Smith, & Jones, 2000') - assert res.authorship() == '(Smith et al. in Richards et al., 1999) Ryan in Anderson, Smith & Jones, 2000' - assert res.original_authorship() == 'Smith, Anderson, Jones, & Peters ex Richards, Shultz, Anderson & Smith, 1999' - assert res.combination_authorship() == 'Ryan ex Anderson, Smith & Jones, 2000' + assert res.authorship() == '(Smith, Anderson, Jones & Peters in Richards, Shultz, Anderson & Smith, 1999) Ryan in Anderson, Smith & Jones, 2000' + assert res.original_authorship() == 'Smith, Anderson, Jones & Peters in Richards, Shultz, Anderson & Smith, 1999' + assert res.combination_authorship() == 'Ryan in Anderson, Smith & Jones, 2000' + + +@vcr.use_cassette("test/vcr_cassettes/test_parse_et_al_6.yaml") +def test_parse_et_al_6(): + res = gnparser('Aus bus cus (Smith, Anderson, Jones, & Peters in Richards, Shultz, Anderson & Smith, 1999) Ryan in Anderson, Smith, & Jones, 2000') + assert res.authorship(et_al_cutoff=6) == '(Smith, Anderson, Jones & Peters in Richards, Shultz, Anderson & Smith, 1999) Ryan in Anderson, Smith & Jones, 2000' + assert res.original_authorship(et_al_cutoff=6) == 'Smith, Anderson, Jones & Peters in Richards, Shultz, Anderson & Smith, 1999' + assert res.combination_authorship(et_al_cutoff=6) == 'Ryan in Anderson, Smith & Jones, 2000' @vcr.use_cassette("test/vcr_cassettes/test_parse_et_al_5.yaml") -def test_parse_et_al_default(): +def test_parse_et_al_5(): + res = gnparser('Aus bus cus (Smith, Anderson, Jones, O\'Brian & Peters in Richards, Shultz, Anderson & Smith, 1999) Ryan in Anderson, Smith, & Jones, 2000') + assert res.authorship(et_al_cutoff=5) == '(Smith et al. in Richards, Shultz, Anderson & Smith, 1999) Ryan in Anderson, Smith & Jones, 2000' + assert res.original_authorship(et_al_cutoff=5) == 'Smith et al. in Richards, Shultz, Anderson & Smith, 1999' + assert res.combination_authorship(et_al_cutoff=5) == 'Ryan in Anderson, Smith & Jones, 2000' + +@vcr.use_cassette("test/vcr_cassettes/test_parse_et_al_4.yaml") +def test_parse_et_al_4(): res = gnparser('Aus bus cus (Smith, Anderson, Jones, O\'Brian & Peters in Richards, Shultz, Anderson & Smith, 1999) Ryan in Anderson, Smith, & Jones, 2000') - assert res.authorship(et_al_cutoff=5) == '(Smith et al. ex Richards, Shultz, Anderson & Smith, 1999) Ryan ex Anderson, Smith & Jones, 2000' - assert res.original_authorship(et_al_cutoff=5) == 'Smith et al. ex Richards, Shultz, Anderson & Smith, 1999' - assert res.combination_authorship(et_al_cutoff=5) == 'Ryan ex Anderson, Smith & Jones, 2000' + assert res.authorship(et_al_cutoff=4) == '(Smith et al. in Richards et al., 1999) Ryan in Anderson, Smith & Jones, 2000' + assert res.original_authorship(et_al_cutoff=4) == 'Smith et al. in Richards et al., 1999' + assert res.combination_authorship(et_al_cutoff=4) == 'Ryan in Anderson, Smith & Jones, 2000' + + +@vcr.use_cassette("test/vcr_cassettes/test_parse_et_al_3.yaml") +def test_parse_et_al_3(): + res = gnparser('Aus bus cus (Smith, Anderson, Jones, O\'Brian & Peters in Richards, Shultz, Anderson & Smith, 1999) Ryan in Anderson, Smith, & Jones, 2000') + assert res.authorship(et_al_cutoff=3) == '(Smith et al. in Richards et al., 1999) Ryan in Anderson et al., 2000' + assert res.original_authorship(et_al_cutoff=3) == 'Smith et al. in Richards et al., 1999' + assert res.combination_authorship(et_al_cutoff=3) == 'Ryan in Anderson et al., 2000' @vcr.use_cassette("test/vcr_cassettes/test_infraspecies_rank_on_species.yaml") @@ -367,6 +417,20 @@ def test_infraspecies_rank_on_species(): assert res.is_hybrid() == False +@vcr.use_cassette("test/vcr_cassettes/test_cultivar.yaml") +def test_cultivar(): + res = gnparser('Malus domestica \'Fuji\'', code='cultivar') + assert res.is_cultivar() == True + assert res.nomenclatural_code() == 'ICNCP' + assert res.normalized() == 'Malus domestica ‘Fuji’' + assert res.verbatim() == 'Malus domestica \'Fuji\'' + assert res.canonical_full() == 'Malus domestica ‘Fuji’' + assert res.genus() == 'Malus' + assert res.species() == 'domestica' + assert res.infraspecies() == '' + assert res.cultivar() == '‘Fuji’' + + @vcr.use_cassette("test/vcr_cassettes/test_hybrid_formula.yaml") def test_hybrid_formula(): res = gnparser('Isoetes lacustris x stricta Gay') @@ -378,7 +442,7 @@ def test_hybrid_formula(): assert res.hybrid_formula_species() == ['lacustris', 'stricta'] assert res.hybrid_formula_infraspecies() == ['', ''] assert res.hybrid_formula_authorship() == ['', 'Gay'] - with pytest.warns(UserWarning, match='Warning\: authorship\(\) returns empty for hybrid formulas. Use hybrid_formula_authorship\(\) instead.'): + with pytest.warns(UserWarning, match=re.escape('Warning: authorship() returns empty for hybrid formulas. Use hybrid_formula_authorship() instead.')): assert res.authorship() == '' #assert res.original_authorship() == '' #assert res.combination_authorship() == '' @@ -436,6 +500,39 @@ def test_named_hybrid2(): assert res.combination_authorship() == 'D. Don ex W. H. Baxter' +@vcr.use_cassette("test/vcr_cassettes/test_named_hybrid3.yaml") +def test_named_hybrid3(): + res = gnparser('Equisetum × trachyodon var. moorei (Newman) H. C. Watson & Syme') + assert res.is_hybrid() == True + assert res.hybrid() == 'NAMED_HYBRID' + assert res.normalized() == 'Equisetum × trachyodon var. moorei (Newman) H. C. Watson & Syme' + assert res.canonical_stemmed() == 'Equisetum trachyodon moore' + assert res.canonical_full() == 'Equisetum × trachyodon var. moorei' + assert res.canonical_simple() == 'Equisetum trachyodon moorei' + assert res.genus() == 'Equisetum' + assert res.species() == 'trachyodon' + assert res.infraspecies() == 'moorei' + assert res.infraspecies_rank() == 'var.' + assert res.authorship() == '(Newman) H. C. Watson & Syme' + assert res.original_authorship() == 'Newman' + assert res.combination_authorship() == 'H. C. Watson & Syme' + + +# should handle bad name without crashing +@vcr.use_cassette("test/vcr_cassettes/test_named_bad_hybrid.yaml") +def test_named_bad_hybrid(): + res = gnparser('xAndrorhiza x P. Delforge') + assert res.is_hybrid() == True + assert res.hybrid() == 'NAMED_HYBRID' + assert res.quality() == 4 + assert res.authorship() == '' + try: + res.normalized() + except IndexError as e: + pytest.fail(f"Unexpected error: {e}") + assert res.tail() == ' x P. Delforge' + + @vcr.use_cassette("test/vcr_cassettes/test_uninomial.yaml") def test_uninomial(): res = gnparser('Dennstaedtiaceae Lotsy') @@ -451,7 +548,7 @@ def test_uninomial(): # despite Microlepia being a genus, GNParser will treat it as a uninomial # unless it is combined with a specific epithet, so use res.uninomial() instead of res.genus() @vcr.use_cassette("test/vcr_cassettes/test_uninomial2.yaml") -def test_uninomial(): +def test_uninomial2(): res = gnparser('Microlepia C.Presl') assert res._details_rank() == 'uninomial' assert res.uninomial() == 'Microlepia' @@ -461,3 +558,8 @@ def test_uninomial(): assert res.authorship() == 'C. Presl' assert res.is_hybrid() == False + +@vcr.use_cassette("test/vcr_cassettes/test_verbatim_authorship.yaml") +def test_verbatim_authorship(): + res = gnparser('Equisetum × litorale f. arvensiforme (A. A. Eaton) Vict.') + assert res.authorship_verbatim() == '(A. A. Eaton) Vict.'