Skip to content

Commit

Permalink
Add hybrid formula handling
Browse files Browse the repository at this point in the history
  • Loading branch information
gdower committed Oct 11, 2024
1 parent c198742 commit 3a1b6e6
Show file tree
Hide file tree
Showing 7 changed files with 219 additions and 19 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
## [Unreleased]

## [0.0.2] - 2024-10-10

- Added named hybrid and hybrid formula handling
- Added original authorship and combination authorship handling
- Added et_al_cutoff parameter to authorship formatting

## [0.0.1] - 2024-03-27
Expand Down
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,25 @@ Get the scientific name without the Latin gender stem:
'Ursus arct'
```

Get the parsed name components for a hybrid formula:
```python
>>> result = gnparser('Isoetes lacustris × stricta Gay') # => Dictionary
>>> result.is_hybrid() # => Boolean
True
>>> result.hybrid() # => String
'HYBRID_FORMULA'
>>> result.normalized() # => String
'Isoetes lacustris × Isoetes stricta Gay'
>>> result.hybrid_formula_ranks() # => Array
['species', 'species']
>>> result.hybrid_formula_genera() # => Array
['Isoetes', 'Isoetes']
>>> result.hybrid_formula_species() # => Array
['lacustris', 'stricta']
>>> result.hybrid_formula_authorship() # => Array
['', 'Gay']
```

---
### Parse multiple scientific names
Parse multiple scientific names by separating them with `\r\n`:
Expand Down
2 changes: 1 addition & 1 deletion pygnparser/package_metadata.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Package metadata constants for pygnparser (version, title, author, license).
# NOTE(review): __version__ is assigned twice in this listing; the later
# assignment ("0.0.2") is the value that stays bound. The first line looks
# like the pre-change side of the diff -- confirm before relying on it.
__version__ = "0.0.1"
__version__ = "0.0.2"
__title__ = "pygnparser"
__author__ = "Geoff Ower"
__license__ = "MIT"
107 changes: 96 additions & 11 deletions pygnparser/utils/result.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import warnings

class Result(dict):
def __init__(self, parsed_result):
super().__init__()
Expand Down Expand Up @@ -36,11 +38,19 @@ def canonical_simple(self):
def canonical_full(self):
    """Return the 'full' entry looked up in the ``canonical()`` mapping.

    The lookup itself is delegated to ``_key``, with the result of
    ``canonical()`` passed as its ``dict`` argument.
    """
    canonical_forms = self.canonical()
    return self._key('full', dict=canonical_forms)


def authorship_details(self):
    """Return the parsed authorship details for this name.

    For a hybrid formula (``hybrid() == 'HYBRID_FORMULA'``) the result is a
    two-element list, one entry per parent, read from the first two items
    of ``details()['hybridFormula']``. A parent that has no 'authorship'
    entry contributes ``''``.

    For every other name the result is whatever ``_key('authorship')``
    yields.

    Raises:
        KeyError / IndexError: if the hybrid-formula details are missing or
            hold fewer than two parents.
    """
    if self.hybrid() != 'HYBRID_FORMULA':
        return self._key('authorship')

    parents = self.details()['hybridFormula']
    per_parent = []
    for position in range(2):
        parent = parents[position]
        # The first key of each parent mapping is used as its rank, the same
        # way hybrid_formula_ranks() reads it.
        rank = next(iter(parent))
        per_parent.append(parent[rank].get('authorship', ''))
    return per_parent


def authorship_verbatim(self):
    """Return the 'verbatim' entry looked up in ``authorship_details()``.

    The lookup is delegated to ``_key``, with the authorship details passed
    as its ``dict`` argument.
    """
    details = self.authorship_details()
    return self._key('verbatim', dict=details)

Expand All @@ -51,8 +61,16 @@ def authorship_normalized(self):

def authorship_year(self):
    """Return the 'year' entry looked up in ``authorship_details()``.

    The lookup is delegated to ``_key``, with the authorship details passed
    as its ``dict`` argument.
    """
    details = self.authorship_details()
    return self._key('year', dict=details)



def is_hybrid(self):
    """Report whether this result mapping contains a 'hybrid' key."""
    has_hybrid_key = 'hybrid' in self
    return has_hybrid_key


def hybrid(self):
    """Return what ``_key`` yields for the 'hybrid' entry.

    Other methods in this class compare the result against
    ``'HYBRID_FORMULA'`` to detect hybrid formulas.
    """
    hybrid_kind = self._key('hybrid')
    return hybrid_kind


def page(self):
verbatim_authorship = self.authorship_verbatim()
if ':' in verbatim_authorship:
Expand Down Expand Up @@ -84,9 +102,13 @@ def _format_authorship(self, authorship_details, et_al_cutoff=4):
authorship += f' in {ex_authorship}'
return authorship


def authorship(self, et_al_cutoff=4):
authorship_details = self.authorship_details()

def authorship(self, et_al_cutoff=4, authorship_details=None):
if authorship_details is None:
if self.hybrid() == 'HYBRID_FORMULA':
warnings.warn('Warning: authorship() returns empty for hybrid formulas. Use hybrid_formula_authorship() instead.', UserWarning)
return ''
authorship_details = self.authorship_details()
authorship = ''
if authorship_details != '':
if 'originalAuth' in authorship_details:
Expand All @@ -99,6 +121,24 @@ def authorship(self, et_al_cutoff=4):
if 'combinationAuth' not in authorship_details and '(' in self.authorship_verbatim():
authorship = f'({authorship})'
return authorship


def original_authorship(self, et_al_cutoff=4):
    """Return the formatted original authorship, or ``''`` if there is none.

    Args:
        et_al_cutoff: Passed through unchanged to ``_format_authorship``.

    Returns:
        The result of ``_format_authorship`` applied to the 'originalAuth'
        entry of ``authorship_details()``; ``''`` when the details equal
        ``''`` or do not contain 'originalAuth'.
    """
    details = self.authorship_details()
    if details == '' or 'originalAuth' not in details:
        return ''
    return self._format_authorship(details['originalAuth'], et_al_cutoff)


def combination_authorship(self, et_al_cutoff=4):
    """Return the formatted combination authorship, or ``''`` if there is none.

    Args:
        et_al_cutoff: Passed through unchanged to ``_format_authorship``.

    Returns:
        The result of ``_format_authorship`` applied to the
        'combinationAuth' entry of ``authorship_details()``; ``''`` when the
        details equal ``''`` or do not contain 'combinationAuth'.
    """
    details = self.authorship_details()
    if details == '' or 'combinationAuth' not in details:
        return ''
    return self._format_authorship(details['combinationAuth'], et_al_cutoff)


def year(self):
Expand Down Expand Up @@ -149,22 +189,56 @@ def quality_warnings(self):
return self._key('qualityWarnings')


def species(self):
    """Return what ``_key`` yields for 'species' (no ``dict`` argument)."""
    # NOTE(review): `species` is defined a second time further down in this
    # listing (reading from the rank-specific details); in a single class
    # body the later definition would shadow this one -- confirm which is
    # intended.
    species_value = self._key('species')
    return species_value


def genus(self):
    """Return the 'genus' entry of the rank-specific details.

    The rank-specific mapping is ``details()[_details_rank()]``; the lookup
    in it is delegated to ``_key``.
    """
    rank_details = self.details()[self._details_rank()]
    return self._key('genus', dict=rank_details)


def hybrid_formula_ranks(self):
    """Return the rank key of each of the first two hybrid-formula parents.

    Each parent is an item of ``details()['hybridFormula']``; its rank is
    taken to be the first key of that item.

    Returns:
        A two-element list, e.g. ``['species', 'species']``.
    """
    return [
        next(iter(self.details()['hybridFormula'][position]))
        for position in (0, 1)
    ]


def hybrid_formula_genera(self):
    """Return the genus of each of the two hybrid-formula parents.

    Returns:
        A two-element list. A parent whose rank details contain no 'genus'
        entry contributes ``''`` instead of raising ``KeyError``, matching
        how ``hybrid_formula_infraspecies`` and
        ``hybrid_formula_authorship`` report missing parts.
    """
    ranks = self.hybrid_formula_ranks()
    parents = self.details()['hybridFormula']
    return [
        parents[position][ranks[position]].get('genus', '')
        for position in range(2)
    ]


def subgenus(self):
    """Return the 'subgenus' entry of the rank-specific details.

    The rank-specific mapping is ``details()[_details_rank()]``; the lookup
    in it is delegated to ``_key``.
    """
    rank_details = self.details()[self._details_rank()]
    return self._key('subgenus', dict=rank_details)


def hybrid_formula_subgenera(self):
    """Return the subgenus of each of the two hybrid-formula parents.

    Returns:
        A two-element list. A parent whose rank details contain no
        'subgenus' entry contributes ``''`` instead of raising
        ``KeyError``, matching how ``hybrid_formula_infraspecies`` and
        ``hybrid_formula_authorship`` report missing parts.
    """
    ranks = self.hybrid_formula_ranks()
    parents = self.details()['hybridFormula']
    return [
        parents[position][ranks[position]].get('subgenus', '')
        for position in range(2)
    ]


def species(self):
    """Return the 'species' entry of the rank-specific details.

    The rank-specific mapping is ``details()[_details_rank()]``; the lookup
    in it is delegated to ``_key``.
    """
    rank_details = self.details()[self._details_rank()]
    return self._key('species', dict=rank_details)


def hybrid_formula_species(self):
    """Return the species epithet of each of the two hybrid-formula parents.

    Returns:
        A two-element list. A parent whose rank details contain no
        'species' entry contributes ``''`` instead of raising ``KeyError``,
        matching how ``hybrid_formula_infraspecies`` and
        ``hybrid_formula_authorship`` report missing parts.
    """
    ranks = self.hybrid_formula_ranks()
    parents = self.details()['hybridFormula']
    return [
        parents[position][ranks[position]].get('species', '')
        for position in range(2)
    ]


def hybrid_formula_authorship(self, et_al_cutoff=4):
    """Return the formatted authorship of each hybrid-formula parent.

    Args:
        et_al_cutoff: Passed through to ``authorship()``.

    Returns:
        A two-element list of strings. For a parent of rank 'infraspecies'
        the authorship details are read from its first 'infraspecies' item;
        otherwise from the parent's own 'authorship' entry. Any ``KeyError``
        raised while locating or formatting a parent's authorship yields
        ``''`` for that parent.
    """
    ranks = self.hybrid_formula_ranks()
    formatted = []
    for position in (0, 1):
        rank = ranks[position]
        try:
            if rank == 'infraspecies':
                source = self.details()['hybridFormula'][position][rank]['infraspecies'][0]['authorship']
            else:
                source = self.details()['hybridFormula'][position][rank]['authorship']
            text = self.authorship(et_al_cutoff=et_al_cutoff, authorship_details=source)
        except KeyError:
            # Best effort: a parent without authorship is reported as ''.
            text = ''
        formatted.append(text)
    return formatted


def infraspecies_details(self):
    """Return the 'infraspecies' entry of the rank-specific details.

    The rank-specific mapping is ``details()[_details_rank()]``; the lookup
    in it is delegated to ``_key``.
    """
    rank_details = self.details()[self._details_rank()]
    return self._key('infraspecies', dict=rank_details)

Expand All @@ -177,6 +251,17 @@ def infraspecies(self):
return ''


def hybrid_formula_infraspecies(self):
    """Return the first infraspecific epithet of each hybrid-formula parent.

    Returns:
        A two-element list holding the 'value' of each parent's first
        'infraspecies' item, or ``''`` for a parent where that lookup
        raises ``KeyError``.
    """
    ranks = self.hybrid_formula_ranks()
    epithets = []
    for position in (0, 1):
        try:
            epithet = self.details()['hybridFormula'][position][ranks[position]]['infraspecies'][0]['value']
        except KeyError:
            # Parent has no infraspecific part; report it as ''.
            epithet = ''
        epithets.append(epithet)
    return epithets


def infraspecies_rank(self):
rank = ''
if self._details_rank() == 'infraspecies':
Expand Down
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

setup(
name="pyGNparser",
version="0.0.1",
version="0.0.2",
description="Python client for GNparser",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down Expand Up @@ -45,7 +45,8 @@
"Programming Language :: Python",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11"
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12"
],
keywords = ['biodiversity', 'scientific names', 'parser', 'nomenclature', 'taxonomy', 'API', 'web-services', 'species', 'natural history', 'taxonomists', 'biologists', 'Global Names'],
)
Loading

0 comments on commit 3a1b6e6

Please sign in to comment.