Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH Update drug categorization logic #51

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

## Unreleased
### Update drug categorization
- confers_resistance_to() now gets drugs for the whole AMR gene family. For example, OXA-19 previously returned only cephalosporin and penam, but now also returns oxacillin (from the AMR gene family).
- Implementation of drugs_to_drug_classes() has also been fixed. Previously, the drug class was taken from the superclasses of each drug in the list passed in, without checking whether that class was an immediate child of 'antibiotic molecule'. These checks have now been put in place.
- drugs_to_drug_classes() also uses the 'has_part' ARO relationship now to get drug classes for antibiotic mixtures. In case of antibiotic mixtures, the drug classes of the drugs associated with 'has_part' are returned rather than 'antibiotic mixture' (ARO:3000707).
- 'antibiotic mixture' will not be reported as a drug class, rather the individual antibiotic classes making up the antibiotic mixture will be reported.

## 0.4.0 - 10 June

- Bundle a specific version of ARO with the package instead of downloading it from the internet (ensures reproducibility)
Expand Down
62 changes: 51 additions & 11 deletions argnorm/drug_categorization.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,29 @@
from . import lib

ARO = lib.get_aro_ontology()

confers_resistance_to_drug_class_rel = ARO.get_relationship('confers_resistance_to_drug_class')
confers_resistance_to_antibiotic_rel = ARO.get_relationship('confers_resistance_to_antibiotic')
has_part_rel = ARO.get_relationship('has_part')

def _get_drug_classes(super_classes_list: List[str]) -> List[str]:
    """
    Select the ARO drug-class terms from a list of ontology terms.

    A term is kept when both of the following hold:
    - everything after the first entry of ``term.superclasses(1)`` is exactly
      the 'antibiotic molecule' term (ARO:1000003), i.e. the term is an
      immediate child of 'antibiotic molecule'
      (presumably the first entry is the term itself -- TODO confirm);
    - the term is not 'antibiotic mixture' (ARO:3000707).

    Args:
        super_classes_list: Terms to inspect. Despite the ``List[str]``
            annotation, every element must provide ``.superclasses()`` and
            ``.id``.

    Returns:
        The ``.id`` of every term that qualifies, in input order. Duplicates
        in the input are not removed.
    """
    def _is_drug_class(term) -> bool:
        # Drop the leading entry and compare what remains against the single
        # 'antibiotic molecule' node.
        parents = list(term.superclasses(1))[1:]
        antibiotic_molecule = [ARO['ARO:1000003']]
        if parents != antibiotic_molecule:
            return False
        # 'antibiotic mixture' sits directly under 'antibiotic molecule' too,
        # but must never be reported as a drug class.
        return term.id != 'ARO:3000707'

    return [term.id for term in super_classes_list if _is_drug_class(term)]

def confers_resistance_to(aro_num: str) -> List[str]:
'''
Expand All @@ -16,18 +37,27 @@ def confers_resistance_to(aro_num: str) -> List[str]:
target (list[str]):
A list with ARO number of the drugs/antibiotics to which the input gene confers resistance to.
'''

antibiotic_molecule_node = [ARO['ARO:1000003'], ARO['ARO:1000001']]
# some gene superclasses can map to drugs which are immediate children of 'antibiotic molecule'
# only use these if no other drugs can be found, as this information will be present in
# drugs to drug classes
backup_drugs = []
target = set()

for term in ARO[aro_num].superclasses():
for drug in term.relationships.get(confers_resistance_to_drug_class_rel, []):
target.add(drug.id)
if list(ARO[drug.id].superclasses())[1:] == antibiotic_molecule_node:
backup_drugs.append(drug.id)
else:
target.add(drug.id)

for drug in term.relationships.get(confers_resistance_to_antibiotic_rel, []):
target.add(drug.id)
if list(ARO[drug.id].superclasses())[1:] == antibiotic_molecule_node:
backup_drugs.append(drug.id)
else:
target.add(drug.id)

if target:
break
if not target:
target.update(backup_drugs)

return sorted(target)

Expand All @@ -46,15 +76,25 @@ def drugs_to_drug_classes(drugs_list: List[str]) -> List[str]:
to the function in the drugs_list.
'''
drug_classes = []
temp_drug_classes = []

for drug in drugs_list:
drug_instance = ARO[drug]
drug_instance_superclasses = list(drug_instance.superclasses())
superclasses_len = len(drug_instance_superclasses)
temp_drug_classes += _get_drug_classes(drug_instance_superclasses)

has_part_nodes = drug_instance.relationships.get(has_part_rel, [])
for has_part_node in has_part_nodes:
has_part_node_superclasses = list(has_part_node.superclasses())[1:]

for super_class in has_part_node_superclasses:
super_class_categories = list(super_class.superclasses())
temp_drug_classes += _get_drug_classes(super_class_categories)

if temp_drug_classes == []:
temp_drug_classes.append(drug_instance.id)

if superclasses_len >= 3:
drug_classes.append(drug_instance_superclasses[superclasses_len - 3].id)
else:
drug_classes.append(drug_instance_superclasses[0].id)
drug_classes += list(set(temp_drug_classes))
temp_drug_classes = []

return sorted(drug_classes)
Loading
Loading