Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH Update drug categorization logic #51

Closed
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 53 additions & 12 deletions argnorm/drug_categorization.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,30 @@
from . import lib

ARO = lib.get_aro_ontology()

confers_resistance_to_drug_class_rel = ARO.get_relationship('confers_resistance_to_drug_class')
confers_resistance_to_antibiotic_rel = ARO.get_relationship('confers_resistance_to_antibiotic')
has_part_rel = ARO.get_relationship('has_part')
is_small_inhibitor_molecule_rel = ARO.get_relationship('is_small_molecule_inhibitor')

def navigate_superclasses(super_classes_list: List[str]) -> List[str]:
    """
    Helper function that picks the drug-class nodes out of a list of ARO terms.

    A term is kept when the result of `term.superclasses(1)`, with its first
    entry dropped, is exactly the 'antibiotic molecule' term, and the term
    itself is not 'antibiotic mixture'.

    - antibiotic molecule -> ARO:1000003
    - antibiotic mixture -> ARO:3000707

    Parameters:
        super_classes_list:
            Terms to inspect. NOTE(review): annotated as List[str], but every
            element must expose `.superclasses()` and `.id`, so these are
            ontology term objects rather than plain strings - confirm and
            tighten the annotation.

    Returns:
        output (list[str]):
            The `.id` of every term that matched, in input order
            (duplicates in the input are preserved).
    """
    # Loop-invariant reference value: look it up once instead of once per term.
    antibiotic_molecule_node = [ARO['ARO:1000003']]

    output = []
    for super_class in super_classes_list:
        # The first entry yielded by superclasses(1) is discarded; presumably
        # it is the term itself, leaving only its direct parents - verify
        # against the ontology library's documentation.
        direct_parents = list(super_class.superclasses(1))[1:]

        # keep immediate children of 'antibiotic molecule', except 'antibiotic mixture'
        if direct_parents == antibiotic_molecule_node and super_class.id != 'ARO:3000707':
            output.append(super_class.id)

    return output

def confers_resistance_to(aro_num: str) -> List[str]:
'''
Expand All @@ -16,20 +38,29 @@ def confers_resistance_to(aro_num: str) -> List[str]:
target (list[str]):
A list with ARO number of the drugs/antibiotics to which the input gene confers resistance to.
'''

antibiotic_molecule_node = [ARO['ARO:1000003'], ARO['ARO:1000001']]
# some gene superclasses can map to drugs which are immediate children of 'antibiotic molecule'
# only use these if no other drugs can be found, as this information will be present in
# drugs to drug classes
backup_drugs = []
target = set()

for term in ARO[aro_num].superclasses():
for drug in term.relationships.get(confers_resistance_to_drug_class_rel, []):
target.add(drug.id)
if list(ARO[drug.id].superclasses())[1:] == antibiotic_molecule_node:
backup_drugs.append(drug.id)
else:
target.add(drug.id)

for drug in term.relationships.get(confers_resistance_to_antibiotic_rel, []):
target.add(drug.id)
if list(ARO[drug.id].superclasses())[1:] == antibiotic_molecule_node:
backup_drugs.append(drug.id)
else:
target.add(drug.id)

if target:
break
if target == set():
target.update(backup_drugs)

return sorted(target)
return sorted(list(target))

def drugs_to_drug_classes(drugs_list: List[str]) -> List[str]:
'''
Expand All @@ -50,11 +81,21 @@ def drugs_to_drug_classes(drugs_list: List[str]) -> List[str]:
for drug in drugs_list:
drug_instance = ARO[drug]
drug_instance_superclasses = list(drug_instance.superclasses())
superclasses_len = len(drug_instance_superclasses)
drug_classes += navigate_superclasses(drug_instance_superclasses)

has_part_nodes = drug_instance.relationships.get(has_part_rel, [])
if has_part_nodes:
for hast_part_node in has_part_nodes:
hast_part_node_superclasses = list(hast_part_node.superclasses())[1:]

for super_class in hast_part_node_superclasses:
super_class_categories = list(super_class.superclasses())
drug_classes += navigate_superclasses(super_class_categories)

if hast_part_node.relationships.get(is_small_inhibitor_molecule_rel, []):
drug_classes.append('ARO:3000707')

if superclasses_len >= 3:
drug_classes.append(drug_instance_superclasses[superclasses_len - 3].id)
else:
drug_classes.append(drug_instance_superclasses[0].id)
if drug_classes == []:
drug_classes.append(drug_instance.id)

return sorted(drug_classes)
448 changes: 224 additions & 224 deletions outputs/hamronized/abricate.argannot.tsv

Large diffs are not rendered by default.

318 changes: 159 additions & 159 deletions outputs/hamronized/abricate.megares.tsv

Large diffs are not rendered by default.

238 changes: 119 additions & 119 deletions outputs/hamronized/abricate.ncbi.tsv

Large diffs are not rendered by default.

542 changes: 271 additions & 271 deletions outputs/hamronized/abricate.resfinder.tsv

Large diffs are not rendered by default.

294 changes: 147 additions & 147 deletions outputs/hamronized/abricate.resfinderfg.tsv

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion outputs/hamronized/amrfinderplus.ncbi.orfs.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ amrfinderplus.ncbi.orfs.tsv tet(Q) tetracycline resistance ribosomal protection
amrfinderplus.ncbi.orfs.tsv bexA multidrug efflux MATE transporter BexA NCBI Reference Gene Database 2023-Nov-01 BAB64566.1 amrfinderplus 3.10.30 gene_presence_detected EFFLUX 80.36 EFFLUX 18 1085 356 k119_41685 443 - 92.98 ARO:3003953 ARO:0000045,ARO:3000662 ARO:0000001,ARO:3005386
amrfinderplus.ncbi.orfs.tsv lnu(C) lincosamide nucleotidyltransferase Lnu(C) NCBI Reference Gene Database 2023-Nov-01 WP_063851341.1 amrfinderplus 3.10.30 gene_presence_detected LINCOSAMIDE 100.0 LINCOSAMIDE 234 725 164 k119_46979 164 - 97.56 ARO:3002837 ARO:0000046 ARO:0000017
amrfinderplus.ncbi.orfs.tsv sat4 streptothricin N-acetyltransferase Sat4 NCBI Reference Gene Database 2023-Nov-01 WP_000627290.1 amrfinderplus 3.10.30 gene_presence_detected STREPTOTHRICIN 86.11 STREPTOTHRICIN 8 472 155 k119_47732 180 - 100.0 ARO:3002897 ARO:0000012 ARO:3000034
amrfinderplus.ncbi.orfs.tsv aph(3')-IIIa aminoglycoside O-phosphotransferase APH(3')-IIIa NCBI Reference Gene Database 2023-Nov-01 WP_001096887.1 amrfinderplus 3.10.30 gene_presence_detected AMIKACIN/KANAMYCIN 100.0 AMINOGLYCOSIDE 207 998 264 k119_48139 264 - 100.0 ARO:3002647 ARO:0000005,ARO:0000013,ARO:0000021,ARO:0000024,ARO:0000049,ARO:3000652,ARO:3000655,ARO:3000657,ARO:3000658 ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:3000707
amrfinderplus.ncbi.orfs.tsv aph(3')-IIIa aminoglycoside O-phosphotransferase APH(3')-IIIa NCBI Reference Gene Database 2023-Nov-01 WP_001096887.1 amrfinderplus 3.10.30 gene_presence_detected AMIKACIN/KANAMYCIN 100.0 AMINOGLYCOSIDE 207 998 264 k119_48139 264 - 100.0 ARO:3002647 ARO:0000005,ARO:0000013,ARO:0000021,ARO:0000024,ARO:0000049,ARO:3000652,ARO:3000655,ARO:3000657,ARO:3000658 ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016,ARO:0000016
amrfinderplus.ncbi.orfs.tsv aadS aminoglycoside 6-adenylyltransferase AadS NCBI Reference Gene Database 2023-Nov-01 WP_003013318.1 amrfinderplus 3.10.30 gene_presence_detected STREPTOMYCIN 100.0 AMINOGLYCOSIDE 34628 35488 287 k119_48233 287 + 100.0 ARO:3004683 ARO:0000040 ARO:0000016
amrfinderplus.ncbi.orfs.tsv tet(X2) tetracycline-inactivating monooxygenase Tet(X2) NCBI Reference Gene Database 2023-Nov-01 WP_008651082.1 amrfinderplus 3.10.30 gene_presence_detected TETRACYCLINE 100.0 TETRACYCLINE 12370 13533 388 k119_48273 388 + 99.74 ARO:3000205 ARO:0000030,ARO:0000051,ARO:0000069,ARO:3000152,ARO:3000528,ARO:3000667,ARO:3000668 ARO:3000050,ARO:3000050,ARO:3000050,ARO:3000050,ARO:3000050,ARO:3000050,ARO:3000050
amrfinderplus.ncbi.orfs.tsv tet(O) tetracycline resistance ribosomal protection protein Tet(O) NCBI Reference Gene Database 2023-Nov-01 WP_014636291.1 amrfinderplus 3.10.30 gene_presence_detected TETRACYCLINE 100.0 TETRACYCLINE 978 2894 639 k119_60190 639 + 99.22 ARO:3000190 ARO:0000051,ARO:0000069,ARO:3000152,ARO:3000528,ARO:3000667,ARO:3000668 ARO:3000050,ARO:3000050,ARO:3000050,ARO:3000050,ARO:3000050,ARO:3000050
Expand Down
Loading
Loading