From 5f65d9c02ef4128c7f87ad746dd508e8f2f69f32 Mon Sep 17 00:00:00 2001 From: jrober84 Date: Tue, 15 Aug 2023 09:35:45 -0400 Subject: [PATCH] updated mge report writer --- mob_suite/mob_typer.py | 12 +++--------- mob_suite/utils.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/mob_suite/mob_typer.py b/mob_suite/mob_typer.py index b1844a7..bff999e 100644 --- a/mob_suite/mob_typer.py +++ b/mob_suite/mob_typer.py @@ -534,10 +534,6 @@ def main(): writeReport(mobtyper_results, MOB_TYPER_REPORT_HEADER, report_file) - id_lookup = {} - for id in id_mapping: - id_lookup[id_mapping[id]] = id - # Peform MGE detection if mge_report_file is not None: mge_results = blast_mge(fixed_fasta, repetitive_mask_file, tmp_dir, min_length, @@ -545,8 +541,8 @@ def main(): tmp = {} for contig_id in mge_results: - if contig_id in id_lookup: - label = id_lookup[contig_id] + if contig_id in id_mapping: + label = id_mapping[contig_id] else: continue tmp[label] = mge_results[contig_id] @@ -555,8 +551,6 @@ def main(): contig_memberships = {'chromosome': {}, 'plasmid': {}} for i in range(0, len(mobtyper_results)): - if not 'total_length' in mobtyper_results[i]: - continue primary_cluster_id = mobtyper_results[i]['primary_cluster_id'] if not primary_cluster_id in contig_memberships['plasmid']: contig_memberships['plasmid'][primary_cluster_id] = {} @@ -566,7 +560,7 @@ def main(): mobtyper_results[i]['contig_id'] = contig_id contig_memberships['plasmid'][primary_cluster_id][contig_id] = mobtyper_results[i] - if len(mobtyper_results) > 0: + if len(mge_results) > 0: writeMGEresults(contig_memberships, mge_results, mge_report_file) logger.info("MOB-typer MGE results written to {}".format(mge_report_file)) else: diff --git a/mob_suite/utils.py b/mob_suite/utils.py index 314620a..4338d0d 100644 --- a/mob_suite/utils.py +++ b/mob_suite/utils.py @@ -1362,3 +1362,37 @@ def writeMGEresults(contig_membership,mge_results,outfile): fh.write("\n".join(out_string)) fh.close() + +def writeMGEresults(contig_membership,mge_results,outfile): + if len(mge_results) == 0: + return + header = "\t".join(["\t".join(MGE_INFO_HEADER)]) + with open(outfile,'w') as out: + out.write(f'{header}\n') + for molecule_type in contig_membership: + for contig_id in contig_membership[molecule_type]: + if not contig_id in mge_results: + continue + for i in range(0, len(mge_results[contig_id])): + row = {} + for field in MGE_INFO_HEADER: + row[field] = '' + if field in mge_results[contig_id]: + row[field] = mge_results[contig_id][field] + + id = mge_results[contig_id][i]['qseqid'].split('|') + row['mge_id'] = id[0] + row['mge_acs'] = id[1] + row['mge_type'] = id[2] + row['mge_subtype'] = id[3] + row['mge_length'] = mge_results[contig_id][i]['qlen'] + row['mge_start'] = mge_results[contig_id][i]['qstart'] + row['mge_end'] = mge_results[contig_id][i]['qend'] + row['contig_start'] = mge_results[contig_id][i]['sstart'] + row['contig_end'] = mge_results[contig_id][i]['send'] + + for field in contig_membership[molecule_type][contig_id]: + if field in row: + row[field] = contig_membership[molecule_type][contig_id][field] + + out.write("{}\n".format("\t".join([str(x) for x in list(row.values())])))