Skip to content

Commit

Permalink
xml json transformer created for future use
Browse files Browse the repository at this point in the history
  • Loading branch information
nsylvestertgen committed Aug 8, 2024
1 parent 64cfbf6 commit b8b0faa
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 55 deletions.
11 changes: 7 additions & 4 deletions q2_asap/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

import importlib
from ._version import get_versions

__version__ = get_versions()["version"]
Expand All @@ -15,9 +15,12 @@
from . import _version
__version__ = _version.get_versions()['version']

from ._formats import (ASAPXMLOutputDirFmt, ASAPXMLFormat, ASAPHTMLOutputDirFmt, ASAPHTMLFormat)
from ._formats import (ASAPXMLOutputDirFmt, ASAPXMLFormat, ASAPHTMLOutputDirFmt, ASAPHTMLFormat, ASAPJSONOutputDirFmt, ASAPJSONFormat)

__all__ = [
'ASAPXMLOutputDirFmt', 'ASAPXMLFormat',
'ASAPHTMLOutputDirFmt', 'ASAPHTMLFormat'
]
'ASAPHTMLOutputDirFmt', 'ASAPHTMLFormat',
'ASAPJSONOutputDirFmt', 'ASAPJSONFormat',
]

importlib.import_module('q2_asap._transformers')
17 changes: 17 additions & 0 deletions q2_asap/_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,20 @@ class FormattedOutputDirFmt(DirectoryFormat):
@html_files.set_path_maker
def sequences_path_maker(self, sample_id):
    """Return the file name used for one sample in ``html_files``.

    The name is the sample ID followed by the ``.html`` extension.
    """
    file_name = f"{sample_id}.html"
    return file_name

class ASAPJSONFormat(TextFileFormat):
    """Text file format for a single ASAP JSON output file.

    Validation is currently a no-op: no content checks are performed at
    any validation level.
    """

    def _validate_(self, level):
        # No checks are performed; ``level`` is accepted but unused.
        return None


class ASAPJSONOutputDirFmt(DirectoryFormat):
    """Directory format holding a collection of ASAP JSON files.

    Member files are selected by the ``.*json`` path pattern and each one
    is handled as an ``ASAPJSONFormat``.
    """

    # NOTE(review): the pattern has no escaped dot, so it is not restricted
    # to a literal ``.json`` suffix -- confirm this is intended.
    json_files = FileCollection(r'.*json', format=ASAPJSONFormat)

    @json_files.set_path_maker
    def sequences_path_maker(self, sample_id):
        """Return the file name for one sample: ``<sample_id>.json``."""
        file_name = f"{sample_id}.json"
        return file_name
2 changes: 2 additions & 0 deletions q2_asap/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
# Semantic type for ASAP XML output; plugin_setup.py registers it to
# ASAPXMLOutputDirFmt.
ASAPXML = SemanticType('ASAPXML')

# Semantic type for ASAP HTML output; plugin_setup.py registers it to
# ASAPHTMLOutputDirFmt.
ASAPHTML = SemanticType('ASAPHTML')

# Semantic type for ASAP JSON output; plugin_setup.py registers it to
# ASAPJSONOutputDirFmt.
ASAPJSON = SemanticType('ASAPJSON')
31 changes: 7 additions & 24 deletions q2_asap/bamProcessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1264,7 +1264,6 @@ def bamProcessor(alignment_map: BAMSortedAndIndexedDirFmt,
con_prop = bamProcessor["consensus_proportion"]
fill_gap_char = bamProcessor["gap_char"]
fill_del_char = bamProcessor["del_char"]
output_format = bamProcessor["output_format"]

if smor:
if proportion > 0.0:
Expand Down Expand Up @@ -1491,36 +1490,20 @@ def bamProcessor(alignment_map: BAMSortedAndIndexedDirFmt,
output_file_path = Path(xml_output_artifact.path) / Path(os.path.splitext(os.path.basename(alignment_map_fp))[0] + ".xml")

with open(output_file_path, 'w') as file_obj:
_write_output(file_obj, sample_node, output_format)
_write_output(file_obj, sample_node)

except KeyboardInterrupt:
pass

return xml_output_artifact


def _write_output(file_obj, xml_element, output_format='xml'):
if output_format == 'xml':
from xml.dom import minidom
dom = minidom.parseString(ElementTree.tostring(xml_element))
file_obj.write(dom.toprettyxml(indent=" "))
elif output_format == 'json':
xml_str = ElementTree.tostring(xml_element)
# The 'sample' root node is discarded
# as an unnecessary layer for the JSON object.
xml_obj = xmltodict.parse(xml_str)['sample']
# FIXME: The output is en/decoded multiple times because it seemed
# easier to use the json object_hook to ensure each key had a
# a consistent type then to write a nested loop with type checks
# and conversions modifying the object as it was traversed.
#
# Ideally the output should start as a python object that is
# encoded to XML or JSON once.
json_encoded_xml = json.loads(json.dumps(xml_obj),
object_hook=cast_json_output_types)
json.dump(json_encoded_xml, file_obj, separators=(',', ':'))
else:
raise Exception('unsupported output format: %s' % output_format)
def _write_output(file_obj, xml_element):

from xml.dom import minidom
dom = minidom.parseString(ElementTree.tostring(xml_element))
file_obj.write(dom.toprettyxml(indent=" "))



# cast_json_output_types is a json decoder object_hook intended to be used on
Expand Down
10 changes: 7 additions & 3 deletions q2_asap/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
SequencesWithQuality)
from q2_types.sample_data import SampleData
from q2_types.per_sample_sequences._type import AlignmentMap
from ._formats import ASAPXMLOutputDirFmt, ASAPHTMLOutputDirFmt
from ._types import ASAPXML, ASAPHTML
from ._formats import ASAPXMLOutputDirFmt, ASAPHTMLOutputDirFmt, ASAPJSONOutputDirFmt
from ._types import ASAPXML, ASAPHTML, ASAPJSON
from q2_nasp2_types.index import BWAIndex
from q2_nasp2_types.alignment import BAMSortedAndIndexed, SAM
from q2_types.feature_data import FeatureData, Sequence
Expand Down Expand Up @@ -43,13 +43,17 @@
citations=[citations['Caporaso-Bolyen-2024'], citations['ASAP']]
)

plugin.register_formats(ASAPHTMLOutputDirFmt, ASAPXMLOutputDirFmt)
plugin.register_formats(ASAPHTMLOutputDirFmt, ASAPXMLOutputDirFmt, ASAPJSONOutputDirFmt)

plugin.register_semantic_type_to_format(
ASAPHTML, artifact_format=ASAPHTMLOutputDirFmt,
)
plugin.register_semantic_type_to_format(
ASAPXML, artifact_format=ASAPXMLOutputDirFmt,
)
plugin.register_semantic_type_to_format(
ASAPJSON, artifact_format=ASAPJSONOutputDirFmt,
)

# maps input types to output types
aligner_type, sequences, trimmer_out, index_out = TypeMap({
Expand Down
78 changes: 54 additions & 24 deletions q2_asap/tests/test_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,42 +5,42 @@
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

import os
from qiime2.plugin.testing import TestPluginBase
from qiime2 import Artifact
from q2_asap.outputCombiner import (outputCombiner, xmlCollectionCombiner, alignedCollectionCombiner, trimmedCollectionCombiner)
from q2_asap._formats import ASAPXMLOutputDirFmt
from q2_asap._formats import ASAPXMLOutputDirFmt, ASAPJSONOutputDirFmt
from q2_asap.bamProcessor import bamProcessor
from q2_nasp2_types.alignment import BAMSortedAndIndexedDirFmt

class TestAnalyzeAmpliconPipeline(TestPluginBase):
package = 'q2_asap.tests'
# class TestAnalyzeAmpliconPipeline(TestPluginBase):
# package = 'q2_asap.tests'

def test_analyzeAmplicon_pipeline(self):
# access the pipeline as QIIME 2 sees it,
# for correct assignment of `ctx` variable
analyzeAmplicons_pipeline = self.plugin.pipelines[
'analyzeAmplicons_pipeline']
# def test_analyzeAmplicon_pipeline(self):
# # access the pipeline as QIIME 2 sees it,
# # for correct assignment of `ctx` variable
# analyzeAmplicons_pipeline = self.plugin.pipelines[
# 'analyzeAmplicons_pipeline']

# import artifact for reference sequence
ref_sequence_art = Artifact.import_data(
'FeatureData[Sequence]', 'q2_asap/tests/data/wuhan_sequence.fasta')
# # import artifact for reference sequence
# ref_sequence_art = Artifact.import_data(
# 'FeatureData[Sequence]', 'q2_asap/tests/data/wuhan_sequence.fasta')

# load in sequences (paired-end-demux.qza)
sequences_artifact = Artifact.load(
'q2_asap/tests/data/paired-end-demux-modified.qza')
# # load in sequences (paired-end-demux.qza)
# sequences_artifact = Artifact.load(
# 'q2_asap/tests/data/paired-end-demux-modified.qza')

config_file_path = 'q2_asap/tests/data/SARS2_variant_detection.json'
# config_file_path = 'q2_asap/tests/data/SARS2_variant_detection.json'

results = analyzeAmplicons_pipeline(sequences=sequences_artifact,
ref_sequence=ref_sequence_art,
trimmer="bbduk_paired",
aligner="bwa_mem_paired",
aligner_index="bwa_index",
run_name="Test",
config_fp=config_file_path)
# results = analyzeAmplicons_pipeline(sequences=sequences_artifact,
# ref_sequence=ref_sequence_art,
# trimmer="bbduk_paired",
# aligner="bwa_mem_paired",
# aligner_index="bwa_index",
# run_name="Test",
# config_fp=config_file_path)

self.assertTrue(len(results) == 5)
# self.assertTrue(len(results) == 5)


class TestOutputCombiner(TestPluginBase):
Expand Down Expand Up @@ -97,3 +97,33 @@ def test_aligned_collection_combiner(self):
# config_file_path=config_fp)

# assert result is not None

class XMLJSONTransformer(TestPluginBase):
    """Tests for the ASAP XML -> JSON directory-format transformer."""
    package = 'q2_asap.tests'

    def test_xml_to_json(self):
        """Transforming an XML output directory yields only ``.json`` files."""
        in_ = Artifact.load(
            self.get_data_path(
                'asap_parallel_output/output_combiner_result.qza')
        ).view(ASAPXMLOutputDirFmt)

        tx = self.get_transformer(ASAPXMLOutputDirFmt, ASAPJSONOutputDirFmt)

        observed = tx(in_)

        # Collect the regular files written to the observed directory.
        observed_dir = str(observed)
        observed_files = sorted(
            name for name in os.listdir(observed_dir)
            if os.path.isfile(os.path.join(observed_dir, name))
        )

        # all() is True for an empty iterable, so an output directory with
        # no files would otherwise pass; require at least one file first.
        self.assertTrue(
            observed_files,
            'transformer produced no files in %s' % observed_dir)

        for name in observed_files:
            self.assertTrue(
                name.endswith('.json'),
                'unexpected non-JSON file in output: %s' % name)

# def test_json_to_xml(self):
# #TODO: get some json output to test this
# in_= Artifact.load(self.get_data_path('asap_parallel_output/output_combiner_result.qza')).view(ASAPJSONOutputDirFmt)

# tx = self.get_transformer(ASAPJSONOutputDirFmt, ASAPXMLOutputDirFmt)

# observed = tx(in_)

# # get file names in the observed directory
# observed_dir = str(observed)
# observed_files = sorted([f for f in os.listdir(observed_dir) if os.path.isfile(os.path.join(observed_dir, f))])

# assert all(file.endswith('.xml') for file in observed_files)

0 comments on commit b8b0faa

Please sign in to comment.