Skip to content

Commit

Permalink
initial analyze amplicons pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
nsylvestertgen committed Jul 12, 2024
1 parent e98c6ce commit dad2330
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 32 deletions.
17 changes: 13 additions & 4 deletions q2_asap/analyzeAmplicons.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,20 @@ def analyzeAmplicons(sequences: CasavaOneEightSingleLanePerSampleDirFmt, name: s
output_dir_bam = BAMSortedAndIndexedDirFmt()
output_dir_bwa = BWAIndexDirFmt()
output_dir_xml = ASAPXMLOutputDirFmt()


result = subprocess.run(['ls', '-l', f'{sequences}'], stdout=subprocess.PIPE, text=True)
print(result)
print(os.listdir(f'{sequences}'))
subprocess.run(['mkdir', '-p', 'sequences'], check=True)
shutil.copytree(f'{sequences}', 'sequences', dirs_exist_ok=True)

# create analyzeAmplicons command
command = f'analyzeAmplicons -r {sequences} -n {name} -o {temp_dir}/asap_output -d {depth} -b {breadth} -j /scratch/nsylvester/SARS2_variant_detection.json \
command = f'analyzeAmplicons -r sequences -n {name} -o {temp_dir}/asap_output -d {depth} -b {breadth} -j /home/nsylvester/scratch/q2-asap/q2_asap/tests/data/SARS2_variant_detection.json \
--min_base_qual {min_base_qual} --consensus-proportion {consensus_proportion} --fill-gaps {fill_gaps} -a {aligner} --aligner-args {aligner_args}'

print(command)

# combine conda environment and command TODO: fix conda environment
shell_script= f"""
conda run -p /home/dlemmer/.conda/envs/asap {command}
Expand All @@ -75,14 +84,14 @@ def analyzeAmplicons(sequences: CasavaOneEightSingleLanePerSampleDirFmt, name: s
for file_name in os.listdir(asap_output_dir):
file_path = os.path.join(asap_output_dir, file_name)
if re.search(r'\.(amb|ann|bwt|pac|sa|fasta)$', file_name):
shutil.move(file_path, Path(output_dir_bwa.path) / file_name)
shutil.copy(file_path, Path(output_dir_bwa.path) / file_name)
elif re.search(r'\.xml$', file_name):
shutil.move(file_path, Path(output_dir_xml.path) / file_name)
shutil.copy(file_path, Path(output_dir_xml.path) / file_name)

# move bam files
for file_name in os.listdir(os.path.join(asap_output_dir, "bwamem")):
file_path = os.path.join(asap_output_dir, "bwamem", file_name)
shutil.move(file_path, Path(output_dir_bam.path) / file_name)
shutil.copy(file_path, Path(output_dir_bam.path) / file_name)

# remove temp directory
# shutil.rmtree(temp_dir)
Expand Down
30 changes: 30 additions & 0 deletions q2_asap/analyzeAmplicons_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#TODO: config file for bbduk/fastp/trimmer params
#TODO: bamprocessor, outputCombiner
def analyzeAmplicons_pipeline(ctx, sequences, ref_sequence, config_file_path):

#TODO: long string of params in json file that we will use as trimmer/aligner_params
config = open(config_file_path)
data = json.load(config)

trimmer = data['ASAP_parameters']['trimmer']['name']
trimmer_action = ctx.get_action('trimmers', trimmer)

aligner_index = data['ASAP_parameters']['aligner']['index']
aligner_index_action = ctx.get_action('aligners', aligner_index)

aligner = data['ASAP_parameters']['aligner']['name']
aligner_action = ctx.get_action('aligners', aligner)

# initialize results
results = []

trimmer_results = trimmer_action(sequences)
# results.append(trimmer_results)

aligner_index_result, = aligner_index_action(ref_sequence)
# results.append(aligner_index_result)

aligner_result, = aligner_action(sequences = sequences, reference_index = aligner_index_result)
results.append(aligner_result)

return tuple(results)
38 changes: 34 additions & 4 deletions q2_asap/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@
from ._formats import ASAPXMLFormat, ASAPHTMLFormat, ASAPXMLOutputDirFmt, ASAPHTMLOutputDirFmt
from ._types import ASAPXML, ASAPHTML
from q2_nasp2_types.index import BWAIndex
from q2_nasp2_types.alignment import BAM
from q2_types.feature_data import FeatureData
from q2_nasp2_types.alignment import BAMIndexed, SAM
from q2_types.feature_data import FeatureData, Sequence
from q2_asap.analyzeAmplicons_pipeline import analyzeAmplicons_pipeline
from q2_types.bowtie2 import Bowtie2Index



plugin = Plugin(
Expand Down Expand Up @@ -49,7 +52,7 @@
'aligner_args': Str
},
outputs=[
('output_bams', FeatureData[BAM]),
('output_bams', FeatureData[BAMIndexed]),
('bwa_index', BWAIndex),
('asap_xmls', ASAPXML)
],
Expand All @@ -59,7 +62,7 @@
'depth': 'minimum read depth required to consider a position covered. [default: 100]',
'breadth': 'minimum breadth of coverage required to consider an amplicon as present. [default: 0.8]',
'min_base_qual': 'what is the minimum base quality score (BQS) to use a position (Phred scale, i.e. 10=90, 20=99, 30=99.9 accuracy',
'consensus_proportion': 'minimum proportion required to call at base at that position, else 'N'. [default: 0.8]',
'consensus_proportion': 'minimum proportion required to call at base at that position, else "N". [default: 0.8]',
'fill_gaps': 'fill no coverage gaps in the consensus sequence [default: False], optional parameter is the character to use for filling [defaut: n]',
'aligner': 'aligner to use for read mapping, supports bowtie2, novoalign, and bwa. [default: bowtie2]',
'aligner_args': "additional arguments to pass to the aligner, enclosed in ''."},
Expand Down Expand Up @@ -87,3 +90,30 @@
)


# register analyzeAmplicon pipeline
plugin.pipelines.register_function(
function=analyzeAmplicons_pipeline,
inputs={
'sequences': (SampleData[SequencesWithQuality] | SampleData[PairedEndSequencesWithQuality]),
'ref_sequence': FeatureData[Sequence],
},
parameters={'config_file_path': Str},
outputs=[
# ('trimmer_results', (SampleData[SequencesWithQuality] | SampleData[PairedEndSequencesWithQuality])),
# ('aligner_index_result', (BWAIndex | Bowtie2Index)),
('aligner_result', FeatureData[SAM])
],
input_descriptions={
'sequences': 'the sequences to be analyzed',
'ref_sequence': 'The reference sequence used to build bwa index.',
},
parameter_descriptions={'config_file_path': 'path to config file that holds all params'},
output_descriptions={
# 'trimmer_results': 'The results after completing trimming',
# 'aligner_index_result': 'The resulting aligner index',
'aligner_result': 'The result of aligning reads with specified aligner'
},
name='Analyze Amplicons Pipeline',
description=("A pipeline to run analyze amplicons")
)

39 changes: 15 additions & 24 deletions q2_asap/tests/test_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,38 +7,29 @@
# ----------------------------------------------------------------------------

import pandas as pd
import pandas.testing as pdt

from qiime2.plugin.testing import TestPluginBase
from qiime2.plugin.util import transform
from q2_types.feature_table import BIOMV100Format
from qiime2 import Artifact

from q2_asap._methods import duplicate_table
# from q2_asap.analyzeAmplicons_pipeline import analyzeAmplicons_pipeline


class DuplicateTableTests(TestPluginBase):
class TestAnalyzeAmpliconPipeline(TestPluginBase):
package = 'q2_asap.tests'

def test_simple1(self):
in_table = pd.DataFrame(
[[1, 2, 3, 4, 5], [9, 10, 11, 12, 13]],
columns=['abc', 'def', 'jkl', 'mno', 'pqr'],
index=['sample-1', 'sample-2'])
observed = duplicate_table(in_table)

expected = in_table
def test_analyzeAmplicon_pipeline(self):
# access the pipeline as QIIME 2 sees it,
# for correct assignment of `ctx` variable
analyzeAmplicons_pipeline = self.plugin.pipelines['analyzeAmplicons_pipeline']

pdt.assert_frame_equal(observed, expected)
#import artifact for reference sequence
ref_sequence_artifact = Artifact.import_data('FeatureData[Sequence]', '/home/nsylvester/scratch/q2-asap/q2_asap/tests/data/wuhan_sequence.fasta')

def test_simple2(self):
# test table duplication with table loaded from file this time
# (for demonstration purposes)
in_table = transform(
self.get_data_path('table-1.biom'),
from_type=BIOMV100Format,
to_type=pd.DataFrame)
observed = duplicate_table(in_table)
# get sequences (paired-end-demux.qza)
sequences_artifact = Artifact.load('q2_asap/tests/data/paired-end-demux.qza')

expected = in_table
# run the pipeline
results = analyzeAmplicons_pipeline(sequences = sequences_artifact, ref_sequence = ref_sequence_artifact)

pdt.assert_frame_equal(observed, expected)
# assert output
self.assertTrue(len(results) > 0)

0 comments on commit dad2330

Please sign in to comment.