Skip to content

Commit

Permalink
v4.0.1
Browse files Browse the repository at this point in the history
Allow to change Bowtie2 alignment mode
  • Loading branch information
miguelpmachado authored Jan 16, 2019
1 parent 95cdf03 commit 8b7f554
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 38 deletions.
21 changes: 18 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,9 @@ The sample files are required to be in "fq.gz" (or "fastq.gz") format.
[--minFrequencyDominantAllele 0.6] [--minGeneCoverage N]
[--minGeneIdentity N] [--doubleRun]
[--reportSequenceCoverage] [--notWriteConsensus]
[--bowtieOPT] [--debug]
[--bowtieAlgo="--very-sensitive-local"]
[--bowtieOPT="--no-mixed"]
[--debug]
[--mlstSchemaNumber N] [--mlstConsensus noMatter]
[--mlstRun first]
[-a /path/to/asperaweb_id_dsa.openssh] [-k]
Expand Down Expand Up @@ -212,8 +214,21 @@ The sample files are required to be in "fq.gz" (or "fastq.gz") format.
present in at least one sample (useful when using a
large number of reference sequences, and only for
first run) (default: False)
--bowtieOPT "--no-mixed"
Extra Bowtie2 options (default: None)
--bowtieAlgo="--very-sensitive-local"
Bowtie2 alignment mode. It can be an end-to-end alignment
(unclipped alignment) or a local alignment (soft-clipped
alignment). You can also choose between fast and sensitive
alignments. Please check the Bowtie2 manual for extra information:
http://bowtie-bio.sourceforge.net/bowtie2/index.shtml .
This option should be provided between quotes and start
with a leading space (like --bowtieAlgo " --very-fast") or be
given using an equals sign (like --bowtieAlgo="--very-fast")
(default: "--very-sensitive-local")
--bowtieOPT="--no-mixed"
Extra Bowtie2 options. This option should be provided between
quotes and start with a leading space
(like --bowtieOPT " --no-mixed") or be given using an equals sign
(like --bowtieOPT="--no-mixed") (default: None)
--debug DeBug Mode: do not remove temporary files (default: False)
--mlstReference If the curated scheme for MLST alleles is available, tells
ReMatCh to use these as reference (force Bowtie2 to run
Expand Down
2 changes: 1 addition & 1 deletion ReMatCh/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '4.0'
__version__ = '4.0.1'
51 changes: 29 additions & 22 deletions ReMatCh/modules/rematch_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,16 @@ def index_sequence_bowtie2(reference_file, threads):


# Mapping with Bowtie2
def mapping_bowtie2(fastq_files, reference_file, threads, outdir, conserved_true, num_map_loc, bowtie_opt):
def mapping_bowtie2(fastq_files, reference_file, threads, outdir, num_map_loc,
bowtie_algorithm='--very-sensitive-local', bowtie_opt=None):
sam_file = os.path.join(outdir, str('alignment.sam'))

# Index reference file
run_successfully = index_sequence_bowtie2(reference_file, threads)

if run_successfully:
command = ['bowtie2', '-k', str(num_map_loc), '-q', '', '--threads', str(threads), '-x', reference_file, '',
'--no-unal', '', '-S', sam_file]
command = ['bowtie2', '-k', str(num_map_loc), '-q', bowtie_algorithm, '--threads', str(threads), '-x',
reference_file, '', '--no-unal', '', '-S', sam_file]

if len(fastq_files) == 1:
command[9] = '-U ' + fastq_files[0]
Expand All @@ -50,11 +51,6 @@ def mapping_bowtie2(fastq_files, reference_file, threads, outdir, conserved_true
else:
return False, None

if conserved_true:
command[4] = '--sensitive'
else:
command[4] = '--very-sensitive-local'

if bowtie_opt is not None:
command[11] = bowtie_opt

Expand Down Expand Up @@ -266,17 +262,22 @@ def index_alignment(alignment_file):
return run_successfully


def mapping_reads(fastq_files, reference_file, threads, outdir, conserved_true, num_map_loc, rematch_run,
def mapping_reads(fastq_files, reference_file, threads, outdir, num_map_loc, rematch_run,
soft_clip_base_quality, soft_clip_recode_run, reference_dict, soft_clip_cigar_flag_recode,
bowtie_opt):
bowtie_algorithm, bowtie_opt, clean_run=True):
# Create a symbolic link to the reference_file
reference_link = os.path.join(outdir, os.path.basename(reference_file))
os.symlink(reference_file, reference_link)
if clean_run:
reference_link = os.path.join(outdir, os.path.basename(reference_file))
if os.path.islink(reference_link):
os.unlink(reference_link)
os.symlink(reference_file, reference_link)
reference_file = reference_link

bam_file = None
# Mapping reads using Bowtie2
run_successfully, sam_file = mapping_bowtie2(fastq_files, reference_link, threads, outdir, conserved_true,
num_map_loc, bowtie_opt)
run_successfully, sam_file = mapping_bowtie2(fastq_files=fastq_files, reference_file=reference_file,
threads=threads, outdir=outdir, num_map_loc=num_map_loc,
bowtie_algorithm=bowtie_algorithm, bowtie_opt=bowtie_opt)

if run_successfully:
# Remove soft clipping
Expand All @@ -294,7 +295,7 @@ def mapping_reads(fastq_files, reference_file, threads, outdir, conserved_true,
# Index bam
run_successfully = index_alignment(bam_file)

return run_successfully, bam_file, reference_link
return run_successfully, bam_file, reference_file


def create_vcf(bam_file, sequence_to_analyse, outdir, counter, reference_file):
Expand Down Expand Up @@ -913,6 +914,7 @@ def analyse_sequence_data(bam_file, sequence_information, outdir, counter, refer
mean_coverage = None
number_diferences = 0
number_multi_alleles = 0
consensus_sequence = {'correct': {}, 'noMatter': {}, 'alignment': {}}

# Create vcf file (for multiple alleles check)
run_successfully, gene_vcf = create_vcf(bam_file, sequence_information['header'], outdir, counter, reference_file)
Expand Down Expand Up @@ -1121,19 +1123,24 @@ def gather_data_together(sample, data_directory, sequences_information, outdir,
@rematch_timer
def run_rematch_module(sample, fastq_files, reference_file, threads, outdir, length_extra_seq, minimum_depth_presence,
minimum_depth_call, minimum_depth_frequency_dominant_allele, minimum_gene_coverage,
conserved_true, debug_mode_true, num_map_loc, minimum_gene_identity, rematch_run,
debug_mode_true, num_map_loc, minimum_gene_identity, rematch_run,
soft_clip_base_quality, soft_clip_recode_run, reference_dict, soft_clip_cigar_flag_recode,
bowtie_opt, gene_list_reference, not_write_consensus):
bowtie_algorithm, bowtie_opt, gene_list_reference, not_write_consensus, clean_run=True):
rematch_folder = os.path.join(outdir, 'rematch_module', '')

utils.remove_directory(rematch_folder)
os.mkdir(rematch_folder)

# Map reads
run_successfully, bam_file, reference_file = mapping_reads(fastq_files, reference_file, threads, rematch_folder,
conserved_true, num_map_loc, rematch_run,
soft_clip_base_quality, soft_clip_recode_run,
reference_dict, soft_clip_cigar_flag_recode, bowtie_opt)

run_successfully, bam_file, reference_file = mapping_reads(fastq_files=fastq_files, reference_file=reference_file,
threads=threads, outdir=rematch_folder,
num_map_loc=num_map_loc, rematch_run=rematch_run,
soft_clip_base_quality=soft_clip_base_quality,
soft_clip_recode_run=soft_clip_recode_run,
reference_dict=reference_dict,
soft_clip_cigar_flag_recode=soft_clip_cigar_flag_recode,
bowtie_algorithm=bowtie_algorithm, bowtie_opt=bowtie_opt,
clean_run=clean_run)
if run_successfully:
# Index reference file
run_successfully, stdout = index_fasta_samtools(reference_file, None, None, True)
Expand Down
37 changes: 25 additions & 12 deletions ReMatCh/rematch.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
and consensus sequences production
<https://github.com/B-UMMI/ReMatCh/>
Copyright (C) 2018 Miguel Machado <[email protected]>
Copyright (C) 2019 Miguel Machado <[email protected]>
Last modified: October 15, 2018
Last modified: January 02, 2019
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -356,7 +356,6 @@ def run_rematch(args):
args.mlstSchemaNumber,
workdir)
args.softClip_recodeRun = 'first'
args.conservedSeq = False

if args.reference is None:
reference_file = check_mlst.check_existing_schema(args.mlst, args.mlstSchemaNumber, script_path)
Expand Down Expand Up @@ -437,10 +436,11 @@ def run_rematch(args):
rematch_module.run_rematch_module(sample, fastq_files, reference_file, args.threads, sample_outdir,
args.extraSeq, args.minCovPresence, args.minCovCall,
args.minFrequencyDominantAllele, args.minGeneCoverage,
args.conservedSeq, args.debug, args.numMapLoc, args.minGeneIdentity,
args.debug, args.numMapLoc, args.minGeneIdentity,
'first', args.softClip_baseQuality, args.softClip_recodeRun,
reference_dict, args.softClip_cigarFlagRecode, args.bowtieOPT,
gene_list_reference, args.notWriteConsensus)
reference_dict, args.softClip_cigarFlagRecode,
args.bowtieAlgo, args.bowtieOPT,
gene_list_reference, args.notWriteConsensus, clean_run=True)
if run_successfully_rematch_first:
if args.mlst is not None and (args.mlstRun == 'first' or args.mlstRun == 'all'):
run_get_st(sample, mlst_dicts, consensus_sequences, args.mlstConsensus, 'first', workdir, time_str)
Expand Down Expand Up @@ -471,12 +471,14 @@ def run_rematch(args):
args.threads, rematch_second_outdir, args.extraSeq,
args.minCovPresence, args.minCovCall,
args.minFrequencyDominantAllele, args.minGeneCoverage,
args.conservedSeq, args.debug, args.numMapLoc,
args.debug, args.numMapLoc,
args.minGeneIdentity, 'second',
args.softClip_baseQuality, args.softClip_recodeRun,
consensus_concatenated_dict,
args.softClip_cigarFlagRecode, args.bowtieOPT,
gene_list_reference, args.notWriteConsensus)
args.softClip_cigarFlagRecode,
args.bowtieAlgo, args.bowtieOPT,
gene_list_reference, args.notWriteConsensus,
clean_run=True)
if not args.debug:
os.remove(consensus_concatenated_fasta)
if run_successfully_rematch_second:
Expand Down Expand Up @@ -578,8 +580,6 @@ def main():
required=False)

parser_optional_rematch = parser.add_argument_group('ReMatCh module facultative options')
parser_optional_rematch.add_argument('--conservedSeq', action='store_true', help=argparse.SUPPRESS)
# parser_optional_rematch.add_argument('--conservedSeq', action='store_true', help='This option can be used with conserved sequences like MLST genes to speedup the analysis by alignning reads using Bowtie2 sensitive algorithm')
parser_optional_rematch.add_argument('--extraSeq', type=int, metavar='N',
help='Sequence length added to both ends of target sequences (usefull to'
' improve reads mapping to the target one) that will be trimmed in'
Expand Down Expand Up @@ -621,7 +621,20 @@ def main():
' sequences, and only for first run)')
parser_optional_rematch.add_argument('--notWriteConsensus', action='store_true',
help='Do not write consensus sequences')
parser_optional_rematch.add_argument('--bowtieOPT', type=str, metavar='"--no-mixed"', help='Extra Bowtie2 options',
parser_optional_rematch.add_argument('--bowtieAlgo', type=str, metavar='"--very-sensitive-local"',
help='Bowtie2 alignment mode. It can be an end-to-end alignment (unclipped'
' alignment) or local alignment (soft clipped alignment). Also, can'
' choose between fast or sensitive alignments. Please check Bowtie2'
' manual for extra'
' information: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml .'
' This option should be provided between quotes and starting with'
' an empty space (like --bowtieAlgo " --very-fast") or using equal'
' sign (like --bowtieAlgo="--very-fast")',
required=False, default='--very-sensitive-local')
parser_optional_rematch.add_argument('--bowtieOPT', type=str, metavar='"--no-mixed"',
help='Extra Bowtie2 options. This option should be provided between quotes and'
' starting with an empty space (like --bowtieOPT " --no-mixed") or using'
' equal sign (like --bowtieOPT="--no-mixed")',
required=False)
parser_optional_rematch.add_argument('--debug', action='store_true',
help='DeBug Mode: do not remove temporary files')
Expand Down

0 comments on commit 8b7f554

Please sign in to comment.