diff --git a/README.md b/README.md
index 9cc92ae..28de628 100644
--- a/README.md
+++ b/README.md
@@ -129,7 +129,9 @@ The sample files are required to be in "fq.gz" (or "fastq.gz") format.
[--minFrequencyDominantAllele 0.6] [--minGeneCoverage N]
[--minGeneIdentity N] [--doubleRun]
[--reportSequenceCoverage] [--notWriteConsensus]
- [--bowtieOPT] [--debug]
+ [--bowtieAlgo="--very-sensitive-local"]
+ [--bowtieOPT="--no-mixed"]
+ [--debug]
[--mlstSchemaNumber N] [--mlstConsensus noMatter]
[--mlstRun first]
[-a /path/to/asperaweb_id_dsa.openssh] [-k]
@@ -212,8 +214,21 @@ The sample files are required to be in "fq.gz" (or "fastq.gz") format.
present in at least one sample (usefull when using a
large number of reference sequences, and only for
first run) (default: False)
- --bowtieOPT "--no-mixed"
- Extra Bowtie2 options (default: None)
+ --bowtieAlgo="--very-sensitive-local"
+ Bowtie2 alignment mode. It can be an end-to-end alignment
+ (unclipped alignment) or local alignment (soft clipped
+ alignment). Also, can choose between fast or sensitive
+ alignments. Please check Bowtie2 manual for extra information:
+ http://bowtie-bio.sourceforge.net/bowtie2/index.shtml .
+ This option should be provided between quotes and starting
+ with an empty space (like --bowtieAlgo " --very-fast") or
+ using equal sign (like --bowtieAlgo="--very-fast")
+ (default: "--very-sensitive-local")
+ --bowtieOPT="--no-mixed"
+ Extra Bowtie2 options. This option should be provided between
+ quotes and starting with an empty space
+ (like --bowtieOPT " --no-mixed") or using equal sign
+ (like --bowtieOPT="--no-mixed") (default: None)
--debug DeBug Mode: do not remove temporary files (default: False)
--mlstReference If the curated scheme for MLST alleles is available, tells
ReMatCh to use these as reference (force Bowtie2 to run
diff --git a/ReMatCh/__init__.py b/ReMatCh/__init__.py
index a93f051..1a3bef5 100644
--- a/ReMatCh/__init__.py
+++ b/ReMatCh/__init__.py
@@ -1 +1 @@
-__version__ = '4.0'
+__version__ = '4.0.1'
diff --git a/ReMatCh/modules/rematch_module.py b/ReMatCh/modules/rematch_module.py
index 4586439..c2d3f40 100644
--- a/ReMatCh/modules/rematch_module.py
+++ b/ReMatCh/modules/rematch_module.py
@@ -33,15 +33,16 @@ def index_sequence_bowtie2(reference_file, threads):
# Mapping with Bowtie2
-def mapping_bowtie2(fastq_files, reference_file, threads, outdir, conserved_true, num_map_loc, bowtie_opt):
+def mapping_bowtie2(fastq_files, reference_file, threads, outdir, num_map_loc,
+ bowtie_algorithm='--very-sensitive-local', bowtie_opt=None):
sam_file = os.path.join(outdir, str('alignment.sam'))
# Index reference file
run_successfully = index_sequence_bowtie2(reference_file, threads)
if run_successfully:
- command = ['bowtie2', '-k', str(num_map_loc), '-q', '', '--threads', str(threads), '-x', reference_file, '',
- '--no-unal', '', '-S', sam_file]
+ command = ['bowtie2', '-k', str(num_map_loc), '-q', bowtie_algorithm, '--threads', str(threads), '-x',
+ reference_file, '', '--no-unal', '', '-S', sam_file]
if len(fastq_files) == 1:
command[9] = '-U ' + fastq_files[0]
@@ -50,11 +51,6 @@ def mapping_bowtie2(fastq_files, reference_file, threads, outdir, conserved_true
else:
return False, None
- if conserved_true:
- command[4] = '--sensitive'
- else:
- command[4] = '--very-sensitive-local'
-
if bowtie_opt is not None:
command[11] = bowtie_opt
@@ -266,17 +262,22 @@ def index_alignment(alignment_file):
return run_successfully
-def mapping_reads(fastq_files, reference_file, threads, outdir, conserved_true, num_map_loc, rematch_run,
+def mapping_reads(fastq_files, reference_file, threads, outdir, num_map_loc, rematch_run,
soft_clip_base_quality, soft_clip_recode_run, reference_dict, soft_clip_cigar_flag_recode,
- bowtie_opt):
+ bowtie_algorithm, bowtie_opt, clean_run=True):
# Create a symbolic link to the reference_file
- reference_link = os.path.join(outdir, os.path.basename(reference_file))
- os.symlink(reference_file, reference_link)
+ if clean_run:
+ reference_link = os.path.join(outdir, os.path.basename(reference_file))
+ if os.path.islink(reference_link):
+ os.unlink(reference_link)
+ os.symlink(reference_file, reference_link)
+ reference_file = reference_link
bam_file = None
# Mapping reads using Bowtie2
- run_successfully, sam_file = mapping_bowtie2(fastq_files, reference_link, threads, outdir, conserved_true,
- num_map_loc, bowtie_opt)
+ run_successfully, sam_file = mapping_bowtie2(fastq_files=fastq_files, reference_file=reference_file,
+ threads=threads, outdir=outdir, num_map_loc=num_map_loc,
+ bowtie_algorithm=bowtie_algorithm, bowtie_opt=bowtie_opt)
if run_successfully:
# Remove soft clipping
@@ -294,7 +295,7 @@ def mapping_reads(fastq_files, reference_file, threads, outdir, conserved_true,
# Index bam
run_successfully = index_alignment(bam_file)
- return run_successfully, bam_file, reference_link
+ return run_successfully, bam_file, reference_file
def create_vcf(bam_file, sequence_to_analyse, outdir, counter, reference_file):
@@ -913,6 +914,7 @@ def analyse_sequence_data(bam_file, sequence_information, outdir, counter, refer
mean_coverage = None
number_diferences = 0
number_multi_alleles = 0
+ consensus_sequence = {'correct': {}, 'noMatter': {}, 'alignment': {}}
# Create vcf file (for multiple alleles check)
run_successfully, gene_vcf = create_vcf(bam_file, sequence_information['header'], outdir, counter, reference_file)
@@ -1121,19 +1123,24 @@ def gather_data_together(sample, data_directory, sequences_information, outdir,
@rematch_timer
def run_rematch_module(sample, fastq_files, reference_file, threads, outdir, length_extra_seq, minimum_depth_presence,
minimum_depth_call, minimum_depth_frequency_dominant_allele, minimum_gene_coverage,
- conserved_true, debug_mode_true, num_map_loc, minimum_gene_identity, rematch_run,
+ debug_mode_true, num_map_loc, minimum_gene_identity, rematch_run,
soft_clip_base_quality, soft_clip_recode_run, reference_dict, soft_clip_cigar_flag_recode,
- bowtie_opt, gene_list_reference, not_write_consensus):
+ bowtie_algorithm, bowtie_opt, gene_list_reference, not_write_consensus, clean_run=True):
rematch_folder = os.path.join(outdir, 'rematch_module', '')
+
utils.remove_directory(rematch_folder)
os.mkdir(rematch_folder)
# Map reads
- run_successfully, bam_file, reference_file = mapping_reads(fastq_files, reference_file, threads, rematch_folder,
- conserved_true, num_map_loc, rematch_run,
- soft_clip_base_quality, soft_clip_recode_run,
- reference_dict, soft_clip_cigar_flag_recode, bowtie_opt)
-
+ run_successfully, bam_file, reference_file = mapping_reads(fastq_files=fastq_files, reference_file=reference_file,
+ threads=threads, outdir=rematch_folder,
+ num_map_loc=num_map_loc, rematch_run=rematch_run,
+ soft_clip_base_quality=soft_clip_base_quality,
+ soft_clip_recode_run=soft_clip_recode_run,
+ reference_dict=reference_dict,
+ soft_clip_cigar_flag_recode=soft_clip_cigar_flag_recode,
+ bowtie_algorithm=bowtie_algorithm, bowtie_opt=bowtie_opt,
+ clean_run=clean_run)
if run_successfully:
# Index reference file
run_successfully, stdout = index_fasta_samtools(reference_file, None, None, True)
diff --git a/ReMatCh/rematch.py b/ReMatCh/rematch.py
index 3a29a40..e15d23a 100755
--- a/ReMatCh/rematch.py
+++ b/ReMatCh/rematch.py
@@ -7,9 +7,9 @@
and consensus sequences production
-Copyright (C) 2018 Miguel Machado
+Copyright (C) 2019 Miguel Machado
-Last modified: October 15, 2018
+Last modified: January 02, 2019
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -356,7 +356,6 @@ def run_rematch(args):
args.mlstSchemaNumber,
workdir)
args.softClip_recodeRun = 'first'
- args.conservedSeq = False
if args.reference is None:
reference_file = check_mlst.check_existing_schema(args.mlst, args.mlstSchemaNumber, script_path)
@@ -437,10 +436,11 @@ def run_rematch(args):
rematch_module.run_rematch_module(sample, fastq_files, reference_file, args.threads, sample_outdir,
args.extraSeq, args.minCovPresence, args.minCovCall,
args.minFrequencyDominantAllele, args.minGeneCoverage,
- args.conservedSeq, args.debug, args.numMapLoc, args.minGeneIdentity,
+ args.debug, args.numMapLoc, args.minGeneIdentity,
'first', args.softClip_baseQuality, args.softClip_recodeRun,
- reference_dict, args.softClip_cigarFlagRecode, args.bowtieOPT,
- gene_list_reference, args.notWriteConsensus)
+ reference_dict, args.softClip_cigarFlagRecode,
+ args.bowtieAlgo, args.bowtieOPT,
+ gene_list_reference, args.notWriteConsensus, clean_run=True)
if run_successfully_rematch_first:
if args.mlst is not None and (args.mlstRun == 'first' or args.mlstRun == 'all'):
run_get_st(sample, mlst_dicts, consensus_sequences, args.mlstConsensus, 'first', workdir, time_str)
@@ -471,12 +471,14 @@ def run_rematch(args):
args.threads, rematch_second_outdir, args.extraSeq,
args.minCovPresence, args.minCovCall,
args.minFrequencyDominantAllele, args.minGeneCoverage,
- args.conservedSeq, args.debug, args.numMapLoc,
+ args.debug, args.numMapLoc,
args.minGeneIdentity, 'second',
args.softClip_baseQuality, args.softClip_recodeRun,
consensus_concatenated_dict,
- args.softClip_cigarFlagRecode, args.bowtieOPT,
- gene_list_reference, args.notWriteConsensus)
+ args.softClip_cigarFlagRecode,
+ args.bowtieAlgo, args.bowtieOPT,
+ gene_list_reference, args.notWriteConsensus,
+ clean_run=True)
if not args.debug:
os.remove(consensus_concatenated_fasta)
if run_successfully_rematch_second:
@@ -578,8 +580,6 @@ def main():
required=False)
parser_optional_rematch = parser.add_argument_group('ReMatCh module facultative options')
- parser_optional_rematch.add_argument('--conservedSeq', action='store_true', help=argparse.SUPPRESS)
- # parser_optional_rematch.add_argument('--conservedSeq', action='store_true', help='This option can be used with conserved sequences like MLST genes to speedup the analysis by alignning reads using Bowtie2 sensitive algorithm')
parser_optional_rematch.add_argument('--extraSeq', type=int, metavar='N',
help='Sequence length added to both ends of target sequences (usefull to'
' improve reads mapping to the target one) that will be trimmed in'
@@ -621,7 +621,20 @@ def main():
' sequences, and only for first run)')
parser_optional_rematch.add_argument('--notWriteConsensus', action='store_true',
help='Do not write consensus sequences')
- parser_optional_rematch.add_argument('--bowtieOPT', type=str, metavar='"--no-mixed"', help='Extra Bowtie2 options',
+ parser_optional_rematch.add_argument('--bowtieAlgo', type=str, metavar='"--very-sensitive-local"',
+ help='Bowtie2 alignment mode. It can be an end-to-end alignment (unclipped'
+ ' alignment) or local alignment (soft clipped alignment). Also, can'
+ ' choose between fast or sensitive alignments. Please check Bowtie2'
+ ' manual for extra'
+ ' information: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml .'
+ ' This option should be provided between quotes and starting with'
+ ' an empty space (like --bowtieAlgo " --very-fast") or using equal'
+ ' sign (like --bowtieAlgo="--very-fast")',
+ required=False, default='--very-sensitive-local')
+ parser_optional_rematch.add_argument('--bowtieOPT', type=str, metavar='"--no-mixed"',
+ help='Extra Bowtie2 options. This option should be provided between quotes and'
+ ' starting with an empty space (like --bowtieOPT " --no-mixed") or using'
+ ' equal sign (like --bowtieOPT="--no-mixed")',
required=False)
parser_optional_rematch.add_argument('--debug', action='store_true',
help='DeBug Mode: do not remove temporary files')