diff --git a/amrfinder.cpp b/amrfinder.cpp index f985dee..97c7839 100644 --- a/amrfinder.cpp +++ b/amrfinder.cpp @@ -32,7 +32,11 @@ * Dependencies: NCBI BLAST, HMMer * * Release changes: - +* PD-4548 fasta_check.cpp prohibits '\t' (not any '\'), and all restrictions are only for nucleotide sequences +* 3.11.11 04/13/2023 PD-4566 --hmmer_bin +* 3.11.10 04/12/2023 PD-4548 fasta_check.cpp prohibits ';', '.', '~' in the last position of a sequence identifier +* PD-4548 fasta_check.cpp prohibits: ',,' and '\\' in all positions, '?' in initial position, and ',' in the last position of a sequence identifier +* 3.11.9 04/11/2023 PD-4560 BLAST -mt_mode is used on Mac only for BLAST version >= 2.13.0 * 04/05/2023 PD-4522 blastp -task blastp-fast * 04/05/2023 PD-4548 "-a standard" is added * 3.11.8 04/01/2023 fasta_extract.cpp checks whether all requested identifiers are found in FASTA @@ -304,7 +308,7 @@ struct ThisApplication : ShellApplication addFlag ("report_common", "Report proteins common to a taxonomy group"); // PD-2756 addKey ("mutation_all", "File to report all mutations", "", '\0', "MUT_ALL_FILE"); addKey ("blast_bin", "Directory for BLAST. Deafult: $BLAST_BIN", "", '\0', "BLAST_DIR"); - //addKey ("hmmer_bin" ?? + addKey ("hmmer_bin", "Directory for HMMer", "", '\0', "HMMER_DIR"); addFlag ("report_all_equal", "Report all equally-scoring BLAST and HMM matches"); // PD-3772 addFlag ("print_node", "print hierarchy node (family)"); // PD-4394 addKey ("name", "Text to be added as the first column \"name\" to all rows of the report, for example it can be an assembly name", "", '\0', "NAME"); @@ -379,8 +383,9 @@ struct ThisApplication : ShellApplication bool num_threadsP = false; bool mt_modeP = false; { - exec (fullProg (blast) + " -help > " + tmp + "/blast_help"); - LineInput f (tmp + "/blast_help"); + const string blast_help (tmp + "/blast_help"); + exec (fullProg (blast) + " -help > " + blast_help); + LineInput f (blast_help); while (f. nextLine ()) { trim (f. line); @@ -395,10 +400,37 @@ struct ThisApplication : ShellApplication return string (); string s (" -num_threads " + to_string (t)); -//#ifndef __APPLE__ - if (mt_modeP) + + bool mt_mode_works = true; + #ifdef __APPLE__ + { + mt_mode_works = false; + const string blast_version (tmp + "/blast_version"); + exec (fullProg (blast) + " -version > " + blast_version); + LineInput f (blast_version); + while (f. nextLine ()) + { + trim (f. line); + const string prefix (blast + ": "); + if (isLeft (f. line, prefix)) + { + trimSuffix (f. line, "+"); + Istringstream iss; + iss. reset (f. line. substr (prefix. size ())); + const SoftwareVersion v (iss); + //PRINT (v); + iss. reset ("2.13.0"); // PD-4560 + const SoftwareVersion threshold (iss);; + //PRINT (threshold); + mt_mode_works = (threshold <= v); + } + break; + } + } + #endif + + if (mt_modeP && mt_mode_works) s += " -mt_mode 1"; -//#endif return s; } @@ -466,6 +498,7 @@ struct ThisApplication : ShellApplication const bool report_common = getFlag ("report_common"); const string mutation_all = getArg ("mutation_all"); string blast_bin = getArg ("blast_bin"); + string hmmer_bin = getArg ("hmmer_bin"); const bool equidistant = getFlag ("report_all_equal"); const bool print_node = getFlag ("print_node"); const string input_name = shellQuote (getArg ("name")); @@ -583,6 +616,12 @@ struct ThisApplication : ShellApplication prog2dir ["blastn"] = blast_bin; prog2dir ["makeblastdb"] = blast_bin; } + + if (! hmmer_bin. empty ()) + { + addDirSlash (hmmer_bin); + prog2dir ["hmmsearch"] = hmmer_bin; + } if (update) @@ -605,7 +644,10 @@ struct ThisApplication : ShellApplication string blast_bin_par; if (! blast_bin. empty ()) blast_bin_par = " --blast_bin " + shellQuote (blast_bin); - exec (fullProg ("amrfinder_update") + " -d " + shellQuote (dbDir. getParent ()) + ifS (force_update, " --force_update") + blast_bin_par + string hmmer_bin_par; + if (! hmmer_bin. empty ()) + hmmer_bin_par = " --hmmer_bin " + shellQuote (hmmer_bin); + exec (fullProg ("amrfinder_update") + " -d " + shellQuote (dbDir. getParent ()) + ifS (force_update, " --force_update") + blast_bin_par + hmmer_bin_par + ifS (quiet, " -q") + ifS (qc_on, " --debug") + " > " + logFName, logFName); } else diff --git a/amrfinder_index.cpp b/amrfinder_index.cpp index be63754..3322779 100644 --- a/amrfinder_index.cpp +++ b/amrfinder_index.cpp @@ -64,6 +64,7 @@ struct ThisApplication : ShellApplication { addPositional ("DATABASE", "Directory with AMRFinder database"); addKey ("blast_bin", "Directory for BLAST", "", '\0', "BLAST_DIR"); + addKey ("hmmer_bin", "Directory for HMMer", "", '\0', "HMMER_DIR"); addFlag ("quiet", "Suppress messages to STDERR", 'q'); version = SVN_REV; } @@ -72,14 +73,15 @@ struct ThisApplication : ShellApplication void shellBody () const final { - string dbDir = getArg ("DATABASE"); string blast_bin = getArg ("blast_bin"); + string hmmer_bin = getArg ("hmmer_bin"); const bool quiet = getFlag ("quiet"); addDirSlash (dbDir); addDirSlash (blast_bin); + addDirSlash (hmmer_bin); Stderr stderr (quiet); @@ -89,10 +91,13 @@ struct ThisApplication : ShellApplication if (! directoryExists (dbDir)) throw runtime_error ("Database directory " + dbDir + " does not exist"); - + if (! blast_bin. empty ()) - prog2dir ["makeblastdb"] = blast_bin; + prog2dir ["makeblastdb"] = blast_bin; findProg ("makeblastdb"); + + if (! hmmer_bin. empty ()) + prog2dir ["hmmpress"] = hmmer_bin; findProg ("hmmpress"); @@ -100,7 +105,6 @@ struct ThisApplication : ShellApplication StringVector dnaPointMuts; { LineInput f (dbDir + "taxgroup.tab"); - while (f. nextLine ()) { if (isLeft (f. line, "#")) @@ -117,10 +121,8 @@ struct ThisApplication : ShellApplication stderr << "Indexing" << "\n"; - exec (fullProg ("hmmpress") + " -f " + shellQuote (dbDir + "AMR.LIB") + " > /dev/null 2> " + tmp + "/hmmpress.err", tmp + "/hmmpress.err"); setSymlink (dbDir, tmp + "/db", true); - exec (fullProg ("makeblastdb") + " -in " + tmp + "/db/AMRProt" + " -dbtype prot -logfile " + tmp + "/makeblastdb.AMRProt", tmp + "/makeblastdb.AMRProt"); exec (fullProg ("makeblastdb") + " -in " + tmp + "/db/AMR_CDS" + " -dbtype nucl -logfile " + tmp + "/makeblastdb.AMR_CDS", tmp + "/makeblastdb.AMR_CDS"); for (const string& dnaPointMut : dnaPointMuts) diff --git a/amrfinder_update.cpp b/amrfinder_update.cpp index 6ae20e1..b43b0bb 100644 --- a/amrfinder_update.cpp +++ b/amrfinder_update.cpp @@ -315,6 +315,7 @@ Requirement: the database directory contains subdirectories named by database ve { addKey ("database", "Directory for all versions of AMRFinder databases", "$BASE/data", 'd', "DATABASE_DIR"); addKey ("blast_bin", "Directory for BLAST", "", '\0', "BLAST_DIR"); + addKey ("hmmer_bin", "Directory for HMMer", "", '\0', "HMMER_DIR"); addFlag ("force_update", "Force updating the AMRFinder database"); // PD-3469 addFlag ("quiet", "Suppress messages to STDERR", 'q'); version = SVN_REV; @@ -345,11 +346,13 @@ Requirement: the database directory contains subdirectories named by database ve { const string mainDirOrig = getArg ("database"); string blast_bin = getArg ("blast_bin"); + string hmmer_bin = getArg ("hmmer_bin"); const bool force_update = getFlag ("force_update"); const bool quiet = getFlag ("quiet"); addDirSlash (blast_bin); + addDirSlash (hmmer_bin); Stderr stderr (quiet); @@ -400,23 +403,13 @@ Requirement: the database directory contains subdirectories named by database ve ASSERT (! load_data_version. empty ()); - #if 0 - if (! blast_bin. empty ()) - prog2dir ["makeblastdb"] = blast_bin; - findProg ("makeblastdb"); - findProg ("hmmpress"); - #endif - - // Users's files string mainDirS; { const Dir mainDir (mainDirOrig); mainDirS = mainDir. get (); } - addDirSlash (mainDirS); - const string versionFName ("version.txt"); const string urlDir (URL + load_minor + "/" + load_data_version + "/"); @@ -493,7 +486,10 @@ Requirement: the database directory contains subdirectories named by database ve string blast_bin_par; if (! blast_bin. empty ()) blast_bin_par = " --blast_bin " + shellQuote (blast_bin); - exec (fullProg ("amrfinder_index") + shellQuote (latestDir) + blast_bin_par + ifS (quiet, " -q") + ifS (qc_on, " --debug") + " > " + tmp + "/amrfinder_index.err", tmp + "/amrfinder_index.err"); + string hmmer_bin_par; + if (! hmmer_bin. empty ()) + hmmer_bin_par = " --hmmer_bin " + shellQuote (hmmer_bin); + exec (fullProg ("amrfinder_index") + shellQuote (latestDir) + blast_bin_par + hmmer_bin_par + ifS (quiet, " -q") + ifS (qc_on, " --debug") + " > " + tmp + "/amrfinder_index.err", tmp + "/amrfinder_index.err"); #else stderr << "Indexing" << "\n"; exec (fullProg ("hmmpress") + " -f " + shellQuote (latestDir + "AMR.LIB") + " > /dev/null 2> " + tmp + "/hmmpress.err", tmp + "/hmmpress.err"); diff --git a/common.cpp b/common.cpp index 4adfff1..9e8cfcd 100644 --- a/common.cpp +++ b/common.cpp @@ -312,7 +312,6 @@ void throwf (const string &s) *cxml << s; } throw logic_error (s + "\nStack:\n" + getStack ()); - } @@ -1287,49 +1286,6 @@ void readLine (istream &is, -string getColumn (istream &is, - const string &skip, - const string &delimeters) -{ - // Skipping skip - for (;;) - { - IMPLY (! is. eof (), is. good ()); - const char c = (char) is. get (); - if (is. eof ()) - return noString; - ASSERT (c); - if (charInSet (c, skip)) - continue; - if (charInSet (c, delimeters)) - return string (1, c); - is. unget (); - break; - } - - string token; - for (;;) - { - IMPLY (! is. eof (), is. good ()); - const char c = (char) is. get (); - if ( is. eof () - || charInSet (c, skip) - || charInSet (c, delimeters) - ) - { - is. unget (); - break; - } - ASSERT (c); - token += c; - } - ASSERT (! token. empty ()); - - return token; -} - - - hash str_hash; hash size_hash; @@ -1520,7 +1476,10 @@ Threads::Threads (size_t threadsToStart_arg, threads. reserve (threadsToStart); if (! quiet && verbose (1) && threadsToStart) + { + const OColor c (cerr, Color::green, false, true); // Cf. Progress::report() cerr << "# Threads started: " << threadsToStart + 1/*main thread*/ << endl; + } } @@ -1763,9 +1722,12 @@ size_t Progress::beingUsed = 0; void Progress::report () const { + if (! n) + return; cerr << '\r'; #ifndef _MSC_VER cerr << "\033[2K"; + const OColor c (cerr, Color::green, false, true); // PAR #endif cerr << n; if (n_max) diff --git a/common.hpp b/common.hpp index c20168a..a2daed8 100644 --- a/common.hpp +++ b/common.hpp @@ -304,14 +304,14 @@ inline void section (const string &title, { { const OColor oc1 (cout, color, bright, true); const OColor oc2 (cerr, color, bright, true); - couterr << title << " ..."; + couterr << title /*<< " ..."*/; } couterr << endl; } else { { const OColor oc (cerr, color, bright, true); - cerr << title << " ..."; + cerr << title /*<< " ..."*/; } cerr << endl; } @@ -1404,14 +1404,6 @@ void readLine (istream &is, string &s); // Output: s -string getColumn (istream &is, - const string &skip, - const string &delimeters); - // Return: empty() <=> eof - -inline void pressAnyKey () - { cout << "Press any key..."; char c; cin >> c; } - inline streamsize double2decimals (double r) @@ -1980,7 +1972,6 @@ struct Vector : vector (*this) [index] = value; else throwf ("vector [" + to_string (index) +"] is not empty"); - } void eraseAt (size_t index) { eraseMany (index, index + 1); } @@ -2317,7 +2308,6 @@ struct Vector : vector res. searchSorted = true; return res; } - void setUnion (const Vector &other) { if (P::empty ()) *this = other; diff --git a/fasta_check.cpp b/fasta_check.cpp index 89294e9..0542144 100644 --- a/fasta_check.cpp +++ b/fasta_check.cpp @@ -103,6 +103,19 @@ struct ThisApplication : Application for (const char c : id) if (! printable (c)) throw runtime_error (errorS + "Non-printable character in the sequence identifier: " + to_string ((int) c)); + // BLAST: PD-4548 + if (! aa) + { + if (id. front () == '?') + throw runtime_error (errorS + "Sequence identifier starts with '?'"); + for (const char c : {',', ';', '.', '~'}) + if (id. back () == c) + throw runtime_error (errorS + "Sequence identifier ends with " + strQuote (string (1, c))); + if (contains (id, "\\t")) + throw runtime_error (errorS + "Sequence identifier contains '\\t'"); + if (contains (id, ",,")) + throw runtime_error (errorS + "Sequence identifier contains ',,'"); + } if (! first && seqSize == 0) throw runtime_error (errorS + "Empty sequence"); if (lenF. get () && ! ids. empty ()) diff --git a/fasta_extract.cpp b/fasta_extract.cpp index 2d061a6..8d435de 100644 --- a/fasta_extract.cpp +++ b/fasta_extract.cpp @@ -238,7 +238,9 @@ Line format for nucleotide sequences : =1)> = start)> = stop) throw runtime_error (errorS + "start should be less than stop"); diff --git a/version.txt b/version.txt index e028118..2d4715b 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -3.11.8 \ No newline at end of file +3.11.11