Skip to content

Commit

Permalink
Merge pull request #117 from ncbi/dev
Browse files Browse the repository at this point in the history
Release 3.11.11
  • Loading branch information
evolarjun authored Apr 18, 2023
2 parents 2bc24b6 + fa70613 commit 1961989
Show file tree
Hide file tree
Showing 9 changed files with 92 additions and 85 deletions.
58 changes: 50 additions & 8 deletions amrfinder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@
* Dependencies: NCBI BLAST, HMMer
*
* Release changes:
* PD-4548 fasta_check.cpp prohibits '\t' (not any '\'), and all restrictions are only for nucleotide sequences
* 3.11.11 04/13/2023 PD-4566 --hmmer_bin
* 3.11.10 04/12/2023 PD-4548 fasta_check.cpp prohibits ';', '.', '~' in the last position of a sequence identifier
* PD-4548 fasta_check.cpp prohibits: ',,' and '\\' in all positions, '?' in initial position, and ',' in the last position of a sequence identifier
* 3.11.9 04/11/2023 PD-4560 BLAST -mt_mode is used on Mac only for BLAST version >= 2.13.0
* 04/05/2023 PD-4522 blastp -task blastp-fast
* 04/05/2023 PD-4548 "-a standard" is added
* 3.11.8 04/01/2023 fasta_extract.cpp checks whether all requested identifiers are found in FASTA
Expand Down Expand Up @@ -304,7 +308,7 @@ struct ThisApplication : ShellApplication
// Command-line options; the trailing PD-... tags refer to the tickets that introduced them
addFlag ("report_common", "Report proteins common to a taxonomy group"); // PD-2756
addKey ("mutation_all", "File to report all mutations", "", '\0', "MUT_ALL_FILE");
addKey ("blast_bin", "Directory for BLAST. Default: $BLAST_BIN", "", '\0', "BLAST_DIR");
addKey ("hmmer_bin", "Directory for HMMer", "", '\0', "HMMER_DIR"); // PD-4566
addFlag ("report_all_equal", "Report all equally-scoring BLAST and HMM matches"); // PD-3772
addFlag ("print_node", "print hierarchy node (family)"); // PD-4394
addKey ("name", "Text to be added as the first column \"name\" to all rows of the report, for example it can be an assembly name", "", '\0', "NAME");
Expand Down Expand Up @@ -379,8 +383,9 @@ struct ThisApplication : ShellApplication
bool num_threadsP = false;
bool mt_modeP = false;
{
// Write the "-help" output of the BLAST program to a temporary file once, then read that file
const string blast_help (tmp + "/blast_help");
exec (fullProg (blast) + " -help > " + blast_help);
LineInput f (blast_help);
while (f. nextLine ())
{
trim (f. line);
Expand All @@ -395,10 +400,37 @@ struct ThisApplication : ShellApplication
return string ();

string s (" -num_threads " + to_string (t));

// -mt_mode is added only if BLAST advertises it (mt_modeP) and it is usable on this platform
bool mt_mode_works = true;
#ifdef __APPLE__
{
  // PD-4560: on Mac -mt_mode is used only for BLAST version >= 2.13.0
  mt_mode_works = false;
  const string blast_version (tmp + "/blast_version");
  exec (fullProg (blast) + " -version > " + blast_version);
  LineInput f (blast_version);
  // Only the first line of the "-version" output is examined
  if (f. nextLine ())
  {
    trim (f. line);
    const string prefix (blast + ": ");
    if (isLeft (f. line, prefix))
    {
      trimSuffix (f. line, "+");
      Istringstream iss;
      iss. reset (f. line. substr (prefix. size ()));
      const SoftwareVersion v (iss);
      iss. reset ("2.13.0");  // PD-4560
      const SoftwareVersion threshold (iss);
      mt_mode_works = (threshold <= v);
    }
  }
}
#endif

if (mt_modeP && mt_mode_works)
  s += " -mt_mode 1";

return s;
}
Expand Down Expand Up @@ -466,6 +498,7 @@ struct ThisApplication : ShellApplication
const bool report_common = getFlag ("report_common");
const string mutation_all = getArg ("mutation_all");
string blast_bin = getArg ("blast_bin");
string hmmer_bin = getArg ("hmmer_bin");
const bool equidistant = getFlag ("report_all_equal");
const bool print_node = getFlag ("print_node");
const string input_name = shellQuote (getArg ("name"));
Expand Down Expand Up @@ -583,6 +616,12 @@ struct ThisApplication : ShellApplication
prog2dir ["blastn"] = blast_bin;
prog2dir ["makeblastdb"] = blast_bin;
}

if (! hmmer_bin. empty ())
{
addDirSlash (hmmer_bin);
prog2dir ["hmmsearch"] = hmmer_bin;
}


if (update)
Expand All @@ -605,7 +644,10 @@ struct ThisApplication : ShellApplication
// Pass the user-supplied BLAST and HMMer directories, if any, on to amrfinder_update
string blast_bin_par;
if (! blast_bin. empty ())
  blast_bin_par = " --blast_bin " + shellQuote (blast_bin);
string hmmer_bin_par;
if (! hmmer_bin. empty ())
  hmmer_bin_par = " --hmmer_bin " + shellQuote (hmmer_bin);
exec (fullProg ("amrfinder_update") + " -d " + shellQuote (dbDir. getParent ()) + ifS (force_update, " --force_update") + blast_bin_par + hmmer_bin_par
        + ifS (quiet, " -q") + ifS (qc_on, " --debug") + " > " + logFName, logFName);
}
else
Expand Down
14 changes: 8 additions & 6 deletions amrfinder_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ struct ThisApplication : ShellApplication
{
addPositional ("DATABASE", "Directory with AMRFinder database");
addKey ("blast_bin", "Directory for BLAST", "", '\0', "BLAST_DIR");
addKey ("hmmer_bin", "Directory for HMMer", "", '\0', "HMMER_DIR");
addFlag ("quiet", "Suppress messages to STDERR", 'q');
version = SVN_REV;
}
Expand All @@ -72,14 +73,15 @@ struct ThisApplication : ShellApplication

void shellBody () const final
{

string dbDir = getArg ("DATABASE");
string blast_bin = getArg ("blast_bin");
string hmmer_bin = getArg ("hmmer_bin");
const bool quiet = getFlag ("quiet");


addDirSlash (dbDir);
addDirSlash (blast_bin);
addDirSlash (hmmer_bin);


Stderr stderr (quiet);
Expand All @@ -89,18 +91,20 @@ struct ThisApplication : ShellApplication

if (! directoryExists (dbDir))
throw runtime_error ("Database directory " + dbDir + " does not exist");

// Register a user-supplied directory for each external program, if given, before findProg() is called
if (! blast_bin. empty ())
  prog2dir ["makeblastdb"] = blast_bin;
findProg ("makeblastdb");

if (! hmmer_bin. empty ())
  prog2dir ["hmmpress"] = hmmer_bin;
findProg ("hmmpress");


// Cf. amrfinder_update.cpp
StringVector dnaPointMuts;
{
LineInput f (dbDir + "taxgroup.tab");

while (f. nextLine ())
{
if (isLeft (f. line, "#"))
Expand All @@ -117,10 +121,8 @@ struct ThisApplication : ShellApplication


stderr << "Indexing" << "\n";

exec (fullProg ("hmmpress") + " -f " + shellQuote (dbDir + "AMR.LIB") + " > /dev/null 2> " + tmp + "/hmmpress.err", tmp + "/hmmpress.err");
setSymlink (dbDir, tmp + "/db", true);

exec (fullProg ("makeblastdb") + " -in " + tmp + "/db/AMRProt" + " -dbtype prot -logfile " + tmp + "/makeblastdb.AMRProt", tmp + "/makeblastdb.AMRProt");
exec (fullProg ("makeblastdb") + " -in " + tmp + "/db/AMR_CDS" + " -dbtype nucl -logfile " + tmp + "/makeblastdb.AMR_CDS", tmp + "/makeblastdb.AMR_CDS");
for (const string& dnaPointMut : dnaPointMuts)
Expand Down
18 changes: 7 additions & 11 deletions amrfinder_update.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ Requirement: the database directory contains subdirectories named by database ve
{
addKey ("database", "Directory for all versions of AMRFinder databases", "$BASE/data", 'd', "DATABASE_DIR");
addKey ("blast_bin", "Directory for BLAST", "", '\0', "BLAST_DIR");
addKey ("hmmer_bin", "Directory for HMMer", "", '\0', "HMMER_DIR");
addFlag ("force_update", "Force updating the AMRFinder database"); // PD-3469
addFlag ("quiet", "Suppress messages to STDERR", 'q');
version = SVN_REV;
Expand Down Expand Up @@ -345,11 +346,13 @@ Requirement: the database directory contains subdirectories named by database ve
{
const string mainDirOrig = getArg ("database");
string blast_bin = getArg ("blast_bin");
string hmmer_bin = getArg ("hmmer_bin");
const bool force_update = getFlag ("force_update");
const bool quiet = getFlag ("quiet");


addDirSlash (blast_bin);
addDirSlash (hmmer_bin);


Stderr stderr (quiet);
Expand Down Expand Up @@ -400,23 +403,13 @@ Requirement: the database directory contains subdirectories named by database ve
ASSERT (! load_data_version. empty ());


#if 0
if (! blast_bin. empty ())
prog2dir ["makeblastdb"] = blast_bin;
findProg ("makeblastdb");
findProg ("hmmpress");
#endif


// User's files
string mainDirS;
{
const Dir mainDir (mainDirOrig);
mainDirS = mainDir. get ();
}

addDirSlash (mainDirS);


const string versionFName ("version.txt");
const string urlDir (URL + load_minor + "/" + load_data_version + "/");
Expand Down Expand Up @@ -493,7 +486,10 @@ Requirement: the database directory contains subdirectories named by database ve
// Pass the user-supplied BLAST and HMMer directories, if any, on to amrfinder_index
string blast_bin_par;
if (! blast_bin. empty ())
  blast_bin_par = " --blast_bin " + shellQuote (blast_bin);
string hmmer_bin_par;
if (! hmmer_bin. empty ())
  hmmer_bin_par = " --hmmer_bin " + shellQuote (hmmer_bin);
// NOTE(review): unlike the other exec() calls, there is no explicit space between the program and its first argument;
//               presumably fullProg() supplies the separator - confirm
exec (fullProg ("amrfinder_index") + shellQuote (latestDir) + blast_bin_par + hmmer_bin_par + ifS (quiet, " -q") + ifS (qc_on, " --debug") + " > " + tmp + "/amrfinder_index.err", tmp + "/amrfinder_index.err");
#else
stderr << "Indexing" << "\n";
exec (fullProg ("hmmpress") + " -f " + shellQuote (latestDir + "AMR.LIB") + " > /dev/null 2> " + tmp + "/hmmpress.err", tmp + "/hmmpress.err");
Expand Down
50 changes: 6 additions & 44 deletions common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,6 @@ void throwf (const string &s)
*cxml << s;
}
throw logic_error (s + "\nStack:\n" + getStack ());

}


Expand Down Expand Up @@ -1287,49 +1286,6 @@ void readLine (istream &is,



// Reads the next token from is.
// Characters of skip are discarded before a token.
// A character of delimeters is returned as a one-character token.
// Otherwise characters are accumulated until a character of skip or delimeters, or the end of the stream;
// the terminating character is put back into the stream.
// Return: noString at the end of the stream
string getColumn (istream &is,
                  const string &skip,
                  const string &delimeters)
{
  // Skipping skip
  for (;;)
  {
    IMPLY (! is. eof (), is. good ());
    const char c = (char) is. get ();
    if (is. eof ())
      return noString;
    ASSERT (c);
    if (charInSet (c, skip))
      continue;
    if (charInSet (c, delimeters))
      return string (1, c);
    is. unget ();
    break;
  }

  string token;
  for (;;)
  {
    IMPLY (! is. eof (), is. good ());
    const char c = (char) is. get ();
    // At the end of the stream nothing was extracted, so there is nothing to put back.
    // unget() here would clear eofbit while failbit stays set,
    // and the next call would not detect the end of the stream.
    if (is. eof ())
      break;
    if (   charInSet (c, skip)
        || charInSet (c, delimeters)
       )
    {
      is. unget ();
      break;
    }
    ASSERT (c);
    token += c;
  }
  ASSERT (! token. empty ());

  return token;
}




hash<string> str_hash;
hash<size_t> size_hash;
Expand Down Expand Up @@ -1520,7 +1476,10 @@ Threads::Threads (size_t threadsToStart_arg,
threads. reserve (threadsToStart);

if (! quiet && verbose (1) && threadsToStart)
{
const OColor c (cerr, Color::green, false, true); // Cf. Progress::report()
cerr << "# Threads started: " << threadsToStart + 1/*main thread*/ << endl;
}
}


Expand Down Expand Up @@ -1763,9 +1722,12 @@ size_t Progress::beingUsed = 0;

void Progress::report () const
{
if (! n)
return;
cerr << '\r';
#ifndef _MSC_VER
cerr << "\033[2K";
const OColor c (cerr, Color::green, false, true); // PAR
#endif
cerr << n;
if (n_max)
Expand Down
14 changes: 2 additions & 12 deletions common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,14 +304,14 @@ inline void section (const string &title,
{
  // Title goes to both cout and cerr; the colors are active only inside the inner scope
  { const OColor oc1 (cout, color, bright, true);
    const OColor oc2 (cerr, color, bright, true);
    couterr << title /*<< " ..."*/;
  }
  couterr << endl;
}
else
{
  // Title goes to cerr only
  { const OColor oc (cerr, color, bright, true);
    cerr << title /*<< " ..."*/;
  }
  cerr << endl;
}
Expand Down Expand Up @@ -1404,14 +1404,6 @@ void readLine (istream &is,
string &s);
// Output: s

// Reads the next token from is:
//   characters of skip are discarded before a token,
//   a character of delimeters forms a one-character token of its own,
//   any other token extends up to the next character of skip or delimeters, or to the end of the stream
string getColumn (istream &is,
const string &skip,
const string &delimeters);
// Return: empty() <=> eof

inline void pressAnyKey ()
{
  // Prompt on cout, then wait until one character can be extracted from cin
  // (formatted extraction: leading whitespace is skipped under the default stream flags)
  cout << "Press any key...";
  char key;
  cin >> key;
}



inline streamsize double2decimals (double r)
Expand Down Expand Up @@ -1980,7 +1972,6 @@ struct Vector : vector<T>
(*this) [index] = value;
else
throwf ("vector [" + to_string (index) +"] is not empty");

}
void eraseAt (size_t index)
  {
    // Erase one element: the half-open range [index, index + 1)
    const size_t next = index + 1;
    eraseMany (index, next);
  }
Expand Down Expand Up @@ -2317,7 +2308,6 @@ struct Vector : vector<T>
res. searchSorted = true;
return res;
}

void setUnion (const Vector<T> &other)
{ if (P::empty ())
*this = other;
Expand Down
13 changes: 13 additions & 0 deletions fasta_check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,19 @@ struct ThisApplication : Application
for (const char c : id)
if (! printable (c))
throw runtime_error (errorS + "Non-printable character in the sequence identifier: " + to_string ((int) c));
// BLAST: PD-4548
// Restrictions on the sequence identifier, checked only if aa is false
// (per the release notes: only for nucleotide sequences).
// NOTE(review): front() and back() require a non-empty id - confirm that an empty identifier is rejected earlier
if (! aa)
{
// '?' is prohibited in the initial position
if (id. front () == '?')
throw runtime_error (errorS + "Sequence identifier starts with '?'");
// ',', ';', '.' and '~' are prohibited in the last position
for (const char c : {',', ';', '.', '~'})
if (id. back () == c)
throw runtime_error (errorS + "Sequence identifier ends with " + strQuote (string (1, c)));
// "\\t" is the two-character text: a backslash followed by 't', not a TAB character
if (contains (id, "\\t"))
throw runtime_error (errorS + "Sequence identifier contains '\\t'");
// Two consecutive commas are prohibited in any position
if (contains (id, ",,"))
throw runtime_error (errorS + "Sequence identifier contains ',,'");
}
if (! first && seqSize == 0)
throw runtime_error (errorS + "Empty sequence");
if (lenF. get () && ! ids. empty ())
Expand Down
4 changes: 3 additions & 1 deletion fasta_extract.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,9 @@ Line format for nucleotide sequences : <id> <start (>=1)> <stop (>= start)> <str
}
processed += process (id, seq, id2segments);
}
// All requested identifiers must have been found in FASTA; report the counts if not
// Assumed: no duplicate identifiers in FASTA
if (processed != id2segments. size ())
  throw runtime_error ("Requested identifiers: " + to_string (id2segments. size ()) + ", but processed: " + to_string (processed));
}
};

Expand Down
Loading

0 comments on commit 1961989

Please sign in to comment.