Skip to content

Commit

Permalink
Add proteinaln2nucl
Browse files Browse the repository at this point in the history
  • Loading branch information
martin-steinegger committed Mar 3, 2018
1 parent 7e0f936 commit 2db9ad4
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/CommandDeclarations.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ extern int lca(int argc, const char **argv, const Command& command);
extern int profile2pssm(int argc, const char **argv, const Command& command);
extern int profile2cs(int argc, const char **argv, const Command& command);
extern int offsetalignment(int argc, const char **argv, const Command& command);
extern int proteinaln2nucl(int argc, const char **argv, const Command& command);
extern int apply(int argc, const char **argv, const Command& command);
extern int shellcompletion(int argc, const char **argv, const Command& command);

Expand Down
6 changes: 6 additions & 0 deletions src/mmseqs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,12 @@ std::vector<struct Command> commands = {
"Martin Steinegger <[email protected]>",
"<i:queryDB> <i:targetDB> <i:alnDB> <o:alnDB>",
CITATION_MMSEQS2},
{"proteinaln2nucl", proteinaln2nucl, &par.onlythreads, COMMAND_DB,
"Map protein alignment to nucleotide alignment",
NULL,
"Martin Steinegger <[email protected]> ",
"<i:alnDB> <o:alnDB>",
CITATION_MMSEQS2},
{"tsv2db", tsv2db, &par.tsv2db, COMMAND_DB,
"Turns a TSV file into a MMseqs database",
NULL,
Expand Down
1 change: 1 addition & 0 deletions src/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,6 @@ set(util_source_files
util/swapresults.cpp
util/translatenucs.cpp
util/tsv2db.cpp
util/proteinaln2nucl.cpp
PARENT_SCOPE
)
99 changes: 99 additions & 0 deletions src/util/proteinaln2nucl.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
//
// Created by mad on 11/28/17.
//
#include <string>
#include <vector>
#include <fstream>
#include <FileUtil.h>
#include <Orf.h>

#include "Alignment.h"
#include "Util.h"
#include "Parameters.h"
#include "Matcher.h"
#include "PrefilteringIndexReader.h"

#include "Debug.h"
#include "DBReader.h"
#include "DBWriter.h"

#ifdef OPENMP
#include <omp.h>
#endif


int proteinaln2nucl(int argc, const char **argv, const Command &command) {
Parameters &par = Parameters::getInstance();
par.parseParameters(argc, argv, command, 2);

Debug(Debug::INFO) << "Alignment database: " << par.db1 << "\n";
DBReader<unsigned int> alnDbr(par.db1.c_str(), par.db1Index.c_str());
alnDbr.open(DBReader<unsigned int>::LINEAR_ACCCESS);

DBWriter resultWriter(par.db2.c_str(), par.db2Index.c_str(), par.threads);
resultWriter.open();
Debug(Debug::INFO) << "Start writing file to " << par.db2 << "\n";

#pragma omp parallel for schedule(dynamic, 10)
for (size_t i = 0; i < alnDbr.getSize(); i++) {
unsigned int thread_idx = 0;
#ifdef OPENMP
thread_idx = static_cast<unsigned int>(omp_get_thread_num());
#endif

unsigned int alnKey = alnDbr.getDbKey(i);
char *data = alnDbr.getData(i);

std::string querySeq;

char buffer[1024];
std::string ss;
ss.reserve(1024);


std::vector<Matcher::result_t> results = Matcher::readAlignmentResults(data, true);
for (size_t j = 0; j < results.size(); j++) {
Matcher::result_t &res = results[j];
bool hasBacktrace = (res.backtrace.size() > 0);
std::string newBacktrace;
newBacktrace.reserve(1024);
res.dbStartPos = res.dbStartPos*3;
res.dbEndPos = res.dbEndPos*3;
res.dbLen = res.dbLen*3;
res.qStartPos = res.qStartPos*3;
res.qEndPos = res.qEndPos*3;
res.qLen = res.qLen*3;
for (size_t pos = 0; pos < res.backtrace.size(); pos++) {
int cnt=0;
if(isdigit(res.backtrace[pos])){
cnt += Util::fast_atoi<int>(res.backtrace.c_str()+pos);
while(isdigit(res.backtrace[pos])){
pos++;
}
}
switch(res.backtrace[pos]){
case 'M':
case 'D':
case 'I':
newBacktrace.append(std::string(cnt*3, res.backtrace[pos]));
break;

}

}
res.backtrace = newBacktrace;

size_t len = Matcher::resultToBuffer(buffer, res, hasBacktrace, true);
ss.append(buffer, len);
}

resultWriter.writeData(ss.c_str(), ss.length(), alnKey, thread_idx);
ss.clear();
}
alnDbr.close();
resultWriter.close();
Debug(Debug::INFO) << "Done." << "\n";

return EXIT_SUCCESS;
}

0 comments on commit 2db9ad4

Please sign in to comment.