diff --git a/src/common/counts_header.cpp b/src/common/counts_header.cpp index 051eb89e..228602fe 100644 --- a/src/common/counts_header.cpp +++ b/src/common/counts_header.cpp @@ -18,6 +18,8 @@ #include "counts_header.hpp" +#include +#include #include #include #include @@ -30,6 +32,7 @@ #include #include "bamxx.hpp" +#include "dnmt_error.hpp" using std::vector; using std::string; @@ -52,6 +55,20 @@ write_counts_header_from_chrom_sizes(const vector &chrom_names, } +void +write_counts_header_from_file(const string &header_file, bgzf_file &out) { + std::ifstream in(header_file); + if (!in.is_open()) { + throw dnmt_error("failed to open header file: " + header_file); + } + string line; + while(getline(in, line)) { + out.write(line + '\n'); + } + in.close(); +} + + inline bgzf_file & getline(bgzf_file &file, kstring_t &line) { if (file.f == nullptr) return file; diff --git a/src/common/counts_header.hpp b/src/common/counts_header.hpp index bb3f851b..d19d0398 100644 --- a/src/common/counts_header.hpp +++ b/src/common/counts_header.hpp @@ -30,6 +30,10 @@ write_counts_header_from_chrom_sizes(const std::vector &chrom_names const std::vector &chrom_sizes, bamxx::bgzf_file &out); +void +write_counts_header_from_file(const std::string &header_file, + bamxx::bgzf_file &out); + // returns -1 on failure, 0 on success int get_chrom_sizes_for_counts_header(const uint32_t n_threads, diff --git a/src/utils/xcounts.cpp b/src/utils/xcounts.cpp index 372f970e..2c77a49a 100644 --- a/src/utils/xcounts.cpp +++ b/src/utils/xcounts.cpp @@ -85,6 +85,7 @@ main_xcounts(int argc, const char **argv) { bool require_coverage = false; size_t n_threads = 1; string genome_file; + string header_file; string outfile{"-"}; const string description = @@ -99,6 +100,8 @@ main_xcounts(int argc, const char **argv) { false, genome_file); opt_parse.add_opt("reads", 'r', "ouput only sites with reads", false, require_coverage); + opt_parse.add_opt("header", 'h', "use this file to generate header", + false, header_file); opt_parse.add_opt("threads", 't', "threads for compression (use few)", false, n_threads); std::vector leftover_args; @@ -150,7 +153,9 @@ main_xcounts(int argc, const char **argv) { tpool.set_io(out); } - if (!genome_file.empty()) + if (!header_file.empty()) + write_counts_header_from_file(header_file, out); + else if (!genome_file.empty()) write_counts_header_from_chrom_sizes(chrom_names, chrom_sizes, out); // use the kstring_t type to more directly use the BGZF file @@ -163,17 +168,20 @@ main_xcounts(int argc, const char **argv) { uint32_t offset = 0; string prev_chrom; bool status_ok = true; + bool found_header = (!genome_file.empty() || !header_file.empty()); MSite site; while (status_ok && getline(in, line)) { if (is_counts_header_line(line.s)) { - if (!genome_file.empty()) continue; + if (!genome_file.empty() || !header_file.empty()) continue; + found_header = true; const string header_line{line.s}; write_counts_header_line(header_line, out); continue; } + status_ok = site.initialize(line.s, line.s + line.l); - if (!status_ok) break; + if (!status_ok || !found_header) break; if (site.chrom != prev_chrom) { prev_chrom = site.chrom; @@ -196,6 +204,10 @@ main_xcounts(int argc, const char **argv) { << filename << " to " << outfile << endl; return EXIT_FAILURE; } + if (!found_header) { + cerr << "no header provided or found" << endl; + return EXIT_FAILURE; + } } catch (const std::exception &e) { cerr << e.what() << endl;