Skip to content

Commit

Permalink
Merge pull request #252 from smithlabcode/explicit-zip-in-xcounts
Browse files Browse the repository at this point in the history
Adding option to force gzip output for xcounts
  • Loading branch information
andrewdavidsmith authored Nov 19, 2024
2 parents 6df5926 + f57abfe commit 85d3dce
Showing 1 changed file with 28 additions and 23 deletions.
51 changes: 28 additions & 23 deletions src/utils/xcounts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@

#include <bamxx.hpp>

#include <charconv>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
#include <charconv>
#include <system_error>
#include <vector>

// from smithlab_cpp
#include "OptionParser.hpp"
Expand All @@ -40,13 +40,12 @@ using std::endl;
using std::runtime_error;
using std::string;
using std::to_chars;
using std::vector;
using std::to_string;
using std::vector;

using bamxx::bgzf_file;


template<typename T>
template <typename T>
static inline uint32_t
fill_output_buffer(const uint32_t offset, const MSite &s, T &buf) {
auto buf_end = buf.data() + buf.size();
Expand All @@ -59,11 +58,11 @@ fill_output_buffer(const uint32_t offset, const MSite &s, T &buf) {
return std::distance(buf.data(), res.ptr);
}


int
main_xcounts(int argc, const char **argv) {
try {
bool verbose = false;
bool gzip_output = false;
bool require_coverage = false;
size_t n_threads = 1;
string genome_file;
Expand All @@ -78,14 +77,17 @@ main_xcounts(int argc, const char **argv) {
"<counts-file> (\"-\" for standard input)", 1);
opt_parse.add_opt("output", 'o', "output file (default is standard out)",
false, outfile);
opt_parse.add_opt("chroms", 'c', "make header from this reference",
false, genome_file);
opt_parse.add_opt("reads", 'r', "ouput only sites with reads",
false, require_coverage);
opt_parse.add_opt("header", 'h', "use this file to generate header",
false, header_file);
opt_parse.add_opt("chroms", 'c', "make header from this reference", false,
genome_file);
opt_parse.add_opt("reads", 'r', "ouput only sites with reads", false,
require_coverage);
opt_parse.add_opt("header", 'h', "use this file to generate header", false,
header_file);
opt_parse.add_opt("threads", 't', "threads for compression (use few)",
false, n_threads);
opt_parse.add_opt("zip", 'z',
"gzip compress output (automatic if input is gzip)",
false, gzip_output);
opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose);
std::vector<string> leftover_args;
opt_parse.parse(argc, argv, leftover_args);
Expand All @@ -112,21 +114,22 @@ main_xcounts(int argc, const char **argv) {
vector<string> chrom_names;
vector<uint64_t> chrom_sizes;
if (!genome_file.empty()) {
const int ret =
get_chrom_sizes_for_counts_header(n_threads, genome_file,
chrom_names, chrom_sizes);
const int ret = get_chrom_sizes_for_counts_header(
n_threads, genome_file, chrom_names, chrom_sizes);
if (ret)
throw dnmt_error{"failed to get chrom sizes from: " + genome_file};
}

bamxx::bam_tpool tpool(n_threads);
bgzf_file in(filename, "r");
if (!in) throw dnmt_error{"could not open file: " + filename};
if (!in)
throw dnmt_error{"could not open file: " + filename};

const auto outfile_mode = in.is_compressed() ? "w" : "wu";
const auto outfile_mode = (gzip_output || in.is_compressed()) ? "w" : "wu";

bgzf_file out(outfile, outfile_mode);
if (!out) throw dnmt_error{"error opening output file: " + outfile};
if (!out)
throw dnmt_error{"error opening output file: " + outfile};

if (n_threads > 1) {
if (in.is_bgzf())
Expand All @@ -142,7 +145,8 @@ main_xcounts(int argc, const char **argv) {
// use the kstring_t type to more directly use the BGZF file
kstring_t line{0, 0, nullptr};
const int ret = ks_resize(&line, 1024);
if (ret) throw dnmt_error("failed to acquire buffer");
if (ret)
throw dnmt_error("failed to acquire buffer");

vector<char> buf(128);

Expand All @@ -154,15 +158,17 @@ main_xcounts(int argc, const char **argv) {
MSite site;
while (status_ok && bamxx::getline(in, line)) {
if (is_counts_header_line(line.s)) {
if (!genome_file.empty() || !header_file.empty()) continue;
if (!genome_file.empty() || !header_file.empty())
continue;
found_header = true;
const string header_line{line.s};
write_counts_header_line(header_line, out);
continue;
}

status_ok = site.initialize(line.s, line.s + line.l);
if (!status_ok || !found_header) break;
if (!status_ok || !found_header)
break;

if (site.chrom != prev_chrom) {
if (verbose)
Expand All @@ -183,8 +189,7 @@ main_xcounts(int argc, const char **argv) {
ks_free(&line);

if (!status_ok) {
cerr << "failed converting "
<< filename << " to " << outfile << endl;
cerr << "failed converting " << filename << " to " << outfile << endl;
return EXIT_FAILURE;
}
if (!found_header) {
Expand Down

0 comments on commit 85d3dce

Please sign in to comment.