Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: added per-read anchor requirement to junction extract #4

Open
wants to merge 1 commit into
base: 189/td/flag-and-quality-filtering
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions src/cis-splice-effects/cis_splice_effects_identifier.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ void CisSpliceEffectsIdentifier::usage(ostream& out) {
out << "\t\t" << "-t STR\tTag used in bam to label strand. [XS]" << endl;
out << "\t\t" << "-a INT\tMinimum anchor length. Junctions which satisfy a minimum \n"
<< "\t\t\t " << "anchor length on both sides are reported. [8]" << endl;
out << "\t\t" << "-A INT\tMinimum read anchor length. Reads which satisfy a minimum \n"
<< "\t\t\t " << "anchor length on both sides 'support' a junction. [0]" << endl;
out << "\t\t" << "-m INT\tMinimum intron length. [70]" << endl;
out << "\t\t" << "-M INT\tMaximum intron length. [500000]" << endl;
out << "\t\t" << "-f INT\tOnly use alignments where all flag bits set here are set. [0]" << endl;
Expand Down Expand Up @@ -116,7 +118,7 @@ void CisSpliceEffectsIdentifier::parse_options(int argc, char* argv[]) {
optind = 1; //Reset before parsing again.
stringstream help_ss;
char c;
while((c = getopt(argc, argv, "o:w:v:j:e:Ei:ISht:s:a:m:M:f:F:q:b:C")) != -1) {
while((c = getopt(argc, argv, "o:w:v:j:e:Ei:ISht:s:a:A:m:M:f:F:q:b:C")) != -1) {
switch(c) {
case 'o':
output_file_ = string(optarg);
Expand Down Expand Up @@ -167,6 +169,9 @@ void CisSpliceEffectsIdentifier::parse_options(int argc, char* argv[]) {
case 'a':
min_anchor_length_ = atoi(optarg);
break;
case 'A':
min_read_anchor_length_ = atoi(optarg);
break;
case 'm':
min_intron_length_ = atoi(optarg);
break;
Expand Down Expand Up @@ -298,7 +303,8 @@ void CisSpliceEffectsIdentifier::identify() {
ref_to_pass = "NA";
}
JunctionsExtractor je1(bam_, variant_region, strandness_,
strand_tag_, min_anchor_length_, min_intron_length_, max_intron_length_,
strand_tag_, min_anchor_length_, min_read_anchor_length_,
min_intron_length_, max_intron_length_,
filter_flags_, require_flags_, min_map_qual_, ref_to_pass);
je1.identify_junctions_from_BAM();
vector<Junction> junctions = je1.get_all_junctions();
Expand Down
6 changes: 4 additions & 2 deletions src/cis-splice-effects/cis_splice_effects_identifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,10 @@ class CisSpliceEffectsIdentifier {
//tag used in BAM to denote strand, default "XS"
string strand_tag_;
//Minimum anchor length for junctions
//Junctions need at least this many bp overlap
// on both ends.
//Junctions need at least this many bp overlap on both ends.
uint32_t min_anchor_length_;
//Reads need at least this many bp overlap on both ends to support junction.
uint32_t min_read_anchor_length_;
//Minimum length of an intron, i.e min junction width
uint32_t min_intron_length_;
//Maximum length of an intron, i.e max junction width
Expand Down Expand Up @@ -118,6 +119,7 @@ class CisSpliceEffectsIdentifier {
strandness_(-1),
strand_tag_("XS"),
min_anchor_length_(8),
min_read_anchor_length_(0),
min_intron_length_(70),
max_intron_length_(500000),
filter_flags_(0),
Expand Down
17 changes: 15 additions & 2 deletions src/junctions/junctions_extractor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,17 @@ int JunctionsExtractor::parse_options(int argc, char *argv[]) {
optind = 1; //Reset before parsing again.
int c;
stringstream help_ss;
while((c = getopt(argc, argv, "ha:m:M:f:F:q:o:r:t:s:b:")) != -1) {
while((c = getopt(argc, argv, "ha:A:m:M:f:F:q:o:r:t:s:b:")) != -1) {
switch(c) {
case 'h':
usage(help_ss);
throw common::cmdline_help_exception(help_ss.str());
case 'a':
min_anchor_length_ = atoi(optarg);
break;
case 'A':
min_read_anchor_length_ = atoi(optarg);
break;
case 'm':
min_intron_length_ = atoi(optarg);
break;
Expand Down Expand Up @@ -123,6 +126,7 @@ int JunctionsExtractor::parse_options(int argc, char *argv[]) {
}

cerr << "Minimum junction anchor length: " << min_anchor_length_ << endl;
cerr << "Minimum read anchor length: " << min_read_anchor_length_ << endl;
cerr << "Minimum intron length: " << min_intron_length_ << endl;
cerr << "Maximum intron length: " << max_intron_length_ << endl;
cerr << "Require alignment flags: " << require_flags_ << endl;
Expand All @@ -144,6 +148,8 @@ int JunctionsExtractor::usage(ostream& out) {
out << "Options:" << endl;
out << "\t\t" << "-a INT\tMinimum anchor length. Junctions which satisfy a minimum \n"
<< "\t\t\t " << "anchor length on both sides are reported. [8]" << endl;
out << "\t\t" << "-A INT\tMinimum read anchor length. Reads which satisfy a minimum \n"
<< "\t\t\t " << "anchor length on both sides 'support' a junction. [0]" << endl;
out << "\t\t" << "-m INT\tMinimum intron length. [70]" << endl;
out << "\t\t" << "-M INT\tMaximum intron length. [500000]" << endl;
out << "\t\t" << "-f INT\tOnly use alignments where all flag bits set here are set. [0]" << endl;
Expand Down Expand Up @@ -175,12 +181,19 @@ string JunctionsExtractor::get_new_junction_name() {
return name_ss.str();
}

//Do some basic qc on the junction
//Update if junction passes QC based on current read alignment
bool JunctionsExtractor::junction_qc(Junction &j1) {
// don't add support for junction if intron is wrong size
if(j1.end - j1.start < min_intron_length_ ||
j1.end - j1.start > max_intron_length_) {
return false;
}

// don't add support for junction if read isn't sufficiently anchored
if(j1.start - j1.thick_start < min_read_anchor_length_) return false;
if(j1.thick_end - j1.end < min_read_anchor_length_) return false;

// add support, update if this junction is sufficiently anchored
if(j1.start - j1.thick_start >= min_anchor_length_)
j1.has_left_min_anchor = true;
if(j1.thick_end - j1.end >= min_anchor_length_)
Expand Down
10 changes: 7 additions & 3 deletions src/junctions/junctions_extractor.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,10 @@ class JunctionsExtractor {
//Reference FASTA file
string ref_;
//Minimum anchor length for junctions
//Junctions need atleast this many bp overlap
// on both ends.
//Junctions need at least this many bp overlap on both ends.
uint32_t min_anchor_length_;
//Reads need at least this many bp overlap to support a junction
uint32_t min_read_anchor_length_;
//Minimum length of an intron, i.e min junction width
uint32_t min_intron_length_;
//Maximum length of an intron, i.e max junction width
Expand Down Expand Up @@ -190,6 +191,7 @@ class JunctionsExtractor {
//Default constructor
JunctionsExtractor() {
min_anchor_length_ = 8;
min_read_anchor_length_ = 0;
min_intron_length_ = 70;
max_intron_length_ = 500000;
filter_flags_ = 0;
Expand All @@ -211,6 +213,7 @@ class JunctionsExtractor {
int strandness1,
string strand_tag1,
uint32_t min_anchor_length1,
uint32_t min_read_anchor_length1,
uint32_t min_intron_length1,
uint32_t max_intron_length1,
uint16_t filter_flags,
Expand All @@ -222,7 +225,8 @@ class JunctionsExtractor {
strandness_(strandness1),
strand_tag_(strand_tag1),
min_anchor_length_(min_anchor_length1),
min_intron_length_(min_anchor_length1),
min_read_anchor_length_(min_read_anchor_length1),
min_intron_length_(min_intron_length1),
max_intron_length_(max_intron_length1),
filter_flags_(filter_flags),
require_flags_(require_flags),
Expand Down