Skip to content

Commit

Permalink
Merge pull request #26 from splicebox/stranded
Browse files Browse the repository at this point in the history
Add the option --stranded to handle strand-specific RNA-seq library
  • Loading branch information
mourisl authored May 7, 2022
2 parents b26696f + d322427 commit bcebe77
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 3 deletions.
37 changes: 36 additions & 1 deletion FindJunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ bool flagStrict ;
int junctionCnt ;
bool anchorBoth ;
bool validRead ;
int strandedLib ; // 0-unstranded, 1-rf, 2-fr

int flank ;
struct _cigarSeg
Expand All @@ -88,6 +89,7 @@ void PrintHelp()
"\t-j xx [-B]: Output the junctions using 1-based coordinates. The format is \"reference id\" start end \"# of read\" strand.(They are sorted)\n and the xx is an integer means the maximum unqualified anchor length for a splice junction(default=8). If -B, the splice junction must be supported by a read whose both anchors are longer than xx.\n"
"\t-a: Output all the junctions, and use non-positive support number to indicate unqualified junctions.\n"
"\t-y: If the bits from YS field of bam matches the argument, we filter the alignment (default: 4).\n"
"\t--stranded un/rf/fr: stranded library fr-firststrand/secondstrand (default: not set).\n"
) ;
}

Expand Down Expand Up @@ -741,7 +743,19 @@ void cigar2string( bam1_core_t *c, uint32_t *in_cigar, char *out_cigar )
}
}


char GetStrandFromStrandedLib(int flag)
{
if (strandedLib == 0)
return '?' ;
if ((flag & 0x80) == 0) // first read or single-end case
{
return (((flag >> 4) & 1) ^ (strandedLib & 1)) ? '-' : '+' ;
}
else
{
return (((flag >> 4) & 1) ^ (strandedLib & 1)) ? '+' : '-' ;
}
}

int main( int argc, char *argv[] )
{
Expand All @@ -768,6 +782,7 @@ int main( int argc, char *argv[] )
flagPrintJunction = true ;
flank = 8 ;
filterYS = 4 ;
strandedLib = 0 ;

contradictedReads = NULL ;

Expand Down Expand Up @@ -814,6 +829,16 @@ int main( int argc, char *argv[] )
hasMateReadIdSuffix = true ;
++i ;
}
else if ( !strcmp( argv[i], "--stranded" ) )
{
if ( !strcmp(argv[i + 1], "un") )
strandedLib = 0 ;
else if ( !strcmp(argv[i + 1], "rf") )
strandedLib = 1 ;
else if ( !strcmp(argv[i + 1], "fr" ) )
strandedLib = 2 ;
++i ;
}
else if ( i > 1 )
{
printf( "Unknown option %s\n", argv[i] ) ;
Expand Down Expand Up @@ -1021,6 +1046,11 @@ int main( int argc, char *argv[] )
noncanonStrandInfo = -1 ;
}
}
else if ( strandedLib != 0 )
{
strand = GetStrandFromStrandedLib(flag) ;
noncanonStrandInfo = -1 ;
}
else
{
strand = '?' ;
Expand All @@ -1034,6 +1064,11 @@ int main( int argc, char *argv[] )
strand = '-' ;
else if ( strstr( line, "XS:A:+" ) )
strand = '+' ;
else if ( strandedLib != 0)
{
strand = GetStrandFromStrandedLib(flag) ;
noncanonStrandInfo = -1 ;
}
else
{
strand = '?' ;
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ PsiCLASS depends on [pthreads](http://en.wikipedia.org/wiki/POSIX_Threads) and s
-c FLOAT: only use the subexons with classifier score <= than the given number. (default: 0.05)
--sa FLOAT: the minimum average number of supported read for retained introns (default: 0.5)
--vd FLOAT : the minimum average coverage depth of a transcript to be reported in voting (defaults: 1.0)
--stranded STRING: un/rf/fr for library unstranded/fr-firstand/fr-secondstrand (default: not used)
--maxDpConstraintSize: the number of subexons a constraint can cover in DP. (default: 7. -1 for inf)
--primaryParalog: use primary alignment to retain paralog genes (default: use unique alignments)
--tssTesQuantile FLOAT: the quantile for transcription start/end sites in subexon graph (default: 0.5; 1.0 for longest exon margin)
Expand Down
23 changes: 22 additions & 1 deletion alignments.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class Alignments
bool allowClip ;
bool hasClipHead, hasClipTail ;
int segmentsSum ; // the sum of segments.
int strandedLib ; // 0-unstranded, 1-rf, 2-fr

bool atBegin ;
bool atEnd ;
Expand Down Expand Up @@ -77,7 +78,10 @@ class Alignments
fragStdev = 0 ;
readLen = 0 ;
matePaired = false ;

strandedLib = 0 ;
}

~Alignments()
{
if ( b )
Expand Down Expand Up @@ -134,6 +138,11 @@ class Alignments
return hasClipTail ;
}

void SetStrandedLib(int libtype)
{
strandedLib = libtype ;
}

int Next()
{
int i ;
Expand Down Expand Up @@ -393,7 +402,7 @@ class Alignments
// -1:minus, 0: unknown, 1:plus
int GetStrand()
{
if ( segCnt == 1 )
if ( segCnt == 1 && strandedLib == 0)
return 0 ;
if ( bam_aux_get( b, "XS" ) )
{
Expand All @@ -402,6 +411,18 @@ class Alignments
else
return 1 ;
}
else if (strandedLib != 0)
{
int flag = b->core.flag ;
if ((flag & 0x80) == 0) // first read or single-end case
{
return (((flag >> 4) & 1) ^ (strandedLib & 1)) ? -1 : 1 ;
}
else
{
return (((flag >> 4) & 1) ^ (strandedLib & 1)) ? 1 : -1 ;
}
}
else
return 0 ;
}
Expand Down
17 changes: 17 additions & 0 deletions classes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ char usage[] = "./classes [OPTIONS]:\n"
"\t-f FLOAT: filter the transcript from the gene if its abundance is lower than the given number percent of the most abundant one. (default: 0.05)\n"
"\t-d FLOAT: filter the transcript whose average read depth is less than the given number. (default: 2.5)\n"
"\t--ls STRING: path to the file of the list of single-sample subexon files. (default: not used)\n"
"\t--stranded STRING: un/rf/fr for library unstranded/fr-firstand/fr-secondstrand (default: not used)\n"
"\t--hasMateIdSuffix: the read id has suffix such as .1, .2 for a mate pair. (default: false)\n"
"\t--maxDpConstraintSize: the maximum number of subexons a constraint can cover in dynamic programming. (default: 7; -1 for inf)\n"
"\t--primaryParalog: use primary alignment to retain paralog genes instead of unique alignments. (default: not used)\n"
Expand All @@ -35,6 +36,7 @@ static struct option long_options[] =
{ "hasMateIdSuffix", no_argument, 0, 10002 },
{ "primaryParalog", no_argument, 0, 10003 },
{ "maxDpConstraintSize", required_argument, 0, 10004 },
{ "stranded", required_argument, 0, 10005 },
{ (char *)0, 0, 0, 0}
} ;

Expand Down Expand Up @@ -114,6 +116,7 @@ int main( int argc, char *argv[] )
bool hasMateReadIdSuffix = false ;
bool usePrimaryAsUnique = false ;
int maxDpConstraintSize = 7 ;
int strandedLib = 0 ;

std::vector<Alignments> alignmentFiles ;
SubexonCorrelation subexonCorrelation ;
Expand Down Expand Up @@ -191,6 +194,13 @@ int main( int argc, char *argv[] )
{
maxDpConstraintSize = atoi(optarg) ;
}
else if ( c == 10005 ) // stranded
{
if (!strcmp(optarg, "rf"))
strandedLib = 1 ;
else if (!strcmp(optarg, "fr"))
strandedLib = 2 ;
}
else
{
printf( "%s", usage ) ;
Expand All @@ -208,6 +218,13 @@ int main( int argc, char *argv[] )
exit( 1 ) ;
}

if (strandedLib != 0)
{
size = alignmentFiles.size() ;
for ( i = 0 ; i < size ; ++i )
alignmentFiles[i].SetStrandedLib(strandedLib) ;
}


if ( alignmentFiles.size() < 50 )
{
Expand Down
9 changes: 8 additions & 1 deletion psiclass
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ die "Usage: ./psiclass [OPTIONS]\n".
"\t-c FLOAT: only use the subexons with classifier score <= than the given number (default: 0.05)\n".
"\t--sa FLOAT: the minimum average number of supported read for retained introns (default: 0.5)\n".
"\t--vd FLOAT : the minimum average coverage depth of a transcript to be reported (defaults: 1.0)\n".
"\t--stranded STRING: un/rf/fr for library unstranded/fr-firstand/fr-secondstrand (default: not used)\n".
"\t--maxDpConstraintSize: the number of subexons a constraint can cover in DP. (default: 7. -1 for inf)\n".
"\t--bamGroup STRING: path to the file listing the group id of BAMs in the --lb file (default: not used)\n".
"\t--primaryParalog: use primary alignment to retain paralog genes (default: use unique alignments)\n".
Expand Down Expand Up @@ -170,9 +171,15 @@ for ( $i = 0 ; $i < @ARGV ; ++$i )
$combineSubexonsOpt .= "-q ".$ARGV[$i + 1] ;
++$i ;
}
elsif ( $ARGV[$i] eq "--stranded" )
{
$juncOpt .= " --stranded ".$ARGV[$i + 1] ;
$classesOpt .= " --stranded ".$ARGV[$i + 1] ;
++$i ;
}
elsif ( $ARGV[$i] eq "--version" )
{
die "PsiCLASS v1.0.2\n" ;
die "PsiCLASS v1.0.3\n" ;
}
else
{
Expand Down

0 comments on commit bcebe77

Please sign in to comment.