Skip to content

Commit

Permalink
Add the option to adjust boundary exon length by specifying the quant…
Browse files Browse the repository at this point in the history
…ile of samples to use.
  • Loading branch information
mourisl committed Apr 19, 2021
1 parent 235c2ee commit 1c37f0e
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 6 deletions.
23 changes: 18 additions & 5 deletions CombineSubexons.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ char usage[] = "combineSubexons [options]\n"
"Required options:\n"
"\t-s STRING: the path to the predicted subexon information. Can use multiple -s to specify multiple subexon prediction files\n"
"\t\tor\n"
"\t--ls STRING: the path to file of the list of the predicted subexon information.\n" ;
"\t--ls STRING: the path to file of the list of the predicted subexon information.\n"
"Optional options:\n"
"\t-q FLOAT: the quantile of samples to determine the extension of subexon soft boundaries. (default: 0.5)"
;

struct _overhang
{
Expand Down Expand Up @@ -526,6 +529,8 @@ int main( int argc, char *argv[] )
Blocks regions ;
Alignments alignments ;

double exonSoftBoundaryMergeQuantile = 0.5 ;

if ( argc == 1 )
{
printf( "%s", usage ) ;
Expand Down Expand Up @@ -557,6 +562,11 @@ int main( int argc, char *argv[] )
files.push_back( fileName ) ;
}
}
else if ( !strcmp( argv[i], "-q" ) )
{
sscanf( argv[i + 1], "%lf", &exonSoftBoundaryMergeQuantile ) ;
++i ;
}
}
int fileCnt = files.size() ;
// Obtain the chromosome ids through bam file.
Expand Down Expand Up @@ -963,8 +973,11 @@ int main( int argc, char *argv[] )
{
if ( j - i >= 3 )
{
rawSubexons[i].end = rawSubexons[ (i + j - 1) / 2 ].start ;
rawSubexons[j - 1].start = rawSubexons[ (i + j - 1) / 2].end ;
double adjustQuantile = exonSoftBoundaryMergeQuantile ;
if ( adjustQuantile > 0.5 )
adjustQuantile = 0.5 ;
rawSubexons[i].end = rawSubexons[ i + ( j - 1 - i ) * adjustQuantile ].start ;
rawSubexons[j - 1].start = rawSubexons[ i + ( j - 1 - i ) * (1 - adjustQuantile) ].end ;
}

if ( rawSubexons[i].end + 1 == rawSubexons[j - 1].start )
Expand All @@ -985,11 +998,11 @@ int main( int argc, char *argv[] )

if ( rawSubexons[i].leftType == 0 && rawSubexons[i].rightType != 0 )
{
rawSubexons[i].start = rawSubexons[ ( i + j - 1 ) / 2 ].start ;
rawSubexons[i].start = rawSubexons[ i + ( j - 1 - i ) * ( 1 - exonSoftBoundaryMergeQuantile ) ].start ;
}
else if ( rawSubexons[i].rightType == 0 && rawSubexons[i].leftType != 0 )
{
rawSubexons[i].end = rawSubexons[ ( i + j - 1 ) / 2 ].end ;
rawSubexons[i].end = rawSubexons[ i + ( j - 1 - i ) * exonSoftBoundaryMergeQuantile ].end ;
}

subexons.push_back( rawSubexons[i] ) ;
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ PsiCLASS depends on [pthreads](http://en.wikipedia.org/wiki/POSIX_Threads) and s
--vd FLOAT : the minimum average coverage depth of a transcript to be reported in voting (defaults: 1.0)
--maxDpConstraintSize: the number of subexons a constraint can cover in DP. (default: 7. -1 for inf)
--primaryParalog: use primary alignment to retain paralog genes (default: use unique alignments)
--tssTesQuantile FLOAT: the quantile for transcription start/end sites in subexon graph (default: 0.5; 1.0 for longest exon margin)
--version: print version and exit
--stage INT: (default: 0)
0-start from the beginning - building the splice site file for each sample
Expand Down
10 changes: 9 additions & 1 deletion psiclass
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ die "Usage: ./psiclass [OPTIONS]\n".
"\t--maxDpConstraintSize: the number of subexons a constraint can cover in DP. (default: 7. -1 for inf)\n".
"\t--bamGroup STRING: path to the file listing the group id of BAMs in the --lb file (default: not used)\n".
"\t--primaryParalog: use primary alignment to retain paralog genes (default: use unique alignments)\n".
"\t--tssTesQuantile FLOAT: the quantile for transcription start/end sites in subexon graph (default: 0.5)\n".
#"\t--mateIdx INT: the read id has suffix such as .1, .2 for a mate pair. (default: auto)\n".
"\t--version: print version and exit\n".
"\t--stage INT: (default: 0)\n".
Expand Down Expand Up @@ -55,6 +56,7 @@ my $stage = 0 ;
my $classesOpt = "" ;
my $juncOpt = "" ;
my $trustSpliceOpt = "" ;
my $combineSubexonsOpt = "" ;
my $voteOpt = "" ;
my $outdir = "." ;
my $mateIdx = -1 ;
Expand Down Expand Up @@ -162,6 +164,12 @@ for ( $i = 0 ; $i < @ARGV ; ++$i )
$classesOpt .= " --maxDpConstraintSize ".$ARGV[$i + 1] ;
++$i ;
}
elsif ( $ARGV[$i] eq "--tssTesQuantile" )
{
# The larger the value, the longer the exon is.
$combineSubexonsOpt .= "-q ".$ARGV[$i + 1] ;
++$i ;
}
elsif ( $ARGV[$i] eq "--version" )
{
die "PsiCLASS v1.0.2\n" ;
Expand Down Expand Up @@ -372,7 +380,7 @@ if ( $stage <= 1 )
# combine the subexons.
if ( $stage <= 2 )
{
$cmd = "$WD/combine-subexons --ls $outdir/subexon/${prefix}subexon.list > $outdir/subexon/${prefix}subexon_combined.out" ;
$cmd = "$WD/combine-subexons --ls $outdir/subexon/${prefix}subexon.list $combineSubexonsOpt > $outdir/subexon/${prefix}subexon_combined.out" ;
system_call( "$cmd" ) ;
}

Expand Down

0 comments on commit 1c37f0e

Please sign in to comment.