Skip to content

Commit

Permalink
Modifications to the TNRunner classes, updated VCFCallFreq to work off
Browse files Browse the repository at this point in the history
the file system, included log4j.properties in the jar to silence error
messages.
  • Loading branch information
DavidAustinNix committed Sep 27, 2018
1 parent 34e4ec6 commit 557d41a
Show file tree
Hide file tree
Showing 14 changed files with 596 additions and 228 deletions.
1 change: 1 addition & 0 deletions .classpath
Original file line number Diff line number Diff line change
Expand Up @@ -70,5 +70,6 @@
<classpathentry kind="lib" path="LibraryJars/json-20140107.jar"/>
<classpathentry kind="lib" path="LibraryJars/commons-codec-1.10.jar"/>
<classpathentry kind="lib" path="LibraryJars/sqljdbc41.jar"/>
<classpathentry combineaccessrules="false" kind="src" path="/Query"/>
<classpathentry kind="output" path="Classes"/>
</classpath>
397 changes: 295 additions & 102 deletions Documentation/USeqDocumentation/cmdLnMenus.html

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ public class BamConcordance {
private double minAFForHis = 0.05;
private int minBaseQuality = 20;
private int minMappingQuality = 20;
private File jsonOutputFile = null;

//internal fields
private File tempDirectory;
Expand All @@ -39,6 +40,8 @@ public class BamConcordance {
private Similarity[] similarities;
private String[] sampleNames = null;
private String bamNames = null;
private String[] similarityForJson = null;
private String[] genderForJson = null;

//internal fields
ConcordanceChunk[] runners;
Expand All @@ -61,6 +64,8 @@ public BamConcordance(String[] args){

printHist();

saveJson();

//finish and calc run time
double diffTime = ((double)(System.currentTimeMillis() -startTime))/60000;
System.out.println("\nDone! "+Math.round(diffTime)+" Min\n");
Expand Down Expand Up @@ -154,20 +159,26 @@ public void printThresholds(){
}

/**Calculates the max match for every Similarity, sorts the similarities array, and prints
 * the formatted similarities to standard out under a "Stats:" header. Each formatted line is
 * also stored in similarityForJson, which saveJson() later writes under the "similarities" key.*/
public void printStats(){
similarityForJson = new String[similarities.length];
//calculate max match and sort
for (int i=0; i< similarities.length; i++) similarities[i].calculateMaxMatch();
Arrays.sort(similarities);
System.out.println("\nStats:");
//NOTE(review): this listing looks like a rendered diff; the next line appears to be the pre-change print loop that the caching loop below replaced. As shown, every line would print twice - confirm against the real file.
for (int i=0; i< similarities.length; i++) System.out.println(similarities[i].toString(sampleNames));
//cache each formatted line for the json output, then echo it to standard out
for (int i=0; i< similarities.length; i++) {
similarityForJson[i] = similarities[i].toString(sampleNames);
System.out.println(similarityForJson[i]);
}
}

/**Prints, for each index of afHist, a tab delimited line containing the sample name, the
 * ratioCenterVsLast() value for afHist[i] (all chromosomes), the same for chrXAfHist[i] (chrX),
 * and the log2 of their quotient. Each line is also stored in genderForJson, which saveJson()
 * later writes under the "genderChecks" key.*/
public void printGenderRatios(){
//one slot per histogram; sampleNames is indexed with the same i, so it is presumably parallel to afHist - verify
genderForJson = new String[afHist.length];
IO.pl("Het/Hom histogram AF count ratios for AllChrs, ChrX, log2(All/X)");
for (int i=0; i< afHist.length; i++){
double allChr = ratioCenterVsLast(afHist[i]);
double chrX = ratioCenterVsLast(chrXAfHist[i]);
//NOTE(review): a chrX ratio of zero would make this quotient infinite or NaN; how Num.log2 and Num.formatNumber handle that is not visible here - confirm
double lgrto = Num.log2(allChr/chrX);
//NOTE(review): this listing looks like a rendered diff; the next line appears to be the pre-change print that the two lines after it replaced - confirm against the real file
IO.pl(sampleNames[i] +"\t"+Num.formatNumber(allChr, 3)+"\t"+Num.formatNumber(chrX, 3)+"\t"+Num.formatNumber(lgrto, 3));
//cache the formatted line for the json output, then print it
genderForJson[i] = sampleNames[i] +"\t"+Num.formatNumber(allChr, 3)+"\t"+Num.formatNumber(chrX, 3)+"\t"+Num.formatNumber(lgrto, 3);
IO.pl(genderForJson[i]);
}
}

Expand Down Expand Up @@ -208,6 +219,28 @@ public void printHist(){
System.out.println();
}
}

/**Writes a gzipped json summary to jsonOutputFile containing three entries: "bamFileNames"
 * (the bam file names with their extensions removed), "similarities" (similarityForJson), and
 * "genderChecks" (genderForJson). Does nothing when jsonOutputFile is null. Any exception is
 * printed and then reported through Misc.printErrAndExit.*/
@SuppressWarnings("unchecked")
private void saveJson() {
    //no json file requested, nothing to write
    if (jsonOutputFile == null) {
        return;
    }

    try {
        //bam file names with their extensions removed
        final int numberBams = bamFiles.length;
        String[] trimmedNames = new String[numberBams];
        int index = 0;
        while (index < numberBams) {
            trimmedNames[index] = Misc.removeExtension(bamFiles[index].getName());
            index++;
        }

        //output simple json, DO NOT change the key names without updating the downstream apps that read this file!
        Gzipper jsonOut = new Gzipper(jsonOutputFile);
        jsonOut.println("{");
        //the trailing boolean presumably controls whether a separator follows the entry (true for all but the last) - verify in Gzipper
        jsonOut.printJson("bamFileNames", trimmedNames, true);
        jsonOut.printJson("similarities", similarityForJson, true);
        jsonOut.printJson("genderChecks", genderForJson, false);
        jsonOut.println("}");
        jsonOut.close();
    } catch (Exception e) {
        e.printStackTrace();
        Misc.printErrAndExit("\nProblem writing json file! "+jsonOutputFile);
    }
}


public static void main(String[] args) {
Expand Down Expand Up @@ -241,6 +274,7 @@ public void processArgs(String[] args) throws IOException{
case 'm': minAFForMatch = Double.parseDouble(args[++i]); break;
case 'q': minBaseQuality = Integer.parseInt(args[++i]); break;
case 'u': minMappingQuality = Integer.parseInt(args[++i]); break;
case 'j': jsonOutputFile = new File(args[++i]); break;
case 't': numberThreads = Integer.parseInt(args[++i]); break;
default: Misc.printErrAndExit("\nProblem, unknown option! " + mat.group());
}
Expand All @@ -259,9 +293,10 @@ public void processArgs(String[] args) throws IOException{
if (fasta == null || fasta.canRead() == false) Misc.printErrAndExit("\nError: cannot find your indexed fasta reference file "+fasta);
//check bams
if (bamFiles == null || bamFiles.length == 0) Misc.printErrAndExit("\nError: cannot find your bam files to parse? ");

//threads to use
double gigaBytesAvailable = ((double)Runtime.getRuntime().maxMemory())/ 1073741824.0;
int numPossCores = (int)Math.round(gigaBytesAvailable/5.0);
int numPossCores = (int)Math.round(gigaBytesAvailable/5);
if (numPossCores < 1) numPossCores = 1;
int numPossThreads = Runtime.getRuntime().availableProcessors();
if (numberThreads == 0){
Expand Down Expand Up @@ -291,7 +326,7 @@ public void processArgs(String[] args) throws IOException{
public static void printDocs(){
System.out.println("\n" +
"**************************************************************************************\n" +
"** Bam Concordance: Feb 2018 **\n" +
"** Bam Concordance: Sept 2018 **\n" +
"**************************************************************************************\n" +
"BC calculates sample level concordance based on uncommon homozygous SNVs found in bam\n"+
"files. Samples from the same person will show high similarity (>0.9). Run BC on\n"+
Expand Down Expand Up @@ -321,12 +356,13 @@ public static void printDocs(){
"-m Minimum allele frequency to count a homozygous match, defaults to 0.9\n"+
"-q Minimum base quality, defaults to 20.\n"+
"-u Minimum mapping quality, defaults to 20.\n"+
"-j Write gzipped summary stats in json format to this file.\n"+
"-t Number of threads to use. If not set, determines this based on the number of\n"+
" threads and memory available to the JVM so set the -Xmx value to the max.\n\n"+

"Example: java -Xmx120G -jar pathTo/USeq/Apps/BamConcordance -r ~/exomeTargets.bed\n"+
"Example: java -Xmx100G -jar pathTo/USeq/Apps/BamConcordance -r ~/exomeTargets.bed\n"+
" -s ~/Samtools1.3.1/bin/samtools -b ~/Patient7Bams -d 10 -a 0.9 -m 0.8 -f\n"+
" ~/B37/human_g1k_v37.fasta -c ~/B37/b38ComSnps.bed.gz\n\n" +
" ~/B37/human_g1k_v37.fasta -c ~/B37/b38ComSnps.bed.gz -j bc.json.gz \n\n" +

"**************************************************************************************\n");

Expand Down
16 changes: 14 additions & 2 deletions Source/edu/utah/seq/run/FastqDataset.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;

import util.gen.IO;

/**Info related to a directory of fastq files.*/
public class FastqDataset{
Expand All @@ -10,17 +13,26 @@ public class FastqDataset{
private File[] fastqs = null;
private String name = null;

public FastqDataset (File fastqDir, String name) throws IOException{
public FastqDataset (File fastqDir, String name, ArrayList<String> info) throws IOException{
if (fastqDir != null) {
this.name = name;
File dir = new File(fastqDir, name);
if (dir.exists()) {
fastqDirExists = true;
//ok it exists, check that there are two files
fastqs = TNSample.checkNumberFiles(dir, ".gz", 2);
fastqs = checkNumberFiles(dir, ".gz", 2);
if (fastqs == null) info.add("\t\tSkipping "+name+". Failed to find two fastq files within "+fastqDir);
}
}
}

/**Fetches the files that IO.extractFiles(dir, extension) returns and hands them back only
 * when their count equals requiredNumberFiles.
 * @param dir directory to search
 * @param extension file extension passed to IO.extractFiles
 * @param requiredNumberFiles exact number of files that must be present
 * @return the extracted files, or null when the count differs from requiredNumberFiles*/
public File[] checkNumberFiles(File dir, String extension, int requiredNumberFiles) throws IOException {
    File[] matches = IO.extractFiles(dir, extension);
    //callers treat null as "wrong number of files", no exception is thrown here
    return matches.length == requiredNumberFiles ? matches : null;
}

public boolean isFastqDirExists() {
return fastqDirExists;
Expand Down
2 changes: 1 addition & 1 deletion Source/edu/utah/seq/run/TNRunner.java
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ public static void printDocs(){
"\nOptions:\n"+
"-p Directory containing one or more patient data directories to process.\n" +
"-e Workflow docs for launching exome alignments.\n"+
"-t Workflow docs for launching exome alignments.\n"+
"-t Workflow docs for launching transcriptome alignments.\n"+
"-c Workflow docs for launching somatic variant calling.\n"+
"-a Workflow docs for launching variant annotation.\n"+
"-b Workflow docs for launching bam concordance.\n"+
Expand Down
20 changes: 7 additions & 13 deletions Source/edu/utah/seq/run/TNSample.java
Original file line number Diff line number Diff line change
Expand Up @@ -249,13 +249,13 @@ private void removeBamConcordanceLinks(File jobDir) throws IOException{
new File(jobDir, "tumorExome.bai").delete();
}
if (normalExomeBamBedGvcf != null) {
new File(jobDir, "tumorTranscriptome.bam").delete();
new File(jobDir, "tumorTranscriptome.bai").delete();
}
if (tumorTranscriptomeBam != null) {
new File(jobDir, "normalExome.bam").delete();
new File(jobDir, "normalExome.bai").delete();
}
if (tumorTranscriptomeBam != null) {
new File(jobDir, "tumorTranscriptome.bam").delete();
new File(jobDir, "tumorTranscriptome.bai").delete();
}
}

private void annotateSomaticVcf() throws IOException {
Expand Down Expand Up @@ -754,21 +754,15 @@ public static void removeProgressFiles(File alignDir) {
/**Builds the TumorExome, NormalExome, and TumorTranscriptome FastqDataset objects from the
 * "Fastq" entry under rootDir and adds status lines to info describing what was found.
 * @return true when at least one of the three datasets has a non null fastq array, otherwise false
 * @throws IOException declared by makeCheckFile and by the FastqDataset constructor*/
private boolean checkFastq() throws IOException {
info.add("Checking Fastq availability...");
File fastqDir = makeCheckFile(rootDir, "Fastq");
//NOTE(review): this listing looks like a rendered diff; the next three two-argument calls appear to be the pre-change versions of the three-argument calls that follow - confirm against the real file
tumorExomeFastq = new FastqDataset(fastqDir, "TumorExome");
normalExomeFastq = new FastqDataset(fastqDir, "NormalExome");
tumorTransFastq = new FastqDataset(fastqDir, "TumorTranscriptome");
//info is passed in so each dataset can add its own message
tumorExomeFastq = new FastqDataset(fastqDir, "TumorExome", info);
normalExomeFastq = new FastqDataset(fastqDir, "NormalExome", info);
tumorTransFastq = new FastqDataset(fastqDir, "TumorTranscriptome", info);
//only report on datasets whose directory exists; the boolean printed is whether fastqs were found
if (tumorExomeFastq.isFastqDirExists()) info.add("\tTumorExome\t"+ (tumorExomeFastq.getFastqs() != null));
if (normalExomeFastq.isFastqDirExists()) info.add("\tNormalExome\t"+ (normalExomeFastq.getFastqs() != null));
if (tumorTransFastq.isFastqDirExists()) info.add("\tTumorTranscriptome\t"+ (tumorTransFastq.getFastqs() != null));
//any one dataset with fastqs is enough
if (tumorExomeFastq.getFastqs() != null || normalExomeFastq.getFastqs() != null || tumorTransFastq.getFastqs() != null) return true;
return false;
}

/**Fetches the files that IO.extractFiles(dir, extension) returns and insists that their
 * count equals requiredNumberFiles.
 * @param dir directory to search
 * @param extension file extension passed to IO.extractFiles
 * @param requiredNumberFiles exact number of files that must be present
 * @return the extracted files
 * @throws IOException when the number of extracted files differs from requiredNumberFiles*/
public static File[] checkNumberFiles(File dir, String extension, int requiredNumberFiles) throws IOException {
    File[] found = IO.extractFiles(dir, extension);
    if (found.length == requiredNumberFiles) {
        return found;
    }
    throw new IOException("Failed to find "+requiredNumberFiles+ " "+extension+ " files in "+dir);
}

public static File makeCheckFile(File parentDir, String fileName) throws IOException {
File f = new File(parentDir, fileName);
Expand Down
3 changes: 1 addition & 2 deletions Source/edu/utah/seq/vcf/AnnotatedVcfParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -650,7 +650,6 @@ public Histogram makeReadDepthHistogram(){
private void modifySettingsForFoundation() {
if (minimumDP == 0) minimumDP = 50;
if (minimumAF == 0) minimumAF = 0.01;
if (maximumAF == 1) maximumAF = 0.5;
if (maxFracBKAFs == 0) maxFracBKAFs = 0.1;
if (maximumPopAF == 0) maximumPopAF = 0.01;
if (passingAnnImpact == null) {
Expand Down Expand Up @@ -882,7 +881,7 @@ public void processArgs(String[] args){
public static void printDocs(){
IO.pl("\n" +
"**************************************************************************************\n" +
"** Annotated Vcf Parser Aug 2018 **\n" +
"** Annotated Vcf Parser Sept 2018 **\n" +
"**************************************************************************************\n" +
"Splits VCF files that have been annotated with SnpEff w/ dbNSFP and clinvar, plus the\n"+
"VCFBackgroundChecker and VCFSpliceScanner USeq apps into passing and failing records.\n"+
Expand Down
Loading

0 comments on commit 557d41a

Please sign in to comment.