Skip to content

Commit

Permalink
Made mods to filter and flag variant calls from Strelka GATK joint
Browse files Browse the repository at this point in the history
genotyping with "MULTIALLELIC".  Added LoH analysis to TNRunner.
  • Loading branch information
u0028003 committed Nov 22, 2022
1 parent b1ce18d commit a6beb17
Show file tree
Hide file tree
Showing 14 changed files with 427 additions and 97 deletions.
2 changes: 1 addition & 1 deletion .classpath
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,6 @@
<classpathentry kind="lib" path="LibraryJars/picard_2.22.4.jar"/>
<classpathentry kind="lib" path="LibraryJars/GQueryCLI.jar"/>
<classpathentry kind="lib" path="LibraryJars/commons-text-1.9.jar"/>
<classpathentry kind="lib" path="LibraryJars/SubjectMatchMaker_0.1.jar"/>
<classpathentry kind="lib" path="LibraryJars/SubjectMatchMaker_0.3.jar"/>
<classpathentry kind="output" path="Classes"/>
</classpath>
137 changes: 120 additions & 17 deletions Source/edu/utah/seq/cnv/cfdna/LiquidBiopsyCA.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import java.util.regex.*;
import htsjdk.samtools.*;
import htsjdk.samtools.cram.ref.ReferenceSource;
import trans.cel.QuantileNormalization;
import trans.main.WilcoxonSignedRankTest;
import util.bio.annotation.Bed;
import util.gen.*;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; //needed by cram
Expand All @@ -21,13 +23,14 @@ public class LiquidBiopsyCA {
//fields
private File bedFile = null;
private LiquidBiopsySample[] samples = null;
private File results;
private File resultsDirectory;
private File fastaFile;
private int minMappingQuality = 13;
private SamReaderFactory samFactory;
private LinkedHashMap<String, ArrayList<Bed>> groupNameBed = null;
private int numCpu = 0;
private ArrayList<LookupJob[]> jobs = new ArrayList<LookupJob[]>();
private int[][] targetSampleCounts = null;
private int totalNumberTargets = 0;

//constructor for stand alone use
Expand Down Expand Up @@ -60,7 +63,7 @@ public void doWork() throws Exception {
makeLookupJobs();

//make and execute loaders
IO.pl("Launching "+numCpu+" loaders\n\t\tJobName\t#IntFragments\t#NonIntFragments");
IO.pl("\nLaunching "+numCpu+" loaders...\n\tJobName\t#IntFragments\t#NonIntFragments");
ExecutorService executor = Executors.newFixedThreadPool(numCpu);
LookupJobRunner[] loaders = new LookupJobRunner[numCpu];
for (int i=0; i< numCpu; i++) {
Expand All @@ -78,11 +81,100 @@ public void doWork() throws Exception {
if (l.isFailed()) throw new IOException("\nERROR: Lookup Loader issue, aborting!");
}

printCountTable();
//saveCountTable();
saveCountsForR();

//normalizeCounts();

//wilcoxonTestPairedData();

}


private void wilcoxonTestPairedData() {
IO.pl("\nTesting each gene for copy differences...");
LookupJob[][] germlines = new LookupJob[samples.length][];
int normIndex = 0;
for (LiquidBiopsySample s: samples) {
IO.pl(s.getSampleName());
LookupJob[] cf = s.getCfJobs();
LookupJob[] norm = s.getGermlineJobs();
germlines[normIndex++] = norm;
for (int i=0; i<cf.length; i++) {
String geneName = cf[i].getRegions().get(0).getName();
IO.pl("\t"+geneName);
IO.pl("\t\tCF : "+Num.floatArrayToString(cf[i].getNormalizedCounts()," "));
IO.pl("\t\tNorm: "+Num.floatArrayToString(norm[i].getNormalizedCounts()," "));
WilcoxonSignedRankTest w = new WilcoxonSignedRankTest(cf[i].getNormalizedCounts(), norm[i].getNormalizedCounts());
IO.pl("\t\tPVal: "+w.getTransformedPValue());

}
}
IO.pl("\nTesting each gene for mock copy differences between germline samples...");
for (int k=0; k< germlines.length; k++) {
LookupJob[] normA = germlines[k];
for (int j=k+1; j< germlines.length; j++) {
LookupJob[] normB = germlines[j];
for (int i=0; i<normA.length; i++) {
String geneName = normA[i].getRegions().get(0).getName();
IO.p("\t"+geneName);
//IO.pl("\t\tCF : "+Num.floatArrayToString(cf[i].getGermlineizedCounts()," "));
//IO.pl("\t\tNorm: "+Num.floatArrayToString(norm[i].getGermlineizedCounts()," "));
WilcoxonSignedRankTest w = new WilcoxonSignedRankTest(normA[i].getNormalizedCounts(), normB[i].getNormalizedCounts());
IO.pl("\tPVal: "+w.getTransformedPValue());

}
}
}

}

private void normalizeCounts() {
IO.pl("\nQuantile germlineizing target counts...");
// TODO Auto-generated method stub
float[][] intensities = new float[samples.length*2][];
int index = 0;
for (LiquidBiopsySample s: samples) {
intensities[index++] = s.getCfCounts(totalNumberTargets);
intensities[index++] = s.getGermlineCounts(totalNumberTargets);
}
QuantileNormalization qn = new QuantileNormalization (intensities);
intensities = qn.getIntensities();
addGermlineizedCountsToLookupJobs(intensities);
//printIntensityStats(intensities);

}

private void printCountTable() {
private void addGermlineizedCountsToLookupJobs(float[][] intensities) {
int index = 0;
//add back to the LookupJob
for (LiquidBiopsySample s: samples) {
LookupJob[] cf = s.getCfJobs();
LookupJob[] norm = s.getGermlineJobs();
float[] normCf = intensities[index++];
float[] normNorm = intensities[index++];
int counter = 0;
for (int i=0; i< cf.length; i++) {
int numTargetsInJob = cf[i].getRegions().size();
int stop = counter+numTargetsInJob;
float[] nCf = Arrays.copyOfRange(normCf, counter, stop);
float[] nNorm = Arrays.copyOfRange(normNorm, counter, stop);
cf[i].setNormalizedCounts(nCf);
norm[i].setNormalizedCounts(nNorm);
counter = stop;
}
}
}

/**Prints statistics about the current intensity float[][] arrays*/
public void printIntensityStats(float[][] intensities){
for (int i=0; i<intensities.length; i++){
Num.statFloatArray(intensities[i], false);
}
}

private void saveCountTable() {
IO.pl("\nSaving count table...");
//create rowNames
String[] rowNames = new String[totalNumberTargets];
int counter = 0;
Expand All @@ -98,11 +190,11 @@ private void printCountTable() {
columnNames[counter++] = "Target:Coordinates";
for (LiquidBiopsySample l: samples) {
columnNames[counter++] = l.getSampleName()+":Somatic";
columnNames[counter++] = l.getSampleName()+":Normal";
columnNames[counter++] = l.getSampleName()+":Germline";
}

//create int[][] table
int[][] targetSampleCounts = new int[totalNumberTargets][samples.length*2];
targetSampleCounts = new int[totalNumberTargets][samples.length*2];

//for each targetBlock
int numGroup = groupNameBed.keySet().size();
Expand All @@ -114,10 +206,10 @@ private void printCountTable() {
for (int s = 0; s< samples.length; s++) {
int blockRowCounter = rowCounter;
LookupJob tumor = samples[s].getCfJobs()[t];
LookupJob normal = samples[s].getNormalJobs()[t];
LookupJob germline = samples[s].getGermlineJobs()[t];
for (int r=0; r< numRowsInBlock; r++) {
targetSampleCounts[blockRowCounter][columnCounter] = tumor.getCounts()[r];
targetSampleCounts[blockRowCounter++][columnCounter+1] = normal.getCounts()[r];
targetSampleCounts[blockRowCounter++][columnCounter+1] = germline.getCounts()[r];
}
columnCounter+=2;
}
Expand Down Expand Up @@ -147,16 +239,27 @@ private void printCountTable() {
sb.append("\n");
}

IO.pl(sb);
//IO.pl(sb);

}

private void saveCountsForR() throws IOException {
IO.pl("\nSaving Counts For R...");
File forR = new File(resultsDirectory, "countsForR.txt");
PrintWriter out = new PrintWriter( new FileWriter(forR));
for (LiquidBiopsySample s: samples) {
s.addCfCounts(out);
s.addGermlineCounts(out);
}
out.close();
}

private void closeSamReaders() throws IOException {
for (LiquidBiopsySample s: samples) {
s.getCfReader().close();
s.getNormalReader().close();
s.getGermlineReader().close();
s.getCfBedOut().close();
s.getNormalBedOut().close();
s.getGermlineBedOut().close();
}
}

Expand All @@ -168,7 +271,7 @@ public synchronized LookupJob[] fetchNextJob() {
private void makeLookupJobs() throws IOException {
for (LiquidBiopsySample s: samples) {
jobs.add(s.buildCFJobs(groupNameBed, samFactory, minMappingQuality));
jobs.add(s.buildNormalJobs(groupNameBed, samFactory, minMappingQuality));
jobs.add(s.buildGermlineJobs(groupNameBed, samFactory, minMappingQuality));
}

}
Expand Down Expand Up @@ -217,7 +320,7 @@ public void processArgs(String[] args) throws Exception{
case 's': forExtraction = new File(args[++i]).getCanonicalFile(); break;
case 'b': bedFile = new File(args[++i]); break;
case 'r': fastaFile = new File(args[++i]); break;
case 'o': results = new File(args[++i]); break;
case 'o': resultsDirectory = new File(args[++i]); break;
case 'm': minMappingQuality = Integer.parseInt(args[++i]); break;
case 'p': numCpu = Integer.parseInt(args[++i]); break;
case 'h': printDocs(); System.exit(0);
Expand All @@ -234,12 +337,12 @@ public void processArgs(String[] args) throws Exception{
if (forExtraction == null || forExtraction.exists() == false || forExtraction.isDirectory()== false) Misc.printErrAndExit("\nError: please enter a path to directory containing one or more sample specific sub directories.\n");
File[] sampleDirectories = IO.extractOnlyDirectories(forExtraction);
samples = new LiquidBiopsySample[sampleDirectories.length];
for (int i=0; i< samples.length; i++) samples[i] = new LiquidBiopsySample(sampleDirectories[i], results);
for (int i=0; i< samples.length; i++) samples[i] = new LiquidBiopsySample(sampleDirectories[i], resultsDirectory);

//if (fastaFile == null || fastaFile.canRead() == false) Misc.printErrAndExit("\nError: please provide an reference genome fasta file and it's index.");
if (bedFile == null || bedFile.canRead() == false) Misc.printErrAndExit("\nError: please provide a file of regions in bed format.");
if (results == null ) Misc.printErrAndExit("\nError: please provide a directory for saving the output results");
results.mkdirs();
if (resultsDirectory == null ) Misc.printErrAndExit("\nError: please provide a directory for saving the output results");
resultsDirectory.mkdirs();

//number of workers
int numProc = Runtime.getRuntime().availableProcessors();
Expand All @@ -257,7 +360,7 @@ public static void printDocs(){
"\n"+

"\nRequired Options:\n"+
"-s Path to a directory containing sample specific sub directories with two aligment files representing the cfDNA and matched normal cram or bam files with their indexes. The normal file name should contain the 'norm' string.\n"+
"-s Path to a directory containing sample specific sub directories with two aligment files representing the cfDNA and matched germline cram or bam files with their indexes. The germline file name should contain the 'norm' string.\n"+
"-b Path to a bed file of non overlapping regions to estimate copy ratio differences, xxx.bed.gz/.zip OK. The name column will be used to group each region into a larger area for merged copy ratio differences.\n"+
"-r Path to the reference fasta with xxx.fai index used for alignment.\n"+
"-o Path to the output results directory.\n"+
Expand Down
Loading

0 comments on commit a6beb17

Please sign in to comment.