Skip to content

Commit

Permalink
Added in INDEL variant scoring into the LoH app.
Browse files Browse the repository at this point in the history
  • Loading branch information
u0028003 committed Oct 7, 2022
1 parent 076a7eb commit b8fbfe2
Show file tree
Hide file tree
Showing 6 changed files with 171 additions and 125 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,12 @@

import java.io.File;
import java.io.IOException;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import edu.utah.seq.query.QueryIndexFileLoader;
import edu.utah.seq.vcf.VCFBkz;
import htsjdk.tribble.readers.TabixReader;
import util.bio.annotation.Bed;
import util.bio.annotation.Coordinate;
import util.gen.IO;
import util.gen.Misc;
import util.gen.Num;

/**Pulls bpileup lines that overlap vcf records.*/
public class BamPileupTabixLoaderSingle {
Expand Down Expand Up @@ -94,7 +85,7 @@ else if (allele == 'I') {


/**Return GATC or ID for indels*/
private static char fetchAllele(String[] fields) throws IOException {
public static char fetchAllele(String[] fields) throws IOException {
if (fields[4].contains(",") || fields[4].startsWith("<")) throw new IOException("Cannot interpret multi alts or those with <, deconvolute? "+Misc.stringArrayToString(fields, "\t"));
//#CHROM POS ID REF ALT
int lenRef = fields[3].length();
Expand Down
4 changes: 4 additions & 0 deletions Source/edu/utah/seq/parsers/jpileup/BaseCount.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ public double getPassingReadCoverage() {
public double getPassingReadCoverageSnv() {
return g+a+t+c;
}
public double getPassingReadCoverageIndel(boolean isD) {
if (isD) return g+a+t+c+del;
return g+a+t+c+ins;
}
public double getTotalReadCoverage() {
return g+a+t+c+ins+del+n+failQual;
}
Expand Down
44 changes: 22 additions & 22 deletions Source/edu/utah/seq/vcf/loh/HetWindow.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,25 @@ public class HetWindow {
//these pvals have been -10Log10(pval) transformed!
private float pvalue;
private float adjPvalue;
private HeterozygousSnv[] hetSnvs; //pvalues in these have not been transformed
private HeterozygousVar[] hetVars; //pvalues in these have not been transformed
private float meanAfSomatic = -1f;
private float meanAfGermline = -1f;
private boolean passesThresholds = false;

public HetWindow (float pvalue, HeterozygousSnv[] hetSnvs) {
public HetWindow (float pvalue, HeterozygousVar[] hetVars) {
this.pvalue = pvalue;
this.hetSnvs = hetSnvs;
this.hetVars = hetVars;
}

public String toStringBedFormat(int bpPadding) throws IOException {
StringBuilder sb = new StringBuilder();
//chr,start,stop,name,score
addCoordinatesRegion(bpPadding, sb, true, "\t");
sb.append("\t");
//Name #Snvs_MeanDiff_-10Log10(AdjPval)
//Name #Vars_MeanDiff_-10Log10(AdjPval)
String p = Num.formatNumberNoComma(adjPvalue, 1);
String diff = Num.formatNumber(getMeanAfDiff(), 3);
sb.append("LoH_"+hetSnvs.length+"_"+diff+"_"+p);
sb.append("LoH_"+hetVars.length+"_"+diff+"_"+p);
sb.append("\t");
//score
p = Num.formatNumberNoComma(pvalue, 0);
Expand All @@ -43,7 +43,7 @@ public String toStringVcfFormat(int bpPadding) throws IOException {
StringBuilder sb = new StringBuilder();
sb.append("LoHBlock=");
addCoordinatesRegion(bpPadding, sb, false, "-"); sb.append(",");
sb.append(hetSnvs.length); sb.append(",");
sb.append(hetVars.length); sb.append(",");
String diff = Num.formatNumber(getMeanAfDiff(), 3);
sb.append(diff); sb.append(",");
String p = Num.formatNumberNoComma(pvalue, 1);
Expand All @@ -54,17 +54,17 @@ public String toStringVcfFormat(int bpPadding) throws IOException {

//for each variant
StringBuilder vcfs = new StringBuilder();
for (int i=0; i< hetSnvs.length; i++) {
for (int i=0; i< hetVars.length; i++) {
//#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLES
// 0 1 2 3 4 5 6 7 8 9+
String[] vcfFields = hetSnvs[i].getVcfRecord();
String[] vcfFields = hetVars[i].getVcfRecord();
for (int x=0; x<7; x++) {
vcfs.append(vcfFields[x]);
vcfs.append("\t");
}
//add the info fields
vcfs.append(blockInfo);
vcfs.append(hetSnvs[i].toStringVcf());
vcfs.append(hetVars[i].toStringVcf());
vcfs.append(vcfFields[7]);

//add the rest
Expand All @@ -86,9 +86,9 @@ public String toString() {
addCoordinates(sb);
sb.append("\n\tMeanAFDiff "+diff +" ("+meanAfSomatic+"-"+meanAfGermline+")");
sb.append("\n\t-10Log10(combPVal) "+pvalue+ "\t-10Log10(adjP) "+adjPvalue);
sb.append("\nLoH block snvs:\n");
for (int i=0; i< hetSnvs.length; i++) {
sb.append(hetSnvs[i]);
sb.append("\nLoH block vars:\n");
for (int i=0; i< hetVars.length; i++) {
sb.append(hetVars[i]);
sb.append("\n");
}

Expand All @@ -99,20 +99,20 @@ public String toString() {
}

private void addCoordinates(StringBuilder sb) {
String firstPos = hetSnvs[0].getVcfRecord()[1];
String lastPos = hetSnvs[hetSnvs.length-1].getVcfRecord()[1];
sb.append(hetSnvs[0].getVcfRecord()[0]); //chrom
String firstPos = hetVars[0].getVcfRecord()[1];
String lastPos = hetVars[hetVars.length-1].getVcfRecord()[1];
sb.append(hetVars[0].getVcfRecord()[0]); //chrom
sb.append(":");
sb.append(firstPos);
sb.append("-");
sb.append(lastPos);
}

private void addCoordinatesRegion(int bpPadding, StringBuilder sb, boolean includeChr, String delimiter) {
Integer firstPos = hetSnvs[0].getPosition()- bpPadding;
Integer lastPos = hetSnvs[hetSnvs.length-1].getPosition()+bpPadding;
Integer firstPos = hetVars[0].getPosition()- bpPadding;
Integer lastPos = hetVars[hetVars.length-1].getPosition()+bpPadding;
if (includeChr) {
sb.append(hetSnvs[0].getVcfRecord()[0]); //chrom
sb.append(hetVars[0].getVcfRecord()[0]); //chrom
sb.append(delimiter);
}

Expand All @@ -133,11 +133,11 @@ public float getMeanAfDiff() throws IOException {
if (meanAfSomatic != -1) return meanAfSomatic - meanAfGermline;
double somAfSum = 0;
double germAfSum = 0;
for (HeterozygousSnv h: hetSnvs) {
for (HeterozygousVar h: hetVars) {
somAfSum += h.getAlleleFractionSomatic();
germAfSum += h.getAlleleFractionGermline();
}
double count = hetSnvs.length;
double count = hetVars.length;
meanAfSomatic = (float)(somAfSum/count);
meanAfGermline = (float)(germAfSum/count);
return meanAfSomatic - meanAfGermline;
Expand All @@ -151,8 +151,8 @@ public void setAdjTransPvalue(float adjPvalue) {
this.adjPvalue = adjPvalue;
}

public HeterozygousSnv[] getHetSnvs() {
return hetSnvs;
public HeterozygousVar[] getHetVars() {
return hetVars;
}

public HetWindow compare(HetWindow other) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,17 @@
import java.util.ArrayList;
import edu.utah.seq.parsers.jpileup.BaseCount;
import edu.utah.seq.parsers.jpileup.BpileupLine;
import edu.utah.seq.parsers.jpileup.BamPileupTabixLoaderSingle;
import util.gen.IO;
import util.gen.Misc;
import util.gen.Num;

public class HeterozygousSnv {
public class HeterozygousVar {

//#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLES
private String[] vcfRecord = null;
private boolean isSnv;
private boolean isDeletion;
private BpileupLine germlineBP = null;
private BpileupLine somaticBP = null;
private int[] somAltRefGermAltRef = null;
Expand All @@ -20,29 +25,48 @@ public class HeterozygousSnv {
private double germlineAf;
private HetWindow bestHetWindow = null;

public HeterozygousSnv (String[] vcfRecord) {
public HeterozygousVar (String[] vcfRecord) throws IOException {
this.vcfRecord = vcfRecord;
position = Integer.parseInt(vcfRecord[1]);
//figure out the type
char allele = BamPileupTabixLoaderSingle.fetchAllele(vcfRecord);
if (allele == 'D') {
isDeletion = true;
isSnv = false;
}
else if (allele == 'I') {
isDeletion = false;
isSnv = false;
}
else isSnv = true;
}

public String[] getVcfRecord() {
return vcfRecord;
}

public void setGermlineBPs(ArrayList<BpileupLine> data, int minimumDepth) throws IOException {
if (data.size() != 1) passing = false;
if (data== null || data.size() == 0) passing = false;
else {
germlineBP = data.get(0);
int dp = (int)Math.round(germlineBP.getSamples()[0].getPassingReadCoverageSnv());
int dp = -1;
//snv?
if (isSnv) dp = (int)Math.round(germlineBP.getSamples()[0].getPassingReadCoverageSnv());
//indel
else dp = (int)Math.round(germlineBP.getSamples()[0].getPassingReadCoverageIndel(isDeletion));
if (dp < minimumDepth) passing = false;
}
}

public void setSomaticBPs(ArrayList<BpileupLine> data, int minimumDepth) throws IOException {
if (data.size() != 1) passing = false;
if (data== null || data.size() == 0) passing = false;
else {
somaticBP = data.get(0);
int dp = (int)Math.round(somaticBP.getSamples()[0].getPassingReadCoverageSnv());
int dp = -1;
//snv?
if (isSnv) dp = (int)Math.round(germlineBP.getSamples()[0].getPassingReadCoverageSnv());
//indel
else dp = (int)Math.round(germlineBP.getSamples()[0].getPassingReadCoverageIndel(isDeletion));
if (dp < minimumDepth) passing = false;
}
}
Expand Down Expand Up @@ -73,7 +97,7 @@ public String toString() {
fetchSomAltRefGermAltRefCounts();
sb.append("\tsom "+somAltRefGermAltRef[0]+"/"+somAltRefGermAltRef[1]+" "+somaticAf+"\n");
sb.append("\tger "+somAltRefGermAltRef[2]+"/"+somAltRefGermAltRef[3]+" "+germlineAf+"\n");
sb.append("\t-10Log10(snvPVal) "+Num.minus10log10Float(pvalue)+ "\tafDiff " +getAlleleFractionDifference());
sb.append("\t-10Log10(varPVal) "+Num.minus10log10Float(pvalue)+ "\tafDiff " +getAlleleFractionDifference());
if(bestHetWindow!=null) {
sb.append("\n");
sb.append("\t\tmeanAFs Somatic "+bestHetWindow.getMeanAfSomatic()+"\tGermline "+bestHetWindow.getMeanAfGermline()+"\tDiff "+bestHetWindow.getMeanAfDiff());
Expand All @@ -90,22 +114,42 @@ public String toString() {

public int[] fetchSomAltRefGermAltRefCounts() throws IOException {
if (somAltRefGermAltRef != null) return somAltRefGermAltRef;
char alt = vcfRecord[4].charAt(0);
char ref = vcfRecord[3].charAt(0);
int somAlt;
int somRef;
int germAlt;
int germRef;
BaseCount somBC = somaticBP.getSamples()[0];
int somAlt = somBC.getSnvCount(alt);
int somRef = somBC.getSnvCount(ref);
BaseCount germBC = germlineBP.getSamples()[0];
int germAlt = germBC.getSnvCount(alt);
int germRef = germBC.getSnvCount(ref);
somAltRefGermAltRef = new int[] {somAlt, somRef, germAlt, germRef};

if (isSnv) {
char alt = vcfRecord[4].charAt(0);
char ref = vcfRecord[3].charAt(0);
somAlt = somBC.getSnvCount(alt);
somRef = somBC.getSnvCount(ref);
germAlt = germBC.getSnvCount(alt);
germRef = germBC.getSnvCount(ref);

}
else {
somRef = (int)somBC.getPassingReadCoverageSnv(); //don't want to include any Ds
germRef = (int)germBC.getPassingReadCoverageSnv();
if (isDeletion) {
somAlt = somBC.getIndelCount('D');
germAlt = germBC.getIndelCount('D');
}
else {
somAlt = somBC.getIndelCount('I');
germAlt = germBC.getIndelCount('I');
}
}

//set AFs
somAltRefGermAltRef = new int[] {somAlt, somRef, germAlt, germRef};
double total = somAlt+somRef;
somaticAf = (double)somAlt/ total;
total = germAlt+germRef;
germlineAf = (double)germAlt/ total;

return somAltRefGermAltRef;
}

Expand Down
Loading

0 comments on commit b8fbfe2

Please sign in to comment.