Skip to content

Commit

Permalink
Final fixes to the new TempusVcfComparator
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidAustinNix committed Mar 19, 2019
1 parent f6072c9 commit ca040d4
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 91 deletions.
3 changes: 2 additions & 1 deletion Source/edu/utah/seq/vcf/json/TempusJson2Vcf.java
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,8 @@ public static void printDocs(){
"**************************************************************************************\n" +
"Parses json Tempus reports to vcf. Leave in PHI to enable calculating age at\n"+
"diagnosis. Summary statistics calculated for all reports. Vcfs will contain a mix of \n"+
"somatic and inherited snvs, indels, and cnvs.\n"+
"somatic and inherited snvs, indels, and cnvs. Be sure to vt normalize the exported\n"+
"vcfs, https://github.com/atks/vt \n"+

"\nOptions:\n"+
"-j Path to Tempus json report or directory containing such, xxx.json(.gz/.zip OK)\n"+
Expand Down
117 changes: 29 additions & 88 deletions Source/edu/utah/seq/vcf/json/TempusVcfComparator.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ public class TempusVcfComparator {
private SimpleVcf[] fVcfs;
private SimpleVcf[] rVcfs;
private int bpPaddingForOverlap = 2;
private boolean noModifyTempus = true;
private boolean appendChr = false;
private boolean excludeInherited = false;

Expand All @@ -35,6 +34,7 @@ public class TempusVcfComparator {
private int numberModifiedTempusCalls = 0;
private int numberTempusWithNoMatch = 0;
private int numberPassingRecallWithNoMatch = 0;
private int numberInherited = 0;

private ArrayList<SimpleVcf> vcfToPrint = new ArrayList<SimpleVcf>();
private ArrayList<String> headerLines = new ArrayList<String>();
Expand Down Expand Up @@ -78,10 +78,10 @@ private void printStats() {
System.out.println( numberRecall +"\t# Recall variants");
System.out.println( numberShortTempus +"\t# Short Tempus variants");
System.out.println( numberOtherTempus +"\t# Other Tempus variants");
System.out.println( numberInherited +"\t# Inherited Tempus variants, skippped? "+excludeInherited);
System.out.println( numberExactMatches +"\t# Short with an exact match");
System.out.println( numberTempusWithOnlyOverlap +"\t# Short with overlap recal variants");
if (noModifyTempus) System.out.println( numberModifiedTempusCalls +"\t# Short recommended for modification");
else System.out.println( numberModifiedTempusCalls +"\t# Short modified using overlapping recal variant info");
System.out.println( numberModifiedTempusCalls +"\t# Short recommended for modification");
System.out.println( numberTempusWithNoMatch +"\t# Short with no match");
System.out.println( numberPassingRecallWithNoMatch +"\t# Passing recall variants with no Short match");
}
Expand Down Expand Up @@ -114,14 +114,11 @@ private void processRecallVcfs() {
for (SimpleVcf r:rVcfs){
//print it?
if (r.isPrint() && r.getFilter().toLowerCase().contains("fail") == false) {
//mark Filter NR not reported
r.appendFilter("NR");
vcfToPrint.add(r);
numberPassingRecallWithNoMatch++;
if (r.getMatch() == null) numberPassingRecallWithNoMatch++;
}
}
}


/**Merges header lines, eliminating duplicates. Performs only rudimentary ID name collision checking, silently keeping the first one.
* Returns null if the CHROM lines differ. */
Expand Down Expand Up @@ -164,11 +161,6 @@ else if (other.contains(h) == false) {

//add in filter lines
filter.add(SimpleVcf.ncFilter);
filter.add(SimpleVcf.nrFilter);
filter.add(SimpleVcf.mdFilter);

//add info lines
info.add(SimpleVcf.infoRAF);

//remove ID dups from contig, filter, format, info
ArrayList<String> contigAL = VCFParser.mergeHeaderIds(contig);
Expand All @@ -188,8 +180,6 @@ else if (other.contains(h) == false) {
return Misc.stringArrayListToStringArray(lines);
}



private void processTempusVcfs() {
//for each Tempus record
for (SimpleVcf f: fVcfs){
Expand All @@ -198,82 +188,34 @@ private void processTempusVcfs() {
if (f.isShortVariant() == false) {
vcfToPrint.add(f);
numberOtherTempus++;
continue;
}

numberShortTempus++;

//exact match?
if (f.getMatch() != null) {
//exact match then just print it
f.appendRAF(f.getMatch());
f.appendID(f.getMatch());
vcfToPrint.add(f);
numberExactMatches++;
f.getMatch().setPrint(false);
continue;
}

//So no exact match any overlap?
if (f.getOverlap().size()!=0){
//always print the tempus vcf record with a NC FILTER field, not confirmed.
//question is what to do about the overlapping records? print with NR FILTER field, not reported by tempus?
numberTempusWithOnlyOverlap++;

//more than one overlap? print tempus and the multiple with NC and NR's
if (f.getOverlap().size()!=1){
//System.err.println("Multiple overlap. Printing the Tempus and Recall variants:");
//System.err.println("F:\t"+f.getOriginalRecord());
//for (SimpleVcf r: f.getOverlap()) System.err.println("R:\t"+r.getOriginalRecord());
f.appendFilter("NC");
else {
numberShortTempus++;
//exact match?
if (f.getMatch() != null) {
numberExactMatches++;
//exact match then add tempus info to recall
SimpleVcf vcf = f.getMatch();
vcf.appendID(f);
vcf.appendINFO(f);
f.setPrint(false);
}

//ok so only one overlap, do the types match?
else {
int lenFRef = f.getRef().length();
int lenFAlt = f.getAlt().length();
SimpleVcf r = f.getOverlap().get(0);
int lenRRef = r.getRef().length();
int lenRAlt = r.getAlt().length();

//types match and it's a good recal variant, modify the tempus call and print, don't print the recal variant
if (lenFRef == lenRRef && lenFAlt == lenRAlt && r.getFilter().toLowerCase().contains("fail") == false){
if (noModifyTempus){
System.err.println("WARNING: One overlap and types match, recommend modifying the Tempus record. Will print both with no chr, pos, alt, ref modifications.");
f.appendFilter("NC");
System.err.println("R:\t"+r.getOriginalRecord());
System.err.println("F:\t"+f.getOriginalRecord());
numberModifiedTempusCalls++;
}
else {
System.err.println("WARNING: One overlap and types match thus MODIFYING the Tempus pos, ref, alt info and printing it. Not printing the recall.");
f.swapInfoWithOverlap(r);
f.appendFilter("MD");
System.err.println("R:\t"+r.getOriginalRecord());
System.err.println("F:\t"+f.getOriginalRecord());
System.err.println("M:\t"+f.getVcfLine());
numberModifiedTempusCalls++;
//set recall to not print
r.setPrint(false);
}
}
//types don't match so print tempus and recal
//So no exact match any overlap?
if (f.getOverlap().size()!=0) numberTempusWithOnlyOverlap++;

//No exact or overlap
else {
//System.err.println("One overlap, but diff types. Printing Tempus and Recall vars.");
f.appendFilter("NC");
System.err.println("WARNING: No match to this Tempus variant.");
System.err.println("F:\t"+f.getVcfLine());
numberTempusWithNoMatch++;
}
//always print it
f.appendFilter("NC");
vcfToPrint.add(f);
}
//in all cases print the tempus var
vcfToPrint.add(f);
continue;

}

//No exact or overlap, flag and print
System.err.println("WARNING: No match to this Tempus variant.");
numberTempusWithNoMatch++;
f.appendFilter("NC");
vcfToPrint.add(f);
System.err.println("F:\t"+f.getVcfLine());
}
}

Expand Down Expand Up @@ -305,7 +247,10 @@ private SimpleVcf[] load(File vcf, boolean excludeContig) {
ArrayList<SimpleVcf> al = new ArrayList<SimpleVcf>();
for (String v: lines){
if (v.startsWith("#") == false) {
if (excludeInherited && v.contains("inherited")) continue;
if (v.contains("inherited")) {
numberInherited++;
if (excludeInherited) continue;
}
if (appendChr && v.startsWith("chr") == false) v = "chr"+v;
al.add(new SimpleVcf(v, bpPaddingForOverlap));
}
Expand All @@ -322,7 +267,6 @@ private SimpleVcf[] load(File vcf, boolean excludeContig) {
}

public static void main(String[] args) {
IO.pl("Trying...");
if (args.length ==0){
printDocs();
System.exit(0);
Expand All @@ -346,7 +290,6 @@ public void processArgs(String[] args){
case 't': tempusVcf = new File(args[++i]); break;
case 'r': recallVcf = new File(args[++i]); break;
case 'm': mergedVcf = new File(args[++i]); break;
case 'k': noModifyTempus = false; break;
case 'c': appendChr = true; break;
case 'e': excludeInherited = true; break;
default: Misc.printErrAndExit("\nProblem, unknown option! " + mat.group());
Expand Down Expand Up @@ -377,8 +320,6 @@ public static void printDocs(){
"-r Path to a recalled snv/indel vcf file.\n"+
"-m Path to named vcf file for saving the results.\n"+
"-c Append chr if absent in chromosome name.\n"+
"-k Attempt to merge Tempus records that overlap a recall and are the same type.\n"+
" Defaults to printing both.\n"+
"-e Exclude 'inherited' germline Tempus records from the comparison and merged output.\n"+

"\nExample: java -Xmx2G -jar pathToUSeq/Apps/TempusVcfComparator -f TL-18-03CFD6.vcf\n" +
Expand Down
8 changes: 6 additions & 2 deletions Source/edu/utah/seq/vcf/xml/SimpleVcf.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ public class SimpleVcf implements Comparable<SimpleVcf>{
private ArrayList<SimpleVcf> overlap = new ArrayList<SimpleVcf>();
private static final Pattern endPat = Pattern.compile(".+END=(\\d+);.+");
private static final Pattern afPat = Pattern.compile(".+AF=([\\d+\\.]+).*");
public static final String ncFilter = "##FILTER=<ID=NC,Description=\"This Foundation variant was not confirmed in subsequent recalling.\">";
public static final String ncFilter = "##FILTER=<ID=NC,Description=\"This clinical test variant was not confirmed in subsequent recalling.\">";
public static final String nrFilter = "##FILTER=<ID=NR,Description=\"This variant was not reported by Foundation.\">";
public static final String mdFilter = "##FILTER=<ID=MD,Description=\"This Foundation variant overlapped a recall, had the same type, and was modified using info from the recall.\">";
public static final String mdFilter = "##FILTER=<ID=MD,Description=\"This clinical variant overlapped a recall, had the same type, and was modified using info from the recall.\">";
public static String infoRAF = "##INFO=<ID=RAF,Number=A,Type=Float,Description=\"Recalled variant allele frequency\">";

//#CHROM POS ID REF ALT QUAL FILTER INFO
Expand Down Expand Up @@ -118,6 +118,10 @@ public void appendID(SimpleVcf o) {
if (o.getId().equals(".") == false) id = id+";"+o.getId();
}

/**Appends the INFO field of another record to this record's INFO field, joining the two with a semicolon.
 * A missing INFO on either side (null, empty, or the "." placeholder) is never joined, so the result does not
 * contain fragments such as ".;X" or "X;."; this mirrors the "." check used by appendID.
 * @param o record whose INFO annotations are merged into this one; this record is left unchanged when o has none. */
public void appendINFO(SimpleVcf o) {
	String other = o.info;
	//nothing to add when the other record carries no INFO annotations
	if (other == null || other.isEmpty() || other.equals(".")) return;
	//this record has no INFO of its own, so take the other's instead of building ".;..."
	if (info == null || info.isEmpty() || info.equals(".")) info = other;
	else info = info+";"+other;
}

public String toString(){
//#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLES.....
return chr+ "\t"+ (1+pos)+ "\t"+ id+ "\t"+ ref+ "\t"+ alt+ "\t"+ qual+ "\t"+ filter+ "\t"+ info;
Expand Down

0 comments on commit ca040d4

Please sign in to comment.