diff --git a/Source/edu/utah/seq/vcf/json/TempusJson2Vcf.java b/Source/edu/utah/seq/vcf/json/TempusJson2Vcf.java index ceed3097..e065d8d8 100644 --- a/Source/edu/utah/seq/vcf/json/TempusJson2Vcf.java +++ b/Source/edu/utah/seq/vcf/json/TempusJson2Vcf.java @@ -456,7 +456,8 @@ public static void printDocs(){ "**************************************************************************************\n" + "Parses json Tempus reports to vcf. Leave in PHI to enable calculating age at\n"+ "diagnosis. Summary statistics calculated for all reports. Vcfs will contain a mix of \n"+ - "somatic and inherited snvs, indels, and cnvs.\n"+ + "somatic and inherited snvs, indels, and cnvs. Be sure to vt normalize the exported\n"+ + "vcfs, https://github.com/atks/vt \n"+ "\nOptions:\n"+ "-j Path to Tempus json report or directory containing such, xxx.json(.gz/.zip OK)\n"+ diff --git a/Source/edu/utah/seq/vcf/json/TempusVcfComparator.java b/Source/edu/utah/seq/vcf/json/TempusVcfComparator.java index bfd5d640..5f6231ff 100644 --- a/Source/edu/utah/seq/vcf/json/TempusVcfComparator.java +++ b/Source/edu/utah/seq/vcf/json/TempusVcfComparator.java @@ -22,7 +22,6 @@ public class TempusVcfComparator { private SimpleVcf[] fVcfs; private SimpleVcf[] rVcfs; private int bpPaddingForOverlap = 2; - private boolean noModifyTempus = true; private boolean appendChr = false; private boolean excludeInherited = false; @@ -35,6 +34,7 @@ public class TempusVcfComparator { private int numberModifiedTempusCalls = 0; private int numberTempusWithNoMatch = 0; private int numberPassingRecallWithNoMatch = 0; + private int numberInherited = 0; private ArrayList vcfToPrint = new ArrayList(); private ArrayList headerLines = new ArrayList(); @@ -78,10 +78,10 @@ private void printStats() { System.out.println( numberRecall +"\t# Recall variants"); System.out.println( numberShortTempus +"\t# Short Tempus variants"); System.out.println( numberOtherTempus +"\t# Other Tempus variants"); + System.out.println( numberInherited 
+"\t# Inherited Tempus variants, skipped? "+excludeInherited); System.out.println( numberExactMatches +"\t# Short with an exact match"); System.out.println( numberTempusWithOnlyOverlap +"\t# Short with overlap recal variants"); - if (noModifyTempus) System.out.println( numberModifiedTempusCalls +"\t# Short recommended for modification"); - else System.out.println( numberModifiedTempusCalls +"\t# Short modified using overlapping recal variant info"); + System.out.println( numberModifiedTempusCalls +"\t# Short recommended for modification"); System.out.println( numberTempusWithNoMatch +"\t# Short with no match"); System.out.println( numberPassingRecallWithNoMatch +"\t# Passing recall variants with no Short match"); } @@ -114,14 +114,11 @@ private void processRecallVcfs() { for (SimpleVcf r:rVcfs){ //print it? if (r.isPrint() && r.getFilter().toLowerCase().contains("fail") == false) { - //mark Filter NR not reported - r.appendFilter("NR"); vcfToPrint.add(r); - numberPassingRecallWithNoMatch++; + if (r.getMatch() == null) numberPassingRecallWithNoMatch++; } } } - /**Merges header lines eliminating duplicates. Does a bad ID name collision checking, silently keeps first one. * Returns null if CHROM lines differ. */ @@ -164,11 +161,6 @@ else if (other.contains(h) == false) { //add in filter lines filter.add(SimpleVcf.ncFilter); - filter.add(SimpleVcf.nrFilter); - filter.add(SimpleVcf.mdFilter); - - //add info lines - info.add(SimpleVcf.infoRAF); //remove ID dups from contig, filter, format, info ArrayList contigAL = VCFParser.mergeHeaderIds(contig); @@ -188,8 +180,6 @@ else if (other.contains(h) == false) { return Misc.stringArrayListToStringArray(lines); } - - private void processTempusVcfs() { //for each Tempus record for (SimpleVcf f: fVcfs){ @@ -198,82 +188,34 @@ private void processTempusVcfs() { if (f.isShortVariant() == false) { vcfToPrint.add(f); numberOtherTempus++; - continue; } - - numberShortTempus++; - - //exact match? 
- if (f.getMatch() != null) { - //exact match then just print it - f.appendRAF(f.getMatch()); - f.appendID(f.getMatch()); - vcfToPrint.add(f); - numberExactMatches++; - f.getMatch().setPrint(false); - continue; - } - - //So no exact match any overlap? - if (f.getOverlap().size()!=0){ - //always print the tempus vcf record with a NC FILTER field, not confirmed. - //question is what to do about the overlapping records? print with NR FILTER field, not reported by tempus? - numberTempusWithOnlyOverlap++; - - //more than one overlap? print tempus and the multiple with NC and NR's - if (f.getOverlap().size()!=1){ - //System.err.println("Multiple overlap. Printing the Tempus and Recall variants:"); - //System.err.println("F:\t"+f.getOriginalRecord()); - //for (SimpleVcf r: f.getOverlap()) System.err.println("R:\t"+r.getOriginalRecord()); - f.appendFilter("NC"); + else { + numberShortTempus++; + //exact match? + if (f.getMatch() != null) { + numberExactMatches++; + //exact match then add tempus info to recall + SimpleVcf vcf = f.getMatch(); + vcf.appendID(f); + vcf.appendINFO(f); + f.setPrint(false); } - - //ok so only one overlap, do the types match? else { - int lenFRef = f.getRef().length(); - int lenFAlt = f.getAlt().length(); - SimpleVcf r = f.getOverlap().get(0); - int lenRRef = r.getRef().length(); - int lenRAlt = r.getAlt().length(); - - //types match and it's a good recal variant, modify the tempus call and print, don't print the recal variant - if (lenFRef == lenRRef && lenFAlt == lenRAlt && r.getFilter().toLowerCase().contains("fail") == false){ - if (noModifyTempus){ - System.err.println("WARNING: One overlap and types match, recommend modifying the Tempus record. 
Will print both with no chr, pos, alt, ref modifications."); - f.appendFilter("NC"); - System.err.println("R:\t"+r.getOriginalRecord()); - System.err.println("F:\t"+f.getOriginalRecord()); - numberModifiedTempusCalls++; - } - else { - System.err.println("WARNING: One overlap and types match thus MODIFYING the Tempus pos, ref, alt info and printing it. Not printing the recall."); - f.swapInfoWithOverlap(r); - f.appendFilter("MD"); - System.err.println("R:\t"+r.getOriginalRecord()); - System.err.println("F:\t"+f.getOriginalRecord()); - System.err.println("M:\t"+f.getVcfLine()); - numberModifiedTempusCalls++; - //set recall to not print - r.setPrint(false); - } - } - //types don't match so print tempus and recal + //So no exact match any overlap? + if (f.getOverlap().size()!=0) numberTempusWithOnlyOverlap++; + + //No exact or overlap else { - //System.err.println("One overlap, but diff types. Printing Tempus and Recall vars."); - f.appendFilter("NC"); + System.err.println("WARNING: No match to this Tempus variant."); + System.err.println("F:\t"+f.getVcfLine()); + numberTempusWithNoMatch++; } + //always print it + f.appendFilter("NC"); + vcfToPrint.add(f); } - //in all cases print the tempus var - vcfToPrint.add(f); - continue; + } - - //No exact or overlap, flag and print - System.err.println("WARNING: No match to this Tempus variant."); - numberTempusWithNoMatch++; - f.appendFilter("NC"); - vcfToPrint.add(f); - System.err.println("F:\t"+f.getVcfLine()); } } @@ -305,7 +247,10 @@ private SimpleVcf[] load(File vcf, boolean excludeContig) { ArrayList al = new ArrayList(); for (String v: lines){ if (v.startsWith("#") == false) { - if (excludeInherited && v.contains("inherited")) continue; + if (v.contains("inherited")) { + numberInherited++; + if (excludeInherited) continue; + } if (appendChr && v.startsWith("chr") == false) v = "chr"+v; al.add(new SimpleVcf(v, bpPaddingForOverlap)); } @@ -322,7 +267,6 @@ private SimpleVcf[] load(File vcf, boolean excludeContig) { } 
public static void main(String[] args) { - IO.pl("Trying..."); if (args.length ==0){ printDocs(); System.exit(0); @@ -346,7 +290,6 @@ public void processArgs(String[] args){ case 't': tempusVcf = new File(args[++i]); break; case 'r': recallVcf = new File(args[++i]); break; case 'm': mergedVcf = new File(args[++i]); break; - case 'k': noModifyTempus = false; break; case 'c': appendChr = true; break; case 'e': excludeInherited = true; break; default: Misc.printErrAndExit("\nProblem, unknown option! " + mat.group()); @@ -377,8 +320,6 @@ public static void printDocs(){ "-r Path to a recalled snv/indel vcf file.\n"+ "-m Path to named vcf file for saving the results.\n"+ "-c Append chr if absent in chromosome name.\n"+ - "-k Attempt to merge Tempus records that overlap a recall and are the same type.\n"+ - " Defaults to printing both.\n"+ "-e Exclude 'inherited' germline Tempus records from the comparison and merged output.\n"+ "\nExample: java -Xmx2G -jar pathToUSeq/Apps/TempusVcfComparator -f TL-18-03CFD6.vcf\n" + diff --git a/Source/edu/utah/seq/vcf/xml/SimpleVcf.java b/Source/edu/utah/seq/vcf/xml/SimpleVcf.java index d8eed6b6..d18dcaad 100644 --- a/Source/edu/utah/seq/vcf/xml/SimpleVcf.java +++ b/Source/edu/utah/seq/vcf/xml/SimpleVcf.java @@ -36,9 +36,9 @@ public class SimpleVcf implements Comparable{ private ArrayList overlap = new ArrayList(); private static final Pattern endPat = Pattern.compile(".+END=(\\d+);.+"); private static final Pattern afPat = Pattern.compile(".+AF=([\\d+\\.]+).*"); - public static final String ncFilter = "##FILTER="; + public static final String ncFilter = "##FILTER="; public static final String nrFilter = "##FILTER="; - public static final String mdFilter = "##FILTER="; + public static final String mdFilter = "##FILTER="; public static String infoRAF = "##INFO="; //#CHROM POS ID REF ALT QUAL FILTER INFO @@ -118,6 +118,10 @@ public void appendID(SimpleVcf o) { if (o.getId().equals(".") == false) id = id+";"+o.getId(); } + public void 
appendINFO(SimpleVcf o) { + info = info+";"+o.info; + } + public String toString(){ //#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLES..... return chr+ "\t"+ (1+pos)+ "\t"+ id+ "\t"+ ref+ "\t"+ alt+ "\t"+ qual+ "\t"+ filter+ "\t"+ info;