diff --git a/Documentation/USeqDocumentation/cmdLnMenus.html b/Documentation/USeqDocumentation/cmdLnMenus.html
index bf0a7bd..c742dc9 100644
--- a/Documentation/USeqDocumentation/cmdLnMenus.html
+++ b/Documentation/USeqDocumentation/cmdLnMenus.html
@@ -79,6 +79,8 @@ <H1>Command Line Menus</H1> <p>
 				
 					<a href="#CalculatePerCycleErrorRate">CalculatePerCycleErrorRate</a><br>
 				
+					<a href="#CarisVcfComparator">CarisVcfComparator</a><br>
+				
 					<a href="#CarisXmlVcfParser">CarisXmlVcfParser</a><br>
 				
 					<a href="#ChIPSeq">ChIPSeq</a><br>
@@ -1365,29 +1367,50 @@ <H1>Command Line Menus</H1> <p>
 Example: java -Xmx1500M -jar pathTo/USeq/Apps/CalculatePerCycleErrorRate
      -b /Data/Bam/ -f /Fastas/chrPhiX_Illumina.fasta.gz 
 
+**************************************************************************************
+</pre><br><p>
+				
+					<a name="CarisVcfComparator"><pre>**************************************************************************************
+**                           Caris Vcf Comparator: March 2022                       **
+**************************************************************************************
+CVC compares a Caris vcf generated with the CarisXmlVcfParser to a recalled vcf.
+Exact recall vars are so noted and removed. Caris vcf with no exact but one
+overlapping record can be merged with -k. Be sure to vt normalize each before running.
+Recall variants failing FILTER are not saved.
+
+Options:
+-c Path to a the Caris xml parsed vcf file, see the CarisXmlVcfParser app.
+-r Path to a recalled snv/indel vcf file.
+-m Path to named vcf file for saving the results.
+
+Example: java -Xmx2G -jar pathToUSeq/Apps/CarisVcfComparator -f TL-18-03CFD6.vcf
+     -r /F1/TL-18-03CFD6_recall.vcf.gz -g -c -m /F1/TL-18-93CFD6_merged.vcf.gz -k 
+
 **************************************************************************************
 </pre><br><p>
 				
 					<a name="CarisXmlVcfParser"><pre>**************************************************************************************
-**                           Caris Xml Vcf Parser: March 2021                       **
+**                           Caris Xml Vcf Parser: March 2022                       **
 **************************************************************************************
 This tool parses Caris paired xml and vcf report files to generate: new vcfs where xml
 reported genomic alternations are annotated, bed files of copy number changes and gene
 fusions as well as a summary spreadsheet of the non NGS test results and report
-attributes.
+attributes. Use Vt to normalize the vcf output files. 
 
 Options:
 -d Path to a directory containing paired xml and vcf files from Caris.
--s Path to a directory for saving the results.
+-s Path to a directory for saving the results. Only impacting vcf, cnv, fusions are
+     saved, if none present, no file(s) are created.
 -u Path to a Hg38 UCSC RefFlat or RefSeq merged gene file for CNV and gene fusion
    coordinate extraction. See: http://www.genome.ucsc.edu/FAQ/FAQformat.html#format9
    (refSeqGeneName name2(optional) chr strand txStart txEnd cdsStart cdsEnd exonCount
    exonStarts exonEnds). Be sure to merge transcripts with the USeq MergeUCSCGeneTable
    app and remove non standard chromosomes.
 -i Include PHI in spreadsheet output, defaults to excluding.
+-a Include all vcf records in output, defaults to just those with an xml match.
 
 Example: java -Xmx2G -jar pathToUSeq/Apps/CarisXmlVcfParser -d CarisReports/
-     -s ParsedCarisReports/ -i -u ~/GRCh38/hg38RefSeq9Dec2020_MergedStdChr.ucsc.gz 
+     -s ParsedCarisReports/ -u ~/GRCh38/hg38RefSeq9Dec2020_MergedStdChr.ucsc.gz 
 
 **************************************************************************************
 </pre><br><p>
@@ -4891,7 +4914,7 @@ <H1>Command Line Menus</H1> <p>
 </pre><br><p>
 				
 					<a name="TNRunner2"><pre>**************************************************************************************
-**                                TNRunner2 : Feb 2022                              **
+**                                TNRunner2 : March 2022                            **
 **************************************************************************************
 TNRunner is designed to execute several containerized snakmake workflows on tumor
 normal datasets via a slurm cluster.  Based on the availability of fastq, 
@@ -5165,9 +5188,11 @@ <H1>Command Line Menus</H1> <p>
      delimited, the first cell is the pathway ID and description (e.g.
      'hsa05210  Colorectal cancer'), subsequent cells, the associated genes.
 -r File to save results, should end with .xls
+-k (Optional) Tab delimited file containing KEGG gene IDs and gene Names, one row
+      per gene, for highlighting impacted genes in each KEGG pathway hyperlink.
 
 Example: java -Xmx10G -jar pathTo/USeq/Apps/VariantPathwayComparator -a earlyCRC.txt
-   -b lateCRC.txt -p keggHumanPathways.txt -r earlyVsLateVPC.xls 
+   -b lateCRC.txt -p keggHumanPathways.txt -r earlyVsLateVPC.xls -k keggGeneIDName.txt
 
 **************************************************************************************
 </pre><br><p>
@@ -5335,7 +5360,7 @@ <H1>Command Line Menus</H1> <p>
 -e Print verbose debugging output.
 
 Example: java -jar pathToUSeq/Apps/VCFCallFrequency -f Vcf/ -s CFVcfs -v 
-    Hg38/Somatic/Avatar/Vcf -b  -m 0.05 -d ~/GQuery/Data 
+    Hg38/Somatic/Avatar/Vcf -b  -m 0.07 -d ~/GQuery/Data 
 
 **************************************************************************************
 </pre><br><p>
diff --git a/Misc/JavaxComOrgInfoNetClasses.zip b/Misc/JavaxComOrgInfoNetClasses.zip
index 3405d88..95a121e 100644
Binary files a/Misc/JavaxComOrgInfoNetClasses.zip and b/Misc/JavaxComOrgInfoNetClasses.zip differ
diff --git a/Source/edu/utah/seq/run/TNRunner2.java b/Source/edu/utah/seq/run/TNRunner2.java
index 73f69ec..946847e 100644
--- a/Source/edu/utah/seq/run/TNRunner2.java
+++ b/Source/edu/utah/seq/run/TNRunner2.java
@@ -864,7 +864,7 @@ public void processArgs(String[] args){
 	public static void printDocs(){
 		IO.pl("\n" +
 				"**************************************************************************************\n" +
-				"**                                TNRunner2 : Feb 2022                              **\n" +
+				"**                                TNRunner2 : March 2022                            **\n" +
 				"**************************************************************************************\n" +
 				"TNRunner is designed to execute several containerized snakmake workflows on tumor\n"+
 				"normal datasets via a slurm cluster.  Based on the availability of fastq, \n"+
diff --git a/Source/edu/utah/seq/run/TNSample2.java b/Source/edu/utah/seq/run/TNSample2.java
index 694d3ab..08306fd 100644
--- a/Source/edu/utah/seq/run/TNSample2.java
+++ b/Source/edu/utah/seq/run/TNSample2.java
@@ -104,19 +104,30 @@ public TNSample2(File rootDir, TNRunner2 tnRunner) throws IOException{
 		private void parseMergeClinicalVars() throws IOException {
 			info.add("Checking clinical variant integration...");
 			
-			//look for json file
+			//look for json file and xml vcfs
 			File[] jsonTestResults = IO.extractFiles(new File(rootDir, "ClinicalReport"), ".json");
+			File[] xmls = null;
+			File[] vcfs = null;
+			File[] toLink = null;
+			
 			if (jsonTestResults == null || jsonTestResults.length !=1) {
-				info.add("\tJsonReport\tFAILED to find one xxx.json clinical test report file");
-				failed = true;
-				return;
+				
+				//look for caris xml and vcf.gz
+				xmls = IO.extractFiles(new File(rootDir, "ClinicalReport"), ".xml");
+				vcfs = IO.extractFiles(new File(rootDir, "ClinicalReport"), ".vcf.gz");
+				if (xmls.length != 1 || vcfs.length != 1) {
+					info.add("\tJson/Xml/VcfReport\tFAILED to find one xxx.json or one xxx.vcf.gz and xxx.xml clinical test report file(s)");
+					failed = true;
+					return;
+				}
+				jsonTestResults = null;
+				toLink = new File[]{somaticVariants, new File(somaticVariants+".tbi"), xmls[0], vcfs[0]};
 			}
+			else toLink = new File[]{somaticVariants, new File(somaticVariants+".tbi"), jsonTestResults[0]};
 			
 			//make dir, ok if it already exists
 			File jobDir = new File (rootDir, "SomaticVariantCalls/"+id+"_ClinicalVars");
 			jobDir.mkdirs();
-			
-			File[] toLink = new File[]{somaticVariants, new File(somaticVariants+".tbi"), jsonTestResults[0]};
 
 			//any files?
 			HashMap<String, File> nameFile = IO.fetchNamesAndFiles(jobDir);
diff --git a/Source/edu/utah/seq/vcf/VCFCallFrequency.java b/Source/edu/utah/seq/vcf/VCFCallFrequency.java
index 70a0b54..2986416 100644
--- a/Source/edu/utah/seq/vcf/VCFCallFrequency.java
+++ b/Source/edu/utah/seq/vcf/VCFCallFrequency.java
@@ -25,7 +25,6 @@
 import edu.utah.hci.query.MasterQuery;
 import edu.utah.hci.query.QueryRequest;
 import edu.utah.hci.query.UserQuery;
-import edu.utah.seq.vcf.anno.AnnotatedVcfParser;
 
 /**Calculates call frequencies for each vcf record from a db of vcf files and callable region files. 
  * Most mutated single site in p53 are seen at 0.039 across all cancers in TCGA.  0.57 for BRAF V600E across all cancers.*/
@@ -292,9 +291,10 @@ public void query(UserQuery userQuery) throws IOException {
 		//make a single use QueryRequest object
 		QueryRequest qr = new QueryRequest(masterQuery, null, options, null);
 		//any errors?
-		if (qr.getErrTxtForUser() != null || qr.getWarningTxtForUser().size()!=0) {
+		if (qr.getErrTxtForUser() != null ) {
+		//if (qr.getErrTxtForUser() != null || qr.getWarningTxtForUser().size()!=0) {
 			if (qr.getErrTxtForUser() !=null) Misc.printErrAndExit("\nInvalid request, "+qr.getErrTxtForUser()+ ",aborting.");
-			if (qr.getWarningTxtForUser().size() !=0) Misc.printErrAndExit("\n"+qr.getWarningTxtForUser()+", aborting.");
+			// already printed! so skip, if (qr.getWarningTxtForUser().size() !=0) Misc.printErrAndExit("\n"+qr.getWarningTxtForUser()+", might be an issue.");
 		}
 		//set results
 		else userQuery.setResults(qr.getJsonResults());
@@ -639,7 +639,7 @@ public static void printDocs(){
 
 
 				"\nExample: java -jar pathToUSeq/Apps/VCFCallFrequency -f Vcf/ -s CFVcfs -v \n"+
-				"    Hg38/Somatic/Avatar/Vcf -b  -m 0.05 -d ~/GQuery/Data \n\n" +
+				"    Hg38/Somatic/Avatar/Vcf -b  -m 0.07 -d ~/GQuery/Data \n\n" +
 				"**************************************************************************************\n");
 
 	}
diff --git a/Source/edu/utah/seq/vcf/VariantPathwayComparator.java b/Source/edu/utah/seq/vcf/VariantPathwayComparator.java
index f6c0c9d..3e7a981 100644
--- a/Source/edu/utah/seq/vcf/VariantPathwayComparator.java
+++ b/Source/edu/utah/seq/vcf/VariantPathwayComparator.java
@@ -11,10 +11,13 @@ public class VariantPathwayComparator {
 	private File groupAGeneHits;
 	private File groupBGeneHits;
 	private File pathwayGenes;
+	private File geneKeggIdName;
 	
 	//internal
 	private ArrayList<Pathway> pathways = new ArrayList<Pathway>();
 	private File resultsSpreadsheet = null;
+	private HashMap<String,String> keggGeneNameID = null;
+	private double minKeggFreq = 0.025;
 	
 	
 	//constructor
@@ -52,14 +55,14 @@ private void sortSavePathways() {
 		
 		try {
 			PrintWriter out = new PrintWriter( new FileWriter(this.resultsSpreadsheet));
-			out.println("NameHyperLink\tAdjPval\tAHits\tANoHits\tFracAHits\tAGeneHits\tBHits\tBNoHits\tFracBHits\tBGeneHits\tLog2(fracA/fracB)");
-			for (Pathway p: loadedPathways) out.println(p.toString());
+			if (geneKeggIdName == null) out.println("NameHyperLink\tPval\tAdjPval\tAHits\tANoHits\tFracAHits\tAGeneHits\tBHits\tBNoHits\tFracBHits\tBGeneHits\tLog2(fracA/fracB)");
+			else out.println("NameHyperLink\tPval\tAdjPval\tAHits\tANoHits\tFracAHits\tAGeneHits\tAGeneLinks\tBHits\tBNoHits\tFracBHits\tBGeneHits\tBGeneLinks\tLog2(fracA/fracB)");
+			for (Pathway p: loadedPathways) out.println(p.toString(keggGeneNameID));
 			out.close();
 		} catch (IOException e) {
 			e.printStackTrace();
 		}
 		
-		
 	}
 
 	private void comparePathways() {
@@ -173,6 +176,8 @@ private void loadPathways() {
 			e.printStackTrace();
 			Misc.printErrAndExit("\nFailed to parse the pathway file "+pathwayGenes);
 		}
+		
+		if (geneKeggIdName!=null) keggGeneNameID = IO.loadFileIntoHash(geneKeggIdName, 1, 0);
 	}
 	
 	private class Pathway implements Comparable<Pathway>{
@@ -197,32 +202,34 @@ private class Pathway implements Comparable<Pathway>{
 		
 		
 		
-		public String toString() {
-			//name adjPval AHits, ANoHits, fracAHits, AGeneHits, BHits, BNoHits, fracBHits, BGeneHits, log2(fracA/fracB)
+		public String toString(HashMap<String,String>keggGeneNameId) {
+			//name pval, adjPval, AHits, ANoHits, fracAHits, AGeneHits, (ALinks,) BHits, BNoHits, fracBHits, BGeneHits, (BLinks), log2(fracA/fracB)
 			StringBuilder sb = new StringBuilder();
 			//attempt to parse the hsa0000 pathway number
 			Matcher mat = hsaPath.matcher(name);
+			String partialHyperLink = null;
 			if (mat.matches()) {
 				sb.append("=HYPERLINK(\"https://www.kegg.jp/pathway/");
 				sb.append(mat.group(1));
+				partialHyperLink = sb.toString();
 				sb.append("\",\"");
 				sb.append(name);
 				sb.append("\")");
 			}
-			else sb.append(name); 
-			sb.append("\t");
+			else sb.append(name); sb.append("\t");
+			sb.append(Num.formatNumber(pval, 5)); sb.append("\t");
 			sb.append(Num.formatNumber(adjPval, 5)); sb.append("\t");
 			sb.append(groupAHits.size()); sb.append("\t");
 			sb.append(groupANoHits.size()); sb.append("\t");
 			double aFrac = (double)groupAHits.size()/ (double)(groupAHits.size()+groupANoHits.size());
 			sb.append(Num.formatNumber(aFrac, 5)); sb.append("\t");
-			sb.append(convertToSortedFrequencies(aGenes)); sb.append("\t");
+			sb.append(convertToSortedFrequencies(aGenes, partialHyperLink)); sb.append("\t");
 			
 			sb.append(groupBHits.size()); sb.append("\t");
 			sb.append(groupBNoHits.size()); sb.append("\t");
 			double bFrac = (double)groupBHits.size()/ (double)(groupBHits.size()+groupBNoHits.size());
 			sb.append(Num.formatNumber(bFrac, 5)); sb.append("\t");
-			sb.append(convertToSortedFrequencies(bGenes)); sb.append("\t");
+			sb.append(convertToSortedFrequencies(bGenes, partialHyperLink)); sb.append("\t");
 
 			double rto = aFrac/bFrac;
 			sb.append(Num.formatNumber(Num.log2(rto), 5));
@@ -237,7 +244,7 @@ public int compareTo(Pathway o) {
 		
 	}
 	
-	public static String convertToSortedFrequencies(HashMap<String,Integer> keyCounts) {
+	public String convertToSortedFrequencies(HashMap<String,Integer> keyCounts, String pathwayName) {
 		double totalCounts = 0;
 		for (String key: keyCounts.keySet()) totalCounts+= keyCounts.get(key);
 		GeneCount[] gc = new GeneCount[keyCounts.size()];
@@ -249,17 +256,43 @@ public static String convertToSortedFrequencies(HashMap<String,Integer> keyCount
 			gc[count++] = new GeneCount(key, geneCount);
 		}
 		Arrays.sort(gc);
-		StringBuilder sb = new StringBuilder(gc[0].name);
-		sb.append("=");
-		sb.append(Num.formatNumber(gc[0].fraction, 3));
-		for (int i=1; i<gc.length; i++) {
-			sb.append(",");
-			sb.append(gc[i].name);
-			sb.append("=");
-			sb.append(Num.formatNumber(gc[i].fraction, 3));
+		
+		StringBuilder namesToView = new StringBuilder();
+		ArrayList<String> namesToLink = new ArrayList<String>();
+		int last = gc.length-1;
+		for (int i=0; i<gc.length; i++) {
+			namesToView.append(gc[i].name);
+			namesToView.append("=");
+			namesToView.append(Num.formatNumber(gc[i].fraction, 3));
+			if (i!=last)namesToView.append(",");
+			//any for linking?
+			if (gc[i].fraction >= minKeggFreq) namesToLink.add(gc[i].name);
+		}
+		String geneNamesToView = namesToView.toString();
+		if (keggGeneNameID != null) return geneNamesToView+ "\t"+ createGeneHyperLink(pathwayName, namesToLink, geneNamesToView);
+		return geneNamesToView;
+	}
+	private String createGeneHyperLink(String pathwayLink, ArrayList<String> namesToLink, String geneNamesToView) {
+		//https://www.kegg.jp/pathway/hsa03440
+		//must watch size, max 250
+		StringBuilder sb = new StringBuilder(pathwayLink);
+		for (String geneName: namesToLink) {
+			String id = keggGeneNameID.get(geneName);
+			int size = sb.length() + id.length()+1;
+			if (size > 250) break;
+			if (id != null) {
+				sb.append("+");
+				sb.append(id);
+			
+			}
 		}
+		sb.append("\",\"");
+		if (geneNamesToView.length()> 250) sb.append(geneNamesToView.subSequence(0, 247)+"...");
+		else sb.append(geneNamesToView);
+		sb.append("\")");
 		return sb.toString();
 	}
+
 	public static class GeneCount implements Comparable<GeneCount>{
 		double fraction = 0;
 		String name = null;
@@ -301,6 +334,7 @@ public void processArgs(String[] args){
 					case 'a': groupAGeneHits = new File(args[++i]); break;
 					case 'b': groupBGeneHits = new File(args[++i]); break;
 					case 'p': pathwayGenes = new File(args[++i]); break;
+					case 'k': geneKeggIdName = new File(args[++i]); break;
 					case 'r': resultsSpreadsheet = new File(args[++i]); break;
 					case 'h': printDocs(); System.exit(0);
 					default: Misc.printErrAndExit("\nProblem, unknown option! " + mat.group());
@@ -341,9 +375,11 @@ public static void printDocs(){
 				"     delimited, the first cell is the pathway ID and description (e.g.\n"+
 		        "     'hsa05210  Colorectal cancer'), subsequent cells, the associated genes.\n"+
 				"-r File to save results, should end with .xls\n"+
+		        "-k (Optional) Tab delimited file containing KEGG gene IDs and gene Names, one row\n"+
+				"      per gene, for highlighting impacted genes in each KEGG pathway hyperlink.\n"+
 				
 				"\nExample: java -Xmx10G -jar pathTo/USeq/Apps/VariantPathwayComparator -a earlyCRC.txt\n"+
-				"   -b lateCRC.txt -p keggHumanPathways.txt -r earlyVsLateVPC.xls \n"+
+				"   -b lateCRC.txt -p keggHumanPathways.txt -r earlyVsLateVPC.xls -k keggGeneIDName.txt\n"+
 
 		"\n**************************************************************************************\n");
 
diff --git a/Source/edu/utah/seq/vcf/xml/caris/CNVAlteration.java b/Source/edu/utah/seq/vcf/xml/caris/CNVAlteration.java
index 324638d..08ff0f3 100644
--- a/Source/edu/utah/seq/vcf/xml/caris/CNVAlteration.java
+++ b/Source/edu/utah/seq/vcf/xml/caris/CNVAlteration.java
@@ -110,4 +110,9 @@ public SimpleBed toBed(HashMap<String, UCSCGeneLine[]> name2GeneModels) throws I
 		if (score == null) score = ".";
 		return new SimpleBed(model.getChrom(), model.getTxStart(), model.getTxEnd(), geneName+":"+result.toLowerCase().replace(" ", "_"), score, model.getStrand());
 	}
+
+
+	public String getResult() {
+		return result;
+	}
 }
diff --git a/Source/edu/utah/seq/vcf/xml/caris/CarisVcfComparator.java b/Source/edu/utah/seq/vcf/xml/caris/CarisVcfComparator.java
new file mode 100644
index 0000000..a254f1c
--- /dev/null
+++ b/Source/edu/utah/seq/vcf/xml/caris/CarisVcfComparator.java
@@ -0,0 +1,326 @@
+package edu.utah.seq.vcf.xml.caris;
+
+import java.io.*;
+import java.util.regex.*;
+import edu.utah.seq.vcf.VCFParser;
+import util.gen.*;
+import java.util.*;
+
+/**
+ * Takes a patient vcf file parsed from a Caris json report and compares it to a vcf generated by reprocessing the raw data.
+ * Writes out a final arbitrated vcf containing all the Caris calls plus non duplicate recall variants with no FILTER flags.
+ * 
+ * @author david.nix@hci.utah.edu 
+ **/
+public class CarisVcfComparator {
+
+	//user defined fields
+	private File carisVcf = null;
+	private File recallVcf = null;
+	private File mergedVcf = null;
+	private SimpleVcf[] fVcfs;
+	private SimpleVcf[] rVcfs;	
+	private int bpPaddingForOverlap = 2;
+
+	//counters
+	private int numberCaris = 0;
+	private int numberRecall = 0;
+	private int numberExactMatches = 0;
+	private int numberCarisWithOnlyOverlap = 0;
+	private int numberModifiedCarisCalls = 0;
+	private int numberCarisWithNoMatch = 0;
+	private int numberPassingRecallWithNoMatch = 0;
+	
+	private ArrayList<SimpleVcf> vcfToPrint = new ArrayList<SimpleVcf>();
+	private ArrayList<String> headerLines = new ArrayList<String>();
+	
+	
+	
+
+	//constructors
+	public CarisVcfComparator(String[] args){
+		try {
+			long startTime = System.currentTimeMillis();
+			processArgs(args);
+
+			//load vcf files
+			fVcfs = load(carisVcf, true);
+			rVcfs = load(recallVcf, true);
+			numberRecall = rVcfs.length;
+
+			compareVcfs();
+
+			processCarisVcfs();
+			
+			processRecallVcfs();
+			
+			printVcfs();
+			
+			printStats();
+
+			//finish and calc run time
+			double diffTime = ((double)(System.currentTimeMillis() -startTime))/60000;
+			System.out.println("\nDone! "+Math.round(diffTime)+" Min\n");
+
+		} catch (Exception e) {
+			e.printStackTrace();
+			Misc.printErrAndExit("\nProblem running CarisJson2Vcf app!");
+		}
+	}
+
+	private void printStats() {
+		System.out.println("\nComparator stats:");
+		System.out.println( numberRecall +"\t# Recall variants");
+		System.out.println( numberCaris +"\t# Caris variants");
+		System.out.println( numberExactMatches +"\t# Exact match");
+		System.out.println( numberCarisWithOnlyOverlap +"\t# Overlap recal variants");
+		System.out.println( numberModifiedCarisCalls +"\t# Recommended for modification");
+		System.out.println( numberCarisWithNoMatch +"\t# No match"); 
+		System.out.println( numberPassingRecallWithNoMatch +"\t# Passing recall variants with no xml vcf match");
+	}
+
+	private void printVcfs() {
+		//sort vcf
+		SimpleVcf[] vcf = new SimpleVcf[vcfToPrint.size()];
+		vcfToPrint.toArray(vcf);
+		Arrays.sort(vcf);
+		Pattern pat = Pattern.compile("N_DP=.+SOMATIC");
+		
+		try {
+			Gzipper out = new Gzipper(mergedVcf);
+			//fetch merged header
+			String[] header = mergeHeaders(headerLines);
+			
+			for (String h: header) out.println(h);
+			for (SimpleVcf v: vcf) {
+				if (v.getFilter().toLowerCase().contains("fail") == false) {
+					//fix the the SOMATIC info
+					String line = v.getVcfLine();
+					line = pat.matcher(line).replaceFirst("MIXED_SOMATIC_GERMLINE");
+					out.println(line);
+				}
+			}
+			
+			out.close();
+		} catch (IOException e) {
+			e.printStackTrace();
+			Misc.printErrAndExit("\nERROR: problem writing out the merged vcf file. "+mergedVcf);
+		}
+	}
+
+	private void processRecallVcfs() {
+		//for each Recall vcf, call this after processing the Caris vcfs, skip those with a fail filter field
+		for (SimpleVcf r:rVcfs){
+			//print it?
+			if (r.isPrint() && r.getFilter().toLowerCase().contains("fail") == false) {
+				vcfToPrint.add(r);
+				if (r.getMatch() == null) numberPassingRecallWithNoMatch++;
+			}
+		}
+	}
+	
+	/**Merges header lines eliminating duplicates.  Does a bad ID name collision checking, silently keeps first one. 
+	 * Returns null if CHROM lines differ. */
+	public static String[] mergeHeaders(ArrayList<String> header) {
+		
+		LinkedHashSet<String> other = new LinkedHashSet<String>();
+		LinkedHashSet<String> contig = new LinkedHashSet<String>();
+		LinkedHashSet<String> info = new LinkedHashSet<String>();
+		LinkedHashSet<String> filter = new LinkedHashSet<String>();
+		LinkedHashSet<String> format = new LinkedHashSet<String>();
+		TreeSet<String> source = new TreeSet<String>();
+		String chromLine = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tTUMOR";
+
+		for (String h: header){
+			h=h.trim();
+			if (h.startsWith("##contig")){
+				if (contig.contains(h) == false) contig.add(h);
+			}
+			
+			//modify the caris DP info 
+			else if (h.startsWith("##INFO=<ID=DP,Number=1,Type=Integer,Description=")){
+				info.add("##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Caris Total Tumor Depth\">");
+			}
+			
+			//swap out the somatic info
+			else if (h.startsWith("##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description=")) {
+				info.add("##INFO=<ID=MIXED_SOMATIC_GERMLINE,Number=0,Type=Flag,Description=\"Mutation is from a tumor only analysis thus somatic or germline.\">");
+			}
+			
+			else if (h.startsWith("##INFO")){
+				if (info.contains(h) == false) info.add(h);
+			}
+			else if (h.startsWith("##FILTER")){
+				if (filter.contains(h) == false) filter.add(h);
+			}
+			else if (h.startsWith("##FORMAT")){
+				if (format.contains(h) == false) format.add(h);
+			}
+			else if (h.startsWith("##source=")){
+				source.add(h);
+			}
+			else if (h.startsWith("#CHROM")){
+				//don't do anything, just skip it
+			}
+			else if (other.contains(h) == false) {
+				other.add(h);
+			}
+		}
+
+
+		//add in filter lines
+		filter.add(SimpleVcf.ncFilter);
+
+		//remove ID dups from contig, filter, format, info
+		ArrayList<String> contigAL = VCFParser.mergeHeaderIds(contig);
+		ArrayList<String> filterAL = VCFParser.mergeHeaderIds(filter);
+		ArrayList<String> formatAL = VCFParser.mergeHeaderIds(format);
+		ArrayList<String> infoAL = VCFParser.mergeHeaderIds(info);
+
+		ArrayList<String> lines = new ArrayList<String>();
+		for (String s : other) lines.add(s);
+		for (String s : source) lines.add(s);
+		for (String s : contigAL) lines.add(s);
+		for (String s : filterAL) lines.add(s);
+		for (String s : infoAL) lines.add(s);
+		for (String s : formatAL) lines.add(s);
+		if (chromLine != null) lines.add(chromLine);
+
+		return Misc.stringArrayListToStringArray(lines);
+	}
+
+	private void processCarisVcfs() {
+		//for each Caris record
+		for (SimpleVcf f: fVcfs){
+			numberCaris++;
+			//exact match? 
+			if (f.getMatch() != null) {
+				numberExactMatches++;
+				//exact match then add caris info to recall
+				SimpleVcf vcf = f.getMatch();
+				vcf.appendID(f);
+				vcf.appendINFO(f);
+				f.setPrint(false);
+			}
+			else {
+				//So no exact match any overlap?
+				if (f.getOverlap().size()!=0) numberCarisWithOnlyOverlap++;
+
+				//No exact or overlap
+				else {
+					System.err.println("WARNING: No match to this Caris variant:");
+					System.err.println("\t"+f.getVcfLine());
+					numberCarisWithNoMatch++;
+				}
+				//always print it
+				f.appendFilter("NC");
+				vcfToPrint.add(f);
+			}
+		}
+	}
+
+	private void compareVcfs() {
+		//slow comparator, could do many things to speed up...
+		//for each Caris record
+		for (int i=0; i< fVcfs.length; i++){
+			SimpleVcf f = fVcfs[i];
+
+			//for each Recal variant
+			for (int j=0; j< rVcfs.length; j++){
+				SimpleVcf r = rVcfs[j];
+				if (f.compareToExact(r)){
+					if (f.getMatch() !=null || r.getMatch() != null) System.err.println("WARNING: more than one exact match found for \n\t"+f+"\n\t"+r);
+					f.setMatch(r);
+					r.setMatch(f);
+				}
+				else if (f.compareToOverlap(r)){
+					//IO.pl("Overlap xml "+f.getOriginalRecord());
+					//IO.pl("Overlap rec "+r.getOriginalRecord());
+					f.getOverlap().add(r);
+					r.getOverlap().add(f);
+				}
+			}
+		}
+	}
+
+	private SimpleVcf[] load(File vcf, boolean excludeContig) {
+		String[] lines = IO.loadFileIntoStringArray(vcf);
+		ArrayList<SimpleVcf> al = new ArrayList<SimpleVcf>();
+		for (String v: lines){
+			if (v.startsWith("#") == false) {
+				al.add(new SimpleVcf(v, bpPaddingForOverlap));
+			}
+			else {
+				if (excludeContig){
+					if (v.startsWith("##contig") == false) headerLines.add(v);
+				}
+				else headerLines.add(v);
+			}
+		}
+		SimpleVcf[] svs = new SimpleVcf[al.size()];
+		al.toArray(svs);
+		return svs;
+	}
+
+	public static void main(String[] args) {
+		if (args.length ==0){
+			printDocs();
+			System.exit(0);
+		}
+		new CarisVcfComparator(args);
+	}		
+
+	/**This method will process each argument and assign new varibles*/
+	public void processArgs(String[] args){
+		Pattern pat = Pattern.compile("-[a-z]");
+		String useqVersion = IO.fetchUSeqVersion();
+		String source = useqVersion+" Args: "+ Misc.stringArrayToString(args, " ");
+		System.out.println("\n"+ source +"\n");
+		for (int i = 0; i<args.length; i++){
+			String lcArg = args[i].toLowerCase();
+			Matcher mat = pat.matcher(lcArg);
+			if (mat.matches()){
+				char test = args[i].charAt(1);
+				try{
+					switch (test){
+					case 'c': carisVcf = new File(args[++i]); break;
+					case 'r': recallVcf = new File(args[++i]); break;
+					case 'm': mergedVcf = new File(args[++i]); break;
+					default: Misc.printErrAndExit("\nProblem, unknown option! " + mat.group());
+					}
+				}
+				catch (Exception e){
+					Misc.printErrAndExit("\nSorry, something doesn't look right with this parameter: -"+test+"\n");
+				}
+			}
+		}
+		//check files
+		if (carisVcf == null || carisVcf.exists()==false || recallVcf == null || recallVcf.exists()==false) Misc.printErrAndExit("\nError: cannot find both of your vcf files to compare?!\n");
+		if (mergedVcf == null) Misc.printErrAndExit("\nError: please provide a named file for writing the merged vcf!\n");
+		mergedVcf.getParentFile().mkdirs();
+	}	
+
+	public static void printDocs(){
+		System.out.println("\n" +
+				"**************************************************************************************\n" +
+				"**                           Caris Vcf Comparator: March 2022                       **\n" +
+				"**************************************************************************************\n" +
+				"CVC compares a Caris vcf generated with the CarisXmlVcfParser to a recalled vcf.\n"+
+				"Exact recall vars are so noted and removed. Caris vcf with no exact but one\n"+
+				"overlapping record can be merged with -k. Be sure to vt normalize each before running.\n"+
+				"Recall variants failing FILTER are not saved.\n"+
+
+				"\nOptions:\n"+
+				"-c Path to a the Caris xml parsed vcf file, see the CarisXmlVcfParser app.\n"+
+				"-r Path to a recalled snv/indel vcf file.\n"+
+				"-m Path to named vcf file for saving the results.\n"+
+
+				"\nExample: java -Xmx2G -jar pathToUSeq/Apps/CarisVcfComparator -f TL-18-03CFD6.vcf\n" +
+				"     -r /F1/TL-18-03CFD6_recall.vcf.gz -g -c -m /F1/TL-18-93CFD6_merged.vcf.gz -k \n\n" +
+
+				"**************************************************************************************\n");
+	}
+
+
+
+}
diff --git a/Source/edu/utah/seq/vcf/xml/caris/CarisXmlVcfParser.java b/Source/edu/utah/seq/vcf/xml/caris/CarisXmlVcfParser.java
index 92db35c..66050ff 100644
--- a/Source/edu/utah/seq/vcf/xml/caris/CarisXmlVcfParser.java
+++ b/Source/edu/utah/seq/vcf/xml/caris/CarisXmlVcfParser.java
@@ -11,7 +11,7 @@
 import java.util.*;
 
 /**
- * Takes one or more patient xml reports from Caris tests and converts most of the variants into vcf format.
+ * Takes one or more patient xml reports from Caris tests and converts most of the variants into vcf format by using the Caris vcf to pull genomic coordinates which are not provided in the xml
  * Outputs a summary clinical data spreadsheet
  * @author david.nix@hci.utah.edu 
  **/
@@ -21,6 +21,7 @@ public class CarisXmlVcfParser {
 	private HashMap<String, File[]> xmlVcfFiles = null;
 	private File saveDirectory;
 	private boolean includePHI = false;
+	private boolean saveAllVcfRecords = false;
 
 	//internal fields
 	private String source;
@@ -48,6 +49,7 @@ public class CarisXmlVcfParser {
 	private ArrayList<CNVAlteration> workingCNVAlterations = new ArrayList<CNVAlteration>();
 	private ArrayList<Translocation> workingTranslocations = new ArrayList<Translocation>();
 	private ArrayList<ExpressionAlteration> workingExpressionAlterations = new ArrayList<ExpressionAlteration>();
+	private ArrayList<MethylationAlteration> workingMethylationAlterations = new ArrayList<MethylationAlteration>();
 
 
 	//constructors
@@ -76,7 +78,7 @@ public CarisXmlVcfParser(String[] args){
 	}
 
 	private void printSpreadsheet() throws IOException {
-		IO.pl("Exporting patient summary spreadsheet... ");
+		IO.pl("\nExporting patient summary spreadsheet... ");
 		//merge all the keys
 		LinkedHashSet<String> allKeys = new LinkedHashSet<String>();
 		for (LinkedHashMap<String,String> s : allReportAttributes) allKeys.addAll(s.keySet());
@@ -132,6 +134,7 @@ public void parseDatasets() throws Exception{
 			workingCNVAlterations.clear();
 			workingTranslocations.clear();
 			workingExpressionAlterations.clear();
+			workingMethylationAlterations.clear();
 			workingNumberFailedGenomicMatches = 0;
 
 			loadVcf();
@@ -215,10 +218,13 @@ private void loadVcf() throws IOException {
 					workingVcfHeader.append(SimpleVcf.xrv);
 					workingVcfHeader.append(SimpleVcf.xrp);
 				}
-				workingVcfHeader.append(line); workingVcfHeader.append("\n");
+				//fix their malformed header line that is causing a vt to error
+				if (line.startsWith("##FORMAT=<ID=SA,")) line = "##FORMAT=<ID=SA,Number=1,Type=String,Description=\"Number of 1) forward ref alleles; 2) reverse ref; 3) forward non-ref; 4) reverse non-ref alleles, used in variant calling\">";
+				workingVcfHeader.append(line); 
+				workingVcfHeader.append("\n");
 			}
 			else {
-				SimpleVcf vcf = new SimpleVcf(line);
+				SimpleVcf vcf = new SimpleVcf(line,0);
 				String key = vcf.fetchCarisKey();
 				if (key == null) key = Misc.getRandomString(10);
 				workingVcfs.put(key, vcf);
@@ -233,14 +239,29 @@ private void saveVcfFile() {
 		if (workingVcfs.size() == 0) return;
 		Gzipper out = null;
 		try {
+			//for each vcf record
+			ArrayList<String> toSave = new ArrayList<String>();
+			int counter = 1;
+			for (SimpleVcf sv : workingVcfs.values()) {
+				if (saveAllVcfRecords) {
+					sv.setId("Caris_"+counter);
+					counter++;
+					toSave.add(sv.toString());
+				}
+				else if (sv.getInfo().startsWith("XRV;XRP=")) {
+					sv.setId("Caris_"+counter);
+					counter++;
+					toSave.add(sv.toString());
+				}
+			}
+			//if (toSave.size() == 0) return; don't do this always want a vcf file output even if empty
+			
 			File vcf = new File (saveDirectory, workingReportName+".vcf.gz");
 			out = new Gzipper(vcf);
 
 			//add header
 			out.print(workingVcfHeader);
-
-			//for each vcf record
-			for (SimpleVcf sv : workingVcfs.values()) out.println(sv.toString());
+			for (String s: toSave) out.println(s);
 
 			out.close();
 		} catch (IOException e) {
@@ -258,19 +279,30 @@ private void saveCnvFile() {
 		Gzipper out = null;
 		try {
 			
-			SimpleBed[] bedRegions = new SimpleBed[workingCNVAlterations.size()];
-			for (int i=0; i< bedRegions.length; i++) bedRegions[i] = workingCNVAlterations.get(i).toBed(name2GeneModels);
-			Arrays.sort(bedRegions);
 			
+			ArrayList<SimpleBed> bedToSave = new ArrayList<SimpleBed>();
+			for (int i=0; i< workingCNVAlterations.size(); i++) {
+				CNVAlteration cnv = workingCNVAlterations.get(i);
+				String res = cnv.getResult().toLowerCase();
+				if (res.contains("not detected") || res.contains("indeterminate")) continue;
+				bedToSave.add(cnv.toBed(name2GeneModels));
+			}
+			if (bedToSave.size() == 0) return;
+			
+			SimpleBed[] bedRegions = new SimpleBed[bedToSave.size()];
+			bedToSave.toArray(bedRegions);
+			Arrays.sort(bedRegions);
 			
 			File bedFile = new File (saveDirectory, workingReportName+".cnv.bed.gz");
 			out = new Gzipper(bedFile);
 
 			//add header
-			out.println("#Chr\tGeneStart\tGeneStop\tGeneName:Type\tScore\tGeneStrand\n#Hg38");
+			out.println("#ChrHg38\tGeneStart\tGeneStop\tGeneName:Type\tScore\tGeneStrand");
 
 			//for each bed
-			for (SimpleBed cnv: bedRegions) out.println(cnv);
+			for (SimpleBed cnv: bedRegions) {
+				if (cnv!= null) out.println(cnv);
+			}
 
 			out.close();
 		} catch (IOException e) {
@@ -291,9 +323,15 @@ private void saveFusionFile() {
 			ArrayList<SimpleBed> beds = new ArrayList<SimpleBed>();
 			int num = workingTranslocations.size();
 			for (int i=0; i< num; i++) {
+				Translocation trans = workingTranslocations.get(i);
+				String res = trans.getResult().toLowerCase();
+				if (res.contains("not detected") || res.contains("indeterminate")) continue;
+				
 				SimpleBed[] sbs = workingTranslocations.get(i).toBed(name2GeneModels);
 				for (SimpleBed sb: sbs) if (sb!=null) beds.add(sb);
 			}
+			if (beds.size()==0) return;
+			
 			SimpleBed[] bedRegions = new SimpleBed[beds.size()];
 			beds.toArray(bedRegions);
 			Arrays.sort(bedRegions);
@@ -303,7 +341,7 @@ private void saveFusionFile() {
 			out = new Gzipper(bedFile);
 
 			//add header
-			out.println("#Chr\tGeneStart\tGeneStop\tGeneName1:GeneName2:Type\tScore\tGeneStrand\n#Hg38");
+			out.println("#ChrHg38\tGeneStart\tGeneStop\tGeneName1:GeneName2:Type\tScore\tGeneStrand");
 
 			//for each bed
 			for (SimpleBed fus: bedRegions) out.println(fus);
@@ -365,10 +403,14 @@ private void parseTests(Node node) throws DOMException, ParseException, IOExcept
 					else if (testName.equals("Exome CNA Panel - Additional Genes")) parseCNVPanel(childNodes);
 					else if (testName.equals("Transcriptome Detection_v1 Panel")) parseTranslocationPanel(childNodes);
 					else if (testName.equals("Transcriptome Detection_v1 Variant Panel")) parseTranslocationPanel(childNodes);
+					else if (testName.equals("MGMT-Me")) parseMethyl(childNodes);
 					else if (ihcTestNames.contains(testName)) parseIHC(childNodes);
 					else if (testName.equals("Her2 CISH")) parseCNVPanel(childNodes);
 					else if (testName.equals("EBER ISH")){
-						IO.el("Skipping the parsing of 'EBER ISH' test for Epstein-Barr virus genome copy number\n");
+						IO.el("\nSkipping the parsing of 'EBER ISH' test for Epstein-Barr virus genome copy number\n");
+					}
+					else if (testName.equals("ROS1 FISH")){
+						IO.el("\nSkipping the parsing of 'ROS1 FISH' test for ROS1 cytogenic alteration.");
 					}
 					else throw new IOException("Found an unknown test! "+testName);
 				}
@@ -396,6 +438,27 @@ private void parseIHC(NodeList childNodes) throws IOException {
 			}
 		}
 	}
+	
+	private void parseMethyl(NodeList childNodes) throws IOException {
+		for (int j = 0; j < childNodes.getLength(); j++) {
+			Node cNode = childNodes.item(j);
+			if (cNode instanceof Element) {
+				String name = cNode.getNodeName();
+				//find testResults
+				if (name.equals("testResults")) {
+					NodeList subNodes = cNode.getChildNodes();
+					for (int i=0; i< subNodes.getLength(); i++) {
+						Node n = subNodes.item(i);
+						if (n instanceof Element) {
+							String subName = n.getNodeName();
+							if (subName.equals("epigeneticAlteration")) workingMethylationAlterations.add( new MethylationAlteration(workingReportAttributes, n.getChildNodes()));
+							else throw new IOException("Found something other than 'epigeneticAlteration' "+subName);
+						}
+					}
+				}
+			}
+		}
+	}
 
 	private void parseTranslocationPanel(NodeList childNodes) throws IOException {
 		for (int j = 0; j < childNodes.getLength(); j++) {
@@ -663,6 +726,9 @@ public void processArgs(String[] args){
 		System.out.println("\n"+ source +"\n");
 		File ucscFile = null;
 		File vcfXmlDir = null;
+		File singleCarisVcf = null;
+		File singleCarisXml = null;
+		
 		for (int i = 0; i<args.length; i++){
 			String lcArg = args[i].toLowerCase();
 			Matcher mat = pat.matcher(lcArg);
@@ -688,8 +754,8 @@ public void processArgs(String[] args){
 		saveDirectory.mkdirs();
 		if (saveDirectory.isDirectory() == false) Misc.printErrAndExit("\nError: your save directory does not appear to be a directory?\n");
 
-		if (vcfXmlDir == null || vcfXmlDir.isDirectory() == false) Misc.printErrAndExit("\nError: cannot find the directory containing your xml and vcf files?! "+vcfXmlDir);
-		parseXmlVcfFiles(vcfXmlDir);
+		if (vcfXmlDir != null && vcfXmlDir.exists()) parseXmlVcfFiles(vcfXmlDir);
+		else Misc.printErrAndExit("\nError: cannot find the directory -d containing your xml and vcf files?! ");
 
 		if (ucscFile == null || ucscFile.exists() == false) Misc.printErrAndExit("\nError: cannot find or read your hg38 merged UCSC refFlat gene table? "+ucscFile);
 
@@ -713,7 +779,10 @@ private void parseXmlVcfFiles(File vcfXmlDir) {
 			String fullName = files[i].getName();
 			if (fullName.endsWith(".gz"))fullName = fullName.substring(0, fullName.length()-3);
 			String[] nameParts = Misc.UNDERSCORE.split(fullName);
-			if (fullName.endsWith(".vcf")) vcfs.put(nameParts[0], files[i]);
+			if (fullName.endsWith(".vcf")) {
+				boolean itIsACaris = carisVcf(files[i]);
+				if (itIsACaris) vcfs.put(nameParts[0], files[i]);
+			}
 			if (fullName.endsWith(".xml")) xmls.put(nameParts[0], files[i]);
 		}
 		//same number?
@@ -727,28 +796,52 @@ private void parseXmlVcfFiles(File vcfXmlDir) {
 		}
 	}
 
+	private boolean carisVcf(File file) {
+		try {
+			BufferedReader in = IO.fetchBufferedReader(file);
+			String line = null;
+			while ((line = in.readLine())!=null) {
+				line = line.trim();
+				if (line.startsWith("##INFO=<ID=CI,Number=.,Type=String,Description=")) {
+					in.close();
+					return true;
+				}
+				if (line.startsWith("#CHROM")) {
+					in.close();
+					return false;
+				}
+			}
+			in.close();
+		} catch (IOException e) {
+			e.printStackTrace();
+		}
+		return false;
+	}
+
 	public static void printDocs(){
 		System.out.println("\n" +
 				"**************************************************************************************\n" +
-				"**                           Caris Xml Vcf Parser: March 2021                       **\n" +
+				"**                           Caris Xml Vcf Parser: March 2022                       **\n" +
 				"**************************************************************************************\n" +
 				"This tool parses Caris paired xml and vcf report files to generate: new vcfs where xml\n"+
 				"reported genomic alternations are annotated, bed files of copy number changes and gene\n"+
 				"fusions as well as a summary spreadsheet of the non NGS test results and report\n"+
-				"attributes.\n"+
+				"attributes. Use Vt to normalize the vcf output files. \n"+
 
 				"\nOptions:\n"+
 				"-d Path to a directory containing paired xml and vcf files from Caris.\n"+
-				"-s Path to a directory for saving the results.\n"+
+				"-s Path to a directory for saving the results. Only impacting vcf, cnv, fusions are\n"+
+				"     saved, if none present, no file(s) are created.\n"+
 				"-u Path to a Hg38 UCSC RefFlat or RefSeq merged gene file for CNV and gene fusion\n"+
 				"   coordinate extraction. See: http://www.genome.ucsc.edu/FAQ/FAQformat.html#format9\n"+
 				"   (refSeqGeneName name2(optional) chr strand txStart txEnd cdsStart cdsEnd exonCount\n"+
 				"   exonStarts exonEnds). Be sure to merge transcripts with the USeq MergeUCSCGeneTable\n"+
 				"   app and remove non standard chromosomes.\n"+
 				"-i Include PHI in spreadsheet output, defaults to excluding.\n"+
+				"-a Include all vcf records in output, defaults to just those with an xml match.\n"+
 
 				"\nExample: java -Xmx2G -jar pathToUSeq/Apps/CarisXmlVcfParser -d CarisReports/\n" +
-				"     -s ParsedCarisReports/ -i -u ~/GRCh38/hg38RefSeq9Dec2020_MergedStdChr.ucsc.gz \n\n" +
+				"     -s ParsedCarisReports/ -u ~/GRCh38/hg38RefSeq9Dec2020_MergedStdChr.ucsc.gz \n\n" +
 
 				"**************************************************************************************\n");
 	}
diff --git a/Source/edu/utah/seq/vcf/xml/caris/MethylationAlteration.java b/Source/edu/utah/seq/vcf/xml/caris/MethylationAlteration.java
new file mode 100644
index 0000000..b413eba
--- /dev/null
+++ b/Source/edu/utah/seq/vcf/xml/caris/MethylationAlteration.java
@@ -0,0 +1,47 @@
+package edu.utah.seq.vcf.xml.caris;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import util.gen.IO;
+import util.gen.Misc;
+
+public class MethylationAlteration {
+
+	//fields
+	private String biomarkerName = null;
+	private String gene = null;
+	private String result = null;
+	
+	public MethylationAlteration(LinkedHashMap<String, String> report, NodeList nodes) throws IOException {
+		for (int j = 0; j < nodes.getLength(); j++) {
+			Node n = nodes.item(j);
+			if (n instanceof Element) {
+				String name = n.getNodeName();
+				if (name.equals("biomarkerName")) biomarkerName = CarisXmlVcfParser.fetch("biomarkerName", n);
+				else if (name.equals("gene")) gene = CarisXmlVcfParser.fetch("gene", n);
+				else if (name.equals("result")) result = CarisXmlVcfParser.fetch("result", n);
+			}
+		}
+		//IO.pl(toString());
+		addReportInfo(report);
+	}
+
+	private void addReportInfo(LinkedHashMap<String, String> report) {
+		String bn = biomarkerName.replaceAll(" ", "_");
+		String res = result.replaceAll(" ", "_");
+		report.put(bn+":Result", res);
+	}
+
+	public String toString() {
+		StringBuilder sb = new StringBuilder();
+		if (biomarkerName!=null) sb.append("\nbiomarkerName\t"+biomarkerName);
+		if (gene!=null) sb.append("\ngene\t"+gene);
+		if (result!=null) sb.append("\nresult\t"+result);
+		return sb.toString();
+	}
+}
diff --git a/Source/edu/utah/seq/vcf/xml/caris/SimpleVcf.java b/Source/edu/utah/seq/vcf/xml/caris/SimpleVcf.java
index 406c4c9..0989be4 100644
--- a/Source/edu/utah/seq/vcf/xml/caris/SimpleVcf.java
+++ b/Source/edu/utah/seq/vcf/xml/caris/SimpleVcf.java
@@ -5,6 +5,8 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import util.gen.Gzipper;
 import util.gen.IO;
 import util.gen.Misc;
@@ -23,12 +25,22 @@ public class SimpleVcf  implements Comparable<SimpleVcf>{
 	private String info;
 	private String format;
 	private String sample;
-	private String pathogenicity = null;
+	private int end;
+	private int padPos;
+	private int padEnd;
 	public static final String xrv = "##INFO=<ID=XRV,Number=.,Type=String,Description=\"Caris xml reported genomic alteration\">\n";
 	public static final String xrp = "##INFO=<ID=XRP,Number=.,Type=String,Description=\"Caris xml reported pathogenicity\">\n";
-	
+	private boolean print = true;
+	private SimpleVcf match = null;
+	private ArrayList<SimpleVcf> overlap = new ArrayList<SimpleVcf>();
+	private static final Pattern afPat = Pattern.compile(".+AF=([\\d+\\.]+).*");
+	public static final String ncFilter = "##FILTER=<ID=NC,Description=\"This clinical test variant was not confirmed in subsequent recalling.\">";
+	public static final String nrFilter = "##FILTER=<ID=NR,Description=\"This variant was not reported by Caris.\">";
+	public static final String mdFilter = "##FILTER=<ID=MD,Description=\"This clinical variant overlapped a recall, had the same type, and was modified using info from the recall.\">";
+	public static String infoRAF = "##INFO=<ID=RAF,Number=A,Type=Float,Description=\"Recalled variant allele frequency\">";
 
-	public SimpleVcf (String vcfLine){
+
+	public SimpleVcf (String vcfLine, int bpPadding){
 		originalRecord = vcfLine;
 		//parse fields
 		String[] t = Misc.TAB.split(vcfLine);
@@ -41,27 +53,75 @@ public SimpleVcf (String vcfLine){
 		filter = t[6];
 		info = t[7];
 		format = t[8];
-		sample = t[9];
+		//two samples or one? 
+		// Caris just has one sample, the tumor
+		if (t.length==10) sample = t[9];
+		// Strelka has two, the normal and tumor, just take the tumor
+		else if (t.length==11) sample = t[10];
+		else Misc.printErrAndExit("\nERROR: sample mismatch for caris or strelka vcf record:\n"+ vcfLine);
+
+		//define the end as the max effected genomic region
+		int refLen = ref.length();
+		//find max alt length, there many be more than one, not good!
+		String[] splitAlts = Misc.COMMA.split(alt);
+		int altLen = splitAlts[0].length();
+		for (int i=1; i< splitAlts.length; i++){
+			if (splitAlts[i].length() > altLen) altLen = splitAlts[i].length();
+		}
+		if (refLen > altLen) {
+			end = pos+refLen;
+		}
+		else if (refLen < altLen){
+			end = pos+altLen;
+		}
+		else end = pos+altLen;
+
+		//set bp padded positions
+		padPos = pos - bpPadding;
+		padEnd = end + bpPadding;
 	}
-	
+
 	public void appendInfo(String toAppend) {
 		info = toAppend+info;
 	}
-	
-	
+
+	/**This appends on the AF from the matching SimpleVcf to this vcfLine*/
+	public void appendRAF(SimpleVcf o){
+		Matcher mat = afPat.matcher(o.info);
+		if (mat.matches()) info = info + ";RAF="+mat.group(1);
+		else Misc.printErrAndExit("\nERROR: failed to match AF= from "+o.getVcfLine());
+	}
+
+	/**Appends not confirmed flag to the FILTER field*/
+	public void appendFilter(String flag){
+		//any existing flags?
+		if (filter.equals(".") || filter.toLowerCase().equals("pass")) filter = flag;
+		else filter = filter+";"+flag;
+
+	}
+
+	public void appendID(SimpleVcf o) {
+		if (o.getId().equals(".") == false) id = id+";"+o.getId();
+		else id = o.getId();
+	}
+
+	public void appendINFO(SimpleVcf o) {
+		info = info+";"+o.info;
+	}
+
 	public String fetchCarisKey() throws IOException {
 		//chromosome_ref_alt_readDepth_geneName_hgvsCodingChange
 		StringBuilder sb = new StringBuilder();
 		sb.append(chr); sb.append("_");
 		sb.append(ref); sb.append("_");
 		sb.append(alt); sb.append("_");
-		
+
 
 		//parse DP from INFO
 		//DP=1134;TI=NM_003482.3;GI=KMT2D;FC=Nonsense;PC=Q3812*;DC=c.11434C>T;LO=EXON;EXON=39;CI="Pathogenic Variant"
 		//parse DP, transcript and hgvs
 		//cant use transcript id, these differ between the vcf file and the xml file
-		
+
 		String[] splitInfo = Misc.SEMI_COLON.split(info);
 		String dp = null;
 		String hgvs = null;
@@ -72,6 +132,7 @@ public String fetchCarisKey() throws IOException {
 			else if (i.startsWith("DP=")) dp = i.substring(3);
 		}
 		if (hgvs == null || gi == null || dp == null) {
+			// just a few Caris vcf records with no annotations, just skip
 			//IO.el("\t\tERROR: failed to parse one or more of the DC=, TI=, DP= INFO values from "+originalRecord);
 			return null;
 		}
@@ -83,7 +144,7 @@ public String fetchCarisKey() throws IOException {
 		return sb.toString();
 	}
 
-	
+
 	public String toString(){
 		//#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE
 		StringBuilder sb = new StringBuilder();
@@ -94,12 +155,17 @@ public String toString(){
 		sb.append(alt); sb.append("\t");
 		sb.append(qual); sb.append("\t");
 		sb.append(filter); sb.append("\t");
-		sb.append(info); sb.append("\t");
-		sb.append(format); sb.append("\t");
-		sb.append(sample); 
+		sb.append(info); 
+		if (format!=null && sample!=null) {
+			sb.append("\t");
+			sb.append(format); 
+			sb.append("\t");
+			sb.append(sample);
+		}
+		 
 		return sb.toString();
 	}
-	
+
 	public int compareTo(SimpleVcf other) {
 		//sort by chromosome
 		int x = chr.compareTo(other.chr);
@@ -109,7 +175,21 @@ public int compareTo(SimpleVcf other) {
 		if (pos > other.pos) return 1;
 		return 0;
 	}
-	
+
+	public boolean compareToExact(SimpleVcf o) {
+		if (chr.equals(o.chr) == false) return false;
+		if (pos != o.pos) return false;
+		if (ref.equals(o.ref) == false) return false;
+		if (alt.equals(o.alt) == false) return false;
+		return true;
+	}
+	public boolean compareToOverlap(SimpleVcf o) {
+		if (chr.equals(o.chr) == false) return false;
+		if (padPos > o.padEnd) return false;
+		if (o.padPos > padEnd) return false;
+		return true;
+	}
+
 	/**Sorts a VCF and writes it out compressed but not indexed.  This loads everything into memory so it can blow up. Careful.
 	 * @return number of vcf records*/
 	public static int sortVcf(File unsortedVcfFile, File sortedOutVcfFile) throws IOException { 
@@ -121,7 +201,7 @@ public static int sortVcf(File unsortedVcfFile, File sortedOutVcfFile) throws IO
 		while ((line = in.readLine())!=null){
 			if (line.trim().length() == 0) continue;
 			if (line.startsWith("#")) out.println(line);
-			else records.add(new SimpleVcf(line));
+			else records.add(new SimpleVcf(line, 0));
 		}
 		//sort em
 		SimpleVcf[] toSort = new SimpleVcf[records.size()];
@@ -134,7 +214,7 @@ public static int sortVcf(File unsortedVcfFile, File sortedOutVcfFile) throws IO
 		out.close();
 		return toSort.length;
 	}
-	
+
 	//getters and setters
 	public String getVcfLine() {
 		return toString();
@@ -166,4 +246,32 @@ public void setAlt(String alt) {
 	public String getId() {
 		return id;
 	}
+
+	public void setId(String id) {
+		this.id = id;
+	}
+
+	public boolean isPrint() {
+		return print;
+	}
+
+	public SimpleVcf getMatch() {
+		return match;
+	}
+
+	public void setPrint(boolean print) {
+		this.print = print;
+	}
+
+	public ArrayList<SimpleVcf> getOverlap() {
+		return overlap;
+	}
+
+	public void setOverlap(ArrayList<SimpleVcf> overlap) {
+		this.overlap = overlap;
+	}
+
+	public void setMatch(SimpleVcf match) {
+		this.match = match;
+	}
 }
diff --git a/Source/edu/utah/seq/vcf/xml/caris/Translocation.java b/Source/edu/utah/seq/vcf/xml/caris/Translocation.java
index ff68f11..6e1a8b8 100644
--- a/Source/edu/utah/seq/vcf/xml/caris/Translocation.java
+++ b/Source/edu/utah/seq/vcf/xml/caris/Translocation.java
@@ -117,6 +117,11 @@ private UCSCGeneLine fetchModel(String geneName, String chr, HashMap<String, UCS
 		else model = genes[0];
 		return model;
 	}
+
+
+	public String getResult() {
+		return result;
+	}
 	
 
 
diff --git a/build.properties b/build.properties
index 6fe3eec..b2e2af9 100644
--- a/build.properties
+++ b/build.properties
@@ -45,6 +45,7 @@ edu/utah/seq/data/BisSeqErrorAdder,\
 edu/utah/seq/analysis/BisStat,\
 edu/utah/seq/analysis/BisStatRegionMaker,\
 edu/utah/seq/data/CalculatePerCycleErrorRate,\
+edu/utah/seq/vcf/xml/caris/CarisVcfComparator,\
 edu/utah/seq/vcf/xml/caris/CarisXmlVcfParser,\
 edu/utah/seq/analysis/ChIPSeq,\
 edu/utah/seq/vcf/cluster/ClusterMultiSampleVCF,\