From 237851de2bb0ccab1b1fa29cf813b41fcec140b0 Mon Sep 17 00:00:00 2001
From: u0028003 <u0028003@davids-mbp-2.hci.utah.edu>
Date: Tue, 9 Jul 2024 12:07:25 -0600
Subject: [PATCH] Build out the z-score normalization gene selection method for
 Copy Analysis

---
 .../edu/utah/kohli/AccuGenomicsSpikeins.java  | 678 ++++++++++++++++--
 Source/edu/utah/kohli/CaptureRegion.java      |  31 +-
 Source/edu/utah/kohli/KohliGene.java          |  34 +
 Source/edu/utah/kohli/KohliPatient.java       |  13 +-
 Source/edu/utah/kohli/KohliSample.java        |  11 +-
 build.properties                              |   2 +-
 6 files changed, 694 insertions(+), 75 deletions(-)
diff --git a/Source/edu/utah/kohli/AccuGenomicsSpikeins.java b/Source/edu/utah/kohli/AccuGenomicsSpikeins.java
index 3ae54b3..4c8bd61 100644
--- a/Source/edu/utah/kohli/AccuGenomicsSpikeins.java
+++ b/Source/edu/utah/kohli/AccuGenomicsSpikeins.java
@@ -1,11 +1,14 @@
 package edu.utah.kohli;
 
 import java.io.File;
+import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.TreeMap;
+import java.util.TreeSet;
 
 import edu.utah.kohli.KohliGene.CopyTestResult;
 import util.gen.IO;
@@ -20,76 +23,528 @@ public class AccuGenomicsSpikeins {
 	private ArrayList<CaptureRegion> captureRegions = new ArrayList<CaptureRegion>();
 	private LinkedHashMap<String, KohliGene> genes = null;
 	private HashMap<String, NormalizerGene> geneNameNormalizedGene = new HashMap<String, NormalizerGene>();
-	private File boxPlotDirectory = null;
+	private File resultsDirectory = null;
 	private ArrayList<String> germlineSamplesToRemoveFromPoN = null;
 	private LinkedHashMap<String, CopyAnalysisTest> copyAnalysisTests = new LinkedHashMap<String, CopyAnalysisTest>();
-
-	public static void main(String[] args) {
+	private String testedGenes = "'ARID1A','PTEN','CCND1','ATM','ZBTB16','CDKN1B','KMT2D','CDK4','MDM2','BRCA2','RB1','FOXA1','AKT1','ZFHX3','TP53','NCOR1','CDK12','BRCA1','SPOP','RNF43','MSH2','ERG','TMPRSS2','CHEK2','CTNNB1','FOXP1','PIK3CB','PIK3CA','PIK3R1','CHD1','APC','CDK6','BRAF','KMT2C','NKX3-1','NCOA2','MYC','COL22A1','CDKN2A','NOTCH1','AR-Enh','AR','OPHN1'";
+	private File fullPathToR = new File("/usr/local/bin/R");
+	private KohliSample testSample = null;
+	private File saveDir = null;
+	private TreeSet<String> genesToNormWith = null;
+	
+	//params to pick genes to use in normalizing a test sample
+	private double maximumZScore = 3.0;
+	private double minimumFractionPassingGeneCaptureRegions = 0.75;
+	private double minimumNumberPassingGenes = 3;
+	
+	public static void main(String[] args) throws IOException {
 		File anno = new File ("/Users/u0028003/HCI/Labs/Kohli_Manish/ProstateCfDNAAnalysis/FinalAggregateAnalysis/sampleMatchingPersonGNomIDs_13June2024.txt");
 		File vcfCountData = new File("/Users/u0028003/HCI/Labs/Kohli_Manish/ProstateCfDNAAnalysis/FinalAggregateAnalysis/BcfToolsForAccGen/justVcfSampleCounts.txt");
-		File regionCountData = new File("/Users/u0028003/HCI/Labs/Kohli_Manish/ProstateCfDNAAnalysis/FinalAggregateAnalysis/CopyRatio/Split150bp/medianMergedSplit150ProbeCountsQCed.txt");
-		File boxPlotDirectory = new File("/Users/u0028003/HCI/Labs/Kohli_Manish/ProstateCfDNAAnalysis/FinalAggregateAnalysis/CopyRatio/Split150bp/BoxPlotDataFiles");
+		File regionCountData = new File("/Users/u0028003/HCI/Labs/Kohli_Manish/ProstateCfDNAAnalysis/FinalAggregateAnalysis/CopyRatio/Split150bp/medianCounts150bpDoOverPostQC.txt");
+		File resultsDirectory = new File("/Users/u0028003/HCI/Labs/Kohli_Manish/ProstateCfDNAAnalysis/FinalAggregateAnalysis/CopyRatio/Split150bp/CnvZScoreNormJustTest");
+		
+		//bad germline samples for PoN, still want for the paired analysis
 		ArrayList<String> germlineSamplesToRemoveFromPoN = new ArrayList<String>();
 		germlineSamplesToRemoveFromPoN.add("23802X11");
-		new AccuGenomicsSpikeins(anno, vcfCountData, regionCountData, boxPlotDirectory, germlineSamplesToRemoveFromPoN);
+		germlineSamplesToRemoveFromPoN.add("23802X4");
+		new AccuGenomicsSpikeins(anno, vcfCountData, regionCountData, resultsDirectory, germlineSamplesToRemoveFromPoN);
 	}
 	
-	public AccuGenomicsSpikeins(File anno, File vcfCountData, File regionCountData, File boxPlotDirectory, ArrayList<String> germlineSamplesToRemoveFromPoN) {
-		this.boxPlotDirectory = boxPlotDirectory;
+	public AccuGenomicsSpikeins(File anno, File vcfCountData, File regionCountData, File resultsDirectory, ArrayList<String> germlineSamplesToRemoveFromPoN) throws IOException {
+		this.resultsDirectory = resultsDirectory;
 		this.germlineSamplesToRemoveFromPoN = germlineSamplesToRemoveFromPoN;
-		
+
 		//load patient sample annotations
 		loadPatients(anno);
 		//for (KohliPatient kp: patients.values()) IO.pl(kp);
-		
+
 		//load Accugenomic spike-in data
-		//loadAccugenomicSpikeinData(vcfCountData);
-		//for (KohliSample ks: samples.values()) IO.pl(ks.toStringProbes(accuGenProbes));
+		/*loadAccugenomicSpikeinData(vcfCountData);
+		for (KohliSample ks: samples.values()) IO.pl(ks.toStringProbes(accuGenProbes));
+		GOAL: flag probes to ignore
+		for each probe generate a histogram and boxplot of AFs for the ctDNA and germline
+		IO.pl("\nAFs!");
+		printAFs();
+		for each probe generate a histogram and boxplot of DPs for the ctDNA and germline
+		IO.pl("\nDPs!");
+		printDPs();
+		GOAL: flag samples to ignore
+		remove flagged probes
+		histogram and boxplot each sample for AF and DPs*/
+
+		//load read coverage for each sample, region, and gene
+		loadReadCoverageData(regionCountData);
+
+		//mean scale the germline samples to 1000, needed for the TN comp
+		IO.pl("Initial all germline all region normalization to 1000...");
+		if (germlineSamplesToRemoveFromPoN.size()!=0) IO.pl("\tExcluding from PoN: "+germlineSamplesToRemoveFromPoN);
+		meanScaleGermlineReadCoverageDatasetsAllGenes(1000.0, captureRegions, true);
+		//File boxPlotDataGermline = new File(boxPlotDirectory, "germlineBoxPlotData.txt");
+		//saveScaledGermlineReadCoverageDatasetsForBoxPlot(boxPlotDataGermline);
+
+		//define the paired t/n datasets
+		//matchedTest = samples.get("23802X1");
+		//matchedTest = samples.get("22712X1");
+		//genesToNormWith = maximizeGenesWithInPoNZScores();
 		
-		//GOAL: flag probes to ignore
-		//for each probe generate a histogram and boxplot of AFs for the ctDNA and germline
-		//IO.pl("\nAFs!");
-		//printAFs();
-		//for each probe generate a histogram and boxplot of DPs for the ctDNA and germline
-		//IO.pl("\nDPs!");
-		//printDPs();
+		//runCfDNASamples();
 		
-		//GOAL: flag samples to ignore
-		//remove flagged probes
-		//histogram and boxplot each sample for AF and DPs
+		runAllGermlinesAsTest();
+
+
+	
+		IO.pl("\nCOMPLETE!");
+
+	}
+	
+	private void runAllGermlinesAsTest() throws IOException {
+
+		for (KohliSample ks: samples.values()) {
+			if (ks.isGermlineSample()==false) continue;
+			testSample = ks;
+
+			//exclude the test from the pon
+			germlineSamplesToRemoveFromPoN.add(testSample.getSampleId());
+
+			saveDir = new File(resultsDirectory, testSample.getPatient().getHciPatientId()+"_"+testSample.getSampleId());
+			saveDir.mkdirs();
+			IO.pl("Testing Sample "+saveDir.getName()+"...");
+			
+			//mean scale the pon minus the "test" sample
+			meanScaleGermlineReadCoverageDatasetsAllGenes(1000.0, captureRegions, true);
+
+			//ZScore gene selection method
+			genesToNormWith = maximizeGenesWithInPoNZScores();
+
+			//Too few genes?
+			if (genesToNormWith.size()==0) {
+				IO.pl("ERROR: too few genes found to normalize to, adjust zscore requirements and restart! SKIPPING");
+				continue;
+			}
+			//All on one chrom
+			else if (chromosomeCheck(genesToNormWith) == false) {
+				IO.pl("ERROR: all genes to normalize were found on one chromosome, adjust zscore requirements and restart! SKIPPING");
+				continue;
+			}
+
+			// Collect the Capture Regions to use in normalizing
+			ArrayList<CaptureRegion> crsToNorm = new ArrayList<CaptureRegion>();
+			for (String gn: genesToNormWith) {
+				KohliGene kg = genes.get(gn);
+				crsToNorm.addAll(kg.getCaptureRegions());
+			}
+
+			//ReNormalize the PoN
+			IO.pl("Renormalizing the PoN with the select genes...");
+			meanScaleGermlineReadCoverageDatasetsAllGenes(1000.0, crsToNorm, false);
+
+			//Normalize the test
+			IO.pl("Normalizing the test with the select genes...");
+			meanScaleTestReadCoverageDatasetsWithDefinedCaptureRegions(1000.0, crsToNorm);
+
+			//Generate some plots for each gene
+			generateBoxPlots("("+Misc.treeSetToString(genesToNormWith, ",")+")");
+			generateZScoreHistograms("("+Misc.treeSetToString(genesToNormWith, ",")+")");
+
+			//Stat copy changes
+			generateGeneStatistics();
+			
+			//remove the test from the PoN blocker
+			germlineSamplesToRemoveFromPoN.remove(germlineSamplesToRemoveFromPoN.size()-1);
+		}
 		
-		//load read coverage for each sample, region, and gene
-		loadReadCoverageData(regionCountData);
+	}
+
+	private void runCfDNASamples() throws IOException {
+		for (KohliPatient kp: patients.values()) {
+			IO.pl("\nTesting Patient "+kp.getHciPatientId()+"...");
+
+			for (KohliSample ks: kp.getNonGermlineSamples()) {
+				testSample = ks;
+
+				saveDir = new File(resultsDirectory, kp.getHciPatientId()+"_"+testSample.getSampleId());
+				saveDir.mkdirs();
+				IO.pl("Testing Sample "+saveDir.getName()+"...");
+
+				
+				//ZScore gene selection method
+				genesToNormWith = maximizeGenesWithInPoNZScores();
+				
+				//Too few genes?
+				if (genesToNormWith.size()==0) {
+					IO.pl("ERROR: too few genes found to normalize to, adjust zscore requirements and restart! SKIPPING");
+					continue;
+				}
+				//All on one chrom
+				else if (chromosomeCheck(genesToNormWith) == false) {
+					IO.pl("ERROR: all genes to normalize were found on one chromosome, adjust zscore requirements and restart! SKIPPING");
+					continue;
+				}
+	
+				//IO.pl(genesToNormWith.size()+" genes for normalization: "+genesToNormWith);
+				// Collect the Capture Regions to use in normalizing
+				ArrayList<CaptureRegion> crsToNorm = new ArrayList<CaptureRegion>();
+				for (String gn: genesToNormWith) {
+					KohliGene kg = genes.get(gn);
+					crsToNorm.addAll(kg.getCaptureRegions());
+				}
+
+				//ReNormalize the PoN
+				IO.pl("Renormalizing the PoN with the select genes...");
+				meanScaleGermlineReadCoverageDatasetsAllGenes(1000.0, crsToNorm, false);
 
-		//for (KohliSample ks: samples.values()) IO.pl(ks.meanRegionCounts(captureRegions));
-		//meanScaleAndPrintReadCoverageDatasets(1000.0);
-		//meanScaleReadCoverageDatasetsBoxPlot(1000.0,"germline");
+				//Normalize the test
+				IO.pl("Normalizing the test with the select genes...");
+				meanScaleTestReadCoverageDatasetsWithDefinedCaptureRegions(1000.0, crsToNorm);
+
+				//Generate some plots for each gene
+				generateBoxPlots("("+Misc.treeSetToString(genesToNormWith, ",")+")");
+				generateZScoreHistograms("("+Misc.treeSetToString(genesToNormWith, ",")+")");
+
+				//Stat copy changes
+				generateGeneStatistics();
+				
+				//reset the initial PoN values in the CaptureRegions
+				IO.pl("Resetting the PoN to the original values...");
+				for (CaptureRegion cr: captureRegions) cr.copyInitialPoNToCurrentPon();
+				
+			}
+		}
+
+	}
+	
+	private boolean chromosomeCheck(TreeSet<String> geneNames) {
+		HashSet<String> chrs = new HashSet<String>();
+		for (String geneName: geneNames) chrs.add(genes.get(geneName).getChromosome());
+		if (chrs.size()>1) return true;
+		return false;
+	}
+
+	private void generateGeneStatistics() {
+		IO.pl("Generating statistic spreadsheet...");
+		StringBuilder sb = new StringBuilder();
+		sb.append("Patient\tTest Sample\tGermline\tUsed To Norm\tGene\t# Capture Regions\t"
+				+ "# CRs Test Outside PoN\tMean CR Z-Score\tMean CR Scaled Test\tMean CR Scaled PoN\tLog2(Test/PoN)\n");
+		for (KohliGene kg: genes.values()) {
+			sb.append(testSample.getPatient().getHciPatientId()); sb.append("\t");
+			sb.append(testSample.getSampleId()); sb.append("\t");
+			sb.append(testSample.isGermlineSample()); sb.append("\t");
+			sb.append(genesToNormWith.contains(kg.getGeneName()));  sb.append("\t");
+			sb.append(kg.getGeneName()); sb.append("\t");
+			sb.append(kg.getCaptureRegions().size());  sb.append("\t");
+			sb.append(kg.getNumberTestValuesOutsideCaptureRegions()); sb.append("\t");
+			sb.append(kg.getMeanCaptureRegionZScore()); sb.append("\t");
+			double meanTest = kg.getMeanCaptureRegionScaledTestScore();
+			sb.append(meanTest); sb.append("\t");
+			double meanPoN = kg.getMeanCaptureRegionPoNScore();
+			sb.append(meanPoN); sb.append("\t");
+			sb.append(Num.log2(meanTest/meanPoN)); sb.append("\t");
+			sb.append("\n");
+		}
+		
+		File spreadsheet = new File( saveDir, saveDir.getName()+".geneStats.xls" );
+		IO.writeString(sb.toString(), spreadsheet);
+	}
+
+	private void generateBoxPlots(String normGenes) throws IOException {
+		IO.pl("Generating boxplots...");
+		//write out data files for R
+		File rDir = new File(saveDir, "R");
+		rDir.mkdir();
+		File boxPlotDataGermline = new File(rDir, "germlineBoxPlotData.txt");
+		saveScaledGermlineReadCoverageDatasetsForBoxPlot(boxPlotDataGermline);
+		File boxPlotDataTest = new File(rDir, "testBoxPlotData.txt");
+		saveScaledTestReadCoverageDatasetsForBoxPlot(boxPlotDataTest);
+		
+		//boxPlot dir
+		File boxPlots = new File(saveDir, "BoxPlots");
+		boxPlots.mkdir();
+		
+		ArrayList<String> rScriptAL = new ArrayList<String>();
+		rScriptAL.add("library(ggplot2)");
+		rScriptAL.add("setwd('"+boxPlots.getCanonicalPath()+"')");
+		rScriptAL.add("df = read.table('"+boxPlotDataGermline.getCanonicalPath()+"', header = TRUE)");
+		rScriptAL.add("testDf = read.table('"+boxPlotDataTest.getCanonicalPath()+"', header=TRUE)");
+		rScriptAL.add("normGenes = '"+normGenes+"'");
+		rScriptAL.add("testDataset = '"+testSample.getSampleId()+"'");
+		rScriptAL.add("genes = c("+testedGenes+")");
+		rScriptAL.add("for (x in genes) {");
+		rScriptAL.add("  print(x)");
+		rScriptAL.add("  toSub=paste(x,'_',sep='')");
+		rScriptAL.add("  dfSub = subset(df, grepl(toSub, Gene))");
+		rScriptAL.add("  dfSubTest = subset(testDf, grepl(toSub, Gene))");
+		rScriptAL.add("  boxPlotObj = ggplot(dfSub, aes(x=Gene, y=ScaledCount)) + ");
+		//rScriptAL.add("  geom_boxplot()+ coord_cartesian(ylim = c(250, 1750)) + ");
+		rScriptAL.add("  geom_boxplot()+ ");
+		rScriptAL.add("  ggtitle( paste(testDataset, x, normGenes)) + ");
+		rScriptAL.add("  ylab('1K Mean Scaled Counts') + ");
+		rScriptAL.add("  theme( ");
+		rScriptAL.add("    plot.title = element_text(size=12,face='bold'), ");
+		rScriptAL.add("    axis.title.x = element_blank(), ");
+		rScriptAL.add("    axis.title.y = element_text(face='bold', size=12), ");
+		rScriptAL.add("    axis.text.x = element_text(angle = 90, vjust = 1, hjust=1)) + ");
+		rScriptAL.add("  geom_point(data=dfSubTest, aes(x=Gene, y=ScaledCount), color = 'red', shape=8) ");
+		rScriptAL.add("  ggsave(file=paste(x,'.box.png', sep = ''), plot=boxPlotObj, width=10, height=8) ");
+		rScriptAL.add("}");
+		rScriptAL.add("print('COMPLETE')");
+		String rScript = Misc.stringArrayListToString(rScriptAL, "\n");
+		File rScriptFile = new File(rDir, "boxplotScript.R");
+		File rLogFile = new File(rDir, "boxplot.Rout");
+		IO.writeString(rScript, rScriptFile );
+		
+		//make command
+		String[] command = new String[] {
+				fullPathToR.getCanonicalPath(),
+				"CMD",
+				"BATCH",
+				"--no-save",
+				"--no-restore",
+				rScriptFile.getCanonicalPath(),
+				rLogFile.getCanonicalPath()};			
+		//execute
+		IO.executeCommandLine(command);
+		
+		//read in results 
+		String[] results = IO.loadFile(rLogFile);
+		boolean complete = false;
+		for (String s: results) if (s.contains("COMPLETE"))complete = true;
+		if (complete == false) Misc.printErrAndExit("\nERROR, failed to find 'COMPLETE' in the boxplot R output, see "+rLogFile);
+	}
+
+	private TreeSet<String> maximizeGenesWithCenteredCoverage(KohliSample testSample, int[][] upDownSameGermline) {
+		IO.pl("Scanning scalars to identify genes in the test that resemble the matched normal...");
+		UpDownResult[] res = new UpDownResult[10000 - 100 +1];
+		int counter = 0;
+		for (int i=100; i<=10000; i++) {
+			int[][] upDownSameTest = countGenesWithCenteredCoverage(i, testSample);
+			ArrayList<String>[] numMatchsOneOffsTwoOffs = compare(upDownSameGermline, upDownSameTest, false);
+			res[counter++] = new UpDownResult(numMatchsOneOffsTwoOffs, i);
+		}
+		//sort by matches, oneOffs, twoOffs, threeOffs
+		Arrays.sort(res);
+		IO.pl("\tScalar\tMatches\tOneOffs\tTwoOffs\tThreeOffs");
+		IO.pl(res[0]);
+		TreeSet<String> toNorm = new TreeSet<String>();
+		for (String geneName: res[0].matchesOneTwoThreeOffs[0]) toNorm.add(geneName);
+		for (String geneName: res[0].matchesOneTwoThreeOffs[1]) toNorm.add(geneName);
+		if (toNorm.size() ==0) {
+			IO.pl("WARNING: no genes found to normalize to, using all!");
+			toNorm.addAll(genes.keySet());
+		}
+		else {
+			for (int i=1; i< res.length; i++) {
+				if (res[0].compareTo(res[i]) != 0) break;
+				IO.pl(res[i]);
+				//add in matches and one offs? how about two offs?
+				for (String geneName: res[i].matchesOneTwoThreeOffs[0]) toNorm.add(geneName);
+				for (String geneName: res[i].matchesOneTwoThreeOffs[1]) toNorm.add(geneName);
+			}
+		}
+		return toNorm;
+	}
+	
+	private TreeSet<String> maximizeGenesWithInPoNZScores() {
+		IO.pl("Scanning scalars to identify genes in the test that resemble the matched normal...");
+		NormalizationResult[] res = new NormalizationResult[10000 - 100 +1];
+		int counter = 0;
+		for (int i=100; i<=10000; i++) {
+			//scale all to i	
+			meanScaleTestReadCoverageDatasetsWithDefinedCaptureRegions(i , captureRegions);
+			res[counter++] = countNumberGenesWithTestInPon(i);
+		}
+		//sort by highest num genes and then sum fraction copy regions
+		Arrays.sort(res);
+		IO.pl("\tScalar\t#Passing\tGeneNames");
+		TreeSet<String> toNorm = new TreeSet<String>();
+		if (res[0].numPassingGenes >= minimumNumberPassingGenes ) {
+			IO.pl(res[0]);
+			for (KohliGene kg: res[0].genesPassingThresholds) {
+				toNorm.add(kg.getGeneName());
+			}
+		}
+		return toNorm;
+	}
+	
+	private NormalizationResult countNumberGenesWithTestInPon(int normTarget) {
+		//for each gene, count the number of capture regions where the test is within the PoN zscores
+		ArrayList<KohliGene> testInPoN = new ArrayList<KohliGene>();
+		ArrayList<Double> fracCRs = new ArrayList<Double>();
+		for (KohliGene kg : genes.values()) {
+			double inPoN = 0;
+			for (CaptureRegion cr: kg.getCaptureRegions()) {
+				if (Math.abs(cr.calculateZScoreFromPoN()) <= maximumZScore) inPoN++;
+			}
+			double frac = inPoN/ (double)kg.getCaptureRegions().size();
+			if (frac >= minimumFractionPassingGeneCaptureRegions) {
+				testInPoN.add(kg);
+				fracCRs.add(frac);
+			}
+		}
+		return new NormalizationResult(normTarget, testInPoN, fracCRs);
+	}
+	
+	private class UpDownResult implements Comparable<UpDownResult>{
 		
-		//mean scale the germline samples
-		//meanScaleGermlineReadCoverageDatasetsAllGenes(1000.0);
+		int normTarget;
+		ArrayList<String>[] matchesOneTwoThreeOffs = null;
+		int[] numMatchesOneTwoThreeOffs = null;
 		
-		//print out the genes
-		//IO.pl("Genes, their capture regions, the mean of all of the germline samples and the individual values");
-		//for (KohliGene kg: genes.values()) {
-		//	IO.pl(kg.toString());
-		//}
+		public UpDownResult(ArrayList<String>[] matchesOneTwoThreeOffs, int normTarget) {
+			this.normTarget = normTarget;
+			this.matchesOneTwoThreeOffs = matchesOneTwoThreeOffs;
+			numMatchesOneTwoThreeOffs = new int[] {matchesOneTwoThreeOffs[0].size(), matchesOneTwoThreeOffs[1].size(), matchesOneTwoThreeOffs[2].size(), matchesOneTwoThreeOffs[3].size()};
+		}
 		
+		public String toString() {
+			StringBuilder sb = new StringBuilder("\t");
+			sb.append(normTarget);
+			sb.append("\t");
+			sb.append(matchesOneTwoThreeOffs[0]);
+			for (int i=1; i< matchesOneTwoThreeOffs.length; i++) {
+				sb.append("\t");
+				sb.append(matchesOneTwoThreeOffs[i]);
+			}
+			return sb.toString();
+		}
+
+		public int compareTo(UpDownResult o) {
+			//matches
+			if (this.numMatchesOneTwoThreeOffs[0] > o.numMatchesOneTwoThreeOffs[0]) return -1;
+			else if (this.numMatchesOneTwoThreeOffs[0] < o.numMatchesOneTwoThreeOffs[0]) return 1;
+			
+			//matches same, thus one offs
+			if (this.numMatchesOneTwoThreeOffs[1] > o.numMatchesOneTwoThreeOffs[1]) return -1;
+			else if (this.numMatchesOneTwoThreeOffs[1] < o.numMatchesOneTwoThreeOffs[1]) return 1;
+			
+			//one offs same, thus two offs
+			if (this.numMatchesOneTwoThreeOffs[2] > o.numMatchesOneTwoThreeOffs[2]) return -1;
+			else if (this.numMatchesOneTwoThreeOffs[2] < o.numMatchesOneTwoThreeOffs[2]) return 1;
+			
+			//two offs same, thus three offs
+			if (this.numMatchesOneTwoThreeOffs[3] > o.numMatchesOneTwoThreeOffs[3]) return -1;
+			else if (this.numMatchesOneTwoThreeOffs[3] < o.numMatchesOneTwoThreeOffs[3]) return 1;
+			
+			return 0;
+		}
+	}
+	
+	private class NormalizationResult implements Comparable<NormalizationResult>{
 		
-		//scan the pon to define the accepted range for each normalizer gene
-		generatePonBackground();
+		int normTarget;
+		ArrayList<KohliGene> genesPassingThresholds = null;
+		ArrayList<Double> fracCRs = new ArrayList<Double>();
+		int numPassingGenes;
+		double sumFracCRs;
 		
-		//test a sample against the PoN
-		test("22712X1");
-		//test("23802X1");
-		//for (String sampleId : samples.keySet()) {
-			//test(sampleId);
-		//}
+		public NormalizationResult(int normTarget, ArrayList<KohliGene> genesPassingThresholds, ArrayList<Double> fracCRs) {
+			this.genesPassingThresholds = genesPassingThresholds;
+			this.normTarget = normTarget;
+			numPassingGenes = genesPassingThresholds.size();
+			this.fracCRs = fracCRs;
+			sumFracCRs = Num.sumArray(Num.arrayListOfDoubleToArray(fracCRs));
+		}
 		
+		public String toString() {
+			StringBuilder sb = new StringBuilder("\t");
+			sb.append(normTarget);
+			sb.append("\t");
+			sb.append(numPassingGenes);
+			if (numPassingGenes > 0) {
+				sb.append("\t");
+				sb.append(genesPassingThresholds.get(0).getGeneName());
+				sb.append("(");
+				sb.append(Num.formatNumber(fracCRs.get(0), 2));
+				sb.append(")");
+				for (int i=1; i< numPassingGenes; i++) {
+					sb.append(",");
+					sb.append(genesPassingThresholds.get(i).getGeneName());
+					sb.append("(");
+					sb.append(Num.formatNumber(fracCRs.get(i), 2));
+					sb.append(")");
+				}
+			}
+			
+			return sb.toString();
+		}
+
+		public int compareTo(NormalizationResult o) {
+			if (this.numPassingGenes > o.numPassingGenes) return -1;
+			else if (this.numPassingGenes < o.numPassingGenes) return 1;
+			else {
+				if (this.sumFracCRs > o.sumFracCRs) return -1;
+				else if (this.sumFracCRs < o.sumFracCRs) return 1;
+				return 0;
+			}
+		}
 	}
 	
-	private void test(String sampleIdToTest) {
+	private ArrayList<String>[] compare(int[][] upDownSameGermline, int[][] upDownSameTest, boolean printStats) {
+		ArrayList<String> matches = new ArrayList<String>();
+		ArrayList<String> oneOffs = new ArrayList<String>();
+		ArrayList<String> twoOffs = new ArrayList<String>();
+		ArrayList<String> threeOffs = new ArrayList<String>();
+		
+		int i=-1;
+		for (String geneName: genes.keySet()) {
+			i++;
+			if (printStats) IO.p(geneName+"\t"+upDownSameGermline[i][0]+":"+upDownSameGermline[i][1]+":"+upDownSameGermline[i][2]+"\t"+upDownSameTest[i][0]+":"+upDownSameTest[i][1]+":"+upDownSameTest[i][2]);
+			
+			//No matches for zero values in germline
+			if (upDownSameGermline[i][0]==0 || upDownSameGermline[i][1]==0) {
+				if (printStats) IO.pl("\tZeroInGermlineGeneSkipping");
+			}
+			else {
+				int diffInUp = Math.abs(upDownSameGermline[i][0] - upDownSameTest[i][0]);
+				if (diffInUp == 0) {
+					if (printStats) IO.pl("\tMatch");
+					matches.add(geneName);
+				}
+				else if (diffInUp == 1) {
+					if (printStats) IO.pl("\tOneOff\t"+(upDownSameGermline[i][0] - upDownSameTest[i][0]));
+					oneOffs.add(geneName);
+				}
+				else if (diffInUp == 2) {
+					if (printStats) IO.pl("\tTwoOff\t"+(upDownSameGermline[i][0] - upDownSameTest[i][0]));
+					twoOffs.add(geneName);
+				}
+				else if (diffInUp == 3) {
+					if (printStats) IO.pl("\tThreeOff\t"+(upDownSameGermline[i][0] - upDownSameTest[i][0]));
+					threeOffs.add(geneName);
+				}
+				else if (printStats) IO.pl("\tNoMatch");
+			}
+		}
+		if (printStats)IO.pl(matches+ "\t"+ oneOffs+"\t"+twoOffs+"\t"+threeOffs);
+		return new ArrayList[] {matches, oneOffs, twoOffs, threeOffs};
+	}
+
+	private int[][] countGenesWithCenteredCoverage(double target, KohliSample ks) {
+		//scale the counts using all of the capture regions
+		ks.createScaledRegionCountMap(captureRegions, captureRegions, target);
+		HashMap<String, Double> sampleScaledCaptureRegionMeans = ks.getScaledReadCoverageCounts();
+		
+		//for each gene count the above and below means from the PoN
+		int[][] upDown = new int[genes.size()][];
+		int counter = 0;
+		for (KohliGene kg : genes.values()) {
+			int numAboveMean = 0;
+			int numBelowMean = 0;
+			int numSame = 0; //never happens!
+			for (CaptureRegion cr: kg.getCaptureRegions()) {
+				String crId = cr.getOriginalInput();
+				double testScaledCount = sampleScaledCaptureRegionMeans.get(crId);
+				double ponScaledCount = cr.getPonScaledMean();
+				if (testScaledCount > ponScaledCount) numAboveMean++;
+				else if (testScaledCount < ponScaledCount) numBelowMean++;
+				else numSame++;
+			}
+			upDown[counter++] = new int[]{numAboveMean, numBelowMean, numSame};
+		}
+		return upDown;
+	}
+
+	private void testOneGeneMethod(String sampleIdToTest) {
 		
-//IO.pl("Testing, "+sampleIdToTest+"...");
 		CopyAnalysisTest cat = new CopyAnalysisTest(sampleIdToTest);
 		copyAnalysisTests.put(sampleIdToTest, cat);
 		
@@ -109,7 +564,7 @@ private void test(String sampleIdToTest) {
 			
 			//scale the test
 			meanScaleTestReadCoverageDatasetsOneGene(1000, kg.getGeneName(), sampleIdToTest);
-File boxPlotDataTest = new File(boxPlotDirectory, "testing_"+sampleIdToTest+"_"+kg.getGeneName()+"_BoxPlotData.txt");
+File boxPlotDataTest = new File(resultsDirectory, "testing_"+sampleIdToTest+"_"+kg.getGeneName()+"_BoxPlotData.txt");
 saveScaledTestReadCoverageDatasetsForBoxPlot(boxPlotDataTest);
 			
 			//for each gene compare
@@ -152,7 +607,7 @@ private void generatePonBackground() {
 
 			//scale the PoN, not necessary, just useful for boxplots, not used in zscore calcs
 			meanScaleGermlineReadCoverageDatasetsOneGene(1000, normalizerGeneName);
-			File boxPlotDataGermline = new File(boxPlotDirectory, normalizerGeneName+"_All_Germline_BoxPlotData.txt");
+			File boxPlotDataGermline = new File(resultsDirectory, normalizerGeneName+"_All_Germline_BoxPlotData.txt");
 			saveScaledGermlineReadCoverageDatasetsForBoxPlot(boxPlotDataGermline);
 			
 			//for each germline sample that isn't flagged, use it as a mock test sample
@@ -192,6 +647,90 @@ private void generatePonBackground() {
 		if (germlineSamplesToRemoveFromPoN.size()!=0) IO.pl("\tExcluding user flagged "+Misc.stringArrayListToString(germlineSamplesToRemoveFromPoN, ", "));
 	}
 	
+	private void generateZScoreHistograms(String normGenes) throws IOException {
+		IO.pl("Generating histograms...");
+		
+		//write out data files for R
+		File rDir = new File(saveDir, "R");
+		rDir.mkdir();
+		File histogramData = new File(rDir, "histogramData.txt");
+		saveTestZScoresForHistograms(histogramData);
+		
+		//Histo dir
+		File histogramPlots = new File(saveDir, "HistogramPlots");
+		histogramPlots.mkdir();
+
+		ArrayList<String> rScriptAL = new ArrayList<String>();
+		rScriptAL.add("library(ggplot2)");
+		rScriptAL.add("setwd('"+histogramPlots.getCanonicalPath()+"')");
+		rScriptAL.add("df = read.table('"+histogramData.getCanonicalPath()+"', header = TRUE)");
+		rScriptAL.add("normGenes = '"+normGenes+"'");
+		rScriptAL.add("testDataset = '"+testSample.getSampleId()+"'");
+		rScriptAL.add("genes = c("+testedGenes+")");
+		rScriptAL.add("for (x in genes) {");
+		rScriptAL.add("  print(x)");
+		rScriptAL.add("  toSub=paste(x,'_',sep='')");
+		rScriptAL.add("  dfSub = subset(df, grepl(toSub, Gene))");
+		rScriptAL.add("  obj = ggplot(dfSub, aes(x=ZScore)) +  ");
+		rScriptAL.add("  geom_histogram(color='black',fill='gray',bins=20) +  ");
+		rScriptAL.add("  geom_vline(aes(xintercept=mean(ZScore)),color='red', linetype='dashed', size=1)+  ");
+		rScriptAL.add("  ggtitle( paste(testDataset, x, normGenes)) + ");
+		rScriptAL.add("  scale_x_continuous(name = 'Capture Region Z-Scores') + ");
+		rScriptAL.add("  scale_y_continuous(name = 'Count')");
+		rScriptAL.add("  ggsave(file=paste(x,'.hist.png', sep = ''), plot=obj, width=10, height=8) ");
+		rScriptAL.add("}");
+		rScriptAL.add("print('COMPLETE')");
+		String rScript = Misc.stringArrayListToString(rScriptAL, "\n");
+		File rScriptFile = new File(rDir, "histogramScript.R");
+		File rLogFile = new File(rDir, "histogram.Rout");
+		IO.writeString(rScript, rScriptFile );
+		
+		//make command
+		String[] command = new String[] {
+				fullPathToR.getCanonicalPath(),
+				"CMD",
+				"BATCH",
+				"--no-save",
+				"--no-restore",
+				rScriptFile.getCanonicalPath(),
+				rLogFile.getCanonicalPath()};			
+		//execute
+		IO.executeCommandLine(command);
+		
+		//read in results 
+		String[] results = IO.loadFile(rLogFile);
+		boolean complete = false;
+		for (String s: results) if (s.contains("COMPLETE"))complete = true;
+		if (complete == false) Misc.printErrAndExit("\nERROR, failed to find 'COMPLETE' in the boxplot R output, see "+rLogFile);
+
+	}
+	
+	private void saveTestZScoresForHistograms(File toSave) {
+		//print out all of the scaled counts for boxplots
+		StringBuilder sb = new StringBuilder();
+		//header row
+		sb.append("Gene\tZScore\n");
+		String gene = "";
+		int counter = 0;
+		for (int i=0; i< captureRegions.size(); i++) {
+			CaptureRegion cr = captureRegions.get(i);
+			if (cr.getGene().equals(gene) == false) {
+				gene = cr.getGene();
+				counter = 0;
+			}
+			counter++;
+			String counterString = Integer.toString(counter);
+			if (counterString.length()==1) counterString = "00"+counterString;
+			else if (counterString.length()==2) counterString = "0"+counterString;
+			String name = gene+"_"+counterString;
+			sb.append(name);
+			sb.append("\t");
+			sb.append(Double.toString(cr.calculateZScoreFromPoN()));
+			sb.append("\n");
+		}
+		IO.writeString(sb.toString(), toSave);
+	}
+
 	private void saveScaledTestReadCoverageDatasetsForBoxPlot(File toSave) {
 		//print out all of the scaled counts for boxplots
 		StringBuilder sb = new StringBuilder();
@@ -207,7 +746,8 @@ private void saveScaledTestReadCoverageDatasetsForBoxPlot(File toSave) {
 			}
 			counter++;
 			String counterString = Integer.toString(counter);
-			if (counterString.length()==1) counterString = "0"+counterString;
+			if (counterString.length()==1) counterString = "00"+counterString;
+			else if (counterString.length()==2) counterString = "0"+counterString;
 			String name = gene+"_"+counterString;
 			sb.append(name);
 			sb.append("\t");
@@ -217,30 +757,25 @@ private void saveScaledTestReadCoverageDatasetsForBoxPlot(File toSave) {
 		IO.writeString(sb.toString(), toSave);
 	}
 
-	private void meanScaleGermlineReadCoverageDatasetsAllGenes(double target) {
-
-		//scale the counts using all of the capture regions
-//IO.pl("Scaling germline sample counts....");
-		for (KohliSample ks: samples.values()) {
-			if (ks.isGermlineSample()) ks.createScaledRegionCountMap(captureRegions, captureRegions, target);
+	private void meanScaleGermlineReadCoverageDatasetsAllGenes(double target, ArrayList<CaptureRegion> crsToScaleWith, boolean setInitial) {
+		//scale the counts using the select capture regions for ALL germline samples
+		for (KohliSample ks: samples.values()) if (ks.isGermlineSample()) {
+			ks.createScaledRegionCountMap(crsToScaleWith, captureRegions, target);
 		}
-		
-		//for each capture region
-//IO.pl("Pulling counts over each capture region for each germline sample....");
+
+		//for all capture region, collect the scaled germline counts and save them to the cr for those good for PoN
 		for (CaptureRegion cr: captureRegions) {
 			String crId = cr.getOriginalInput();
 			ArrayList<Double> germlineScaledCounts = new ArrayList<Double>();
 			//for each germline pon sample
 			for (KohliSample ks: samples.values()) {
-				if (ks.isGermlineSample()) {
+				if (ks.isGermlineSample() && ks.isExcludeFromPon()==false) {
 					HashMap<String, Double> sampleScaledCaptureRegionMeans = ks.getScaledReadCoverageCounts();
 					germlineScaledCounts.add(sampleScaledCaptureRegionMeans.get(crId));
 				}
 			}
 			//convert to double[] and set in cr
-//IO.pl("Setting counts in "+cr.getOriginalInput());
-			cr.setPonScaledCountsCalculateStats(Num.arrayListOfDoubleToArray(germlineScaledCounts));
-//IO.pl("\t"+cr.getPonScaledMean());
+			cr.setPonScaledCountsCalculateStats(Num.arrayListOfDoubleToArray(germlineScaledCounts), setInitial);
 		}
 	}
 	
@@ -266,7 +801,7 @@ private void meanScaleGermlineReadCoverageDatasetsOneGene(double target, String
 				}
 			}
 			//convert to double[] and set in cr
-			cr.setPonScaledCountsCalculateStats(Num.arrayListOfDoubleToArray(germlineScaledCounts));
+			cr.setPonScaledCountsCalculateStats(Num.arrayListOfDoubleToArray(germlineScaledCounts), false);
 		}
 	}
 	
@@ -287,6 +822,16 @@ private void meanScaleTestReadCoverageDatasetsOneGene(double target, String gene
 		}
 	}
 	
+	private void meanScaleTestReadCoverageDatasetsWithDefinedCaptureRegions(double target, ArrayList<CaptureRegion> forScaling) {
+		testSample.createScaledRegionCountMap(forScaling, captureRegions, target);
+		//load the capture regions with the scaled counts
+		for (CaptureRegion cr: captureRegions) {
+			String crId = cr.getOriginalInput();
+			HashMap<String, Double> sampleScaledCaptureRegionMeans = testSample.getScaledReadCoverageCounts();
+			cr.setScaledTestCount(sampleScaledCaptureRegionMeans.get(crId));
+		}
+	}
+	
 	private void saveScaledGermlineReadCoverageDatasetsForBoxPlot(File toSave) {
 		//header row
 		StringBuilder sb = new StringBuilder("Gene\tScaledCount\n");
@@ -301,7 +846,8 @@ private void saveScaledGermlineReadCoverageDatasetsForBoxPlot(File toSave) {
 			}
 			counter++;
 			String counterString = Integer.toString(counter);
-			if (counterString.length()==1) counterString = "0"+counterString;
+			if (counterString.length()==1) counterString = "00"+counterString;
+			else if (counterString.length()==2) counterString = "0"+counterString;
 			String name = gene+"_"+counterString;
 			//for each sample
 			for (KohliSample ks: samples.values()) {
@@ -349,7 +895,6 @@ private void meanScaleAndPrintReadCoverageDatasets(double target) {
 		}
 		IO.pl(sb);
 	}
-	
 	private void meanScaleReadCoverageDatasetsBoxPlot(double target, boolean germline) {
 		//header row
 		StringBuilder sb = new StringBuilder("Gene\tScaledCount\n");
@@ -364,7 +909,8 @@ private void meanScaleReadCoverageDatasetsBoxPlot(double target, boolean germlin
 			}
 			counter++;
 			String counterString = Integer.toString(counter);
-			if (counterString.length()==1) counterString = "0"+counterString;
+			if (counterString.length()==1) counterString = "00"+counterString;
+			else if (counterString.length()==2) counterString = "0"+counterString;
 			String name = gene+"_"+counterString;
 IO.pl(cr.getOriginalInput()+"\t"+name);
 			//for each sample 
@@ -526,7 +1072,7 @@ private void loadPatients(File anno) {
 			else {
 				boolean isGermline = false;
 				if (tokens[2].toLowerCase().contains("germline")) isGermline = true;
-				kp.getSamples().add(new KohliSample(tokens[1], isGermline, tokens[3]));
+				kp.getSamples().add(new KohliSample(kp, tokens[1], isGermline, tokens[3]));
 			}
 		}
 		//load the sample treemap
@@ -545,6 +1091,4 @@ private void loadPatients(File anno) {
 		}
 	}
 
-
-
 }
diff --git a/Source/edu/utah/kohli/CaptureRegion.java b/Source/edu/utah/kohli/CaptureRegion.java
index 2508fa5..b01fb20 100644
--- a/Source/edu/utah/kohli/CaptureRegion.java
+++ b/Source/edu/utah/kohli/CaptureRegion.java
@@ -1,6 +1,6 @@
 package edu.utah.kohli;
 
-import util.gen.IO;
+import java.util.Arrays;
 import util.gen.Misc;
 import util.gen.Num;
 
@@ -18,6 +18,11 @@ public class CaptureRegion {
 	private double ponScaledMean = -1;
 	private double ponStdDev = -1;
 	
+	//Panel of Normals based on initial 1000 scaling
+	private double[] initialPonScaledCounts = null;
+	private double initialPonScaledMean = -1;
+	private double initialPonStdDev = -1;
+	
 	//Test dataset
 	private double scaledTestCount = -1;
 	
@@ -31,10 +36,21 @@ public CaptureRegion (String rep) {
 		gene = f[3];
 	}
 	
+	public boolean isTestOutsidePoN() {
+		double[] minMax = Num.findMinMaxDoubleValues(ponScaledCounts);
+		if (scaledTestCount < minMax[0]) return true;
+		if (scaledTestCount > minMax[1]) return true;
+		return false;
+	}
+	
 	public double calculateZScoreFromPoN(double testValue) {
 		return (testValue-ponScaledMean)/ponStdDev;
 	}
 	
+	public double calculateZScoreFromPoN() {
+		return (scaledTestCount-ponScaledMean)/ponStdDev;
+	}
+	
 	public String toString() {	
 		StringBuilder sb = new StringBuilder(originalInput);
 		sb.append("\t");
@@ -46,12 +62,23 @@ public String toString() {
 	}
 	
 	/**for each of the pons, save the scaled counts and calculate the mean*/
-	public void setPonScaledCountsCalculateStats (double[] ponScaledCounts) {
+	public void setPonScaledCountsCalculateStats (double[] ponScaledCounts, boolean setInitial) {
 		this.ponScaledCounts = ponScaledCounts;
 		ponScaledMean = Num.mean(ponScaledCounts);
 		ponStdDev = Num.standardDeviation(ponScaledCounts, ponScaledMean);
+		if (setInitial) {
+			initialPonScaledCounts = Arrays.copyOf(ponScaledCounts, ponScaledCounts.length);
+			initialPonScaledMean = ponScaledMean;
+			initialPonStdDev = ponStdDev;
+		}
 	}
 	
+	/**Replace the working PoN stats with the initially assigned values.*/
+	public void copyInitialPoNToCurrentPon() {
+		ponScaledCounts = Arrays.copyOf(initialPonScaledCounts, initialPonScaledCounts.length);
+		ponScaledMean = initialPonScaledMean;
+		ponStdDev = initialPonStdDev;
+	}
 
 	public String getOriginalInput() {
 		return originalInput;
diff --git a/Source/edu/utah/kohli/KohliGene.java b/Source/edu/utah/kohli/KohliGene.java
index aa5fbbe..9ba2bc9 100644
--- a/Source/edu/utah/kohli/KohliGene.java
+++ b/Source/edu/utah/kohli/KohliGene.java
@@ -94,4 +94,38 @@ public double[][] getScaledGermlineCounts() {
 		}
 	}
 
+	public int getNumberTestValuesOutsideCaptureRegions() {
+		int numOutside = 0;
+		for (CaptureRegion cr: captureRegions) {
+			if (cr.isTestOutsidePoN()) numOutside++;
+		}
+		return numOutside;
+	}
+	
+	public double getMeanCaptureRegionZScore() {
+		double meanZScore = 0.0;
+		for (CaptureRegion cr: captureRegions) {
+			meanZScore+= cr.calculateZScoreFromPoN();
+		}
+		return meanZScore/(double)captureRegions.size();
+	}
+	
+	public double getMeanCaptureRegionScaledTestScore() {
+		double total = 0.0;
+		for (CaptureRegion cr: captureRegions) {
+			total+= cr.getScaledTestCount();
+		}
+		return total/(double)captureRegions.size();
+	}
+	public double getMeanCaptureRegionPoNScore() {
+		double total = 0.0;
+		for (CaptureRegion cr: captureRegions) {
+			total+= cr.getPonScaledMean();
+		}
+		return total/(double)captureRegions.size();
+	}
+	public String getChromosome() {
+		return captureRegions.get(0).getChr();
+	}
+
 }
diff --git a/Source/edu/utah/kohli/KohliPatient.java b/Source/edu/utah/kohli/KohliPatient.java
index 58941f6..2668bfe 100644
--- a/Source/edu/utah/kohli/KohliPatient.java
+++ b/Source/edu/utah/kohli/KohliPatient.java
@@ -12,7 +12,7 @@ public KohliPatient (String[] tokens) {
 		this.hciPatientId = tokens[0];
 		boolean isGermline = false;
 		if (tokens[0].toLowerCase().contains("germline")) isGermline = true;
-		samples.add(new KohliSample(tokens[1], isGermline, tokens[3]));
+		samples.add(new KohliSample(this, tokens[1], isGermline, tokens[3]));
 	}
 	
 	public String toString() {
@@ -33,4 +33,15 @@ public String getHciPatientId() {
 	public ArrayList<KohliSample> getSamples() {
 		return samples;
 	}
+
+	public KohliSample getGermlineSample() {
+		for (KohliSample ks: samples) if (ks.isGermlineSample()) return ks;
+		return null;
+	}
+
+	public ArrayList<KohliSample> getNonGermlineSamples() {
+		ArrayList<KohliSample> ng = new ArrayList<KohliSample>();
+		for (KohliSample ks: samples) if (ks.isGermlineSample()==false) ng.add(ks);
+		return ng;
+	}
 }
diff --git a/Source/edu/utah/kohli/KohliSample.java b/Source/edu/utah/kohli/KohliSample.java
index eaa612a..cf6f7aa 100644
--- a/Source/edu/utah/kohli/KohliSample.java
+++ b/Source/edu/utah/kohli/KohliSample.java
@@ -2,12 +2,11 @@
 
 import java.util.ArrayList;
 import java.util.HashMap;
-
-import util.gen.IO;
 import util.gen.Misc;
 
 public class KohliSample {
 	
+	private KohliPatient patient = null;
 	private String sampleId = null;
 	private String type = null;
 	private String dateString = null;
@@ -22,7 +21,8 @@ public class KohliSample {
 	private double meanRegionCounts = -1.0;
 	private double regionCountScalar = -1.0;
 	
-	public KohliSample (String sampleId, boolean isGermline, String dateString) {
+	public KohliSample (KohliPatient patient, String sampleId, boolean isGermline, String dateString) {
+		this.patient = patient;
 		this.sampleId = sampleId;
 		if (isGermline) germlineSample = true;
 		else germlineSample = false;
@@ -94,7 +94,6 @@ public double[] getScaledRegionCountArray(ArrayList<CaptureRegion> regions, doub
 	
 	public void createScaledRegionCountMap(ArrayList<CaptureRegion> regionsForScalarCalc,  ArrayList<CaptureRegion> regionsToScale, double targetMean) {
 		regionCountScalar = targetMean / getMeanRegionCounts(regionsForScalarCalc);
-		//IO.pl("Scaling using just one gene for "+this.sampleId+ " numRegionsInGene "+regionsForScalarCalc.size()+" scalar "+regionCountScalar);
 		scaledReadCoverageCounts = new HashMap<String, Double>();
 		//probe info	
 		for (int i=0; i< regionsToScale.size(); i++) {
@@ -149,5 +148,9 @@ public void setExcludeFromPon(boolean excludeFromPon) {
 	public boolean isGermlineSample() {
 		return germlineSample;
 	}
+
+	public KohliPatient getPatient() {
+		return patient;
+	}
 	
 }
diff --git a/build.properties b/build.properties
index c4865f0..cfbb43b 100644
--- a/build.properties
+++ b/build.properties
@@ -50,7 +50,7 @@ edu/utah/seq/data/CalculatePerCycleErrorRate,\
 edu/utah/seq/run/caris/CarisDataWrangler,\
 edu/utah/seq/vcf/xml/caris/CarisVcfComparator,\
 edu/utah/seq/vcf/xml/caris/CarisXmlVcfParser,\
-edu/utah/billing/CBiBilling,\
+edu/utah/billing/CBiBilling2,\
 edu/utah/seq/data/cbio/CBio2Sgr,\
 edu/utah/seq/analysis/ChIPSeq,\
 util/apps/CompareIntersectingRegions,\