
Commit

Adding beta version of a tool for searching the patient molecular repository
u0028003 committed Apr 10, 2023
1 parent e8dec78 commit 5b3d3ea
Showing 23 changed files with 2,407 additions and 630 deletions.
60 changes: 43 additions & 17 deletions Source/edu/utah/seq/cnv/GatkCalledSegmentAnnotator.java
@@ -29,6 +29,7 @@ public class GatkCalledSegmentAnnotator {
private double minAbsLg2TumorCopyRatio = 0.15;
private int maxGapGeneIntersection = 1000;
private File ucscGeneTableFile;
private boolean normalPresent = true;

//internal fields
private GatkSegment[] gatkSegments;
@@ -95,7 +96,17 @@ private void saveSegResults() throws IOException {
int numPass = 0;
int numFail = 0;
for (GatkSegment gs : gatkSegments) {
if (Math.abs(gs.getLgMeanTNRatios()) >= minCopyRatioMeanTNRatios && Math.abs(gs.getLogMeanNormalCopyRatios()) <= maxAbsLg2NormalCopyRatio && Math.abs(gs.getLogMeanTumorCopyRatios()) >= minAbsLg2TumorCopyRatio) {
if (normalPresent == false) {
if (Math.abs(gs.getLogMeanTumorCopyRatios()) >= minAbsLg2TumorCopyRatio) {
out.println(gs.toSeg(passName));
numPass++;
}
else {
out.println(gs.toSeg(failName));
numFail++;
}
}
else if (Math.abs(gs.getLgMeanTNRatios()) >= minCopyRatioMeanTNRatios && Math.abs(gs.getLogMeanNormalCopyRatios()) <= maxAbsLg2NormalCopyRatio && Math.abs(gs.getLogMeanTumorCopyRatios()) >= minAbsLg2TumorCopyRatio) {
out.println(gs.toSeg(passName));
numPass++;
}
@@ -120,7 +131,11 @@ private void saveSpreadsheetResults() throws IOException {

for (GatkSegment gs : gatkSegments) {
boolean passThres;
if (Math.abs(gs.getLgMeanTNRatios()) >= minCopyRatioMeanTNRatios && Math.abs(gs.getLogMeanNormalCopyRatios()) <= maxAbsLg2NormalCopyRatio && Math.abs(gs.getLogMeanTumorCopyRatios()) >= minAbsLg2TumorCopyRatio) passThres = true;
if (normalPresent == false) {
if (Math.abs(gs.getLogMeanTumorCopyRatios()) >= minAbsLg2TumorCopyRatio) passThres = true;
else passThres = false;
}
else if (Math.abs(gs.getLgMeanTNRatios()) >= minCopyRatioMeanTNRatios && Math.abs(gs.getLogMeanNormalCopyRatios()) <= maxAbsLg2NormalCopyRatio && Math.abs(gs.getLogMeanTumorCopyRatios()) >= minAbsLg2TumorCopyRatio) passThres = true;
else passThres = false;
out.println(gs.toSpreadSheet(passThres));
}
@@ -134,12 +149,15 @@ private void saveFilteredBedResults() throws IOException {
out.println("#Chr Start Stop Info Lg2(meanTumCRs/meanNormCRs) Call");
out.println("##Info numOb = number of copy ratio observations, typically exons");
out.println("##Info lg2Tum = log2 of mean tumor copy ratio observations");
out.println("##Info lg2Norm = log2 of mean normal copy ratio observations");
if (normalPresent) out.println("##Info lg2Norm = log2 of mean normal copy ratio observations");
out.println("##Info genes = affected genes");
out.println("##Call + for amplification, - for loss");

for (GatkSegment gs : gatkSegments) {
if (Math.abs(gs.getLgMeanTNRatios()) >= minCopyRatioMeanTNRatios && Math.abs(gs.getLogMeanNormalCopyRatios()) <= maxAbsLg2NormalCopyRatio && Math.abs(gs.getLogMeanTumorCopyRatios()) >= minAbsLg2TumorCopyRatio) {
if (normalPresent == false) {
if (Math.abs(gs.getLogMeanTumorCopyRatios()) >= minAbsLg2TumorCopyRatio) out.println(gs.toBed());
}
else if (Math.abs(gs.getLgMeanTNRatios()) >= minCopyRatioMeanTNRatios && Math.abs(gs.getLogMeanNormalCopyRatios()) <= maxAbsLg2NormalCopyRatio && Math.abs(gs.getLogMeanTumorCopyRatios()) >= minAbsLg2TumorCopyRatio) {
out.println(gs.toBed());
}
}

private void checkCopyRatios() {
for (GatkSegment gs : gatkSegments) {
gs.calculateMeans();
if (gs.copyRatiosOK() == false){
gs.calculateMeans(normalPresent);
if (normalPresent && gs.copyRatiosOK() == false){
IO.el("Problem with number of copy ratio points? These should be identical:");
gs.printData();
System.exit(1);
@@ -158,6 +176,7 @@ private void checkCopyRatios() {
}

private void addNormalAlleleFrequencies() throws IOException {
if (normalPresent == false) return;
TabixReader tr = new TabixReader(normalAlleleFreqFile.toString());
String line;
TabixReader.Iterator it;
@@ -238,6 +257,7 @@ private void addTumorCopyRatios() throws IOException {
}

private void addNormalCopyRatios() throws IOException {
if (normalPresent == false) return;
TabixReader tr = new TabixReader(normalCopyRatioFile.toString());
String line;
TabixReader.Iterator it;
@@ -334,7 +354,15 @@ public void processArgs(String[] args){
resultsDirectory.mkdirs();

//check files
File[] files = new File[]{segFile, tumorCopyRatioFile, normalCopyRatioFile, tumorAlleleFreqFile, normalAlleleFreqFile, ucscGeneTableFile};
File[] files = null;
if (normalCopyRatioFile == null && normalAlleleFreqFile == null) {
normalPresent = false;
files = new File[]{segFile, tumorCopyRatioFile, tumorAlleleFreqFile, ucscGeneTableFile};
}
else {
normalPresent = true;
files = new File[]{segFile, tumorCopyRatioFile, normalCopyRatioFile, tumorAlleleFreqFile, normalAlleleFreqFile, ucscGeneTableFile};
}
boolean pass = true;
for (File f: files) {
if (f== null || f.exists() == false) {
@@ -358,15 +386,15 @@ private void printOptions() {
IO.pl("Settings:");
IO.pl("\t-s SegFile\t"+segFile);
IO.pl("\t-t TumorCopyRatioFile\t"+tumorCopyRatioFile);
IO.pl("\t-n NormalCopyRatioFile\t"+normalCopyRatioFile);
if (normalPresent) IO.pl("\t-n NormalCopyRatioFile\t"+normalCopyRatioFile);
IO.pl("\t-u TumorAlleleFreqFile\t"+tumorAlleleFreqFile);
IO.pl("\t-o NormalAlleleFreqFile\t"+normalAlleleFreqFile);
if (normalPresent) IO.pl("\t-o NormalAlleleFreqFile\t"+normalAlleleFreqFile);
IO.pl("\t-r ResultsDirectory\t"+resultsDirectory);
IO.pl("\t-g RefFlatGeneFile\t"+ucscGeneTableFile);
IO.pl("\t-a MaxGapForGeneIntersection\t"+maxGapGeneIntersection);
IO.pl("\t-m MinimumAbsLg2TNRatioOfCopyRatios\t"+minCopyRatioMeanTNRatios);
if (normalPresent) IO.pl("\t-m MinimumAbsLg2TNRatioOfCopyRatios\t"+minCopyRatioMeanTNRatios);
IO.pl("\t-c MinimumAbsLg2TumorCopyRatio\t"+minAbsLg2TumorCopyRatio);
IO.pl("\t-x MaximumAbsLg2NormalCopyRatio\t"+maxAbsLg2NormalCopyRatio);
if (normalPresent) IO.pl("\t-x MaximumAbsLg2NormalCopyRatio\t"+maxAbsLg2NormalCopyRatio);
IO.pl();
}

@@ -375,31 +403,29 @@ public static void printDocs(){

System.out.println("\n" +
"**************************************************************************************\n" +
"** Gatk Called Segment Annotator: August 2019 **\n" +
"** Gatk Called Segment Annotator: April 2023 **\n" +
"**************************************************************************************\n" +
"Annotates GATKs CallCopyRatioSegments output with denoised copy ratio and heterozygous\n"+
"allele frequency data from the tumor and matched normal samples. Enables filtering\n"+
"using these values to remove copy ratio calls with high normal background. Adds \n"+
"intersecting gene names.\n"+

"\nRequired Options:\n"+
"\nOptions:\n"+
"-r Results directory to save the passing and failing segments.\n"+
"-s Called segment file from GATKs CallCopyRatioSegments app, e.g. xxx.called.seg\n"+
"-t Tumor denoised copy ratio file, from GATKs DenoiseReadCounts app. Bgzip compress\n"+
" and tabix index it with https://github.com/samtools/htslib :\n"+
" grep -vE '(@|CONTIG)' tumor.cr.tsv > tumor.cr.txt\n"+
" ~/HTSLib/bgzip tumor.cr.txt\n"+
" ~/HTSLib/tabix -s 1 -b 2 -e 3 tumor.cr.txt.gz\n"+
"-n Normal denoised copy ratio file, ditto.\n"+
"-n (Optional) Normal denoised copy ratio file, ditto.\n"+
"-u Tumor allele frequency file, from GATKs ModelSegments app. Bgzip compress\n"+
" and tabix index it with https://github.com/samtools/htslib :\n"+
" grep -vE '(@|CONTIG)' gbm7.hets.tsv > gbm7.hets.txt\n"+
" ~/HTSLib/bgzip gbm7.hets.txt\n"+
" ~/HTSLib/tabix -s 1 -b 2 -e 2 gbm7.hets.txt.gz\n"+
"-o Normal allele frequency file, ditto.\n"+
"-o (Optional) Normal allele frequency file, ditto.\n"+
"-g RefFlat UCSC gene file, run USeq's MergeUCSCGeneTable to collapse transcripts.\n"+

"\nDefault Options:\n"+
"-c Minimum absolute tumor log2 copy ratio, defaults to 0.15\n"+
"-x Maximum absolute normal log2 copy ratio, defaults to 0.5\n"+
"-m Minimum absolute log2 TN ratio of copy ratios, defaults to 0.15\n"+
18 changes: 12 additions & 6 deletions Source/edu/utah/seq/cnv/GatkSegment.java
@@ -42,17 +42,23 @@ public GatkSegment (String line){
call = tokens[5];
}

public void calculateMeans(){
public void calculateMeans(boolean normalPresent){
float[] tumorCRs = Num.antiLog(Num.arrayListOfFloatToArray(tumorCopyRatios), 2);
double tumorCRMean = Num.mean(tumorCRs);
if (tumorCRMean != 0) logMeanTumorCopyRatios = Num.log2(tumorCRMean);

float[] normalCRs = Num.antiLog(Num.arrayListOfFloatToArray(normalCopyRatios), 2);
double normalCRMean = Num.mean(normalCRs);
if (normalCRMean != 0) logMeanNormalCopyRatios = Num.log2(normalCRMean);
if (normalPresent) {
float[] normalCRs = Num.antiLog(Num.arrayListOfFloatToArray(normalCopyRatios), 2);
double normalCRMean = Num.mean(normalCRs);
if (normalCRMean != 0) logMeanNormalCopyRatios = Num.log2(normalCRMean);
double[] tnRatios = Num.ratio(tumorCRs, normalCRs);
lgMeanTNRatios = Num.log2(Num.mean(tnRatios));
//watch for when the ratio goes off the rail reporting values of 30! don't use?
//double lgDiff = logMeanTumorCopyRatios - logMeanNormalCopyRatios;
//if (Math.abs(lgDiff) < Math.abs(lgMeanTNRatios)) lgMeanTNRatios = lgDiff;

}

double[] tnRatios = Num.ratio(tumorCRs, normalCRs);
lgMeanTNRatios = Num.log2(Num.mean(tnRatios));
//could do a t-test here if >2 obs...

//probably should match the AFs and calc individual ratios then the mean of the ratios. Prob is these aren't always present in each set.
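For reference, the arithmetic calculateMeans performs: anti-log the per-observation log2 copy ratios, average them, and re-log the means; the T/N value is the log2 of the mean of the per-observation tumor/normal ratios. A self-contained sketch with made-up values, plain Java standing in for the Num utility calls above:

	//illustrative values: per-exon log2 copy ratios for one segment
	double[] tumorLog2 = {1.1, 0.9, 1.0};
	double[] normalLog2 = {0.05, -0.02, 0.01};

	double tumorMean = 0, normalMean = 0, tnMean = 0;
	for (int i = 0; i < tumorLog2.length; i++) {
		double t = Math.pow(2, tumorLog2[i]);  //anti-log, as Num.antiLog(..., 2) does above
		double n = Math.pow(2, normalLog2[i]);
		tumorMean += t;
		normalMean += n;
		tnMean += t / n;                       //per-observation tumor/normal ratio, as Num.ratio(...) does above
	}
	tumorMean /= tumorLog2.length;
	normalMean /= normalLog2.length;
	tnMean /= tumorLog2.length;

	double logMeanTumorCopyRatios = Math.log(tumorMean) / Math.log(2);   // ~1.0
	double logMeanNormalCopyRatios = Math.log(normalMean) / Math.log(2); // ~0.01
	double lgMeanTNRatios = Math.log(tnMean) / Math.log(2);              // ~0.99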
94 changes: 94 additions & 0 deletions Source/edu/utah/seq/pmr/AvatarClinicalInfo.java
@@ -0,0 +1,94 @@
package edu.utah.seq.pmr;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;

import org.json.JSONArray;
import org.json.JSONObject;
import util.gen.IO;

public class AvatarClinicalInfo {

private HashMap<String, String> patient = new HashMap<String, String>();
private HashMap<String, String> root = new HashMap<String, String>();
private HashMap<String, String> tumorDna = new HashMap<String, String>();
private HashMap<String, String> tumorRna = new HashMap<String, String>();
private ArrayList<HashMap<String, String>> normalDna = new ArrayList<HashMap<String, String>>();

public static final String TUMOR_DNA_NAME = "TumorDNA";
public static final String TUMOR_RNA_NAME = "TumorRNA";
public static final String NORMAL_DNA_NAME = "NormalDNA";
public static final String PATIENT_NAME = "Patient";


public AvatarClinicalInfo (File json) {
String jString = IO.loadFile(json, " ", true);
JSONObject mainJsonObject = new JSONObject(jString);
Iterator it = mainJsonObject.keys();
while (it.hasNext()) {
String key = (String)it.next();
if (key.equals(PATIENT_NAME)) loadHashMap(patient, mainJsonObject.getJSONObject(key));
else if (key.equals(TUMOR_DNA_NAME)) loadHashMap(tumorDna, mainJsonObject.getJSONObject(key));
else if (key.equals(TUMOR_RNA_NAME)) loadHashMap(tumorRna, mainJsonObject.getJSONObject(key));
else if (key.equals(NORMAL_DNA_NAME)) {
JSONArray allNorm = mainJsonObject.getJSONArray(key);
int num = allNorm.length();
for (int i=0; i< num; i++) {
JSONObject no = allNorm.getJSONObject(i);
HashMap<String, String> normal = new HashMap<String, String>();
loadHashMap(normal, no);
normalDna.add(normal);
}
}
else {
root.put(key, mainJsonObject.get(key).toString());
}
}

}

public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("Patient:\t"+ patient+"\n");
sb.append("Root:\t"+ root+"\n");
sb.append("TumorDNA:\t"+ tumorDna+"\n");
sb.append("TumorRNA:\t"+ tumorRna+"\n");
for (HashMap<String, String> n: normalDna) {
sb.append("NormalDNA:\t"+ n+"\n");
}
return sb.toString();
}



private void loadHashMap(HashMap<String, String> hm, JSONObject j) {
Iterator it = j.keys();
while (it.hasNext()) {
String key = (String)it.next();
hm.put(key, j.getString(key));
}
}

public HashMap<String, String> getPatient() {
return patient;
}

public HashMap<String, String> getRoot() {
return root;
}

public HashMap<String, String> getTumorDna() {
return tumorDna;
}

public HashMap<String, String> getTumorRna() {
return tumorRna;
}

public ArrayList<HashMap<String, String>> getNormalDna() {
return normalDna;
}

}
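A quick usage sketch for the parser above; the demo class and the command-line argument are illustrative, not part of the commit:

	import java.io.File;
	import java.util.HashMap;
	import edu.utah.seq.pmr.AvatarClinicalInfo;

	public class AvatarClinicalInfoDemo {
		public static void main(String[] args) {
			//args[0]: an Avatar ClinicalReport json pulled from the repository, e.g. xxx_IDTv1_SAR_F.json
			AvatarClinicalInfo info = new AvatarClinicalInfo(new File(args[0]));

			//toString() dumps the Patient, Root, TumorDNA, TumorRNA and NormalDNA sections
			System.out.println(info);

			//each section is a simple key -> value map
			HashMap<String, String> tumorDna = info.getTumorDna();
			System.out.println("TumorDNA keys: " + tumorDna.keySet());

			//more than one normal DNA sample is possible, hence the list
			System.out.println("Num NormalDNA: " + info.getNormalDna().size());
		}
	}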
70 changes: 70 additions & 0 deletions Source/edu/utah/seq/pmr/Dataset.java
@@ -0,0 +1,70 @@
package edu.utah.seq.pmr;

import java.io.File;
import java.util.ArrayList;
import java.util.LinkedHashMap;

import edu.utah.seq.vcf.json.TempusJsonSummary;

public class Dataset {

private String source; //Avatar, Tempus, Caris
private String datasetId; //A032049_SL419345_SL419548_SL420681, TL-20-B70ACE, TN21-123058_2022-01-27
private ArrayList<String> partialPaths = new ArrayList<String>();
private ArrayList<File> clinicalInfoFiles = null;
private AvatarClinicalInfo avatarClinicalInfo = null; //only for Avatar datasets
private TempusJsonSummary tempusJsonReportInfo = null; //only for Tempus datasets
private LinkedHashMap<String, String> carisClinicalInfo = null; //only for Caris datasets

public Dataset(String source, String datasetId) {
this.source = source;
this.datasetId = datasetId;
}

public String getSource() {
return source;
}

public String getDatasetId() {
return datasetId;
}

public ArrayList<String> getPartialPaths() {
return partialPaths;
}

public ArrayList<File> getClinicalInfoFiles() {
return clinicalInfoFiles;
}

public void setClinicalInfoFile(File clinicalInfo) {
if (clinicalInfoFiles == null) clinicalInfoFiles = new ArrayList<File>();
clinicalInfoFiles.add(clinicalInfo);
}

public AvatarClinicalInfo getAvatarClinicalInfo() {
return avatarClinicalInfo;
}

public void setAvatarClinicalInfo(AvatarClinicalInfo avatarClinicalInfo) {
this.avatarClinicalInfo = avatarClinicalInfo;
}

public TempusJsonSummary getTempusJsonReportInfo() {
return tempusJsonReportInfo;
}

public void setTempusJsonReportInfo(TempusJsonSummary tempusJsonReportInfo) {
this.tempusJsonReportInfo = tempusJsonReportInfo;
}

public void setCarisXmlReportInfo(LinkedHashMap<String, String> linkedHashMap) {
carisClinicalInfo = linkedHashMap;

}

public LinkedHashMap<String, String> getCarisClinicalInfo() {
return carisClinicalInfo;
}

}
41 changes: 41 additions & 0 deletions Source/edu/utah/seq/pmr/Patient.java
@@ -0,0 +1,41 @@
package edu.utah.seq.pmr;
import java.util.HashMap;

public class Patient {

//fields
private String coreId = null;
private HashMap<String, Dataset> idDataSets = new HashMap<String, Dataset>();

public Patient(String coreId) {
this.coreId = coreId;
}

public void addDataFile(String[] keys) {
//Patients AA2mF6Vy Avatar A032049_SL419345_SL419548_SL420681 ClinicalReport A032049_SL419345_SL419548_SL420681_IDTv1_SAR_F.json
// 0 1 2 3 4 5+
String source = keys[2];
String datasetId = keys[3];
Dataset d = idDataSets.get(datasetId);
if (d == null) {
d = new Dataset(source, datasetId);
idDataSets.put(datasetId, d);
}
StringBuilder sb = new StringBuilder(keys[4]);
for (int i=5; i< keys.length; i++) {
sb.append("/");
sb.append(keys[i]);
}
d.getPartialPaths().add(sb.toString());
}

public String getCoreId() {
return coreId;
}

public HashMap<String, Dataset> getIdDataSets() {
return idDataSets;
}


}
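The key layout documented in addDataFile maps directly onto the repository object paths; a short sketch of feeding one such path through Patient and reading it back (values taken from the comment above, the demo class itself is hypothetical):

	import java.util.HashMap;
	import edu.utah.seq.pmr.Dataset;
	import edu.utah.seq.pmr.Patient;

	public class PatientDemo {
		public static void main(String[] args) {
			//Patients / coreId / source / datasetId / partial path..., as in the addDataFile comment
			String[] keys = "Patients/AA2mF6Vy/Avatar/A032049_SL419345_SL419548_SL420681/ClinicalReport/A032049_SL419345_SL419548_SL420681_IDTv1_SAR_F.json".split("/");

			Patient p = new Patient(keys[1]);   //coreId AA2mF6Vy
			p.addDataFile(keys);                //creates the Avatar Dataset and records the partial path

			HashMap<String, Dataset> datasets = p.getIdDataSets();
			Dataset d = datasets.get("A032049_SL419345_SL419548_SL420681");
			System.out.println(d.getSource());        //Avatar
			System.out.println(d.getPartialPaths());  //[ClinicalReport/A032049_SL419345_SL419548_SL420681_IDTv1_SAR_F.json]
		}
	}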