Skip to content

Commit

Permalink
Misc mods.
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidAustinNix committed Sep 7, 2017
1 parent eb88793 commit c84ddec
Show file tree
Hide file tree
Showing 24 changed files with 2,836 additions and 30 deletions.
55 changes: 55 additions & 0 deletions Source/edu/utah/seq/amazon/Ec2Launcher.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package edu.utah.seq.amazon;
import java.io.InputStream;
import com.jcraft.jsch.*;

/**
 * Small test harness that opens an SSH session with JSch, runs a single shell
 * command through an "exec" channel, echoes the remote stdout to System.out and
 * the remote stderr to System.err, and prints the command's exit status.
 */
public class Ec2Launcher {

	/** Pause between polls of the remote output stream, in milliseconds. */
	private static final long POLL_INTERVAL_MS = 1000;

	/**
	 * Connects to the hard coded host, runs the hard coded command and prints its output.
	 * Any failure is reported with a stack trace; the channel and session are always released.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		String host="ember5.chpc.utah.edu";
		String user="u0028003";
		// NOTE(review): empty placeholder password; supply credentials from outside the source before real use.
		String password="";
		String command1="ls -ltr && sleep 5s && hostname";

		Session session = null;
		Channel channel = null;
		try{
			java.util.Properties config = new java.util.Properties();
			// NOTE(review): turning off strict host key checking accepts any server key and
			// leaves the connection open to man-in-the-middle attacks; acceptable only for testing.
			config.put("StrictHostKeyChecking", "no");
			JSch jsch = new JSch();
			session=jsch.getSession(user, host, 22);
			session.setPassword(password);
			session.setConfig(config);
			session.connect();
			System.out.println("Connected");

			channel=session.openChannel("exec");
			((ChannelExec)channel).setCommand(command1);
			channel.setInputStream(null);
			((ChannelExec)channel).setErrStream(System.err);

			//the stream must be fetched before the channel is connected
			InputStream in=channel.getInputStream();
			channel.connect();
			printRemoteOutput(channel, in);

			channel.disconnect();
			session.disconnect();
			System.out.println("DONE");
		}catch(Exception e){
			e.printStackTrace();
		}finally{
			//release the connection when an exception skipped the normal disconnects above
			if (channel != null && channel.isConnected()) channel.disconnect();
			if (session != null && session.isConnected()) session.disconnect();
		}
	}

	/**
	 * Copies everything the remote command writes to stdout onto System.out until the
	 * channel closes, then prints the exit status.
	 *
	 * @param channel the connected exec channel
	 * @param in the channel's output stream
	 * @throws java.io.IOException if reading from the stream fails
	 */
	private static void printRemoteOutput(Channel channel, InputStream in) throws java.io.IOException {
		byte[] tmp=new byte[1024];
		while(true){
			while(in.available()>0){
				int i=in.read(tmp, 0, 1024);
				if(i<0)break;
				System.out.print(new String(tmp, 0, i));
			}
			if(channel.isClosed()){
				//output may have arrived between the drain above and the close check; go round again so it is not lost
				if(in.available()>0) continue;
				System.out.println("exit-status: "+channel.getExitStatus());
				break;
			}
			try{
				Thread.sleep(POLL_INTERVAL_MS);
			}catch(InterruptedException ie){
				//keep the interrupt visible to the caller and stop polling
				Thread.currentThread().interrupt();
				break;
			}
		}
	}

}
104 changes: 104 additions & 0 deletions Source/edu/utah/seq/parsers/BamLoader.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package edu.utah.seq.parsers;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import htsjdk.samtools.QueryInterval;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
import util.gen.Gzipper;
import util.gen.IO;

/**
 * Worker that repeatedly asks a {@link SamAlignmentLoader} for a chunk of regions, queries the
 * bam file for alignments overlapping them, and writes proper-pair first-of-pair records to
 * "threadNumber_R1.sam.gz" and second-of-pair records to "threadNumber_R2.sam.gz" in the
 * loader's results directory. Check {@link #isFailed()} after the run completes.
 */
public class BamLoader implements Runnable {

	//fields
	//written by the worker thread, read by the launching thread
	private volatile boolean failed = false;
	private int threadNumber = 0;
	private ArrayList<QueryInterval> al = new ArrayList<QueryInterval>();
	private QueryInterval[] toFetch = null;
	private SamReader samReader = null;
	private SamAlignmentLoader sal = null;
	private Gzipper[] writers = null;
	private File[] alignments = null;

	/**
	 * Opens a private reader on the loader's bam file and creates the two gzipped output writers.
	 *
	 * @param sal source of the bam file, results directory, and region chunks
	 * @param threadNumber used as the prefix of this worker's output file names
	 * @throws Exception if the reader or the writers cannot be created
	 */
	public BamLoader(SamAlignmentLoader sal, int threadNumber) throws Exception {
		this.sal = sal;
		this.threadNumber = threadNumber;
		SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
		samReader = factory.open(sal.getBam());

		try {
			createWriters();
		} catch (Exception e) {
			//don't leak the open reader or a partially created writer when construction fails
			closeResources();
			throw e;
		}
	}

	/**
	 * Processes chunks of regions until the loader has none left, then closes the outputs and
	 * removes empty ones. On any exception the failed flag is set, the error is printed, all
	 * resources are closed, and this worker's output files are deleted.
	 */
	public void run() {
		try {
			//get next chunk of work
			while (sal.loadRegions(al)){

				//pull regions to query
				toFetch = new QueryInterval[al.size()];
				al.toArray(toFetch);
				al.clear();

				//search for overlapping records
				SAMRecordIterator samIterator = samReader.queryOverlapping(toFetch);
				try {
					//for each record
					while (samIterator.hasNext()) parseSam(samIterator.next());
				} finally {
					//always release the iterator, even when parsing or writing throws
					samIterator.close();
				}
			}
			for (Gzipper g: writers) g.close();
			samReader.close();

			//check sizes and delete if empty
			IO.deleteZeroSizedFiles(alignments);

		} catch (Exception e) {
			failed = true;
			System.err.println("\nError: problem fetching alignments" );
			e.printStackTrace();
			//close first so no open handle remains on the files about to be deleted
			closeResources();
			IO.deleteFiles(alignments);
		}
	}

	/** @return true if an exception was thrown while fetching or writing alignments */
	public boolean isFailed() {
		return failed;
	}

	/*Extend and override these methods to extend the functionality*/
	// NOTE(review): the methods below are private and so cannot be overridden by a subclass as written.

	/**
	 * Writes the record to the R1 writer when it is the first of a proper pair and to the R2
	 * writer when it is the second; every other record is skipped.
	 * NOTE(review): print (not println) is used, so getSAMString presumably ends with a line terminator - confirm.
	 */
	private void parseSam(SAMRecord sam) throws Exception{
		//is it paired
		if (sam.getReadPairedFlag() && sam.getProperPairFlag()){
			if (sam.getFirstOfPairFlag()) writers[0].print(sam.getSAMString());
			else if (sam.getSecondOfPairFlag()) writers[1].print(sam.getSAMString());
		}
	}

	/** Creates the R1 and R2 output files in the results directory and opens a writer on each. */
	private void createWriters() throws Exception {
		alignments = new File[2];
		alignments[0] = new File (sal.getResultsDir(), threadNumber+"_R1.sam.gz");
		alignments[1] = new File (sal.getResultsDir(), threadNumber+"_R2.sam.gz");

		writers = new Gzipper[2];
		writers[0] = new Gzipper(alignments[0]);
		writers[1] = new Gzipper(alignments[1]);
	}

	/** Best-effort close of every writer and the reader; one failing close never prevents the others. */
	private void closeResources() {
		if (writers != null) {
			for (Gzipper g: writers) {
				if (g == null) continue;
				try {
					g.close();
				} catch (Exception ignored) {
					//already on a failure path, nothing more can be done
				}
			}
		}
		if (samReader != null) {
			try {
				samReader.close();
			} catch (Exception ignored) {
				//already on a failure path, nothing more can be done
			}
		}
	}

}
181 changes: 181 additions & 0 deletions Source/edu/utah/seq/parsers/SamAlignmentLoader.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
package edu.utah.seq.parsers;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import htsjdk.samtools.QueryInterval;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
import util.bio.annotation.Bed;
import util.gen.Gzipper;
import util.gen.IO;
import util.gen.Misc;

/**
 * Extracts the proper-pair alignments that overlap a set of bed regions from a bam file using
 * several {@link BamLoader} threads, then combines and sorts the first-of-pair and
 * second-of-pair records into two bam files inside a randomly named results directory.
 */
public class SamAlignmentLoader {

	//fields
	private File bam = null;
	private File bed = null;
	private File resultsDir = null;
	private QueryInterval[] regions = null;
	private int regionIndex = 0;
	private int chunkSize = 200;
	private String samHeader = null;
	private File headerFile = null;
	private File[] parsedBams = null;


	/**
	 * Runs the whole extraction. Failures are not thrown: the results directory is deleted, the
	 * stack trace is printed, and {@link #getParsedBams()} is left returning null.
	 *
	 * @param bam alignment file to query
	 * @param bed regions to fetch
	 * @param workingDir parent directory in which the results directory is created
	 * @param numberThreads number of BamLoader workers to run
	 */
	public SamAlignmentLoader(File bam, File bed, File workingDir, int numberThreads){
		try {
			this.bam = bam;
			this.bed = bed;

			//make the results dir
			resultsDir = new File(workingDir, Misc.getRandomString(10)+"_"+Misc.removeExtension(bam.getName()));
			if (!resultsDir.mkdir() && !resultsDir.isDirectory()) throw new IOException("ERROR: Failed to create the results directory "+resultsDir);
			parseRegions();
			System.out.println("\tParsed "+regions.length+ " regions...");

			//create loaders
			System.out.println("\tLaunching "+numberThreads +" loaders...");
			BamLoader[] loaders = new BamLoader[numberThreads];
			for (int i=0; i< numberThreads; i++) loaders[i] = new BamLoader(this, i+1);
			chunkSize = regions.length/ numberThreads;
			if (chunkSize < 10) chunkSize = 10;

			//launch em!
			ExecutorService executor = Executors.newFixedThreadPool(numberThreads);
			for (BamLoader l: loaders) executor.execute(l);
			executor.shutdown();
			awaitCompletion(executor);

			//check loaders
			for (BamLoader l: loaders) {
				if (l.isFailed()) throw new IOException("ERROR: Failed to extract alignments from "+bam.getName());
			}

			//write out header
			headerFile = new File(resultsDir, "0_header.sam.gz");
			Gzipper header = new Gzipper( headerFile );
			try {
				header.println(samHeader);
			} finally {
				header.close();
			}

			parsedBams = combineAndSortResults();

			System.out.println("\tComplete");

		} catch (Exception e){
			//resultsDir is still null when the failure happened while building its name
			if (resultsDir != null) {
				IO.deleteDirectory(resultsDir);
				IO.deleteDirectoryViaCmdLine(resultsDir);
			}
			e.printStackTrace();
		}
	}

	/**
	 * Concatenates the header with every per-thread R1 file and with every per-thread R2 file,
	 * sorts each concatenation into a bam, and deletes the intermediate sam.gz files.
	 *
	 * @return the sorted R1 bam followed by the sorted R2 bam
	 */
	private File[] combineAndSortResults() throws Exception {
		//cat results R1
		System.out.println("\tCombinging temp files...");
		ArrayList<File> al = new ArrayList<File>();
		al.add(headerFile);
		for (File f: IO.extractFiles(resultsDir, "_R1.sam.gz")) al.add(f);
		File toSortR1= new File(resultsDir,"toSortR1.sam.gz");
		IO.concatinateFiles(al, toSortR1);
		al.clear();

		//cat results R2
		al.add(headerFile);
		for (File f: IO.extractFiles(resultsDir, "_R2.sam.gz")) al.add(f);
		File toSortR2= new File(resultsDir,"toSortR2.sam.gz");
		IO.concatinateFiles(al, toSortR2);

		//sort em
		System.out.println("\tSorting results...");
		File r1Bam = new File (resultsDir, "sortedR1.bam");
		File r2Bam = new File (resultsDir, "sortedR2.bam");
		File[] bams = new File[]{r1Bam, r2Bam};
		sortSams(new File[]{toSortR1, toSortR2}, bams, 2);

		//delete sams
		IO.deleteFiles(resultsDir, "sam.gz");

		return bams;

	}

	/**Launches a threaded version of Picard's sort sam.
	 * @param toSort files to sort
	 * @param bamOutput destination for each sorted file, parallel to toSort
	 * @param maxNumThreads upper limit on concurrent sorts, values below one are treated as one
	 * @throws IOException if any sort fails or the wait is interrupted */
	public static void sortSams(File[] toSort, File[] bamOutput, int maxNumThreads) throws IOException{
		//nothing to do, and a zero sized thread pool is illegal
		if (toSort.length == 0) return;

		SamSorter[] ss = new SamSorter[toSort.length];
		for (int i=0; i< toSort.length; i++) ss[i] = new SamSorter(toSort[i], bamOutput[i]);
		int numThreads = Math.max(1, Math.min(ss.length, maxNumThreads));

		//launch em!
		ExecutorService executor = Executors.newFixedThreadPool(numThreads);
		for (SamSorter l: ss) executor.execute(l);
		executor.shutdown();
		awaitCompletion(executor);

		//check sorters
		for (SamSorter l: ss) if (l.isFailed()) throw new IOException("ERROR: Failed to sort alignments!");

	}

	/**Blocks, without spinning a cpu, until every task submitted to the already shut down executor has finished.*/
	private static void awaitCompletion(ExecutorService executor) throws IOException {
		try {
			while (!executor.awaitTermination(1, java.util.concurrent.TimeUnit.HOURS)) {
				//still running, keep waiting
			}
		} catch (InterruptedException e) {
			executor.shutdownNow();
			Thread.currentThread().interrupt();
			throw new IOException("ERROR: Interrupted while waiting for worker threads to complete", e);
		}
	}

	/**Adds up to chunkSize not yet distributed regions to the list. Each region is handed out exactly once.
	 * @param al list to receive the regions, callers pass it in empty
	 * @return true if the list holds any regions after the call, false when no work remains */
	public synchronized boolean loadRegions(ArrayList<QueryInterval> al){
		int count = 0;
		//advance the index together with the add; breaking out of a for loop would skip its
		//increment and hand the final region of this chunk out again on the next call
		while (regionIndex < regions.length && count < chunkSize){
			al.add(regions[regionIndex++]);
			count++;
		}
		return al.size() != 0;
	}

	/**
	 * Parses the bed file, converts each line into a QueryInterval using the bam's sequence
	 * dictionary, sorts the intervals, and captures the bam's text header.
	 * NOTE(review): the intervals are sorted but not merged; confirm whether htsjdk needs
	 * overlapping or abutting intervals collapsed (QueryInterval.optimizeIntervals) before querying.
	 *
	 * @throws Exception if a bed chromosome is missing from the bam header or parsing fails
	 */
	private void parseRegions() throws Exception {
		//parse regions
		Bed[] bedRegions = Bed.parseFile(bed, 0, 0);
		regions = new QueryInterval[bedRegions.length];

		//pull indexes
		SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
		SamReader samReader = factory.open(bam);
		try {
			SAMSequenceDictionary sd = samReader.getFileHeader().getSequenceDictionary();

			//create QueryIntervals
			for (int i=0; i< bedRegions.length; i++) {
				int index = sd.getSequenceIndex(bedRegions[i].getChromosome());
				if (index == -1) throw new Exception("Failed to find a chromosome index for this bed line "+bedRegions[i].toString() +" in "+bam.getName());
				regions[i] = new QueryInterval(index, bedRegions[i].getStart(), bedRegions[i].getStop());
			}
			Arrays.sort(regions);
			samHeader = samReader.getFileHeader().getTextHeader().trim();
		} finally {
			//release the reader even when a chromosome lookup fails
			samReader.close();
		}
	}

	/**For testing. Requires bamFile, bedFile and workingDir; numberThreads is optional and defaults to 5.
	 * NOTE(review): bpPadding is listed in the usage message but nothing in this class consumes it.*/
	public static void main (String[] args){
		if (args.length < 3) System.out.println("\nUSAGE: bamFile, bedFile, workingDir, numberThreads, bpPadding\n");
		else {
			int numberThreads = 5;
			if (args.length > 3) numberThreads = Integer.parseInt(args[3]);
			new SamAlignmentLoader(new File(args[0]), new File(args[1]), new File(args[2]), numberThreads);
		}
	}


	public File getBam(){
		return bam;
	}
	public File getResultsDir() {
		return resultsDir;
	}

	/**@return the sorted R1 and R2 bam files, or null if the extraction failed*/
	public File[] getParsedBams() {
		return parsedBams;
	}

}


32 changes: 32 additions & 0 deletions Source/edu/utah/seq/parsers/SamSorter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package edu.utah.seq.parsers;

import java.io.File;
import edu.utah.seq.data.sam.PicardSortSam;

/**
 * Runnable wrapper that hands one sam file and its bam destination to {@link PicardSortSam}
 * and remembers whether that call threw.
 */
public class SamSorter implements Runnable {

	private final File samToSort;
	private final File bamOutput;
	private boolean failed;

	/**
	 * @param samToSort input alignment file
	 * @param bamOutput destination for the sorted output
	 */
	public SamSorter (File samToSort, File bamOutput){
		this.bamOutput = bamOutput;
		this.samToSort = samToSort;
		this.failed = false;
	}

	/** Constructs a PicardSortSam for the two files; any exception is recorded rather than propagated. */
	@Override
	public void run() {
		try {
			//the instance is discarded; presumably the sort happens inside the constructor
			new PicardSortSam (samToSort, bamOutput);
		} catch (Exception problem) {
			handleFailure(problem);
		}
	}

	/** Flags the failure, removes whatever output was written, and reports the error. */
	private void handleFailure(Exception problem) {
		failed = true;
		bamOutput.delete();
		System.err.println("\nError: problem sorting "+samToSort );
		problem.printStackTrace();
	}

	/** @return true if the sort threw an exception */
	public boolean isFailed() {
		return failed;
	}
}

Loading

0 comments on commit c84ddec

Please sign in to comment.