-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
eb88793
commit c84ddec
Showing
24 changed files
with
2,836 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
package edu.utah.seq.amazon; | ||
import java.io.InputStream; | ||
import com.jcraft.jsch.*; | ||
|
||
public class Ec2Launcher { | ||
|
||
/** | ||
* @param args | ||
*/ | ||
public static void main(String[] args) { | ||
String host="ember5.chpc.utah.edu"; | ||
String user="u0028003"; | ||
String password=""; | ||
String command1="ls -ltr && sleep 5s && hostname"; | ||
try{ | ||
|
||
java.util.Properties config = new java.util.Properties(); | ||
config.put("StrictHostKeyChecking", "no"); | ||
JSch jsch = new JSch(); | ||
Session session=jsch.getSession(user, host, 22); | ||
session.setPassword(password); | ||
session.setConfig(config); | ||
session.connect(); | ||
System.out.println("Connected"); | ||
|
||
Channel channel=session.openChannel("exec"); | ||
((ChannelExec)channel).setCommand(command1); | ||
channel.setInputStream(null); | ||
((ChannelExec)channel).setErrStream(System.err); | ||
|
||
InputStream in=channel.getInputStream(); | ||
channel.connect(); | ||
byte[] tmp=new byte[1024]; | ||
while(true){ | ||
while(in.available()>0){ | ||
int i=in.read(tmp, 0, 1024); | ||
if(i<0)break; | ||
System.out.print(new String(tmp, 0, i)); | ||
} | ||
if(channel.isClosed()){ | ||
System.out.println("exit-status: "+channel.getExitStatus()); | ||
break; | ||
} | ||
try{Thread.sleep(1000);}catch(Exception ee){} | ||
} | ||
channel.disconnect(); | ||
session.disconnect(); | ||
System.out.println("DONE"); | ||
}catch(Exception e){ | ||
e.printStackTrace(); | ||
} | ||
|
||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
package edu.utah.seq.parsers; | ||
|
||
import java.io.File; | ||
import java.io.FileNotFoundException; | ||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import htsjdk.samtools.QueryInterval; | ||
import htsjdk.samtools.SAMRecord; | ||
import htsjdk.samtools.SAMRecordIterator; | ||
import htsjdk.samtools.SamReader; | ||
import htsjdk.samtools.SamReaderFactory; | ||
import htsjdk.samtools.ValidationStringency; | ||
import util.gen.Gzipper; | ||
import util.gen.IO; | ||
|
||
public class BamLoader implements Runnable { | ||
|
||
//fields | ||
private boolean failed = false; | ||
private int threadNumber = 0; | ||
private ArrayList<QueryInterval> al = new ArrayList<QueryInterval>(); | ||
private QueryInterval[] toFetch = null; | ||
private SamReader samReader = null; | ||
private SamAlignmentLoader sal = null; | ||
private Gzipper[] writers = null; | ||
private File[] alignments = null; | ||
|
||
public BamLoader(SamAlignmentLoader sal, int threadNumber) throws Exception { | ||
this.sal = sal; | ||
this.threadNumber = threadNumber; | ||
SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT); | ||
samReader = factory.open(sal.getBam()); | ||
|
||
createWriters(); | ||
} | ||
|
||
public void run() { | ||
try { | ||
//get next chunk of work | ||
while (sal.loadRegions(al)){ | ||
|
||
//pull regions to query | ||
toFetch = new QueryInterval[al.size()]; | ||
al.toArray(toFetch); | ||
al.clear(); | ||
|
||
//search for overlapping records | ||
SAMRecordIterator samIterator = samReader.queryOverlapping(toFetch); | ||
|
||
//for each record | ||
while (samIterator.hasNext()) { | ||
SAMRecord sam = samIterator.next(); | ||
parseSam(sam); | ||
} | ||
|
||
samIterator.close(); | ||
} | ||
for (Gzipper g: writers) g.close(); | ||
samReader.close(); | ||
|
||
//check sizes and delete if empty | ||
IO.deleteZeroSizedFiles(alignments); | ||
|
||
} catch (Exception e) { | ||
failed = true; | ||
IO.deleteFiles(alignments); | ||
System.err.println("\nError: problem fetching alignments" ); | ||
e.printStackTrace(); | ||
try { | ||
samReader.close(); | ||
for (Gzipper g: writers) g.close(); | ||
} catch (IOException e1) { | ||
} | ||
|
||
} | ||
} | ||
|
||
public boolean isFailed() { | ||
return failed; | ||
} | ||
|
||
/*Extend and override these methods to extend the functionality*/ | ||
|
||
private void parseSam(SAMRecord sam) throws Exception{ | ||
//is it paired | ||
if (sam.getReadPairedFlag() && sam.getProperPairFlag()){ | ||
if (sam.getFirstOfPairFlag()) writers[0].print(sam.getSAMString()); | ||
else if (sam.getSecondOfPairFlag()) writers[1].print(sam.getSAMString()); | ||
} | ||
} | ||
|
||
|
||
private void createWriters() throws Exception { | ||
alignments = new File[2]; | ||
alignments[0] = new File (sal.getResultsDir(), threadNumber+"_R1.sam.gz"); | ||
alignments[1] = new File (sal.getResultsDir(), threadNumber+"_R2.sam.gz"); | ||
|
||
writers = new Gzipper[2]; | ||
writers[0] = new Gzipper(alignments[0]); | ||
writers[1] = new Gzipper(alignments[1]); | ||
} | ||
|
||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
package edu.utah.seq.parsers; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.concurrent.ExecutorService; | ||
import java.util.concurrent.Executors; | ||
import htsjdk.samtools.QueryInterval; | ||
import htsjdk.samtools.SAMSequenceDictionary; | ||
import htsjdk.samtools.SamReader; | ||
import htsjdk.samtools.SamReaderFactory; | ||
import htsjdk.samtools.ValidationStringency; | ||
import util.bio.annotation.Bed; | ||
import util.gen.Gzipper; | ||
import util.gen.IO; | ||
import util.gen.Misc; | ||
|
||
public class SamAlignmentLoader { | ||
|
||
//fields | ||
private File bam = null; | ||
private File bed = null; | ||
private File resultsDir = null; | ||
private QueryInterval[] regions = null; | ||
private int regionIndex = 0; | ||
private int chunkSize = 200; | ||
private String samHeader = null; | ||
private File headerFile = null; | ||
private File[] parsedBams = null; | ||
|
||
|
||
public SamAlignmentLoader(File bam, File bed, File workingDir, int numberThreads){ | ||
try { | ||
this.bam = bam; | ||
this.bed = bed; | ||
|
||
//make the dir and delete | ||
resultsDir = new File(workingDir, Misc.getRandomString(10)+"_"+Misc.removeExtension(bam.getName())); | ||
resultsDir.mkdir(); | ||
parseRegions(); | ||
System.out.println("\tParsed "+regions.length+ " regions..."); | ||
|
||
//create loaders | ||
System.out.println("\tLaunching "+numberThreads +" loaders..."); | ||
BamLoader[] loaders = new BamLoader[numberThreads]; | ||
for (int i=0; i< numberThreads; i++) loaders[i] = new BamLoader(this, i+1); | ||
chunkSize = regions.length/ numberThreads; | ||
if (chunkSize < 10) chunkSize = 10; | ||
|
||
//launch em! | ||
ExecutorService executor = Executors.newFixedThreadPool(numberThreads); | ||
for (BamLoader l: loaders) executor.execute(l); | ||
executor.shutdown(); | ||
while (!executor.isTerminated()) {} //wait here until complete | ||
|
||
//check loaders | ||
for (BamLoader l: loaders) { | ||
if (l.isFailed()) throw new IOException("ERROR: Failed to extract alignments from "+bam.getName()); | ||
} | ||
|
||
//write out header | ||
headerFile = new File(resultsDir, "0_header.sam.gz"); | ||
Gzipper header = new Gzipper( headerFile ); | ||
header.println(samHeader); | ||
header.close(); | ||
|
||
parsedBams = combineAndSortResults(); | ||
|
||
System.out.println("\tComplete"); | ||
|
||
} catch (Exception e){ | ||
IO.deleteDirectory(resultsDir); | ||
IO.deleteDirectoryViaCmdLine(resultsDir); | ||
e.printStackTrace(); | ||
} | ||
} | ||
|
||
private File[] combineAndSortResults() throws Exception { | ||
//cat results R1 | ||
System.out.println("\tCombinging temp files..."); | ||
ArrayList<File> al = new ArrayList<File>(); | ||
al.add(headerFile); | ||
for (File f: IO.extractFiles(resultsDir, "_R1.sam.gz")) al.add(f); | ||
File toSortR1= new File(resultsDir,"toSortR1.sam.gz"); | ||
IO.concatinateFiles(al, toSortR1); | ||
al.clear(); | ||
|
||
//cat results R2 | ||
al.add(headerFile); | ||
for (File f: IO.extractFiles(resultsDir, "_R2.sam.gz")) al.add(f); | ||
File toSortR2= new File(resultsDir,"toSortR2.sam.gz"); | ||
IO.concatinateFiles(al, toSortR2); | ||
|
||
//sort em | ||
System.out.println("\tSorting results..."); | ||
File r1Bam = new File (resultsDir, "sortedR1.bam"); | ||
File r2Bam = new File (resultsDir, "sortedR2.bam"); | ||
File[] bams = new File[]{r1Bam, r2Bam}; | ||
sortSams(new File[]{toSortR1, toSortR2}, bams, 2); | ||
|
||
//delete sams | ||
IO.deleteFiles(resultsDir, "sam.gz"); | ||
|
||
return bams; | ||
|
||
} | ||
|
||
/**Launches a threaded version of Picard's sort sam.*/ | ||
public static void sortSams(File[] toSort, File[] bamOutput, int maxNumThreads) throws IOException{ | ||
SamSorter[] ss = new SamSorter[toSort.length]; | ||
for (int i=0; i< toSort.length; i++) ss[i] = new SamSorter(toSort[i], bamOutput[i]); | ||
int numThreads = ss.length; | ||
if (numThreads > maxNumThreads) numThreads = maxNumThreads; | ||
|
||
//launch em! | ||
ExecutorService executor = Executors.newFixedThreadPool(numThreads); | ||
for (SamSorter l: ss) executor.execute(l); | ||
executor.shutdown(); | ||
while (!executor.isTerminated()) {} //wait here until complete | ||
|
||
//check loaders | ||
for (SamSorter l: ss) if (l.isFailed()) throw new IOException("ERROR: Failed to sort alignments!"); | ||
|
||
} | ||
|
||
public synchronized boolean loadRegions(ArrayList<QueryInterval> al){ | ||
int count = 0; | ||
for (; regionIndex< regions.length; regionIndex++){ | ||
al.add(regions[regionIndex]); | ||
if (++count >= chunkSize) break; | ||
} | ||
if (al.size() != 0) return true; | ||
return false; | ||
} | ||
|
||
private void parseRegions() throws Exception { | ||
//parse regions | ||
Bed[] bedRegions = Bed.parseFile(bed, 0, 0); | ||
regions = new QueryInterval[bedRegions.length]; | ||
|
||
//pull indexes | ||
SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT); | ||
SamReader samReader = factory.open(bam); | ||
SAMSequenceDictionary sd = samReader.getFileHeader().getSequenceDictionary(); | ||
|
||
|
||
//create QueryIntervals | ||
for (int i=0; i< bedRegions.length; i++) { | ||
int index = sd.getSequenceIndex(bedRegions[i].getChromosome()); | ||
if (index == -1) throw new Exception("Failed to find a chromosome index for this bed line "+bedRegions[i].toString() +" in "+bam.getName()); | ||
regions[i] = new QueryInterval(index, bedRegions[i].getStart(), bedRegions[i].getStop()); | ||
} | ||
Arrays.sort(regions); | ||
//samReader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>()); | ||
samHeader = samReader.getFileHeader().getTextHeader().trim(); | ||
|
||
samReader.close(); | ||
} | ||
|
||
/**For testing.*/ | ||
public static void main (String[] args){ | ||
if (args.length == 0) System.out.println("\nUSAGE: bamFile, bedFile, workingDir, numberThreads, bpPadding\n"); | ||
else new SamAlignmentLoader(new File(args[0]), new File(args[1]), new File(args[2]), 5); | ||
} | ||
|
||
|
||
public File getBam(){ | ||
return bam; | ||
} | ||
public File getResultsDir() { | ||
return resultsDir; | ||
} | ||
|
||
public File[] getParsedBams() { | ||
return parsedBams; | ||
} | ||
|
||
} | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package edu.utah.seq.parsers; | ||
|
||
import java.io.File; | ||
import edu.utah.seq.data.sam.PicardSortSam; | ||
|
||
public class SamSorter implements Runnable { | ||
|
||
private boolean failed = false; | ||
private File samToSort = null; | ||
private File bamOutput = null; | ||
|
||
public SamSorter (File samToSort, File bamOutput){ | ||
this.samToSort = samToSort; | ||
this.bamOutput = bamOutput; | ||
} | ||
|
||
public void run() { | ||
try { | ||
new PicardSortSam (samToSort, bamOutput); | ||
} catch (Exception e) { | ||
failed = true; | ||
bamOutput.delete(); | ||
System.err.println("\nError: problem sorting "+samToSort ); | ||
e.printStackTrace(); | ||
} | ||
} | ||
|
||
public boolean isFailed() { | ||
return failed; | ||
} | ||
} | ||
|
Oops, something went wrong.