Skip to content

Commit

Permalink
Updated the sequence reader
Browse files Browse the repository at this point in the history
  • Loading branch information
wangqion committed Feb 11, 2015
1 parent c0a4a11 commit 9dd8a14
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 30 deletions.
2 changes: 1 addition & 1 deletion nbproject/project.properties
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,4 @@ run.test.classpath=\
source.encoding=UTF-8
src.dir=src
test.src.dir=test
project.license=gpl20
project.license=gpl30
16 changes: 10 additions & 6 deletions src/edu/msu/cme/rdp/classifier/Classifier.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public ClassificationResult classify(Sequence seq) throws IOException {
return classify(new ClassifierSequence(seq));
}

public ClassificationResult classify(ClassifierSequence seq) throws IOException {
public ClassificationResult classify(ClassifierSequence seq) {
return classify(seq, MIN_BOOTSTRSP_WORDS );
}

Expand All @@ -74,15 +74,16 @@ public ClassificationResult classify(ClassifierSequence seq) throws IOException
* the number of bootstrap trials was used as an estimate of confidence in the assignment to that genus.
* @throws ShortSequenceException if the sequence length is less than the minimum sequence length.
*/
public ClassificationResult classify(ClassifierSequence seq, int min_bootstrap_words) throws IOException {
public ClassificationResult classify(ClassifierSequence seq, int min_bootstrap_words) {
GenusWordConditionalProb gProb = null;
int nodeListSize = trainingInfo.getGenusNodeListSize();
boolean reversed = false;

int [] wordIndexArr = seq.createWordIndexArr();
try {
int [] wordIndexArr = seq.getWordIndexArr();
if (trainingInfo.isSeqReversed(wordIndexArr, seq.getGoodWordCount())) {
seq = seq.getReversedSeq();
wordIndexArr = seq.createWordIndexArr();
wordIndexArr = seq.getWordIndexArr();
reversed = true;
}

Expand All @@ -99,8 +100,7 @@ public ClassificationResult classify(ClassifierSequence seq, int min_bootstrap_w
}

if (goodWordCount > MAX_NUM_OF_WORDS) {
querySeq_wordProbArr = new float[goodWordCount][nodeListSize];
System.err.println("increase the array size to " + goodWordCount);
querySeq_wordProbArr = new float[goodWordCount][nodeListSize];
}

int NUM_OF_SELECTIONS = Math.max( goodWordCount / GoodWordIterator.getWordsize(), min_bootstrap_words);
Expand Down Expand Up @@ -199,6 +199,10 @@ public ClassificationResult classify(ClassifierSequence seq, int min_bootstrap_w
ClassificationResult finalResult = new ClassificationResult(seq, reversed, finalAssigns, trainingInfo.getHierarchyInfo());

return finalResult;
} catch (IOException ex){
throw new ShortSequenceException(seq.getSeqName(), "ShortSequenceException: The length of sequence with recordID="
+ seq.getSeqName() + " is less than " + MIN_SEQ_LEN);
}
}

/**
Expand Down
2 changes: 1 addition & 1 deletion src/edu/msu/cme/rdp/classifier/TrainingInfo.java
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ private void createGenusNodeList(HierarchyTree root) {
* less than zero, the query sequence is in reverse orientation.
*/
public boolean isSeqReversed(ClassifierSequence seq) throws IOException {
int[] wordIndexArr = seq.createWordIndexArr();
int[] wordIndexArr = seq.getWordIndexArr();
boolean reverse = false;
float priorDiff = 0;
for (int offset = 0; offset < wordIndexArr.length; offset++) {
Expand Down
35 changes: 15 additions & 20 deletions src/edu/msu/cme/rdp/classifier/utils/ClassifierSequence.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,23 @@
public class ClassifierSequence extends Sequence {
private boolean reverse = false;
private Integer goodWordCount = null; // the number of words with only valid bases

private int [] wordIndexArr = null;
/**
* Creates a new ClassifierSequence from an existing Sequence.
*/
public ClassifierSequence(Sequence seq) {
public ClassifierSequence(Sequence seq) throws IOException{
this(seq.getSeqName(), seq.getDesc(), seq.getSeqString());
}

public ClassifierSequence(String seqName, String desc, String seqString) {
public ClassifierSequence(String seqName, String desc, String seqString) throws IOException {
super(seqName, desc, SeqUtils.getUnalignedSeqString(seqString));
/**
* Fetches every overlapping word from the sequence string, changes each
* word to integer format and saves in an array.
*/
GoodWordIterator iterator = new GoodWordIterator(this.getSeqString());
this.wordIndexArr = iterator.getWordArr();
this.goodWordCount = wordIndexArr.length;
}

/**
Expand All @@ -41,6 +48,9 @@ protected void setSeqString(String s) {
seqString = s;
}

/**
 * Returns the word index array that was built from the sequence string
 * when this object was constructed. The internal array itself is
 * returned, not a copy.
 */
public int[] getWordIndexArr() {
    return wordIndexArr;
}
/**
* Returns true if the sequence string is a minus strand.
*/
Expand All @@ -52,31 +62,16 @@ public boolean isReverse() {
* Returns a Sequence object whose sequence string is the reverse complement
* of the current rRNA sequence string.
*/
public ClassifierSequence getReversedSeq() {
public ClassifierSequence getReversedSeq() throws IOException {
ClassifierSequence retval = new ClassifierSequence(seqName, desc, IUBUtilities.reverseComplement(seqString));
retval.reverse = true;
return retval;
}

/**
* Fetches every overlapping word from the sequence string, changes each
* word to integer format and saves in an array.
*/
public int[] createWordIndexArr() throws IOException {
GoodWordIterator iterator = new GoodWordIterator(this.getSeqString());
int [] wordIndexArr = iterator.getWordArr();

this.goodWordCount = wordIndexArr.length;
return wordIndexArr;
}

/**
* Returns the number of words with valid bases.
*/
public int getGoodWordCount() throws IOException {
if (goodWordCount == null) {
this.createWordIndexArr();
}
public int getGoodWordCount() {
return goodWordCount;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public static Test suite() {
* Test of getReversedSeq method, of class
* edu.msu.cme.rdp.classifier.readseqwrapper.ParsedSequence.
*/
public void testGetReversedSeq() {
public void testGetReversedSeq() throws IOException {
System.out.println("testGetReversedSeq");

// "AAAAAAAAAG-CCCCCCCCUGAGGGUUACnAA";
Expand Down Expand Up @@ -107,7 +107,7 @@ public void testCreateWordIndexArr() throws IOException {
String seqString = "AAAAAAAAAG-CCCCCCCCUGAGGGUUACnAA";
ClassifierSequence aSeq = new ClassifierSequence("test", "", seqString);

int[] wordIndexArr = aSeq.createWordIndexArr();
int[] wordIndexArr = aSeq.getWordIndexArr();

assertEquals(0, wordIndexArr[0]); //AAAAAAAA

Expand Down

0 comments on commit 9dd8a14

Please sign in to comment.