Skip to content

Commit

Permalink
Add a check on the text to be sure that the candidate sentence is not already selected.
Browse files Browse the repository at this point in the history
  • Loading branch information
ftesser authored and giuliopaci committed Mar 8, 2013
1 parent 1675c00 commit 197e233
Showing 1 changed file with 14 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ public void select(Set<Integer>selectedIdSents,

//select the next sentence
//selectNext(coverageDefinition, logFile, sentIndex, basenameList, vectorArray);
boolean haveSelected = selectNext(selectedIdSents, unwantedIdSents, coverageDefinition, cfProvider);
boolean haveSelected = selectNext(selectedIdSents, unwantedIdSents, coverageDefinition, cfProvider, wikiToDB);

if (haveSelected) {
assert selectedIdSentence >= 0;
Expand Down Expand Up @@ -401,7 +401,8 @@ static void printOnFileWordPipeTrascription(Element segment, PrintWriter pWordPi
private boolean selectNext(Set<Integer>selectedIdSents,
Set<Integer>unwantedIdSents,
CoverageDefinition coverageDefinition,
CoverageFeatureProvider cfProvider)
CoverageFeatureProvider cfProvider,
DBHandler wikiToDB)
throws IOException {
// TODO: MS, May 2011 -- I have refactored this code but could not test it. Bad me.

Expand All @@ -413,15 +414,15 @@ private boolean selectNext(Set<Integer>selectedIdSents,
// we bulk-load a chunk of them at a time.
if (cfProvider instanceof InMemoryCFProvider) {
// already in memory, can loop through all
determineMostUsefulSentence(selectedIdSents, unwantedIdSents, coverageDefinition, cfProvider);
determineMostUsefulSentence(selectedIdSents, unwantedIdSents, coverageDefinition, cfProvider, wikiToDB);
} else {
assert cfProvider instanceof DatabaseCFProvider;
DatabaseCFProvider dbCfProvider = (DatabaseCFProvider) cfProvider;
int chunkSize = 100000;
for (int c=0, max=dbCfProvider.getNumSentences(); c<max; c+= chunkSize) {
int len = Math.min(chunkSize, max-c);
CoverageFeatureProvider chunk = dbCfProvider.getFeaturesInMemory(c, len);
determineMostUsefulSentence(selectedIdSents, unwantedIdSents, coverageDefinition, chunk);
determineMostUsefulSentence(selectedIdSents, unwantedIdSents, coverageDefinition, chunk, wikiToDB);
}
}
return selectedIdSentence >= 0;
Expand All @@ -436,23 +437,26 @@ private boolean selectNext(Set<Integer>selectedIdSents,
private void determineMostUsefulSentence(Set<Integer> selectedIdSents,
Set<Integer> unwantedIdSents,
CoverageDefinition coverageDefinition,
CoverageFeatureProvider cfProvider) {
CoverageFeatureProvider cfProvider,
DBHandler wikiToDB) {
for (int l=0, num=cfProvider.getNumSentences(); l<num; l++) {
int id = cfProvider.getID(l);
// skip previously selected or excluded sentences:
if (selectedIdSents.contains(id) || unwantedIdSents.contains(id)) {
if (selectedIdSents.contains(id) || unwantedIdSents.contains(id)){
continue;
}
byte[] nextFeatVects = cfProvider.getCoverageFeatures(l);
//calculate how useful the feature vectors are
double usefulness = coverageDefinition.usefulnessOfFVs(nextFeatVects);

if (usefulness > selectedUsefulness) {
if (usefulness > selectedUsefulness ) {
if (!wikiToDB.textSentenceIsContainedInSelectedIdSents(id, selectedIdSents, unwantedIdSents)) {
//the current sentence is (currently) the best sentence to add
selectedIdSentence = id;
selectedVectors = nextFeatVects;
selectedUsefulness = usefulness;
}
}
}
if (usefulness == -1.0){
unwantedIdSents.add(id);
// idSentenceList[i] = -1; // Here the sentence should be marked as unwanted?
Expand All @@ -461,8 +465,8 @@ private void determineMostUsefulSentence(Set<Integer> selectedIdSents,
}
}

/**

/**
* Determine if the stop criterion is reached
*
* @param sentences the list of selected sentences
Expand Down

0 comments on commit 197e233

Please sign in to comment.