Skip to content

Commit

Permalink
Switching to Emory lemmatizer
Browse files Browse the repository at this point in the history
  • Loading branch information
dlutz2 committed Sep 26, 2016
1 parent 1122be0 commit 385cf9e
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 7 deletions.
8 changes: 4 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -109,11 +109,11 @@
<version>1.7.14</version>
</dependency>

<!-- Stanford NLP core, only word normalizer used -->
<!-- Emory Lemmatizer -->
<dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>3.6.0</version>
<groupId>edu.emory.mathcs.nlp</groupId>
<artifactId>nlp4j-morphology</artifactId>
<version>1.1.2</version>
</dependency>

<!-- Part of speech tagger, used in Lexer -->
Expand Down
15 changes: 12 additions & 3 deletions src/org/opensextant/howler/utils/OWLUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,16 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import edu.stanford.nlp.process.Morphology;
import edu.emory.mathcs.nlp.common.util.StringUtils;
import edu.emory.mathcs.nlp.component.morph.MorphAnalyzer;
import edu.emory.mathcs.nlp.component.morph.english.EnglishMorphAnalyzer;

public class OWLUtils {

static Map<String, Integer> numbers = new HashMap<String, Integer>();


static MorphAnalyzer lemmatizer = new EnglishMorphAnalyzer();

static {
numbers.put("one", 1);
numbers.put("two", 2);
Expand Down Expand Up @@ -199,12 +203,17 @@ public static String normalize(String word, String pos, boolean lower) {
return word;
}

// don't change numbers (POS "CD") or fixed-vocabulary words (POS "FIXED")
if (pos.equals("CD") || pos.equals("FIXED")) {
return word;
}

// don't normalize verbs yet (any POS tag starting with "V")
if (pos.startsWith("V")) {
return word;
}

return (Morphology.lemmaStatic(word, pos, lower));
return lemmatizer.lemmatize(StringUtils.toSimplifiedForm(word, lower), pos);
}

public static SubjectPredicateObject rewriteSPO(
Expand Down

0 comments on commit 385cf9e

Please sign in to comment.