diff --git a/pom.xml b/pom.xml index 173780472c..9b7839d482 100644 --- a/pom.xml +++ b/pom.xml @@ -335,6 +335,11 @@ lucene-analysis-morfologik ${lucene.version} + + org.apache.lucene + lucene-analysis-nori + ${lucene.version} + org.apache.lucene lucene-test-framework diff --git a/src/main/java/io/anserini/analysis/AnalyzerMap.java b/src/main/java/io/anserini/analysis/AnalyzerMap.java index e6a889c3bf..db26a167f2 100644 --- a/src/main/java/io/anserini/analysis/AnalyzerMap.java +++ b/src/main/java/io/anserini/analysis/AnalyzerMap.java @@ -42,7 +42,7 @@ public class AnalyzerMap { put("id", "org.apache.lucene.analysis.id.IndonesianAnalyzer"); put("it", "org.apache.lucene.analysis.it.ItalianAnalyzer"); put("ja", "org.apache.lucene.analysis.ja.JapaneseAnalyzer"); - put("ko", "org.apache.lucene.analysis.cjk.CJKAnalyzer"); + put("ko", "org.apache.lucene.analysis.ko.KoreanAnalyzer"); put("nl", "org.apache.lucene.analysis.nl.DutchAnalyzer"); put("no", "org.apache.lucene.analysis.no.NorwegianAnalyzer"); put("pl", "org.apache.lucene.analysis.morfologik.MorfologikAnalyzer"); diff --git a/src/main/java/io/anserini/search/SimpleSearcher.java b/src/main/java/io/anserini/search/SimpleSearcher.java index fc81d4c641..de26e8703a 100644 --- a/src/main/java/io/anserini/search/SimpleSearcher.java +++ b/src/main/java/io/anserini/search/SimpleSearcher.java @@ -46,6 +46,7 @@ import org.apache.lucene.analysis.id.IndonesianAnalyzer; import org.apache.lucene.analysis.it.ItalianAnalyzer; import org.apache.lucene.analysis.ja.JapaneseAnalyzer; +import org.apache.lucene.analysis.ko.KoreanAnalyzer; import org.apache.lucene.analysis.morfologik.MorfologikAnalyzer; import org.apache.lucene.analysis.nl.DutchAnalyzer; import org.apache.lucene.analysis.no.NorwegianAnalyzer; @@ -220,6 +221,8 @@ public void set_language(String language) { this.analyzer = new ItalianAnalyzer(); } else if (language.equals("ja")) { this.analyzer = new JapaneseAnalyzer(); + } else if (language.equals("ko")) { + this.analyzer = new KoreanAnalyzer(); } else if (language.equals("nl")) { this.analyzer = new DutchAnalyzer(); } else if (language.equals("no")) { @@ -240,7 +243,7 @@ public void set_language(String language) { this.analyzer = new TurkishAnalyzer(); } else if (language.equals("uk")) { this.analyzer = new UkrainianMorfologikAnalyzer(); - } else if (language.equals("zh") || language.equals("ko")) { + } else if (language.equals("zh")) { this.analyzer = new CJKAnalyzer(); } else if (language.equals("sw") || language.equals("te")) { this.analyzer = new WhitespaceAnalyzer();