Skip to content

Commit

Permalink
Merge pull request #259 from WorksApplications/feature/258-deprecate-…
Browse files Browse the repository at this point in the history
…dictfactory

Deprecate dictionary factory
  • Loading branch information
mh-northlander authored Dec 4, 2024
2 parents 76e3522 + e697437 commit 29fc435
Show file tree
Hide file tree
Showing 13 changed files with 53 additions and 34 deletions.
13 changes: 13 additions & 0 deletions src/main/java/com/worksap/nlp/sudachi/Dictionary.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,19 @@
*/
public interface Dictionary extends AutoCloseable {

/**
* Creates a {@code Dictionary} from the given configuration.
*
* <p>
* The returned instance is a {@code JapaneseDictionary} constructed directly
* from {@code config}.
*
* @param config
* configuration of the dictionary to create
* @return {@link Dictionary}
* @throws IOException
* if reading a file fails
*/
public static Dictionary load(Config config) throws IOException {
return new JapaneseDictionary(config);
}

/**
* Creates a tokenizer instance.
*
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/com/worksap/nlp/sudachi/DictionaryFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@

/**
* Build a {@link Dictionary} instance from a dictionary file.
*
* @deprecated use {@link Dictionary#load} instead
*/
@Deprecated
public class DictionaryFactory {

/**
Expand All @@ -47,7 +50,10 @@ public Dictionary create() throws IOException {
* @return {@link Dictionary}
* @throws IOException
* if reading a file fails
*
* @deprecated use {@link Dictionary#load(Config)} instead
*/
@Deprecated
public Dictionary create(Config config) throws IOException {
    // Delegate to the replacement API named in the @deprecated tag so that
    // this legacy entry point and Dictionary.load(Config) share a single
    // construction path and cannot drift apart.
    return Dictionary.load(config);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class JapaneseDictionaryTest {
@Test
fun instantiateConfigWithoutCharDef() {
val config = setupMinimumConfig()
val jdict = DictionaryFactory().create(config)
val jdict = Dictionary.load(config)

assertNotNull(jdict)
assertNotNull(jdict.tokenizer())
Expand All @@ -87,7 +87,7 @@ class JapaneseDictionaryTest {
@Test
fun throwExceptionOnDictionaryUsageAfterClose() {
val config = setupMinimumConfig()
val jdict = DictionaryFactory().create(config)
val jdict = Dictionary.load(config)
jdict.close()

assertFailsWith(IllegalStateException::class) { jdict.tokenizer() }
Expand All @@ -96,7 +96,7 @@ class JapaneseDictionaryTest {
@Test
fun throwExceptionOnTokenizerUsageAfterClose() {
val config = setupMinimumConfig()
val jdict = DictionaryFactory().create(config)
val jdict = Dictionary.load(config)
val tok = jdict.tokenizer()
jdict.close()

Expand Down Expand Up @@ -189,7 +189,7 @@ abc,1,1,4675,AbC,名詞,普通名詞,一般,*,*,*,エービーシー,,,,,""")
.clearUserDictionaries()
.systemDictionary(sdict)
.addUserDictionary(udict)
val mdict = DictionaryFactory().create(cfg)
val mdict = Dictionary.load(cfg)

val found = mdict.lookup("ABC")
assertEquals(4, found.size)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 Works Applications Co., Ltd.
* Copyright (c) 2022-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -40,7 +40,7 @@ class JapaneseTokenizerMaskTest {
cfg0.addOovProviderPlugin(CaptureOtherWords::class.java)
cfg0.addOovProviderPlugin(SimpleOovProviderPlugin::class.java)
val cfg = cfg0.withFallback(TestDictionary.user0Cfg())
val dic = DictionaryFactory().create(cfg) as JapaneseDictionary
val dic = Dictionary.load(cfg) as JapaneseDictionary
val tokenizer = dic.tokenizer()

assertEquals(2, dic.oovProviderPlugins.size)
Expand All @@ -61,7 +61,7 @@ class JapaneseTokenizerMaskTest {
fun correctMasksWithSecondProvider() {
val cfg = TestDictionary.user0Cfg()
cfg.addOovProviderPlugin(CaptureOtherWords::class.java)
val dic = DictionaryFactory().create(cfg) as JapaneseDictionary
val dic = Dictionary.load(cfg) as JapaneseDictionary
val tokenizer = dic.tokenizer()

assertIs<SimpleOovProviderPlugin>(dic.oovProviderPlugins[0])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ public void zeroLengthMorpheme() {
@Test
public void disableEmptyMorpheme() throws IOException {
Config config = TestDictionary.INSTANCE.user1Cfg();
dict = new DictionaryFactory().create(Config.empty().withFallback(config).allowEmptyMorpheme(false));
dict = Dictionary.load(Config.empty().withFallback(config).allowEmptyMorpheme(false));
tokenizer = (JapaneseTokenizer) dict.tokenizer();

List<Morpheme> s = tokenizer.tokenize("…");
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022 Works Applications Co., Ltd.
* Copyright (c) 2017-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -32,7 +32,7 @@ public class JoinNumericPluginTest {
public void setUp() throws IOException {
Config config = TestDictionary.INSTANCE.user0Cfg()
.characterDefinition(getClass().getClassLoader().getResource("joinnumeric/char.def"));
Dictionary dict = new DictionaryFactory().create(config);
Dictionary dict = Dictionary.load(config);
tokenizer = (JapaneseTokenizer) dict.tokenizer();

plugin = new JoinNumericPlugin();
Expand Down
14 changes: 7 additions & 7 deletions src/test/java/com/worksap/nlp/sudachi/OovProviderPluginTest.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 Works Applications Co., Ltd.
* Copyright (c) 2022-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -49,7 +49,7 @@ class OovProviderPluginTest {
val cfg = TestDictionary.user0Cfg()
cfg.addOovProviderPlugin(FakeOovProvider::class.java)
.addList("pos", "名詞", "普通名詞", "一般", "*", "*", "*")
val inst = DictionaryFactory().create(cfg) as JapaneseDictionary
val inst = Dictionary.load(cfg) as JapaneseDictionary
val plugin = assertIs<FakeOovProvider>(inst.oovProviderPlugins.last())
assertEquals(4, plugin.posId)
}
Expand All @@ -60,7 +60,7 @@ class OovProviderPluginTest {
cfg.addOovProviderPlugin(FakeOovProvider::class.java)
.addList("pos", "名詞", "普通名詞", "一般", "*", "*", "new")
.add(USER_POS, USER_POS_ALLOW)
val inst = DictionaryFactory().create(cfg) as JapaneseDictionary
val inst = Dictionary.load(cfg) as JapaneseDictionary
val plugin = assertIs<FakeOovProvider>(inst.oovProviderPlugins.last())
assertEquals(8, plugin.posId)
}
Expand All @@ -71,15 +71,15 @@ class OovProviderPluginTest {
cfg.addOovProviderPlugin(FakeOovProvider::class.java)
.addList("pos", "名詞", "普通名詞", "一般", "*", "*", "*")
.add(USER_POS, "test")
assertFails { DictionaryFactory().create(cfg) }
assertFails { Dictionary.load(cfg) }
}

@Test
fun failInvalidPos() {
val cfg = TestDictionary.user0Cfg()
cfg.addOovProviderPlugin(FakeOovProvider::class.java)
.addList("pos", "名詞", "普通名詞", "一般", "*", "*", "test")
assertFails { DictionaryFactory().create(cfg) }
assertFails { Dictionary.load(cfg) }
}

@Test
Expand All @@ -91,7 +91,7 @@ class OovProviderPluginTest {
cfg.addOovProviderPlugin(FakeOovProvider::class.java)
.addList("pos", "名詞", "普通名詞", "一般", "*", "*", "new")
.add(USER_POS, USER_POS_ALLOW)
val inst = DictionaryFactory().create(cfg) as JapaneseDictionary
val inst = Dictionary.load(cfg) as JapaneseDictionary
val oovPlugins = inst.oovProviderPlugins
val p1 = assertIs<FakeOovProvider>(oovPlugins[oovPlugins.size - 2])
assertEquals(8, p1.posId)
Expand All @@ -105,7 +105,7 @@ class OovProviderPluginTest {
cfg.addOovProviderPlugin(FakeOovProvider::class.java)
.addList("pos", "名詞", "普通名詞", "一般", "*", "*", "new")
.add(USER_POS, USER_POS_ALLOW)
val dict = DictionaryFactory().create(cfg) as JapaneseDictionary
val dict = Dictionary.load(cfg) as JapaneseDictionary
val plugin = assertIs<FakeOovProvider>(dict.oovProviderPlugins.last())
assertEquals(8, plugin.posId)
val tokinzer = dict.tokenizer()
Expand Down
4 changes: 2 additions & 2 deletions src/test/java/com/worksap/nlp/sudachi/PosMatcherTest.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 Works Applications Co., Ltd.
* Copyright (c) 2022-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -21,7 +21,7 @@ import kotlin.test.*

class PosMatcherTest {

private val dic = DictionaryFactory().create(TestDictionary.user2Cfg()) as JapaneseDictionary
private val dic = Dictionary.load(TestDictionary.user2Cfg()) as JapaneseDictionary
private val tok = dic.tokenizer()

@Test
Expand Down
4 changes: 2 additions & 2 deletions src/test/java/com/worksap/nlp/sudachi/RegexOovProviderTest.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 Works Applications Co., Ltd.
* Copyright (c) 2022-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,7 +34,7 @@ class RegexOovProviderTest {
.addList("pos", "名詞", "普通名詞", "一般", "*", "*", "*")
@Suppress("UNCHECKED_CAST") block(cfg, pluginCfg as Config.PluginConf<RegexOovProvider>)
// prepend our OOV configuration to the main configuration
return DictionaryFactory().create(cfg.withFallback(TestDictionary.user0Cfg())).tokenizer()
return Dictionary.load(cfg.withFallback(TestDictionary.user0Cfg())).tokenizer()
}

@Test
Expand Down
6 changes: 3 additions & 3 deletions src/test/java/com/worksap/nlp/sudachi/TestDictionary.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022 Works Applications Co., Ltd.
* Copyright (c) 2017-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -67,11 +67,11 @@ object TestDictionary {

/** System only */
fun user0(): JapaneseDictionary {
return DictionaryFactory().create(user0Cfg()) as JapaneseDictionary
return Dictionary.load(user0Cfg()) as JapaneseDictionary
}

/** System + One User dictionary */
fun user1(): JapaneseDictionary {
return DictionaryFactory().create(user1Cfg()) as JapaneseDictionary
return Dictionary.load(user1Cfg()) as JapaneseDictionary
}
}
4 changes: 2 additions & 2 deletions src/test/java/com/worksap/nlp/sudachi/TextNormalizerTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ import kotlin.test.*
class TextNormalizerTest {

private val dic =
DictionaryFactory()
.create(TestDictionary.user2Cfg().characterDefinition(CharacterCategory.loadDefault()))
Dictionary.load(
TestDictionary.user2Cfg().characterDefinition(CharacterCategory.loadDefault()))
as JapaneseDictionary

@Test
Expand Down
12 changes: 6 additions & 6 deletions src/test/java/com/worksap/nlp/sudachi/UserDictionaryTest.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022 Works Applications Co., Ltd.
* Copyright (c) 2017-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -37,7 +37,7 @@ public void fullUserDict() throws IOException {
}
config.addUserDictionary(instance.getUserDict2());

try (Dictionary dict = new DictionaryFactory().create(config)) {
try (Dictionary dict = Dictionary.load(config)) {
Tokenizer tokenizer = dict.tokenizer();
List<Morpheme> morphs = tokenizer.tokenize("ぴさる");
assertThat(morphs.size(), is(1));
Expand All @@ -54,14 +54,14 @@ public void openTooManyUserDict() throws IOException {
for (int i = 0; i < 15; i++) {
config.addUserDictionary(instance.getUserDict1());
}
new DictionaryFactory().create(config);
Dictionary.load(config);
}

@Test
public void splitForUserDict() throws IOException {
TestDictionary td = TestDictionary.INSTANCE;
Config config = td.user0Cfg().addUserDictionary(td.getUserDict2()).addUserDictionary(td.getUserDict1());
try (Dictionary dict = new DictionaryFactory().create(config)) {
try (Dictionary dict = Dictionary.load(config)) {
Tokenizer tokenizer = dict.tokenizer();
List<Morpheme> morphs = tokenizer.tokenize("東京府");
assertThat(morphs.size(), is(1));
Expand All @@ -76,7 +76,7 @@ public void splitForUserDict() throws IOException {
@Test
public void userDefinedPos() throws IOException {
Config config = TestDictionary.INSTANCE.user2Cfg();
try (Dictionary dict = new DictionaryFactory().create(config)) {
try (Dictionary dict = Dictionary.load(config)) {
Tokenizer tokenizer = dict.tokenizer();
List<Morpheme> morphs = tokenizer.tokenize("すだちかぼす");
assertThat(morphs.size(), is(2));
Expand All @@ -88,7 +88,7 @@ public void userDefinedPos() throws IOException {

TestDictionary td = TestDictionary.INSTANCE;
config = td.user0Cfg().addUserDictionary(td.getUserDict2()).addUserDictionary(td.getUserDict1());
try (Dictionary dict = new DictionaryFactory().create(config)) {
try (Dictionary dict = Dictionary.load(config)) {
Tokenizer tokenizer = dict.tokenizer();
List<Morpheme> morphs = tokenizer.tokenize("すだちかぼす");
assertThat(morphs.size(), is(2));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022 Works Applications Co., Ltd.
* Copyright (c) 2017-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -71,7 +71,7 @@ class TestDic {
fun load(): Dictionary {
val config = Config.fromClasspath(config).systemDictionary(systemDic)
userDics.forEach { config.addUserDictionary(it) }
return DictionaryFactory().create(config)
return Dictionary.load(config)
}
}

Expand Down

0 comments on commit 29fc435

Please sign in to comment.