From 0cbd390c18e18988039f5c7983ba0b5c55b95139 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Wed, 4 Dec 2024 16:18:55 +0900 Subject: [PATCH 1/2] deprecate DictionaryFactory.create and add Dictionary.load --- .../java/com/worksap/nlp/sudachi/Dictionary.java | 13 +++++++++++++ .../com/worksap/nlp/sudachi/DictionaryFactory.java | 6 ++++++ 2 files changed, 19 insertions(+) diff --git a/src/main/java/com/worksap/nlp/sudachi/Dictionary.java b/src/main/java/com/worksap/nlp/sudachi/Dictionary.java index 9b4426a1..1dd1f689 100644 --- a/src/main/java/com/worksap/nlp/sudachi/Dictionary.java +++ b/src/main/java/com/worksap/nlp/sudachi/Dictionary.java @@ -37,6 +37,19 @@ */ public interface Dictionary extends AutoCloseable { + /** + * Creates a {@code Dictionary} from the given configuration. + * + * @param config + * configuration of the dictionary to create + * @return {@link Dictionary} + * @throws IOException + * if reading a file fails + */ + public static Dictionary load(Config config) throws IOException { + return new JapaneseDictionary(config); + } + /** * Creates a tokenizer instance. * diff --git a/src/main/java/com/worksap/nlp/sudachi/DictionaryFactory.java b/src/main/java/com/worksap/nlp/sudachi/DictionaryFactory.java index 22a0f04d..6c377fc7 100644 --- a/src/main/java/com/worksap/nlp/sudachi/DictionaryFactory.java +++ b/src/main/java/com/worksap/nlp/sudachi/DictionaryFactory.java @@ -21,7 +21,10 @@ /** * Build a {@link Dictionary} instance from a dictionary file. 
+ * + * @deprecated use {@link Dictionary#load} instead */ +@Deprecated public class DictionaryFactory { /** @@ -47,7 +50,10 @@ public Dictionary create() throws IOException { * @return {@link Dictionary} * @throws IOException * if reading a file is failed + * + * @deprecated use {@link Dictionary#load(Config)} instead */ + @Deprecated public Dictionary create(Config config) throws IOException { return new JapaneseDictionary(config); } From e6974377761390c10d59cf2b48da000281eb86f1 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Wed, 4 Dec 2024 16:19:09 +0900 Subject: [PATCH 2/2] update the use of DictionaryFactory.create in tests --- .../worksap/nlp/sudachi/JapaneseDictionaryTest.kt | 8 ++++---- .../nlp/sudachi/JapaneseTokenizerMaskTest.kt | 6 +++--- .../worksap/nlp/sudachi/JapaneseTokenizerTest.java | 2 +- .../worksap/nlp/sudachi/JoinNumericPluginTest.java | 4 ++-- .../worksap/nlp/sudachi/OovProviderPluginTest.kt | 14 +++++++------- .../java/com/worksap/nlp/sudachi/PosMatcherTest.kt | 4 ++-- .../worksap/nlp/sudachi/RegexOovProviderTest.kt | 4 ++-- .../java/com/worksap/nlp/sudachi/TestDictionary.kt | 6 +++--- .../com/worksap/nlp/sudachi/TextNormalizerTest.kt | 4 ++-- .../worksap/nlp/sudachi/UserDictionaryTest.java | 12 ++++++------ .../nlp/sudachi/dictionary/build/UserDicTest.kt | 4 ++-- 11 files changed, 34 insertions(+), 34 deletions(-) diff --git a/src/test/java/com/worksap/nlp/sudachi/JapaneseDictionaryTest.kt b/src/test/java/com/worksap/nlp/sudachi/JapaneseDictionaryTest.kt index 551ada8e..fc9552b3 100644 --- a/src/test/java/com/worksap/nlp/sudachi/JapaneseDictionaryTest.kt +++ b/src/test/java/com/worksap/nlp/sudachi/JapaneseDictionaryTest.kt @@ -77,7 +77,7 @@ class JapaneseDictionaryTest { @Test fun instantiateConfigWithoutCharDef() { val config = setupMinimumConfig() - val jdict = DictionaryFactory().create(config) + val jdict = Dictionary.load(config) assertNotNull(jdict) assertNotNull(jdict.tokenizer()) @@ -87,7 +87,7 @@ class JapaneseDictionaryTest { 
@Test fun throwExceptionOnDictionaryUsageAfterClose() { val config = setupMinimumConfig() - val jdict = DictionaryFactory().create(config) + val jdict = Dictionary.load(config) jdict.close() assertFailsWith(IllegalStateException::class) { jdict.tokenizer() } @@ -96,7 +96,7 @@ class JapaneseDictionaryTest { @Test fun throwExceptionOnTokenizerUsageAfterClose() { val config = setupMinimumConfig() - val jdict = DictionaryFactory().create(config) + val jdict = Dictionary.load(config) val tok = jdict.tokenizer() jdict.close() @@ -189,7 +189,7 @@ abc,1,1,4675,AbC,名詞,普通名詞,一般,*,*,*,エービーシー,,,,,""") .clearUserDictionaries() .systemDictionary(sdict) .addUserDictionary(udict) - val mdict = DictionaryFactory().create(cfg) + val mdict = Dictionary.load(cfg) val found = mdict.lookup("ABC") assertEquals(4, found.size) diff --git a/src/test/java/com/worksap/nlp/sudachi/JapaneseTokenizerMaskTest.kt b/src/test/java/com/worksap/nlp/sudachi/JapaneseTokenizerMaskTest.kt index 5fd6fbf7..e9999902 100644 --- a/src/test/java/com/worksap/nlp/sudachi/JapaneseTokenizerMaskTest.kt +++ b/src/test/java/com/worksap/nlp/sudachi/JapaneseTokenizerMaskTest.kt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Works Applications Co., Ltd. + * Copyright (c) 2022-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -40,7 +40,7 @@ class JapaneseTokenizerMaskTest { cfg0.addOovProviderPlugin(CaptureOtherWords::class.java) cfg0.addOovProviderPlugin(SimpleOovProviderPlugin::class.java) val cfg = cfg0.withFallback(TestDictionary.user0Cfg()) - val dic = DictionaryFactory().create(cfg) as JapaneseDictionary + val dic = Dictionary.load(cfg) as JapaneseDictionary val tokenizer = dic.tokenizer() assertEquals(2, dic.oovProviderPlugins.size) @@ -61,7 +61,7 @@ class JapaneseTokenizerMaskTest { fun correctMasksWithSecondProvider() { val cfg = TestDictionary.user0Cfg() cfg.addOovProviderPlugin(CaptureOtherWords::class.java) - val dic = DictionaryFactory().create(cfg) as JapaneseDictionary + val dic = Dictionary.load(cfg) as JapaneseDictionary val tokenizer = dic.tokenizer() assertIs(dic.oovProviderPlugins[0]) diff --git a/src/test/java/com/worksap/nlp/sudachi/JapaneseTokenizerTest.java b/src/test/java/com/worksap/nlp/sudachi/JapaneseTokenizerTest.java index f34a3abf..861fa915 100644 --- a/src/test/java/com/worksap/nlp/sudachi/JapaneseTokenizerTest.java +++ b/src/test/java/com/worksap/nlp/sudachi/JapaneseTokenizerTest.java @@ -353,7 +353,7 @@ public void zeroLengthMorpheme() { @Test public void disableEmptyMorpheme() throws IOException { Config config = TestDictionary.INSTANCE.user1Cfg(); - dict = new DictionaryFactory().create(Config.empty().withFallback(config).allowEmptyMorpheme(false)); + dict = Dictionary.load(Config.empty().withFallback(config).allowEmptyMorpheme(false)); tokenizer = (JapaneseTokenizer) dict.tokenizer(); List s = tokenizer.tokenize("…"); diff --git a/src/test/java/com/worksap/nlp/sudachi/JoinNumericPluginTest.java b/src/test/java/com/worksap/nlp/sudachi/JoinNumericPluginTest.java index 384d9436..4758c651 100644 --- a/src/test/java/com/worksap/nlp/sudachi/JoinNumericPluginTest.java +++ b/src/test/java/com/worksap/nlp/sudachi/JoinNumericPluginTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022 Works Applications Co., Ltd. 
+ * Copyright (c) 2017-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,7 +32,7 @@ public class JoinNumericPluginTest { public void setUp() throws IOException { Config config = TestDictionary.INSTANCE.user0Cfg() .characterDefinition(getClass().getClassLoader().getResource("joinnumeric/char.def")); - Dictionary dict = new DictionaryFactory().create(config); + Dictionary dict = Dictionary.load(config); tokenizer = (JapaneseTokenizer) dict.tokenizer(); plugin = new JoinNumericPlugin(); diff --git a/src/test/java/com/worksap/nlp/sudachi/OovProviderPluginTest.kt b/src/test/java/com/worksap/nlp/sudachi/OovProviderPluginTest.kt index bc865397..7caf6358 100644 --- a/src/test/java/com/worksap/nlp/sudachi/OovProviderPluginTest.kt +++ b/src/test/java/com/worksap/nlp/sudachi/OovProviderPluginTest.kt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Works Applications Co., Ltd. + * Copyright (c) 2022-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -49,7 +49,7 @@ class OovProviderPluginTest { val cfg = TestDictionary.user0Cfg() cfg.addOovProviderPlugin(FakeOovProvider::class.java) .addList("pos", "名詞", "普通名詞", "一般", "*", "*", "*") - val inst = DictionaryFactory().create(cfg) as JapaneseDictionary + val inst = Dictionary.load(cfg) as JapaneseDictionary val plugin = assertIs(inst.oovProviderPlugins.last()) assertEquals(4, plugin.posId) } @@ -60,7 +60,7 @@ class OovProviderPluginTest { cfg.addOovProviderPlugin(FakeOovProvider::class.java) .addList("pos", "名詞", "普通名詞", "一般", "*", "*", "new") .add(USER_POS, USER_POS_ALLOW) - val inst = DictionaryFactory().create(cfg) as JapaneseDictionary + val inst = Dictionary.load(cfg) as JapaneseDictionary val plugin = assertIs(inst.oovProviderPlugins.last()) assertEquals(8, plugin.posId) } @@ -71,7 +71,7 @@ class OovProviderPluginTest { cfg.addOovProviderPlugin(FakeOovProvider::class.java) .addList("pos", "名詞", "普通名詞", "一般", "*", "*", "*") .add(USER_POS, "test") - assertFails { DictionaryFactory().create(cfg) } + assertFails { Dictionary.load(cfg) } } @Test @@ -79,7 +79,7 @@ class OovProviderPluginTest { val cfg = TestDictionary.user0Cfg() cfg.addOovProviderPlugin(FakeOovProvider::class.java) .addList("pos", "名詞", "普通名詞", "一般", "*", "*", "test") - assertFails { DictionaryFactory().create(cfg) } + assertFails { Dictionary.load(cfg) } } @Test @@ -91,7 +91,7 @@ class OovProviderPluginTest { cfg.addOovProviderPlugin(FakeOovProvider::class.java) .addList("pos", "名詞", "普通名詞", "一般", "*", "*", "new") .add(USER_POS, USER_POS_ALLOW) - val inst = DictionaryFactory().create(cfg) as JapaneseDictionary + val inst = Dictionary.load(cfg) as JapaneseDictionary val oovPlugins = inst.oovProviderPlugins val p1 = assertIs(oovPlugins[oovPlugins.size - 2]) assertEquals(8, p1.posId) @@ -105,7 +105,7 @@ class OovProviderPluginTest { cfg.addOovProviderPlugin(FakeOovProvider::class.java) .addList("pos", "名詞", "普通名詞", "一般", "*", "*", "new") .add(USER_POS, USER_POS_ALLOW) - val dict = 
DictionaryFactory().create(cfg) as JapaneseDictionary + val dict = Dictionary.load(cfg) as JapaneseDictionary val plugin = assertIs(dict.oovProviderPlugins.last()) assertEquals(8, plugin.posId) val tokinzer = dict.tokenizer() diff --git a/src/test/java/com/worksap/nlp/sudachi/PosMatcherTest.kt b/src/test/java/com/worksap/nlp/sudachi/PosMatcherTest.kt index 2dc1d931..c5509a32 100644 --- a/src/test/java/com/worksap/nlp/sudachi/PosMatcherTest.kt +++ b/src/test/java/com/worksap/nlp/sudachi/PosMatcherTest.kt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Works Applications Co., Ltd. + * Copyright (c) 2022-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ import kotlin.test.* class PosMatcherTest { - private val dic = DictionaryFactory().create(TestDictionary.user2Cfg()) as JapaneseDictionary + private val dic = Dictionary.load(TestDictionary.user2Cfg()) as JapaneseDictionary private val tok = dic.tokenizer() @Test diff --git a/src/test/java/com/worksap/nlp/sudachi/RegexOovProviderTest.kt b/src/test/java/com/worksap/nlp/sudachi/RegexOovProviderTest.kt index 660a226d..05fa3e0b 100644 --- a/src/test/java/com/worksap/nlp/sudachi/RegexOovProviderTest.kt +++ b/src/test/java/com/worksap/nlp/sudachi/RegexOovProviderTest.kt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Works Applications Co., Ltd. + * Copyright (c) 2022-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -34,7 +34,7 @@ class RegexOovProviderTest { .addList("pos", "名詞", "普通名詞", "一般", "*", "*", "*") @Suppress("UNCHECKED_CAST") block(cfg, pluginCfg as Config.PluginConf) // prepend our OOV configuration to the main configuration - return DictionaryFactory().create(cfg.withFallback(TestDictionary.user0Cfg())).tokenizer() + return Dictionary.load(cfg.withFallback(TestDictionary.user0Cfg())).tokenizer() } @Test diff --git a/src/test/java/com/worksap/nlp/sudachi/TestDictionary.kt b/src/test/java/com/worksap/nlp/sudachi/TestDictionary.kt index f3f0f7df..5416402b 100644 --- a/src/test/java/com/worksap/nlp/sudachi/TestDictionary.kt +++ b/src/test/java/com/worksap/nlp/sudachi/TestDictionary.kt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022 Works Applications Co., Ltd. + * Copyright (c) 2017-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -67,11 +67,11 @@ object TestDictionary { /** System only */ fun user0(): JapaneseDictionary { - return DictionaryFactory().create(user0Cfg()) as JapaneseDictionary + return Dictionary.load(user0Cfg()) as JapaneseDictionary } /** System + One User dictionary */ fun user1(): JapaneseDictionary { - return DictionaryFactory().create(user1Cfg()) as JapaneseDictionary + return Dictionary.load(user1Cfg()) as JapaneseDictionary } } diff --git a/src/test/java/com/worksap/nlp/sudachi/TextNormalizerTest.kt b/src/test/java/com/worksap/nlp/sudachi/TextNormalizerTest.kt index 9c66f5bc..68ccbd30 100644 --- a/src/test/java/com/worksap/nlp/sudachi/TextNormalizerTest.kt +++ b/src/test/java/com/worksap/nlp/sudachi/TextNormalizerTest.kt @@ -23,8 +23,8 @@ import kotlin.test.* class TextNormalizerTest { private val dic = - DictionaryFactory() - .create(TestDictionary.user2Cfg().characterDefinition(CharacterCategory.loadDefault())) + Dictionary.load( + TestDictionary.user2Cfg().characterDefinition(CharacterCategory.loadDefault())) as 
JapaneseDictionary @Test diff --git a/src/test/java/com/worksap/nlp/sudachi/UserDictionaryTest.java b/src/test/java/com/worksap/nlp/sudachi/UserDictionaryTest.java index 82563cc6..cf19c84c 100644 --- a/src/test/java/com/worksap/nlp/sudachi/UserDictionaryTest.java +++ b/src/test/java/com/worksap/nlp/sudachi/UserDictionaryTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022 Works Applications Co., Ltd. + * Copyright (c) 2017-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,7 +37,7 @@ public void fullUserDict() throws IOException { } config.addUserDictionary(instance.getUserDict2()); - try (Dictionary dict = new DictionaryFactory().create(config)) { + try (Dictionary dict = Dictionary.load(config)) { Tokenizer tokenizer = dict.tokenizer(); List morphs = tokenizer.tokenize("ぴさる"); assertThat(morphs.size(), is(1)); @@ -54,14 +54,14 @@ public void openTooManyUserDict() throws IOException { for (int i = 0; i < 15; i++) { config.addUserDictionary(instance.getUserDict1()); } - new DictionaryFactory().create(config); + Dictionary.load(config); } @Test public void splitForUserDict() throws IOException { TestDictionary td = TestDictionary.INSTANCE; Config config = td.user0Cfg().addUserDictionary(td.getUserDict2()).addUserDictionary(td.getUserDict1()); - try (Dictionary dict = new DictionaryFactory().create(config)) { + try (Dictionary dict = Dictionary.load(config)) { Tokenizer tokenizer = dict.tokenizer(); List morphs = tokenizer.tokenize("東京府"); assertThat(morphs.size(), is(1)); @@ -76,7 +76,7 @@ public void splitForUserDict() throws IOException { @Test public void userDefinedPos() throws IOException { Config config = TestDictionary.INSTANCE.user2Cfg(); - try (Dictionary dict = new DictionaryFactory().create(config)) { + try (Dictionary dict = Dictionary.load(config)) { Tokenizer tokenizer = dict.tokenizer(); List morphs = 
tokenizer.tokenize("すだちかぼす"); assertThat(morphs.size(), is(2)); @@ -88,7 +88,7 @@ public void userDefinedPos() throws IOException { TestDictionary td = TestDictionary.INSTANCE; config = td.user0Cfg().addUserDictionary(td.getUserDict2()).addUserDictionary(td.getUserDict1()); - try (Dictionary dict = new DictionaryFactory().create(config)) { + try (Dictionary dict = Dictionary.load(config)) { Tokenizer tokenizer = dict.tokenizer(); List morphs = tokenizer.tokenize("すだちかぼす"); assertThat(morphs.size(), is(2)); diff --git a/src/test/java/com/worksap/nlp/sudachi/dictionary/build/UserDicTest.kt b/src/test/java/com/worksap/nlp/sudachi/dictionary/build/UserDicTest.kt index 8d64de2c..5601cff6 100644 --- a/src/test/java/com/worksap/nlp/sudachi/dictionary/build/UserDicTest.kt +++ b/src/test/java/com/worksap/nlp/sudachi/dictionary/build/UserDicTest.kt @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022 Works Applications Co., Ltd. + * Copyright (c) 2017-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -71,7 +71,7 @@ class TestDic { fun load(): Dictionary { val config = Config.fromClasspath(config).systemDictionary(systemDic) userDics.forEach { config.addUserDictionary(it) } - return DictionaryFactory().create(config) + return Dictionary.load(config) } }