From 8b1b4c4f9a385a9f4e669a96fd7d5d2cccc8faff Mon Sep 17 00:00:00 2001 From: Seonho Kim Date: Sat, 1 Dec 2018 17:42:24 +0900 Subject: [PATCH 1/3] change midje to clojure.test --- project.clj | 3 - test/open_korean_text_4clj/core_test.clj | 126 +++++++++++++---------- 2 files changed, 69 insertions(+), 60 deletions(-) diff --git a/project.clj b/project.clj index 8bbfc35..c02bbf6 100644 --- a/project.clj +++ b/project.clj @@ -7,9 +7,6 @@ :dependencies [[org.clojure/clojure "1.9.0"] [org.openkoreantext/open-korean-text "2.3.0"]] - :profiles {:dev {:dependencies [[midje "1.9.1"]] - :plugins [[lein-midje "3.2.1"]]}} - :repl-options {:init-ns open-korean-text-4clj.core} :lein-release {:deploy-via :clojars}) diff --git a/test/open_korean_text_4clj/core_test.clj b/test/open_korean_text_4clj/core_test.clj index f814666..c08a220 100644 --- a/test/open_korean_text_4clj/core_test.clj +++ b/test/open_korean_text_4clj/core_test.clj @@ -1,82 +1,94 @@ (ns open-korean-text-4clj.core-test - (:require [midje.sweet :refer :all] + (:require [clojure.test :refer :all] [open-korean-text-4clj.core :refer :all]) (:import [org.openkoreantext.processor KoreanPosJava])) -(fact "test normalize" - (normalize "한국어를 처리하는 예시입니닼ㅋㅋㅋㅋㅋ") => "한국어를 처리하는 예시입니다ㅋㅋㅋ") +(deftest normalize-test + (is (= (normalize "한국어를 처리하는 예시입니닼ㅋㅋㅋㅋㅋ") + "한국어를 처리하는 예시입니다ㅋㅋㅋ"))) -(facts "test tokenize" +(deftest tokenize-test + (testing "default operation" + (let [tokens (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ")] + (is (= (-> tokens count) + 8)) + (is (= (-> tokens (get 6) :text) + "닼")))) - (fact "default operation" - (let [tokens (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ")] - (-> tokens count) => 8 - (-> tokens (get 6) :text) => "닼")) + (testing "with normalization" + (let [tokens (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :norm true)] + (is (= (-> tokens count) + 7)) + (is (= (-> tokens (get 5) :text) + "입니다")))) - (fact "with normalization" - (let [tokens (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :norm true)] - (-> tokens count) => 7 - (-> tokens (get 5) :text) => 
"입니다")) + (testing "with normalization & stemming" + (let [tokens (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :norm true :stem true)] + (is (= (-> tokens count) + 7)) + (is (= (-> tokens (get 5) :text) + "이다")))) - (fact "with normalization & stemming" - (let [tokens (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :norm true :stem true)] - (-> tokens count) => 7 - (-> tokens (get 5) :text) => "이다")) + (testing "as-strs (return texts only)" + (is (.contains (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :as-strs true) + "닼")) - (fact "as-strs (return texts only)" - (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :as-strs true) - => (contains "닼") + (is (.contains (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :as-strs true :norm true) + "입니다")) - (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :as-strs true :norm true) - => (contains "입니다") + (is (.contains (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :as-strs true :norm true :stem true) + "이다")))) - (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :as-strs true :norm true :stem true) - => (contains "이다"))) +(deftest tokenize-top-n-test + (let [tokens (tokenize-top-n "대선 후보" 3)] + (is (= (-> tokens count) + 2)) + (is (= (-> tokens first first first :text) + "대선")))) -(fact "tokenize-top-n" - (let [tokens (tokenize-top-n "대선 후보" 3)] - (-> tokens count) => 2 - (-> tokens first first first :text) => "대선")) -(fact "detokenize" - (let [s (detokenize ["연세", "대학교", "보건", "대학원","에","오신","것","을","환영","합니다", "!"])] - s => (contains "연세대학교 보건 대학원") - s => (contains "환영합니다"))) +(deftest detokenize-test + (let [s (detokenize ["연세", "대학교", "보건", "대학원","에","오신","것","을","환영","합니다", "!"])] + (is (.contains s "연세대학교 보건 대학원")) + (is (.contains s "환영합니다!")))) -(facts "extract-phrases" - (fact "default operation" - (let [phrases (extract-phrases "한국어를 처리하는 예시입니닼ㅋㅋ")] - (-> phrases count) => 4 - (-> phrases (get 2) :text) => "처리하는 예시")) +(deftest extract-phrases-test + (testing "default operation" + (let [phrases (extract-phrases "한국어를 처리하는 예시입니닼ㅋㅋ")] + (is (= (-> phrases count) + 4)) + (is (= (-> phrases (get 2) :text) + "처리하는 예시")))) - (fact "as-strs (return texts 
only)" - (extract-phrases "한국어를 처리하는 예시입니닼ㅋㅋ" :as-strs true) - => (contains "처리하는 예시"))) + (testing "as-strs (return texts only)" + (is (.contains (extract-phrases "한국어를 처리하는 예시입니닼ㅋㅋ" :as-strs true) + "처리하는 예시")))) -(fact "split-sentences" - (-> (split-sentences "가을이다! 남자는 가을을 탄다...... 그렇지? 루루야! 버버리코트 사러 가자!!!!") - (get 1) - :text) - => - "남자는 가을을 탄다......") +(deftest split-sentences-test + (is (= (-> (split-sentences "가을이다! 남자는 가을을 탄다...... 그렇지? 루루야! 버버리코트 사러 가자!!!!") + (get 1) + :text) + "남자는 가을을 탄다......"))) -(fact "add-nouns-to-dictionary" - (-> (tokenize "불방망이") (get 0) :text) - => "불" - (add-nouns-to-dictionary ["불방망이"]) +(deftest add-nouns-to-dictionary-test + (is (= (-> (tokenize "불방망이") (get 0) :text) + "불")) - (-> (tokenize "불방망이") (get 0) :text) - => "불방망이") + (add-nouns-to-dictionary ["불방망이"]) -(fact "add-words-to-dictionary" - (-> (tokenize "그라믄 당신 먼저 얼렁 가이소") (get 0) :text) - => "그" + (is (= (-> (tokenize "불방망이") (get 0) :text) + "불방망이"))) - (add-words-to-dictionary KoreanPosJava/Conjunction ["그라믄"]) - (-> (tokenize "그라믄 당신 먼저 얼렁 가이소") (get 0) :text) - => "그라믄") +(deftest add-words-to-dictionary-test + (is (= (-> (tokenize "그라믄 당신 먼저 얼렁 가이소") (get 0) :text) + "그")) + + (add-words-to-dictionary KoreanPosJava/Conjunction ["그라믄"]) + + (is (= (-> (tokenize "그라믄 당신 먼저 얼렁 가이소") (get 0) :text) + "그라믄"))) From 38053b486171cf0d8595d7bc04a482bdcc544e5d Mon Sep 17 00:00:00 2001 From: Seonho Kim Date: Sat, 1 Dec 2018 17:43:56 +0900 Subject: [PATCH 2/3] upgrade open-korean-text version to 2.3.1 --- project.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project.clj b/project.clj index c02bbf6..300d6ee 100644 --- a/project.clj +++ b/project.clj @@ -5,7 +5,7 @@ :url "http://www.eclipse.org/legal/epl-v10.html"} :dependencies [[org.clojure/clojure "1.9.0"] - [org.openkoreantext/open-korean-text "2.3.0"]] + [org.openkoreantext/open-korean-text "2.3.1"]] :repl-options {:init-ns open-korean-text-4clj.core} From 
9206ee55d6c0cfee2f2d8c7c4ad50ca74f42e24a Mon Sep 17 00:00:00 2001 From: Seonho Kim Date: Sat, 1 Dec 2018 17:52:29 +0900 Subject: [PATCH 3/3] update to release-0.3 --- README.md | 6 +++--- project.clj | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index df8b3ce..ca67956 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ A [Open Korean Text Processor](https://github.com/open-korean-text/open-korean-t ## Dependencies -* [org.openkoreantext/open-korean-text 2.3.0](https://github.com/open-korean-text/open-korean-text/releases/tag/open-korean-text-2.3.0) +* [org.openkoreantext/open-korean-text 2.3.1](https://github.com/open-korean-text/open-korean-text/releases/tag/open-korean-text-2.3.1) ## Get Started @@ -17,7 +17,7 @@ A [Open Korean Text Processor](https://github.com/open-korean-text/open-korean-t [Leiningen](https://leiningen.org) dependency in `project.clj` (from [Clojars](https://clojars.org/open-korean-text-4clj)): ```clojure -[open-korean-text-4clj "0.2.5"] +[open-korean-text-4clj "0.3"] ``` [Maven](http://maven.apache.org/) dependency information in pom.xml: @@ -26,7 +26,7 @@ A [Open Korean Text Processor](https://github.com/open-korean-text/open-korean-t open-korean-text-4clj open-korean-text-4clj - 0.2.5 + 0.3 ``` diff --git a/project.clj b/project.clj index 300d6ee..489f7f4 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject open-korean-text-4clj "0.2.5" +(defproject open-korean-text-4clj "0.3" :description "Open Korean Text Processor wrapper for Clojure" :url "http://github.com/open-korean-text/open-korean-text-4clj" :license {:name "Eclipse Public License"