diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3ad82fe
--- /dev/null
+++ b/README.md
@@ -0,0 +1,466 @@
+# 玻森数据中文分析器Elasticsearch插件 (Beta版)
+
+##概述
+Elasticsearch 是一个基于 lucene 的强大搜索服务器,也是企业最受欢迎的搜索引擎之一。对于中文搜索,Elasticsearch 自带的标准分析器(Standard Analyzer)显然不能满足大家的要求。因此[玻森数据](http://bosonnlp.com)开发了一款基于玻森中文分词的 Elasticsearch 的插件(BosonNLP Analysis for Elasticsearch)方便大家准确的使用中文搜索。
+
+##安装
+
+###依赖
+Elasticsearch 官网安装说明 https://www.elastic.co/guide/en/elasticsearch/guide/1.x/_installing_elasticsearch.html
+
+###选择插件版本
+其对应的版本号和插件版本号如下:
+
+| BosonNLP version | ES Version |
+| :--------------: | :---------:|
+| 1.3.0-beta | 2.2.0 |
+| 1.2.2-beta | 2.1.2 |
+| 1.2.1-beta | 2.1.1 |
+| 1.2.0-beta | 2.1.0 |
+| 1.1.0-beta | 2.0.0 |
+| 1.0.0-beta | 1.7.x |
+
+###安装插件
+现在提供以下两种方式安装插件。
+
+####方法一
+```markdown
+TO DO download from github
+
+```
+####方法二
+
+1. 构建项目包
+
+ 下载玻森中文分析器项目到本地,并在项目根目录下通过 Maven 构建项目包:
+
+ ```
+ mvn clean package
+ ```
+
+ 构建后的项目包`elasticsearch-analysis-bosonnlp-{version}-plugin.zip`在`target/releases/`生成。
+
+2. 安装插件
+
+ 通过 Elasticsearch 的 plugin 加载插件,在 Elasticsearch 根目录执行以下命令即可:
+
+ ```
+ $ sudo bin/plugin install file:/root/path/to/your/elasticsearch-analysis-bosonnlp-{version}-plugin.zip
+ ```
+
+###设置
+
+运行 Elasticsearch 之前需要在 config 文件夹中修改`elasticsearch.yml`来定义使用玻森中文分析器,并填写玻森 API_TOKEN 以及玻森分词 API 的地址,即在该文件结尾处添加:
+
+```makefile
+index:
+ analysis:
+ analyzer:
+ bosonnlp:
+ type: bosonnlp
+ API_URL: http://api.bosonnlp.com/tag/analysis
+ # You MUST give the API_TOKEN value, otherwise it doesn't work
+ API_TOKEN: *PUT YOUR API TOKEN HERE*
+ # Please uncomment if you want to specify ANY ONE of the following
+        # arguments, otherwise the DEFAULT value will be used, i.e.,
+ # space_mode is 0,
+ # oov_level is 3,
+ # t2s is 0,
+ # special_char_conv is 0.
+        # More details can be found in bosonnlp docs:
+ # http://docs.bosonnlp.com/tag.html
+ #
+ #
+ # space_mode: put your value here(range from 0-3)
+ # oov_level: put your value here(range from 0-4)
+ # t2s: put your value here(range from 0-1)
+ # special_char_conv: put your value here(range from 0-1)
+```
+**需要注意的是**
+
+1. `必须在 API_URL 填写给定的分词地址以及在API_TOKEN:*PUT YOUR API TOKEN HERE* 中填写给定的玻森数据API_TOKEN`,否则无法使用玻森中文分析器。该 API_TOKEN 是[注册玻森数据账号](http://bosonnlp.com/)所获得。
+
+2. 如果配置文件中已经有配置过其他的 analyzer,请直接在 analyzer 下如上添加 bosonnlp analyzer。
+
+3. 如果有多个 node 并且都需要 BosonNLP 的分词插件,则每个 node 下的 yml 文件都需要如上安装和设置。
+
+4. 另外,玻森中文分词还提供了4个参数(*space_mode*,*oov_level*,*t2s*,*special_char_conv*)可满足不同的分词需求。如果取默认值,则无需任何修改;否则,可取消对应参数的注释并赋值。
+
+> 例:需开启繁体转换成简体(*t2s*)功能,则取消*t2s*的注释并赋值。
+```makefile
+ t2s: 1
+```
+
+更多关于玻森中文分词参数的信息,可以在此[了解](http://docs.bosonnlp.com/tag.html)。
+
+设置完之后就可以运行 Elasticsearch 了,如果对该设置有新的改动,需要重启 Elasticsearch 才可生效。
+
+###测试
+
+####分词测试
+运行 Elasticsearch
+
+显示插件加载成功
+```
+...
+[time][INFO ][plugins] [Gaza] loaded [analysis-bosonnlp]
+...
+```
+建立 index
+```markdown
+curl -XPUT 'localhost:9200/test'
+```
+测试分析器是否配置成功
+```markdown
+curl -XGET 'localhost:9200/test/_analyze?analyzer=bosonnlp&pretty' -d '这是玻森数据分词的测试'
+```
+结果
+```json
+{
+ "tokens" : [ {
+ "token" : "这",
+ "start_offset" : 0,
+ "end_offset" : 1,
+ "type" : "word",
+ "position" : 0
+ }, {
+ "token" : "是",
+ "start_offset" : 1,
+ "end_offset" : 2,
+ "type" : "word",
+ "position" : 1
+ }, {
+ "token" : "玻森",
+ "start_offset" : 2,
+ "end_offset" : 4,
+ "type" : "word",
+ "position" : 2
+ }, {
+ "token" : "数据",
+ "start_offset" : 4,
+ "end_offset" : 6,
+ "type" : "word",
+ "position" : 3
+ }, {
+ "token" : "分词",
+ "start_offset" : 6,
+ "end_offset" : 8,
+ "type" : "word",
+ "position" : 4
+ }, {
+ "token" : "的",
+ "start_offset" : 8,
+ "end_offset" : 9,
+ "type" : "word",
+ "position" : 5
+ }, {
+ "token" : "测试",
+ "start_offset" : 9,
+ "end_offset" : 11,
+ "type" : "word",
+ "position" : 6
+ } ]
+}
+
+```
+
+####搜索测试
+建立 mapping
+```markdown
+curl -XPUT 'localhost:9200/test/text/_mapping' -d'
+{
+ "text": {
+ "properties": {
+ "content": {
+ "type": "string",
+ "analyzer": "bosonnlp",
+ "search_analyzer": "bosonnlp"
+ }
+ }
+ }
+}
+```
+输入数据
+```markdown
+curl -XPUT 'localhost:9200/test/text/1' -d'
+{"content": "美称中国武器商很神秘 花巨资海外参展却一言不发"}
+'
+```
+```markdown
+curl -XPUT 'localhost:9200/test/text/2' -d'
+{"content": "复旦发现江浙沪儿童体内普遍有兽用抗生素"}
+'
+```
+```markdown
+curl -XPUT 'localhost:9200/test/text/3' -d'
+{"content": "37年后重启顶层设计 中国未来城市发展料现四大变化"}
+'
+```
+查询搜索
+```markdown
+curl -XPOST 'localhost:9200/test/text/_search?pretty' -d'
+{
+ "query" : { "term" : { "content" : "中国" }}
+}
+'
+```
+结果
+```json
+{
+ "took" : 1,
+ "timed_out" : false,
+ "_shards" : {
+ "total" : 5,
+ "successful" : 5,
+ "failed" : 0
+ },
+ "hits" : {
+ "total" : 2,
+ "max_score" : 0.076713204,
+ "hits" : [ {
+ "_index" : "test",
+ "_type" : "text",
+ "_id" : "1",
+ "_score" : 0.076713204,
+ "_source":
+{
+ "content": "美称中国武器商很神秘 花巨资海外参展却一言不发"}
+ }, {
+ "_index" : "test",
+ "_type" : "text",
+ "_id" : "3",
+ "_score" : 0.076713204,
+ "_source":
+{
+ "content": "37年后重启顶层设计 中国未来城市发展料现四大变化"}
+ } ]
+ }
+}
+```
+查询搜索
+```markdown
+curl -XPOST 'localhost:9200/test/text/_search?pretty' -d'
+{
+ "query" : { "term" : { "content" : "国武" }}
+}'
+```
+结果
+```json
+{
+ "took" : 1,
+ "timed_out" : false,
+ "_shards" : {
+ "total" : 5,
+ "successful" : 5,
+ "failed" : 0
+ },
+ "hits" : {
+ "total" : 0,
+ "max_score" : null,
+ "hits" : [ ]
+ }
+}
+
+```
+查询搜索
+```markdown
+curl -XPOST 'localhost:9200/test/text/_search?pretty' -d'
+{
+ "query" : { "term" : { "content" : "国" }}
+}'
+```
+结果
+```json
+{
+ "took" : 1,
+ "timed_out" : false,
+ "_shards" : {
+ "total" : 5,
+ "successful" : 5,
+ "failed" : 0
+ },
+ "hits" : {
+ "total" : 0,
+ "max_score" : null,
+ "hits" : [ ]
+ }
+}
+
+```
+如果用 ES 默认的分析器(Standard Analyzer)去查询,得到如下结果:
+
+查询搜索
+```markdown
+curl -XPOST 'localhost:9200/test/text/_search?pretty' -d'
+{
+ "query" : { "term" : { "content" : "国" }}
+}'
+```
+结果
+```json
+{
+ "took" : 8,
+ "timed_out" : false,
+ "_shards" : {
+ "total" : 5,
+ "successful" : 5,
+ "failed" : 0
+ },
+ "hits" : {
+ "total" : 2,
+ "max_score" : 0.057534903,
+ "hits" : [ {
+ "_index" : "test3",
+ "_type" : "text",
+ "_id" : "1",
+ "_score" : 0.057534903,
+ "_source":
+{"content": "美称中国武器商很神秘 花巨资海外参展却一言不发"}
+ }, {
+ "_index" : "test3",
+ "_type" : "text",
+ "_id" : "3",
+ "_score" : 0.057534903,
+ "_source":
+{"content": "37年后重启顶层设计 中国未来城市发展料现四大变化"}
+
+ } ]
+ }
+}
+
+```
+查询搜索
+```markdown
+curl -XPOST 'localhost:9200/test3/text/_search?pretty' -d '
+{
+ "query": {"term":{"content":"中国"}}
+}'
+```
+结果
+```json
+{
+ "took" : 14,
+ "timed_out" : false,
+ "_shards" : {
+ "total" : 5,
+ "successful" : 5,
+ "failed" : 0
+ },
+ "hits" : {
+ "total" : 0,
+ "max_score" : null,
+ "hits" : [ ]
+ }
+}
+
+```
+
+###配置 Token Filter
+现有的 BosonNLP 分析器没有内置 token filter,如果有过滤 Token 的需求,可以利用 BosonNLP Tokenizer 和 ES 提供的 token filter 搭建定制分析器。
+
+####步骤
+配置定制的 analyzer 有以下三个步骤:
+
+- 添加 BosonNLP tokenizer
+
+在 `elasticsearch.yml` 文件中 analysis 下添加 tokenizer, 并在 tokenizer 中添加 BosonNLP tokenizer 的配置:
+```makefile
+index:
+ analysis:
+ analyzer:
+ ...
+ tokenizer:
+ bosonnlp:
+ type: bosonnlp
+ API_URL: http://api.bosonnlp.com/tag/analysis
+ # You MUST give the API_TOKEN value, otherwise it doesn't work
+ API_TOKEN: *PUT YOUR API TOKEN HERE*
+ # Please uncomment if you want to specify ANY ONE of the following
+        # arguments, otherwise the DEFAULT value will be used, i.e.,
+ # space_mode is 0,
+ # oov_level is 3,
+ # t2s is 0,
+ # special_char_conv is 0.
+        # More details can be found in bosonnlp docs:
+ # http://docs.bosonnlp.com/tag.html
+ #
+ #
+ # space_mode: put your value here(range from 0-3)
+ # oov_level: put your value here(range from 0-4)
+ # t2s: put your value here(range from 0-1)
+ # special_char_conv: put your value here(range from 0-1)
+```
+
+**同样需要注意的是**
+
+1. `必须在 API_URL 中填写给定的分词地址以及在 API_TOKEN:*PUT YOUR API TOKEN HERE* 中填写给定的玻森数据API_TOKEN`,否则无法使用玻森 tokenizer。
+2. 如果配置文件中已经有配置过其他的 tokenizer,请直接在 tokenizer 下如上添加 bosonnlp tokenizer。
+3. 如果需要改动参数的默认值,请可取消对应参数的注释并赋值。
+
+- 添加 token filter
+
+在 `elasticsearch.yml` 文件中 analysis 下添加 filter, 并在 filter 中添加所需 filter 的配置(下面例子中,我们以 lowercase filter 为例):
+```makefile
+index:
+ analysis:
+ analyzer:
+ ...
+ tokenizer:
+ ...
+ filter:
+ lowercase:
+ type: lowercase
+
+```
+
+- 添加定制的 analyzer
+
+在 `elasticsearch.yml` 文件中 analysis 下添加 analyzer, 并在 analyzer 中添加定制的 analyzer 的配置(下面例子中,我们把定制的 analyzer 命名为 filter_bosonnlp):
+```markdown
+index:
+ analysis:
+ analyzer:
+ ...
+ filter_bosonnlp:
+ type: custom
+ tokenizer: bosonnlp
+ filter: [lowercase]
+```
+如有其他想要添加的 filter,可以在配置完 filter 之后在上述 filter:[] 列表中添加,以逗号隔开。
+
+附上完整的定制 analyzer 配置:
+```makefile
+index:
+ analysis:
+ analyzer:
+ filter_bosonnlp:
+ type: custom
+ tokenizer: bosonnlp
+ filter: [lowercase]
+ tokenizer:
+ bosonnlp:
+ type: bosonnlp
+ API_URL: http://api.bosonnlp.com/tag/analysis
+ # You MUST give the API_TOKEN value, otherwise it doesn't work
+ API_TOKEN: *PUT YOUR API TOKEN HERE*
+ # Please uncomment if you want to specify ANY ONE of the following
+        # arguments, otherwise the DEFAULT value will be used, i.e.,
+ # space_mode is 0,
+ # oov_level is 3,
+ # t2s is 0,
+ # special_char_conv is 0.
+        # More details can be found in bosonnlp docs:
+ # http://docs.bosonnlp.com/tag.html
+ #
+ #
+ # space_mode: put your value here(range from 0-3)
+ # oov_level: put your value here(range from 0-4)
+ # t2s: put your value here(range from 0-1)
+ # special_char_conv: put your value here(range from 0-1)
+ filter:
+ lowercase:
+ type: lowercase
+
+```
+## 注意
+由于 ES 搜索内核 Lucene 索引文件的设计结构所限,每个文档的每个字段必须单独分析, 无法采用 BosonNLP 的批处理调用,从而在 Network IO 上会有较大的时间开销。
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..8b38abc
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,167 @@
+
+
+ 4.0.0
+ org.elasticsearch
+ elasticsearch-analysis-bosonnlp
+ jar
+ 1.3.0-beta
+ elasticsearch-analysis-bosonnlp
+ http://maven.apache.org
+ BosonNLP Analyzer for ElasticSearch
+
+
+ 1.7
+ 1.7
+ 2.2.0
+ ${project.basedir}/src/main/assemblies/plugin.xml
+ elasticsearch-analysis-bosonnlp
+ org.elasticsearch.plugin.analysis.bosonnlp.AnalysisBosonNLPPlugin
+ true
+ false
+ UTF-8
+
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+
+
+
+
+
+ junit
+ junit
+ 4.10
+ test
+
+
+ org.elasticsearch
+ elasticsearch
+ ${elasticsearch.version}
+ compile
+
+
+ org.json
+ json
+ 20151123
+ compile
+
+
+ com.mashape.unirest
+ unirest-java
+ 1.4.5
+ compile
+
+
+ org.apache.httpcomponents
+ httpclient
+ 4.5.1
+
+
+ org.apache.httpcomponents
+ httpcore
+ 4.4.4
+
+
+ org.apache.httpcomponents
+ httpcore-nio
+ 4.4.4
+
+
+ org.apache.httpcomponents
+ httpmime
+ 4.5.1
+
+
+ org.apache.httpcomponents
+ httpasyncclient
+ 4.1.1
+
+
+ commons-logging
+ commons-logging
+ 1.2
+
+
+ log4j
+ log4j
+ 1.2.16
+ runtime
+
+
+ commons-codec
+ commons-codec
+ 1.10
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-eclipse-plugin
+ 2.9
+
+ true
+ true
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+ 2.3.2
+
+ ${jdk.version}
+ ${jdk.version}
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-source-plugin
+ 2.1.2
+
+
+ attach-sources
+
+ jar-no-fork
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-assembly-plugin
+
+ ${project.build.directory}/releases/
+
+ ${basedir}/src/main/assemblies/plugin.xml
+
+
+
+ fully.qualified.MainClass
+
+
+
+
+
+ package
+
+ single
+
+
+
+
+
+
+
+
diff --git a/src/main/assemblies/plugin.xml b/src/main/assemblies/plugin.xml
new file mode 100644
index 0000000..0feb182
--- /dev/null
+++ b/src/main/assemblies/plugin.xml
@@ -0,0 +1,25 @@
+
+
+
+
+ zip
+
+ false
+
+
+ /
+ true
+ false
+
+ org.elasticsearch:elasticsearch
+
+
+
+
+
+
+ ${project.basedir}/src/main/resources/plugin-descriptor.properties
+ true
+
+
+
diff --git a/src/main/java/org/bosonnlp/analyzer/core/BosonNLPWordSegmenter.java b/src/main/java/org/bosonnlp/analyzer/core/BosonNLPWordSegmenter.java
new file mode 100644
index 0000000..0910c79
--- /dev/null
+++ b/src/main/java/org/bosonnlp/analyzer/core/BosonNLPWordSegmenter.java
@@ -0,0 +1,150 @@
+/**
+ * BosonNLP word segmenter release 0.8.2
+ * 玻森中文分词 版本 0.8.2
+ */
+package org.bosonnlp.analyzer.core;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+import org.elasticsearch.common.logging.ESLogger;
+import org.elasticsearch.common.logging.ESLoggerFactory;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+import com.mashape.unirest.http.HttpResponse;
+import com.mashape.unirest.http.JsonNode;
+import com.mashape.unirest.http.Unirest;
+import com.mashape.unirest.http.exceptions.UnirestException;
+
+/**
+ * Client for the BosonNLP word-segmentation HTTP API.
+ *
+ * Reads the text from a {@link Reader}, POSTs it to the configured API
+ * endpoint via Unirest, and exposes the returned word tokens through an
+ * iterator that {@code BosonNLPTokenizer} consumes one token at a time.
+ */
+public final class BosonNLPWordSegmenter {
+
+    // Segmentation API endpoint (API_URL from the plugin settings).
+    private String TAG_URL;
+    // API token sent in the "X-Token" header; required by the service.
+    private String BOSONNLP_API_TOKEN;
+    // Segmentation options forwarded as query parameters; value ranges are
+    // documented in the README (space_mode 0-3, oov_level 0-4, t2s 0-1,
+    // special_char_conv 0-1).
+    private int spaceMode;
+    private int oovLevel;
+    private int t2s;
+    private int specialCharConv;
+
+    // NOTE(review): raw types — these should be List<String> and
+    // Iterator<String>; every element added in makeToken() is a String.
+    private List words = new ArrayList();
+    private Iterator wordsIter = Collections.emptyIterator();
+    // Current input source; replaced on every reset(Reader).
+    private Reader input;
+
+    private ESLogger logger = ESLoggerFactory.getLogger("bosonnlp plugin");
+
+    /**
+     * Creates a segmenter bound to one API endpoint and token.
+     *
+     * @param input           reader supplying the text to segment
+     * @param URL             BosonNLP segmentation API endpoint
+     * @param BAT             BosonNLP API token
+     * @param spaceMode       whitespace handling mode (0-3)
+     * @param oovLevel        out-of-vocabulary segmentation level (0-4)
+     * @param t2s             traditional-to-simplified conversion flag (0-1)
+     * @param specialCharConv special character conversion flag (0-1)
+     * @throws IOException      declared but not thrown by this constructor
+     * @throws JSONException    declared but not thrown by this constructor
+     * @throws UnirestException declared but not thrown by this constructor
+     */
+    public BosonNLPWordSegmenter(Reader input, String URL, String BAT, int spaceMode, int oovLevel, int t2s, int specialCharConv)
+            throws IOException, JSONException, UnirestException {
+        this.input = input;
+        this.TAG_URL = URL;
+        this.BOSONNLP_API_TOKEN = BAT;
+        this.spaceMode = spaceMode;
+        this.oovLevel = oovLevel;
+        this.t2s = t2s;
+        this.specialCharConv = specialCharConv;
+    }
+
+    /**
+     * Drains the reader into a single string, preserving line breaks.
+     *
+     * @param input reader to consume; closed by the try-with-resources block
+     * @return the full text with a trailing "\n" appended to every line
+     * @throws IOException declared, but in practice swallowed by the catch
+     */
+    public String getStringText(Reader input) throws IOException {
+        StringBuffer target = new StringBuffer();
+        try (BufferedReader br = new BufferedReader(input)) {
+            String temp;
+            while ((temp = br.readLine()) != null) {
+                target.append(temp + "\n");
+            }
+        } catch (IOException e) {
+            // NOTE(review): the exception is swallowed here, so a read
+            // failure silently yields a partial (or empty) string.
+            e.printStackTrace();
+        }
+        return target.toString();
+    }
+
+    /**
+     * Calls the BosonNLP word segmenter API via the Unirest Java library
+     * and replaces the current token list with the response.
+     *
+     * @param target the text to be processed (sent as a one-element JSON array)
+     * @throws JSONException    on malformed JSON in the request/response
+     * @throws UnirestException on HTTP transport failure
+     * @throws IOException      declared for interface symmetry
+     */
+    public void segment(String target) throws JSONException, UnirestException, IOException {
+        // Clean the word token list left over from the previous document.
+        this.words.clear();
+        // The API expects a JSON array of input strings; we send exactly one.
+        String body = new JSONArray(new String[] { target }).toString();
+        // NOTE(review): raw HttpResponse — should be HttpResponse<JsonNode>.
+        HttpResponse jsonResponse = Unirest.post(this.TAG_URL)
+                .queryString("space_mode", this.spaceMode)
+                .queryString("oov_level", this.oovLevel)
+                .queryString("t2s", this.t2s)
+                .queryString("special_char_conv", this.specialCharConv)
+                .header("Accept", "application/json")
+                .header("X-Token", this.BOSONNLP_API_TOKEN).body(body).asJson();
+
+        makeToken(jsonResponse.getBody());
+    }
+
+    /**
+     * Extracts the token strings from the segmenter response and stores
+     * them in {@code words}; always refreshes {@code wordsIter}, even on
+     * failure, so the tokenizer never iterates a stale list.
+     *
+     * @param jn response body; expected shape: [{"word": [...], ...}]
+     */
+    private void makeToken(JsonNode jn) {
+        try {
+            // The response mirrors the request: a JSON array with one
+            // result object per input string.
+            JSONArray jaTemp = jn.getArray();
+            if (jaTemp.length() > 0) {
+                JSONObject jo = jaTemp.getJSONObject(0);
+                if (jo != null && jo.has("word")) {
+                    JSONArray ja = jo.getJSONArray("word");
+
+                    for (int i = 0; i < ja.length(); i++) {
+                        this.words.add(ja.get(i).toString());
+                    }
+                } else {
+                    // A response without a "word" field usually means an
+                    // auth or connectivity error from the service.
+                    // NOTE(review): if jo is null, jo.toString() below
+                    // throws NPE instead of the intended RuntimeException.
+                    logger.error("Check the validation of your API TOKEN or internet",
+                            new UnirestException(jo.toString()), jo);
+                    throw new RuntimeException("Check validation of API TOKEN or internet: " + jo.toString());
+                }
+            } else {
+                // Empty array: nothing was submitted, so no tokens.
+                logger.info("No string input", jaTemp);
+            }
+
+        } catch (JSONException e) {
+            logger.error("JSONException", e, e);
+            throw new RuntimeException("JSONException");
+        } finally {
+            // Publish whatever was collected (possibly empty) to the iterator.
+            this.wordsIter = this.words.iterator();
+        }
+    }
+
+    /**
+     * Re-binds the segmenter to a new reader and segments its full content.
+     * Called from {@code BosonNLPTokenizer#reset()}.
+     *
+     * @param input the new text source
+     */
+    public void reset(Reader input) throws IOException, JSONException, UnirestException {
+        // Remember the new source, then read it fully.
+        setInput(input);
+        String target = getStringText(input);
+        // Segment the whole document in one API call.
+        segment(target);
+    }
+
+    /** @return the current input reader. */
+    public Reader getInput() {
+        return input;
+    }
+
+    /** @param input the reader to segment on the next {@link #reset(Reader)}. */
+    public void setInput(Reader input) {
+        this.input = input;
+    }
+
+    /** @return iterator over the tokens produced by the last segmentation. */
+    public Iterator getWordsIter() {
+        return this.wordsIter;
+    }
+}
diff --git a/src/main/java/org/bosonnlp/analyzer/lucene/BosonNLPAnalyzer.java b/src/main/java/org/bosonnlp/analyzer/lucene/BosonNLPAnalyzer.java
new file mode 100644
index 0000000..be23a1c
--- /dev/null
+++ b/src/main/java/org/bosonnlp/analyzer/lucene/BosonNLPAnalyzer.java
@@ -0,0 +1,58 @@
+/**
+ * 玻森数据 中文分词 版本 0.8.2
+ *
+ */
+package org.bosonnlp.analyzer.lucene;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Tokenizer;
+import org.json.JSONException;
+
+import com.mashape.unirest.http.exceptions.UnirestException;
+
+
+/**
+ * Implementation of Bosonnlp word segmenter
+ * on Lucene Analyzer interface
+ */
+/**
+ * Lucene {@link Analyzer} backed by the BosonNLP word segmenter.
+ * Each analysis chain consists of a single {@code BosonNLPTokenizer};
+ * no token filters are attached here (see README for custom analyzers).
+ */
+public final class BosonNLPAnalyzer extends Analyzer{
+
+    // Segmentation option defaults; match the documented API defaults
+    // (space_mode 0, oov_level 3, t2s 0, special_char_conv 0).
+    private int spaceMode = 0;
+    private int oovLevel = 3;
+    private int t2s = 0;
+    private int specialCharConv = 0;
+    // Credentials/endpoint handed through to the tokenizer.
+    private String BOSONNLP_API_TOKEN;
+    private String TAG_URL;
+
+    /**
+     * Creates an analyzer using the default segmentation options.
+     *
+     * @param URL BosonNLP segmentation API endpoint
+     * @param BAT BosonNLP API token
+     */
+    public BosonNLPAnalyzer(String URL, String BAT){
+        super();
+        this.TAG_URL = URL;
+        this.BOSONNLP_API_TOKEN = BAT;
+    }
+
+    /**
+     * Creates an analyzer with explicit segmentation options.
+     *
+     * @param URL             BosonNLP segmentation API endpoint
+     * @param BAT             BosonNLP API token
+     * @param spaceMode       whitespace handling mode (0-3)
+     * @param oovLevel        out-of-vocabulary segmentation level (0-4)
+     * @param t2s             traditional-to-simplified conversion flag (0-1)
+     * @param specialCharConv special character conversion flag (0-1)
+     */
+    public BosonNLPAnalyzer(String URL, String BAT, int spaceMode, int oovLevel, int t2s, int specialCharConv){
+        super();
+        this.TAG_URL = URL;
+        this.BOSONNLP_API_TOKEN = BAT;
+        this.spaceMode = spaceMode;
+        this.oovLevel = oovLevel;
+        this.t2s = t2s;
+        this.specialCharConv = specialCharConv;
+    }
+
+    /**
+     * Builds the per-field token stream: a lone BosonNLP tokenizer.
+     *
+     * @param fieldName unused; the same chain is built for every field
+     */
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName){
+        Tokenizer BTokenizer = null;
+        try {
+
+            BTokenizer = new BosonNLPTokenizer(TAG_URL, BOSONNLP_API_TOKEN, spaceMode,
+                    oovLevel, t2s, specialCharConv);
+        } catch (IOException | JSONException | UnirestException e) {
+            // NOTE(review): swallowing the exception leaves BTokenizer null,
+            // so TokenStreamComponents below would fail later rather than here.
+            e.printStackTrace();
+        }
+        return new TokenStreamComponents(BTokenizer);
+    }
+
+}
diff --git a/src/main/java/org/bosonnlp/analyzer/lucene/BosonNLPTokenizer.java b/src/main/java/org/bosonnlp/analyzer/lucene/BosonNLPTokenizer.java
new file mode 100644
index 0000000..0acce5a
--- /dev/null
+++ b/src/main/java/org/bosonnlp/analyzer/lucene/BosonNLPTokenizer.java
@@ -0,0 +1,103 @@
+/**
+ * BosonNLP word segmenter version 0.8.2
+ * 玻森中文分词 版本 0.8.2
+ */
+package org.bosonnlp.analyzer.lucene;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.bosonnlp.analyzer.core.BosonNLPWordSegmenter;
+import org.json.JSONException;
+
+import com.mashape.unirest.http.exceptions.UnirestException;
+
+/**
+ * Implementation of BosonNLP word segmenter on Lucene Tokenizer interface
+ *
+ */
+/**
+ * Lucene {@link Tokenizer} that delegates segmentation to
+ * {@link BosonNLPWordSegmenter} and replays the resulting words as a
+ * token stream with char-term, offset, type, and position attributes.
+ */
+public final class BosonNLPTokenizer extends Tokenizer {
+    // Remote bosonnlp word segmenter; re-run on every reset().
+    private BosonNLPWordSegmenter BosonSeg;
+    // NOTE(review): raw type — should be Iterator<String>.
+    private Iterator wordToken;
+    // Attributes populated for each emitted token.
+    private final CharTermAttribute charTermAttr;
+    private final OffsetAttribute offsetAttr;
+    private final TypeAttribute typeAttr;
+    private final PositionIncrementAttribute piAttr;
+
+    // End offset of the last emitted token; -1 before the first token.
+    // Offsets are derived by accumulating word lengths, which assumes the
+    // segmenter output concatenates back to the input — TODO confirm.
+    private int endPosition = -1;
+    // Extra position increment folded into end(); never changed here.
+    private int extraIncrement = 0;
+
+    /**
+     * Lucene constructor: registers the token attributes and builds the
+     * underlying segmenter bound to this tokenizer's {@code input} reader.
+     *
+     * @param URL             BosonNLP segmentation API endpoint
+     * @param BAT             BosonNLP API token
+     * @param spaceMode       whitespace handling mode (0-3)
+     * @param oovLevel        out-of-vocabulary segmentation level (0-4)
+     * @param t2s             traditional-to-simplified conversion flag (0-1)
+     * @param specialCharConv special character conversion flag (0-1)
+     * @throws UnirestException propagated from the segmenter constructor
+     * @throws JSONException    propagated from the segmenter constructor
+     * @throws IOException      propagated from the segmenter constructor
+     */
+    public BosonNLPTokenizer(String URL, String BAT, int spaceMode, int oovLevel, int t2s, int specialCharConv)
+            throws IOException, JSONException, UnirestException {
+        super();
+        // Add token offset attribute
+        offsetAttr = addAttribute(OffsetAttribute.class);
+        // Add token content attribute
+        charTermAttr = addAttribute(CharTermAttribute.class);
+        // Add token type attribute
+        typeAttr = addAttribute(TypeAttribute.class);
+        // Add token position attribute
+        piAttr = addAttribute(PositionIncrementAttribute.class);
+        // Create a new word segmenter to get tokens
+        BosonSeg = new BosonNLPWordSegmenter(input, URL, BAT, spaceMode, oovLevel, t2s, specialCharConv);
+    }
+
+    /**
+     * Emits the next pre-segmented word, or returns false when the
+     * iterator produced by the last reset() is exhausted.
+     */
+    @Override
+    public boolean incrementToken() throws IOException {
+        // Per Lucene contract: clear all attributes before populating.
+        clearAttributes();
+        if (wordToken.hasNext()) {
+            String word = wordToken.next();
+            piAttr.setPositionIncrement(extraIncrement + 1);
+            charTermAttr.append(word);
+            charTermAttr.setLength(word.length());
+            // Offsets advance by the word's length from the previous end.
+            offsetAttr.setOffset(endPosition + 1, endPosition + word.length() + 1);
+            // The type can be extended later
+            typeAttr.setType("word");
+            endPosition += word.length();
+            return true;
+        }
+        // No more token
+        return false;
+    }
+
+    /**
+     * Resets tokenizer state and re-segments the (new) input reader.
+     * Segmentation happens eagerly here, not lazily in incrementToken().
+     */
+    @Override
+    public void reset() throws IOException {
+        try {
+            super.reset();
+            BosonSeg.reset(input);
+            wordToken = BosonSeg.getWordsIter();
+            extraIncrement = 0;
+            endPosition = -1;
+        } catch (JSONException | UnirestException e) {
+            // NOTE(review): failures here are swallowed, leaving wordToken
+            // stale/null; a later incrementToken() may then fail instead.
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Finalizes the stream: sets the final offset to the corrected end
+     * position and folds any leftover position increment, per the
+     * TokenStream end() contract.
+     */
+    @Override
+    public final void end() throws IOException {
+        super.end();
+        if (endPosition < 0) {
+            // No tokens were emitted; clamp to a valid zero offset.
+            endPosition = 0;
+        }
+        int finalOffset = correctOffset(endPosition);
+        offsetAttr.setOffset(finalOffset, finalOffset);
+        piAttr.setPositionIncrement(piAttr.getPositionIncrement() + extraIncrement);
+    }
+
+}
diff --git a/src/main/java/org/elasticsearch/index/analysis/BosonNLPAnalysisBinderProcessor.java b/src/main/java/org/elasticsearch/index/analysis/BosonNLPAnalysisBinderProcessor.java
new file mode 100644
index 0000000..3136c6d
--- /dev/null
+++ b/src/main/java/org/elasticsearch/index/analysis/BosonNLPAnalysisBinderProcessor.java
@@ -0,0 +1,21 @@
+package org.elasticsearch.index.analysis;
+
+import org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor;
+
+
+/**
+ * Registers the BosonNLP analyzer provider and tokenizer factory with the
+ * index-level analysis module under the shared name "bosonnlp".
+ */
+public class BosonNLPAnalysisBinderProcessor extends AnalysisBinderProcessor {
+
+ /*
+ * It simply adds our analyzer provider class to a list of bindings.
+ */
+ @Override
+ public void processAnalyzers(AnalyzersBindings analyzersBindings) {
+ analyzersBindings.processAnalyzer(BosonNLPAnalyzerProvider.NAME, BosonNLPAnalyzerProvider.class);
+ }
+
+ @Override
+ public void processTokenizers(TokenizersBindings tokenizersBindings) {
+ tokenizersBindings.processTokenizer(BosonNLPTokenizerFactory.NAME, BosonNLPTokenizerFactory.class);
+ }
+
+}
diff --git a/src/main/java/org/elasticsearch/index/analysis/BosonNLPAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/BosonNLPAnalyzerProvider.java
new file mode 100644
index 0000000..a81a321
--- /dev/null
+++ b/src/main/java/org/elasticsearch/index/analysis/BosonNLPAnalyzerProvider.java
@@ -0,0 +1,44 @@
+package org.elasticsearch.index.analysis;
+
+import org.bosonnlp.analyzer.lucene.BosonNLPAnalyzer;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettingsService;
+
+/**
+ * Index-level provider that builds a single {@link BosonNLPAnalyzer} from
+ * index settings (API URL/token, space mode, OOV level, t2s, special-char
+ * conversion) and hands it out via {@link #get()}.
+ */
+public class BosonNLPAnalyzerProvider extends AbstractIndexAnalyzerProvider<BosonNLPAnalyzer> {
+    // Built once in the constructor and reused for the provider's lifetime.
+    private final BosonNLPAnalyzer analyzer;
+    private String BOSONNLP_API_TOKEN;
+    private String TAG_URL;
+    private int spaceMode;
+    private int oovLevel;
+    private int t2s;
+    private int specialCharConv;
+
+    /*
+     * Name to associate with this class. It will be used in the BinderProcessor.
+     */
+    public static final String NAME = "bosonnlp";
+
+    @Inject
+    public BosonNLPAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
+
+        super(index, indexSettingsService.getSettings(), name, settings);
+        // Settings.get(key, default) already returns a String (never null with
+        // a default supplied), so the redundant .toString() calls were dropped.
+        // NOTE(review): "spechial_char_conv" is misspelled but is the key used
+        // consistently across this plugin, so it is kept for compatibility.
+        this.TAG_URL = settings.get("API_URL", "");
+        this.BOSONNLP_API_TOKEN = settings.get("API_TOKEN", "");
+        this.spaceMode = Integer.parseInt(settings.get("space_mode", "0"));
+        this.oovLevel = Integer.parseInt(settings.get("oov_level", "3"));
+        this.t2s = Integer.parseInt(settings.get("t2s", "0"));
+        this.specialCharConv = Integer.parseInt(settings.get("spechial_char_conv", "0"));
+
+        this.analyzer = new BosonNLPAnalyzer(TAG_URL, BOSONNLP_API_TOKEN, spaceMode, oovLevel, t2s, specialCharConv);
+
+    }
+
+    /** @return the shared analyzer instance configured for this index. */
+    @Override
+    public BosonNLPAnalyzer get() {
+        return this.analyzer;
+    }
+}
diff --git a/src/main/java/org/elasticsearch/index/analysis/BosonNLPTokenizerFactory.java b/src/main/java/org/elasticsearch/index/analysis/BosonNLPTokenizerFactory.java
new file mode 100644
index 0000000..525cd4c
--- /dev/null
+++ b/src/main/java/org/elasticsearch/index/analysis/BosonNLPTokenizerFactory.java
@@ -0,0 +1,53 @@
+package org.elasticsearch.index.analysis;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.bosonnlp.analyzer.lucene.BosonNLPTokenizer;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.assistedinject.Assisted;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.settings.IndexSettingsService;
+import org.json.JSONException;
+
+import com.mashape.unirest.http.exceptions.UnirestException;
+
+/**
+ * Factory that creates a fresh {@link BosonNLPTokenizer} per {@link #create()}
+ * call, configured from index settings.
+ */
+public class BosonNLPTokenizerFactory extends AbstractTokenizerFactory {
+    private final Settings settings;
+    private String BOSONNLP_API_TOKEN;
+    private String TAG_URL;
+    private int spaceMode;
+    private int oovLevel;
+    private int t2s;
+    private int specialCharConv;
+
+    // The name associated with this class, which will be
+    // referenced in the BinderProcessor.
+    public static final String NAME = "bosonnlp";
+
+    @Inject
+    public BosonNLPTokenizerFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name, @Assisted Settings settings) {
+        super(index, indexSettingsService.getSettings(), name, settings);
+        this.settings = settings;
+    }
+
+    /**
+     * Builds a new tokenizer from the index settings.
+     *
+     * @throws IllegalStateException if the underlying segmenter cannot be
+     *         created. Previously the exception was swallowed and a null
+     *         Tokenizer was returned, which would NPE later in the analysis
+     *         chain with no indication of the root cause.
+     */
+    @Override
+    public Tokenizer create() {
+        TAG_URL = settings.get("API_URL", "");
+        BOSONNLP_API_TOKEN = settings.get("API_TOKEN", "");
+        spaceMode = Integer.parseInt(settings.get("space_mode", "0"));
+        oovLevel = Integer.parseInt(settings.get("oov_level", "3"));
+        t2s = Integer.parseInt(settings.get("t2s", "0"));
+        // NOTE(review): "spechial_char_conv" is misspelled but kept for
+        // compatibility with existing index settings.
+        specialCharConv = Integer.parseInt(settings.get("spechial_char_conv", "0"));
+
+        try {
+            return new BosonNLPTokenizer(TAG_URL, BOSONNLP_API_TOKEN, spaceMode, oovLevel, t2s, specialCharConv);
+        } catch (IOException | JSONException | UnirestException e) {
+            throw new IllegalStateException("Failed to create BosonNLP tokenizer", e);
+        }
+    }
+
+}
diff --git a/src/main/java/org/elasticsearch/indices/analysis/BosonNLPIndicesAnalysis.java b/src/main/java/org/elasticsearch/indices/analysis/BosonNLPIndicesAnalysis.java
new file mode 100644
index 0000000..13038df
--- /dev/null
+++ b/src/main/java/org/elasticsearch/indices/analysis/BosonNLPIndicesAnalysis.java
@@ -0,0 +1,73 @@
+package org.elasticsearch.indices.analysis;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.bosonnlp.analyzer.lucene.BosonNLPAnalyzer;
+import org.bosonnlp.analyzer.lucene.BosonNLPTokenizer;
+import org.elasticsearch.common.component.AbstractComponent;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.analysis.AnalyzerScope;
+import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory;
+import org.elasticsearch.index.analysis.PreBuiltTokenizerFactoryFactory;
+import org.elasticsearch.index.analysis.TokenizerFactory;
+import org.json.JSONException;
+
+import com.mashape.unirest.http.exceptions.UnirestException;
+
+
+/**
+ * Registers indices level analysis components.
+ */
+/**
+ * Registers indices level analysis components: a pre-built "bosonnlp"
+ * analyzer and a pre-built "bosonnlp" tokenizer factory, both configured
+ * from node settings.
+ */
+public class BosonNLPIndicesAnalysis extends AbstractComponent {
+
+    private String BOSONNLP_API_TOKEN;
+    private String TAG_URL;
+    private int spaceMode;
+    private int oovLevel;
+    private int t2s;
+    private int specialCharConv;
+
+    @Inject
+    public BosonNLPIndicesAnalysis(final Settings settings, IndicesAnalysisService indicesAnalysisService) {
+        super(settings);
+        // Get all the arguments from settings.
+        // NOTE(review): "spechial_char_conv" is misspelled but is the key used
+        // consistently across this plugin, so it is kept for compatibility.
+        this.TAG_URL = settings.get("API_URL", "");
+        this.BOSONNLP_API_TOKEN = settings.get("API_TOKEN", "");
+        this.spaceMode = Integer.parseInt(settings.get("space_mode", "0"));
+        this.oovLevel = Integer.parseInt(settings.get("oov_level", "3"));
+        this.t2s = Integer.parseInt(settings.get("t2s", "0"));
+        this.specialCharConv = Integer.parseInt(settings.get("spechial_char_conv", "0"));
+
+        // Register the bosonnlp type analyzer
+        indicesAnalysisService.analyzerProviderFactories().put("bosonnlp",
+                new PreBuiltAnalyzerProviderFactory("bosonnlp", AnalyzerScope.GLOBAL,
+                        new BosonNLPAnalyzer(TAG_URL, BOSONNLP_API_TOKEN, spaceMode, oovLevel, t2s, specialCharConv)));
+
+        // Register the bosonnlp type tokenizer
+        indicesAnalysisService.tokenizerFactories().put("bosonnlp",
+                new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
+
+                    @Override
+                    public String name() {
+                        return "bosonnlp";
+                    }
+
+                    @Override
+                    public Tokenizer create() {
+                        try {
+                            return new BosonNLPTokenizer(TAG_URL, BOSONNLP_API_TOKEN, spaceMode, oovLevel, t2s, specialCharConv);
+                        } catch (JSONException | IOException | UnirestException e) {
+                            // Previously swallowed (printStackTrace) and a null
+                            // Tokenizer was returned, deferring the failure to
+                            // an NPE deep in the analysis chain. Fail fast with
+                            // the cause preserved instead.
+                            throw new IllegalStateException("Failed to create BosonNLP tokenizer", e);
+                        }
+                    }
+
+                }));
+
+    }
+
+}
diff --git a/src/main/java/org/elasticsearch/indices/analysis/BosonNLPIndicesAnalysisModule.java b/src/main/java/org/elasticsearch/indices/analysis/BosonNLPIndicesAnalysisModule.java
new file mode 100644
index 0000000..bd336a5
--- /dev/null
+++ b/src/main/java/org/elasticsearch/indices/analysis/BosonNLPIndicesAnalysisModule.java
@@ -0,0 +1,13 @@
+package org.elasticsearch.indices.analysis;
+
+import org.elasticsearch.common.inject.AbstractModule;
+
+/**
+ * Guice module that eagerly binds {@link BosonNLPIndicesAnalysis} so the
+ * pre-built analyzer/tokenizer are registered at node startup.
+ */
+public class BosonNLPIndicesAnalysisModule extends AbstractModule{
+
+ @Override
+ protected void configure() {
+ // Eager singleton: registration must happen at startup, not on first use.
+ bind(BosonNLPIndicesAnalysis.class).asEagerSingleton();
+
+ }
+
+}
diff --git a/src/main/java/org/elasticsearch/plugin/analysis/bosonnlp/AnalysisBosonNLPPlugin.java b/src/main/java/org/elasticsearch/plugin/analysis/bosonnlp/AnalysisBosonNLPPlugin.java
new file mode 100644
index 0000000..042dfb9
--- /dev/null
+++ b/src/main/java/org/elasticsearch/plugin/analysis/bosonnlp/AnalysisBosonNLPPlugin.java
@@ -0,0 +1,41 @@
+package org.elasticsearch.plugin.analysis.bosonnlp;
+
+import java.util.Collection;
+import java.util.Collections;
+
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.inject.Module;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.analysis.AnalysisModule;
+import org.elasticsearch.index.analysis.BosonNLPAnalysisBinderProcessor;
+import org.elasticsearch.indices.analysis.BosonNLPIndicesAnalysisModule;
+import org.elasticsearch.plugins.Plugin;
+
+/**
+ * Elasticsearch plugin entry point: registers the node-level BosonNLP
+ * analysis module and hooks the index-level binder processor into the
+ * analysis module.
+ */
+public class AnalysisBosonNLPPlugin extends Plugin {
+
+    private final Settings settings;
+
+    public AnalysisBosonNLPPlugin(Settings settings) {
+        this.settings = settings;
+    }
+
+    @Override
+    public String name() {
+        return "analysis-bosonnlp";
+    }
+
+    @Override
+    public String description() {
+        return "BosonNLP analysis plugin for elasticsearch.";
+    }
+
+    /**
+     * Node-level modules contributed by this plugin. Typed as
+     * {@code Collection<Module>} (the raw {@code Collection} was an
+     * unchecked override of the generic Plugin signature).
+     */
+    @Override
+    public Collection<Module> nodeModules() {
+        return Collections.<Module>singletonList(new BosonNLPIndicesAnalysisModule());
+    }
+
+    /** Adds the index-level analyzer/tokenizer bindings. */
+    public void onModule(AnalysisModule module) {
+        module.addProcessor(new BosonNLPAnalysisBinderProcessor());
+    }
+
+}
diff --git a/src/main/resources/plugin-descriptor.properties b/src/main/resources/plugin-descriptor.properties
new file mode 100644
index 0000000..24d1551
--- /dev/null
+++ b/src/main/resources/plugin-descriptor.properties
@@ -0,0 +1,82 @@
+# Elasticsearch plugin descriptor file
+# This file must exist as 'plugin-descriptor.properties' at
+# the root directory of all plugins.
+#
+# A plugin can be 'site', 'jvm', or both.
+#
+### example site plugin for "foo":
+#
+# foo.zip <-- zip file for the plugin, with this structure:
+# _site/ <-- the contents that will be served
+# plugin-descriptor.properties <-- example contents below:
+#
+# site=true
+# description=My cool plugin
+# version=1.0
+#
+### example jvm plugin for "foo"
+#
+# foo.zip <-- zip file for the plugin, with this structure:
+# .jar <-- classes, resources, dependencies
+# .jar <-- any number of jars
+# plugin-descriptor.properties <-- example contents below:
+#
+# jvm=true
+# classname=foo.bar.BazPlugin
+# description=My cool plugin
+# version=2.0.0-rc1
+# elasticsearch.version=2.0
+# java.version=1.7
+#
+### mandatory elements for all plugins:
+#
+# 'description': simple summary of the plugin
+description=${project.description}
+#
+# 'version': plugin's version
+version=${project.version}
+#
+# 'name': the plugin name
+name=${elasticsearch.plugin.name}
+
+### mandatory elements for site plugins:
+#
+# 'site': set to true to indicate contents of the _site/
+# directory in the root of the plugin should be served.
+# site=${elasticsearch.plugin.site}
+#
+### mandatory elements for jvm plugins :
+#
+# 'jvm': true if the 'classname' class should be loaded
+# from jar files in the root directory of the plugin.
+# Note that only jar files in the root directory are
+# added to the classpath for the plugin! If you need
+# other resources, package them into a resources jar.
+jvm=${elasticsearch.plugin.jvm}
+#
+# 'classname': the name of the class to load, fully-qualified.
+classname=${elasticsearch.plugin.classname}
+#
+# 'java.version' version of java the code is built against
+# use the system property java.specification.version
+# version string must be a sequence of nonnegative decimal integers
+# separated by "."'s and may have leading zeros
+java.version=${maven.compiler.target}
+#
+# 'elasticsearch.version' version of elasticsearch compiled against
+# You will have to release a new version of the plugin for each new
+# elasticsearch release. This version is checked when the plugin
+# is loaded so Elasticsearch will refuse to start in the presence of
+# plugins with the incorrect elasticsearch.version.
+elasticsearch.version=${elasticsearch.version}
+#
+### deprecated elements for jvm plugins :
+#
+# 'isolated': true if the plugin should have its own classloader.
+# passing false is deprecated, and only intended to support plugins
+# that have hard dependencies against each other. If this is
+# not specified, then the plugin is isolated by default.
+isolated=${elasticsearch.plugin.isolated}
+#
+
+# plugin=org.elasticsearch.plugin.analysis.bosonnlp.AnalysisBosonNLPPlugin
diff --git a/src/test/java/org/elasticsearch/BosonNLPTest.java b/src/test/java/org/elasticsearch/BosonNLPTest.java
new file mode 100644
index 0000000..0e21273
--- /dev/null
+++ b/src/test/java/org/elasticsearch/BosonNLPTest.java
@@ -0,0 +1,38 @@
+package org.elasticsearch;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ * Unit test for Bosonnlp word segmenter.
+ */
+public class BosonNLPTest
+    extends TestCase
+{
+    /**
+     * Create the test case
+     *
+     * @param testName name of the test case
+     */
+    public BosonNLPTest( String testName )
+    {
+        super( testName );
+    }
+
+    /**
+     * @return the suite of tests being tested
+     */
+    public static Test suite()
+    {
+        return new TestSuite( BosonNLPTest.class );
+    }
+
+    /**
+     * Rigourous Test :-)
+     */
+    // NOTE(review): placeholder assertion only -- no segmenter behavior is
+    // actually exercised yet (the BosonNLP API requires a token/network).
+    public void testApp()
+    {
+        assertTrue( true );
+    }
+}