Skip to content

Commit

Permalink
Removed thin abstraction layer "TokenizerBuilder" combining into load…
Browse files Browse the repository at this point in the history
…er. Made loader/builder terminology more consistent.
  • Loading branch information
mikob committed Dec 18, 2018
1 parent 71ea847 commit 09913ec
Show file tree
Hide file tree
Showing 14 changed files with 63 additions and 171 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ Load this library as follows:

You can prepare a tokenizer like this:

kuromoji.builder({ dicPath: "path/to/dictionary/dir/" }).build(function (err, tokenizer) {
kuromoji.loader({ dic_path: "path/to/dictionary/dir/" }).load(function (err, tokenizer) {
// tokenizer is ready
var path = tokenizer.tokenize("すもももももももものうち");
console.log(path);
Expand All @@ -71,7 +71,7 @@ In your HTML:

In your JavaScript:

kuromoji.builder({ dicPath: "/url/to/dictionary/dir/" }).build(function (err, tokenizer) {
kuromoji.loader({ dic_path: "/url/to/dictionary/dir/" }).load(function (err, tokenizer) {
// tokenizer is ready
var path = tokenizer.tokenize("すもももももももものうち");
console.log(path);
Expand Down
121 changes: 33 additions & 88 deletions build/kuromoji.js
Original file line number Diff line number Diff line change
Expand Up @@ -6963,64 +6963,7 @@ Tokenizer.prototype.getLattice = function (text) {

module.exports = Tokenizer;

},{"./util/IpadicFormatter":22,"./viterbi/ViterbiBuilder":24,"./viterbi/ViterbiSearcher":27}],7:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

"use strict";

var Tokenizer = require("./Tokenizer");
var DictionaryLoader = require("./loader/NodeDictionaryLoader");

/**
 * TokenizerBuilder creates Tokenizer instances.
 * @param {Object} [option] Settings object holding key-value pairs
 * @param {string} [option.dicPath="dict/"] Dictionary directory path (or URL when used in a browser)
 * @constructor
 */
function TokenizerBuilder(option) {
    // Tolerate a missing option object: fall back to the default directory
    // instead of throwing a TypeError on the property access below.
    if (option == null || option.dicPath == null) {
        this.dic_path = "dict/";
    } else {
        this.dic_path = option.dicPath;
    }
}

/**
 * Build a Tokenizer instance asynchronously.
 *
 * Creates a DictionaryLoader for this builder's dictionary path, loads the
 * dictionaries, and hands a Tokenizer wrapping them to the callback. The
 * Tokenizer is constructed and passed along even when the loader reports an
 * error, so callers should check the error argument first.
 * @param {TokenizerBuilder~onLoad} callback Callback function
 */
TokenizerBuilder.prototype.build = function (callback) {
    var dictionaryLoader = new DictionaryLoader(this.dic_path);

    dictionaryLoader.load(function onDictionariesLoaded(loadError, dictionaries) {
        var tokenizer = new Tokenizer(dictionaries);
        callback(loadError, tokenizer);
    });
};

/**
* Callback used by build
* @callback TokenizerBuilder~onLoad
* @param {Object} err Error object
* @param {Tokenizer} tokenizer Prepared Tokenizer
*/

module.exports = TokenizerBuilder;

},{"./Tokenizer":6,"./loader/NodeDictionaryLoader":19}],8:[function(require,module,exports){
},{"./util/IpadicFormatter":21,"./viterbi/ViterbiBuilder":23,"./viterbi/ViterbiSearcher":26}],7:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand Down Expand Up @@ -7059,7 +7002,7 @@ function CharacterClass(class_id, class_name, is_always_invoke, is_grouping, max

module.exports = CharacterClass;

},{}],9:[function(require,module,exports){
},{}],8:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand Down Expand Up @@ -7266,7 +7209,7 @@ CharacterDefinition.prototype.lookup = function (ch) {

module.exports = CharacterDefinition;

},{"../util/SurrogateAwareString":23,"./CharacterClass":8,"./InvokeDefinitionMap":12}],10:[function(require,module,exports){
},{"../util/SurrogateAwareString":22,"./CharacterClass":7,"./InvokeDefinitionMap":11}],9:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand Down Expand Up @@ -7327,7 +7270,7 @@ ConnectionCosts.prototype.loadConnectionCosts = function (connection_costs_buffe

module.exports = ConnectionCosts;

},{}],11:[function(require,module,exports){
},{}],10:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand Down Expand Up @@ -7411,7 +7354,7 @@ DynamicDictionaries.prototype.loadUnknownDictionaries = function (unk_buffer, un

module.exports = DynamicDictionaries;

},{"./ConnectionCosts":10,"./TokenInfoDictionary":13,"./UnknownDictionary":14,"doublearray":2}],12:[function(require,module,exports){
},{"./ConnectionCosts":9,"./TokenInfoDictionary":12,"./UnknownDictionary":13,"doublearray":2}],11:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand Down Expand Up @@ -7523,7 +7466,7 @@ InvokeDefinitionMap.prototype.toBuffer = function () {

module.exports = InvokeDefinitionMap;

},{"../util/ByteBuffer":21,"./CharacterClass":8}],13:[function(require,module,exports){
},{"../util/ByteBuffer":20,"./CharacterClass":7}],12:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand Down Expand Up @@ -7677,7 +7620,7 @@ TokenInfoDictionary.prototype.getFeatures = function (token_info_id_str) {

module.exports = TokenInfoDictionary;

},{"../util/ByteBuffer":21}],14:[function(require,module,exports){
},{"../util/ByteBuffer":20}],13:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand Down Expand Up @@ -7737,7 +7680,7 @@ UnknownDictionary.prototype.loadUnknownDictionaries = function (unk_buffer, unk_

module.exports = UnknownDictionary;

},{"../util/ByteBuffer":21,"./CharacterDefinition":9,"./TokenInfoDictionary":13}],15:[function(require,module,exports){
},{"../util/ByteBuffer":20,"./CharacterDefinition":8,"./TokenInfoDictionary":12}],14:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand Down Expand Up @@ -7807,7 +7750,7 @@ CharacterDefinitionBuilder.prototype.build = function () {

module.exports = CharacterDefinitionBuilder;

},{"../CharacterDefinition":9,"../InvokeDefinitionMap":12}],16:[function(require,module,exports){
},{"../CharacterDefinition":8,"../InvokeDefinitionMap":11}],15:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand Down Expand Up @@ -7879,7 +7822,7 @@ ConnectionCostsBuilder.prototype.build = function () {

module.exports = ConnectionCostsBuilder;

},{"../ConnectionCosts":10}],17:[function(require,module,exports){
},{"../ConnectionCosts":9}],16:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand Down Expand Up @@ -8039,7 +7982,7 @@ DictionaryBuilder.prototype.buildDoubleArray = function () {

module.exports = DictionaryBuilder;

},{"../DynamicDictionaries":11,"../TokenInfoDictionary":13,"../UnknownDictionary":14,"./CharacterDefinitionBuilder":15,"./ConnectionCostsBuilder":16,"doublearray":2}],18:[function(require,module,exports){
},{"../DynamicDictionaries":10,"../TokenInfoDictionary":12,"../UnknownDictionary":13,"./CharacterDefinitionBuilder":14,"./ConnectionCostsBuilder":15,"doublearray":2}],17:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand All @@ -8059,22 +8002,22 @@ module.exports = DictionaryBuilder;

"use strict";

var TokenizerBuilder = require("./TokenizerBuilder");
var DictionaryBuilder = require("./dict/builder/DictionaryBuilder");
var DictionaryLoader = require("./loader/NodeDictionaryLoader");

// Public methods
var kuromoji = {
builder: function (option) {
return new TokenizerBuilder(option);
loader: function (options) {
return new DictionaryLoader(options);
},
dictionaryBuilder: function () {
builder: function () {
return new DictionaryBuilder();
}
};

module.exports = kuromoji;

},{"./TokenizerBuilder":7,"./dict/builder/DictionaryBuilder":17}],19:[function(require,module,exports){
},{"./dict/builder/DictionaryBuilder":16,"./loader/NodeDictionaryLoader":18}],18:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand All @@ -8099,11 +8042,11 @@ var DictionaryLoader = require("./DictionaryLoader");

/**
* BrowserDictionaryLoader inherits DictionaryLoader, using XMLHttpRequest for download
* @param {string} dic_path Dictionary path
* @param {object} options Options for the dictionary
* @constructor
*/
function BrowserDictionaryLoader(dic_path) {
DictionaryLoader.apply(this, [dic_path]);
function BrowserDictionaryLoader(options) {
DictionaryLoader.call(this, options);
}

BrowserDictionaryLoader.prototype = Object.create(DictionaryLoader.prototype);
Expand All @@ -8114,6 +8057,7 @@ BrowserDictionaryLoader.prototype = Object.create(DictionaryLoader.prototype);
* @param {BrowserDictionaryLoader~onLoad} callback Callback function
*/
BrowserDictionaryLoader.prototype.loadArrayBuffer = function (url, callback) {
// Check if we have it cached
var xhr = new XMLHttpRequest();
xhr.open("GET", url, true);
xhr.responseType = "arraybuffer";
Expand Down Expand Up @@ -8143,7 +8087,7 @@ BrowserDictionaryLoader.prototype.loadArrayBuffer = function (url, callback) {

module.exports = BrowserDictionaryLoader;

},{"./DictionaryLoader":20,"zlibjs/bin/gunzip.min.js":5}],20:[function(require,module,exports){
},{"./DictionaryLoader":19,"zlibjs/bin/gunzip.min.js":5}],19:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand All @@ -8166,15 +8110,16 @@ module.exports = BrowserDictionaryLoader;
var path = require("path");
var async = require("async");
var DynamicDictionaries = require("../dict/DynamicDictionaries");
var Tokenizer = require("../Tokenizer");

/**
* DictionaryLoader base constructor
* @param {Object} options Loader options; options.dic_path is the dictionary directory path (defaults to 'dict/')
* @constructor
*/
function DictionaryLoader(dic_path) {
function DictionaryLoader(options) {
this.dic = new DynamicDictionaries();
this.dic_path = dic_path;
this.dic_path = options.dic_path || 'dict/';
}

DictionaryLoader.prototype.loadArrayBuffer = function (file, callback) {
Expand Down Expand Up @@ -8269,7 +8214,7 @@ DictionaryLoader.prototype.load = function (load_callback) {
});
}
], function (err) {
load_callback(err, dic);
load_callback(err, new Tokenizer(dic));
});
};

Expand All @@ -8282,7 +8227,7 @@ DictionaryLoader.prototype.load = function (load_callback) {

module.exports = DictionaryLoader;

},{"../dict/DynamicDictionaries":11,"async":1,"path":3}],21:[function(require,module,exports){
},{"../Tokenizer":6,"../dict/DynamicDictionaries":10,"async":1,"path":3}],20:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand Down Expand Up @@ -8573,7 +8518,7 @@ ByteBuffer.prototype.getString = function (index) {

module.exports = ByteBuffer;

},{}],22:[function(require,module,exports){
},{}],21:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand Down Expand Up @@ -8642,7 +8587,7 @@ IpadicFormatter.prototype.formatUnknownEntry = function (word_id, position, type

module.exports = IpadicFormatter;

},{}],23:[function(require,module,exports){
},{}],22:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand Down Expand Up @@ -8735,7 +8680,7 @@ SurrogateAwareString.isSurrogatePair = function (ch) {

module.exports = SurrogateAwareString;

},{}],24:[function(require,module,exports){
},{}],23:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand Down Expand Up @@ -8838,7 +8783,7 @@ ViterbiBuilder.prototype.build = function (sentence_str) {

module.exports = ViterbiBuilder;

},{"../util/SurrogateAwareString":23,"./ViterbiLattice":25,"./ViterbiNode":26}],25:[function(require,module,exports){
},{"../util/SurrogateAwareString":22,"./ViterbiLattice":24,"./ViterbiNode":25}],24:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand Down Expand Up @@ -8900,7 +8845,7 @@ ViterbiLattice.prototype.appendEos = function () {

module.exports = ViterbiLattice;

},{"./ViterbiNode":26}],26:[function(require,module,exports){
},{"./ViterbiNode":25}],25:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand Down Expand Up @@ -8951,7 +8896,7 @@ function ViterbiNode(node_name, node_cost, start_pos, length, type, left_id, rig

module.exports = ViterbiNode;

},{}],27:[function(require,module,exports){
},{}],26:[function(require,module,exports){
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
Expand Down Expand Up @@ -9055,5 +9000,5 @@ ViterbiSearcher.prototype.backward = function (lattice) {

module.exports = ViterbiSearcher;

},{}]},{},[18])(18)
},{}]},{},[17])(17)
});
4 changes: 2 additions & 2 deletions demo/js/tokenize.js
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ vm.$watch("inputText", function (value) {


// Load and prepare tokenizer
kuromoji.builder({ dicPath: DIC_URL }).build(function (error, _tokenizer) {
kuromoji.loader({ dic_path: DIC_URL }).load(function (error, _tokenizer) {
if (error != null) {
console.log(error);
}
Expand Down Expand Up @@ -168,4 +168,4 @@ function drawLattice () {
.rankDir("LR");
renderer.layout(layout).run(g, d3.select("svg g"));
}
*/
*/
2 changes: 1 addition & 1 deletion example/load-node.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ var kuromoji = require("../src/kuromoji");
var DIC_DIR = "dict/";

// Load dictionaries from file, and prepare tokenizer
kuromoji.builder({ dicPath: DIC_DIR }).build(function (error, tokenizer) {
kuromoji.loader({ dic_path: DIC_DIR }).load(function (error, tokenizer) {
var path = tokenizer.tokenize("すもももももももものうち");
console.log(path);
module.exports = tokenizer;
Expand Down
2 changes: 1 addition & 1 deletion gulpfile.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ gulp.task("create-dat-files", (done) => {
}

const dic = new IPADic();
const builder = kuromoji.dictionaryBuilder();
const builder = kuromoji.builder();

// Build token info dictionary
const tokenInfoPromise = dic.readTokenInfo((line) => {
Expand Down
Loading

0 comments on commit 09913ec

Please sign in to comment.