diff --git a/api/src/test/resources/dat/sample-trn.tsv b/api/src/test/resources/dat/sample-trn.tsv new file mode 100644 index 0000000..2aaadbc --- /dev/null +++ b/api/src/test/resources/dat/sample-trn.tsv @@ -0,0 +1,170 @@ +1 The the DT _ 3 det _ B-ORG +2 Education education NNP _ 3 compound _ I-ORG +3 Directorate directorate NNP _ 8 nsubj 8:A0;9:A0 I-ORG +4 for for IN _ 3 prep _ I-ORG +5 Holy holy NNP _ 6 compound _ I-ORG +6 Mecca mecca NNP _ 4 pobj _ L-ORG +7 has have VBZ _ 8 aux _ O +8 finished finish VBN pb=finish.01 0 root _ O +9 preparing prepare VBG pb=prepare.01 8 xcomp 8:A1 O +10 a a DT _ 13 det _ O +11 new new JJ _ 13 amod _ O +12 computer computer NN _ 13 compound _ O +13 program program NN _ 9 dobj 15:A0;9:A1 O +14 to to TO _ 15 aux _ O +15 monitor monitor VB pb=monitor.01 13 relcl _ O +16 disadvantaged disadvantaged JJ _ 19 amod _ O +17 and and CC _ 16 cc _ O +18 deprived deprived JJ _ 16 conj _ O +19 students student NNS _ 15 dobj 15:A1 O +20 in in IN sem=LOC 19 prep _ O +21 schools school NNS _ 20 pobj 22:A1 O +22 supervised supervise VBN pb=supervise.01 21 acl _ O +23 by by IN _ 22 agent 22:A0 O +24 the the DT _ 25 det _ O +25 directorate directorate NN _ 23 pobj _ O +26 , , , _ 21 punct _ O +27 of of IN _ 32 prep _ O +28 which which WDT _ 27 pobj _ O +29 there there EX _ 30 expl _ O +30 are be VBP pb=be.02 21 relcl _ O +31 over over IN syn=PRD 30 prep 30:A1 B-QUANTITY +32 500 0 CD _ 31 pobj _ L-QUANTITY +33 . . . _ 8 punct _ O + +1 Bakr bakr NNP _ 3 compound _ B-PERSON +2 Ibrahim ibrahim NNP _ 3 compound _ I-PERSON +3 Basfar basfar NNP _ 11 nsubj 11:A0 L-PERSON +4 , , , _ 3 punct _ O +5 Director director NNP _ 7 compound _ O +6 - - HYPH _ 7 punct _ O +7 General general NNP _ 3 appos _ O +8 of of IN _ 7 prep _ O +9 Education education NNP _ 8 pobj _ O +10 , , , _ 11 punct _ O +11 said say VBD pb=say.01 0 root _ O +12 the the DT _ 13 det _ O +13 program program NN _ 15 nsubjpass 15:A1 O +14 was be VBD _ 15 auxpass _ O +15 aimed aim VBN pb=aim.02 11 ccomp 11:A1 O +16 at at IN syn=CLR 15 prep 15:A2 O +17 identifying identify VBG pb=identify.01|syn=NOM 16 pcomp _ O +18 students student NNS _ 17 dobj 17:A1 O +19 in in IN _ 18 prep _ O +20 need need NN _ 19 pobj _ O +21 in in IN sem=LOC 18 prep _ O +22 schools school NNS _ 21 pobj 23:A1 O +23 run run VBN pb=run.01 22 acl _ O +24 by by IN _ 23 agent 23:A0 O +25 the the DT _ 26 det _ O +26 directorate directorate NN _ 24 pobj _ O +27 . . . _ 11 punct _ O + +1 The the DT _ 2 det _ _ +2 program program NN _ 4 nsubj 4:A1 _ +3 will will MD _ 4 aux 4:AM-MOD _ +4 be be VB pb=be.01 0 root _ _ +5 an an DT _ 7 det _ _ +6 important important JJ _ 7 amod _ _ +7 resource resource NN syn=PRD 4 attr 4:A2 _ +8 for for IN _ 7 prep _ _ +9 all all DT _ 10 det _ _ +10 associations association NNS _ 8 pobj 15:A0;17:A0 _ +11 and and CC _ 10 cc _ _ +12 charitable charitable JJ _ 13 amod _ _ +13 organizations organization NNS _ 10 conj _ _ +14 who who WP _ 15 nsubj 15:R-A0;17:R-A0 _ +15 wish wish VBP pb=wish.01 10 relcl _ _ +16 to to TO _ 17 aux _ _ +17 identify identify VB pb=identify.01 15 xcomp 15:A1 _ +18 poor poor JJ _ 19 amod _ _ +19 students student NNS _ 17 dobj 17:A1 _ +20 in in IN _ 19 prep _ _ +21 need need NN _ 20 pobj _ _ +22 of of IN _ 21 prep _ _ +23 support support NN _ 22 pobj _ _ +24 . . . _ 4 punct _ _ + +1 He he PRP _ 2 nsubj 2:A0 _ +2 said say VBD pb=say.01 0 root _ _ +3 that that IN _ 7 mark _ _ +4 school school NN _ 5 compound _ _ +5 principals principal NNS _ 7 nsubj 7:A0 _ +6 would would MD _ 7 aux 7:AM-MOD _ +7 supervise supervise VB pb=supervise.01 2 ccomp 2:A1 _ +8 its its PRP$ _ 9 poss _ _ +9 implementation implementation NN _ 7 dobj 7:A1 _ +10 to to TO _ 11 aux _ _ +11 assure assure VB pb=assure.01|sem=PRP 7 advcl 7:AM-PRP _ +12 the the DT _ 13 det _ _ +13 accuracy accuracy NN _ 11 dobj 11:A2 _ +14 and and CC _ 13 cc _ _ +15 correctness correctness NN _ 13 conj _ _ +16 of of IN _ 13 prep _ _ +17 data datum NNS _ 16 pobj _ _ +18 and and CC _ 11 cc _ _ +19 to to TO _ 20 aux _ _ +20 register register VB pb=register.02 11 conj _ _ +21 disadvantaged disadvantaged JJ _ 22 amod _ _ +22 students student NNS _ 20 dobj 20:A1 _ +23 and and CC _ 20 cc _ _ +24 attach attach VB pb=attach.01 20 conj _ _ +25 a a DT _ 26 det _ _ +26 copy copy NN _ 24 dobj 24:A1 _ +27 of of IN _ 26 prep _ _ +28 their their PRP$ _ 31 poss _ _ +29 family family NN _ 31 compound _ _ +30 identity identity NN _ 31 compound _ _ +31 card card NN _ 27 pobj _ _ +32 . . . _ 2 punct _ _ + +1 The the DT _ 4 det _ O +2 Director director NNP _ 4 compound _ O +3 - - HYPH _ 4 punct _ O +4 General general NNP _ 10 nsubj 10:A0;13:A0 O +5 of of IN _ 4 prep _ O +6 Education education NNP _ 5 pobj _ O +7 for for IN _ 6 prep _ O +8 Holy holy NNP _ 9 compound _ B-GPE +9 Mecca mecca NNP _ 7 pobj _ L-GPE +10 went go VBD pb=go.06 0 root _ O +11 on on RP _ 10 prt 10:A2 O +12 to to TO _ 13 aux _ O +13 say say VB pb=say.01 10 xcomp 10:A1 O +14 that that IN _ 19 mark _ O +15 the the DT _ 16 det _ O +16 program program NN _ 19 nsubjpass 19:A1 O +17 would would MD _ 19 aux 19:AM-MOD O +18 be be VB _ 19 auxpass _ O +19 restricted restrict VBN pb=restrict.01 13 ccomp 13:A1 O +20 to to IN syn=CLR 19 prep 19:A2 O +21 students student NNS _ 20 pobj 27:A0 O +22 in in IN _ 21 prep _ O +23 need need NN _ 22 pobj _ O +24 of of IN _ 23 prep _ O +25 zakat zakat FW _ 24 pobj _ O +26 who who WP _ 27 nsubj 27:R-A0 O +27 receive receive VBP pb=receive.01 21 relcl _ O +28 no no DT _ 29 det _ O +29 assistance assistance NN _ 27 dobj 27:A1 O +30 from from IN syn=CLR 27 prep 27:A2 O +31 the the DT _ 32 det _ O +32 school school NN _ 30 pobj _ O +33 and and CC _ 21 cc _ O +34 those those DT _ 21 conj 37:A1 O +35 whose whose WP$ _ 36 poss _ O +36 fathers father NNS _ 37 nsubj 37:R-A1 O +37 are be VBP pb=be.01 34 relcl _ O +38 disabled disabled JJ syn=PRD 37 acomp 37:A2 O +39 , , , _ 38 punct _ O +40 in in IN _ 38 conj _ O +41 prison prison NN _ 40 pobj _ O +42 or or CC _ 40 cc _ O +43 absent absent JJ _ 40 conj _ O +44 and and CC _ 38 cc _ O +45 with with IN _ 38 conj _ O +46 no no DT _ 49 det _ O +47 other other JJ _ 49 amod _ O +48 financial financial JJ _ 49 amod _ O +49 support support NN _ 45 pobj _ O +50 . . . _ 10 punct _ O diff --git a/md/quickstart/decode.md b/md/quickstart/decode.md index 0334a85..d6291b7 100644 --- a/md/quickstart/decode.md +++ b/md/quickstart/decode.md @@ -24,10 +24,10 @@ java edu.emory.mathcs.nlp.bin.NLPDecode -c -i [-ie `: see [`configuration#tsv`](train.md#configuration). This does not need to be specified when `raw` or `sen` is used. * ``: see [`configuration#lexica`](train.md#configuration). -* `` specifies the statistical model for each component (e.g., [english models](../supplements/english-lexica-models.md#models); see [`NLPMode`](https://github.com/emorynlp/nlp4j-core/blob/master/src/main/java/edu/emory/mathcs/nlp/component/template/util/NLPMode.java)). \ No newline at end of file +* `` specifies the statistical model for each component (e.g., [english models](../supplements/english-lexica-models.md#models); see [`NLPMode`](https://github.com/emorynlp/nlp4j-core/blob/master/src/main/java/edu/emory/mathcs/nlp/component/template/util/NLPMode.java)). diff --git a/md/quickstart/train.md b/md/quickstart/train.md index 174c082..6cba97e 100644 --- a/md/quickstart/train.md +++ b/md/quickstart/train.md @@ -5,7 +5,7 @@ The following command trains an NLP component: ``` -java edu.emory.mathcs.nlp.bin.NLPTrain -mode -c -t -d [-f -m -p -te -de ] +cli/target/appassembler/bin/nlptrain -mode -c -t -d [-f -m -p -te -de ] -c : configuration file (required) -m : output model file (optional) @@ -28,10 +28,10 @@ java edu.emory.mathcs.nlp.bin.NLPTrain -mode -c -t