-
-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Mika
committed
May 14, 2021
1 parent
c8e9f6e
commit 808c9b0
Showing
7 changed files
with
190 additions
and
242 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,7 +23,7 @@ | |
# Versions should comply with PEP440. For a discussion on single-sourcing | ||
# the version across setup.py and the project code, see | ||
# https://packaging.python.org/en/latest/single_source_version.html | ||
version='1.2.3', | ||
version='1.3.0', | ||
|
||
description='An NLP library for Uralic languages such as Finnish and Sami. Also supports Arabic, Russian etc.', | ||
long_description=long_description, | ||
|
@@ -37,7 +37,7 @@ | |
author_email='[email protected]', | ||
|
||
# Choose your license | ||
license='Apache License, Version 2.0', | ||
license='Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International Public License', | ||
|
||
# See https://pypi.python.org/pypi?%3Aaction=list_classifiers | ||
classifiers=[ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# natas is an optional dependency. When it cannot be imported, call_onmt is
# left as None so that this module still imports; NeuralFST checks for None
# and raises NatasNotInstalled with installation instructions.
try:
    from natas.normalize import call_onmt
except Exception:
    # Deliberately broad (an optional backend may fail to import for reasons
    # other than being absent), but unlike a bare ``except:`` this does not
    # swallow KeyboardInterrupt or SystemExit.
    call_onmt = None
import os | ||
class NatasNotInstalled(Exception):
    """Raised when a neural model is requested but natas could not be imported."""
|
||
class NeuralFST(object):
    """Analyzer/generator backed by neural models run through natas.

    The models are looked up inside ``model_path`` under the fixed file
    names ``analyzer.pt``, ``lemmatizer.pt`` and ``generator.pt``.
    """

    def __init__(self, model_path):
        """Remember the model directory.

        :param model_path: directory that contains the ``.pt`` model files.
        :raises NatasNotInstalled: if ``call_onmt`` is unavailable because
            natas could not be imported.
        """
        if call_onmt is None:
            raise NatasNotInstalled("Natas is needed for neural models, run:\n\npip install natas")

        self.model_path = model_path

    def analyze(self, word):
        """Return a neural analysis of ``word``.

        :param word: the word form to analyze.
        :return: ``[]`` for empty (or ``None``) input, otherwise a list with
            one ``(analysis, 0.0)`` tuple where ``analysis`` is the predicted
            lemma followed by ``+`` and the ``+``-joined predicted tags. The
            second element is always the constant ``0.0`` (presumably a
            placeholder weight -- the models' scores are not used here).
        """
        if not word:
            return []
        model_a = os.path.join(self.model_path, "analyzer.pt")
        model_l = os.path.join(self.model_path, "lemmatizer.pt")
        # Both models receive the same input: the word's characters
        # separated by single spaces.
        spaced = " ".join(word)
        # The tag model's output is space separated; join the tags with "+".
        tags = call_onmt([spaced], model_a, n_best=1)[0][0].replace(" ", "+")
        # The lemma comes back as space-separated symbols; glue them together.
        lemma = call_onmt([spaced], model_l, n_best=1)[0][0].replace(" ", "")
        return [(lemma + "+" + tags, 0.0)]

    def generate(self, word):
        """Return the word form generated for an analysis string.

        :param word: an analysis of the form ``lemma+Tag1+Tag2...``.
        :return: ``[]`` for empty (or ``None``) input, otherwise a list with
            one ``(form, 0.0)`` tuple; the second element is always the
            constant ``0.0``.
        """
        if not word:
            return []
        model_g = os.path.join(self.model_path, "generator.pt")
        parts = word.split("+")
        # Only the lemma is split into characters; each tag stays one token.
        parts[0] = " ".join(parts[0])
        form = call_onmt([" ".join(parts)], model_g, n_best=1)[0][0].replace(" ", "")
        return [(form, 0.0)]
Oops, something went wrong.