From dae54334a7474d7de6dd5c435092ca589db314a3 Mon Sep 17 00:00:00 2001 From: scossu Date: Wed, 28 Dec 2022 12:38:50 -0500 Subject: [PATCH] Add Arabic transliteration via 3rd party. --- .gitmodules | 3 +++ Dockerfile | 5 ++++- TODO.md | 4 ++-- doc/hooks.md | 2 +- ext/.keep | 0 ext/arabic_transliterator | 1 + scriptshifter/hooks/arabic_ext.py | 22 ++++++++++++++++++++++ scriptshifter/tables/__init__.py | 2 +- scriptshifter/tables/data/arabic_ext.yml | 12 ++++++++++++ scriptshifter/tables/data/index.yml | 3 +++ 10 files changed, 49 insertions(+), 5 deletions(-) create mode 100644 .gitmodules create mode 100644 ext/.keep create mode 160000 ext/arabic_transliterator create mode 100644 scriptshifter/hooks/arabic_ext.py create mode 100644 scriptshifter/tables/data/arabic_ext.yml diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..2630881 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "ext/arabic_transliterator"] + path = ext/arabic_transliterator + url = git@github.com:MTG/ArabicTransliterator.git diff --git a/Dockerfile b/Dockerfile index c99b6c8..bfb02b6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,9 +9,12 @@ WORKDIR ${_workroot} COPY requirements.txt ./ RUN pip install -r requirements.txt COPY entrypoint.sh uwsgi.ini wsgi.py ./ + +COPY ext ./ext/ +RUN pip install ext/arabic_transliterator + COPY scriptshifter ./scriptshifter/ RUN chmod +x ./entrypoint.sh - RUN addgroup -S www && adduser -S www -G www RUN chown -R www:www ${_workroot} . diff --git a/TODO.md b/TODO.md index 22d2e6e..d1613c7 100644 --- a/TODO.md +++ b/TODO.md @@ -35,7 +35,7 @@ discussion, etc.); *X* = not implementing. - *D* Transliteration - *D* REST API - *W* Complete conversion of existing tables to YAML - - *P* Arabic + - *X* Arabic - *P* Armenian - *D* Asian Cyrillic - *D* Azerbajani @@ -65,6 +65,6 @@ discussion, etc.); *X* = not implementing. 
- *P* Urdu - *D* Uzbek - *P* Additional languages not in legacy tables, but in other software - - *B* Arabic S2R (ArabicTransliterator) + - *D* Arabic S2R (ArabicTransliterator) - *B* Japanese (?) - *B* Korean (K-romanizer) diff --git a/doc/hooks.md b/doc/hooks.md index feb81d7..32e3856 100644 --- a/doc/hooks.md +++ b/doc/hooks.md @@ -99,7 +99,7 @@ configuration file. See [`config.md`](./config.md) for details. The function name takes the form of `<module name>.<function name>` and must correspond to an existing module and function under the `scriptshifter.hooks` -package. Check the [`rot3.yml`](../scriptshifter/tables/data/rot3.yml) test +package. Check the [`rot3.yml`](../tests/data/rot3.yml) test configuration and the referred functions for a working example. Each hook requires some arguments to be defined in each function associated diff --git a/ext/.keep b/ext/.keep new file mode 100644 index 0000000..e69de29 diff --git a/ext/arabic_transliterator b/ext/arabic_transliterator new file mode 160000 index 0000000..df0296c --- /dev/null +++ b/ext/arabic_transliterator @@ -0,0 +1 @@ +Subproject commit df0296c5688a7bf07113fbab0f3c68c07df75edd diff --git a/scriptshifter/hooks/arabic_ext.py b/scriptshifter/hooks/arabic_ext.py new file mode 100644 index 0000000..c40ba74 --- /dev/null +++ b/scriptshifter/hooks/arabic_ext.py @@ -0,0 +1,22 @@ +import logging + +# This requires ArabicTransliterator to be installed as a package. +from arabic.ArabicTransliterator import ALA_LC_Transliterator as Trans +from mishkal.tashkeel.tashkeel import TashkeelClass + +from scriptshifter.exceptions import BREAK + + +__doc__ = """ Integrate external ArabicTransliterator library. 
""" + + +logger = logging.getLogger(__name__) + + +def s2r_post_config(ctx): + trans = Trans() + vocalizer = TashkeelClass() + voc = vocalizer.tashkeel(ctx.src) + ctx.dest = trans.do(voc.strip()) + + return BREAK diff --git a/scriptshifter/tables/__init__.py b/scriptshifter/tables/__init__.py index b57f6ef..f08d364 100644 --- a/scriptshifter/tables/__init__.py +++ b/scriptshifter/tables/__init__.py @@ -232,7 +232,7 @@ def load_hook_fn(cname, sec): for cfg_hook_fn in cfg_hook_fns: modname, fnname = path.splitext(cfg_hook_fn[0]) fnname = fnname.lstrip(".") - fn_kwargs = cfg_hook_fn[1] + fn_kwargs = cfg_hook_fn[1] if len(cfg_hook_fn) > 1 else {} try: fn = getattr(import_module( "." + modname, HOOK_PKG_PATH), fnname) diff --git a/scriptshifter/tables/data/arabic_ext.yml b/scriptshifter/tables/data/arabic_ext.yml new file mode 100644 index 0000000..535f30b --- /dev/null +++ b/scriptshifter/tables/data/arabic_ext.yml @@ -0,0 +1,12 @@ +# Arabic S2R using the 3rd-party ArabicTransliterator library: +# https://github.com/MTG/ArabicTransliterator + +general: + name: Arabic (ArabicTransliterator) + description: Arabic S2R using a 3rd party library. + +script_to_roman: + hooks: + post_config: + - + - arabic_ext.s2r_post_config diff --git a/scriptshifter/tables/data/index.yml b/scriptshifter/tables/data/index.yml index 647426c..a6d6b26 100644 --- a/scriptshifter/tables/data/index.yml +++ b/scriptshifter/tables/data/index.yml @@ -7,6 +7,9 @@ # key within the entry is the human-readable label that can be used in a # multiple-choice menu. +arabic_ext: + name: Arabic (S2R) + description: Arabic-to-Roman transliterator using the ArabicTransliterator external library. armenian: name: Armenian azerbaijani: