From 1a2dc2565b504055a33f1af0d15a333025bb5c94 Mon Sep 17 00:00:00 2001
From: scossu
Date: Wed, 28 Feb 2024 08:38:25 -0500
Subject: [PATCH] Yiddish transliteration via submodules.

---
 .github/workflows/push-test-image.yml    |  2 +-
 .gitmodules                              |  4 ++
 Dockerfile                               | 17 ++++----
 ext/yiddish                              |  1 +
 requirements.txt                         |  1 +
 scriptshifter/hooks/yiddish_/__init__.py | 51 ++++++++++++++++++++++++
 scriptshifter/tables/data/index.yml      |  2 +
 scriptshifter/tables/data/yiddish.yml    | 21 ++++++++++
 8 files changed, 90 insertions(+), 9 deletions(-)
 create mode 160000 ext/yiddish
 create mode 100644 scriptshifter/hooks/yiddish_/__init__.py
 create mode 100644 scriptshifter/tables/data/yiddish.yml

diff --git a/.github/workflows/push-test-image.yml b/.github/workflows/push-test-image.yml
index 2bcc042..786b893 100644
--- a/.github/workflows/push-test-image.yml
+++ b/.github/workflows/push-test-image.yml
@@ -2,7 +2,7 @@ name: Push test image to Docker Hub.
 on:
   push:
     branch:
-      - "main"
+      - "test"
 
 env:
   DOCKER_USER: lcnetdev
diff --git a/.gitmodules b/.gitmodules
index 45cbf19..cc82df3 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,7 @@
 [submodule "ext/arabic_rom"]
 	path = ext/arabic_rom
 	url = https://github.com/fadhleryani/Arabic_ALA-LC_Romanization.git
+[submodule "ext/yiddish"]
+	path = ext/yiddish
+	url = https://github.com/ibleaman/yiddish.git
+	branch = loc
diff --git a/Dockerfile b/Dockerfile
index 1ff0a43..578751e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,14 +6,6 @@ RUN apt install -y build-essential tzdata gfortran libopenblas-dev libboost-all-
 ENV TZ=America/New_York
 ENV _workroot "/usr/local/scriptshifter/src"
 
-WORKDIR ${_workroot}
-COPY requirements.txt ./
-RUN pip install --no-cache-dir -r requirements.txt
-
-# Remove development packages.
-RUN apt remove -y build-essential
-RUN apt autoremove -y
-
 RUN addgroup --system www
 RUN adduser --system www
 RUN gpasswd -a www www
@@ -22,6 +14,15 @@ COPY entrypoint.sh uwsgi.ini wsgi.py ./
 COPY ext ./ext/
 COPY scriptshifter ./scriptshifter/
 
+WORKDIR ${_workroot}
+COPY requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Remove development packages.
+RUN apt remove -y build-essential git
+RUN apt autoremove -y
+RUN rm -rf ext/yiddish
+
 RUN chmod +x ./entrypoint.sh
 RUN chown -R www:www ${_workroot} .
 
diff --git a/ext/yiddish b/ext/yiddish
new file mode 160000
index 0000000..9bf22c5
--- /dev/null
+++ b/ext/yiddish
@@ -0,0 +1 @@
+Subproject commit 9bf22c55ca76710940e141de5d88922a9f55ed1f
diff --git a/requirements.txt b/requirements.txt
index 3136aa2..ccd5d4c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ python-dotenv>=1.0,<2
 pyyaml>=6.0,<7
 repackage>=0.7.3
 uwsgi>=2.0,<2.1
+./ext/yiddish
diff --git a/scriptshifter/hooks/yiddish_/__init__.py b/scriptshifter/hooks/yiddish_/__init__.py
new file mode 100644
index 0000000..22ca3a8
--- /dev/null
+++ b/scriptshifter/hooks/yiddish_/__init__.py
@@ -0,0 +1,51 @@
+# @package ext
+
+__doc__ = """
+Yiddish transliteration module.
+
+Courtesy of Isaac Bleaman and Asher Lewis.
+
+https://github.com/ibleaman/yiddish.git
+
+Note the underscore in the module name to disambiguate with the `yiddish`
+external package name.
+"""
+
+
+from yiddish import detransliterate, transliterate
+
+from scriptshifter.exceptions import BREAK
+from scriptshifter.tools import capitalize
+
+
+def s2r_post_config(ctx):
+    """
+    Script to Roman: transliterate via the `yiddish` package, then capitalize.
+    """
+
+    rom = transliterate(
+            ctx.src, loc=True,
+            loshn_koydesh=ctx.options.get("loshn_koydesh"))
+
+    if ctx.options.get("capitalize") == "all":
+        rom = capitalize(rom)
+    elif ctx.options.get("capitalize") == "first":
+        rom = rom[:1].upper() + rom[1:]  # Slice: safe on empty string.
+
+    ctx.dest = rom
+
+    return BREAK
+
+
+def r2s_post_config(ctx):
+    """
+    Roman to script: detransliterate via the `yiddish` package.
+
+    NOTE: This doesn't support the `loc` option.
+    """
+
+    ctx.dest = detransliterate(
+            ctx.src,
+            loshn_koydesh=ctx.options.get("loshn_koydesh"))
+
+    return BREAK
diff --git a/scriptshifter/tables/data/index.yml b/scriptshifter/tables/data/index.yml
index f6d89ac..889ba8f 100644
--- a/scriptshifter/tables/data/index.yml
+++ b/scriptshifter/tables/data/index.yml
@@ -154,5 +154,7 @@ uzbek_cyrillic:
   name: Uzbek (Cyrillic)
 yakut_cyrillic:
   name: Yakut (Cyrillic)
+yiddish:
+  name: Yiddish
 yuit_cyrillic:
   name: Yuit (Cyrillic)
diff --git a/scriptshifter/tables/data/yiddish.yml b/scriptshifter/tables/data/yiddish.yml
new file mode 100644
index 0000000..c55c431
--- /dev/null
+++ b/scriptshifter/tables/data/yiddish.yml
@@ -0,0 +1,21 @@
+general:
+  name: Yiddish
+
+options:
+  - id: loshn_koydesh
+    label: Loshn Koydesh
+    description: "[TODO]"
+    type: boolean
+    default: false
+
+script_to_roman:
+  hooks:
+    post_config:
+      -
+        - yiddish_.s2r_post_config
+
+roman_to_script:
+  hooks:
+    post_config:
+      -
+        - yiddish_.r2s_post_config