diff --git a/scriptshifter/tables/data/arabic.yml b/scriptshifter/tables/data/arabic.yml index 5e76b9b..ddcbe19 100644 --- a/scriptshifter/tables/data/arabic.yml +++ b/scriptshifter/tables/data/arabic.yml @@ -4,9 +4,384 @@ --- general: name: Arabic - description: Arabic S2R using a 3rd party library. + description: Arabic R2S using a conversion table and S2R using a 3rd party library. case_sensitive: false + parents: + - _ignore_base + + +roman_to_script: + map: + + # Original table by David Bucknum + # Last updated 25 January 2019 + # Modified by WK with testing by Arabic Cat Staff LOC-CAIRO + # Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin Scripts Conceptually" + + + # Punctuation marks: + "*": "\u066D" + ",": "\u060C" + ";": "\u061B" + "?": "\u061F" + + # Exceptions for specific words + # Allah + "Alla\u0304h": "\u0627\u0644\u0644\u0647" + + # Qur'an + "Qur\u02BCa\u0304n": "\u0642\u0631\u0622\u0646" + + # lillah + "lilla\u0304h": "\u0644\u0644\u0647" + + # billah (keeps the alif of Allah, unlike lillah) + "billa\u0304h": "\u0628\u0627\u0644\u0644\u0647" + + # Rahman + "Rah\u0323ma\u0304n": "\u0631\u062D\u0645\u0646" + + # Ruwat + "Ruwa\u0304t": "\u0631\u0648\u0627\u0629" + "ruwa\u0304t": "\u0631\u0648\u0627\u0629" + + # Hadha + "Ha\u0304dha\u0304": "\u0647\u0630\u0627" + "ha\u0304dha\u0304": "\u0647\u0630\u0627" + + # Hadhihi + "Ha\u0304dhi\u0304hi": "\u0647\u0630\u0647" + "ha\u0304dhi\u0304hi": "\u0647\u0630\u0647" + + # dhalika + "dha\u0304lika": "\u0630\u0644\u0643" + + # Ibn when it appears in the middle of a name sequence + "ibn": "\u0628\u0646" + + # H[dot below]aya[macron]t + "h\u0323aya\u0304t": "\u062D\u064A\u0627\u0629" + "H\u0323aya\u0304t": "\u062D\u064A\u0627\u0629" + + # "sh[dot below]" as in "Ishaq" + + "%sh\u0323%": "\u0633\u062D" + + # "s[prime]h" combos + + "%s\u02B9h%": "\u0633\u0647" + + # "th[dot below]" + + "%th\u0323%": "\u062A\u062D" + + # "dh[dot below]" + + "%dh\u0323%": "\u062F\u062D" + + # La-hu + + "la-hu": "\u0644\u0647" + + # Mi'ah + "Mi\u02BEah": 
"\u0645\u0627\u0626\u0629" + "Mi\u02BCah": "\u0645\u0627\u0626\u0629" + "mi\u02BEah": "\u0645\u0627\u0626\u0629" + "mi\u02BCah": "\u0645\u0627\u0626\u0629" + + # Mi'at + "Mi\u02BEat": "\u0645\u0627\u0626\u0629" + "Mi\u02BCat": "\u0645\u0627\u0626\u0629" + "mi\u02BEat": "\u0645\u0627\u0626\u0629" + "mi\u02BCat": "\u0645\u0627\u0626\u0629" + + # Numbers (I have set these to Hindi numbers. Note that Persian and Urdu will technically use \u06F0-\u06F9. This needs further discussion with PSD as RLIN21 used Hindi numbers; Connexion and Voyager do not.) + + # Edition statements with Latin number + "al-T\u0323ab\u02BBah 1": "\u0627\u0644\u0637\u0628\u0639\u0629 1" + "al-T\u0323ab\u02BBah 2": "\u0627\u0644\u0637\u0628\u0639\u0629 2" + "al-T\u0323ab\u02BBah 3": "\u0627\u0644\u0637\u0628\u0639\u0629 3" + "al-T\u0323ab\u02BBah 4": "\u0627\u0644\u0637\u0628\u0639\u0629 4" + "al-T\u0323ab\u02BBah 5": "\u0627\u0644\u0637\u0628\u0639\u0629 5" + "al-T\u0323ab\u02BBah 6": "\u0627\u0644\u0637\u0628\u0639\u0629 6" + "al-T\u0323ab\u02BBah 7": "\u0627\u0644\u0637\u0628\u0639\u0629 7" + "al-T\u0323ab\u02BBah 8": "\u0627\u0644\u0637\u0628\u0639\u0629 8" + "al-T\u0323ab\u02BBah 9": "\u0627\u0644\u0637\u0628\u0639\u0629 9" + + # Use Basic Arabic-Indic \u0660-\u0669 + "0": "\u0660" + "1": "\u0661" + "2": "\u0662" + "3": "\u0663" + "4": "\u0664" + "5": "\u0665" + "6": "\u0666" + "7": "\u0667" + "8": "\u0668" + "9": "\u0669" + + # Hyphenated prefixes: + "wa-": "\u0648" + "bi-": "\u0628" + "al-": "\u0627\u0644" + "lil-": "\u0644\u0644" + "li-": "\u0644" + "la\u0304-": "\u0644" + "fi\u0304-": "\u0641\u064A" + "ka-": "\u0643" + + # Vowels and vowel/consonant combinations + "%ah": "\u0629" + "%at": "\u0629" + + # tanwin + "%an": "\u0627" + + # ayn-alif combo + "%\u02BBa\u0304\u02BE": "\u0639\u0627\u0621" + "%\u02BBa\u0304\u02BC": "\u0639\u0627\u0621" + + "\u02BBA\u0304": "\u0639\u0627" + "\u02BBa\u0304": "\u0639\u0627" + + "\u02BBI\u0304": "\u0639\u064A" + "\u02BBi\u0304": "\u0639\u064A" + 
"\u02BBU\u0304": "\u0639\u0648" + "\u02BBu\u0304": "\u0639\u0648" + "\u02BBU": "\u0639" + "\u02BBu": "\u0639" + + "\u02BBA%": "\u0639" + #"\u02BBa%": "\u0639" + + # alif and hamzas for all occasions + + # Is truncation necessary? It seems to work fine with it. + + "%i\u0304\u02BEah": "\u064A\u0626\u0629" + "%i\u0304\u02BCah": "\u064A\u0626\u0629" + + "%i\u0304\u02BEat": "\u064A\u0626\u0629" + "%i\u0304\u02BCat": "\u064A\u0626\u0629" + + "%i\u02BEa\u0304": "\u0626\u0627" + "%i\u02BCa\u0304": "\u0626\u0627" + + "%i\u02BE": "\u0626" + "%i\u02BC": "\u0626" + "a\u0304\u02BEa\u0304": "\u0627\u0621\u0627" + "a\u0304\u02BCa\u0304": "\u0627\u0621\u0627" + + "a\u02BE": "\u0623" + "a\u02BC": "\u0623" + "\u02BEi": "\u0626" + "\u02BCi": "\u0626" + "\u02BEa\u0304": "\u0622" + "\u02BCa\u0304": "\u0622" + "\u02BEa": "\u0623" + "\u02BCa": "\u0623" + + "y\u02BCah": "\u064A\u0626\u0629" + "y\u02BEah": "\u064A\u0626\u0629" + + "y\u02BCat": "\u064A\u0626\u0629" + "y\u02BEat": "\u064A\u0626\u0629" + + # A + + "a\u0304\u02BCi\u0304": "\u0627\u0626\u064A" + "a\u0304\u02BEi\u0304": "\u0627\u0626\u064A" + + "a\u0304\u02BCi": "\u0627\u0626" + "a\u0304\u02BEi": "\u0627\u0626" + "a\u0304\u02BC": "\u0627\u0621" + "a\u0304\u02BE": "\u0627\u0621" + "A\u0304%": "\u0622" + "a\u0304%": "\u0622" + "A\u0304": "\u0627" + "a\u0304": "\u0627" + + # These next two lines were intended to convert to alif-ayn when it is at the beginning of a word, definite or indefinite (i.e. "al-a[ayn]ma[macron]l" or "[space]a[ayn]ma[macron]l") + "A\u02BB%": "\u0623\u0639" + "a\u02BB%": "\u0623\u0639" + "a\u02BB": "\u0639" + "A\u0301": "\u0649" + "a\u0301": "\u0649" + + "ayy": "\u064A" + "A%": "\u0623" + "a%": "\u0627" + "A": "\u0623" + "a": "" + + # I - Capital I at beginning of word is usually alif hamzah-below.
+ + "%i\u0304": "\u064A" + "i\u0304y": "\u064A" + "iy": "\u064A" + "I\u0304%": "\u0625\u064A" + "i\u0304": "\u064A" + "\u02BBI%": "\u0639" + + #"i\u02BB": "\u0625\u0639" + + "I\u02BE": "\u0627\u0626" + "I\u02BC": "\u0627\u0626" + "i\u02BE": "\u0626" + "i\u02BC": "\u0626" # same target as the i\u02BE entry + + "I%": "\u0625" + "i%": "\u0625" + "I": "\u0625" + "i": "" + + # U + + "u\u0304\u02BE": "\u0624" + "u\u0304\u02BC": "\u0624" + "U\u0304w%": "\u0623\u0648" + "u\u0304w%": "\u0623\u0648" + "U\u0304%": "\u0623\u0648" + "u\u0304%": "\u0623\u0648" + "u\u0304w": "\u0648" + "u\u0304": "\u0648" + "u\u02BE": "\u0624" + "u\u02BC": "\u0624" + + "U%": "\u0623" + "u%": "\u0623" + "U": "\u0623" + "u": "" + + # Consonants; doubled (tashdid) spellings map to the same single letter + + "B": "\u0628" + "bb": "\u0628" + "b": "\u0628" + "Th": "\u062B" + "thth": "\u062B" + "th": "\u062B" + "T\u0323": "\u0637" + "t\u0323t\u0323": "\u0637" + "t\u0323": "\u0637" + "T": "\u062A" + "tt": "\u062A" + "t": "\u062A" + "J": "\u062C" + "jj": "\u062C" + "j": "\u062C" + "H\u0323": "\u062D" + "h\u0323h\u0323": "\u062D" + "h\u0323": "\u062D" + "H": "\u0647" + "hh": "\u0647" + "h": "\u0647" + "Kh": "\u062E" + "khkh": "\u062E" + "kh": "\u062E" + "K": "\u0643" + "kk": "\u0643" + "k": "\u0643" + "Dh": "\u0630" + "dhdh": "\u0630" + "dh": "\u0630" + "D\u0323": "\u0636" + "d\u0323d\u0323": "\u0636" + "d\u0323": "\u0636" + "D": "\u062F" + "dd": "\u062F" + "d": "\u062F" + "R": "\u0631" + "rr": "\u0631" + "r": "\u0631" + "Z\u0323": "\u0638" + "z\u0323z\u0323": "\u0638" + "z\u0323": "\u0638" + "Z": "\u0632" + "zz": "\u0632" + "z": "\u0632" + "Sh": "\u0634" + "shsh": "\u0634" + "sh": "\u0634" + "S\u0323": "\u0635" + "s\u0323s\u0323": "\u0635" + "s\u0323": "\u0635" + "S": "\u0633" + "ss": "\u0633" + "s": "\u0633" + "Gh": "\u063A" + "ghgh": "\u063A" + "gh": "\u063A" + "F": "\u0641" + "ff": "\u0641" + "f": "\u0641" + "Q": "\u0642" + "qq": "\u0642" + "q": "\u0642" + "L": "\u0644" + "ll": "\u0644" + "l": "\u0644" + "M": "\u0645" + "mm": "\u0645" + "m": "\u0645" + "N": "\u0646" + 
"nn": "\u0646" + "n": "\u0646" + "W": "\u0648" + "ww": "\u0648" + "w": "\u0648" + "Y": "\u064A" + "yy": "\u064A" + "y": "\u064A" + + # non-Arabic consonants: + "P": "\u067E" + "p": "\u067E" + "Ch": "\u0686" + "ch": "\u0686" + "V": "\u06A4" + "v": "\u06A4" + "G": "\u06AF" + "g": "\u06AF" + + # Diacritic characters: + # ain (\u0639) - not transliterated alone: + "\u02BB": "\u0639" + # hamza - not romanized: + # "\u0621" + # hamza (alone in final position): + "%\u02BE": "\u0621" + "%\u02BC": "\u0621" + + # TODO: determine what, if anything, is needed for the following: + # tatweel: + # "\u0640" + # fathatan: + # "\u064B" + # dammatan: + # "\u064C" + # kasratan: + # "\u064D" + # fatha: + # "\u064E" + # damma: + # "\u064F" + # kasra: + # "\u0650" + # shadda: + # "\u0651" + # sukun: + # "\u0652" + # superscript alef: + # "\u0670" + # alef wasla: + # "\u0671" + + + + script_to_roman: hooks: post_config: diff --git a/scriptshifter/tables/index.yml b/scriptshifter/tables/index.yml index 2e428ce..f0e0f4d 100644 --- a/scriptshifter/tables/index.yml +++ b/scriptshifter/tables/index.yml @@ -19,7 +19,7 @@ arabic: Arabic-to-Roman transliterator using the ArabicTransliterator external library. marc_code: ara - name: Arabic (S2R) + name: Arabic armenian: marc_code: arm name: Armenian diff --git a/uwsgi.ini b/uwsgi.ini index a467469..4c2d1b3 100644 --- a/uwsgi.ini +++ b/uwsgi.ini @@ -9,3 +9,4 @@ logger = errorlog file:/dev/stderr log-route = errorlog (HTTP/1.\d 50) uid = www gid = www +buffer-size = 16384