Skip to content

Commit

Permalink
Merge branch 'uwsgi_header_size' into test
Browse files Browse the repository at this point in the history
  • Loading branch information
scossu committed Dec 25, 2024
2 parents 57d3df9 + 08ad739 commit 44397bf
Show file tree
Hide file tree
Showing 3 changed files with 378 additions and 2 deletions.
377 changes: 376 additions & 1 deletion scriptshifter/tables/data/arabic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,384 @@
---
general:
name: Arabic
description: Arabic S2R using a 3rd party library.
description: Arabic R2S using a conversion table and S2R using a 3rd party library.
case_sensitive: false

parents:
- _ignore_base


roman_to_script:
map:

# Original table by David Bucknum
# Last updated 25 January 2019
# Modified by WK with testing by Arabic Cat Staff LOC-CAIRO
# Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin Scripts Conceptually"


# Punctuation marks:
"*": "\u066D"
",": "\u060C"
";": "\u061B"
"?": "\u061F"

# Exceptions for specific words
# Allah
"Alla\u0304h": "\u0627\u0644\u0644\u0647"

# Qur'an
"Qur\u02BCa\u0304n": "\u0642\u0631\u0622\u0646"

# lillah
"lilla\u0304h": "\u0644\u0644\u0647"

# billah
"billa\u0304h": "\u0628\u0644\u0644\u0647"

# Rahman
"Rah\u0323ma\u0304n": "\u0631\u062D\u0645\u0646"

# Ruwat
"Ruwa\u0304t": "\u0631\u0648\u0627\u0629"
"ruwa\u0304t": "\u0631\u0648\u0627\u0629"

# Hadha
"Ha\u0304dha\u0304": "\u0647\u0630\u0627"
"ha\u0304dha\u0304": "\u0647\u0630\u0627"

# Hadhihi
"Ha\u0304dhi\u0304hi": "\u0647\u0630\u0647"
"ha\u0304dhi\u0304hi": "\u0647\u0630\u0647"

# dhalika
"dha\u0304lika": "\u0630\u0644\u0643"

# Ibn when it appears in the middle of a name sequence
"ibn": "\u0628\u0646"

# H[dot below]aya[macron]t
"h\u0323aya\u0304t": "\u062D\u064A\u0627\u0629"
"H\u0323aya\u0304t": "\u062D\u064A\u0627\u0629"

# "sh[dot below] as in "Ishaq"

"%sh\u0323%": "\u0633\u062D"

# "s[prime]h" combos

"%s\u02B9h%": "\u0633\u0647"

# "th[dot below]"

"%th\u0323%": "\u062A\u062D"

# dh[dot under]

"%dh\u0323%": "\u062F\u062D"

# La-hu

"la-hu": "\u0644\u0647"

# Mi'ah
"Mi\u02BEah": "\u0645\u0627\u0626\u0629"
"Mi\u02BCah": "\u0645\u0627\u0626\u0629"
"mi\u02BEah": "\u0645\u0627\u0626\u0629"
"mi\u02BCah": "\u0645\u0627\u0626\u0629"

# Mi'at
"Mi\u02BEat": "\u0645\u0627\u0626\u0629"
"Mi\u02BCat": "\u0645\u0627\u0626\u0629"
"mi\u02BEat": "\u0645\u0627\u0626\u0629"
"mi\u02BCat": "\u0645\u0627\u0626\u0629"

# Numbers (I have set these to Hindi numbers. Note that Persian and Urdu will technically use \u06F0-06F9. This needs further discussion with PSD as RLIN21 used Hindi numbers, Connexion and Voyager does not.)

# Edition statements with Latin number
"al-T\u0323ab\u02BBah 1": "\u0627\u0644\u0637\u0628\u0639\u0629 1"
"al-T\u0323ab\u02BBah 2": "\u0627\u0644\u0637\u0628\u0639\u0629 2"
"al-T\u0323ab\u02BBah 3": "\u0627\u0644\u0637\u0628\u0639\u0629 3"
"al-T\u0323ab\u02BBah 4": "\u0627\u0644\u0637\u0628\u0639\u0629 4"
"al-T\u0323ab\u02BBah 5": "\u0627\u0644\u0637\u0628\u0639\u0629 5"
"al-T\u0323ab\u02BBah 6": "\u0627\u0644\u0637\u0628\u0639\u0629 6"
"al-T\u0323ab\u02BBah 7": "\u0627\u0644\u0637\u0628\u0639\u0629 7"
"al-T\u0323ab\u02BBah 8": "\u0627\u0644\u0637\u0628\u0639\u0629 8"
"al-T\u0323ab\u02BBah 9": "\u0627\u0644\u0637\u0628\u0639\u0629 9"

# Use Basic Arabic-Indic \u0660-0669
"0": "\u0660"
"1": "\u0661"
"2": "\u0662"
"3": "\u0663"
"4": "\u0664"
"5": "\u0665"
"6": "\u0666"
"7": "\u0667"
"8": "\u0668"
"9": "\u0669"

# Hyphenated prefixes:
"wa-": "\u0648"
"bi-": "\u0628"
"al-": "\u0627\u0644"
"lil-": "\u0644\u0644"
"li-": "\u0644"
"la\u0304-": "\u0644"
"fi\u0304-": "\u0641\u064A"
"ka-": "\u0643"

# Vowels and vowel/consonant combinations
"%ah": "\u0629"
"%at": "\u0629"

#tanwin
"%an": "\u0627"

# ayn-alif combo
"%\u02BBa\u0304\u02BE": "\u0639\u0627\u0621"
"%\u02BBa\u0304\u02BC": "\u0639\u0627\u0621"

"\u02BBA\u0304": "\u0639\u0627"
"\u02BBa\u0304": "\u0639\u0627"

"\u02BBI\u0304": "\u0639\u064A"
"\u02BBi\u0304": "\u0639\u064A"

"\u02BBU\u0304": "\u0639\u0648"
"\u02BBu\u0304": "\u0639\u0648"
"\u02BBU": "\u0639"
"\u02BBu": "\u0639"

"\u02BBA%": "\u0639"
#"\u02BBa%": "\u0639"

# alif and hamzas for all occasions

# truncation necessary? It seems to work fine with.

"%i\u0304\u02BEah": "\u064A\u0626\u0629"
"%i\u0304\u02BCah": "\u064A\u0626\u0629"

"%i\u0304\u02BEat": "\u064A\u0626\u0629"
"%i\u0304\u02BCat": "\u064A\u0626\u0629"

"%i\u02BEa\u0304": "\u0626\u0627"
"%i\u02BCa\u0304": "\u0626\u0627"

"%i\u02BE": "\u0626"
"%i\u02BC": "\u0626"
"a\u0304\u02BEa\u0304": "\u0627\u0621\u0627"
"a\u0304\u02BCa\u0304": "\u0627\u0621\u0627"

"a\u02BE": "\u0623"
"a\u02BC": "\u0623"
"\u02BEi": "\u0626"
"\u02BCi": "\u0626"
"\u02BEa\u0304": "\u0622"
"\u02BCa\u0304": "\u0622"
"\u02BEa": "\u0623"
"\u02BCa": "\u0623"

"y\u02BCah": "\u064A\u0626\u0629"
"y\u02BEah": "\u064A\u0626\u0629"

"y\u02BCat": "\u064A\u0626\u0629"
"y\u02BEat": "\u064A\u0626\u0629"

# A

"a\u0304\u02BCi\u0304": "\u0627\u0626\u064A"
"a\u0304\u02BEi\u0304": "\u0627\u0626\u064A"

"a\u0304\u02BCi": "\u0627\u0626"
"a\u0304\u02BEi": "\u0627\u0626"
"a\u0304\u02BC": "\u0627\u0621"
"a\u0304\u02BE": "\u0627\u0621"
"A\u0304%": "\u0622"
"a\u0304%": "\u0622"
"A\u0304": "\u0627"
"a\u0304": "\u0627"

# These next two lines were intended to convert to alif-ayn when it is at the beginning of a word, definite or indefinine (i.e. al-a[ayn]ma[macron]l or [space]a[ayn]ma[macron]l"
"A\u02BB%": "\u0623\u0639"
"a\u02BB%": "\u0623\u0639"
"a\u02BB": "\u0639"
"A\u0301": "\u0649"
"a\u0301": "\u0649"

"ayy": "\u064A"
"A%": "\u0623"
"a%": "\u0627"
"A": "\u0623"
"a": ""

# I - Capital I at beginning of word is usually alif hamzah-below.

"%i\u0304": "\u064A"
"i\u0304y": "\u064A"
"iy": "\u064A"
"I\u0304%": "\u0625\u064A"
"i\u0304": "\u064A"
"\u02BBI%": "\u0639"

#"i\u02BB": "\u0625\u0639"

"I\u02BE": "\u0627\u0626"
"I\u02BC": "\u0627\u0626"
"i\u02BE": "\u0626"
"i\u02BC": "\u0627\u0626"

"I%": "\u0625"
"i%": "\u0625"
"I": "\u0625"
"i": ""

# U

"u\u0304\u02BE": "\u0624"
"u\u0304\u02BC": "\u0624"
"U\u0304w%": "\u0623\u0648"
"u\u0304w%": "\u0623\u0648"
"U\u0304%": "\u0623\u0648"
"u\u0304%": "\u0623\u0648"
"u\u0304w": "\u0648"
"u\u0304": "\u0648"
"u\u02BE": "\u0624"
"u\u02BC": "\u0624"

"U%": "\u0623"
"u%": "\u0623"
"U": "\u0623"
"u": ""

# Consonants, with tashdid added

"B": "\u0628"
"bb": "\u0628"
"b": "\u0628"
"Th": "\u062B"
"thth": "\u062B"
"th": "\u062B"
"T\u0323": "\u0637"
"t\u0323t\u0323": "\u0637"
"t\u0323": "\u0637"
"T": "\u062A"
"tt": "\u062A"
"t": "\u062A"
"J": "\u062C"
"jj": "\u062C"
"j": "\u062C"
"H\u0323": "\u062D"
"h\u0323h\u0323": "\u062D"
"h\u0323": "\u062D"
"H": "\u0647"
"hh": "\u0647"
"h": "\u0647"
"Kh": "\u062E"
"khkh": "\u062E"
"kh": "\u062E"
"K": "\u0643"
"kk": "\u0643"
"k": "\u0643"
"Dh": "\u0630"
"dhdh": "\u0630"
"dh": "\u0630"
"D\u0323": "\u0636"
"d\u0323d\u0323": "\u0636"
"d\u0323": "\u0636"
"D": "\u062F"
"dd": "\u062F"
"d": "\u062F"
"R": "\u0631"
"rr": "\u0631"
"r": "\u0631"
"Z\u0323": "\u0638"
"z\u0323z\u0323": "\u0638"
"z\u0323": "\u0638"
"Z": "\u0632"
"zz": "\u0632"
"z": "\u0632"
"Sh": "\u0634"
"shsh": "\u0634"
"sh": "\u0634"
"S\u0323": "\u0635"
"s\u0323s\u0323": "\u0635"
"s\u0323": "\u0635"
"S": "\u0633"
"ss": "\u0633"
"s": "\u0633"
"Gh": "\u063A"
"ghgh": "\u063A"
"gh": "\u063A"
"F": "\u0641"
"ff": "\u0641"
"f": "\u0641"
"Q": "\u0642"
"qq": "\u0642"
"q": "\u0642"
"L": "\u0644"
"ll": "\u0644"
"l": "\u0644"
"M": "\u0645"
"mm": "\u0645"
"m": "\u0645"
"N": "\u0646"
"nn": "\u0646"
"n": "\u0646"
"W": "\u0648"
"ww": "\u0648"
"w": "\u0648"
"Y": "\u064A"
"yy": "\u064A"
"y": "\u064A"

# non-Arabic consonants:
"P": "\u067E"
"p": "\u067E"
"Ch": "\u0686"
"ch": "\u0686"
"V": "\u06A4"
"v": "\u06A4"
"G": "\u06AF"
"g": "\u06AF"

# Diacritic characters:
# ain (\u0639) - not transliterated alone:
"\u02BB": "\u0639"
# hamza - not romanized
# "\u0621"
# hamza (alone in final position)
"%\u02BE": "\u0621"
"%\u02BC": "\u0621"

# Do not know what, if anything, is needed here:
# tatweel:
# "\u0640"
# fathatan:
# "\u064B"
# dammatan:
# "\u064C"
# kasratan:
# "\u064D"
# fatha:
# "\u064E"
# damma:
# "\u064F"
# kasra:
# "\u0650"
# shadda:
# "\u0651"
# sukun:
# "\u0652"
# superscript alef:
# "\u0670"
# alef wasla
# "\u0671"




script_to_roman:
hooks:
post_config:
Expand Down
2 changes: 1 addition & 1 deletion scriptshifter/tables/index.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ arabic:
Arabic-to-Roman transliterator using the ArabicTransliterator external
library.
marc_code: ara
name: Arabic (S2R)
name: Arabic
armenian:
marc_code: arm
name: Armenian
Expand Down
1 change: 1 addition & 0 deletions uwsgi.ini
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ logger = errorlog file:/dev/stderr
log-route = errorlog (HTTP/1.\d 50)
uid = www
gid = www
buffer-size = 16384

0 comments on commit 44397bf

Please sign in to comment.