Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge markups #90

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
14 changes: 13 additions & 1 deletion grammars/inventory.grm
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ oral_vowel = "a" | "e" | "i" | "o" | "u";
nasal_vowel = "ã" | "ẽ" | "ĩ" | "õ" | "ũ";
export SHORT_VOWEL = Optimize[oral_vowel | nasal_vowel];
export LONG_VOWEL = SHORT_VOWEL "ː";
export PHONEMIC_VOWEL = SHORT_VOWEL | LONG_VOWEL;
export PHONEMIC_VOWEL = SHORT_VOWEL | LONG_VOWEL | "ü";

# Phonemic consonants.
# Voiceless stops: "p", "t" and "k".
export VOICELESS_STOP = Optimize["p" | "t" | "k"];
Expand All @@ -48,6 +48,18 @@ export PHONEME = Optimize[
"o" | "oː" | "õː" | "p" | "r" | "s" | "t" |
"u" | "uː" | "ũː" | "w" | "z" | b.kSpace];

# Struck-through (elided or deleted) segments. Each character of a segment is
# followed by the combining long stroke overlay (U+0336). Every vowel-final,
# "m"-final and diphthong entry ends in a struck-through space, i.e. the word
# boundary consumed together with the elided syllable; "h̶" is a deleted "h"
# and carries no boundary.
# NOTE(review): no elision rewrite visible here produces the "üː" entry; it is
# kept, but now ends in a struck-through space like its siblings so that
# deleting it can never remove an ordinary (unmarked) space.
export STRIKETHROUGH = Optimize[
    "a̶ ̶" | "e̶ ̶" | "i̶ ̶" | "o̶ ̶" | "u̶ ̶" | "ã̶ː̶ ̶" |
    "ẽ̶ː̶ ̶" | "ĩ̶ː̶ ̶" | "õ̶ː̶ ̶" | "ũ̶ː̶ ̶" | "ü̶ː̶ ̶" |
    "a̶ː̶ ̶" | "e̶ː̶ ̶" | "i̶ː̶ ̶" | "o̶ː̶ ̶" | "u̶ː̶ ̶" |
    "ü̶m̶ ̶" | "o̶j̶ ̶" | "a̶j̶ ̶" | "a̶w̶ ̶" | "h̶"];

# Umlauted vowels that the diaeresis rule writes in place of a glide:
# "ü" for "w" and "ï" for "jj".
export UMLAUT = Optimize["ü" | "ï"];

# All annotation symbols that may appear in a marked-up string: strikethrough
# sequences, umlauted vowels, the undertie "‿" (written by resyllabification)
# and the ligature tie "͡" (inserted by synizesis).
export MARKUPS = Optimize[
STRIKETHROUGH | UMLAUT | "‿" | "͡"
];

# Prosodic symbols.
# Symbols that stand for a syllable nucleus in the output of the syllable
# parse.
export NUCLEUS = Optimize["U" | # Short nucleus.
"-"]; # Long nucleus.
Expand Down
59 changes: 56 additions & 3 deletions grammars/syllable.grm
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,29 @@ import 'inventory.grm' as i;
import 'byte.grm' as b;
import 'utility.grm' as u;

sigma_star = (i.PROSODIC_SYMBOL | i.PHONEME)*;
sigma_star = (i.PROSODIC_SYMBOL | i.PHONEME | i.MARKUPS)*;

# Inserts a space in front of a consonant that is immediately followed by the
# undertie "‿", so that the consonant is grouped with the following word
# (e.g. "ab‿oris" is parsed as "U OUOUC").
insert_space_resyllabification = Optimize[CDRewrite[u.Insert[" "],
"", i.CONSONANT "‿", sigma_star]];

# Removes the markups for elision and resyllabification: every
# i.STRIKETHROUGH sequence and every undertie "‿" is deleted from the string.
delete_strikethrough_undertie = Optimize[u.Rewrite[u.Delete[i.STRIKETHROUGH | "‿"],
sigma_star]];

# Resolves synizesis markup: a high vowel ("i", "iː", "u" or "uː") that is
# joined to a neighbouring vowel by the ligature tie "͡" is rewritten to the
# glide "j" or "w", respectively, and the tie itself is then removed.
synizesis_cleanup = Optimize[
# High vowel directly before the tie and a following vowel.
CDRewrite[(("i" | "iː") : "j") |
(("u" | "uː") : "w"), "", "͡" i.PHONEMIC_VOWEL, sigma_star] @
# High vowel directly after a vowel and the tie.
CDRewrite[(("i" | "iː") : "j") |
(("u" | "uː") : "w"), i.PHONEMIC_VOWEL "͡", "", sigma_star] @
# Deletes the tie character itself.
u.Rewrite[u.Delete["͡"], sigma_star]
];

## Syllable parsing.

# Rewrites each long vowel to the long-nucleus symbol "-".
long_nucleus = Optimize[u.Rewrite[i.LONG_VOWEL : "-", sigma_star]];

short_nucleus = Optimize[u.Rewrite[i.SHORT_VOWEL : "U", sigma_star]];
short_nucleus = Optimize[u.Rewrite[i.SHORT_VOWEL | i.UMLAUT : "U", sigma_star]];

# Stops plus "f".
obstruent = i.STOP | "f";
# Obstruent + liquid clusters, excluding "tl" and "dl" (those two are split
# into coda/onset by the onset rules instead).
muta_cum_liquida = (obstruent i.LIQUID) - ("tl" | "dl");
Expand All @@ -27,6 +43,10 @@ onset = Optimize[
i.BOW,
i.NUCLEUS,
sigma_star] @

# Resyllabification consonants.
CDRewrite[i.CONSONANT : "O", "", "‿", sigma_star] @

# Mandatorily splits medial "tl" and "dl" clusters into coda/onset.
CDRewrite["l" : "O", "t" | "d", i.NUCLEUS, sigma_star] @
# Optionally (and with non-zero cost) splits other muta cum liquida clusters
Expand All @@ -51,7 +71,9 @@ onset = Optimize[

# Rewrites each run of one or more consonants to a single coda symbol "C".
# It is the last stage composed into SYLLABLE, after the onset rules.
coda = Optimize[u.Rewrite[i.CONSONANT+ : "C", sigma_star]];

export SYLLABLE = Optimize[long_nucleus @ short_nucleus @ onset @ coda];
export SYLLABLE = Optimize[insert_space_resyllabification @
delete_strikethrough_undertie @ synizesis_cleanup @
long_nucleus @ short_nucleus @ onset @ coda];

## Tests syllable parsing.
test_syllable_1 = AssertEqual[
Expand Down Expand Up @@ -105,3 +127,34 @@ test_ks_2 = AssertEqual[
"uːnius ob noksãː et furiaːs ajjaːkis oiːleiː" @ SYLLABLE,
"-OUUC UC OUCO- UC OUOU-C UCO-OUC U-OU-"
];

## Tests syllable parsing on variable rule markups.

# Resyllabification: the undertie moves the word boundary in front of the
# final consonant, so "b" is parsed as the onset of the next word.
test_syllable_resyllabify = AssertEqual[
"ab‿oris" @ SYLLABLE,
"U OUOUC"
];
# Elision: the struck-through vowel and space are removed, so the remainder
# is parsed as one unbroken string ("multille").
test_syllable_elision = AssertEqual[
"multũ̶ː̶ ̶ille" @ SYLLABLE,
"OUCOUCOU"
];
# Deleted "h" combined with resyllabification: the struck-through "h" is
# dropped and "t" becomes the onset of the next word ("fui tok").
test_syllable_h_deletion = AssertEqual[
"fuit‿h̶ok" @ SYLLABLE,
"OUU OUC"
];
# Synizesis, high vowel before the tie: "u" is read as the glide "w".
test_syllable_synizesis_1 = AssertEqual[
"su͡aːsit" @ SYLLABLE,
"O-OUC"
];
# Synizesis, high vowel before the tie: "i" is read as the glide "j".
test_syllable_synizesis_2 = AssertEqual[
"laːwiːni͡akwe" @ SYLLABLE,
"O-O-COUOU"
];
# Synizesis, high vowel after the tie: "iː" is read as the glide "j".
test_syllable_synizesis_3 = AssertEqual[
"awre͡iːs" @ SYLLABLE,
"UCOUC"
];
# Diaeresis: the umlauted vowel "ï" is parsed as a short nucleus.
test_diaeresis_1 = AssertEqual[
"troːïa" @ SYLLABLE,
"O-UU"
];
62 changes: 41 additions & 21 deletions grammars/variable.grm
Original file line number Diff line number Diff line change
@@ -1,51 +1,71 @@
# Implements variable sandhi phenomena, such as elision, resyllabification,
# diaeresis, synizesis, hypermetric lengthening, etc.
# diaeresis, and synizesis.

import 'byte.grm' as b;
import 'inventory.grm' as i;
import 'utility.grm' as u;

sigma_star = (i.PROSODIC_SYMBOL | i.PHONEME)*;
sigma_star = (i.PROSODIC_SYMBOL | i.PHONEME | i.MARKUPS)*;

h_deletion = Optimize[
CDRewrite[u.Delete["h"] <10>, "", "", sigma_star, 'ltr', 'opt']
# Delete medial "h" to permit additional variable rules, if needed (such as synizesis).
CDRewrite[u.Delete["h"] <10>,
i.PHONEME - b.kSpace, i.PHONEME - b.kSpace, sigma_star, 'ltr', 'opt'] @
CDRewrite[("h" : "h̶" <10>),
"", "", sigma_star, 'ltr', 'opt']
];

# A final syllable ending in a vowel, letter m, or diphthong is removed
# before a word beginning with a vowel (or an h-).
elision = Optimize[
CDRewrite[u.Delete[(i.PHONEMIC_VOWEL | "oj" | "aj" | "aw") " "] <1000>,
"", i.PHONEMIC_VOWEL, sigma_star, 'ltr', 'opt']
];
elision = Optimize[CDRewrite[(("a " : "a̶ ̶") |
("e " : "e̶ ̶") |
("i " : "i̶ ̶") |
("o " : "o̶ ̶") |
("u " : "u̶ ̶") |
("ãː " : "ã̶ː̶ ̶") |
("ẽː " : "ẽ̶ː̶ ̶") |
("ĩː " : "ĩ̶ː̶ ̶") |
("õː " : "õ̶ː̶ ̶") |
("ũː " : "ũ̶ː̶ ̶") |
("aː " : "a̶ː̶ ̶") |
("eː " : "e̶ː̶ ̶") |
("iː " : "i̶ː̶ ̶") |
("oː " : "o̶ː̶ ̶") |
("uː " : "u̶ː̶ ̶") |
("üm " : "ü̶m̶ ̶") |
("oj " : "o̶j̶ ̶") |
("aj " : "a̶j̶ ̶") |
("aw " : "a̶w̶ ̶")) <1000>
, "", "h̶"? i.PHONEMIC_VOWEL, sigma_star, 'ltr', 'opt']];

# Word-final consonant reattaches to the following word with an initial vowel.
# TODO: orthographically annotate using "‿".
resyllabification = Optimize[
CDRewrite[u.Insert[" "] <100>,
"", i.CONSONANT " " i.PHONEMIC_VOWEL, sigma_star, 'ltr', 'opt'] @
CDRewrite[u.Delete[" "] <100>,
" " i.CONSONANT, i.PHONEMIC_VOWEL, sigma_star]
CDRewrite[(" " : "‿") <100>,
i.CONSONANT, "h̶"? i.PHONEMIC_VOWEL, sigma_star, 'ltr', 'opt']
];

# High vowels may strengthen to their corresponding glides when adjacent to
# another vowel.
synizesis = Optimize[
CDRewrite[((("i" | "iː") : "j") | (("u" | "uː") : "w")) <10000>,
"", i.PHONEMIC_VOWEL, sigma_star, 'ltr', 'opt'] @
CDRewrite[((("i" | "iː") : "j") | (("u" | "uː") : "w")) <10000>,
i.PHONEMIC_VOWEL, "", sigma_star, 'ltr', 'opt']
CDRewrite[u.Insert["͡"] <10000>,
("i" | "iː" | "u" | "uː") , i.PHONEMIC_VOWEL, sigma_star, 'ltr', 'opt'] @
CDRewrite[u.Insert["͡"] <10000>,
i.PHONEMIC_VOWEL, ("i" | "iː" | "u" | "uː"), sigma_star, 'ltr', 'opt']
];

# Glides may lenite to their corresponding vowels.
# Glides may lenite to their corresponding vowels.
diaeresis = Optimize[
CDRewrite[("w" : "u") <10000>,
CDRewrite[("w" : "ü") <1000>,
"k" | "g", "", sigma_star, 'ltr', 'opt'] @
CDRewrite[("jj" : "i") <10000>,
# "aw" diphthongs preceding final m may lenite.
CDRewrite[("w" : "ü") <1000>,
"", "m", sigma_star, 'ltr', 'opt'] @
CDRewrite[("jj" : "ï") <1000>,
i.PHONEMIC_VOWEL, i.PHONEMIC_VOWEL, sigma_star, 'ltr', 'opt']
];

export VARIABLE = Optimize[h_deletion @ resyllabification @ elision @
synizesis @ diaeresis];
export VARIABLE = Optimize[h_deletion @ resyllabification @
synizesis @ diaeresis @ elision];

# Not currently handled:
# * Systole and diastole
Expand Down
Loading