diff --git a/grammars/inventory.grm b/grammars/inventory.grm index ff57459..65fa007 100644 --- a/grammars/inventory.grm +++ b/grammars/inventory.grm @@ -27,7 +27,7 @@ oral_vowel = "a" | "e" | "i" | "o" | "u"; nasal_vowel = "ã" | "ẽ" | "ĩ" | "õ" | "ũ"; export SHORT_VOWEL = Optimize[oral_vowel | nasal_vowel]; export LONG_VOWEL = SHORT_VOWEL "ː"; -export PHONEMIC_VOWEL = SHORT_VOWEL | LONG_VOWEL; +export PHONEMIC_VOWEL = SHORT_VOWEL | LONG_VOWEL | "ü"; # Phonemic consonants. export VOICELESS_STOP = Optimize["p" | "t" | "k"]; @@ -48,6 +48,18 @@ export PHONEME = Optimize[ "o" | "oː" | "õː" | "p" | "r" | "s" | "t" | "u" | "uː" | "ũː" | "w" | "z" | b.kSpace]; +export STRIKETHROUGH = Optimize[ + "a̶ ̶" | "e̶ ̶" | "i̶ ̶" | "o̶ ̶" | "u̶ ̶" | "ã̶ː̶ ̶" | + "ẽ̶ː̶ ̶" | "ĩ̶ː̶ ̶" | "õ̶ː̶ ̶" | "ũ̶ː̶ ̶" | "ü̶ː̶ ̶" | + "a̶ː̶ ̶" | "e̶ː̶ ̶" | "i̶ː̶ ̶" | "o̶ː̶ ̶" | "u̶ː̶ ̶" | + "ü̶m̶ ̶" | "o̶j̶ ̶" | "a̶j̶ ̶" | "a̶w̶ ̶" | "h̶"]; + +export UMLAUT = Optimize["ü" | "ï"]; + +export MARKUPS = Optimize[ + STRIKETHROUGH | UMLAUT | "‿" | "͡" +]; + # Prosodic symbols. export NUCLEUS = Optimize["U" | # Short nucleus. "-"]; # Long nucleus. diff --git a/grammars/syllable.grm b/grammars/syllable.grm index ab32e19..86a22aa 100644 --- a/grammars/syllable.grm +++ b/grammars/syllable.grm @@ -4,13 +4,29 @@ import 'inventory.grm' as i; import 'byte.grm' as b; import 'utility.grm' as u; -sigma_star = (i.PROSODIC_SYMBOL | i.PHONEME)*; +sigma_star = (i.PROSODIC_SYMBOL | i.PHONEME | i.MARKUPS)*; + +insert_space_resyllabification = Optimize[CDRewrite[u.Insert[" "], + "", i.CONSONANT "‿", sigma_star]]; + +# Removes markups for elision and resyllabification. +delete_strikethrough_undertie = Optimize[u.Rewrite[u.Delete[i.STRIKETHROUGH | "‿"], + sigma_star]]; + +# Removes the ligature tie and rewrites "i" or "u" to "j" or "w," respectively. 
+synizesis_cleanup = Optimize[ + CDRewrite[(("i" | "iː") : "j") | + (("u" | "uː") : "w"), "", "͡" i.PHONEMIC_VOWEL, sigma_star] @ + CDRewrite[(("i" | "iː") : "j") | + (("u" | "uː") : "w"), i.PHONEMIC_VOWEL "͡", "", sigma_star] @ + u.Rewrite[u.Delete["͡"], sigma_star] + ]; ## Syllable parsing. long_nucleus = Optimize[u.Rewrite[i.LONG_VOWEL : "-", sigma_star]]; -short_nucleus = Optimize[u.Rewrite[i.SHORT_VOWEL : "U", sigma_star]]; +short_nucleus = Optimize[u.Rewrite[i.SHORT_VOWEL | i.UMLAUT : "U", sigma_star]]; obstruent = i.STOP | "f"; muta_cum_liquida = (obstruent i.LIQUID) - ("tl" | "dl"); @@ -27,6 +43,10 @@ onset = Optimize[ i.BOW, i.NUCLEUS, sigma_star] @ + + # Resyllabification consonants. + CDRewrite[i.CONSONANT : "O", "", "‿", sigma_star] @ + # Mandatorily splits medial "tl" and "dl" clusters into coda/onset. CDRewrite["l" : "O", "t" | "d", i.NUCLEUS, sigma_star] @ # Optionally (and with non-zero cost) splits other muta cum liquida clusters @@ -51,7 +71,9 @@ onset = Optimize[ coda = Optimize[u.Rewrite[i.CONSONANT+ : "C", sigma_star]]; -export SYLLABLE = Optimize[long_nucleus @ short_nucleus @ onset @ coda]; +export SYLLABLE = Optimize[insert_space_resyllabification @ + delete_strikethrough_undertie @ synizesis_cleanup @ + long_nucleus @ short_nucleus @ onset @ coda]; ## Tests syllable parsing. test_syllable_1 = AssertEqual[ @@ -105,3 +127,34 @@ test_ks_2 = AssertEqual[ "uːnius ob noksãː et furiaːs ajjaːkis oiːleiː" @ SYLLABLE, "-OUUC UC OUCO- UC OUOU-C UCO-OUC U-OU-" ]; + +## Tests syllable parsing on variable rule markups. 
+ +test_syllable_resyllabify = AssertEqual[ + "ab‿oris" @ SYLLABLE, + "U OUOUC" +]; +test_syllable_elision = AssertEqual[ + "multũ̶ː̶ ̶ille" @ SYLLABLE, + "OUCOUCOU" +]; +test_syllable_h_deletion = AssertEqual[ + "fuit‿h̶ok" @ SYLLABLE, + "OUU OUC" +]; +test_syllable_synizesis_1 = AssertEqual[ + "su͡aːsit" @ SYLLABLE, + "O-OUC" +]; +test_syllable_synizesis_2 = AssertEqual[ + "laːwiːni͡akwe" @ SYLLABLE, + "O-O-COUOU" +]; +test_syllable_synizesis_3 = AssertEqual[ + "awre͡iːs" @ SYLLABLE, + "UCOUC" +]; +test_diaeresis_1 = AssertEqual[ + "troːïa" @ SYLLABLE, + "O-UU" +]; diff --git a/grammars/variable.grm b/grammars/variable.grm index 0894523..33ae2bf 100644 --- a/grammars/variable.grm +++ b/grammars/variable.grm @@ -1,51 +1,71 @@ # Implements variable sandhi phenomena, such as elision, resyllabification, -# diaeresis, synizesis, hypermetric lengthening, etc. +# diaeresis, and synizesis. import 'byte.grm' as b; import 'inventory.grm' as i; import 'utility.grm' as u; -sigma_star = (i.PROSODIC_SYMBOL | i.PHONEME)*; +sigma_star = (i.PROSODIC_SYMBOL | i.PHONEME | i.MARKUPS)*; h_deletion = Optimize[ - CDRewrite[u.Delete["h"] <10>, "", "", sigma_star, 'ltr', 'opt'] + # Delete medial "h" to permit additional variable rules, if needed (such as synizesis). + CDRewrite[u.Delete["h"] <10>, + i.PHONEME - b.kSpace, i.PHONEME - b.kSpace, sigma_star, 'ltr', 'opt'] @ + CDRewrite[("h" : "h̶" <10>), + "", "", sigma_star, 'ltr', 'opt'] ]; # A final syllable ending in a vowel, letter m, or diphthong is removed # before a word beginning with a vowel (or an h-). 
-elision = Optimize[ - CDRewrite[u.Delete[(i.PHONEMIC_VOWEL | "oj" | "aj" | "aw") " "] <1000>, - "", i.PHONEMIC_VOWEL, sigma_star, 'ltr', 'opt'] -]; +elision = Optimize[CDRewrite[(("a " : "a̶ ̶") | + ("e " : "e̶ ̶") | + ("i " : "i̶ ̶") | + ("o " : "o̶ ̶") | + ("u " : "u̶ ̶") | + ("ãː " : "ã̶ː̶ ̶") | + ("ẽː " : "ẽ̶ː̶ ̶") | + ("ĩː " : "ĩ̶ː̶ ̶") | + ("õː " : "õ̶ː̶ ̶") | + ("ũː " : "ũ̶ː̶ ̶") | + ("aː " : "a̶ː̶ ̶") | + ("eː " : "e̶ː̶ ̶") | + ("iː " : "i̶ː̶ ̶") | + ("oː " : "o̶ː̶ ̶") | + ("uː " : "u̶ː̶ ̶") | + ("üm " : "ü̶m̶ ̶") | + ("oj " : "o̶j̶ ̶") | + ("aj " : "a̶j̶ ̶") | + ("aw " : "a̶w̶ ̶")) <1000> + , "", "h̶"? i.PHONEMIC_VOWEL, sigma_star, 'ltr', 'opt']]; # Word-final consonant reattaches to the following word with an initial vowel. -# TODO: orthographically annotate using "‿". resyllabification = Optimize[ - CDRewrite[u.Insert[" "] <100>, - "", i.CONSONANT " " i.PHONEMIC_VOWEL, sigma_star, 'ltr', 'opt'] @ - CDRewrite[u.Delete[" "] <100>, - " " i.CONSONANT, i.PHONEMIC_VOWEL, sigma_star] + CDRewrite[(" " : "‿") <100>, + i.CONSONANT, "h̶"? i.PHONEMIC_VOWEL, sigma_star, 'ltr', 'opt'] ]; # High vowels may strengthen to their corresponding glides when adjacent to # another vowel. synizesis = Optimize[ - CDRewrite[((("i" | "iː") : "j") | (("u" | "uː") : "w")) <10000>, - "", i.PHONEMIC_VOWEL, sigma_star, 'ltr', 'opt'] @ - CDRewrite[((("i" | "iː") : "j") | (("u" | "uː") : "w")) <10000>, - i.PHONEMIC_VOWEL, "", sigma_star, 'ltr', 'opt'] + CDRewrite[u.Insert["͡"] <10000>, + ("i" | "iː" | "u" | "uː") , i.PHONEMIC_VOWEL, sigma_star, 'ltr', 'opt'] @ + CDRewrite[u.Insert["͡"] <10000>, + i.PHONEMIC_VOWEL, ("i" | "iː" | "u" | "uː"), sigma_star, 'ltr', 'opt'] ]; -# Glides may lenite to their corresponding vowels. +# Glides may lenite to their corresponding vowels. diaeresis = Optimize[ - CDRewrite[("w" : "u") <10000>, + CDRewrite[("w" : "ü") <1000>, "k" | "g", "", sigma_star, 'ltr', 'opt'] @ - CDRewrite[("jj" : "i") <10000>, + # "aw" diphthongs preceding final m may lenite. 
+ CDRewrite[("w" : "ü") <1000>, + "", "m", sigma_star, 'ltr', 'opt'] @ + CDRewrite[("jj" : "ï") <1000>, i.PHONEMIC_VOWEL, i.PHONEMIC_VOWEL, sigma_star, 'ltr', 'opt'] ]; -export VARIABLE = Optimize[h_deletion @ resyllabification @ elision @ - synizesis @ diaeresis]; +export VARIABLE = Optimize[h_deletion @ resyllabification @ + synizesis @ diaeresis @ elision]; # Not currently handled: # * Systole and diastole diff --git a/tests/scansion_test.py b/tests/scansion_test.py index d23ad2c..bede4a5 100644 --- a/tests/scansion_test.py +++ b/tests/scansion_test.py @@ -37,8 +37,9 @@ def test_aen_1_1(self): verse.raw_pron, "arma wirũːkwe kanoː trojjaj kwiː priːmus ab oːris" ) self.assertEqual( - verse.var_pron, "arma wirũːkwe kanoː trojjaj kwiː priːmu sa boːris" + verse.var_pron, "arma wirũːkwe kanoː trojjaj kwiː priːmus‿ab‿oːris" ) + # Tests foot structures. self.assertEqual(verse.foot[0].type, latin_scansion.Foot.DACTYL) self.assertEqual(verse.foot[1].type, latin_scansion.Foot.DACTYL) @@ -148,7 +149,7 @@ def test_aen_1_26(self): text = "exciderant animō; manet altā mente repostum" verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "ekskiderant animoː mane taltaː mente repostũː" + verse.var_pron, "ekskiderant animoː manet‿altaː mente repostũː" ) # Tests that the grammar does not unnecessarily apply elision. 
@@ -156,28 +157,30 @@ def test_aen_1_42(self): text = "Ipsa Jovis rapidum jaculāta ē nūbibus ignem" verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "ipsa jowis rapidũː jakulaːteː nuːbibu siŋnẽː" + verse.var_pron, "ipsa jowis rapidũː jakulaːta̶ ̶eː nuːbibus‿iŋnẽː" ) def test_aen_1_247(self): text = "Hīc tamen ille urbem Patavī sēdēsque locāvit" verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "hiːk tame nillurbẽː patawiː seːdeːskwe lokaːwit" + verse.var_pron, + "hiːk tamen‿ille̶ ̶urbẽː patawiː seːdeːskwe lokaːwit", ) def test_aen_1_254(self): text = "Ollī subrīdēns hominum sator atque deōrum" verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "olliː subriːdeːns hominũː sato ratkwe deoːrũː" + verse.var_pron, "olliː subriːdeːns hominũː sator‿atkwe deoːrũː" ) def test_aen_1_450(self): text = "Hōc prīmum in lūcō nova rēs oblāta timōrem" verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "hoːk priːmin luːkoː nowa reːs oblaːta timoːrẽː" + verse.var_pron, + "hoːk priːmũ̶ː̶ ̶in luːkoː nowa reːs oblaːta timoːrẽː", ) def test_aen_1_477(self): @@ -185,35 +188,36 @@ def test_aen_1_477(self): verse = self.scan_verse(text) self.assertEqual( verse.var_pron, - "loːra teneːns tame nujk kerwiːkskwe komajkwe trahuntur", + "loːra teneːns tamen‿h̶ujk kerwiːkskwe komajkwe trahuntur", ) def test_aen_1_593(self): text = "argentum Pariusve lapis circumdatur aurō." verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "argentũː pariuswe lapis kirkumdatu rawroː" + verse.var_pron, "argentũː pariuswe lapis kirkumdatur‿awroː" ) def test_aen_1_649(self): text = "et circumtextum croceō vēlāmen acanthō," verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "et kirkumtekstũː krokeoː weːlaːme nakantoː" + verse.var_pron, "et kirkumtekstũː krokeoː weːlaːmen‿akantoː" ) def test_aen_1_682(self): text = "nē quā scīre dolōs mediusve occurrere possit." 
verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "neː kwaː skiːre doloːs mediuswokkurrere possit" + verse.var_pron, + "neː kwaː skiːre doloːs mediuswe̶ ̶okkurrere possit", ) def test_aen_1_697(self): text = "pallamque et pictum croceō vēlāmen acanthō." verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "pallãːkwet piktũː krokeoː weːlaːme nakantoː" + verse.var_pron, "pallãːkwe̶ ̶et piktũː krokeoː weːlaːmen‿akantoː" ) # Tests handling of brackets. @@ -238,7 +242,8 @@ def test_aen_2_219(self): text = "bis medium amplexī, bis collō squāmea circum" verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "bis mediampleksiː bis kolloː skwaːmea kirkũː" + verse.var_pron, + "bis mediũ̶ː̶ ̶ampleksiː bis kolloː skwaːmea kirkũː", ) # Elision. @@ -246,7 +251,8 @@ def test_aen_2_278(self): text = "squālentem barbam et concrētōs sanguine crīnīs" verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "skwaːlentẽː barbet koŋkreːtoːs saŋgwine kriːniːs" + verse.var_pron, + "skwaːlentẽː barbã̶ː̶ ̶et koŋkreːtoːs saŋgwine kriːniːs", ) # Defective verse – first syllable is short. @@ -275,14 +281,14 @@ def test_aen_2_764(self): text = "praedam adservābant hūc undique trōja gaza" verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "prajdadserwaːbant huːk undikwe troːia gazza" + verse.var_pron, "prajdã̶ː̶ ̶adserwaːbant huːk undikwe troːïa gazza" ) def test_aen_3_158(self): text = "īdem ventūrōs tollēmus in astra nepōtēs" verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "iːdẽː wentuːroːs tolleːmu si nastra nepoːteːs" + verse.var_pron, "iːdẽː wentuːroːs tolleːmus‿in‿astra nepoːteːs" ) # Synizesis. @@ -290,7 +296,7 @@ def test_aen_3_161(self): text = "Mūtandae sēdēs. 
Nōn haec tibi lītora suāsit" verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "muːtandaj seːdeːs noːn hajk tibi liːtora swaːsit" + verse.var_pron, "muːtandaj seːdeːs noːn hajk tibi liːtora su͡aːsit" ) def test_aen_3_365(self): @@ -304,7 +310,7 @@ def test_aen_3_464(self): text = "dōna dehinc aurō gravia ac sectō elephantō" verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "doːna dehiŋk awroː grawiak sektoː elepantoː" + verse.var_pron, "doːna dehiŋk awroː grawia̶ ̶ak sektoː elepantoː" ) @unittest.skip("Requires diastole.") @@ -319,7 +325,8 @@ def test_aen_3_567(self): text = "ter spūmam ēlīsam et rōrantia vīdimus astra." verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "ter spuːmeːliːset roːrantia wiːdimu sastra" + verse.var_pron, + "ter spuːmã̶ː̶ ̶eːliːsã̶ː̶ ̶et roːrantia wiːdimus‿astra", ) @unittest.skip("Requires diastole.") @@ -335,7 +342,8 @@ def test_aen_4_302(self): text = "Thyjas, ubi audītō stimulant trietērica Bacchō" verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "tujja subawdiːtoː stimulant trieteːrika bakkoː" + verse.var_pron, + "tujjas‿ubi̶ ̶awdiːtoː stimulant trieteːrika bakkoː", ) def test_aen_4_324(self): @@ -349,14 +357,16 @@ def test_aen_4_369(self): text = "Num flētū ingemuit nostrō? Num lūmina flexit?" verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "nũː fleːtiŋgemuit nostroː nũː luːmina fleksit" + verse.var_pron, + "nũː fleːtu̶ː̶ ̶iŋgemuit nostroː nũː luːmina fleksit", ) def test_aen_4_569(self): text = "Heja age, rumpe morās. Varium et mūtābile semper" verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "hejjage rumpe moraːs wariet muːtaːbile semper" + verse.var_pron, + "hejja̶ ̶age rumpe moraːs wariũ̶ː̶ ̶et muːtaːbile semper", ) @unittest.skip("Requires diastole.") @@ -391,7 +401,8 @@ def test_aen_5_352(self): text = "dat Saliō villīs onerōsum atque unguibus aureīs." 
verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "dat salioː williːs oneroːsatkwuŋgwibu sawrejs" + verse.var_pron, + "dat salioː williːs oneroːsũ̶ː̶ ̶atkwe̶ ̶uŋgwibus‿awre͡iːs", ) @unittest.skip("Requires diastole.") @@ -414,7 +425,7 @@ def test_aen_5_870(self): text = '"Ō nimium caelō et pelagō cōnfīse serēnō' verse = self.scan_verse(text) self.assertEqual( - verse.var_pron, "oː nimiũː kajlet pelagoː koːnfiːse sereːnoː" + verse.var_pron, "oː nimiũː kajlo̶ː̶ ̶et pelagoː koːnfiːse sereːnoː" ) @unittest.skip("Requires synizesis, but Cj is not a valid onset.")