Skip to content

Commit

Permalink
Merge pull request #134 from lcnetdev/casei
Browse files (browse the repository at this point in the history)
Fix case-insensitive R2S.
  • Loading branch information
scossu authored Sep 30, 2024
2 parents f5b1e37 + 9539063 commit 456aae0
Show file tree
Hide file tree
Showing 44 changed files with 2,401 additions and 1,048 deletions.
1,266 changes: 1,266 additions & 0 deletions ext/oriya.html

Large diffs are not rendered by default.

File renamed without changes.
8 changes: 7 additions & 1 deletion scriptshifter/tables/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from yaml import Loader

from scriptshifter import DB_PATH
from scriptshifter.exceptions import BREAK, ConfigError
from scriptshifter.exceptions import BREAK, ApiError, ConfigError


__doc__ = """
Expand Down Expand Up @@ -209,6 +209,9 @@ def populate_table(conn, tid, tname):
if "roman_to_script" in data:
flags |= FEAT_R2S

if not data.get("general", {}).get("case_sensitive", True):
flags |= FEAT_CASEI

conn.execute(
"UPDATE tbl_language SET features = ? WHERE id = ?",
(flags, tid))
Expand Down Expand Up @@ -555,6 +558,9 @@ def get_lang_general(conn, lang):
FROM tbl_language WHERE name = ?""", (lang,))
lang_data = lang_q.fetchone()

if not lang_data:
raise ApiError(f"No language data found for {lang}", 404)

return {
"id": lang_data[0],
"data": {
Expand Down
4 changes: 2 additions & 2 deletions scriptshifter/tables/data/_chinese_base.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# This file is derived from, and kept in sync with, Princeton's OCLC Connexion
# Pinyin converter (https://github.com/pulibrary/oclcpinyin/).

general: # Section names and other keywords are all snake_cased.
general: # Section names and other keywords are all snake_cased.
name: Chinese base (from Princeton)
parents:
- _ignore_base

script_to_roman:
map: # Mapping section.
map: # Mapping section.
"\u5DF4\u57FA\u65AF\u5766\u4F0A\u65AF\u862D\u5171\u548C\u570B": "Bajisitan Yisilan Gongheguo "
"\u5DF4\u57FA\u65AF\u5766\u4F0A\u65AF\u5170\u5171\u548C\u56FD": "Bajisitan Yisilan Gongheguo "
"\u5DF4\u97F3\u90ED\u695E\u8499\u53E4\u81EA\u6CBB\u5DDE": "Bayinguoleng Menggu Zizhizhou "
Expand Down
2 changes: 2 additions & 0 deletions scriptshifter/tables/data/arabic.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# Arabic S2R using the 3rd-party ArabicTransliterator library:
# https://github.com/MTG/ArabicTransliterator

---
general:
name: Arabic
description: Arabic S2R using a 3rd party library.
case_sensitive: false

script_to_roman:
hooks:
Expand Down
2 changes: 2 additions & 0 deletions scriptshifter/tables/data/burmese.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
---
general:
name: Burmese (Myanmar)
case_sensitive: false

script_to_roman:
hooks:
Expand Down
3 changes: 3 additions & 0 deletions scriptshifter/tables/data/chinese.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
#
# All other Chinese mappings are kept in _chinese_base.yml. This mapping only
# adds an overlay for parsing numerals and Scriptshifter-specific features.

---
general:
name: Chinese
parents:
- _chinese_base
case_sensitive: false

options:
- id: marc_field
Expand Down
2 changes: 2 additions & 0 deletions scriptshifter/tables/data/devanagari.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
---
general:
name: Devanagari
case_sensitive: false

script_to_roman:
hooks:
Expand Down
4 changes: 3 additions & 1 deletion scriptshifter/tables/data/divehi_thaana.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
---
general:
name: Divehi (Thaana)
case_sensitive: false

roman_to_script:
map:
Expand Down Expand Up @@ -50,7 +52,7 @@ roman_to_script:
"bb": "\u0787\u07B0\u0784"
"b": "\u0784"

# THAANA LETTER "L/l" WITH DOT BELOW (0323)
# THAANA LETTER "L/l" WITH DOT BELOW (0323)
"L\u0323": "\u0785"
"l\u0323": "\u0785"
"K": "\u0786"
Expand Down
2 changes: 2 additions & 0 deletions scriptshifter/tables/data/dogri_devanagari.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
---
general:
name: Dogri (Devanagari)
case_sensitive: false

script_to_roman:
hooks:
Expand Down
2 changes: 2 additions & 0 deletions scriptshifter/tables/data/gujarati.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
---
general:
name: Gujarati
case_sensitive: false

script_to_roman:
hooks:
Expand Down
2 changes: 2 additions & 0 deletions scriptshifter/tables/data/gurmukhi.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
---
general:
name: Punjabi (Gurmukhi)
case_sensitive: false

script_to_roman:
hooks:
Expand Down
3 changes: 2 additions & 1 deletion scriptshifter/tables/data/hebrew.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
---
general:
name: Hebrew
description: Hebrew S2R.
case_sensitive: false

options:
- id: genre
Expand All @@ -19,4 +21,3 @@ script_to_roman:
post_config:
-
- hebrew.dicta_api.s2r_post_config

Loading

0 comments on commit 456aae0

Please sign in to comment.