Skip to content

Commit

Permalink
Add missing Chinese Wiktionary language and subtitle data
Browse files Browse the repository at this point in the history
  • Loading branch information
xxyzz committed Aug 1, 2023
1 parent 76465c2 commit 1769e06
Show file tree
Hide file tree
Showing 6 changed files with 156 additions and 12 deletions.
4 changes: 4 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@ trim_trailing_whitespace = true
indent_style = space
indent_size = 4
max_line_length = 80

[*.json]
indent_style = space
indent_size = 2
12 changes: 12 additions & 0 deletions languages/override/zh.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,18 @@
"id": [
"印度尼西亞語"
],
"it": [
"義大利語"
],
"osx": [
"古薩克遜語"
],
"stq": [
"沙特弗里西語"
],
"sw": [
"斯瓦西里語"
],
"xcl": [
"古亞美尼亞語"
],
Expand Down
12 changes: 8 additions & 4 deletions wiktextract/data/zh/languages.json
Original file line number Diff line number Diff line change
Expand Up @@ -11651,7 +11651,8 @@
],
"it": [
"意大利語",
"意大利语"
"意大利语",
"義大利語"
],
"itb": [
"Binongan Itneg"
Expand Down Expand Up @@ -21338,7 +21339,8 @@
"osx": [
"古撒克遜語",
"古撒克逊语",
"Old Low German"
"Old Low German",
"古薩克遜語"
],
"ota": [
"鄂圖曼土耳其語",
Expand Down Expand Up @@ -26265,7 +26267,8 @@
"薩特弗里斯蘭語",
"萨特弗里斯兰语",
"East Frisian",
"Eastern Frisian"
"Eastern Frisian",
"沙特弗里西語"
],
"str": [
"薩尼奇語",
Expand Down Expand Up @@ -26392,7 +26395,8 @@
],
"sw": [
"斯瓦希里語",
"斯瓦希里语"
"斯瓦希里语",
"斯瓦西里語"
],
"swb": [
"馬約特科摩羅語",
Expand Down
39 changes: 37 additions & 2 deletions wiktextract/data/zh/linkage_subtitles.json
Original file line number Diff line number Diff line change
@@ -1,30 +1,51 @@
{
"上下位關係": "hypernym",
"上义词": "hypernym",
"上位詞": "hypernym",
"上位語": "hypernym",
"上位词": "hypernym",
"上義詞": "hypernym",
"下义词": "hyponym",
"下位詞": "hyponym",
"下位語": "hyponym",
"下位词": "hyponym",
"下層概念": "derived",
"下義詞": "hyponym",
"俗语": "related",
"其他写法": "synonyms",
"其他寫法": "synonyms",
"其他形式": "synonyms",
"其他拼写方法": "synonyms",
"其他拼寫": "synonyms",
"其他拼法": "synonyms",
"其他書寫系統": "synonyms",
"其他表記": "synonyms",
"其他詞形": "synonyms",
"其他词形": "synonyms",
"其它词形": "synonyms",
"分体词": "meronyms",
"分體詞": "meronyms",
"參考詞彙": "synonyms",
"反义词": "antonyms",
"反義詞": "antonyms",
"另見": "related",
"另见": "related",
"可替代拼寫": "synonyms",
"同一類別詞彙": "coordinate_terms",
"同义词": "synonyms",
"同类词汇": "related",
"同級詞彙": "coordinate_terms",
"同義字": "related",
"同義詞": "synonyms",
"同類別詞彙": "coordinate_terms",
"同類詞彙": "coordinate_terms",
"整体词": "holonymy",
"复合词": "derived",
"對應詞": "coordinate_terms",
"對等詞": "coordinate_terms",
"局部關係詞": "meronyms",
"搭配詞": "derived",
"整体词": "holonyms",
"整體詞": "holonyms",
"替代寫法": "synonyms",
"替代形式": "synonyms",
"杂项": "various",
Expand All @@ -34,16 +55,30 @@
"派生詞彙": "derived",
"派生語彙": "derived",
"派生词": "derived",
"派生词组": "derived",
"相似后缀": "related",
"相似符號": "related",
"相关后缀": "related",
"相关术语": "related",
"相关词": "related",
"相关词汇": "related",
"相关语": "related",
"相關詞": "related",
"相關詞匯": "related",
"相關詞彙": "related",
"相關詞彙變格": "related",
"相關詞語": "related",
"类似中缀": "related",
"类似后缀": "related",
"組詞": "derived",
"衍生字": "derived",
"衍生詞": "derived",
"衍生詞彙": "derived",
"衍生词": "derived",
"衍生词汇": "derived",
"變體": "synonyms",
"近义词": "synonyms",
"近義詞": "synonyms"
"近義詞": "synonyms",
"部分詞": "meronyms",
"關聯詞": "related"
}
22 changes: 18 additions & 4 deletions wiktextract/data/zh/other_subtitles.json
Original file line number Diff line number Diff line change
@@ -1,42 +1,56 @@
{
"compounds": "复合词",
"etymology": [
"詞源",
"词源",
"典故",
"語源",
"语源",
"字源"
"字源",
"詞語",
"組成"
],
"ignored_sections": [
"異序詞",
"异序词",
"異序词",
"來源",
"參考文獻",
"参考文献",
"參考資料",
"参考资料",
"参考",
"參考",
"參見",
"参见",
"參閱",
"拓展閱讀",
"拓展閲讀",
"拓展阅读",
"延伸阅读",
"延伸閲讀",
"延伸閱讀",
"扩展阅读",
"編碼",
"编码",
"回文",
"回文構詞",
"易位構詞"
"易位構詞",
"外部鏈接",
"外部链接"
],
"inflection_sections": [
"变格",
"變格",
"变位",
"变形",
"變位",
"詞形變化",
"词形变化",
"輔音變化",
"語尾變化",
"活用",
"屈折"
"屈折",
"軟化變形"
],
"pronunciation": [
"發音",
Expand Down
79 changes: 77 additions & 2 deletions wiktextract/data/zh/pos_subtitles.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
"intransitive"
]
},
"不定冠詞": {
"pos": "article"
},
"专有名词": {
"pos": "name"
},
Expand All @@ -15,6 +18,12 @@
"morpheme"
]
},
"中缀": {
"pos": "infix",
"tags": [
"morpheme"
]
},
"人称代词": {
"pos": "pron",
"tags": [
Expand All @@ -27,9 +36,15 @@
"介詞": {
"pos": "prep"
},
"介詞短語": {
"pos": "prep_phrase"
},
"介词": {
"pos": "prep"
},
"介词短语": {
"pos": "prep_phrase"
},
"代詞": {
"pos": "pron"
},
Expand All @@ -42,6 +57,12 @@
"idiomatic"
]
},
"俗语": {
"pos": "phrase",
"tags": [
"idiomatic"
]
},
"冠詞": {
"pos": "article"
},
Expand Down Expand Up @@ -142,6 +163,9 @@
"專有名詞": {
"pos": "name"
},
"小品词": {
"pos": "particle"
},
"序數": {
"debug": "ordinal numbers should be adjectives",
"pos": "adj",
Expand Down Expand Up @@ -170,20 +194,41 @@
"clitic"
]
},
"感叹词": {
"pos": "intj"
},
"感嘆詞": {
"pos": "intj"
},
"感歎詞": {
"pos": "intj"
},
"成句": {
"pos": "proverb"
},
"成語": {
"pos": "proverb"
},
"拼音": {
"pos": "romanization"
},
"数字": {
"pos": "num",
"tags": [
"number"
]
},
"数詞": {
"pos": "num"
"pos": "num",
"tags": [
"number"
]
},
"数词": {
"pos": "num"
"pos": "num",
"tags": [
"number"
]
},
"數詞": {
"pos": "num",
Expand Down Expand Up @@ -219,6 +264,9 @@
"片語": {
"pos": "phrase"
},
"物主代词": {
"pos": "pron"
},
"環綴": {
"pos": "circumfix",
"tags": [
Expand All @@ -229,18 +277,36 @@
"debug": "usually used in singular",
"pos": "phrase"
},
"短语": {
"pos": "phrase",
"tags": [
"idiomatic"
]
},
"符号": {
"pos": "symbol"
},
"符號": {
"pos": "symbol"
},
"简写": {
"pos": "abbrev",
"tags": [
"abbreviation"
]
},
"縮寫": {
"pos": "abbrev",
"tags": [
"abbreviation"
]
},
"縮約形": {
"pos": "contraction",
"tags": [
"contraction"
]
},
"缩写": {
"debug": "part-of-speech Abbreviation is proscribed",
"pos": "abbrev",
Expand All @@ -263,6 +329,9 @@
"羅馬化": {
"pos": "romanization"
},
"羅馬字": {
"pos": "romanization"
},
"諺語": {
"pos": "proverb"
},
Expand Down Expand Up @@ -312,6 +381,12 @@
"abbreviation"
]
},
"首字母縮略詞": {
"pos": "abbrev",
"tags": [
"abbreviation"
]
},
"首字母缩略词": {
"pos": "abbrev",
"tags": [
Expand Down

0 comments on commit 1769e06

Please sign in to comment.