-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Import terms from Ethereum.org Translations' Glossary (#20)
Co-authored-by: Sloth Service <[email protected]>
- Loading branch information
1 parent
b6473d6
commit 569e358
Showing
9 changed files
with
1,220 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,4 +31,4 @@ jobs: | |
- name: Check term sorting | ||
run: | | ||
cargo build | ||
cargo run -- --check | ||
cargo run --bin terms -- --check |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
/target | ||
index.html | ||
build/index.html | ||
build/index.html | ||
*.tbx |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
# Run the main script | ||
.PHONY: run | ||
run: | ||
cargo run --release | ||
cargo run --release --bin terms | ||
|
||
|
||
# Default target | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
use std::{ | ||
fs::{self, File}, | ||
io::Write, | ||
}; | ||
|
||
use regex::Regex; | ||
use serde::Deserialize; | ||
use serde_xml_rs::from_str; | ||
use terms::terms::{Term, Terms}; | ||
|
||
#[derive(Debug, Deserialize)] | ||
struct LangSet { | ||
#[serde(rename = "xmllang")] | ||
lang: String, | ||
#[serde(rename = "tig")] | ||
tig: Tig, | ||
} | ||
|
||
#[derive(Debug, Deserialize)] | ||
struct Tig { | ||
#[serde(rename = "term")] | ||
term: String, | ||
#[serde(rename = "termNote")] | ||
_term_note: Option<TermNote>, | ||
} | ||
|
||
#[derive(Debug, Deserialize)] | ||
struct TermNote { | ||
#[serde(rename = "type")] | ||
_note_type: String, | ||
} | ||
|
||
fn main() { | ||
// Specify the file path to your XML file | ||
let file_path = "Ethereum.org_Translationss_Glossary.tbx"; | ||
|
||
// Read the XML data from the file into a string | ||
let mut xml_str = match fs::read_to_string(file_path) { | ||
Ok(content) => content, | ||
Err(err) => { | ||
eprintln!("Error reading the XML file: {:?}", err); | ||
return; | ||
} | ||
}; | ||
|
||
// Define a regular expression pattern to find 'xml:lang' | ||
let pattern = r#"xml:lang"#; | ||
let regex = Regex::new(pattern).unwrap(); | ||
|
||
// Replace 'xml:lang' with 'xmllang' throughout the XML string | ||
xml_str = regex.replace_all(&xml_str, "xmllang").to_string(); | ||
|
||
let parsed: Result<martif, serde_xml_rs::Error> = from_str(&xml_str); | ||
|
||
let path = "terms.toml"; | ||
let mut terms = Terms::load_terms(path).unwrap(); | ||
|
||
let file_path = "untranslated.txt"; | ||
let mut untranslated = File::create(file_path).unwrap(); | ||
|
||
match parsed { | ||
Ok(data) => { | ||
// Iterate through langSet elements and print zh-TW translations | ||
for entry in data.text.body.term_entry.iter() { | ||
let en = entry.lang_set.iter().find(|lang_set| lang_set.lang == "en"); | ||
let zh_tw = entry | ||
.lang_set | ||
.iter() | ||
.find(|lang_set| lang_set.lang == "zh-TW"); | ||
if let Some(en) = en { | ||
if let Some(zh_tw) = zh_tw { | ||
let term = Term { | ||
term: en.tig.term.to_string(), | ||
tags: vec![], | ||
translation: zh_tw.tig.term.to_string(), | ||
}; | ||
terms.terms.push(term); | ||
} else { | ||
let line = format!("untranslated: {}", en.tig.term); | ||
println!("{}", line); | ||
untranslated.write_all(line.as_bytes()).unwrap(); | ||
untranslated.write_all(b"\n").unwrap(); | ||
} | ||
} | ||
} | ||
} | ||
Err(e) => { | ||
eprintln!("Error parsing XML: {:?}", e); | ||
} | ||
} | ||
terms.sort_terms(); | ||
terms.to_file(path).unwrap(); | ||
} | ||
|
||
#[derive(Debug, Deserialize)] | ||
#[allow(non_camel_case_types)] | ||
struct martif { | ||
#[serde(rename = "xmllang")] | ||
_lang: String, | ||
#[serde(rename = "text")] | ||
text: Text, | ||
} | ||
|
||
#[derive(Debug, Deserialize)] | ||
struct Text { | ||
#[serde(rename = "body")] | ||
body: Body, | ||
} | ||
|
||
#[derive(Debug, Deserialize)] | ||
struct Body { | ||
#[serde(rename = "termEntry")] | ||
term_entry: Vec<TermEntry>, | ||
} | ||
|
||
#[derive(Debug, Deserialize)] | ||
struct TermEntry { | ||
#[serde(rename = "langSet")] | ||
lang_set: Vec<LangSet>, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
pub mod terms; |
Oops, something went wrong.