From 763943e026e9d50a549a36aec1ed86969af283ad Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Fri, 28 Jun 2024 16:19:58 +0900 Subject: [PATCH 1/6] auto fix clippy warnings --- python/src/build.rs | 14 +++++--- python/src/dictionary.rs | 13 +++---- python/src/morpheme.rs | 10 +++--- python/src/pos_matcher.rs | 5 ++- python/src/pretokenizer.rs | 4 +-- python/src/tokenizer.rs | 8 ++--- sudachi-cli/src/build.rs | 8 ++--- sudachi-cli/src/main.rs | 17 ++++----- sudachi-cli/src/output.rs | 4 +-- sudachi/src/analysis/created.rs | 18 +++++----- sudachi/src/analysis/lattice.rs | 17 ++------- sudachi/src/analysis/mlist.rs | 8 ++--- sudachi/src/analysis/morpheme.rs | 10 +++--- sudachi/src/analysis/node.rs | 6 ++-- sudachi/src/analysis/stateful_tokenizer.rs | 10 +++--- sudachi/src/analysis/stateless_tokenizer.rs | 4 +-- sudachi/src/config.rs | 30 +++++++--------- sudachi/src/dic/build/conn.rs | 10 +++--- sudachi/src/dic/build/error.rs | 12 ++----- sudachi/src/dic/build/lexicon.rs | 18 +++++----- sudachi/src/dic/build/lexicon/test.rs | 8 ++--- sudachi/src/dic/build/mod.rs | 10 +++--- sudachi/src/dic/build/parse.rs | 8 ++--- sudachi/src/dic/build/primitives.rs | 4 +-- sudachi/src/dic/build/test/mod.rs | 4 +-- sudachi/src/dic/character_category.rs | 9 ++--- sudachi/src/dic/dictionary.rs | 10 +++--- sudachi/src/dic/grammar.rs | 35 +++++++++---------- sudachi/src/dic/header.rs | 12 +++++-- sudachi/src/dic/lexicon/mod.rs | 4 +-- sudachi/src/dic/lexicon/trie.rs | 4 +-- sudachi/src/dic/lexicon/word_id_table.rs | 6 ++-- sudachi/src/dic/lexicon_set.rs | 6 ++-- sudachi/src/dic/mod.rs | 16 ++++----- sudachi/src/dic/read/mod.rs | 4 +-- sudachi/src/dic/storage.rs | 4 +-- sudachi/src/dic/word_id.rs | 8 ++--- sudachi/src/hash.rs | 4 +-- sudachi/src/input_text/buffer/mod.rs | 11 ++---- .../plugin/connect_cost/inhibit_connection.rs | 10 +++--- sudachi/src/plugin/connect_cost/mod.rs | 4 +-- .../input_text/default_input_text/mod.rs | 10 +++--- sudachi/src/plugin/input_text/mod.rs | 8 ++--- sudachi/src/plugin/loader.rs | 14 ++++---- sudachi/src/plugin/oov/mecab_oov/mod.rs | 12 +++---- sudachi/src/plugin/oov/mod.rs | 8 ++--- sudachi/src/plugin/oov/regex_oov/mod.rs | 12 +++---- .../join_numeric/numeric_parser/mod.rs | 4 +-- .../numeric_parser/string_number.rs | 6 ++-- sudachi/src/plugin/path_rewrite/mod.rs | 6 ++-- sudachi/src/sentence_detector.rs | 16 ++++++--- sudachi/src/sentence_splitter.rs | 8 ++++- sudachi/src/util/check_params.rs | 8 ++--- sudachi/src/util/cow_array.rs | 6 ++-- sudachi/src/util/testing.rs | 8 ++--- sudachi/src/util/user_pos.rs | 10 ++---- sudachi/tests/common/mod.rs | 12 +++---- sudachi/tests/morpheme.rs | 4 +-- sudachi/tests/regex_oov.rs | 4 +-- 59 files changed, 260 insertions(+), 303 deletions(-) diff --git a/python/src/build.rs b/python/src/build.rs index a6005b26..9cff1d71 100644 --- a/python/src/build.rs +++ b/python/src/build.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -68,12 +68,14 @@ fn build_system_dic<'p>( description: Option<&str>, ) -> PyResult<&'p PyList> { let mut builder = DictBuilder::new_system(); - description.map(|d| builder.set_description(d)); + if let Some(d) = description { + builder.set_description(d) + } let matrix_src = as_data_source(py, matrix)?; errors::wrap_ctx(builder.read_conn(matrix_src), matrix)?; for f in lex.iter() { - let lex_src = as_data_source(py, &f)?; + let lex_src = as_data_source(py, f)?; errors::wrap_ctx(builder.read_lexicon(lex_src), &f)?; } let out_file = match as_data_source(py, output)? { @@ -110,10 +112,12 @@ fn build_user_dic<'p>( }; let mut builder = DictBuilder::new_user(&system_dic); - description.map(|d| builder.set_description(d)); + if let Some(d) = description { + builder.set_description(d) + } for f in lex.iter() { - let lex_src = as_data_source(py, &f)?; + let lex_src = as_data_source(py, f)?; errors::wrap_ctx(builder.read_lexicon(lex_src), &f)?; } let out_file = match as_data_source(py, output)? { diff --git a/python/src/dictionary.rs b/python/src/dictionary.rs index bc333c8e..93918c2c 100644 --- a/python/src/dictionary.rs +++ b/python/src/dictionary.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -178,10 +178,7 @@ impl PyDictionary { } let jdic = JapaneseDictionary::from_cfg(&config).map_err(|e| { - SudachiErr::new_err(format!( - "Error while constructing dictionary: {}", - e.to_string() - )) + SudachiErr::new_err(format!("Error while constructing dictionary: {}", e)) })?; let pos_data = jdic @@ -414,7 +411,7 @@ fn config_repr(cfg: &Config) -> Result { pub(crate) fn extract_mode<'py>(py: Python<'py>, mode: &'py PyAny) -> PyResult { if mode.is_instance_of::() { let mode = mode.str()?.to_str()?; - Mode::from_str(mode).map_err(|e| SudachiErr::new_err(e).into()) + Mode::from_str(mode).map_err(SudachiErr::new_err) } else if mode.is_instance_of::() { let mode = mode.extract::()?; Ok(Mode::from(mode)) @@ -431,7 +428,7 @@ fn read_config(config_opt: &PyAny) -> PyResult { if config_opt.is_instance_of::() { let config_str = config_opt.str()?.to_str()?.trim(); // looks like json - if config_str.starts_with("{") && config_str.ends_with("}") { + if config_str.starts_with('{') && config_str.ends_with('}') { let result = ConfigBuilder::from_bytes(config_str.as_bytes()); return wrap(result); } @@ -451,7 +448,7 @@ fn read_config(config_opt: &PyAny) -> PyResult { return read_config(cfg_as_str); } Err(SudachiErr::new_err(( - format!("passed config was not a string, json object or sudachipy.config.Config object"), + "passed config was not a string, json object or sudachipy.config.Config object".to_string(), config_opt.into_py(py), ))) } diff --git a/python/src/morpheme.rs b/python/src/morpheme.rs index ad3929dd..28490eec 100644 --- a/python/src/morpheme.rs +++ b/python/src/morpheme.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -158,7 +158,7 @@ impl PyMorphemeListWrapper { for (i, m) in list.iter().enumerate() { result.push_str(m.surface().deref()); if i + 1 != nmorphs { - result.push_str(" "); + result.push(' '); } } PyString::new(py, result.as_str()) @@ -193,7 +193,7 @@ impl PyMorphemeListWrapper { } fn __bool__(&self, py: Python) -> bool { - self.internal(py).len() != 0 + !self.internal(py).is_empty() } } @@ -387,9 +387,7 @@ impl PyMorpheme { let splitted = list .internal(py) .split_into(mode, self.index, out_ref) - .map_err(|e| { - PyException::new_err(format!("Error while splitting morpheme: {}", e.to_string())) - })?; + .map_err(|e| PyException::new_err(format!("Error while splitting morpheme: {}", e)))?; if add_single.unwrap_or(true) && !splitted { list.internal(py) diff --git a/python/src/pos_matcher.rs b/python/src/pos_matcher.rs index 7c6a884d..c62a2f6e 100644 --- a/python/src/pos_matcher.rs +++ b/python/src/pos_matcher.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,7 +49,7 @@ impl PyPosMatcher { fn create_from_fn(dic: &Arc, func: &PyAny, py: Python) -> PyResult { let mut data = Vec::new(); for (pos_id, pos) in dic.pos.iter().enumerate() { - let args = PyTuple::new(py, &[pos]); + let args = PyTuple::new(py, [pos]); if func.call1(args)?.downcast::()?.is_true() { data.push(pos_id as u16); } @@ -178,7 +178,6 @@ impl PyPosMatcher { let max_id = self.dic.pos.len(); // map -> filter chain is needed to handle exactly u16::MAX POS entries let values = (0..max_id) - .into_iter() .map(|x| x as u16) .filter(|id| !self.matcher.matches_id(*id)); let matcher = PosMatcher::new(values); diff --git a/python/src/pretokenizer.rs b/python/src/pretokenizer.rs index 755f040b..d959285f 100644 --- a/python/src/pretokenizer.rs +++ b/python/src/pretokenizer.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -150,7 +150,7 @@ impl PyPretokenizer { } Some(h) => { let mrp: &PyAny = morphs.as_ref(py); - let args = PyTuple::new(py, &[index, string, mrp]); + let args = PyTuple::new(py, [index, string, mrp]); h.as_ref(py).call1(args) } } diff --git a/python/src/tokenizer.rs b/python/src/tokenizer.rs index 558d02cb..1a8446e0 100644 --- a/python/src/tokenizer.rs +++ b/python/src/tokenizer.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -145,7 +145,7 @@ impl PyTokenizer { None => None, Some(m) => Some(extract_mode(py, m)?), }; - let default_mode = mode.map(|m| self.tokenizer.set_mode(m.into())); + let default_mode = mode.map(|m| self.tokenizer.set_mode(m)); let mut tokenizer = scopeguard::guard(&mut self.tokenizer, |t| { default_mode.map(|m| t.set_mode(m)); }); @@ -156,7 +156,7 @@ impl PyTokenizer { tokenizer.do_tokenize() }); - err.map_err(|e| SudachiPyErr::new_err(format!("Tokenization error: {}", e.to_string())))?; + err.map_err(|e| SudachiPyErr::new_err(format!("Tokenization error: {}", e)))?; let out_list = match out { None => { @@ -177,7 +177,7 @@ impl PyTokenizer { morphemes .collect_results(tokenizer.deref_mut()) - .map_err(|e| SudachiPyErr::new_err(format!("Tokenization error: {}", e.to_string())))?; + .map_err(|e| SudachiPyErr::new_err(format!("Tokenization error: {}", e)))?; Ok(out_list) } diff --git a/sudachi-cli/src/build.rs b/sudachi-cli/src/build.rs index eea11ecf..4c92dc53 100644 --- a/sudachi-cli/src/build.rs +++ b/sudachi-cli/src/build.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -172,12 +172,12 @@ fn output_file(p: &Path) -> File { OpenOptions::new() .write(true) .create_new(true) - .open(&p) + .open(p) .unwrap_or_else(|e| panic!("failed to open {:?} for writing:\n{:?}", p, e)) } fn dump_part(dict: PathBuf, part: String, output: PathBuf) { - let file = File::open(&dict).expect("open failed"); + let file = File::open(dict).expect("open failed"); let data = unsafe { Mmap::map(&file) }.expect("mmap failed"); let loader = unsafe { DictionaryLoader::read_any_dictionary(&data) }.expect("failed to load dictionary"); @@ -215,7 +215,7 @@ fn dump_matrix(grammar: &Grammar, w: &mut W) { for left in 0..conn.num_left() { for right in 0..conn.num_right() { let cost = conn.cost(left as _, right as _); - write!(w, "{} {} {}\n", left, right, cost).unwrap(); + writeln!(w, "{} {} {}", left, right, cost).unwrap(); } } } diff --git a/sudachi-cli/src/main.rs b/sudachi-cli/src/main.rs index 69aabc7b..3d825d15 100644 --- a/sudachi-cli/src/main.rs +++ b/sudachi-cli/src/main.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,9 +34,10 @@ use sudachi::prelude::*; #[cfg(feature = "bake_dictionary")] const BAKED_DICTIONARY_BYTES: &[u8] = include_bytes!(env!("SUDACHI_DICT_PATH")); -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq, Default)] pub enum SentenceSplitMode { /// Do both sentence splitting and analysis + #[default] Default, /// Do only sentence splitting and not analysis Only, @@ -44,12 +45,6 @@ pub enum SentenceSplitMode { None, } -impl Default for SentenceSplitMode { - fn default() -> Self { - SentenceSplitMode::Default - } -} - impl FromStr for SentenceSplitMode { type Err = &'static str; @@ -156,7 +151,7 @@ fn main() { // output: stdout or file let inner_writer: Box = match &args.output_file { Some(output_path) => Box::new( - File::create(&output_path) + File::create(output_path) .unwrap_or_else(|_| panic!("Failed to open output file {:?}", &output_path)), ), None => Box::new(io::stdout()), @@ -207,10 +202,10 @@ fn strip_eol(data: &str) -> &str { let mut bytes = data.as_bytes(); let mut len = bytes.len(); if len > 1 && bytes[len - 1] == b'\n' { - len = len - 1; + len -= 1; bytes = &bytes[..len]; if len > 1 && bytes[len - 1] == b'\r' { - len = len - 1; + len -= 1; bytes = &bytes[..len]; } } diff --git a/sudachi-cli/src/output.rs b/sudachi-cli/src/output.rs index d242ce53..94871dab 100644 --- a/sudachi-cli/src/output.rs +++ b/sudachi-cli/src/output.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -44,7 +44,7 @@ impl Wakachi { impl SudachiOutput for Wakachi { fn write(&self, writer: &mut Writer, morphemes: &MorphemeList) -> SudachiResult<()> { - if morphemes.len() == 0 { + if morphemes.is_empty() { writer.write_all(b"\n")?; return Ok(()); } diff --git a/sudachi/src/analysis/created.rs b/sudachi/src/analysis/created.rs index 87313bbe..2bce363f 100644 --- a/sudachi/src/analysis/created.rs +++ b/sudachi/src/analysis/created.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,7 +40,7 @@ impl CreatedWords { const MAX_SHIFT: Carrier = CreatedWords::MAX_VALUE - 1; pub fn empty() -> CreatedWords { - return Default::default(); + Default::default() } pub fn single>(length: Pos) -> CreatedWords { @@ -55,7 +55,7 @@ impl CreatedWords { #[must_use] pub fn add_word>(&self, length: P) -> CreatedWords { let mask = CreatedWords::single(length); - return self.add(mask); + self.add(mask) } #[must_use] @@ -67,21 +67,19 @@ impl CreatedWords { let mask = CreatedWords::single(length); if (self.0 & mask.0) == 0 { HasWord::No + } else if length.into() >= CreatedWords::MAX_VALUE as _ { + HasWord::Maybe } else { - if length.into() >= CreatedWords::MAX_VALUE as _ { - HasWord::Maybe - } else { - HasWord::Yes - } + HasWord::Yes } } pub fn is_empty(&self) -> bool { - return self.0 == 0; + self.0 == 0 } pub fn not_empty(&self) -> bool { - return !self.is_empty(); + !self.is_empty() } } diff --git a/sudachi/src/analysis/lattice.rs b/sudachi/src/analysis/lattice.rs index 7d1c4a97..a0ddab0a 100644 --- a/sudachi/src/analysis/lattice.rs +++ b/sudachi/src/analysis/lattice.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -70,6 +70,7 @@ impl VNode { /// the size of vectors never shrink. /// You must use the size parameter to check the current size and never /// access vectors after the end. +#[derive(Default)] pub struct Lattice { ends: Vec>, ends_full: Vec>, @@ -78,18 +79,6 @@ pub struct Lattice { size: usize, } -impl Default for Lattice { - fn default() -> Self { - Lattice { - ends: Vec::new(), - ends_full: Vec::new(), - indices: Vec::new(), - eos: None, - size: 0, - } - } -} - impl Lattice { fn reset_vec(data: &mut Vec>, target: usize) { for v in data.iter_mut() { @@ -282,7 +271,7 @@ impl Lattice { write!(out, " {}", connect_cost)?; } - write!(out, "\n")?; + writeln!(out)?; dump_idx += 1; } diff --git a/sudachi/src/analysis/mlist.rs b/sudachi/src/analysis/mlist.rs index 2d1a76f0..2e80cb56 100644 --- a/sudachi/src/analysis/mlist.rs +++ b/sudachi/src/analysis/mlist.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -93,11 +93,7 @@ impl MorphemeList { match self.input.try_borrow_mut() { Ok(mut i) => { let mref = i.deref_mut(); - analyzer.swap_result( - &mut mref.input, - &mut self.nodes.mut_data(), - &mut mref.subset, - ); + analyzer.swap_result(&mut mref.input, self.nodes.mut_data(), &mut mref.subset); Ok(()) } Err(_) => Err(SudachiError::MorphemeListBorrowed), diff --git a/sudachi/src/analysis/morpheme.rs b/sudachi/src/analysis/morpheme.rs index c412e66b..39929c63 100644 --- a/sudachi/src/analysis/morpheme.rs +++ b/sudachi/src/analysis/morpheme.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -91,21 +91,21 @@ impl<'a, T: DictionaryAccess> Morpheme<'a, T> { /// /// "Dictionary form" means a word's lemma and "終止形" in Japanese. pub fn dictionary_form(&self) -> &str { - &self.get_word_info().dictionary_form() + self.get_word_info().dictionary_form() } /// Returns the normalized form of morpheme /// /// This method returns the form normalizing inconsistent spellings and inflected forms pub fn normalized_form(&self) -> &str { - &self.get_word_info().normalized_form() + self.get_word_info().normalized_form() } /// Returns the reading form of morpheme. /// /// Returns Japanese syllabaries 'フリガナ' in katakana. pub fn reading_form(&self) -> &str { - &self.get_word_info().reading_form() + self.get_word_info().reading_form() } /// Returns if this morpheme is out of vocabulary @@ -131,7 +131,7 @@ impl<'a, T: DictionaryAccess> Morpheme<'a, T> { } pub fn synonym_group_ids(&self) -> &[u32] { - &self.get_word_info().synonym_group_ids() + self.get_word_info().synonym_group_ids() } pub fn get_word_info(&self) -> &WordInfo { diff --git a/sudachi/src/analysis/node.rs b/sudachi/src/analysis/node.rs index 2e840080..fa2537d3 100644 --- a/sudachi/src/analysis/node.rs +++ b/sudachi/src/analysis/node.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -186,8 +186,8 @@ impl ResultNode { text: &'a InputBuffer, ) -> NodeSplitIterator<'a> { let splits: &[WordId] = match mode { - Mode::A => &self.word_info.a_unit_split(), - Mode::B => &self.word_info.b_unit_split(), + Mode::A => self.word_info.a_unit_split(), + Mode::B => self.word_info.b_unit_split(), Mode::C => panic!("splitting Node with Mode::C is not supported"), }; diff --git a/sudachi/src/analysis/stateful_tokenizer.rs b/sudachi/src/analysis/stateful_tokenizer.rs index fa69402e..ec7e8db5 100644 --- a/sudachi/src/analysis/stateful_tokenizer.rs +++ b/sudachi/src/analysis/stateful_tokenizer.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -88,7 +88,7 @@ impl StatefulTokenizer { /// Return current analysis mode pub fn mode(&self) -> Mode { - return self.mode; + self.mode } /// Analyzer will read only following [`WordInfo`] field subset @@ -105,7 +105,9 @@ impl StatefulTokenizer { /// Prepare StatefulTokenizer for the next data. /// Data must be written in the returned reference. pub fn reset(&mut self) -> &mut String { - self.top_path.as_mut().map(|p| p.clear()); + if let Some(p) = self.top_path.as_mut() { + p.clear() + } self.oov.clear(); self.input.reset() } @@ -169,7 +171,7 @@ impl StatefulTokenizer { /// Resolve the path (as ResultNodes) with the smallest cost fn resolve_best_path(&mut self) -> SudachiResult> { let lex = self.dictionary.lexicon(); - let mut path = std::mem::replace(&mut self.top_path, None).unwrap_or_else(|| Vec::new()); + let mut path = self.top_path.take().unwrap_or_default(); self.lattice.fill_top_path(&mut self.top_path_ids); self.top_path_ids.reverse(); for pid in self.top_path_ids.drain(..) { diff --git a/sudachi/src/analysis/stateless_tokenizer.rs b/sudachi/src/analysis/stateless_tokenizer.rs index 5670d217..669d7720 100644 --- a/sudachi/src/analysis/stateless_tokenizer.rs +++ b/sudachi/src/analysis/stateless_tokenizer.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -133,7 +133,7 @@ pub(super) fn split_path( } pub(super) fn dump_path(path: &Vec) { - for (i, node) in (&path).iter().enumerate() { + for (i, node) in path.iter().enumerate() { println!("{}: {}", i, node); } } diff --git a/sudachi/src/config.rs b/sudachi/src/config.rs index d67f12e9..b924e35a 100644 --- a/sudachi/src/config.rs +++ b/sudachi/src/config.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -61,9 +61,9 @@ struct PathResolver { impl PathResolver { fn with_capacity(capacity: usize) -> PathResolver { - return PathResolver { + PathResolver { roots: Vec::with_capacity(capacity), - }; + } } fn add>(&mut self, path: P) { @@ -72,7 +72,7 @@ impl PathResolver { fn contains>(&self, path: P) -> bool { let query = path.as_ref(); - return self.roots.iter().find(|p| p.as_path() == query).is_some(); + return self.roots.iter().any(|p| p.as_path() == query); } pub fn first_existing + Clone>(&self, path: P) -> Option { @@ -96,13 +96,15 @@ impl PathResolver { } pub fn roots(&self) -> &[PathBuf] { - return &self.roots; + &self.roots } } #[derive(Deserialize, Clone, Copy, Debug, Eq, PartialEq)] #[serde(rename_all = "snake_case")] +#[derive(Default)] pub enum SurfaceProjection { + #[default] Surface, Normalized, Reading, @@ -112,12 +114,6 @@ pub enum SurfaceProjection { NormalizedNouns, } -impl Default for SurfaceProjection { - fn default() -> Self { - SurfaceProjection::Surface - } -} - impl SurfaceProjection { /// Return required InfoSubset for the current projection type pub fn required_subset(&self) -> InfoSubset { @@ -293,13 +289,13 @@ impl ConfigBuilder { Config { resolver, system_dict: self.systemDict, - user_dicts: self.userDict.unwrap_or_else(|| Vec::new()), + user_dicts: self.userDict.unwrap_or_default(), character_definition_file, - connection_cost_plugins: self.connectionCostPlugin.unwrap_or(Vec::new()), - input_text_plugins: self.inputTextPlugin.unwrap_or(Vec::new()), - oov_provider_plugins: self.oovProviderPlugin.unwrap_or(Vec::new()), - path_rewrite_plugins: self.pathRewritePlugin.unwrap_or(Vec::new()), + connection_cost_plugins: self.connectionCostPlugin.unwrap_or_default(), + input_text_plugins: self.inputTextPlugin.unwrap_or_default(), + oov_provider_plugins: self.oovProviderPlugin.unwrap_or_default(), + path_rewrite_plugins: self.pathRewritePlugin.unwrap_or_default(), projection: self.projection.unwrap_or(SurfaceProjection::Surface), } } @@ -423,7 +419,7 @@ impl Config { } // Report an error - return Err(self.resolver.resolution_failure(&file_path)); + Err(self.resolver.resolution_failure(&file_path)) } pub fn resolved_system_dict(&self) -> Result { diff --git a/sudachi/src/dic/build/conn.rs b/sudachi/src/dic/build/conn.rs index d73cd17e..338e4f59 100644 --- a/sudachi/src/dic/build/conn.rs +++ b/sudachi/src/dic/build/conn.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -138,7 +138,7 @@ impl ConnBuffer { } fn parse_header(&mut self) -> DicWriteResult<(i16, i16)> { - let mut items = SPLIT_REGEX.splitn(&self.line.trim(), 2); + let mut items = SPLIT_REGEX.splitn(self.line.trim(), 2); // TODO: fix get_next error message let left = it_next(&self.line, &mut items, "left_num", parse_i16)?; let right = it_next(&self.line, &mut items, "right_num", parse_i16)?; @@ -146,7 +146,7 @@ impl ConnBuffer { } fn parse_line(&mut self) -> DicWriteResult<()> { - let mut items = SPLIT_REGEX.splitn(&self.line.trim(), 3); + let mut items = SPLIT_REGEX.splitn(self.line.trim(), 3); let left = it_next(&self.line, &mut items, "left", parse_i16)?; let right = it_next(&self.line, &mut items, "right", parse_i16)?; let cost = it_next(&self.line, &mut items, "cost", parse_i16)?; @@ -164,12 +164,12 @@ impl ConnBuffer { } fn num_error(part: &'static str, value: i16) -> SudachiResult { - return Err(DicBuildError { + Err(DicBuildError { file: "".to_owned(), line: 0, cause: BuildFailure::InvalidConnSize(part, value), } - .into()); + .into()) } #[cfg(test)] diff --git a/sudachi/src/dic/build/error.rs b/sudachi/src/dic/build/error.rs index 169604f7..487adf98 100644 --- a/sudachi/src/dic/build/error.rs +++ b/sudachi/src/dic/build/error.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -95,20 +95,12 @@ pub enum BuildFailure { TrieBuildFailure, } +#[derive(Default)] pub(crate) struct DicCompilationCtx { name: String, line: usize, } -impl Default for DicCompilationCtx { - fn default() -> Self { - DicCompilationCtx { - name: Default::default(), - line: Default::default(), - } - } -} - impl DicCompilationCtx { pub fn memory() -> Self { DicCompilationCtx { diff --git a/sudachi/src/dic/build/lexicon.rs b/sudachi/src/dic/build/lexicon.rs index e0f15e07..931b1fe1 100644 --- a/sudachi/src/dic/build/lexicon.rs +++ b/sudachi/src/dic/build/lexicon.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -146,7 +146,7 @@ pub(crate) trait SplitUnitResolver { surface, pos, reading, - } => self.resolve_inline(&surface, *pos, reading.as_deref()), + } => self.resolve_inline(surface, *pos, reading.as_deref()), } } @@ -206,7 +206,7 @@ impl RawLexiconEntry { ) -> DicWriteResult { let mut size = 0; - size += u16w.write(w, &self.headword())?; // surface of WordInfo + size += u16w.write(w, self.headword())?; // surface of WordInfo size += u16w.write_len(w, self.surface.len())?; // surface for trie w.write_all(&self.pos.to_le_bytes())?; size += 2; @@ -344,12 +344,10 @@ impl LexiconReader { let pos = rec.ctx.transform(self.pos_of([p1, p2, p3, p4, p5, p6]))?; - if splitting == Mode::A { - if !split_a.is_empty() || !split_b.is_empty() { - return rec.ctx.err(BuildFailure::InvalidSplit( - "A-mode tokens can't have splits".to_owned(), - )); - } + if splitting == Mode::A && (!split_a.is_empty() || !split_b.is_empty()) { + return rec.ctx.err(BuildFailure::InvalidSplit( + "A-mode tokens can't have splits".to_owned(), + )); } self.unresolved += resolve_a + resolve_b; @@ -497,7 +495,7 @@ impl LexiconReader { if WORD_ID_LITERAL.is_match(data) { Ok(SplitUnit::Ref(parse_wordid(data)?)) } else { - let mut iter = data.splitn(8, ","); + let mut iter = data.splitn(8, ','); let surface = it_next(data, &mut iter, "(1) surface", unescape)?; let p1 = it_next(data, &mut iter, "(2) pos-1", unescape_cow)?; let p2 = it_next(data, &mut iter, "(3) pos-2", unescape_cow)?; diff --git a/sudachi/src/dic/build/lexicon/test.rs b/sudachi/src/dic/build/lexicon/test.rs index 3a62c084..6039fc00 100644 --- a/sudachi/src/dic/build/lexicon/test.rs +++ b/sudachi/src/dic/build/lexicon/test.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -105,7 +105,7 @@ fn parse_kyoto_ignored() { let entries = rdr.entries(); assert_eq!(entries.len(), 1); let kyoto = &entries[0]; - assert_eq!(false, kyoto.should_index()); + assert!(!kyoto.should_index()); } #[test] @@ -155,9 +155,9 @@ fn parse_pos_exhausted() { let mut rdr = LexiconReader::new(); let mut data = String::new(); for i in 0..=MAX_POS_IDS + 1 { - write!( + writeln!( data, - "x,-1,-1,5293,京都,名詞,固有名詞,地名,一般,*,{},キョウト,京都,*,A,*,*,*,*\n", + "x,-1,-1,5293,京都,名詞,固有名詞,地名,一般,*,{},キョウト,京都,*,A,*,*,*,*", i ) .unwrap() diff --git a/sudachi/src/dic/build/mod.rs b/sudachi/src/dic/build/mod.rs index 2cd1c49a..6a034a8a 100644 --- a/sudachi/src/dic/build/mod.rs +++ b/sudachi/src/dic/build/mod.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -109,15 +109,15 @@ impl DictionaryAccess for NoDic { } fn input_text_plugins(&self) -> &[Box] { - return &[]; + &[] } fn oov_provider_plugins(&self) -> &[Box] { - return &[]; + &[] } fn path_rewrite_plugins(&self) -> &[Box] { - return &[]; + &[] } } @@ -305,7 +305,7 @@ impl DictBuilder { } /// this function must only be used in resolve_impl - fn unsafe_make_resolver<'a, 'b>(&'a self) -> RawDictResolver<'b> { + fn unsafe_make_resolver<'b>(&self) -> RawDictResolver<'b> { let resolver = RawDictResolver::new(self.lexicon.entries(), self.user); // resolver borrows parts of entries, but it does not touch splits // resolve function only modifies splits diff --git a/sudachi/src/dic/build/parse.rs b/sudachi/src/dic/build/parse.rs index ecf4d32e..0c25fb23 100644 --- a/sudachi/src/dic/build/parse.rs +++ b/sudachi/src/dic/build/parse.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -95,7 +95,7 @@ pub(crate) fn parse_dic_form(data: &str) -> DicWriteResult { #[inline] pub(crate) fn parse_wordid(data: &str) -> DicWriteResult { - if data.starts_with("U") { + if data.starts_with('U') { let wid = parse_wordid_raw(&data[1..]); wid.map(|w| WordId::new(1, w.word())) } else { @@ -143,7 +143,7 @@ where { let mut result = Vec::with_capacity(4); - for part in data.split("/") { + for part in data.split('/') { result.push(f(part)?); } @@ -179,7 +179,7 @@ pub(crate) fn unescape_cow(data: &str) -> DicWriteResult> { if !UNICODE_LITERAL.is_match(data) { Ok(Cow::Borrowed(data)) } else { - unescape_slow(data).map(|s| Cow::Owned(s)) + unescape_slow(data).map(Cow::Owned) } } diff --git a/sudachi/src/dic/build/primitives.rs b/sudachi/src/dic/build/primitives.rs index d1b83c9e..2ee91e69 100644 --- a/sudachi/src/dic/build/primitives.rs +++ b/sudachi/src/dic/build/primitives.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,7 +45,7 @@ impl Utf16Writer { w.write_all(&[length as u8])?; 1 } else { - let b0 = (length as u8) & 0xff; + let b0 = length as u8; let b1 = ((length >> 8) as u8) | 0x80; w.write_all(&[b1, b0])?; 2 diff --git a/sudachi/src/dic/build/test/mod.rs b/sudachi/src/dic/build/test/mod.rs index 5710c42d..f2ee3e98 100644 --- a/sudachi/src/dic/build/test/mod.rs +++ b/sudachi/src/dic/build/test/mod.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ use crate::dic::DictionaryLoader; use crate::error::SudachiError; use std::io::sink; -static MATRIX_10_10: &'static [u8] = include_bytes!("matrix_10x10.def"); +static MATRIX_10_10: &[u8] = include_bytes!("matrix_10x10.def"); #[test] fn build_grammar() { diff --git a/sudachi/src/dic/character_category.rs b/sudachi/src/dic/character_category.rs index 47d2bd15..cd2fafea 100644 --- a/sudachi/src/dic/character_category.rs +++ b/sudachi/src/dic/character_category.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -149,10 +149,7 @@ impl CharacterCategory { } let mut categories = CategoryType::empty(); - for elem in cols[1..] - .iter() - .take_while(|elem| elem.chars().next().unwrap() != '#') - { + for elem in cols[1..].iter().take_while(|elem| !elem.starts_with('#')) { categories.insert(match elem.parse() { Ok(t) => t, Err(_) => { @@ -292,7 +289,7 @@ impl Iterator for CharCategoryIter<'_> { (left..char::MAX, *self.categories.categories.last().unwrap()) } else if self.current == 0 { let right = char::from_u32(*self.categories.boundaries.first().unwrap()).unwrap(); - let r = (0 as char)..right as char; + let r = (0 as char)..right; (r, self.categories.categories[0]) } else { let left = char::from_u32(self.categories.boundaries[self.current - 1]).unwrap(); diff --git a/sudachi/src/dic/dictionary.rs b/sudachi/src/dic/dictionary.rs index 0092c21f..b3f8c8b3 100644 --- a/sudachi/src/dic/dictionary.rs +++ b/sudachi/src/dic/dictionary.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -87,7 +87,7 @@ impl JapaneseDictionary { let plugins = { let grammar = &mut basic_dict.grammar; - let cfg = &*cfg; + let cfg = cfg; Plugins::load(cfg, grammar)? }; @@ -126,7 +126,7 @@ impl JapaneseDictionary { let plugins = { let grammar = &mut basic_dict.grammar; - let cfg = &*cfg; + let cfg = cfg; Plugins::load(cfg, grammar)? }; @@ -155,12 +155,12 @@ impl JapaneseDictionary { } /// Returns grammar with the correct lifetime - pub fn grammar<'a>(&'a self) -> &Grammar<'a> { + pub fn grammar(&self) -> &Grammar<'_> { &self._grammar } /// Returns lexicon with the correct lifetime - pub fn lexicon<'a>(&'a self) -> &LexiconSet<'a> { + pub fn lexicon(&self) -> &LexiconSet<'_> { &self._lexicon } diff --git a/sudachi/src/dic/grammar.rs b/sudachi/src/dic/grammar.rs index 1642a3bf..c145cbf7 100644 --- a/sudachi/src/dic/grammar.rs +++ b/sudachi/src/dic/grammar.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -259,14 +259,11 @@ mod tests { storage } fn string_to_bytes(s: &str) -> Vec { - s.encode_utf16() - .map(|c| c.to_le_bytes()) - .flatten() - .collect() + s.encode_utf16().flat_map(|c| c.to_le_bytes()).collect() } - fn build_partofspeech(storage: &mut Vec) -> () { + fn build_partofspeech(storage: &mut Vec) { // number of part of speech - storage.extend(&(3 as i16).to_le_bytes()); + storage.extend(&3_i16.to_le_bytes()); storage.extend( b"\x07B\x00O\x00S\x00/\x00E\x00O\x00S\x00\x01*\x00\x01*\x00\x01*\x00\x01*\x00\x01*\x00", @@ -287,20 +284,20 @@ mod tests { storage.extend(b"\x06"); storage.extend(string_to_bytes("終止形-一般")); } - fn build_connect_table(storage: &mut Vec) -> () { - storage.extend(&(3 as i16).to_le_bytes()); - storage.extend(&(3 as i16).to_le_bytes()); + fn build_connect_table(storage: &mut Vec) { + storage.extend(&3_i16.to_le_bytes()); + storage.extend(&3_i16.to_le_bytes()); - storage.extend(&(0 as i16).to_le_bytes()); - storage.extend(&(-300 as i16).to_le_bytes()); - storage.extend(&(300 as i16).to_le_bytes()); + storage.extend(&0_i16.to_le_bytes()); + storage.extend(&(-300_i16).to_le_bytes()); + storage.extend(&300_i16.to_le_bytes()); - storage.extend(&(300 as i16).to_le_bytes()); - storage.extend(&(-500 as i16).to_le_bytes()); - storage.extend(&(-100 as i16).to_le_bytes()); + storage.extend(&300_i16.to_le_bytes()); + storage.extend(&(-500_i16).to_le_bytes()); + storage.extend(&(-100_i16).to_le_bytes()); - storage.extend(&(-3000 as i16).to_le_bytes()); - storage.extend(&(200 as i16).to_le_bytes()); - storage.extend(&(2000 as i16).to_le_bytes()); + storage.extend(&(-3000_i16).to_le_bytes()); + storage.extend(&200_i16.to_le_bytes()); + storage.extend(&2000_i16.to_le_bytes()); } } diff --git a/sudachi/src/dic/header.rs b/sudachi/src/dic/header.rs index 3df2e0e5..7663c1dd 100644 --- a/sudachi/src/dic/header.rs +++ b/sudachi/src/dic/header.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -119,6 +119,12 @@ pub struct Header { pub description: String, } +impl Default for Header { + fn default() -> Self { + Self::new() + } +} + impl Header { const DESCRIPTION_SIZE: usize = 256; pub const STORAGE_SIZE: usize = 8 + 8 + Header::DESCRIPTION_SIZE; @@ -191,7 +197,7 @@ impl Header { w.write_all(&self.version.to_u64().to_le_bytes())?; w.write_all(&self.create_time.to_le_bytes())?; - w.write_all(&self.description.as_bytes())?; + w.write_all(self.description.as_bytes())?; for _ in 0..Header::DESCRIPTION_SIZE - self.description.len() { w.write_all(&[0])?; } @@ -204,7 +210,7 @@ fn nul_terminated_str_from_slice(buf: &[u8]) -> String { let str_bytes: &[u8] = if let Some(nul_idx) = buf.iter().position(|b| *b == 0) { &buf[..nul_idx] } else { - &buf + buf }; String::from_utf8_lossy(str_bytes).to_string() } diff --git a/sudachi/src/dic/lexicon/mod.rs b/sudachi/src/dic/lexicon/mod.rs index 20043e81..f6e9f599 100644 --- a/sudachi/src/dic/lexicon/mod.rs +++ b/sudachi/src/dic/lexicon/mod.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -110,7 +110,7 @@ impl<'a> Lexicon<'a> { #[inline] fn word_id(&self, raw_id: u32) -> WordId { - return WordId::new(self.lex_id, raw_id); + WordId::new(self.lex_id, raw_id) } /// Returns an iterator of word_id and end of words that matches given input diff --git a/sudachi/src/dic/lexicon/trie.rs b/sudachi/src/dic/lexicon/trie.rs index d6e76c31..2dfc0d3e 100644 --- a/sudachi/src/dic/lexicon/trie.rs +++ b/sudachi/src/dic/lexicon/trie.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -100,7 +100,7 @@ impl<'a> Trie<'a> { } pub fn total_size(&self) -> usize { - 4 * self.array.len() as usize + 4 * self.array.len() } #[inline] diff --git a/sudachi/src/dic/lexicon/word_id_table.rs b/sudachi/src/dic/lexicon/word_id_table.rs index 48683497..bd79cfc5 100644 --- a/sudachi/src/dic/lexicon/word_id_table.rs +++ b/sudachi/src/dic/lexicon/word_id_table.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,10 +39,10 @@ impl<'a> WordIdTable<'a> { #[inline] pub fn entries(&self, index: usize) -> WordIdIter { debug_assert!(index < self.bytes.len()); - let ptr = unsafe { self.bytes.as_ptr().offset((index + self.offset) as isize) }; + let ptr = unsafe { self.bytes.as_ptr().add(index + self.offset) }; let cnt = unsafe { ptr.read() } as usize; let data_ptr = unsafe { ptr.offset(1) } as *const u32; - debug_assert!(index + cnt * std::mem::size_of::() + 1 <= self.bytes.len()); + debug_assert!(index + cnt * std::mem::size_of::() < self.bytes.len()); WordIdIter { data: unsafe { NonNull::new_unchecked(data_ptr as _) }, remaining: cnt, diff --git a/sudachi/src/dic/lexicon_set.rs b/sudachi/src/dic/lexicon_set.rs index 32ffde1d..3a00ab68 100644 --- a/sudachi/src/dic/lexicon_set.rs +++ b/sudachi/src/dic/lexicon_set.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -116,8 +116,8 @@ impl LexiconSet<'_> { let pos_id = word_info.pos_id as usize; if dict_id > 0 && pos_id >= self.num_system_pos { // user defined part-of-speech - word_info.pos_id = (pos_id as usize - self.num_system_pos - + self.pos_offsets[dict_id as usize]) as u16; + word_info.pos_id = + (pos_id - self.num_system_pos + self.pos_offsets[dict_id as usize]) as u16; } } diff --git a/sudachi/src/dic/mod.rs b/sudachi/src/dic/mod.rs index 8de28bc4..585392ab 100644 --- a/sudachi/src/dic/mod.rs +++ b/sudachi/src/dic/mod.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -77,10 +77,7 @@ impl<'a> LoadedDictionary<'a> { character_category_file: &Path, ) -> SudachiResult> { let character_category = CharacterCategory::from_file(character_category_file)?; - Ok(Self::from_system_dictionary_and_chardef( - dictionary_bytes, - character_category, - )?) + Self::from_system_dictionary_and_chardef(dictionary_bytes, character_category) } /// Creates a system dictionary from bytes, and load embedded default character category @@ -88,10 +85,7 @@ impl<'a> LoadedDictionary<'a> { dictionary_bytes: &'a [u8], ) -> SudachiResult> { let character_category = CharacterCategory::from_bytes(DEFAULT_CHAR_DEF_BYTES)?; - Ok(Self::from_system_dictionary_and_chardef( - dictionary_bytes, - character_category, - )?) + Self::from_system_dictionary_and_chardef(dictionary_bytes, character_category) } #[cfg(test)] @@ -103,7 +97,9 @@ impl<'a> LoadedDictionary<'a> { let lexicon = other.lexicon; let grammar = other.grammar; self.lexicon_set.append(lexicon, npos)?; - grammar.map(|g| self.grammar.merge(g)); + if let Some(g) = grammar { + self.grammar.merge(g) + } Ok(self) } } diff --git a/sudachi/src/dic/read/mod.rs b/sudachi/src/dic/read/mod.rs index d218e8d3..494855e2 100644 --- a/sudachi/src/dic/read/mod.rs +++ b/sudachi/src/dic/read/mod.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,7 +30,7 @@ pub fn u32_array_parser(input: &[u8]) -> SudachiNomResult<&[u8], Vec> { pub fn u32_wid_array_parser(input: &[u8]) -> SudachiNomResult<&[u8], Vec> { let (rest, length) = le_u8(input)?; - nom::multi::count(le_u32.map(|id| WordId::from_raw(id)), length as usize)(rest) + nom::multi::count(le_u32.map(WordId::from_raw), length as usize)(rest) } pub fn skip_wid_array(input: &[u8]) -> SudachiNomResult<&[u8], Vec> { diff --git a/sudachi/src/dic/storage.rs b/sudachi/src/dic/storage.rs index b724ccbc..90a8b8ce 100644 --- a/sudachi/src/dic/storage.rs +++ b/sudachi/src/dic/storage.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ impl AsRef<[u8]> for Storage { match self { Storage::File(m) => m.as_bytes(), Storage::Borrowed(b) => b, - Storage::Owned(v) => &v, + Storage::Owned(v) => v, } } } diff --git a/sudachi/src/dic/word_id.rs b/sudachi/src/dic/word_id.rs index 32240093..af1acce4 100644 --- a/sudachi/src/dic/word_id.rs +++ b/sudachi/src/dic/word_id.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -57,7 +57,7 @@ impl WordId { let dic_part = ((dic & 0xf) as u32) << 28; let word_part = word & WORD_MASK; let raw = dic_part | word_part; - return Self::from_raw(raw); + Self::from_raw(raw) } /// Creates the WordId with correctness checking @@ -84,12 +84,12 @@ impl WordId { /// Extract Dictionary ID pub fn dic(&self) -> u8 { - return (self.raw >> 28) as u8; + (self.raw >> 28) as u8 } /// Extract Word ID pub fn word(&self) -> u32 { - return self.raw & WORD_MASK; + self.raw & WORD_MASK } /// Check if the word comes from the system dictionary diff --git a/sudachi/src/hash.rs b/sudachi/src/hash.rs index 041f3460..96987b19 100644 --- a/sudachi/src/hash.rs +++ b/sudachi/src/hash.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -91,7 +91,7 @@ impl Hasher for RoMuHash { #[inline(always)] fn write_u64(&mut self, i: u64) { - self.consume(i as u64); + self.consume(i); } #[inline(always)] diff --git a/sudachi/src/input_text/buffer/mod.rs b/sudachi/src/input_text/buffer/mod.rs index db2996af..f9877b73 100644 --- a/sudachi/src/input_text/buffer/mod.rs +++ b/sudachi/src/input_text/buffer/mod.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,19 +34,14 @@ const MAX_LENGTH: usize = u16::MAX as usize / 4 * 3; /// if the limit of the rewritten sentence is more than this number, then all bets are off const REALLY_MAX_LENGTH: usize = u16::MAX as usize; -#[derive(Eq, PartialEq, Debug, Clone)] +#[derive(Eq, PartialEq, Debug, Clone, Default)] enum BufferState { + #[default] Clean, RW, RO, } -impl Default for BufferState { - fn default() -> Self { - BufferState::Clean - } -} - /// InputBuffer - prepares the input data for the analysis /// /// By saying char we actually mean Unicode codepoint here. diff --git a/sudachi/src/plugin/connect_cost/inhibit_connection.rs b/sudachi/src/plugin/connect_cost/inhibit_connection.rs index c664280d..3076dc8a 100644 --- a/sudachi/src/plugin/connect_cost/inhibit_connection.rs +++ b/sudachi/src/plugin/connect_cost/inhibit_connection.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -96,10 +96,10 @@ mod tests { fn build_mock_bytes() -> Vec { let mut buf = Vec::new(); // 0 - pos size, 1x1 connection with 0 element - buf.extend(&(0 as i16).to_le_bytes()); - buf.extend(&(1 as i16).to_le_bytes()); - buf.extend(&(1 as i16).to_le_bytes()); - buf.extend(&(0 as i16).to_le_bytes()); + buf.extend(&0_i16.to_le_bytes()); + buf.extend(&1_i16.to_le_bytes()); + buf.extend(&1_i16.to_le_bytes()); + buf.extend(&0_i16.to_le_bytes()); buf } diff --git a/sudachi/src/plugin/connect_cost/mod.rs b/sudachi/src/plugin/connect_cost/mod.rs index ad012354..eefbe335 100644 --- a/sudachi/src/plugin/connect_cost/mod.rs +++ b/sudachi/src/plugin/connect_cost/mod.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -44,7 +44,7 @@ impl PluginCategory for dyn EditConnectionCostPlugin { fn bundled_impl(name: &str) -> Option { match name { - "InhibitConnectionPlugin" => Some(Box::new(InhibitConnectionPlugin::default())), + "InhibitConnectionPlugin" => Some(Box::::default()), _ => None, } } diff --git a/sudachi/src/plugin/input_text/default_input_text/mod.rs b/sudachi/src/plugin/input_text/default_input_text/mod.rs index 7bd53323..070ca5c9 100644 --- a/sudachi/src/plugin/input_text/default_input_text/mod.rs +++ b/sudachi/src/plugin/input_text/default_input_text/mod.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -78,7 +78,7 @@ impl DefaultInputTextPlugin { for (i, line) in reader.lines().enumerate() { let line = line?; let line = line.trim(); - if line.is_empty() || line.chars().next().unwrap() == '#' { + if line.is_empty() || line.starts_with('#') { continue; } let cols: Vec<_> = line.split_whitespace().collect(); @@ -104,7 +104,7 @@ impl DefaultInputTextPlugin { } let first_char = cols[0].chars().next().unwrap(); let n_char = cols[0].chars().count(); - if key_lengths.get(&first_char).map(|v| *v).unwrap_or(0) < n_char { + if key_lengths.get(&first_char).copied().unwrap_or(0) < n_char { key_lengths.insert(first_char, n_char); } replace_char_map.insert(cols[0].to_string(), cols[1].to_string()); @@ -245,7 +245,7 @@ impl DefaultInputTextPlugin { } replacer.replace_char_iter(start..start + len, ch2, data) } - None => return, + None => (), } } } @@ -266,7 +266,7 @@ impl InputTextPlugin for DefaultInputTextPlugin { ); if rewrite_file_path.is_ok() { - let reader = BufReader::new(fs::File::open(&rewrite_file_path?)?); + let reader = BufReader::new(fs::File::open(rewrite_file_path?)?); self.read_rewrite_lists(reader)?; } else { let reader = BufReader::new(DEFAULT_REWRITE_DEF_BYTES); diff --git a/sudachi/src/plugin/input_text/mod.rs b/sudachi/src/plugin/input_text/mod.rs index b0aee380..67f00d91 100644 --- a/sudachi/src/plugin/input_text/mod.rs +++ b/sudachi/src/plugin/input_text/mod.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -70,9 +70,9 @@ impl PluginCategory for dyn InputTextPlugin { fn bundled_impl(name: &str) -> Option { match name { - "IgnoreYomiganaPlugin" => Some(Box::new(IgnoreYomiganaPlugin::default())), - "DefaultInputTextPlugin" => Some(Box::new(DefaultInputTextPlugin::default())), - "ProlongedSoundMarkPlugin" => Some(Box::new(ProlongedSoundMarkPlugin::default())), + "IgnoreYomiganaPlugin" => Some(Box::::default()), + "DefaultInputTextPlugin" => Some(Box::::default()), + "ProlongedSoundMarkPlugin" => Some(Box::::default()), _ => None, } } diff --git a/sudachi/src/plugin/loader.rs b/sudachi/src/plugin/loader.rs index e4d18d20..f874f95b 100644 --- a/sudachi/src/plugin/loader.rs +++ b/sudachi/src/plugin/loader.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -75,7 +75,7 @@ fn system_specific_name(s: &str) -> Option { let fname = p .file_name() .and_then(|np| np.to_str()) - .map(|f| make_system_specific_name(f)); + .map(make_system_specific_name); let parent = p.parent().and_then(|np| np.to_str()); match (parent, fname) { (Some(p), Some(c)) => Some(format!("{}/{}", p, c)), @@ -104,10 +104,10 @@ impl<'a, 'b, T: PluginCategory + ?Sized> PluginLoader<'a, 'b, T> { } pub fn freeze(self) -> PluginContainer { - return PluginContainer { + PluginContainer { libraries: self.libraries, plugins: self.plugins, - }; + } } fn load_plugin(&mut self, name: &str, plugin_cfg: &Value) -> SudachiResult<()> { @@ -127,7 +127,7 @@ impl<'a, 'b, T: PluginCategory + ?Sized> PluginLoader<'a, 'b, T> { self.load_plugin_from_dso(&candidates)? }; - ::do_setup(&mut plugin, plugin_cfg, &self.cfg, &mut self.grammar) + ::do_setup(&mut plugin, plugin_cfg, self.cfg, self.grammar) .map_err(|e| e.with_context(format!("plugin {} setup", name)))?; self.plugins.push(plugin); Ok(()) @@ -231,9 +231,9 @@ pub trait PluginCategory { /// Helper function to load the plugins of a single category /// Should be called with turbofish syntax and trait object type: /// `let plugins = load_plugins_of::(...)`. -pub fn load_plugins_of<'a, 'b, T: PluginCategory + ?Sized>( +pub fn load_plugins_of<'a, T: PluginCategory + ?Sized>( cfg: &'a Config, - grammar: &'a mut Grammar<'b>, + grammar: &'a mut Grammar<'_>, ) -> SudachiResult> { let mut loader: PluginLoader = PluginLoader::new(grammar, cfg); loader.load()?; diff --git a/sudachi/src/plugin/oov/mecab_oov/mod.rs b/sudachi/src/plugin/oov/mecab_oov/mod.rs index 8e2f3a8d..83f073a8 100644 --- a/sudachi/src/plugin/oov/mecab_oov/mod.rs +++ b/sudachi/src/plugin/oov/mecab_oov/mod.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -72,7 +72,7 @@ impl MeCabOovPlugin { let line = line?; let line = line.trim(); if line.is_empty() - || line.chars().next().unwrap() == '#' + || line.starts_with('#') || line.chars().take(2).collect::>() == vec!['0', 'x'] { continue; @@ -125,13 +125,13 @@ impl MeCabOovPlugin { for (i, line) in reader.lines().enumerate() { let line = line?; let line = line.trim(); - if line.is_empty() || line.chars().next().unwrap() == '#' { + if line.is_empty() || line.starts_with('#') { continue; } let cols: Vec<_> = line.split(',').collect(); if cols.len() < 10 { - return Err(SudachiError::InvalidDataFormat(i, format!("{}", line))); + return Err(SudachiError::InvalidDataFormat(i, line.to_string())); } let category_type: CategoryType = cols[0].parse()?; if !categories.contains_key(&category_type) { @@ -262,7 +262,7 @@ impl OovProviderPlugin for MeCabOovPlugin { ); let categories = if char_def_path.is_ok() { - let reader = BufReader::new(fs::File::open(&char_def_path?)?); + let reader = BufReader::new(fs::File::open(char_def_path?)?); MeCabOovPlugin::read_character_property(reader)? } else { let reader = BufReader::new(DEFAULT_CHAR_DEF_BYTES); @@ -276,7 +276,7 @@ impl OovProviderPlugin for MeCabOovPlugin { ); let oov_list = if unk_def_path.is_ok() { - let reader = BufReader::new(fs::File::open(&unk_def_path?)?); + let reader = BufReader::new(fs::File::open(unk_def_path?)?); MeCabOovPlugin::read_oov(reader, &categories, grammar, settings.userPOS)? } else { let reader = BufReader::new(DEFAULT_UNK_DEF_BYTES); diff --git a/sudachi/src/plugin/oov/mod.rs b/sudachi/src/plugin/oov/mod.rs index 847f7c23..986fe8a0 100644 --- a/sudachi/src/plugin/oov/mod.rs +++ b/sudachi/src/plugin/oov/mod.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -61,9 +61,9 @@ impl PluginCategory for dyn OovProviderPlugin { fn bundled_impl(name: &str) -> Option { match name { - "SimpleOovPlugin" => Some(Box::new(SimpleOovPlugin::default())), - "MeCabOovPlugin" => Some(Box::new(MeCabOovPlugin::default())), - "RegexOovProvider" => Some(Box::new(RegexOovProvider::default())), + "SimpleOovPlugin" => Some(Box::::default()), + "MeCabOovPlugin" => Some(Box::::default()), + "RegexOovProvider" => Some(Box::::default()), _ => None, } } diff --git a/sudachi/src/plugin/oov/regex_oov/mod.rs b/sudachi/src/plugin/oov/regex_oov/mod.rs index ec0b68f4..20698e7d 100644 --- a/sudachi/src/plugin/oov/regex_oov/mod.rs +++ b/sudachi/src/plugin/oov/regex_oov/mod.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Works Applications Co., Ltd. + * Copyright (c) 2022-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -46,17 +46,13 @@ pub(crate) struct RegexOovProvider { #[derive(Deserialize, Eq, PartialEq, Debug, Copy, Clone)] #[serde(rename_all = "lowercase")] +#[derive(Default)] pub enum BoundaryMode { + #[default] Strict, Relaxed, } -impl Default for BoundaryMode { - fn default() -> Self { - BoundaryMode::Strict - } -} - fn default_max_length() -> usize { 32 } @@ -89,7 +85,7 @@ impl OovProviderPlugin for RegexOovProvider { ) -> SudachiResult<()> { let mut parsed: RegexProviderConfig = serde_json::from_value(settings.clone())?; - if !parsed.regex.starts_with("^") { + if !parsed.regex.starts_with('^') { parsed.regex.insert(0, '^'); } diff --git a/sudachi/src/plugin/path_rewrite/join_numeric/numeric_parser/mod.rs b/sudachi/src/plugin/path_rewrite/join_numeric/numeric_parser/mod.rs index 30e61e1f..2bdd5736 100644 --- a/sudachi/src/plugin/path_rewrite/join_numeric/numeric_parser/mod.rs +++ b/sudachi/src/plugin/path_rewrite/join_numeric/numeric_parser/mod.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -193,7 +193,7 @@ impl NumericParser { } fn is_small_unit(n: i32) -> bool { - -3 <= n && n < 0 + (-3..0).contains(&n) } fn is_large_unit(n: i32) -> bool { n < -3 diff --git a/sudachi/src/plugin/path_rewrite/join_numeric/numeric_parser/string_number.rs b/sudachi/src/plugin/path_rewrite/join_numeric/numeric_parser/string_number.rs index 5c8d0ea4..eaf86e60 100644 --- a/sudachi/src/plugin/path_rewrite/join_numeric/numeric_parser/string_number.rs +++ b/sudachi/src/plugin/path_rewrite/join_numeric/numeric_parser/string_number.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -97,7 +97,7 @@ impl StringNumber { } pub fn is_zero(&self) -> bool { - self.significand.len() == 0 + self.significand.is_empty() } pub fn to_string(&mut self) -> String { @@ -121,7 +121,7 @@ impl StringNumber { .count(); self.significand .truncate(self.significand.len() - n_last_zero); - if self.significand.chars().last().unwrap() == '.' { + if self.significand.ends_with('.') { self.significand.truncate(self.significand.len() - 1); } } diff --git a/sudachi/src/plugin/path_rewrite/mod.rs b/sudachi/src/plugin/path_rewrite/mod.rs index 64c86d4e..df0d2919 100644 --- a/sudachi/src/plugin/path_rewrite/mod.rs +++ b/sudachi/src/plugin/path_rewrite/mod.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -53,8 +53,8 @@ impl PluginCategory for dyn PathRewritePlugin { fn bundled_impl(name: &str) -> Option { match name { - "JoinNumericPlugin" => Some(Box::new(JoinNumericPlugin::default())), - "JoinKatakanaOovPlugin" => Some(Box::new(JoinKatakanaOovPlugin::default())), + "JoinNumericPlugin" => Some(Box::::default()), + "JoinKatakanaOovPlugin" => Some(Box::::default()), _ => None, } } diff --git a/sudachi/src/sentence_detector.rs b/sudachi/src/sentence_detector.rs index 805b1888..9b6cf330 100644 --- a/sudachi/src/sentence_detector.rs +++ b/sudachi/src/sentence_detector.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -76,6 +76,12 @@ pub struct SentenceDetector { limit: usize, } +impl Default for SentenceDetector { + fn default() -> Self { + Self::new() + } +} + impl SentenceDetector { pub fn new() -> Self { SentenceDetector { @@ -175,13 +181,13 @@ fn parenthesis_level(s: &str) -> SudachiResult { )) .unwrap(); } - let mut level = 0; + let mut level: usize = 0; for caps in PARENTHESIS.captures_iter(s) { if let Some(_) = caps?.get(1) { // open level += 1; - } else if level > 0 { - level -= 1; + } else { + level = level.saturating_sub(1); } } Ok(level) @@ -225,7 +231,7 @@ fn is_continuous_phrase(s: &str, eos: usize) -> SudachiResult { } // we can safely unwrap since eos < s.len() - let c = s[eos..].chars().nth(0).unwrap(); + let c = s[eos..].chars().next().unwrap(); Ok((c == 'と' || c == 'や' || c == 'の') && EOS_ITEMIZE_HEADER.is_match(&s[..eos])?) } diff --git a/sudachi/src/sentence_splitter.rs b/sudachi/src/sentence_splitter.rs index 5fa77b7c..cd465951 100644 --- a/sudachi/src/sentence_splitter.rs +++ b/sudachi/src/sentence_splitter.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -56,6 +56,12 @@ pub struct SentenceSplitter<'a> { checker: Option>, } +impl Default for SentenceSplitter<'_> { + fn default() -> Self { + Self::new() + } +} + impl SentenceSplitter<'_> { pub fn new() -> Self { SentenceSplitter { diff --git a/sudachi/src/util/check_params.rs b/sudachi/src/util/check_params.rs index aeab14e5..80ca1888 100644 --- a/sudachi/src/util/check_params.rs +++ b/sudachi/src/util/check_params.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,7 +39,7 @@ impl<'a> CheckParams for Grammar<'a> { format!("max grammar leftId is {}", self.conn_matrix().num_left()), )); } - return Ok(x as u16); + Ok(x as u16) } fn check_right_id>(&self, raw: T) -> SudachiResult { @@ -57,7 +57,7 @@ impl<'a> CheckParams for Grammar<'a> { format!("max grammar rightId is {}", self.conn_matrix().num_right()), )); } - return Ok(x as u16); + Ok(x as u16) } fn check_cost>(&self, raw: T) -> SudachiResult { @@ -82,6 +82,6 @@ impl<'a> CheckParams for Grammar<'a> { ), )); } - return Ok(x as i16); + Ok(x as i16) } } diff --git a/sudachi/src/util/cow_array.rs b/sudachi/src/util/cow_array.rs index 73d8556a..7d66e92e 100644 --- a/sudachi/src/util/cow_array.rs +++ b/sudachi/src/util/cow_array.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,7 +26,7 @@ pub trait ReadLE { impl ReadLE for i16 { fn from_le_bytes(bytes: &[u8]) -> Result { - bytes.try_into().map(|b| Self::from_le_bytes(b)) + bytes.try_into().map(Self::from_le_bytes) } } @@ -35,7 +35,7 @@ impl ReadLE for u32 { where Self: Sized, { - bytes.try_into().map(|b| Self::from_le_bytes(b)) + bytes.try_into().map(Self::from_le_bytes) } } diff --git a/sudachi/src/util/testing.rs b/sudachi/src/util/testing.rs index c1c4b860..520919ff 100644 --- a/sudachi/src/util/testing.rs +++ b/sudachi/src/util/testing.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,7 +32,7 @@ pub fn char_cats() -> CharacterCategory { pub fn build_mock_bytes() -> Vec { let mut buf = Vec::new(); // encode pos for oov - buf.extend(&(1 as i16).to_le_bytes()); + buf.extend(&1_i16.to_le_bytes()); let pos = vec!["補助記号", "一般", "*", "*", "*", "*"]; for s in pos { let utf16: Vec<_> = s.encode_utf16().collect(); @@ -42,8 +42,8 @@ pub fn build_mock_bytes() -> Vec { } } // set 10 for left and right id sizes - buf.extend(&(10 as i16).to_le_bytes()); - buf.extend(&(10 as i16).to_le_bytes()); + buf.extend(&10_i16.to_le_bytes()); + buf.extend(&10_i16.to_le_bytes()); for i in 0..10 { for j in 0..10 { let val = i * 100 + j; diff --git a/sudachi/src/util/user_pos.rs b/sudachi/src/util/user_pos.rs index 7290e07a..87694189 100644 --- a/sudachi/src/util/user_pos.rs +++ b/sudachi/src/util/user_pos.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,17 +22,13 @@ use std::fmt::Display; #[derive(Eq, PartialEq, Deserialize, Clone, Copy, Debug)] #[serde(rename_all = "lowercase")] +#[derive(Default)] pub enum UserPosMode { Allow, + #[default] Forbid, } -impl Default for UserPosMode { - fn default() -> Self { - UserPosMode::Forbid - } -} - pub trait UserPosSupport { fn handle_user_pos + ToString + Display>( &mut self, diff --git a/sudachi/tests/common/mod.rs b/sudachi/tests/common/mod.rs index 430dd462..969d618d 100644 --- a/sudachi/tests/common/mod.rs +++ b/sudachi/tests/common/mod.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,7 +38,7 @@ use sudachi::dic::subset::InfoSubset; pub fn dictionary_bytes_from_path>(dictionary_path: P) -> SudachiResult> { let dictionary_path = dictionary_path.as_ref(); - let dictionary_stat = fs::metadata(&dictionary_path)?; + let dictionary_stat = fs::metadata(dictionary_path)?; let mut dictionary_file = File::open(dictionary_path)?; let mut dictionary_bytes = Vec::with_capacity(dictionary_stat.len() as usize); dictionary_file.read_to_end(&mut dictionary_bytes)?; @@ -59,9 +59,7 @@ lazy_static! { .resolved_system_dict() .expect("system dict failure"); - let dictionary_bytes = dictionary_bytes_from_path(dictionary_path) - .expect("Failed to read dictionary from path"); - dictionary_bytes + dictionary_bytes_from_path(dictionary_path).expect("Failed to read dictionary from path") }; static ref USER_DICTIONARY_BYTES: Vec> = { let mut bytes = Vec::with_capacity(TEST_CONFIG.user_dicts.len()); @@ -98,7 +96,7 @@ impl TestTokenizer { pub fn new() -> TestTokenizer { let dict = JapaneseDictionary::from_cfg(&TEST_CONFIG).expect("failed to make dictionary"); let tok = StatelessTokenizer::new(Arc::new(dict)); - return TestTokenizer { tok }; + TestTokenizer { tok } } pub fn tokenize<'a>( @@ -111,7 +109,7 @@ impl TestTokenizer { } pub fn dict(&self) -> &JapaneseDictionary { - &self.tok.as_dict() + self.tok.as_dict() } } diff --git a/sudachi/tests/morpheme.rs b/sudachi/tests/morpheme.rs index c13624d4..2fdb1937 100644 --- a/sudachi/tests/morpheme.rs +++ b/sudachi/tests/morpheme.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -51,7 +51,7 @@ fn morpheme_attributes() { assert_eq!("京都", ms.get(0).normalized_form()); assert_eq!("キョウト", ms.get(0).reading_form()); - assert_eq!(false, ms.get(0).is_oov()); + assert!(!ms.get(0).is_oov()); assert_eq!(3, ms.get(0).word_id().word()); assert_eq!(0, ms.get(0).dictionary_id()); diff --git a/sudachi/tests/regex_oov.rs b/sudachi/tests/regex_oov.rs index 0d21f855..a1956153 100644 --- a/sudachi/tests/regex_oov.rs +++ b/sudachi/tests/regex_oov.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ use std::ops::Deref; mod common; -const REGEX_CONFIG: &'static [u8] = include_bytes!("resources/sudachi.regex.json"); +const REGEX_CONFIG: &[u8] = include_bytes!("resources/sudachi.regex.json"); #[test] fn no_other_words() { From 04742015b07d91e71d9301a61b077d8961a5a07f Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Fri, 31 May 2024 16:19:04 +0900 Subject: [PATCH 2/6] read csv records using Reader::records instead of read_record --- sudachi/src/dic/build/lexicon.rs | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/sudachi/src/dic/build/lexicon.rs b/sudachi/src/dic/build/lexicon.rs index 931b1fe1..f1e297bb 100644 --- a/sudachi/src/dic/build/lexicon.rs +++ b/sudachi/src/dic/build/lexicon.rs @@ -297,17 +297,21 @@ impl LexiconReader { .trim(Trim::None) .flexible(true) .from_reader(data); - let mut record = StringRecord::new(); let mut nread = 0; - while reader.read_record(&mut record).map_err(|e| { - let line = e.position().map_or(0, |p| p.line()); - self.ctx.set_line(line as usize); - self.ctx.to_sudachi_err(BuildFailure::CsvError(e)) - })? { - let line = record.position().map_or(0, |p| p.line()) as usize; - self.ctx.set_line(line); - self.read_record(&record)?; - nread += 1; + for record in reader.records() { + match record { + Ok(r) => { + let line = r.position().map_or(0, |p| p.line()) as usize; + self.ctx.set_line(line); + self.read_record(&r)?; + nread += 1; + } + Err(e) => { + let line = e.position().map_or(0, |p| p.line()) as usize; + self.ctx.set_line(line); + return Err(self.ctx.to_sudachi_err(BuildFailure::CsvError(e))); + } + } } Ok(nread) } From 531a6bed8ec2ffa7c4ecd58c162c747a1f698942 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Fri, 31 May 2024 17:50:15 +0900 Subject: [PATCH 3/6] type alias for complex types --- sudachi/src/dic/build/resolve.rs | 15 ++++++++------- sudachi/src/dic/grammar.rs | 11 +++++------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/sudachi/src/dic/build/resolve.rs b/sudachi/src/dic/build/resolve.rs index 3160bbab..19d4e9be 100644 --- a/sudachi/src/dic/build/resolve.rs +++ b/sudachi/src/dic/build/resolve.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,18 +23,20 @@ use crate::error::SudachiResult; use crate::util::fxhash::FxBuildHasher; use std::collections::HashMap; +// HashMap from surface to (pos_id, reading_form, word-id)s +type ResolutionCandidateMap = HashMap, WordId)>, FxBuildHasher>; + /// We can't use trie to resolve splits because it is possible that refs are not in trie /// This resolver has to be owning because the dictionary content is lazily loaded and transient pub struct BinDictResolver { - index: HashMap, WordId)>, FxBuildHasher>, + index: ResolutionCandidateMap, } impl BinDictResolver { pub fn new(dict: D) -> SudachiResult { let lex = dict.lexicon(); let size = lex.size(); - let mut index: HashMap, WordId)>, FxBuildHasher> = - HashMap::default(); + let mut index: ResolutionCandidateMap = HashMap::default(); for id in 0..size { let wid = WordId::new(0, id); let winfo: WordInfoData = lex @@ -77,13 +79,12 @@ impl SplitUnitResolver for BinDictResolver { } pub struct RawDictResolver<'a> { - data: HashMap<&'a str, Vec<(u16, Option<&'a str>, WordId)>, FxBuildHasher>, + data: ResolutionCandidateMap<&'a str>, } impl<'a> RawDictResolver<'a> { pub(crate) fn new(entries: &'a [RawLexiconEntry], user: bool) -> Self { - let mut data: HashMap<&'a str, Vec<(u16, Option<&'a str>, WordId)>, FxBuildHasher> = - HashMap::default(); + let mut data: ResolutionCandidateMap<&'a str> = HashMap::default(); let dic_id = if user { 1 } else { 0 }; diff --git a/sudachi/src/dic/grammar.rs b/sudachi/src/dic/grammar.rs index c145cbf7..70da5ce3 100644 --- a/sudachi/src/dic/grammar.rs +++ b/sudachi/src/dic/grammar.rs @@ -27,13 +27,15 @@ use nom::{ }; use std::ops::Index; +type PosList = Vec>; + /// Dictionary grammar /// /// Contains part_of_speech list and connection cost map. /// It also holds character category. pub struct Grammar<'a> { _bytes: &'a [u8], - pub pos_list: Vec>, + pub pos_list: PosList, pub storage_size: usize, /// The mapping to overload cost table @@ -163,7 +165,7 @@ impl<'a> Grammar<'a> { } } -fn pos_list_parser(input: &[u8]) -> SudachiNomResult<&[u8], Vec>> { +fn pos_list_parser(input: &[u8]) -> SudachiNomResult<&[u8], PosList> { let (rest, pos_size) = le_u16(input)?; nom::multi::count( nom::multi::count(utf16_string_parser, POS_DEPTH), @@ -171,10 +173,7 @@ fn pos_list_parser(input: &[u8]) -> SudachiNomResult<&[u8], Vec>> { )(rest) } -fn grammar_parser( - input: &[u8], - offset: usize, -) -> SudachiNomResult<&[u8], (Vec>, i16, i16)> { +fn grammar_parser(input: &[u8], offset: usize) -> SudachiNomResult<&[u8], (PosList, i16, i16)> { nom::sequence::preceded( take(offset), nom::sequence::tuple((pos_list_parser, le_i16, le_i16)), From 402c17856ea34c21db9bd594d40e5db50ddadf4e Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Fri, 28 Jun 2024 17:20:18 +0900 Subject: [PATCH 4/6] fix clippy warnings --- sudachi/src/analysis/lattice.rs | 5 +-- sudachi/src/dic/build/lexicon.rs | 4 +-- sudachi/src/dic/build/parse.rs | 4 +-- sudachi/src/dic/build/test/with_analysis.rs | 13 +++---- sudachi/src/dic/dictionary.rs | 2 -- sudachi/src/dic/header.rs | 22 ++++++------ sudachi/src/dic/mod.rs | 1 + sudachi/src/dic/storage.rs | 2 ++ sudachi/src/dic/word_id.rs | 5 +-- sudachi/src/input_text/buffer/mod.rs | 2 +- .../plugin/connect_cost/inhibit_connection.rs | 5 +-- .../input_text/default_input_text/mod.rs | 23 ++++-------- sudachi/src/plugin/oov/mecab_oov/mod.rs | 12 +++---- sudachi/src/plugin/oov/mecab_oov/test.rs | 6 ++-- .../path_rewrite/join_katakana_oov/tests.rs | 14 +++++--- .../plugin/path_rewrite/join_numeric/mod.rs | 14 ++++---- .../join_numeric/numeric_parser/mod.rs | 36 +++++++++---------- sudachi/src/sentence_detector.rs | 2 +- sudachi/src/util/cow_array.rs | 4 ++- 19 files changed, 81 insertions(+), 95 deletions(-) diff --git a/sudachi/src/analysis/lattice.rs b/sudachi/src/analysis/lattice.rs index a0ddab0a..b7bd227a 100644 --- a/sudachi/src/analysis/lattice.rs +++ b/sudachi/src/analysis/lattice.rs @@ -229,10 +229,7 @@ impl Lattice { let mut dump_idx = 0; for boundary in (0..self.indices.len()).rev() { - let nodes = &self.ends_full[boundary]; - - for node_idx in 0..nodes.len() { - let r_node = &nodes[node_idx]; + for r_node in &self.ends_full[boundary] { let (surface, pos) = if r_node.is_special_node() { ("(null)", PosData::Bos) } else if r_node.is_oov() { diff --git a/sudachi/src/dic/build/lexicon.rs b/sudachi/src/dic/build/lexicon.rs index f1e297bb..49e44963 100644 --- a/sudachi/src/dic/build/lexicon.rs +++ b/sudachi/src/dic/build/lexicon.rs @@ -543,8 +543,7 @@ impl LexiconReader { resolver: &R, ) -> Result { let mut total = 0; - let mut line: usize = 0; - for e in self.entries.iter_mut() { + for (line, e) in self.entries.iter_mut().enumerate() { for s in e.splits_a.iter_mut() { match Self::resolve_split(s, resolver) { Some(val) => total += val, @@ -571,7 +570,6 @@ impl LexiconReader { } } } - line += 1; } Ok(total) } diff --git a/sudachi/src/dic/build/parse.rs b/sudachi/src/dic/build/parse.rs index 0c25fb23..d39e16b2 100644 --- a/sudachi/src/dic/build/parse.rs +++ b/sudachi/src/dic/build/parse.rs @@ -95,8 +95,8 @@ pub(crate) fn parse_dic_form(data: &str) -> DicWriteResult { #[inline] pub(crate) fn parse_wordid(data: &str) -> DicWriteResult { - if data.starts_with('U') { - let wid = parse_wordid_raw(&data[1..]); + if let Some(stripped) = data.strip_prefix('U') { + let wid = parse_wordid_raw(stripped); wid.map(|w| WordId::new(1, w.word())) } else { parse_wordid_raw(data) diff --git a/sudachi/src/dic/build/test/with_analysis.rs b/sudachi/src/dic/build/test/with_analysis.rs index 18f94a4d..12898bd3 100644 --- a/sudachi/src/dic/build/test/with_analysis.rs +++ b/sudachi/src/dic/build/test/with_analysis.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -78,14 +78,9 @@ impl ConfigTestSupport { let mut result = String::new(); - loop { - match data.next() { - Some(x) => { - write!(result, "{:?}{}", prev, sep).unwrap(); - prev = x; - } - None => break, - } + for x in data { + write!(result, "{:?}{}", prev, sep).unwrap(); + prev = x; } write!(result, "{:?}", prev).unwrap(); result diff --git a/sudachi/src/dic/dictionary.rs b/sudachi/src/dic/dictionary.rs index b3f8c8b3..f5a7c429 100644 --- a/sudachi/src/dic/dictionary.rs +++ b/sudachi/src/dic/dictionary.rs @@ -87,7 +87,6 @@ impl JapaneseDictionary { let plugins = { let grammar = &mut basic_dict.grammar; - let cfg = cfg; Plugins::load(cfg, grammar)? }; @@ -126,7 +125,6 @@ impl JapaneseDictionary { let plugins = { let grammar = &mut basic_dict.grammar; - let cfg = cfg; Plugins::load(cfg, grammar)? }; diff --git a/sudachi/src/dic/header.rs b/sudachi/src/dic/header.rs index 7663c1dd..306dd0e3 100644 --- a/sudachi/src/dic/header.rs +++ b/sudachi/src/dic/header.rs @@ -170,21 +170,21 @@ impl Header { /// Returns if this header version has grammar pub fn has_grammar(&self) -> bool { - match self.version { - HeaderVersion::SystemDict(_) => true, - HeaderVersion::UserDict(UserDictVersion::Version2) => true, - HeaderVersion::UserDict(UserDictVersion::Version3) => true, - _ => false, - } + matches!( + self.version, + HeaderVersion::SystemDict(_) + | HeaderVersion::UserDict(UserDictVersion::Version2) + | HeaderVersion::UserDict(UserDictVersion::Version3) + ) } /// Returns if this header version has synonym group ids pub fn has_synonym_group_ids(&self) -> bool { - match self.version { - HeaderVersion::SystemDict(SystemDictVersion::Version2) => true, - HeaderVersion::UserDict(UserDictVersion::Version3) => true, - _ => false, - } + matches!( + self.version, + HeaderVersion::SystemDict(SystemDictVersion::Version2) + | HeaderVersion::UserDict(UserDictVersion::Version3) + ) } pub fn write_to(&self, w: &mut W) -> SudachiResult { diff --git a/sudachi/src/dic/mod.rs b/sudachi/src/dic/mod.rs index 585392ab..95374299 100644 --- a/sudachi/src/dic/mod.rs +++ b/sudachi/src/dic/mod.rs @@ -136,6 +136,7 @@ pub struct DictionaryLoader<'a> { impl<'a> DictionaryLoader<'a> { /// Creates a binary dictionary from bytes /// + /// # Safety /// This function is marked unsafe because it does not perform header validation pub unsafe fn read_any_dictionary(dictionary_bytes: &[u8]) -> SudachiResult { let header = Header::parse(&dictionary_bytes[..Header::STORAGE_SIZE])?; diff --git a/sudachi/src/dic/storage.rs b/sudachi/src/dic/storage.rs index 90a8b8ce..d11e4610 100644 --- a/sudachi/src/dic/storage.rs +++ b/sudachi/src/dic/storage.rs @@ -56,6 +56,8 @@ impl SudachiDicData { self.system.as_ref() } + /// # Safety + /// Call this function only after system dictionary data is ready. pub unsafe fn system_static_slice(&self) -> &'static [u8] { std::mem::transmute(self.system()) } diff --git a/sudachi/src/dic/word_id.rs b/sudachi/src/dic/word_id.rs index af1acce4..5de0e69a 100644 --- a/sudachi/src/dic/word_id.rs +++ b/sudachi/src/dic/word_id.rs @@ -99,10 +99,7 @@ impl WordId { /// Check if the word comes from the user dictionary pub fn is_user(&self) -> bool { - match self.dic() { - 0 | 0xf => false, - _ => true, - } + !matches!(self.dic(), 0 | 0xf) } pub fn as_raw(&self) -> u32 { diff --git a/sudachi/src/input_text/buffer/mod.rs b/sudachi/src/input_text/buffer/mod.rs index f9877b73..9ac27d00 100644 --- a/sudachi/src/input_text/buffer/mod.rs +++ b/sudachi/src/input_text/buffer/mod.rs @@ -107,7 +107,7 @@ impl InputBuffer { /// Creates input from the passed string. Should be used mostly for tests. /// /// Panics if the input string is too long. - pub fn from<'a, T: AsRef>(data: T) -> InputBuffer { + pub fn from>(data: T) -> InputBuffer { let mut buf = Self::new(); buf.reset().push_str(data.as_ref()); buf.start_build().expect(""); diff --git a/sudachi/src/plugin/connect_cost/inhibit_connection.rs b/sudachi/src/plugin/connect_cost/inhibit_connection.rs index 3076dc8a..c25aba41 100644 --- a/sudachi/src/plugin/connect_cost/inhibit_connection.rs +++ b/sudachi/src/plugin/connect_cost/inhibit_connection.rs @@ -83,8 +83,9 @@ mod tests { let right = 0; let bytes = build_mock_bytes(); let mut grammar = build_mock_grammar(&bytes); - let mut plugin = InhibitConnectionPlugin::default(); - plugin.inhibit_pairs = vec![(left, right)]; + let plugin = InhibitConnectionPlugin { + inhibit_pairs: vec![(left, right)], + }; plugin.edit(&mut grammar); assert_eq!( diff --git a/sudachi/src/plugin/input_text/default_input_text/mod.rs b/sudachi/src/plugin/input_text/default_input_text/mod.rs index 070ca5c9..7379587f 100644 --- a/sudachi/src/plugin/input_text/default_input_text/mod.rs +++ b/sudachi/src/plugin/input_text/default_input_text/mod.rs @@ -200,11 +200,8 @@ impl DefaultInputTextPlugin { // 2. handle normalization let need_lowercase = ch.is_uppercase(); - let need_nkfc = !self.should_ignore(ch) - && match is_nfkc_quick(std::iter::once(ch)) { - IsNormalized::Yes => false, - _ => true, - }; + let need_nkfc = + !self.should_ignore(ch) && is_nfkc_quick(std::iter::once(ch)) != IsNormalized::Yes; // iterator types are incompatible, so calls can't be moved outside branches match (need_lowercase, need_nkfc) { @@ -238,14 +235,11 @@ impl DefaultInputTextPlugin { len: usize, ch: char, ) { - match data.next() { - Some(ch2) => { - if ch2 == ch { - return; - } - replacer.replace_char_iter(start..start + len, ch2, data) + if let Some(ch2) = data.next() { + if ch2 == ch { + return; } - None => (), + replacer.replace_char_iter(start..start + len, ch2, data) } } } @@ -286,10 +280,7 @@ impl InputTextPlugin for DefaultInputTextPlugin { edit: InputEditor<'a>, ) -> SudachiResult> { let chars = buffer.current_chars(); - let need_nkfc = match is_nfkc_quick(chars.iter().cloned()) { - IsNormalized::Yes => false, - _ => true, - }; + let need_nkfc = is_nfkc_quick(chars.iter().cloned()) != IsNormalized::Yes; let need_lowercase = chars.iter().any(|c| c.is_uppercase()); diff --git a/sudachi/src/plugin/oov/mecab_oov/mod.rs b/sudachi/src/plugin/oov/mecab_oov/mod.rs index 83f073a8..c3895067 100644 --- a/sudachi/src/plugin/oov/mecab_oov/mod.rs +++ b/sudachi/src/plugin/oov/mecab_oov/mod.rs @@ -47,7 +47,7 @@ const DEFAULT_UNK_DEF_BYTES: &[u8] = include_bytes!("../../../../../resources/un #[derive(Default)] pub struct MeCabOovPlugin { categories: HashMap, - oov_list: HashMap, RoMu>, + oov_list: HashMap, RoMu>, } /// Struct corresponds with raw config json file. @@ -120,8 +120,8 @@ impl MeCabOovPlugin { categories: &HashMap, mut grammar: &mut Grammar, user_pos: UserPosMode, - ) -> SudachiResult, RoMu>> { - let mut oov_list: HashMap, RoMu> = HashMap::with_hasher(RoMu::new()); + ) -> SudachiResult, RoMu>> { + let mut oov_list: HashMap, RoMu> = HashMap::with_hasher(RoMu::new()); for (i, line) in reader.lines().enumerate() { let line = line?; let line = line.trim(); @@ -141,7 +141,7 @@ impl MeCabOovPlugin { )); } - let oov = OOV { + let oov = Oov { left_id: cols[1].parse()?, right_id: cols[2].parse()?, cost: cols[3].parse()?, @@ -184,7 +184,7 @@ impl MeCabOovPlugin { } /// Creates a new oov node - fn get_oov_node(&self, oov: &OOV, start: usize, end: usize) -> Node { + fn get_oov_node(&self, oov: &Oov, start: usize, end: usize) -> Node { Node::new( start as u16, end as u16, @@ -311,7 +311,7 @@ struct CategoryInfo { /// The OOV definition #[derive(Debug, Default, Clone)] -struct OOV { +struct Oov { left_id: i16, right_id: i16, cost: i16, diff --git a/sudachi/src/plugin/oov/mecab_oov/test.rs b/sudachi/src/plugin/oov/mecab_oov/test.rs index f46dc443..0e352ac6 100644 --- a/sudachi/src/plugin/oov/mecab_oov/test.rs +++ b/sudachi/src/plugin/oov/mecab_oov/test.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -492,13 +492,13 @@ fn read_oov_with_category_not_in_character_property() { fn build_plugin() -> MeCabOovPlugin { let mut plugin = MeCabOovPlugin::default(); - let oov1 = OOV { + let oov1 = Oov { right_id: -1, left_id: -1, cost: -1, pos_id: 1, }; - let oov2 = OOV { + let oov2 = Oov { right_id: -1, left_id: -1, cost: -1, diff --git a/sudachi/src/plugin/path_rewrite/join_katakana_oov/tests.rs b/sudachi/src/plugin/path_rewrite/join_katakana_oov/tests.rs index afaac93b..2a26d632 100644 --- a/sudachi/src/plugin/path_rewrite/join_katakana_oov/tests.rs +++ b/sudachi/src/plugin/path_rewrite/join_katakana_oov/tests.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -88,7 +88,10 @@ fn start_with_middle() { #[test] fn start_with_tail() { - let mut plugin = JoinKatakanaOovPlugin::default(); + let plugin = JoinKatakanaOovPlugin { + min_length: 3, + ..Default::default() + }; let text = build_text("アイウアイウアイ"); let path = vec![ build_node_aiu(0, 9, 5562), @@ -96,7 +99,6 @@ fn start_with_tail() { build_node_ai(18, 24, 19594), ]; - plugin.min_length = 3; let path = plugin .rewrite(&text, path, &Lattice::default()) .expect("Failed to rewrite path"); @@ -105,8 +107,10 @@ fn start_with_tail() { #[test] fn with_noovbow() { - let mut plugin = JoinKatakanaOovPlugin::default(); - plugin.min_length = 3; + let plugin = JoinKatakanaOovPlugin { + min_length: 3, + ..Default::default() + }; let text = build_text("ァアイアイウ"); diff --git a/sudachi/src/plugin/path_rewrite/join_numeric/mod.rs b/sudachi/src/plugin/path_rewrite/join_numeric/mod.rs index 730b492b..d014781e 100644 --- a/sudachi/src/plugin/path_rewrite/join_numeric/mod.rs +++ b/sudachi/src/plugin/path_rewrite/join_numeric/mod.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Works Applications Co., Ltd. + * Copyright (c) 2021-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -102,10 +102,10 @@ impl JoinNumericPlugin { for c in s.chars() { if !parser.append(&c) { if begin_idx >= 0 { - if parser.error_state == numeric_parser::Error::COMMA { + if parser.error_state == numeric_parser::Error::Comma { comma_as_digit = false; i = begin_idx - 1; - } else if parser.error_state == numeric_parser::Error::POINT { + } else if parser.error_state == numeric_parser::Error::Point { period_as_digit = false; i = begin_idx - 1; } @@ -132,8 +132,8 @@ impl JoinNumericPlugin { i = begin_idx + 1; } else { let ss = path[i as usize - 1].word_info().normalized_form(); - if (parser.error_state == numeric_parser::Error::COMMA && ss == ",") - || (parser.error_state == numeric_parser::Error::POINT && ss == ".") + if (parser.error_state == numeric_parser::Error::Comma && ss == ",") + || (parser.error_state == numeric_parser::Error::Point && ss == ".") { path = self.concat(path, begin_idx as usize, i as usize - 1, &mut parser)?; @@ -157,8 +157,8 @@ impl JoinNumericPlugin { path = self.concat(path, begin_idx as usize, len, &mut parser)?; } else { let ss = path[len - 1].word_info().normalized_form(); - if (parser.error_state == numeric_parser::Error::COMMA && ss == ",") - || (parser.error_state == numeric_parser::Error::POINT && ss == ".") + if (parser.error_state == numeric_parser::Error::Comma && ss == ",") + || (parser.error_state == numeric_parser::Error::Point && ss == ".") { path = self.concat(path, begin_idx as usize, len - 1, &mut parser)?; } diff --git a/sudachi/src/plugin/path_rewrite/join_numeric/numeric_parser/mod.rs b/sudachi/src/plugin/path_rewrite/join_numeric/numeric_parser/mod.rs index 2bdd5736..c8658f2f 100644 --- a/sudachi/src/plugin/path_rewrite/join_numeric/numeric_parser/mod.rs +++ b/sudachi/src/plugin/path_rewrite/join_numeric/numeric_parser/mod.rs @@ -24,9 +24,9 @@ use string_number::StringNumber; /// State of the parser #[derive(Debug, Eq, PartialEq)] pub enum Error { - NONE, - POINT, - COMMA, + None, + Point, + Comma, // OTHER, } @@ -83,7 +83,7 @@ impl NumericParser { is_first_digit: true, has_comma: false, has_hanging_point: false, - error_state: Error::NONE, + error_state: Error::None, total: StringNumber::new(), subtotal: StringNumber::new(), tmp: StringNumber::new(), @@ -95,7 +95,7 @@ impl NumericParser { self.is_first_digit = true; self.has_comma = false; self.has_hanging_point = false; - self.error_state = Error::NONE; + self.error_state = Error::None; self.total.clear(); self.subtotal.clear(); self.tmp.clear(); @@ -105,15 +105,15 @@ impl NumericParser { if *c == '.' { self.has_hanging_point = true; if self.is_first_digit { - self.error_state = Error::POINT; + self.error_state = Error::Point; return false; } if self.has_comma && !self.check_comma() { - self.error_state = Error::COMMA; + self.error_state = Error::Comma; return false; } if !self.tmp.set_point() { - self.error_state = Error::POINT; + self.error_state = Error::Point; return false; } self.has_comma = false; @@ -121,7 +121,7 @@ impl NumericParser { } if *c == ',' { if !self.check_comma() { - self.error_state = Error::COMMA; + self.error_state = Error::Comma; return false; } self.has_comma = true; @@ -168,11 +168,11 @@ impl NumericParser { pub fn done(&mut self) -> bool { let ret = self.subtotal.add(&mut self.tmp) && self.total.add(&mut self.subtotal); if self.has_hanging_point { - self.error_state = Error::POINT; + self.error_state = Error::Point; return false; } if self.has_comma && self.digit_length != 3 { - self.error_state = Error::COMMA; + self.error_state = Error::Comma; return false; } ret @@ -327,23 +327,23 @@ mod tests { parser.clear(); assert!(!parse(&mut parser, "200,00,000")); - assert_eq!(Error::COMMA, parser.error_state); + assert_eq!(Error::Comma, parser.error_state); parser.clear(); assert!(!parse(&mut parser, "2,4")); - assert_eq!(Error::COMMA, parser.error_state); + assert_eq!(Error::Comma, parser.error_state); parser.clear(); assert!(!parse(&mut parser, "000,000")); - assert_eq!(Error::COMMA, parser.error_state); + assert_eq!(Error::Comma, parser.error_state); parser.clear(); assert!(!parse(&mut parser, ",000")); - assert_eq!(Error::COMMA, parser.error_state); + assert_eq!(Error::Comma, parser.error_state); parser.clear(); assert!(!parse(&mut parser, "256,55.1")); - assert_eq!(Error::COMMA, parser.error_state); + assert_eq!(Error::Comma, parser.error_state); } #[test] @@ -360,10 +360,10 @@ mod tests { parser.clear(); assert!(!parse(&mut parser, "6.")); - assert_eq!(Error::POINT, parser.error_state); + assert_eq!(Error::Point, parser.error_state); parser.clear(); assert!(!parse(&mut parser, "1.2.3")); - assert_eq!(Error::POINT, parser.error_state); + assert_eq!(Error::Point, parser.error_state); } } diff --git a/sudachi/src/sentence_detector.rs b/sudachi/src/sentence_detector.rs index 9b6cf330..29285c76 100644 --- a/sudachi/src/sentence_detector.rs +++ b/sudachi/src/sentence_detector.rs @@ -183,7 +183,7 @@ fn parenthesis_level(s: &str) -> SudachiResult { } let mut level: usize = 0; for caps in PARENTHESIS.captures_iter(s) { - if let Some(_) = caps?.get(1) { + if caps?.get(1).is_some() { // open level += 1; } else { diff --git a/sudachi/src/util/cow_array.rs b/sudachi/src/util/cow_array.rs index 7d66e92e..10cc079f 100644 --- a/sudachi/src/util/cow_array.rs +++ b/sudachi/src/util/cow_array.rs @@ -111,7 +111,9 @@ impl<'a, T: ReadLE + Clone> CowArray<'a, T> { let slice: &[T] = self.storage.as_ref().unwrap().as_slice(); self.slice = unsafe { std::mem::transmute(slice) }; } - self.storage.as_mut().map(|s| s[offset] = value); + if let Some(s) = self.storage.as_mut() { + s[offset] = value; + } } } From dd388b8646c4f7d413e5dc95fb75d4569c5afe5f Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Mon, 1 Jul 2024 09:18:47 +0900 Subject: [PATCH 5/6] small fix --- python/src/projection.rs | 7 +++---- sudachi/src/config.rs | 3 +-- sudachi/src/dic/build/mod.rs | 2 +- sudachi/src/plugin/input_text/default_input_text/mod.rs | 5 ++--- sudachi/src/plugin/oov/mecab_oov/mod.rs | 5 ++++- sudachi/src/plugin/oov/mecab_oov/test.rs | 2 +- sudachi/src/plugin/oov/regex_oov/mod.rs | 3 +-- .../plugin/path_rewrite/join_numeric/numeric_parser/mod.rs | 2 +- sudachi/src/util/user_pos.rs | 5 ++--- 9 files changed, 16 insertions(+), 18 deletions(-) diff --git a/python/src/projection.rs b/python/src/projection.rs index 8bea35be..2c2cc2be 100644 --- a/python/src/projection.rs +++ b/python/src/projection.rs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Works Applications Co., Ltd. + * Copyright (c) 2023-2024 Works Applications Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -112,9 +112,8 @@ impl MorphemeProjection for NormalizedNouns { } fn conjugating_matcher(dic: &D) -> PosMatcher { - make_matcher(dic, |pos| match pos[0].deref() { - "動詞" | "形容詞" | "助動詞" => true, - _ => false, + make_matcher(dic, |pos| { + matches!(pos[0].deref(), "動詞" | "形容詞" | "助動詞") }) } diff --git a/sudachi/src/config.rs b/sudachi/src/config.rs index b924e35a..d65dfb1c 100644 --- a/sudachi/src/config.rs +++ b/sudachi/src/config.rs @@ -100,9 +100,8 @@ impl PathResolver { } } -#[derive(Deserialize, Clone, Copy, Debug, Eq, PartialEq)] +#[derive(Deserialize, Clone, Copy, Debug, Eq, PartialEq, Default)] #[serde(rename_all = "snake_case")] -#[derive(Default)] pub enum SurfaceProjection { #[default] Surface, diff --git a/sudachi/src/dic/build/mod.rs b/sudachi/src/dic/build/mod.rs index 6a034a8a..dbbdda1f 100644 --- a/sudachi/src/dic/build/mod.rs +++ b/sudachi/src/dic/build/mod.rs @@ -305,7 +305,7 @@ impl DictBuilder { } /// this function must only be used in resolve_impl - fn unsafe_make_resolver<'b>(&self) -> RawDictResolver<'b> { + fn unsafe_make_resolver<'a>(&self) -> RawDictResolver<'a> { let resolver = RawDictResolver::new(self.lexicon.entries(), self.user); // resolver borrows parts of entries, but it does not touch splits // resolve function only modifies splits diff --git a/sudachi/src/plugin/input_text/default_input_text/mod.rs b/sudachi/src/plugin/input_text/default_input_text/mod.rs index 7379587f..0f92ea56 100644 --- a/sudachi/src/plugin/input_text/default_input_text/mod.rs +++ b/sudachi/src/plugin/input_text/default_input_text/mod.rs @@ -236,10 +236,9 @@ impl DefaultInputTextPlugin { ch: char, ) { if let Some(ch2) = data.next() { - if ch2 == ch { - return; + if ch2 != ch { + replacer.replace_char_iter(start..start + len, ch2, data) } - replacer.replace_char_iter(start..start + len, ch2, data) } } } diff --git a/sudachi/src/plugin/oov/mecab_oov/mod.rs b/sudachi/src/plugin/oov/mecab_oov/mod.rs index c3895067..5c27524e 100644 --- a/sudachi/src/plugin/oov/mecab_oov/mod.rs +++ b/sudachi/src/plugin/oov/mecab_oov/mod.rs @@ -131,7 +131,10 @@ impl MeCabOovPlugin { let cols: Vec<_> = line.split(',').collect(); if cols.len() < 10 { - return Err(SudachiError::InvalidDataFormat(i, line.to_string())); + return Err(SudachiError::InvalidDataFormat( + i, + format!("Invalid number of columns ({})", line), + )); } let category_type: CategoryType = cols[0].parse()?; if !categories.contains_key(&category_type) { diff --git a/sudachi/src/plugin/oov/mecab_oov/test.rs b/sudachi/src/plugin/oov/mecab_oov/test.rs index 0e352ac6..f5ca6550 100644 --- a/sudachi/src/plugin/oov/mecab_oov/test.rs +++ b/sudachi/src/plugin/oov/mecab_oov/test.rs @@ -437,7 +437,7 @@ fn read_oov_with_too_few_columns() { &mut grammar, UserPosMode::Forbid, ); - assert_matches!(result, Err(SudachiError::InvalidDataFormat(0, s)) if s == data); + assert_matches!(result, Err(SudachiError::InvalidDataFormat(0, s)) if s.contains(data)); } #[test] diff --git a/sudachi/src/plugin/oov/regex_oov/mod.rs b/sudachi/src/plugin/oov/regex_oov/mod.rs index 20698e7d..1fdd1359 100644 --- a/sudachi/src/plugin/oov/regex_oov/mod.rs +++ b/sudachi/src/plugin/oov/regex_oov/mod.rs @@ -44,9 +44,8 @@ pub(crate) struct RegexOovProvider { boundaries: BoundaryMode, } -#[derive(Deserialize, Eq, PartialEq, Debug, Copy, Clone)] +#[derive(Deserialize, Eq, PartialEq, Debug, Copy, Clone, Default)] #[serde(rename_all = "lowercase")] -#[derive(Default)] pub enum BoundaryMode { #[default] Strict, diff --git a/sudachi/src/plugin/path_rewrite/join_numeric/numeric_parser/mod.rs b/sudachi/src/plugin/path_rewrite/join_numeric/numeric_parser/mod.rs index c8658f2f..27c3c481 100644 --- a/sudachi/src/plugin/path_rewrite/join_numeric/numeric_parser/mod.rs +++ b/sudachi/src/plugin/path_rewrite/join_numeric/numeric_parser/mod.rs @@ -27,7 +27,7 @@ pub enum Error { None, Point, Comma, - // OTHER, + // Other, } /// Parses number written by arabic or kanji diff --git a/sudachi/src/util/user_pos.rs b/sudachi/src/util/user_pos.rs index 87694189..06a2f332 100644 --- a/sudachi/src/util/user_pos.rs +++ b/sudachi/src/util/user_pos.rs @@ -20,13 +20,12 @@ use itertools::Itertools; use serde::Deserialize; use std::fmt::Display; -#[derive(Eq, PartialEq, Deserialize, Clone, Copy, Debug)] +#[derive(Eq, PartialEq, Deserialize, Clone, Copy, Debug, Default)] #[serde(rename_all = "lowercase")] -#[derive(Default)] pub enum UserPosMode { - Allow, #[default] Forbid, + Allow, } pub trait UserPosSupport { From 81ede267b11b9d2532cacc21af58c819159fad8c Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Fri, 7 Jun 2024 11:26:24 +0900 Subject: [PATCH 6/6] reorder use statements --- python/src/build.rs | 11 +++++++---- python/src/dictionary.rs | 9 +++++---- python/src/errors.rs | 4 +++- python/src/pretokenizer.rs | 16 +++++++++------- python/src/projection.rs | 12 ++++++++---- python/src/tokenizer.rs | 1 - 6 files changed, 32 insertions(+), 21 deletions(-) diff --git a/python/src/build.rs b/python/src/build.rs index 9cff1d71..baf98b2a 100644 --- a/python/src/build.rs +++ b/python/src/build.rs @@ -14,18 +14,21 @@ * limitations under the License. */ -use crate::dictionary::get_default_resource_dir; -use crate::errors; -use pyo3::prelude::*; -use pyo3::types::{PyBytes, PyList, PyString, PyTuple, PyType}; use std::fs::{File, OpenOptions}; use std::io::BufWriter; use std::path::Path; + +use pyo3::prelude::*; +use pyo3::types::{PyBytes, PyList, PyString, PyTuple, PyType}; + use sudachi::analysis::stateless_tokenizer::DictionaryAccess; use sudachi::config::Config; use sudachi::dic::build::{DataSource, DictBuilder}; use sudachi::dic::dictionary::JapaneseDictionary; +use crate::dictionary::get_default_resource_dir; +use crate::errors; + pub fn register_functions(m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(build_system_dic, m)?)?; m.add_function(wrap_pyfunction!(build_user_dic, m)?)?; diff --git a/python/src/dictionary.rs b/python/src/dictionary.rs index 93918c2c..4bf18532 100644 --- a/python/src/dictionary.rs +++ b/python/src/dictionary.rs @@ -14,18 +14,18 @@ * limitations under the License. */ -use pyo3::prelude::*; -use pyo3::types::{PySet, PyString, PyTuple}; use std::convert::TryFrom; use std::fmt::Write; use std::ops::Deref; use std::path::{Path, PathBuf}; use std::str::FromStr; use std::sync::Arc; -use sudachi::analysis::Mode; -use crate::errors::{wrap, wrap_ctx, SudachiError as SudachiErr}; +use pyo3::prelude::*; +use pyo3::types::{PySet, PyString, PyTuple}; + use sudachi::analysis::stateless_tokenizer::DictionaryAccess; +use sudachi::analysis::Mode; use sudachi::config::{Config, ConfigBuilder, SurfaceProjection}; use sudachi::dic::dictionary::JapaneseDictionary; use sudachi::dic::grammar::Grammar; @@ -35,6 +35,7 @@ use sudachi::plugin::input_text::InputTextPlugin; use sudachi::plugin::oov::OovProviderPlugin; use sudachi::plugin::path_rewrite::PathRewritePlugin; +use crate::errors::{wrap, wrap_ctx, SudachiError as SudachiErr}; use crate::morpheme::{PyMorphemeListWrapper, PyProjector}; use crate::pos_matcher::PyPosMatcher; use crate::pretokenizer::PyPretokenizer; diff --git a/python/src/errors.rs b/python/src/errors.rs index 04827fd4..27bc0c7f 100644 --- a/python/src/errors.rs +++ b/python/src/errors.rs @@ -14,9 +14,11 @@ * limitations under the License. */ -use pyo3::{import_exception, PyResult}; use std::fmt::{Debug, Display}; +use pyo3::prelude::*; +use pyo3::{import_exception, PyResult}; + // Sudachi exception class is defined in Python import_exception!(sudachipy.errors, SudachiError); diff --git a/python/src/pretokenizer.rs b/python/src/pretokenizer.rs index d959285f..6e373289 100644 --- a/python/src/pretokenizer.rs +++ b/python/src/pretokenizer.rs @@ -14,21 +14,23 @@ * limitations under the License. */ -use crate::dictionary::PyDicData; -use crate::errors::wrap; -use crate::morpheme::{PyMorphemeList, PyMorphemeListWrapper, PyProjector}; +use std::cell::RefCell; +use std::sync::Arc; + use pyo3::intern; use pyo3::prelude::*; use pyo3::sync::GILOnceCell; use pyo3::types::{PyList, PySlice, PyTuple, PyType}; -use std::cell::RefCell; -use std::sync::Arc; +use thread_local::ThreadLocal; -use crate::projection::MorphemeProjection; use sudachi::analysis::stateful_tokenizer::StatefulTokenizer; use sudachi::dic::subset::InfoSubset; use sudachi::prelude::Mode; -use thread_local::ThreadLocal; + +use crate::dictionary::PyDicData; +use crate::errors::wrap; +use crate::morpheme::{PyMorphemeList, PyMorphemeListWrapper, PyProjector}; +use crate::projection::MorphemeProjection; /// This struct perform actual tokenization /// There should be at most one instance per thread of execution diff --git a/python/src/projection.rs b/python/src/projection.rs index 2c2cc2be..a7d61c33 100644 --- a/python/src/projection.rs +++ b/python/src/projection.rs @@ -14,18 +14,22 @@ * limitations under the License. */ -use crate::dictionary::PyDicData; -use crate::morpheme::PyProjector; -use pyo3::types::PyString; -use pyo3::{PyResult, Python}; use std::convert::TryFrom; use std::ops::Deref; use std::sync::Arc; + +use pyo3::prelude::*; +use pyo3::types::PyString; +use pyo3::{PyResult, Python}; + use sudachi::analysis::stateless_tokenizer::DictionaryAccess; use sudachi::config::SurfaceProjection; use sudachi::pos::PosMatcher; use sudachi::prelude::Morpheme; +use crate::dictionary::PyDicData; +use crate::morpheme::PyProjector; + pub(crate) trait MorphemeProjection { fn project<'py>(&self, m: &Morpheme>, py: Python<'py>) -> &'py PyString; } diff --git a/python/src/tokenizer.rs b/python/src/tokenizer.rs index 1a8446e0..4e94a040 100644 --- a/python/src/tokenizer.rs +++ b/python/src/tokenizer.rs @@ -21,7 +21,6 @@ use std::sync::Arc; use pyo3::prelude::*; use sudachi::analysis::stateful_tokenizer::StatefulTokenizer; - use sudachi::dic::subset::InfoSubset; use sudachi::prelude::*;