Skip to content

Commit

Permalink
Merge pull request #284 from WorksApplications/feature/280-pyo3-23
Browse files (browse the repository at this point in the history)
update pyo3 to v0.23
  • Loading branch information
mh-northlander authored Dec 4, 2024
2 parents 287f3d7 + 97aec81 commit fc3ffa4
Show file tree
Hide file tree
Showing 9 changed files with 72 additions and 77 deletions.
2 changes: 1 addition & 1 deletion python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ name = "sudachipy"
crate-type = ["cdylib"]

[dependencies]
pyo3 = { version = "0.22", features = ["extension-module"] }
pyo3 = { version = "0.23", features = ["extension-module"] }
scopeguard = "1" # Apache 2.0/MIT
thread_local = "1.1" # Apache 2.0/MIT

Expand Down
3 changes: 1 addition & 2 deletions python/build-wheels-manylinux-pgo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,7 @@ export CARGO_BUILD_TARGET=x86_64-unknown-linux-gnu

# see following link for the list of cpython bin
# https://github.com/pypa/manylinux?tab=readme-ov-file#image-content
# TODO: after supporting py313t, "/opt/python/cp{37,38,39,310,311,312,313}-*/bin" would suffice.
for PYBIN in /opt/python/cp*-cp{37m,38,39,310,311,312,313}/bin; do
for PYBIN in /opt/python/cp{37,38,39,310,311,312,313}-*/bin; do
"${PYBIN}/pip" install -U setuptools wheel setuptools-rust
find . -iname 'sudachipy*.so'
rm -f build/lib/sudachipy/sudachipy*.so
Expand Down
17 changes: 5 additions & 12 deletions python/src/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use std::io::BufWriter;
use std::path::Path;

use pyo3::prelude::*;
use pyo3::types::{PyBytes, PyList, PyString, PyTuple, PyType};
use pyo3::types::{PyBytes, PyList, PyString, PyType};

use sudachi::analysis::stateless_tokenizer::DictionaryAccess;
use sudachi::config::Config;
Expand All @@ -36,18 +36,11 @@ pub fn register_functions(m: &Bound<PyModule>) -> PyResult<()> {
}

fn to_stats<T: DictionaryAccess>(py: Python, builder: DictBuilder<T>) -> PyResult<Bound<PyList>> {
let stats = PyList::empty_bound(py);
let stats = PyList::empty(py);

for p in builder.report() {
let t = PyTuple::new_bound(
py,
[
p.part().into_py(py),
p.size().into_py(py),
p.time().as_secs_f64().into_py(py),
],
);
stats.append(t)?;
let values = (p.part(), p.size(), p.time().as_secs_f64());
stats.append(values.into_pyobject(py)?)?;
}

Ok(stats)
Expand Down Expand Up @@ -174,7 +167,7 @@ fn resolve_as_pypathstr<'py>(
py: Python<'py>,
data: &Bound<'py, PyAny>,
) -> PyResult<Option<Bound<'py, PyString>>> {
let binding = py.import_bound("pathlib")?.getattr("Path")?;
let binding = py.import("pathlib")?.getattr("Path")?;
let path = binding.downcast::<PyType>()?;
if data.is_instance(path)? {
Ok(Some(data.call_method0("resolve")?.str()?))
Expand Down
34 changes: 14 additions & 20 deletions python/src/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;

use pyo3::ffi::c_str;
use pyo3::prelude::*;
use pyo3::types::{PySet, PyString, PyTuple};

Expand Down Expand Up @@ -160,7 +161,7 @@ impl PyDictionary {
if dict_type.is_some() {
errors::warn_deprecation(
py,
"Parameter dict_type of Dictionary() is deprecated, use dict instead",
c_str!("Parameter dict_type of Dictionary() is deprecated, use dict instead"),
)?
}

Expand Down Expand Up @@ -211,7 +212,9 @@ impl PyDictionary {
.pos_list
.iter()
.map(|pos| {
let tuple: Py<PyTuple> = PyTuple::new_bound(py, pos).into_py(py);
let tuple: Py<PyTuple> = PyTuple::new(py, pos)
.expect("failed to convert POS tuple")
.unbind();
tuple
})
.collect();
Expand Down Expand Up @@ -288,12 +291,8 @@ impl PyDictionary {
/// :param target: can be either a list of POS partial tuples or a callable which maps POS to bool.
///
/// :type target: Iterable[PartialPOS] | Callable[[POS], bool]
fn pos_matcher<'py>(
&'py self,
py: Python<'py>,
target: &Bound<'py, PyAny>,
) -> PyResult<PyPosMatcher> {
PyPosMatcher::create(py, self.dictionary.as_ref().unwrap(), target)
fn pos_matcher<'py>(&'py self, target: &Bound<'py, PyAny>) -> PyResult<PyPosMatcher> {
PyPosMatcher::create(self.dictionary.as_ref().unwrap(), target)
}

/// Creates HuggingFace Tokenizers-compatible PreTokenizer.
Expand Down Expand Up @@ -367,13 +366,12 @@ impl PyDictionary {
)
};

let internal = PyPretokenizer::new(dict, mode, required_fields, handler, projection);
let internal_cell = Bound::new(py, internal)?;
let module = py.import_bound("tokenizers.pre_tokenizers")?;
let pretokenizer = PyPretokenizer::new(dict, mode, required_fields, handler, projection);
let module = py.import("tokenizers.pre_tokenizers")?;
module
.getattr("PreTokenizer")?
.getattr("custom")?
.call1(PyTuple::new_bound(py, [internal_cell]))
.call1((pretokenizer,))
}

/// Look up morphemes in the binary dictionary without performing the analysis.
Expand Down Expand Up @@ -507,7 +505,7 @@ fn read_config(config_opt: &Bound<PyAny>) -> PyResult<ConfigBuilder> {
)));
}
let py = config_opt.py();
let cfg_type = py.import_bound("sudachipy.config")?.getattr("Config")?;
let cfg_type = py.import("sudachipy.config")?.getattr("Config")?;
if config_opt.is_instance(&cfg_type)? {
let cfg_as_str = config_opt.call_method0("as_jsons")?;
return read_config(&cfg_as_str);
Expand All @@ -520,24 +518,20 @@ fn read_config(config_opt: &Bound<PyAny>) -> PyResult<ConfigBuilder> {
}

pub(crate) fn read_default_config(py: Python) -> PyResult<ConfigBuilder> {
let path = py
.import_bound("sudachipy")?
.getattr("_DEFAULT_SETTINGFILE")?;
let path = py.import("sudachipy")?.getattr("_DEFAULT_SETTINGFILE")?;
let path = path.downcast::<PyString>()?.to_str()?;
let path = PathBuf::from(path);
errors::wrap_ctx(ConfigBuilder::from_opt_file(Some(&path)), &path)
}

pub(crate) fn get_default_resource_dir(py: Python) -> PyResult<PathBuf> {
let path = py
.import_bound("sudachipy")?
.getattr("_DEFAULT_RESOURCEDIR")?;
let path = py.import("sudachipy")?.getattr("_DEFAULT_RESOURCEDIR")?;
let path = path.downcast::<PyString>()?.to_str()?;
Ok(PathBuf::from(path))
}

fn find_dict_path(py: Python, dict_type: &str) -> PyResult<PathBuf> {
let pyfunc = py.import_bound("sudachipy")?.getattr("_find_dict_path")?;
let pyfunc = py.import("sudachipy")?.getattr("_find_dict_path")?;
let path = pyfunc.call1((dict_type,))?;
let path = path.downcast::<PyString>()?.to_str()?;
Ok(PathBuf::from(path))
Expand Down
5 changes: 3 additions & 2 deletions python/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* limitations under the License.
*/

use core::ffi::CStr;
use std::fmt::{Debug, Display};

use pyo3::exceptions::PyDeprecationWarning;
Expand All @@ -37,6 +38,6 @@ pub fn wrap_ctx<T, E: Display, C: Debug + ?Sized>(v: Result<T, E>, ctx: &C) -> P
}
}

pub fn warn_deprecation(py: Python<'_>, msg: &str) -> PyResult<()> {
PyErr::warn_bound(py, &py.get_type_bound::<PyDeprecationWarning>(), msg, 1)
pub fn warn_deprecation(py: Python<'_>, msg: &CStr) -> PyResult<()> {
PyErr::warn(py, &py.get_type::<PyDeprecationWarning>(), msg, 1)
}
41 changes: 27 additions & 14 deletions python/src/morpheme.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use std::ops::Deref;
use std::sync::Arc;

use pyo3::exceptions::PyIndexError;
use pyo3::ffi::c_str;
use pyo3::prelude::*;
use pyo3::types::{PyList, PyString, PyTuple, PyType};

Expand Down Expand Up @@ -101,7 +102,7 @@ impl PyMorphemeListWrapper {
fn empty(_cls: &Bound<PyType>, py: Python, dict: &PyDictionary) -> PyResult<Self> {
errors::warn_deprecation(
py,
"Use Tokenizer.tokenize(\"\") if you need an empty MorphemeList.",
c_str!("Use Tokenizer.tokenize(\"\") if you need an empty MorphemeList."),
)?;

let cloned = dict.dictionary.as_ref().unwrap().clone();
Expand Down Expand Up @@ -165,7 +166,7 @@ impl PyMorphemeListWrapper {
result.push(' ');
}
}
PyString::new_bound(py, result.as_str())
PyString::new(py, result.as_str())
}

fn __repr__(slf: Py<PyMorphemeListWrapper>, py: Python) -> PyResult<Bound<PyString>> {
Expand All @@ -184,7 +185,7 @@ impl PyMorphemeListWrapper {
result.push_str(",\n");
}
result.push_str("]>");
Ok(PyString::new_bound(py, result.as_str()))
Ok(PyString::new(py, result.as_str()))
}

fn __iter__(slf: Py<Self>) -> PyMorphemeIter {
Expand Down Expand Up @@ -301,7 +302,7 @@ impl PyMorpheme {
let list = self.list(py);
let morph = self.morph(py);
match list.projection() {
None => PyString::new_bound(py, morph.surface().deref()),
None => PyString::new(py, morph.surface().deref()),
Some(proj) => proj.project(morph.deref(), py),
}
}
Expand All @@ -311,7 +312,7 @@ impl PyMorpheme {
/// See `Config.projection`.
#[pyo3(text_signature = "(self, /) -> str")]
fn raw_surface<'py>(&'py self, py: Python<'py>) -> Bound<'py, PyString> {
PyString::new_bound(py, self.morph(py).surface().deref())
PyString::new(py, self.morph(py).surface().deref())
}

/// Returns the part of speech as a six-element tuple.
Expand All @@ -334,20 +335,32 @@ impl PyMorpheme {

/// Returns the dictionary form.
#[pyo3(text_signature = "(self, /) -> str")]
fn dictionary_form<'py>(&'py self, py: Python<'py>) -> PyObject {
self.morph(py).get_word_info().dictionary_form().into_py(py)
fn dictionary_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<PyString>> {
Ok(self
.morph(py)
.get_word_info()
.dictionary_form()
.into_pyobject(py)?)
}

/// Returns the normalized form.
#[pyo3(text_signature = "(self, /) -> str")]
fn normalized_form<'py>(&'py self, py: Python<'py>) -> PyObject {
self.morph(py).get_word_info().normalized_form().into_py(py)
fn normalized_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<PyString>> {
Ok(self
.morph(py)
.get_word_info()
.normalized_form()
.into_pyobject(py)?)
}

/// Returns the reading form.
#[pyo3(text_signature = "(self, /) -> str")]
fn reading_form<'py>(&'py self, py: Python<'py>) -> PyObject {
self.morph(py).get_word_info().reading_form().into_py(py)
fn reading_form<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<PyString>> {
Ok(self
.morph(py)
.get_word_info()
.reading_form()
.into_pyobject(py)?)
}

/// Returns sub-morphemes in the provided split mode.
Expand Down Expand Up @@ -431,10 +444,10 @@ impl PyMorpheme {

/// Returns the list of synonym group ids.
#[pyo3(text_signature = "(self, /) -> List[int]")]
fn synonym_group_ids<'py>(&'py self, py: Python<'py>) -> Bound<PyList> {
fn synonym_group_ids<'py>(&'py self, py: Python<'py>) -> PyResult<Bound<PyList>> {
let mref = self.morph(py);
let ids = mref.get_word_info().synonym_group_ids();
PyList::new_bound(py, ids)
PyList::new(py, ids)
}

/// Returns the word info.
Expand All @@ -443,7 +456,7 @@ impl PyMorpheme {
/// Users should not touch the raw WordInfo.
#[pyo3(text_signature = "(self, /) -> WordInfo")]
fn get_word_info(&self, py: Python) -> PyResult<PyWordInfo> {
errors::warn_deprecation(py, "Users should not touch the raw WordInfo.")?;
errors::warn_deprecation(py, c_str!("Users should not touch the raw WordInfo."))?;
Ok(self.morph(py).get_word_info().clone().into())
}

Expand Down
10 changes: 4 additions & 6 deletions python/src/pos_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,23 +39,21 @@ pub struct PyPosMatcher {

impl PyPosMatcher {
pub(crate) fn create<'py>(
py: Python<'py>,
dic: &'py Arc<PyDicData>,
data: &Bound<'py, PyAny>,
) -> PyResult<PyPosMatcher> {
if data.is_callable() {
Self::create_from_fn(dic, data, py)
Self::create_from_fn(dic, data)
} else {
let iter = data.iter()?;
let iter = data.try_iter()?;
Self::create_from_iter(dic, &iter)
}
}

fn create_from_fn(dic: &Arc<PyDicData>, func: &Bound<PyAny>, py: Python) -> PyResult<Self> {
fn create_from_fn(dic: &Arc<PyDicData>, func: &Bound<PyAny>) -> PyResult<Self> {
let mut data = Vec::new();
for (pos_id, pos) in dic.pos.iter().enumerate() {
let args = PyTuple::new_bound(py, [pos]);
if func.call1(args)?.downcast::<PyBool>()?.is_true() {
if func.call1((pos,))?.downcast::<PyBool>()?.is_true() {
data.push(pos_id as u16);
}
}
Expand Down
21 changes: 9 additions & 12 deletions python/src/pretokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use std::sync::Arc;
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::sync::GILOnceCell;
use pyo3::types::{PyList, PySlice, PyTuple, PyType};
use pyo3::types::{PyList, PySlice, PyType};
use thread_local::ThreadLocal;

use sudachi::analysis::stateful_tokenizer::StatefulTokenizer;
Expand Down Expand Up @@ -154,8 +154,7 @@ impl PyPretokenizer {
}
Some(h) => {
let mrp: &Bound<PyAny> = morphs.bind(py);
let args = PyTuple::new_bound(py, [index, string, mrp]);
h.bind(py).call1(args)
h.bind(py).call1((index, string, mrp))
}
}
}
Expand All @@ -166,7 +165,7 @@ impl PyPretokenizer {
py: Python<'py>,
data: &Bound<'py, PyAny>,
) -> PyResult<Bound<'py, PyAny>> {
data.call_method1(intern!(py, "split"), PyTuple::new_bound(py, [self_]))
data.call_method1(intern!(py, "split"), (self_,))
}
}

Expand All @@ -175,12 +174,11 @@ fn make_result_for_surface<'py>(
morphs: &PyMorphemeList,
string: &Bound<'py, PyAny>,
) -> PyResult<Bound<'py, PyList>> {
let result = PyList::empty_bound(py);
let result = PyList::empty(py);
for idx in 0..morphs.len() {
let node = morphs.get(idx);
let slice = PySlice::new_bound(py, node.begin_c() as isize, node.end_c() as isize, 1);
let args = PyTuple::new_bound(py, [slice]);
let substring = string.call_method1(intern!(py, "slice"), args)?;
let slice = PySlice::new(py, node.begin_c() as isize, node.end_c() as isize, 1);
let substring = string.call_method1(intern!(py, "slice"), (slice,))?;
result.append(substring)?;
}
Ok(result)
Expand All @@ -191,20 +189,19 @@ fn make_result_for_projection<'py>(
morphs: &PyMorphemeList,
proj: &dyn MorphemeProjection,
) -> PyResult<Bound<'py, PyList>> {
let result = PyList::empty_bound(py);
let result = PyList::empty(py);
let nstring = {
static NORMALIZED_STRING: GILOnceCell<Py<PyType>> = GILOnceCell::new();
NORMALIZED_STRING.get_or_try_init(py, || -> PyResult<Py<PyType>> {
let ns = py.import_bound("tokenizers")?.getattr("NormalizedString")?;
let ns = py.import("tokenizers")?.getattr("NormalizedString")?;
let tpe = ns.downcast::<PyType>()?;
Ok(tpe.clone().unbind())
})?
};
for idx in 0..morphs.len() {
let node = morphs.get(idx);
let value = proj.project(&node, py);
let args = PyTuple::new_bound(py, [value]);
let substring = nstring.call1(py, args)?;
let substring = nstring.call1(py, (value,))?;
result.append(substring)?;
}
Ok(result)
Expand Down
Loading

0 comments on commit fc3ffa4

Please sign in to comment.