Skip to content

Commit

Permalink
prefer bound Python token over Python::with_gil
Browse files Browse the repository at this point in the history
When one is available, using an already-bound Python token is zero-cost.

Python::with_gil, by contrast, carries a runtime check on every call.

Ref: PyO3/pyo3#4274
  • Loading branch information
Michael-J-Ward committed Jun 25, 2024
1 parent a326fab commit f3bee82
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 35 deletions.
55 changes: 22 additions & 33 deletions src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -423,17 +423,15 @@ impl PyDataFrame {

/// Convert to Arrow Table
/// Collect the batches and pass to Arrow Table
fn to_arrow_table(&self, py: Python) -> PyResult<PyObject> {
fn to_arrow_table(&self, py: Python<'_>) -> PyResult<PyObject> {
let batches = self.collect(py)?.to_object(py);
let schema: PyObject = self.schema().into_py(py);

Python::with_gil(|py| {
// Instantiate pyarrow Table object and use its from_batches method
let table_class = py.import_bound("pyarrow")?.getattr("Table")?;
let args = PyTuple::new_bound(py, &[batches, schema]);
let table: PyObject = table_class.call_method1("from_batches", args)?.into();
Ok(table)
})
// Instantiate pyarrow Table object and use its from_batches method
let table_class = py.import_bound("pyarrow")?.getattr("Table")?;
let args = PyTuple::new_bound(py, &[batches, schema]);
let table: PyObject = table_class.call_method1("from_batches", args)?.into();
Ok(table)
}

fn execute_stream(&self, py: Python) -> PyResult<PyRecordBatchStream> {
Expand Down Expand Up @@ -464,51 +462,42 @@ impl PyDataFrame {

/// Convert to pandas dataframe with pyarrow
/// Collect the batches, pass to Arrow Table & then convert to Pandas DataFrame
fn to_pandas(&self, py: Python) -> PyResult<PyObject> {
fn to_pandas(&self, py: Python<'_>) -> PyResult<PyObject> {
let table = self.to_arrow_table(py)?;

Python::with_gil(|py| {
// See also: https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.to_pandas
let result = table.call_method0(py, "to_pandas")?;
Ok(result)
})
// See also: https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.to_pandas
let result = table.call_method0(py, "to_pandas")?;
Ok(result)
}

/// Convert to Python list using pyarrow
/// Each list item represents one row encoded as dictionary
fn to_pylist(&self, py: Python) -> PyResult<PyObject> {
fn to_pylist(&self, py: Python<'_>) -> PyResult<PyObject> {
let table = self.to_arrow_table(py)?;

Python::with_gil(|py| {
// See also: https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.to_pylist
let result = table.call_method0(py, "to_pylist")?;
Ok(result)
})
// See also: https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.to_pylist
let result = table.call_method0(py, "to_pylist")?;
Ok(result)
}

/// Convert to Python dictionary using pyarrow
/// Each dictionary key is a column and the dictionary value represents the column values
fn to_pydict(&self, py: Python) -> PyResult<PyObject> {
let table = self.to_arrow_table(py)?;

Python::with_gil(|py| {
// See also: https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.to_pydict
let result = table.call_method0(py, "to_pydict")?;
Ok(result)
})
// See also: https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.to_pydict
let result = table.call_method0(py, "to_pydict")?;
Ok(result)
}

/// Convert to polars dataframe with pyarrow
/// Collect the batches, pass to Arrow Table & then convert to polars DataFrame
fn to_polars(&self, py: Python) -> PyResult<PyObject> {
fn to_polars(&self, py: Python<'_>) -> PyResult<PyObject> {
let table = self.to_arrow_table(py)?;

Python::with_gil(|py| {
let dataframe = py.import_bound("polars")?.getattr("DataFrame")?;
let args = PyTuple::new_bound(py, &[table]);
let result: PyObject = dataframe.call1(args)?.into();
Ok(result)
})
let dataframe = py.import_bound("polars")?.getattr("DataFrame")?;
let args = PyTuple::new_bound(py, &[table]);
let result: PyObject = dataframe.call1(args)?.into();
Ok(result)
}

// Executes this DataFrame to get the total number of rows.
Expand Down
4 changes: 2 additions & 2 deletions src/sql/logical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ impl PyLogicalPlan {
impl PyLogicalPlan {
/// Return the specific logical operator
pub fn to_variant(&self, py: Python) -> PyResult<PyObject> {
Python::with_gil(|_| match self.plan.as_ref() {
match self.plan.as_ref() {
LogicalPlan::Aggregate(plan) => PyAggregate::from(plan.clone()).to_variant(py),
LogicalPlan::Analyze(plan) => PyAnalyze::from(plan.clone()).to_variant(py),
LogicalPlan::CrossJoin(plan) => PyCrossJoin::from(plan.clone()).to_variant(py),
Expand All @@ -85,7 +85,7 @@ impl PyLogicalPlan {
"Cannot convert this plan to a LogicalNode: {:?}",
other
))),
})
}
}

/// Get the inputs to this plan
Expand Down

0 comments on commit f3bee82

Please sign in to comment.